Generate a “graphical” Zipf distribution for the entire text of Moby Dick

We first remove HTML tags from a version of the text found on the web (the Regex.Replace call). The Split call then breaks the entire text into words and continues with a LINQ grouping expression that tallies distinct words into an anonymous type. After deriving a scaling factor for the graph, the final loop prints an ASCII histogram bar for each of the top 35 words.

moby_dick.html

using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

/// <summary>
/// Prints an ASCII histogram of the most frequent words found in
/// <c>moby_dick.html</c>.
/// </summary>
class MainClass
{
	/// <summary>
	/// Reads the HTML file from the current directory, strips its tags,
	/// tallies words case-insensitively and writes one bar of asterisks
	/// for each of the 35 most frequent words to standard output.
	/// </summary>
	static void Main()
	{
		String text;

		// Dispose the reader so the file handle is released as soon as the text is loaded.
		using (var reader = new StreamReader("moby_dick.html"))
		{
			text = reader.ReadToEnd();
		}

		// Lazily match everything from an opening angle bracket to the next
		// closing one (newlines included) and remove it.
		text = Regex.Replace(text, "<(.|\n)*?>", String.Empty);

		// ToLowerInvariant keeps the grouping keys independent of the current culture.
		// ToArray materializes the query once; it is read twice below, and a deferred
		// query would repeat the grouping and sorting on each enumeration.
		var tallies = text
			.Split(" \n\",.;-!?".ToCharArray(), StringSplitOptions.RemoveEmptyEntries)
			.GroupBy(w => w.ToLowerInvariant())
			.Select(g => new { g.Key, Tally = g.Count() })
			.OrderByDescending(e => e.Tally)
			.ToArray();

		// Nothing to plot when the file contains no words.
		if (tallies.Length == 0)
		{
			return;
		}

		// The most frequent word gets a bar of about 60 asterisks. Clamp to 1 so that
		// a top tally below 60 does not produce a zero divisor.
		int scale = Math.Max(1, tallies[0].Tally / 60);

		foreach (var tally in tallies.Take(35))
		{
			Console.WriteLine("{0,6} {1}", tally.Key, new String('*', tally.Tally / scale));
		}
	}
}
Output:
   the ************************************************************
    of ***************************
   and **************************
    to *******************
     a *******************
    in *****************
  that ************
   his **********
    it **********
     i ********
   but *******
    he *******
    as *******
  with *******
    is *******
   was ******
   for ******
   all ******
  this *****
    at *****
    by ****
   not ****
  from ****
   him ****
    so ****
    on ****
 whale ****
    be ****
   one ***
   you ***
 there ***
   now ***
   had ***
  have ***
    or **