Articles

Pluralization Helper for C#

In .Net, Rails on October 28, 2009 by Matt Grande Tagged: , , ,

UPDATE – This code is now available on GitHub.  Click here for the repo.

I recently wanted a pluralization inflector in a C# project, one similar to the one in Ruby on Rails. Unable to find a satisfactory one, I whipped up my own. Here’s what I’ve got.

namespace MyNamespace
{
    /// <summary>
    /// English pluralization helper, loosely inspired by the Ruby on Rails inflector.
    /// </summary>
    public class Formatting
    {
        // Words whose plural is spelled exactly like the singular.
        private static readonly IList<string> Unpluralizables = new List<string>
        { "equipment", "information", "rice", "money", "species", "series", "fish", "sheep", "deer" };

        // Ordered rule table: regex pattern -> replacement. The FIRST matching rule wins,
        // so the order below is significant. An array is used instead of a Dictionary
        // because Dictionary enumeration order is not guaranteed.
        private static readonly KeyValuePair<string, string>[] Pluralizations =
        {
            // Start with the rarest cases, and move to the most common.
            // Every pattern is anchored so that it only fires on the END of the word
            // (or on the whole word) and never on a fragment in the middle of it.
            new KeyValuePair<string, string>("person$", "people"),       // also salesperson
            new KeyValuePair<string, string>("^ox$", "oxen"),            // but not box, fox
            new KeyValuePair<string, string>("child$", "children"),      // also grandchild
            new KeyValuePair<string, string>("foot$", "feet"),           // but not football
            new KeyValuePair<string, string>("tooth$", "teeth"),
            new KeyValuePair<string, string>("^goose$", "geese"),        // but not mongoose
            // And now the more standard rules.
            new KeyValuePair<string, string>("(.*)fe?$", "$1ves"),       // e.g. wolf, wife
            new KeyValuePair<string, string>("(.*)man$", "$1men"),
            new KeyValuePair<string, string>("(.+[aeiou]y)$", "$1s"),
            new KeyValuePair<string, string>("(.+[^aeiou])y$", "$1ies"),
            new KeyValuePair<string, string>("(.+z)$", "$1zes"),
            new KeyValuePair<string, string>("([ml])ouse$", "$1ice"),
            new KeyValuePair<string, string>("(.+)(e|i)x$", "$1ices"),   // e.g. matrix, index
            new KeyValuePair<string, string>("(octop|vir)us$", "$1i"),
            new KeyValuePair<string, string>("(.+(s|x|sh|ch))$", "$1es"),
            new KeyValuePair<string, string>("(.+)", "$1s")
        };

        /// <summary>
        /// Returns the form of <paramref name="singular"/> that agrees with <paramref name="count"/>.
        /// </summary>
        /// <param name="count">Number of items being described; exactly 1 keeps the singular form.</param>
        /// <param name="singular">The singular word. Matching is case-sensitive; rules are written for lower-case input.</param>
        /// <returns>
        /// <paramref name="singular"/> unchanged when <paramref name="count"/> is 1 or the word is
        /// unpluralizable; otherwise the result of the first matching pluralization rule.
        /// </returns>
        public static string Pluralize(int count, string singular)
        {
            if (count == 1)
            {
                return singular;
            }

            if (Unpluralizables.Contains(singular))
            {
                return singular;
            }

            foreach (var pluralization in Pluralizations)
            {
                if (Regex.IsMatch(singular, pluralization.Key))
                {
                    return Regex.Replace(singular, pluralization.Key, pluralization.Value);
                }
            }

            // Only reachable when not even the catch-all "(.+)" matched (e.g. an empty
            // string); hand the input back rather than inventing a value.
            return singular;
        }
    }
}

And of course, some NUnit tests.

namespace AutomatedTests
{
    /// <summary>
    /// NUnit tests for Formatting.Pluralize: regular rules, irregular words,
    /// and words whose plural equals the singular.
    /// </summary>
    [TestFixture]
    public class FormattingTests
    {
        /// <summary>Exercises each of the regular, rule-based pluralizations.</summary>
        [Test]
        public void StandardPluralizationTests()
        {
            var expectations = new Dictionary<string, string>
            {
                { "sausage", "sausages" },  // Most words - just add an 's'
                { "status", "statuses" },   // Ends in 's' - add 'es'
                { "ax", "axes" },           // Ends in 'x' - add 'es'
                { "octopus", "octopi" },    // Some words ending in 'us' - replace 'us' with 'i'
                { "virus", "viri" },        // Some words ending in 'us' - replace 'us' with 'i'
                { "crush", "crushes" },     // Ends in 'sh' - add 'es'
                { "crutch", "crutches" },   // Ends in 'ch' - add 'es'
                { "matrix", "matrices" },   // Ends in 'ix' - replace with 'ices'
                { "index", "indices" },     // Ends in 'ex' - replace with 'ices'
                { "mouse", "mice" },        // Some words ending in 'ouse' - replace with 'ice'
                { "quiz", "quizzes" },      // Ends in 'z' - add 'zes'
                { "mailman", "mailmen" },   // Ends in 'man' - replace with 'men'
                { "man", "men" },           // Ends in 'man' - replace with 'men'
                { "wolf", "wolves" },       // Ends in 'f' - replace with 'ves'
                { "wife", "wives" },        // Ends in 'fe' - replace with 'ves'
                { "day", "days" },          // Ends in '[vowel]y' - replace with 'ys'
                { "sky", "skies" }          // Ends in '[consonant]y' - replace with 'ies'
            };

            VerifyPluralForms(expectations);
        }

        /// <summary>Exercises the hard-coded irregular words.</summary>
        [Test]
        public void IrregularPluralizationTests()
        {
            var expectations = new Dictionary<string, string>
            {
                { "person", "people" },
                { "child", "children" },
                { "ox", "oxen" }
            };

            VerifyPluralForms(expectations);
        }

        /// <summary>Words with identical singular and plural must come back unchanged for any count.</summary>
        [Test]
        public void NonPluralizingPluralizationTests()
        {
            string[] invariantWords =
            {
                "equipment", "information", "rice", "money", "species", "series", "fish", "sheep", "deer"
            };

            foreach (var invariantWord in invariantWords)
            {
                Assert.AreEqual(invariantWord, Formatting.Pluralize(2, invariantWord));
                Assert.AreEqual(invariantWord, Formatting.Pluralize(1, invariantWord));
            }
        }

        // For every singular -> plural pair: a count of 2 must give the plural,
        // and a count of 1 must give the singular back untouched.
        private static void VerifyPluralForms(IDictionary<string, string> expectations)
        {
            foreach (var expectation in expectations)
            {
                Assert.AreEqual(expectation.Value, Formatting.Pluralize(2, expectation.Key));
                Assert.AreEqual(expectation.Key, Formatting.Pluralize(1, expectation.Key));
            }
        }
    }
}

And finally, usage.

// Formatting.Pluralize(count, singular): a count of exactly 1 keeps the singular form;
// any other count yields the plural.
var output = Formatting.Pluralize(2, "item");
// Produces "items"
output = Formatting.Pluralize(5, "sheep");
// Produces "sheep"
output = Formatting.Pluralize(100, "sausage");
// Produces "sausages"
output = Formatting.Pluralize(1, "sausage");
// Produces "sausage"

Now, I’m sure that I’m missing some cases in there. For example, I haven’t found a good way to pluralize “proof.” If any of you wonderful people find another missing case, or if you want to add one, let me know in the comments.

Advertisements

21 Responses to “Pluralization Helper for C#”

  1. HI

    I am Naveen. I have learnt how to run Selenium IDE and Selenium RC. The main thing I need is how to generate a test report after the tests have run successfully. I am using Selenium RC and NUnit for the testing. How do I get the test actions and descriptions into a report? Another thing: how do I run Selenium Core, and when is it useful? Please also suggest some more open source tools for testing .Net and Java projects…

    Try to help me…..

    Thanks
    Naveen

  2. Been meaning to write one of these for ages, ever since I used the lovely Ruby/Rails one, so I hit Google to see if anyone had done it for me..
    Your code works like a charm, many thanks for sharing!

    I wrapped your stuff up with a method signature like this:

    public static string Pluralize(this string s, int count)

    ..to create an extension method for strings, now I can smile to myself and remember rails each time I do something like this in c#:

    string foo = “Mushroom”.Pluralize(numberOfMushrooms)

    John.
    Nottingham,
    UK

  3. Hi John,

    Thanks for commenting, glad you like it. I never even considered adding an extension method, that’s a great idea. Thanks!

    – Matt.

  4. […] singularize words in C# – specifically, I needed to singularize table names. I was able to locate code for pluralizing words, but nothing for singularization. So I converted the singularization […]

  5. I used your code to create a Singularizer; you can find it here: http://lotsacode.wordpress.com/2010/03/05/singularization-pluralization-in-c/

    cheers,
    mattias

  6. Is this a verbatim port? Two quick suggestions (perf/memory):

    private static readonly HashSet<string> Unpluralizables = new HashSet<string>
    { “equipment”, “information”, “rice”, “money”, “species”, “series”, “fish”, “sheep”, “deer” };

    The reason for this is that the HashSet is much faster at doing set operations (e.g. Contains) than List.

    private static readonly IList<Tuple<Regex, string>> Pluralizations = new List<Tuple<Regex, string>> …

    Small perf gain from not forcing the BCL to determine whether it has already compiled the regex expression you are giving it.

    Furthermore, if you are going to be doing a scan (foreach KeyValuePair) you shouldn’t be using a Dictionary because there is memory and performance overhead.

    Otherwise, great work!

  7. Hi Jonathan,

    Thanks for your reply! This isn’t a verbatim port. The Rails version was more of an inspiration than anything.

    The reason I wasn’t using a HashSet was because I wrote this for a .Net 2.0 project, and HashSet was introduced in 3.0.

    Again, thanks for the comment!

  8. Some common words that fail…

    box, fox, paradox, equinox, toxin, giraffe, safe, mongoose, football, human, talisman,
    annex, complex, reflex, sex, axis, cervix, fix, mix, prefix, six, suffix, computer mouse (debatable), phylum, crisis, aircraft, deer, trout, swine, criterion, spectrum, millenium, radius, fungus, etc. etc.

    fungus currently comes out as vesungus!

    I’d also suggest some of the ‘unpluralizables’ aren’t really needed because the function only makes sense to be called for countable objects.

  9. private static readonly IDictionary<string, string> Pluralizations = new Dictionary<string, string>
    {
    { “person$”, “people” },
    { “^ox$”, “oxen” },
    { “child$”, “children” },
    { “foot$”, “feet” },
    { “tooth$”, “teeth” },
    { “goose$”, “geese” },
    { “(deer|trout|swine|sheep|fish)$”, “$1” },
    { “(.*)[^af]fe?$”, “$1ves” }, // ie, wolf, wife, but not for giraffe, gaffe, safe
    { “(hu|talis|otto|Ger|Ro|brah)man$”, “$1mans” }, // exceptions for man -> men
    { “(.*)man$”, “$1men” },
    { “(.+[^aeiou])y$”, “$1ies” },
    { “(.+zz)$”, “$1es” }, // buzz -> buzzes
    { “(.+z)$”, “$1zes” }, // quiz -> quizzes
    { “([m|l])ouse$”, “$1ice” },
    { “(append|matr|ind)[ie]x$”, @”$1ices”},
    { “(octop|vir|radi|fung)us$”, “$1i”},
    { “(phyl|milleni|spectr)um$”, “$1a” },
    { “(cris|ax)is$”, “$1es” },
    { “(.+(s|x|sh|ch))$”, @”$1es”},
    { “(.+)ies$”, “$1ies” },
    { “(.+)”, @”$1s” }
    };

    public static string Pluralize(int count, string singular)
    {
    if (count == 1 || singular.Trim().Length == 0) return singular;
    var match = Pluralizations.Where(p => Regex.IsMatch(singular, p.Key)).FirstOrDefault();
    return Regex.Replace(singular, match.Key, match.Value);
    }

  10. “Fertiliser” gets replaced with “vesrtiliser” as it seems to match the (.*)fe? regex pattern. Is there a quick fix for this?

    Also, “Human” gets replaced with “Humen”.

  11. Also, perhaps it would be better if the pluralize method optionally took the pluralized word to use if count != 1.

  12. Thanks for the comments, everyone! I’ve been out of the country and unable to reply. I’ve been thinking of doing some updates to this (and putting it on GitHub so others can edit it). Thanks for finding these problems!

  13. Nice work, one comment though … doesn’t work with non-lower case words.

    eg:
    Person -> Persons
    person -> people

    One fix is to

    1) add RegexOptions.IgnoreCase on the regex operations.
    2) change the whole word replacements to at least handle proper case:

    eg: “(p|P)erson$”, “$1eople”

    • Hey Brad, thanks for pointing this out! I’ve made some fairly substantial changes to this within the past couple months, and I keep meaning to update the post. I’ll be sure to make sure it works with multiple cases, though!

  14. Have you any updates?
    You could consider putting this on github and allowing people to extend?

  15. Thanks for posting this!

  16. hi, is there a way to pluralize in other languages? Thanks.

  17. Try pluralizing “definitions”

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: