# Source code for flask_unchained.clips_pattern

#### PATTERN | EN | INFLECT ########################################################################
# -*- coding: utf-8 -*-
# Copyright (c) 2010 University of Antwerp, Belgium
# Author: Tom De Smedt <tom@organisms.be>
# License: BSD (see LICENSE.txt for details).

####################################################################################################
# Regular expressions-based rules for English word inflection:
# - pluralization and singularization of nouns and adjectives,
# - conjugation of verbs,
# - comparative and superlative of adjectives.

# Accuracy (measured on CELEX English morphology word forms):
# 95% for pluralize()
# 96% for singularize()

# DEV NOTES:
# upstream source file:
# https://github.com/clips/pattern/blob/e18fd2a5d6416d7ffdbd83c823027e1111d34af2/pattern/text/en/inflect.py
#
# de_camel source (same project, different source file):
# https://github.com/clips/pattern/blob/e18fd2a5d6416d7ffdbd83c823027e1111d34af2/pattern/text/__init__.py#L91
#
# Modified to add an LRU cache decorator (maybe_lru_cached).


from __future__ import unicode_literals
from __future__ import division

from builtins import str, range
import functools
import re

# Part-of-speech tag constants, one per word class.
VERB = "VB"
NOUN = "NN"
ADJECTIVE = "JJ"
ADVERB = "RB"


# Registry of the lazily created ``functools.lru_cache`` wrappers, keyed by the
# decorated function's ``__name__`` (functions sharing a name share an entry).
CACHED_FUNCTIONS = {}

def maybe_lru_cached(maxsize):
    """ Returns a decorator that memoizes a function with functools.lru_cache
        whenever the call can be cached, and calls it directly otherwise.

        The cache is bypassed when:
        - a ``custom`` keyword argument is passed (user-defined replacements),
        - any argument is unhashable (e.g., a dict passed positionally),
          which lru_cache would otherwise reject with a TypeError.

        The cached callable is created on the first call and stored in
        CACHED_FUNCTIONS under the function's name.
    """
    def wrapper(fn):
        fn_name = fn.__name__

        @functools.wraps(fn)
        def wrapped(*args, **kwargs):
            if fn_name not in CACHED_FUNCTIONS:
                CACHED_FUNCTIONS[fn_name] = functools.lru_cache(maxsize)(fn)

            cacheable = 'custom' not in kwargs
            if cacheable:
                # lru_cache hashes every argument; probe hashability up front
                # so that a TypeError raised by fn itself is never swallowed.
                try:
                    hash((args, tuple(kwargs.values())))
                except TypeError:
                    cacheable = False

            if not cacheable:
                return fn(*args, **kwargs)
            return CACHED_FUNCTIONS[fn_name](*args, **kwargs)
        return wrapped
    return wrapper


@functools.lru_cache(maxsize=256)
def de_camel(s, separator="_", _lowercase=True):
    """ Returns the string with a separator inserted at each CamelCase word
        boundary, lowercased unless _lowercase is False, e.g.,
        de_camel("TomDeSmedt", "-") => "tom-de-smedt"
        de_camel("getHTTPResponse2") => "get_http_response2"
    """
    # The separator goes between the two captured sides of a boundary.
    replacement = "\\1%s\\2" % separator
    boundaries = (
        r"([a-z0-9])([A-Z])",     # lowercase letter or digit, then uppercase
        r"([A-Z])([A-Z][a-z])",   # end of an uppercase run, then a new word
    )
    for boundary in boundaries:
        s = re.sub(boundary, replacement, s)
    if _lowercase:
        return s.lower()
    return s


#### PLURALIZE #####################################################################################
# Based on "An Algorithmic Approach to English Pluralization" by Damian Conway:
# http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html

# Prepositions are used in forms like "mother-in-law" and "man at arms".
# Membership is tested against the second word of a compound.
plural_prepositions = {
    "about", "above", "across", "after", "among", "around", "at", "athwart",
    "before", "behind", "below", "beneath", "beside", "besides", "between",
    "betwixt", "beyond", "but", "by",
    "during",
    "except",
    "for", "from",
    "in", "into",
    "near",
    "of", "off", "on", "onto", "out", "over",
    "since",
    "till", "to",
    "under", "until", "unto", "upon",
    "with",
}

# Inflection rules that are either:
# - general,
# - apply to a certain category of words,
# - apply to a certain category of words only in classical mode,
# - apply only in classical mode.
# Each rule is a (suffix, inflection, category, classic)-tuple.
# Rule groups are tried in order; within a group the first rule whose suffix
# pattern matches (and whose category/classic constraints hold) wins.
plural_rules = [
       # 0) Indefinite articles and demonstratives.
    ((   r"^a$|^an$", "some"       , None, False),
     (     r"^this$", "these"      , None, False),
     (     r"^that$", "those"      , None, False),
     (      r"^any$", "all"        , None, False)
    ), # 1) Possessive adjectives.
    ((       r"^my$", "our"        , None, False),
     (     r"^your$", "your"       , None, False),
     (      r"^thy$", "your"       , None, False),
     (r"^her$|^his$", "their"      , None, False),
     (      r"^its$", "their"      , None, False),
     (    r"^their$", "their"      , None, False)
    ), # 2) Possessive pronouns.
    ((     r"^mine$", "ours"       , None, False),
     (    r"^yours$", "yours"      , None, False),
     (    r"^thine$", "yours"      , None, False),
     (r"^her$|^his$", "theirs"     , None, False),
     (      r"^its$", "theirs"     , None, False),
     (    r"^their$", "theirs"     , None, False)
    ), # 3) Personal pronouns.
    ((        r"^I$", "we"         , None, False),
     (       r"^me$", "us"         , None, False),
     (   r"^myself$", "ourselves"  , None, False),
     (      r"^you$", "you"        , None, False),
     (r"^thou$|^thee$", "ye"       , None, False),
     ( r"^yourself$", "yourselves" , None, False),
     (  r"^thyself$", "yourselves" , None, False),
     ( r"^she$|^he$", "they"       , None, False),
     (r"^it$|^they$", "they"       , None, False),
     (r"^her$|^him$", "them"       , None, False),
     (r"^it$|^them$", "them"       , None, False),
     (  r"^herself$", "themselves" , None, False),
     (  r"^himself$", "themselves" , None, False),
     (   r"^itself$", "themselves" , None, False),
     ( r"^themself$", "themselves" , None, False),
     (  r"^oneself$", "oneselves"  , None, False)
    ), # 4) Words that do not inflect.
       # NOTE(review): "s-singular" words (e.g., "lens", "gas") are returned
       # unchanged by the rule below; confirm that this is intended rather
       # than "-ses".
    ((          r"$", ""  , "uninflected", False),
     (          r"$", ""  , "uncountable", False),
     (         r"s$", "s" , "s-singular" , False),
     (      r"fish$", "fish"       , None, False),
     (r"([- ])bass$", "\\1bass"    , None, False),
     (       r"ois$", "ois"        , None, False),
     (     r"sheep$", "sheep"      , None, False),
     (      r"deer$", "deer"       , None, False),
     (       r"pox$", "pox"        , None, False),
     (r"([A-Z].*)ese$", "\\1ese"   , None, False),
     (      r"itis$", "itis"       , None, False),
     (r"(fruct|gluc|galact|lact|ket|malt|rib|sacchar|cellul)ose$", "\\1ose", None, False)
    ), # 5) Irregular plural forms (e.g., mongoose, oxen).
    ((     r"atlas$", "atlantes"   , None, True ),
     (     r"atlas$", "atlases"    , None, False),
     (      r"beef$", "beeves"     , None, True ),
     (   r"brother$", "brethren"   , None, True ),
     (     r"child$", "children"   , None, False),
     (    r"corpus$", "corpora"    , None, True ),
     (    r"corpus$", "corpuses"   , None, False),
     (      r"^cow$", "kine"       , None, True ),
     ( r"ephemeris$", "ephemerides", None, False),
     (  r"ganglion$", "ganglia"    , None, True ),
     (     r"genie$", "genii"      , None, True ),
     (     r"genus$", "genera"     , None, False),
     (  r"graffito$", "graffiti"   , None, False),
     (      r"loaf$", "loaves"     , None, False),
     (     r"money$", "monies"     , None, True ),
     (  r"mongoose$", "mongooses"  , None, False),
     (    r"mythos$", "mythoi"     , None, False),
     (   r"octopus$", "octopodes"  , None, True ),
     (      r"opus$", "opera"      , None, True ),
     (      r"opus$", "opuses"     , None, False),
     (       r"^ox$", "oxen"       , None, False),
     (     r"penis$", "penes"      , None, True ),
     (     r"penis$", "penises"    , None, False),
     ( r"soliloquy$", "soliloquies", None, False),
     (    r"testis$", "testes"     , None, False),
     (    r"trilby$", "trilbys"    , None, False),
     (      r"turf$", "turves"     , None, True ),
     (     r"numen$", "numena"     , None, False),
     (   r"occiput$", "occipita"   , None, True )
    ), # 6) Irregular inflections for common suffixes (e.g., synopses, mice, men).
    ((       r"man$", "men"        , None, False),
     (    r"person$", "people"     , None, False),
     (r"([lm])ouse$", "\\1ice"     , None, False),
     (     r"tooth$", "teeth"      , None, False),
     (     r"goose$", "geese"      , None, False),
     (      r"foot$", "feet"       , None, False),
     (      r"zoon$", "zoa"        , None, False),
     ( r"([csx])is$", "\\1es"      , None, False)
    ), # 7) Fully assimilated classical inflections
       #    (e.g., vertebrae, codices).
    ((        r"ex$", "ices" , "ex-ices" , False),
     (        r"ex$", "ices" , "ex-ices*", True ), # * = classical mode
     (        r"um$", "a"    ,    "um-a" , False),
     (        r"um$", "a"    ,    "um-a*", True ),
     (        r"on$", "a"    ,    "on-a" , False),
     (         r"a$", "ae"   ,    "a-ae" , False),
     (         r"a$", "ae"   ,    "a-ae*", True )
    ), # 8) Classical variants of modern inflections
       #    (e.g., stigmata, soprani).
    ((      r"trix$", "trices"     , None, True),
     (       r"eau$", "eaux"       , None, True),
     (       r"ieu$", "ieux"       , None, True),
     ( r"([iay])nx$", "\\1nges"    , None, True),
     (        r"en$", "ina"  ,  "en-ina*", True),
     (         r"a$", "ata"  ,   "a-ata*", True),
     (        r"is$", "ides" , "is-ides*", True),
     (        r"us$", "i"    ,    "us-i*", True),
     (        r"us$", "us"   ,   "us-us*", True), # no trailing space: status => status
     (         r"o$", "i"    ,     "o-i*", True),
     (          r"$", "i"    ,      "-i*", True),
     (          r"$", "im"   ,     "-im*", True)
    ), # 9) -ch, -sh and -ss take -es in the plural
       #    (e.g., churches, classes).
    ((   r"([cs])h$", "\\1hes"     , None, False),
     (        r"ss$", "sses"       , None, False),
     (         r"x$", "xes"        , None, False)
    ), # 10) -f or -fe sometimes take -ves in the plural
       #     (e.g., lives, wolves).
    (( r"([aeo]l)f$", "\\1ves"     , None, False),
     ( r"([^d]ea)f$", "\\1ves"     , None, False),
     (       r"arf$", "arves"      , None, False),
     (r"([nlw]i)fe$", "\\1ves"     , None, False),
    ), # 11) -y takes -ys if preceded by a vowel, -ies otherwise
       #     (e.g., storeys, Marys, stories).
    ((r"([aeiou])y$", "\\1ys"      , None, False),
     (r"([A-Z].*)y$", "\\1ys"      , None, False),
     (         r"y$", "ies"        , None, False)
    ), # 12) -o sometimes takes -os, -oes otherwise.
       #     -o preceded by a vowel takes -os
       #     (e.g., lassos, potatoes, bamboos).
    ((         r"o$", "os",        "o-os", False),
     (r"([aeiou])o$", "\\1os"      , None, False),
     (         r"o$", "oes"        , None, False)
    ), # 13) Military stuff
       #     (e.g., Major Generals).
    ((         r"l$", "ls", "general-generals", False),
    ), # 14) Assume that the plural takes -s
       #     (cats, programmes, ...).
    ((          r"$", "s"          , None, False),)
]

# Compile every suffix pattern a single time, when the module is loaded,
# keeping the group structure and the other three fields of each rule.
plural_rules = [
    [(re.compile(suffix), inflection, category, classic)
     for suffix, inflection, category, classic in group]
    for group in plural_rules
]

# Suffix categories.
# Maps a category name used in plural_rules to the words it applies to.
# Categories whose name ends in "*" belong to rules that are only applied
# in classical mode.
plural_categories = {
    "uninflected": [
        "bison", "debris", "headquarters", "news", "swine",
        "bream", "diabetes", "herpes", "pincers", "trout",
        "breeches", "djinn", "high-jinks", "pliers", "tuna",
        "britches", "eland", "homework", "proceedings", "whiting",
        "carp", "elk", "innings", "rabies", "wildebeest",
        "chassis", "flounder", "jackanapes", "salmon",
        "clippers", "gallows", "mackerel", "scissors",
        "cod", "graffiti", "measles", "series",
        "contretemps", "mews", "shears",
        "corps", "mumps", "species",
    ],
    "uncountable": [
        "advice", "fruit", "ketchup", "meat", "sand",
        "bread", "furniture", "knowledge", "mustard", "software",
        "butter", "garbage", "love", "news", "understanding",
        "cheese", "gravel", "luggage", "progress", "water",
        "electricity", "happiness", "mathematics", "research",
        "equipment", "information", "mayonnaise", "rice",
    ],
    "s-singular": [
        "acropolis", "caddis", "dais", "glottis", "pathos",
        "aegis", "cannabis", "digitalis", "ibis", "pelvis",
        "alias", "canvas", "epidermis", "lens", "polis",
        "asbestos", "chaos", "ethos", "mantis", "rhinoceros",
        "bathos", "cosmos", "gas", "marquis", "sassafras",
        "bias", "metropolis", "trellis",
    ],
    "ex-ices": [
        "codex", "murex", "silex",
    ],
    "ex-ices*": [
        "apex", "index", "pontifex", "vertex",
        "cortex", "latex", "simplex", "vortex",
    ],
    "um-a": [
        "agendum", "candelabrum", "desideratum", "extremum", "stratum",
        "bacterium", "datum", "erratum", "ovum",
    ],
    "um-a*": [
        "aquarium", "emporium", "maximum", "optimum", "stadium",
        "compendium", "enconium", "medium", "phylum", "trapezium",
        "consortium", "gymnasium", "memorandum", "quantum", "ultimatum",
        "cranium", "honorarium", "millenium", "rostrum", "vacuum",
        "curriculum", "interregnum", "minimum", "spectrum", "velum",
        "dictum", "lustrum", "momentum", "speculum",
        # Correct spellings of "enconium" and "millenium"; the misspelled
        # entries above are kept so existing inputs behave as before.
        "encomium", "millennium",
    ],
    "on-a": [
        "aphelion", "hyperbaton", "perihelion",
        "asyndeton", "noumenon", "phenomenon",
        "criterion", "organon", "prolegomenon",
    ],
    "a-ae": [
        "alga", "alumna", "vertebra",
    ],
    "a-ae*": [
        "abscissa", "aurora", "hyperbola", "nebula",
        "amoeba", "formula", "lacuna", "nova",
        "antenna", "hydra", "medusa", "parabola",
    ],
    "en-ina*": [
        "foramen", "lumen", "stamen",
    ],
    "a-ata*": [
        "anathema", "dogma", "gumma", "miasma", "stigma",
        "bema", "drama", "lemma", "schema", "stoma",
        "carcinoma", "edema", "lymphoma", "oedema", "trauma",
        "charisma", "enema", "magma", "sarcoma",
        "diploma", "enigma", "melisma", "soma",
    ],
    "is-ides*": [
        "clitoris", "iris",
    ],
    "us-i*": [
        "focus", "nimbus", "succubus",
        "fungus", "nucleolus", "torus",
        "genius", "radius", "umbilicus",
        "incubus", "stylus", "uterus",
    ],
    "us-us*": [
        "apparatus", "hiatus", "plexus", "status",
        "cantus", "impetus", "prospectus",
        "coitus", "nexus", "sinus",
    ],
    "o-i*": [
        "alto", "canto", "crescendo", "soprano",
        "basso", "contralto", "solo", "tempo",
    ],
    "-i*": [
        "afreet", "afrit", "efreet",
    ],
    "-im*": [
        "cherub", "goy", "seraph",
    ],
    "o-os": [
        "albino", "dynamo", "guano", "lumbago", "photo",
        "archipelago", "embryo", "inferno", "magneto", "pro",
        "armadillo", "fiasco", "jumbo", "manifesto", "quarto",
        "commando", "generalissimo", "medico", "rhino",
        "ditto", "ghetto", "lingo", "octavo", "stylo",
    ],
    "general-generals": [
        "Adjutant", "Brigadier", "Lieutenant", "Major", "Quartermaster",
        "adjutant", "brigadier", "lieutenant", "major", "quartermaster",
    ],
}


@maybe_lru_cached(maxsize=256)
def pluralize(word, pos=NOUN, custom=None, classical=True):
    """ Returns the plural of a given word, e.g., child => children.
        Handles nouns and adjectives, using classical inflection by default
        (i.e., where "matrix" pluralizes to "matrices" and not "matrixes").
        The custom dictionary is for user-defined replacements.

        Genitives and compound words are handled by pluralizing the relevant
        part recursively. If no rule applies, the word is returned unchanged.
    """
    if custom and word in custom:
        return custom[word]
    # Recursive calls pass custom and classical by keyword: maybe_lru_cached
    # bypasses its cache when custom is a keyword, so a (non-hashable) dict
    # never reaches the LRU cache.
    # Recurse genitives.
    # Remove the apostrophe and any trailing -s,
    # form the plural of the resultant noun, and then append an apostrophe (dog's => dogs').
    if word.endswith(("'", "'s")):
        if word.endswith("'s"):
            # Strip exactly the "'s" suffix (boss's => boss, not "bo").
            stem = word[:-2]
        else:
            # dogs' => dog: drop the apostrophe and one trailing -s.
            stem = word[:-1]
            if stem.endswith("s"):
                stem = stem[:-1]
        w = pluralize(stem, pos, custom=custom, classical=classical)
        if w.endswith("s"):
            return w + "'"
        return w + "'s"
    # Recurse compound words
    # (e.g., Postmasters General, mothers-in-law, Roman deities).
    w = word.replace("-", " ").split(" ")
    if len(w) > 1:
        head, last = w[0], w[-1]
        # "X general" pluralizes X, unless X is itself a military rank
        # (major general => major generals).
        head_before_general = (
            w[1] in ("general", "General")
            and head not in plural_categories["general-generals"])
        if head_before_general or w[1] in plural_prepositions:
            # Replace only the leading word, not every occurrence of it.
            return pluralize(head, pos, custom=custom, classical=classical) \
                + word[len(head):]
        # Replace only the trailing word.
        return word[:len(word) - len(last)] \
            + pluralize(last, pos, custom=custom, classical=classical)
    # Only a very few number of adjectives inflect.
    groups = plural_rules[:2] if pos.startswith(ADJECTIVE) else plural_rules
    # Apply pluralization rules.
    for group in groups:
        for suffix, inflection, category, classic in group:
            # Classical rules only apply in classical mode.
            if classic and not classical:
                continue
            # A rule pertaining to a specific category of words.
            if category is not None and word not in plural_categories[category]:
                continue
            if suffix.search(word) is not None:
                return suffix.sub(inflection, word)
    return word
#### SINGULARIZE ###################################################################################
# Adapted from Bermi Ferrer's Inflector for Python:
# http://www.bermi.org/inflector/

# Copyright (c) 2006 Bermi Ferrer Martinez
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software to deal in this software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this software, and to permit
# persons to whom this software is furnished to do so, subject to the following
# condition:
#
# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THIS SOFTWARE.

# Each rule is a (suffix pattern, replacement template)-tuple.
singular_rules = [
    (r'(?i)(.)ae$'            , '\\1a'    ),
    (r'(?i)(.)itis$'          , '\\1itis' ),
    (r'(?i)(.)eaux$'          , '\\1eau'  ),
    (r'(?i)(quiz)zes$'        , '\\1'     ),
    (r'(?i)(matr)ices$'       , '\\1ix'   ),
    (r'(?i)(ap|vert|ind)ices$', '\\1ex'   ),
    (r'(?i)^(ox)en'           , '\\1'     ),
    (r'(?i)(alias|status)es$' , '\\1'     ),
    # NOTE(review): "[octop|vir]" is a character class, not an alternation,
    # so this also rewrites e.g. "cacti" and "radii"; left unchanged because
    # callers may rely on that.
    (r'(?i)([octop|vir])i$'   , '\\1us'   ),
    (r'(?i)(cris|ax|test)es$' , '\\1is'   ),
    (r'(?i)(shoe)s$'          , '\\1'     ),
    (r'(?i)(o)es$'            , '\\1'     ),
    (r'(?i)(bus)es$'          , '\\1'     ),
    (r'(?i)([m|l])ice$'       , '\\1ouse' ),
    (r'(?i)(x|ch|ss|sh)es$'   , '\\1'     ),
    (r'(?i)(m)ovies$'         , '\\1ovie' ),
    (r'(?i)(.)ombies$'        , '\\1ombie'),
    (r'(?i)(s)eries$'         , '\\1eries'),
    (r'(?i)([^aeiouy]|qu)ies$', '\\1y'    ),
    # -f, -fe sometimes take -ves in the plural
    # (e.g., lives, wolves).
    (r"([aeo]l)ves$"          , "\\1f"    ),
    (r"([^d]ea)ves$"          , "\\1f"    ),
    (r"arves$"                , "arf"     ),
    (r"erves$"                , "erve"    ),
    (r"([nlw]i)ves$"          , "\\1fe"   ),
    (r'(?i)([lr])ves$'        , '\\1f'    ),
    (r"([aeo])ves$"           , "\\1ve"   ),
    (r'(?i)(sive)s$'          , '\\1'     ),
    (r'(?i)(tive)s$'          , '\\1'     ),
    (r'(?i)(hive)s$'          , '\\1'     ),
    (r'(?i)([^f])ves$'        , '\\1fe'   ),
    # -ses suffixes.
    (r'(?i)(^analy)ses$'      , '\\1sis'  ),
    # Group 1 already holds the whole stem ("analy", "ba", ...), so only
    # \1 is used: psychoanalyses => psychoanalysis.
    (r'(?i)((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$', '\\1sis'),
    (r'(?i)(.)opses$'         , '\\1opsis'),
    (r'(?i)(.)yses$'          , '\\1ysis' ),
    (r'(?i)(h|d|r|o|n|b|cl|p)oses$', '\\1ose'),
    (r'(?i)(fruct|gluc|galact|lact|ket|malt|rib|sacchar|cellul)ose$', '\\1ose'),
    (r'(?i)(.)oses$'          , '\\1osis' ),
    # -a
    (r'(?i)([ti])a$'          , '\\1um'   ),
    (r'(?i)(n)ews$'           , '\\1ews'  ),
    (r'(?i)s$'                , ''        ),
]

# For performance, compile the regular expressions only once:
singular_rules = [(re.compile(r[0]), r[1]) for r in singular_rules]

singular_uninflected = {
    "bison", "debris", "headquarters", "pincers", "trout",
    "bream", "diabetes", "herpes", "pliers", "tuna",
    "breeches", "djinn", "high-jinks", "proceedings", "whiting",
    "britches", "eland", "homework", "rabies", "wildebeest",
    "carp", "elk", "innings", "salmon",
    "chassis", "flounder", "jackanapes", "scissors",
    "christmas", "gallows", "mackerel", "series",
    "clippers", "georgia", "measles", "shears",
    "cod", "graffiti", "mews", "species",
    "contretemps", "mumps", "swine",
    "corps", "news", "swiss",
}

singular_uncountable = {
    "advice", "equipment", "happiness", "luggage", "news", "software",
    "bread", "fruit", "information", "mathematics", "progress", "understanding",
    "butter", "furniture", "ketchup", "mayonnaise", "research", "water",
    "cheese", "garbage", "knowledge", "meat", "rice",
    "electricity", "gravel", "love", "mustard", "sand",
}

# Singular words ending in -ie, whose plural only adds -s.
# NOTE(review): "^pie" and "^tie" carry a literal "^" -- presumably a
# whole-word anchor; confirm that the consumer honours it.
singular_ie = {
    "alergie", "cutie", "hoagie", "newbie", "softie", "veggie",
    "auntie", "doggie", "hottie", "nightie", "sortie", "weenie",
    "beanie", "eyrie", "indie", "oldie", "stoolie", "yuppie",
    "birdie", "freebie", "junkie", "^pie", "sweetie", "zombie",
    "bogie", "goonie", "laddie", "pixie", "techie",
    "bombie", "groupie", "laramie", "quickie", "^tie",
    "collie", "hankie", "lingerie", "reverie", "toughie",
    "cookie", "hippie", "meanie", "rookie", "valkyrie",
}

# Irregular plural => singular.
singular_irregular = {
    "atlantes": "atlas",
    "atlases": "atlas",
    "axes": "axe",
    "beeves": "beef",
    "brethren": "brother",
    "children": "child",
    "corpora": "corpus",
    "corpuses": "corpus",
    "ephemerides": "ephemeris",
    "feet": "foot",
    "ganglia": "ganglion",
    "geese": "goose",
    "genera": "genus",
    "genii": "genie",
    "graffiti": "graffito",
    "helves": "helve",
    "kine": "cow",
    "leaves": "leaf",
    "loaves": "loaf",
    "men": "man",
    "mongooses": "mongoose",
    "monies": "money",
    "moves": "move",
    "mythoi": "mythos",
    "numena": "numen",
    "occipita": "occiput",
    "octopodes": "octopus",
    "opera": "opus",
    "opuses": "opus",
    "our": "my",
    "oxen": "ox",
    "penes": "penis",
    "penises": "penis",
    "people": "person",
    "sexes": "sex",
    "soliloquies": "soliloquy",
    "teeth": "tooth",
    "testes": "testis",
    "trilbys": "trilby",
    "turves": "turf",
    "zoa": "zoon",
}
@maybe_lru_cached(maxsize=256)
def singularize(word, pos=NOUN, custom=None):
    """ Returns the singular of a given word, e.g., children => child.
        The custom dictionary is for user-defined replacements.

        Lookups are tried in this order: custom replacements, compound words,
        genitives, uninflected and uncountable words, -ie words, irregular
        forms, and finally the suffix rules in singular_rules.
        If nothing applies, the word is returned unchanged.
    """
    if custom and word in custom:
        return custom[word]
    # Recursive calls pass custom by keyword: maybe_lru_cached bypasses its
    # cache when custom is a keyword, so a (non-hashable) dict never reaches
    # the LRU cache.
    # Recurse compound words (e.g. mothers-in-law).
    if "-" in word:
        w = word.split("-")
        if len(w) > 1 and w[1] in plural_prepositions:
            return singularize(w[0], pos, custom=custom) + "-" + "-".join(w[1:])
    # dogs' => dog's
    if word.endswith("'"):
        return singularize(word[:-1], pos, custom=custom) + "'s"
    w = word.lower()
    # Words that are identical in singular and plural are matched exactly,
    # so that e.g. "ears" is not mistaken for "shears".
    if w in singular_uninflected or w in singular_uncountable:
        return word
    # Words whose singular ends in -ie only drop the -s.
    for x in singular_ie:
        if x.startswith("^"):
            # A leading "^" marks an entry that must match the whole word
            # (pies => pie, but not puppies).
            found = (w == x[1:] + "s")
        else:
            found = w.endswith(x + "s")
        if found:
            # Slice the original word to preserve its case.
            return word[:-1]
    # Irregular forms are matched as a suffix (e.g., women => woman).
    # NOTE(review): short keys such as "our" therefore also match inside
    # unrelated singular words (e.g., "hour"); confirm whether such keys
    # should be restricted to whole-word matches.
    for x in singular_irregular:
        if w.endswith(x):
            return re.sub('(?i)' + re.escape(x) + '$', singular_irregular[x], word)
    for suffix, inflection in singular_rules:
        m = suffix.search(word)
        if m is None:
            continue
        # Drop references to groups that did not take part in the match.
        for k, group in enumerate(m.groups(), 1):
            if group is None:
                inflection = inflection.replace('\\' + str(k), '')
        return suffix.sub(inflection, word)
    return word
# Names exported by ``from <module> import *``.
__all__ = ['de_camel', 'pluralize', 'singularize']