"""
Overview:
Useful utilities for word inflections.
Extended based on `jpvanhal/inflection <https://github.com/jpvanhal/inflection>`_.
"""
import re
import unicodedata
# Public API of this module, listed alphabetically.
__all__ = [
    'camelize', 'dasherize', 'humanize',
    'ordinal', 'ordinalize',
    'parameterize', 'pluralize',
    'singularize',
    'tableize', 'titleize', 'transliterate',
    'underscore',
]
# Ordered (pattern, replacement) rules used to build plural forms.
# The first rule whose pattern is found in the word is the one applied,
# so specific rules must stay ahead of the generic fallbacks at the bottom.
PLURALS = [
    # Irregular stems and words that are already plural.
    (r"(?i)(quiz)$", r'\1zes'),
    (r"(?i)^(oxen)$", r'\1'),
    (r"(?i)^(ox)$", r'\1en'),
    (r"(?i)(m|l)ice$", r'\1ice'),
    (r"(?i)(m|l)ouse$", r'\1ice'),
    (r"(?i)(passer)s?by$", r'\1sby'),
    (r"(?i)(matr|vert|ind)(?:ix|ex)$", r'\1ices'),

    # Ending-based rules.
    (r"(?i)(x|ch|ss|sh)$", r'\1es'),
    (r"(?i)([^aeiouy]|qu)y$", r'\1ies'),
    (r"(?i)(hive)$", r'\1s'),
    (r"(?i)([lr])f$", r'\1ves'),
    (r"(?i)([^f])fe$", r'\1ves'),
    (r"(?i)sis$", 'ses'),
    (r"(?i)([ti])a$", r'\1a'),
    (r"(?i)([ti])um$", r'\1a'),
    (r"(?i)(buffal|potat|tomat)o$", r'\1oes'),
    (r"(?i)(bu)s$", r'\1ses'),
    (r"(?i)(alias|status)$", r'\1es'),
    (r"(?i)(octop|vir)i$", r'\1i'),
    (r"(?i)(octop|vir)us$", r'\1i'),
    (r"(?i)^(ax|test)is$", r'\1es'),

    # Fallbacks: keep a trailing "s" as is, otherwise append one.
    (r"(?i)s$", 's'),
    (r"$", 's'),
]
# Ordered (pattern, replacement) rules used to build singular forms.
# The first rule whose pattern is found in the word is the one applied,
# so specific rules must stay ahead of the generic fallbacks at the bottom.
SINGULARS = [
    # Irregular stems.
    (r"(?i)(database)s$", r'\1'),
    (r"(?i)(quiz)zes$", r'\1'),
    (r"(?i)(matr)ices$", r'\1ix'),
    (r"(?i)(vert|ind)ices$", r'\1ex'),
    (r"(?i)(passer)sby$", r'\1by'),
    (r"(?i)^(ox)en", r'\1'),
    (r"(?i)(alias|status)(es)?$", r'\1'),
    (r"(?i)(octop|vir)(us|i)$", r'\1us'),
    (r"(?i)^(a)x[ie]s$", r'\1xis'),
    (r"(?i)(cris|test)(is|es)$", r'\1is'),
    (r"(?i)(shoe)s$", r'\1'),
    (r"(?i)(o)es$", r'\1'),
    (r"(?i)(bus)(es)?$", r'\1'),
    (r"(?i)(m|l)ice$", r'\1ouse'),

    # Ending-based rules.
    (r"(?i)(x|ch|ss|sh)es$", r'\1'),
    (r"(?i)(m)ovies$", r'\1ovie'),
    (r"(?i)(s)eries$", r'\1eries'),
    (r"(?i)([^aeiouy]|qu)ies$", r'\1y'),
    (r"(?i)([lr])ves$", r'\1f'),
    (r"(?i)(tive)s$", r'\1'),
    (r"(?i)(hive)s$", r'\1'),
    (r"(?i)([^f])ves$", r'\1fe'),

    # Words in "-sis" / "-ses".
    (r"(?i)(t)he(sis|ses)$", r"\1hesis"),
    (r"(?i)(s)ynop(sis|ses)$", r"\1ynopsis"),
    (r"(?i)(p)rogno(sis|ses)$", r"\1rognosis"),
    (r"(?i)(p)arenthe(sis|ses)$", r"\1arenthesis"),
    (r"(?i)(d)iagno(sis|ses)$", r"\1iagnosis"),
    (r"(?i)(b)a(sis|ses)$", r"\1asis"),
    (r"(?i)(a)naly(sis|ses)$", r"\1nalysis"),

    # Remaining special endings and the generic fallbacks.
    (r"(?i)([ti])a$", r'\1um'),
    (r"(?i)(n)ews$", r'\1ews'),
    (r"(?i)(ss)$", r'\1'),
    (r"(?i)s$", ''),
]
# Lower-case words whose singular and plural forms are the same;
# pluralize() and singularize() return such words unchanged.
UNCOUNTABLES = {
    'equipment', 'fish', 'information',
    'jeans', 'money', 'rice',
    'series', 'sheep', 'species',
}
def _irregular(singular: str, plural: str, *plurals: str) -> None:
    """
    Register an irregular word by prepending matching rules to the
    module-level ``PLURALS`` and ``SINGULARS`` lists.

    Rules are inserted at index 0, so they are tried before every rule that
    was already present.

    :param singular: irregular word in singular form (such as `it`)
    :param plural: irregular word in plural form (such as `they`)
    :param plurals: extended words in plural form (such as `them`); these \
        only receive a plural -> singular rule.
    """

    def _any_case(text: str) -> str:
        # Build a per-character class, e.g. 'en' -> '[eE][nN]'.
        pieces = []
        for ch in text:
            pieces.append('[' + ch + ch.upper() + ']')
        return ''.join(pieces)

    def _same_initial(first: str, second: str) -> bool:
        return first[0].upper() == second[0].upper()

    def _add_singular_rules(single: str, multi: str) -> None:
        if _same_initial(single, multi):
            # Shared initial: capture it so its case is carried over.
            SINGULARS.insert(0, (
                r"(?i)({}){}$".format(multi[0], multi[1:]),
                r'\1' + single[1:],
            ))
            return

        # Different initials: one rule per case of the leading letter,
        # upper-case variant inserted first, lower-case variant second.
        tail = _any_case(multi[1:])
        for recase in (str.upper, str.lower):
            SINGULARS.insert(0, (
                r"{}{}$".format(recase(multi[0]), tail),
                recase(single[0]) + single[1:],
            ))

    def _add_plural_rules(single: str, multi: str) -> None:
        # Both the singular and the (already) plural spelling map to the
        # plural; the singular spelling is registered first.
        if _same_initial(single, multi):
            for source in (single, multi):
                PLURALS.insert(0, (
                    r"(?i)({}){}$".format(source[0], source[1:]),
                    r'\1' + multi[1:],
                ))
            return

        for source in (single, multi):
            tail = _any_case(source[1:])
            for recase in (str.upper, str.lower):
                PLURALS.insert(0, (
                    r"{}{}$".format(recase(source[0]), tail),
                    recase(multi[0]) + multi[1:],
                ))

    _add_plural_rules(singular, plural)
    for form in (plural, *plurals):
        _add_singular_rules(singular, form)
def camelize(string: str, uppercase_first_letter: bool = True) -> str:
    """
    Overview:
        Convert strings to CamelCase.

    :param string: Original string.
    :param uppercase_first_letter: if set to `True` :func:`camelize` converts
        strings to UpperCamelCase. If set to `False` :func:`camelize` produces
        lowerCamelCase. Defaults to `True`.
    :return: The camel-cased string. An empty string is returned unchanged.

    Examples::
        >>> camelize("device_type")
        'DeviceType'
        >>> camelize("device_type", False)
        'deviceType'
        >>> camelize("", False)
        ''

    .. note::
        :func:`camelize` can be thought of as a inverse of :func:`underscore`,
        although there are some cases where that does not hold::

            >>> camelize(underscore("IOError"))
            'IoError'
    """
    if uppercase_first_letter:
        # Upper-case the character at the start and after every underscore,
        # dropping the underscore itself.
        return re.sub(r"(?:^|_)(.)", lambda m: m.group(1).upper(), string)
    if not string:
        # Nothing to lower-case; indexing string[0] below would raise.
        return string
    return string[0].lower() + camelize(string)[1:]
def dasherize(word: str) -> str:
    """
    Overview:
        Replace underscores with dashes in the string.

    :param word: Original word.
    :return: Copy of ``word`` with every ``_`` replaced by ``-``.

    Example::
        >>> dasherize("puni_puni")
        'puni-puni'
    """
    return word.replace('_', '-')
def humanize(word: str) -> str:
    """
    Overview:
        Capitalize the first word and turn underscores into spaces and strip a
        trailing ``"_id"``, if any. Like :func:`titleize`, this is meant for
        creating pretty output.

    :param word: Original word.
    :return: Human-readable form of ``word``.

    Examples::
        >>> humanize("employee_salary")
        'Employee salary'
        >>> humanize("author_id")
        'Author'
    """
    word = re.sub(r"_id$", "", word)
    word = word.replace('_', ' ')
    # Lower-case every run of letters/digits; "+" avoids the pointless empty
    # matches that "*" produced at every position.
    word = re.sub(r"(?i)([a-z\d]+)", lambda m: m.group(1).lower(), word)
    # Upper-case the very first character when it is a word character.
    word = re.sub(r"^\w", lambda m: m.group(0).upper(), word)
    return word
def ordinal(number: int) -> str:
    """
    Overview:
        Return the suffix that should be added to a number to denote the position
        in an ordered sequence such as 1st, 2nd, 3rd, 4th.

    :param number: Int format number (anything accepted by ``int()``); the
        sign is ignored.
    :return: One of ``'st'``, ``'nd'``, ``'rd'`` or ``'th'``.

    Examples::
        >>> ordinal(1)
        'st'
        >>> ordinal(2)
        'nd'
        >>> ordinal(1002)
        'nd'
        >>> ordinal(1003)
        'rd'
        >>> ordinal(-11)
        'th'
        >>> ordinal(-1021)
        'st'
    """
    number = abs(int(number))
    # Numbers ending in 11, 12 or 13 take "th" even though their last digit
    # is 1, 2 or 3.
    if number % 100 in (11, 12, 13):
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
def ordinalize(number: int) -> str:
    """
    Overview:
        Turn a number into an ordinal string used to denote the position in an
        ordered sequence such as 1st, 2nd, 3rd, 4th.

    :param number: Int format number.
    :return: ``number`` formatted as text, followed by its :func:`ordinal`
        suffix.

    Examples::
        >>> ordinalize(1)
        '1st'
        >>> ordinalize(2)
        '2nd'
        >>> ordinalize(1002)
        '1002nd'
        >>> ordinalize(1003)
        '1003rd'
        >>> ordinalize(-11)
        '-11th'
        >>> ordinalize(-1021)
        '-1021st'
    """
    return "{}{}".format(number, ordinal(number))
def parameterize(string: str, separator: str = '-') -> str:
    """
    Overview:
        Replace special characters in a string so that it may be used as part of a
        'pretty' URL.

    :param string: Original string.
    :param separator: Separator of parameter words. It is inserted literally,
        so it may contain backslashes or other regex-special characters.
    :return: Lower-cased string in which every run of characters other than
        ASCII letters, digits, ``-`` and ``_`` is replaced by ``separator``.

    Example::
        >>> parameterize(u"Donald E. Knuth")
        'donald-e-knuth'
    """
    string = transliterate(string)

    def _separator(_match) -> str:
        # A callable replacement is inserted verbatim; a plain string would be
        # parsed as a replacement template (``\\1``, ``\\n``, ...).
        return separator

    # Turn unwanted chars into the separator
    string = re.sub(r"(?i)[^a-z0-9\-_]+", _separator, string)
    if separator:
        re_sep = re.escape(separator)
        # No more than one of the separator in a row. The group makes the
        # quantifier apply to the whole separator, not only its last char.
        string = re.sub(r'(?:%s){2,}' % re_sep, _separator, string)
        # Remove leading/trailing separator.
        string = re.sub(r"(?i)^{sep}|{sep}$".format(sep=re_sep), '', string)
    return string.lower()
def pluralize(word: str) -> str:
    """
    Overview:
        Return the plural form of a word.

    :param word: Original word.
    :return: Plural form of ``word``. Empty strings and words listed in
        ``UNCOUNTABLES`` (compared case-insensitively) are returned unchanged.

    Examples::
        >>> pluralize("posts")
        'posts'
        >>> pluralize("octopus")
        'octopi'
        >>> pluralize("sheep")
        'sheep'
        >>> pluralize("CamelOctopus")
        'CamelOctopi'
    """
    if not word or word.lower() in UNCOUNTABLES:
        return word
    # Rules are ordered by priority: apply the first one that matches.
    for rule, replacement in PLURALS:
        if re.search(rule, word):
            return re.sub(rule, replacement, word)
    return word
def singularize(word: str) -> str:
    """
    Overview:
        Return the singular form of a word, the reverse of :func:`pluralize`.

    :param word: Original word.
    :return: Singular form of ``word``. Words ending in one of the
        ``UNCOUNTABLES`` (as a whole word, case-insensitively) and words that
        match no rule are returned unchanged.

    Examples::
        >>> singularize("posts")
        'post'
        >>> singularize("octopi")
        'octopus'
        >>> singularize("sheep")
        'sheep'
        >>> singularize("word")
        'word'
        >>> singularize("CamelOctopi")
        'CamelOctopus'
    """
    # An uncountable at the end of the string, starting at a word boundary,
    # leaves the whole input untouched.
    for inflection in UNCOUNTABLES:
        if re.search(r'(?i)\b(%s)\Z' % inflection, word):
            return word

    # Rules are ordered by priority: apply the first one that matches.
    for rule, replacement in SINGULARS:
        if re.search(rule, word):
            return re.sub(rule, replacement, word)
    return word
def tableize(word: str) -> str:
    """
    Overview:
        Create the name of a table like Rails does for models to table names. This
        method uses the :func:`pluralize` method on the last word in the string.

    :param word: Original word.
    :return: Result of :func:`pluralize` applied to the :func:`underscore`
        form of ``word``.

    Examples::
        >>> tableize('RawScaledScorer')
        'raw_scaled_scorers'
        >>> tableize('egg_and_ham')
        'egg_and_hams'
        >>> tableize('fancyCategory')
        'fancy_categories'
    """
    return pluralize(underscore(word))
def titleize(word: str) -> str:
    """
    Overview:
        Capitalize all the words and replace some characters in the string to
        create a nicer looking title. :func:`titleize` is meant for creating pretty
        output.

    :param word: Original word.
    :return: Title-cased, space-separated form of ``word``.

    Examples::
        >>> titleize("man from the boondocks")
        'Man From The Boondocks'
        >>> titleize("x-men: the last stand")
        'X Men: The Last Stand'
        >>> titleize("TheManWithoutAPast")
        'The Man Without A Past'
        >>> titleize("raiders_of_the_lost_ark")
        'Raiders Of The Lost Ark'
    """
    # underscore() splits CamelCase and dashes, humanize() turns the
    # underscores into spaces, str.title() capitalizes each word.
    titled = humanize(underscore(word)).title()
    # Capitalize the first character of every word, optionally preceded by
    # an apostrophe.
    return re.sub(
        r"\b('?\w)",
        lambda match: match.group(1).capitalize(),
        titled
    )
def transliterate(string: str) -> str:
    """
    Overview:
        Replace non-ASCII characters with an ASCII approximation. If no
        approximation exists, the non-ASCII character is ignored. The string must
        be ``unicode``.

    :param string: Original string.
    :return: ASCII-only string.

    Examples::
        >>> transliterate('älämölö')
        'alamolo'
        >>> transliterate('Ærøskøbing')
        'rskbing'
    """
    # NFKD splits accented characters into a base character plus combining
    # marks; encoding to ASCII with "ignore" then drops everything non-ASCII.
    normalized = unicodedata.normalize('NFKD', string)
    return normalized.encode('ascii', 'ignore').decode('ascii')
def underscore(word: str) -> str:
    """
    Overview:
        Make an underscored, lowercase form from the expression in the string.

    :param word: Original word.
    :return: Lower-cased ``word`` with underscores at CamelCase word
        boundaries and in place of dashes.

    Example::
        >>> underscore("DeviceType")
        'device_type'

    .. note::
        As a rule of thumb you can think of :func:`underscore` as the inverse of
        :func:`camelize`, though there are cases where that does not hold::

            >>> camelize(underscore("IOError"))
            'IoError'
    """
    # Split an upper-case run from a following capitalized word: "IOError" -> "IO_Error".
    word = re.sub(r"([A-Z]+)([A-Z][a-z])", r'\1_\2', word)
    # Split a lower-case letter or digit from a following capital: "deviceType" -> "device_Type".
    word = re.sub(r"([a-z\d])([A-Z])", r'\1_\2', word)
    word = word.replace("-", "_")
    return word.lower()
# Irregular words, as (singular, plural, *extra plural forms).
# They are registered in the listed order; each _irregular() call inserts its
# rules at the front of PLURALS / SINGULARS.
for _irregular_forms in (
    ('person', 'people'),
    ('man', 'men'),
    ('human', 'humans'),
    ('child', 'children'),
    ('sex', 'sexes'),
    ('move', 'moves'),
    ('cow', 'kine'),
    ('zombie', 'zombies'),
    # self added patterns
    ('it', 'they', 'them'),
    ('this', 'these'),
    ('that', 'those'),
):
    _irregular(*_irregular_forms)
del _irregular_forms  # do not leave the loop variable in the module namespace