diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..55170d0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,20 @@ +# http://editorconfig.org + +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{py,rst,ini}] +indent_style = space +indent_size = 4 + +[*.{html,json,yml}] +indent_style = space +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false diff --git a/.gitignore b/.gitignore index c3fe42b..3874fab 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ build .coverage dist .idea +Pipfile +Pipfile.lock # docs docs/_* diff --git a/docs/customize.rst b/docs/customize.rst index 6097050..1e4f38d 100644 --- a/docs/customize.rst +++ b/docs/customize.rst @@ -57,6 +57,8 @@ Other editable attributes * :py:obj:`~nameparser.config.Constants.string_format` - controls output from `str()` * :py:obj:`~nameparser.config.Constants.empty_attribute_default` - value returned by empty attributes, defaults to empty string +* :py:obj:`~nameparser.config.Constants.capitalize_name` - If set, automatically applies :py:meth:`~nameparser.parser.HumanName.capitalize` to every :py:class:`~nameparser.parser.HumanName` instance when it is parsed. +* :py:obj:`~nameparser.config.Constants.force_mixed_case_capitalization` - If set, forces the capitalization of mixed case strings when :py:meth:`~nameparser.parser.HumanName.capitalize` is called. diff --git a/docs/usage.rst b/docs/usage.rst index 45f67a4..6a65c4e 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -72,9 +72,8 @@ Capitalization Support The HumanName class can try to guess the correct capitalization of name entered in all upper or lower case. By default, it will not adjust -the case of names entered in mixed case. To run capitalization on all names -pass the parameter `force=True`. - +the case of names entered in mixed case. To capitalize a mixed case name +anyway, pass the parameter `force=True`. Capitalize the name. 
@@ -94,6 +93,31 @@ pass the parameter `force=True`. >>> str(name) 'Shirley MacLaine' +To apply capitalization to all `HumanName` instances, set +:py:attr:`~nameparser.config.Constants.capitalize_name` to `True`. + +.. doctest:: capitalize_name + :options: +NORMALIZE_WHITESPACE + + >>> from nameparser.config import CONSTANTS + >>> CONSTANTS.capitalize_name = True + >>> name = HumanName("bob v. de la macdole-eisenhower phd") + >>> str(name) + 'Bob V. de la MacDole-Eisenhower Ph.D.' + +To force the capitalization of mixed case strings on all `HumanName` instances, +set :py:attr:`~nameparser.config.Constants.force_mixed_case_capitalization` to `True`. + +.. doctest:: force_mixed_case_capitalization + :options: +NORMALIZE_WHITESPACE + + >>> from nameparser.config import CONSTANTS + >>> CONSTANTS.force_mixed_case_capitalization = True + >>> name = HumanName('Shirley Maclaine') + >>> name.capitalize() + >>> str(name) + 'Shirley MacLaine' + Nickname Handling ------------------ diff --git a/nameparser/config/__init__.py b/nameparser/config/__init__.py index b9d6e1e..602afe8 100644 --- a/nameparser/config/__init__.py +++ b/nameparser/config/__init__.py @@ -179,8 +179,37 @@ class Constants(object): 'John' """ - - + capitalize_name = False + """ + If set, applies :py:meth:`~nameparser.parser.HumanName.capitalize` to + :py:class:`~nameparser.parser.HumanName` instance. + + .. doctest:: + + >>> from nameparser.config import CONSTANTS + >>> CONSTANTS.capitalize_name = True + >>> name = HumanName("bob v. de la macdole-eisenhower phd") + >>> str(name) + 'Bob V. de la MacDole-Eisenhower Ph.D.' + + """ + force_mixed_case_capitalization = False + """ + If set, forces the capitalization of mixed case strings when + :py:meth:`~nameparser.parser.HumanName.capitalize` is called. + + .. 
doctest:: + + >>> from nameparser.config import CONSTANTS + >>> CONSTANTS.force_mixed_case_capitalization = True + >>> name = HumanName('Shirley Maclaine') + >>> name.capitalize() + >>> str(name) + 'Shirley MacLaine' + + """ + + def __init__(self, prefixes=PREFIXES, suffix_acronyms=SUFFIX_ACRONYMS, diff --git a/nameparser/config/capitalization.py b/nameparser/config/capitalization.py index 4aa3214..84dfbef 100644 --- a/nameparser/config/capitalization.py +++ b/nameparser/config/capitalization.py @@ -2,11 +2,11 @@ from __future__ import unicode_literals CAPITALIZATION_EXCEPTIONS = ( - ('ii' ,'II'), - ('iii','III'), - ('iv' ,'IV'), - ('md' ,'M.D.'), - ('phd','Ph.D.'), + ('ii', 'II'), + ('iii', 'III'), + ('iv', 'IV'), + ('md', 'M.D.'), + ('phd', 'Ph.D.'), ) """ Any pieces that are not capitalized by capitalizing the first letter. diff --git a/nameparser/config/prefixes.py b/nameparser/config/prefixes.py index 542ea03..2f5eb31 100644 --- a/nameparser/config/prefixes.py +++ b/nameparser/config/prefixes.py @@ -4,7 +4,7 @@ #: Name pieces that appear before a last name. Prefixes join to the piece #: that follows them to make one new piece. They can be chained together, e.g #: "von der" and "de la". Because they only appear in middle or last names, -#: they also signifiy that all following name pieces should be in the same name +#: they also signify that all following name pieces should be in the same name #: part, for example, "von" will be joined to all following pieces that are not #: prefixes or suffixes, allowing recognition of double last names when they #: appear after a prefixes. 
So in "pennie von bergen wessels MD", "von" will diff --git a/nameparser/parser.py b/nameparser/parser.py index a2dfa8a..e90c99b 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -387,7 +387,7 @@ def pre_process(self): This method happens at the beginning of the :py:func:`parse_full_name` before any other processing of the string aside from unicode normalization, so it's a good place to do any custom handling in a - subclass. Runs :py:func:`parse_nicknames` and py:func:`squash_emoji`. + subclass. Runs :py:func:`parse_nicknames` and :py:func:`squash_emoji`. """ self.fix_phd() @@ -397,9 +397,11 @@ def pre_process(self): def post_process(self): """ This happens at the end of the :py:func:`parse_full_name` after - all other processing has taken place. Runs :py:func:`handle_firstnames`. + all other processing has taken place. Runs :py:func:`handle_firstnames` + and :py:func:`handle_capitalization`. """ self.handle_firstnames() + self.handle_capitalization() def fix_phd(self): _re = self.C.regexes.phd @@ -675,9 +677,9 @@ def join_on_conjunctions(self, pieces, additional_parts_count=0): :param list pieces: name pieces strings after split on spaces :param int additional_parts_count: :return: new list with piece next to conjunctions merged into one piece - with spaces in it. + with spaces in it. :rtype: list - + """ length = len(pieces) + additional_parts_count # don't join on conjunctions if there's only 2 parts @@ -833,14 +835,16 @@ def cap_piece(self, piece, attribute): replacement = lambda m: self.cap_word(m.group(0), attribute) return self.C.regexes.word.sub(replacement, piece) - def capitalize(self, force=False): + def capitalize(self, force=None): """ The HumanName class can try to guess the correct capitalization of name entered in all upper or lower case. By default, it will not adjust the case of names entered in mixed case. To run capitalization on all names pass the parameter `force=True`. 
- :param bool force: force capitalization of strings that include mixed case + :param bool force: Forces capitalization of mixed case strings. This + parameter overrides rules set within + :py:class:`~nameparser.config.CONSTANTS`. **Usage** @@ -861,6 +865,9 @@ def capitalize(self, force=False): """ name = u(self) + force = self.C.force_mixed_case_capitalization \ + if force is None else force + if not force and not (name == name.upper() or name == name.lower()): return self.title_list = self.cap_piece(self.title , 'title').split(' ') @@ -868,3 +875,11 @@ def capitalize(self, force=False): self.middle_list = self.cap_piece(self.middle, 'middle').split(' ') self.last_list = self.cap_piece(self.last , 'last').split(' ') self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') + + def handle_capitalization(self): + """ + Handles capitalization configurations set within + :py:class:`~nameparser.config.CONSTANTS`. + """ + if self.C.capitalize_name: + self.capitalize() diff --git a/tests.py b/tests.py index 5e2ddab..0e948c7 100644 --- a/tests.py +++ b/tests.py @@ -40,7 +40,7 @@ class HumanNameTestBase(unittest.TestCase): def m(self, actual, expected, hn): - """assertEquals with a better message and awareness of hn.C.empty_attribute_default""" + """assertEqual with a better message and awareness of hn.C.empty_attribute_default""" expected = expected or hn.C.empty_attribute_default try: self.assertEqual(actual, expected, "'%s' != '%s' for '%s'\n%r" % ( @@ -50,7 +50,7 @@ def m(self, actual, expected, hn): hn )) except UnicodeDecodeError: - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) class HumanNamePythonTests(HumanNameTestBase): @@ -62,8 +62,6 @@ def test_utf8(self): def test_string_output(self): hn = HumanName("de la Véña, Jüan") - print(hn) - print(repr(hn)) def test_escaped_utf8_bytes(self): hn = HumanName(b'B\xc3\xb6ck, Gerald') @@ -1267,7 +1265,7 @@ class ConstantsCustomization(HumanNameTestBase): def test_add_title(self): hn = 
HumanName("Te Awanui-a-Rangi Black", constants=None) start_len = len(hn.C.titles) - self.assert_(start_len > 0) + self.assertTrue(start_len > 0) hn.C.titles.add('te') self.assertEqual(start_len + 1, len(hn.C.titles)) hn.parse_full_name() @@ -1278,7 +1276,7 @@ def test_add_title(self): def test_remove_title(self): hn = HumanName("Hon Solo", constants=None) start_len = len(hn.C.titles) - self.assert_(start_len > 0) + self.assertTrue(start_len > 0) hn.C.titles.remove('hon') self.assertEqual(start_len - 1, len(hn.C.titles)) hn.parse_full_name() @@ -2090,6 +2088,31 @@ def test_formatting_constants_attribute(self): self.assertEqual(u(hn), "TEST2") CONSTANTS.string_format = _orig + def test_capitalize_name_constants_attribute(self): + from nameparser.config import CONSTANTS + CONSTANTS.capitalize_name = True + hn = HumanName("bob v. de la macdole-eisenhower phd") + self.assertEqual(str(hn), "Bob V. de la MacDole-Eisenhower Ph.D.") + CONSTANTS.capitalize_name = False + + def test_force_mixed_case_capitalization_constants_attribute(self): + from nameparser.config import CONSTANTS + CONSTANTS.force_mixed_case_capitalization = True + hn = HumanName('Shirley Maclaine') + hn.capitalize() + self.assertEqual(str(hn), "Shirley MacLaine") + CONSTANTS.force_mixed_case_capitalization = False + + def test_capitalize_name_and_force_mixed_case_capitalization_constants_attributes(self): + from nameparser.config import CONSTANTS + CONSTANTS.capitalize_name = True + CONSTANTS.force_mixed_case_capitalization = True + hn = HumanName('Shirley Maclaine') + self.assertEqual(str(hn), "Shirley MacLaine") + # CONSTANTS is shared module-level state: restore both flags so later tests see the defaults + CONSTANTS.capitalize_name = False + CONSTANTS.force_mixed_case_capitalization = False + def test_quote_nickname_formating(self): hn = HumanName("Rev John A. Kenneth Doe III (Kenny)") hn.string_format = "{title} {first} {middle} {last} {suffix} '{nickname}'"