Remove the u() compatibility helper and the u'' string-literal prefixes

2025-12-06 04:44:29 +08:00 · 2020-07-24 01:39:03 +02:00
parent 020b2861df
commit 164489cf97
11 changed files with 49 additions and 66 deletions
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -43,8 +43,8 @@ source_encoding = 'utf-8'
 master_doc = 'index'
 # General information about the project.
-project = u'parso'
+project = 'parso'
-copyright = u'parso contributors'
+copyright = 'parso contributors'
 import parso
 from parso.utils import version_info
@@ -200,8 +200,8 @@ latex_elements = {
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('index', 'parso.tex', u'parso documentation',
+    ('index', 'parso.tex', 'parso documentation',
-     u'parso contributors', 'manual'),
+     'parso contributors', 'manual'),
 ]
 # The name of an image file (relative to this directory) to place at the top of
@@ -230,8 +230,8 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'parso', u'parso Documentation',
+    ('index', 'parso', 'parso Documentation',
-     [u'parso contributors'], 1)
+     ['parso contributors'], 1)
 ]
 # If true, show URL addresses after external links.
@@ -244,8 +244,8 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    ('index', 'parso', u'parso documentation',
+    ('index', 'parso', 'parso documentation',
-     u'parso contributors', 'parso', 'Awesome Python autocompletion library.',
+     'parso contributors', 'parso', 'Awesome Python autocompletion library.',
     'Miscellaneous'),
 ]
--- a/parso/_compatibility.py
+++ b/parso/_compatibility.py
@@ -2,7 +2,6 @@
 To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been
 created. Clearly there is huge need to use conforming syntax.
 """
 import sys
 import platform
 # unicode function
@@ -12,17 +11,3 @@ except NameError:
    unicode = str
 is_pypy = platform.python_implementation() == 'PyPy'
 def u(string):
    """Cast to unicode DAMMIT!
    Written because Python2 repr always implicitly casts to a string, so we
    have to cast back to a unicode (and we know that we always deal with valid
    unicode, because we check that in the beginning).
    """
    if sys.version_info.major >= 3:
        return str(string)
    if not isinstance(string, unicode):
        return unicode(str(string), 'UTF-8')
    return string
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -145,7 +145,7 @@ def _create_token_collection(version_info):
        # just use this one.
        Name = r'(\w+)'
    else:
-        Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
+        Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
    if version_info >= (3, 6):
        Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -10,15 +10,15 @@ from parso._compatibility import unicode
 # not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
 # 0xA) are allowed to split lines.
 _NON_LINE_BREAKS = (
-    u'\v',  # Vertical Tabulation 0xB
+    '\v',  # Vertical Tabulation 0xB
-    u'\f',  # Form Feed 0xC
+    '\f',  # Form Feed 0xC
-    u'\x1C',  # File Separator
+    '\x1C',  # File Separator
-    u'\x1D',  # Group Separator
+    '\x1D',  # Group Separator
-    u'\x1E',  # Record Separator
+    '\x1E',  # Record Separator
-    u'\x85',  # Next Line (NEL - Equivalent to CR+LF.
+    '\x85',  # Next Line (NEL - Equivalent to CR+LF.
-              # Used to mark end-of-line on some IBM mainframes.)
+             # Used to mark end-of-line on some IBM mainframes.)
-    u'\u2028',  # Line Separator
+    '\u2028',  # Line Separator
-    u'\u2029',  # Paragraph Separator
+    '\u2029',  # Paragraph Separator
 )
 Version = namedtuple('Version', 'major, minor, micro')
--- a/scripts/diff_parser_profile.py
+++ b/scripts/diff_parser_profile.py
@@ -18,7 +18,6 @@ from docopt import docopt
 from jedi.parser.python import load_grammar
 from jedi.parser.diff import DiffParser
 from jedi.parser.python import ParserWithRecovery
 from jedi._compatibility import u
 from jedi.common import splitlines
 import jedi
@@ -37,14 +36,15 @@ def main(args):
    with open(args['<file>']) as f:
        code = f.read()
    grammar = load_grammar()
-    parser = ParserWithRecovery(grammar, u(code))
+    parser = ParserWithRecovery(grammar, code)
    # Make sure used_names is loaded
    parser.module.used_names
-    code =  code + '\na\n'  # Add something so the diff parser needs to run.
+    code = code + '\na\n'  # Add something so the diff parser needs to run.
    lines = splitlines(code, keepends=True)
    cProfile.runctx('run(parser, lines)', globals(), locals(), sort=args['-s'])
 if __name__ == '__main__':
    args = docopt(__doc__)
    main(args)
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -973,17 +973,17 @@ def test_random_unicode_characters(differ):
    Those issues were all found with the fuzzer.
    """
    differ.initialize('')
-    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
+    differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
                 expect_error_leaves=True)
-    differ.parse(u'\r\r', parsers=1)
+    differ.parse('\r\r', parsers=1)
-    differ.parse(u"˟Ę\x05À\r   rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
+    differ.parse("˟Ę\x05À\r   rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
-    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1,
+    differ.parse('a\ntaǁ\rGĒōns__\n\nb', parsers=1,
                 expect_error_leaves=sys.version_info[0] == 2)
    s = '        if not (self, "_fi\x02\x0e\x08\n\nle"):'
    differ.parse(s, parsers=1, expect_error_leaves=True)
    differ.parse('')
    differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
-    differ.parse(u'   result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True)
+    differ.parse('   result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True)
    differ.parse('')
    differ.parse('   a( # xx\ndef', parsers=1, expect_error_leaves=True)
@@ -996,7 +996,7 @@ def test_dedent_end_positions(differ):
                c = {
                     5}
        ''')
-    code2 = dedent(u'''\
+    code2 = dedent('''\
        if 1:
            if ⌟ഒᜈྡྷṭb:
                2
@@ -1588,14 +1588,14 @@ def test_byte_order_mark(differ):
 def test_byte_order_mark2(differ):
-    code = u'\ufeff# foo'
+    code = '\ufeff# foo'
    differ.initialize(code)
    differ.parse(code + 'x', parsers=ANY)
 def test_byte_order_mark3(differ):
-    code1 = u"\ufeff#\ny\n"
+    code1 = "\ufeff#\ny\n"
-    code2 = u'x\n\ufeff#\n\ufeff#\ny\n'
+    code2 = 'x\n\ufeff#\n\ufeff#\ny\n'
    differ.initialize(code1)
    differ.parse(code2, expect_error_leaves=True, parsers=ANY, copies=ANY)
    differ.parse(code1, parsers=1)
--- a/test/test_old_fast_parser.py
+++ b/test/test_old_fast_parser.py
@@ -8,12 +8,11 @@ However the tests might still be relevant for the parser.
 from textwrap import dedent
 from parso._compatibility import u
 from parso import parse
 def test_carriage_return_splitting():
-    source = u(dedent('''
+    source = dedent('''
@@ -21,7 +20,7 @@ def test_carriage_return_splitting():
        class Foo():
            pass
-        '''))
+        ''')
    source = source.replace('\n', '\r\n')
    module = parse(source)
    assert [n.value for lst in module.get_used_names().values() for n in lst] == ['Foo']
--- a/test/test_param_splitting.py
+++ b/test/test_param_splitting.py
@@ -23,15 +23,15 @@ def assert_params(param_string, version=None, **wanted_dct):
 def test_split_params_with_separation_star():
-    assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3', version='3.5')
+    assert_params('x, y=1, *, z=3', x=None, y='1', z='3', version='3.5')
-    assert_params(u'*, x', x=None, version='3.5')
+    assert_params('*, x', x=None, version='3.5')
-    assert_params(u'*', version='3.5')
+    assert_params('*', version='3.5')
 def test_split_params_with_stars():
-    assert_params(u'x, *args', x=None, args=None)
+    assert_params('x, *args', x=None, args=None)
-    assert_params(u'**kwargs', kwargs=None)
+    assert_params('**kwargs', kwargs=None)
-    assert_params(u'*args, **kwargs', args=None, kwargs=None)
+    assert_params('*args, **kwargs', args=None, kwargs=None)
 def test_kw_only_no_kw(works_ge_py3):
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -3,7 +3,6 @@ from textwrap import dedent
 import pytest
 from parso._compatibility import u
 from parso import parse
 from parso.python import tree
 from parso.utils import split_lines
@@ -126,7 +125,7 @@ def test_param_splitting(each_version):
 def test_unicode_string():
-    s = tree.String(None, u('bö'), (0, 0))
+    s = tree.String(None, 'bö', (0, 0))
    assert repr(s)  # Should not raise an Error!
@@ -135,7 +134,7 @@ def test_backslash_dos_style(each_version):
 def test_started_lambda_stmt(each_version):
-    m = parse(u'lambda a, b: a i', version=each_version)
+    m = parse('lambda a, b: a i', version=each_version)
    assert m.children[0].type == 'error_node'
--- a/test/test_pgen2.py
+++ b/test/test_pgen2.py
@@ -30,15 +30,15 @@ def _invalid_syntax(code, version=None, **kwargs):
 def test_formfeed(each_version):
-    s = u"foo\n\x0c\nfoo\n"
+    s = "foo\n\x0c\nfoo\n"
    t = _parse(s, each_version)
    assert t.children[0].children[0].type == 'name'
    assert t.children[1].children[0].type == 'name'
-    s = u"1\n\x0c\x0c\n2\n"
+    s = "1\n\x0c\x0c\n2\n"
    t = _parse(s, each_version)
    with pytest.raises(ParserSyntaxError):
-        s = u"\n\x0c2\n"
+        s = "\n\x0c2\n"
        _parse(s, each_version)
@@ -266,7 +266,7 @@ def test_multiline_bytes_literals(each_version):
    It's not possible to get the same result when using \xaa in Python 2/3,
    because it's treated differently.
    """
-    s = u"""
+    s = """
        md5test(b"\xaa" * 80,
                (b"Test Using Larger Than Block-Size Key "
                 b"and Larger Than One Block-Size Data"),
@@ -295,7 +295,7 @@ def test_dict_unpacking(works_ge_py35):
 def test_multiline_str_literals(each_version):
-    s = u"""
+    s = """
        md5test("\xaa" * 80,
                ("Test Using Larger Than Block-Size Key "
                 "and Larger Than One Block-Size Data"),
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -244,11 +244,11 @@ xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Pyth
        # Name stuff
        ('1foo1', [NUMBER, NAME]),
        pytest.param(
-            u'மெல்லினம்', [NAME],
+            'மெல்லினம்', [NAME],
            **xfail_py2),
-        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
+        pytest.param('²', [ERRORTOKEN], **xfail_py2),
-        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
+        pytest.param('ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
-        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
+        pytest.param('ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
        (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]),
        (dedent('''\
            class BaseCache: