diff --git a/docs/conf.py b/docs/conf.py
index f1d0d59..0e6ac90 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -43,8 +43,8 @@ source_encoding = 'utf-8'
 master_doc = 'index'
 
 # General information about the project.
-project = u'parso'
-copyright = u'parso contributors'
+project = 'parso'
+copyright = 'parso contributors'
 
 import parso
 from parso.utils import version_info
@@ -200,8 +200,8 @@ latex_elements = {
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('index', 'parso.tex', u'parso documentation',
-     u'parso contributors', 'manual'),
+    ('index', 'parso.tex', 'parso documentation',
+     'parso contributors', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -230,8 +230,8 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'parso', u'parso Documentation',
-     [u'parso contributors'], 1)
+    ('index', 'parso', 'parso Documentation',
+     ['parso contributors'], 1)
 ]
 
 # If true, show URL addresses after external links.
@@ -244,8 +244,8 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    ('index', 'parso', u'parso documentation',
-     u'parso contributors', 'parso', 'Awesome Python autocompletion library.',
+    ('index', 'parso', 'parso documentation',
+     'parso contributors', 'parso', 'Awesome Python autocompletion library.',
      'Miscellaneous'),
 ]
 
diff --git a/parso/_compatibility.py b/parso/_compatibility.py
index 54ce8f8..99a9702 100644
--- a/parso/_compatibility.py
+++ b/parso/_compatibility.py
@@ -2,7 +2,6 @@
 To ensure compatibility from Python ``2.7`` - ``3.3``, a module has been
 created. Clearly there is huge need to use conforming syntax.
 """
-import sys
 import platform
 
 # unicode function
@@ -12,17 +11,3 @@ except NameError:
     unicode = str
 
 is_pypy = platform.python_implementation() == 'PyPy'
-
-
-def u(string):
-    """Cast to unicode DAMMIT!
-    Written because Python2 repr always implicitly casts to a string, so we
-    have to cast back to a unicode (and we know that we always deal with valid
-    unicode, because we check that in the beginning).
-    """
-    if sys.version_info.major >= 3:
-        return str(string)
-
-    if not isinstance(string, unicode):
-        return unicode(str(string), 'UTF-8')
-    return string
diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
index fdcd8e0..15b8e9e 100644
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -145,7 +145,7 @@ def _create_token_collection(version_info):
         # just use this one.
         Name = r'(\w+)'
     else:
-        Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
+        Name = '([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'
 
     if version_info >= (3, 6):
         Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
diff --git a/parso/utils.py b/parso/utils.py
index 99a1307..153b843 100644
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -10,15 +10,15 @@ from parso._compatibility import unicode
 # not in Python. In Python only \r (Carriage Return, 0xD) and \n (Line Feed,
 # 0xA) are allowed to split lines.
 _NON_LINE_BREAKS = (
-    u'\v',  # Vertical Tabulation 0xB
-    u'\f',  # Form Feed 0xC
-    u'\x1C',  # File Separator
-    u'\x1D',  # Group Separator
-    u'\x1E',  # Record Separator
-    u'\x85',  # Next Line (NEL - Equivalent to CR+LF.
-              # Used to mark end-of-line on some IBM mainframes.)
-    u'\u2028',  # Line Separator
-    u'\u2029',  # Paragraph Separator
+    '\v',  # Vertical Tabulation 0xB
+    '\f',  # Form Feed 0xC
+    '\x1C',  # File Separator
+    '\x1D',  # Group Separator
+    '\x1E',  # Record Separator
+    '\x85',  # Next Line (NEL - Equivalent to CR+LF.
+             # Used to mark end-of-line on some IBM mainframes.)
+    '\u2028',  # Line Separator
+    '\u2029',  # Paragraph Separator
 )
 
 Version = namedtuple('Version', 'major, minor, micro')
diff --git a/scripts/diff_parser_profile.py b/scripts/diff_parser_profile.py
index a152a3e..93a1202 100755
--- a/scripts/diff_parser_profile.py
+++ b/scripts/diff_parser_profile.py
@@ -18,7 +18,6 @@ from docopt import docopt
 from jedi.parser.python import load_grammar
 from jedi.parser.diff import DiffParser
 from jedi.parser.python import ParserWithRecovery
-from jedi._compatibility import u
 from jedi.common import splitlines
 import jedi
 
@@ -37,14 +36,15 @@ def main(args):
     with open(args['<file>']) as f:
         code = f.read()
     grammar = load_grammar()
-    parser = ParserWithRecovery(grammar, u(code))
+    parser = ParserWithRecovery(grammar, code)
     # Make sure used_names is loaded
     parser.module.used_names
-    code = code + '\na\n' # Add something so the diff parser needs to run.
+    code = code + '\na\n'  # Add something so the diff parser needs to run.
     lines = splitlines(code, keepends=True)
     cProfile.runctx('run(parser, lines)', globals(), locals(),
                     sort=args['-s'])
 
+
 if __name__ == '__main__':
     args = docopt(__doc__)
     main(args)
diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py
index 1904314..3201fbd 100644
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -973,17 +973,17 @@ def test_random_unicode_characters(differ):
     Those issues were all found with the fuzzer.
     """
     differ.initialize('')
-    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
+    differ.parse('\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
                  expect_error_leaves=True)
-    differ.parse(u'\r\r', parsers=1)
-    differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
-    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1,
+    differ.parse('\r\r', parsers=1)
+    differ.parse("˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
+    differ.parse('a\ntaǁ\rGĒōns__\n\nb', parsers=1,
                  expect_error_leaves=sys.version_info[0] == 2)
     s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
     differ.parse(s, parsers=1, expect_error_leaves=True)
     differ.parse('')
     differ.parse(s + '\n', parsers=1, expect_error_leaves=True)
-    differ.parse(u' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True)
+    differ.parse(' result = (\r\f\x17\t\x11res)', parsers=1, expect_error_leaves=True)
     differ.parse('')
     differ.parse(' a( # xx\ndef', parsers=1, expect_error_leaves=True)
 
@@ -996,7 +996,7 @@ def test_dedent_end_positions(differ):
         c = {
             5}
     ''')
-    code2 = dedent(u'''\
+    code2 = dedent('''\
         if 1:
             if ⌟ഒᜈྡྷṭb:
                 2
@@ -1588,14 +1588,14 @@ def test_byte_order_mark(differ):
 
 
 def test_byte_order_mark2(differ):
-    code = u'\ufeff# foo'
+    code = '\ufeff# foo'
     differ.initialize(code)
     differ.parse(code + 'x', parsers=ANY)
 
 
 def test_byte_order_mark3(differ):
-    code1 = u"\ufeff#\ny\n"
-    code2 = u'x\n\ufeff#\n\ufeff#\ny\n'
+    code1 = "\ufeff#\ny\n"
+    code2 = 'x\n\ufeff#\n\ufeff#\ny\n'
     differ.initialize(code1)
     differ.parse(code2, expect_error_leaves=True, parsers=ANY, copies=ANY)
     differ.parse(code1, parsers=1)
diff --git a/test/test_old_fast_parser.py b/test/test_old_fast_parser.py
index 7e12a03..a51a7dd 100644
--- a/test/test_old_fast_parser.py
+++ b/test/test_old_fast_parser.py
@@ -8,12 +8,11 @@ However the tests might still be relevant for the parser.
 
 from textwrap import dedent
 
-from parso._compatibility import u
 from parso import parse
 
 
 def test_carriage_return_splitting():
-    source = u(dedent('''
+    source = dedent('''
 
 
 
@@ -21,7 +20,7 @@ def test_carriage_return_splitting():
 
         class Foo():
            pass
-        '''))
+        ''')
     source = source.replace('\n', '\r\n')
     module = parse(source)
     assert [n.value for lst in module.get_used_names().values() for n in lst] == ['Foo']
diff --git a/test/test_param_splitting.py b/test/test_param_splitting.py
index f04fea7..6b3d353 100644
--- a/test/test_param_splitting.py
+++ b/test/test_param_splitting.py
@@ -23,15 +23,15 @@ def assert_params(param_string, version=None, **wanted_dct):
 
 
 def test_split_params_with_separation_star():
-    assert_params(u'x, y=1, *, z=3', x=None, y='1', z='3', version='3.5')
-    assert_params(u'*, x', x=None, version='3.5')
-    assert_params(u'*', version='3.5')
+    assert_params('x, y=1, *, z=3', x=None, y='1', z='3', version='3.5')
+    assert_params('*, x', x=None, version='3.5')
+    assert_params('*', version='3.5')
 
 
 def test_split_params_with_stars():
-    assert_params(u'x, *args', x=None, args=None)
-    assert_params(u'**kwargs', kwargs=None)
-    assert_params(u'*args, **kwargs', args=None, kwargs=None)
+    assert_params('x, *args', x=None, args=None)
+    assert_params('**kwargs', kwargs=None)
+    assert_params('*args, **kwargs', args=None, kwargs=None)
 
 
 def test_kw_only_no_kw(works_ge_py3):
diff --git a/test/test_parser.py b/test/test_parser.py
index e9a9dda..6f63179 100644
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -3,7 +3,6 @@ from textwrap import dedent
 
 import pytest
 
-from parso._compatibility import u
 from parso import parse
 from parso.python import tree
 from parso.utils import split_lines
@@ -126,7 +125,7 @@ def test_param_splitting(each_version):
 
 
 def test_unicode_string():
-    s = tree.String(None, u('bö'), (0, 0))
+    s = tree.String(None, 'bö', (0, 0))
     assert repr(s)  # Should not raise an Error!
 
 
@@ -135,7 +134,7 @@ def test_backslash_dos_style(each_version):
 
 
 def test_started_lambda_stmt(each_version):
-    m = parse(u'lambda a, b: a i', version=each_version)
+    m = parse('lambda a, b: a i', version=each_version)
     assert m.children[0].type == 'error_node'
 
 
diff --git a/test/test_pgen2.py b/test/test_pgen2.py
index 158ec29..854b33c 100644
--- a/test/test_pgen2.py
+++ b/test/test_pgen2.py
@@ -30,15 +30,15 @@ def _invalid_syntax(code, version=None, **kwargs):
 
 
 def test_formfeed(each_version):
-    s = u"foo\n\x0c\nfoo\n"
+    s = "foo\n\x0c\nfoo\n"
     t = _parse(s, each_version)
     assert t.children[0].children[0].type == 'name'
     assert t.children[1].children[0].type == 'name'
-    s = u"1\n\x0c\x0c\n2\n"
+    s = "1\n\x0c\x0c\n2\n"
     t = _parse(s, each_version)
 
     with pytest.raises(ParserSyntaxError):
-        s = u"\n\x0c2\n"
+        s = "\n\x0c2\n"
         _parse(s, each_version)
 
 
@@ -266,7 +266,7 @@ def test_multiline_bytes_literals(each_version):
     It's not possible to get the same result when using \xaa in Python 2/3,
     because it's treated differently.
""" - s = u""" + s = """ md5test(b"\xaa" * 80, (b"Test Using Larger Than Block-Size Key " b"and Larger Than One Block-Size Data"), @@ -295,7 +295,7 @@ def test_dict_unpacking(works_ge_py35): def test_multiline_str_literals(each_version): - s = u""" + s = """ md5test("\xaa" * 80, ("Test Using Larger Than Block-Size Key " "and Larger Than One Block-Size Data"), diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 7afa373..1e3673f 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -244,11 +244,11 @@ xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Pyth # Name stuff ('1foo1', [NUMBER, NAME]), pytest.param( - u'மெல்லினம்', [NAME], + 'மெல்லினம்', [NAME], **xfail_py2), - pytest.param(u'²', [ERRORTOKEN], **xfail_py2), - pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2), - pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + pytest.param('²', [ERRORTOKEN], **xfail_py2), + pytest.param('ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + pytest.param('ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2), (' \x00a', [INDENT, ERRORTOKEN, NAME, DEDENT]), (dedent('''\ class BaseCache: