diff --git a/parso/python/normalizer.py b/parso/python/normalizer.py index b081aeb..00372a0 100644 --- a/parso/python/normalizer.py +++ b/parso/python/normalizer.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import codecs from contextlib import contextmanager from parso.normalizer import Normalizer, NormalizerConfig, Issue @@ -514,22 +515,40 @@ class ErrorFinder(Normalizer): self._add_syntax_error('invalid syntax', leaf) elif leaf.type == 'name': if leaf.value == '__debug__' and leaf.is_definition(): - if self._version <= (2, 7): + if self._version < (3, 0): message = 'cannot assign to __debug__' else: message = 'assignment to keyword' self._add_syntax_error(message, leaf) - if leaf.value == 'None' and self._version <= (2, 7) and leaf.is_definition(): + if leaf.value == 'None' and self._version < (3, 0) and leaf.is_definition(): self._add_syntax_error('cannot assign to None', leaf) self._context.add_name(leaf) elif leaf.type == 'string': - if 'b' in leaf.string_prefix.lower() \ + string_prefix = leaf.string_prefix.lower() + if 'b' in string_prefix \ and any(c for c in leaf.value if ord(c) > 127): # TODO add check for python 3 # b'ä' message = "bytes can only contain ASCII literal characters." self._add_syntax_error(message, leaf) + + if 'r' not in string_prefix: + # Raw strings don't need to be checked if they have proper + # escaping. 
+ is_bytes = self._version < (3, 0) + if 'b' in string_prefix: + is_bytes = True + if 'u' in string_prefix: + is_bytes = False + func = codecs.escape_decode if is_bytes else codecs.unicode_escape_decode + try: + func(leaf._get_payload()) + except UnicodeDecodeError as e: + self._add_syntax_error('(unicode error) ' + str(e), leaf) + except ValueError as e: + self._add_syntax_error('(value error) ' + str(e), leaf) + elif leaf.value == 'continue': in_loop = False for block in self._context.blocks: diff --git a/parso/python/tree.py b/parso/python/tree.py index 46ec8cd..c93af25 100644 --- a/parso/python/tree.py +++ b/parso/python/tree.py @@ -214,6 +214,14 @@ class String(Literal): def string_prefix(self): return re.match('\w*(?=[\'"])', self.value).group(0) + def _get_payload(self): + match = re.search( + r'''('{3}|"{3}|'|")(.*)$''', + self.value, + flags=re.DOTALL + ) + return match.group(2)[:-len(match.group(1))] + class _StringComparisonMixin(object): def __eq__(self, other): diff --git a/test/normalizer_issue_files/allowed_syntax.py b/test/normalizer_issue_files/allowed_syntax.py index 2a7346e..237b818 100644 --- a/test/normalizer_issue_files/allowed_syntax.py +++ b/test/normalizer_issue_files/allowed_syntax.py @@ -39,3 +39,8 @@ except: pass except ZeroDivisionError: pass + + +r'\n' +r'\x' +b'\n' diff --git a/test/test_python_errors.py b/test/test_python_errors.py index 4e1328c..2d41faf 100644 --- a/test/test_python_errors.py +++ b/test/test_python_errors.py @@ -95,6 +95,16 @@ FAILING_EXAMPLES = [ '(x for 1 in y)', '{x for 1 in y}', '{x:x for 1 in y}', + # Unicode/Bytes issues. 
+ r'u"\x"', + r'u"\"', + r'u"\u"', + r'u"""\U"""', + r'u"\Uffffffff"', + r"u'''\N{}'''", + r"u'\N{foo}'", + r'b"\x"', + r'b"\"', # SyntaxErrors from Python/symtable.c 'def f(x, x): pass', @@ -304,6 +314,10 @@ def _get_actual_exception(code): if sys.version_info[:2] == (2, 6) and wanted == 'SyntaxError: unexpected EOF while parsing': wanted = 'SyntaxError: invalid syntax' + if wanted == 'SyntaxError: EOL while scanning string literal': + # TODO This is not what we want in the future. Remove this. + wanted = 'SyntaxError: invalid syntax' + if wanted == 'SyntaxError: non-keyword arg after keyword arg': # The python 3.5+ way, a bit nicer. wanted = 'SyntaxError: positional argument follows keyword argument'