Added detection of unicode/bytes literal escaping issues.

This commit is contained in:
Dave Halter
2017-08-02 10:17:15 +02:00
parent d6c624bd34
commit 3ccbf4326c
4 changed files with 49 additions and 3 deletions

View File

@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import codecs
from contextlib import contextmanager from contextlib import contextmanager
from parso.normalizer import Normalizer, NormalizerConfig, Issue from parso.normalizer import Normalizer, NormalizerConfig, Issue
@@ -514,22 +515,40 @@ class ErrorFinder(Normalizer):
self._add_syntax_error('invalid syntax', leaf) self._add_syntax_error('invalid syntax', leaf)
elif leaf.type == 'name': elif leaf.type == 'name':
if leaf.value == '__debug__' and leaf.is_definition(): if leaf.value == '__debug__' and leaf.is_definition():
if self._version <= (2, 7): if self._version < (3, 0):
message = 'cannot assign to __debug__' message = 'cannot assign to __debug__'
else: else:
message = 'assignment to keyword' message = 'assignment to keyword'
self._add_syntax_error(message, leaf) self._add_syntax_error(message, leaf)
if leaf.value == 'None' and self._version <= (2, 7) and leaf.is_definition(): if leaf.value == 'None' and self._version < (3, 0) and leaf.is_definition():
self._add_syntax_error('cannot assign to None', leaf) self._add_syntax_error('cannot assign to None', leaf)
self._context.add_name(leaf) self._context.add_name(leaf)
elif leaf.type == 'string': elif leaf.type == 'string':
if 'b' in leaf.string_prefix.lower() \ string_prefix = leaf.string_prefix.lower()
if 'b' in string_prefix \
and any(c for c in leaf.value if ord(c) > 127): and any(c for c in leaf.value if ord(c) > 127):
# TODO add check for python 3 # TODO add check for python 3
# b'ä' # b'ä'
message = "bytes can only contain ASCII literal characters." message = "bytes can only contain ASCII literal characters."
self._add_syntax_error(message, leaf) self._add_syntax_error(message, leaf)
if 'r' not in string_prefix:
# Raw strings don't need to be checked if they have proper
# escaping.
is_bytes = self._version < (3, 0)
if 'b' in string_prefix:
is_bytes = True
if 'u' in string_prefix:
is_bytes = False
func = codecs.escape_decode if is_bytes else codecs.unicode_escape_decode
try:
func(leaf._get_payload())
except UnicodeDecodeError as e:
self._add_syntax_error('(unicode error) ' + str(e), leaf)
except ValueError as e:
self._add_syntax_error('(value error) ' + str(e), leaf)
elif leaf.value == 'continue': elif leaf.value == 'continue':
in_loop = False in_loop = False
for block in self._context.blocks: for block in self._context.blocks:

View File

@@ -214,6 +214,14 @@ class String(Literal):
def string_prefix(self): def string_prefix(self):
return re.match('\w*(?=[\'"])', self.value).group(0) return re.match('\w*(?=[\'"])', self.value).group(0)
# Return the text between the quotes of this string literal, i.e.
# ``self.value`` without its prefix characters and surrounding quotes.
# NOTE(review): assumes ``self.value`` contains a quote and ends with a
# closing quote of the same length as the opening one; if no quote is
# found, ``match`` is None and ``.group`` raises AttributeError.
def _get_payload(self):
match = re.search(
# Group 1: the opening quote. The triple-quote alternatives come first so
# that ''' or """ is not matched as a single-character quote.
# Group 2: everything after the opening quote up to the end of the value.
r'''('{3}|"{3}|'|")(.*)$''',
self.value,
# DOTALL lets ``.`` match newlines, so multi-line literals are captured.
flags=re.DOTALL
)
# Group 2 still ends with the closing quote; cut off as many characters as
# the opening quote has.
return match.group(2)[:-len(match.group(1))]
class _StringComparisonMixin(object): class _StringComparisonMixin(object):
def __eq__(self, other): def __eq__(self, other):

View File

@@ -39,3 +39,8 @@ except:
pass pass
except ZeroDivisionError: except ZeroDivisionError:
pass pass
r'\n'
r'\x'
b'\n'

View File

@@ -95,6 +95,16 @@ FAILING_EXAMPLES = [
'(x for 1 in y)', '(x for 1 in y)',
'{x for 1 in y}', '{x for 1 in y}',
'{x:x for 1 in y}', '{x:x for 1 in y}',
# Unicode/Bytes issues.
r'u"\x"',
r'u"\"',
r'u"\u"',
r'u"""\U"""',
r'u"\Uffffffff"',
r"u'''\N{}'''",
r"u'\N{foo}'",
r'b"\x"',
r'b"\"',
# SyntaxErrors from Python/symtable.c # SyntaxErrors from Python/symtable.c
'def f(x, x): pass', 'def f(x, x): pass',
@@ -304,6 +314,10 @@ def _get_actual_exception(code):
if sys.version_info[:2] == (2, 6) and wanted == 'SyntaxError: unexpected EOF while parsing': if sys.version_info[:2] == (2, 6) and wanted == 'SyntaxError: unexpected EOF while parsing':
wanted = 'SyntaxError: invalid syntax' wanted = 'SyntaxError: invalid syntax'
if wanted == 'SyntaxError: EOL while scanning string literal':
# TODO This is not what we want in the future. Remove this.
wanted = 'SyntaxError: invalid syntax'
if wanted == 'SyntaxError: non-keyword arg after keyword arg': if wanted == 'SyntaxError: non-keyword arg after keyword arg':
# The python 3.5+ way, a bit nicer. # The python 3.5+ way, a bit nicer.
wanted = 'SyntaxError: positional argument follows keyword argument' wanted = 'SyntaxError: positional argument follows keyword argument'