Make the tokenizer version independent.
@@ -96,7 +96,9 @@ class Grammar(object):
        if old_lines == lines:
            return module_node

-       new_node = self._diff_parser(self._pgen_grammar, module_node).update(
+       new_node = self._diff_parser(
+           self._pgen_grammar, self._tokenizer, module_node
+       ).update(
            old_lines=old_lines,
            new_lines=lines
        )
@@ -106,7 +108,11 @@ class Grammar(object):

        tokens = self._tokenizer(lines)

-       p = self._parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
+       p = self._parser(
+           self._pgen_grammar,
+           error_recovery=error_recovery,
+           start_symbol=start_symbol
+       )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
@@ -120,6 +126,20 @@ class Grammar(object):
        return '<%s:%s>' % (self.__class__.__name__, txt)


+class PythonGrammar(Grammar):
+    def __init__(self, version_int, bnf_text):
+        super(PythonGrammar, self).__init__(
+            bnf_text,
+            tokenizer=self._tokenize_lines,
+            parser=PythonParser,
+            diff_parser=DiffParser
+        )
+        self._version_int = version_int
+
+    def _tokenize_lines(self, lines):
+        return tokenize_lines(lines, self._version_int)
+
+
 def load_grammar(version=None):
     """
     Loads a Python grammar. The default version is the current Python version.
@@ -147,12 +167,7 @@ def load_grammar(version=None):
        with open(path) as f:
            bnf_text = f.read()

-       grammar = Grammar(
-           bnf_text,
-           tokenizer=tokenize_lines,
-           parser=PythonParser,
-           diff_parser=DiffParser
-       )
+       grammar = PythonGrammar(version_int, bnf_text)
        return _loaded_grammars.setdefault(path, grammar)
    except FileNotFoundError:
        message = "Python version %s is currently not supported." % version
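
Note: with load_grammar now returning a PythonGrammar, the version is bound once per grammar object and every parse() call tokenizes with that version's rules. A rough usage sketch, not part of the diff; the version strings and example source are illustrative and error handling is omitted:

    import parso

    # Each grammar carries its own version-bound tokenizer, so grammars for
    # different Python versions can coexist in one process.
    grammar36 = parso.load_grammar(version='3.6')
    grammar27 = parso.load_grammar(version='2.7')

    module = grammar36.parse('x = 1_000\n')
    print(module.get_code())
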
@@ -13,7 +13,7 @@ from parso.python import tokenize
 class ParserGenerator(object):
     def __init__(self, bnf_text):
         self._bnf_text = bnf_text
-        self.generator = tokenize.tokenize(bnf_text)
+        self.generator = tokenize.tokenize(bnf_text, version_int=36)
         self._gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self._parse()
         self.first = {}  # map from symbol name to set of tokens
@@ -13,7 +13,7 @@ import logging
 from parso.utils import splitlines
 from parso.python.parser import Parser
 from parso.python.tree import EndMarker
-from parso.python.tokenize import (tokenize_lines, NEWLINE, TokenInfo,
+from parso.python.tokenize import (NEWLINE, TokenInfo,
                                    ENDMARKER, INDENT, DEDENT, ERRORTOKEN)


@@ -89,8 +89,9 @@ class DiffParser(object):
     An advanced form of parsing a file faster. Unfortunately comes with huge
     side effects. It changes the given module.
     """
-    def __init__(self, pgen_grammar, module):
+    def __init__(self, pgen_grammar, tokenizer, module):
         self._pgen_grammar = pgen_grammar
+        self._tokenizer = tokenizer
         self._module = module

     def _reset(self):
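
Note: DiffParser no longer reaches for tokenize_lines itself; it uses whatever callable the grammar was built with (PythonGrammar._tokenize_lines above, which closes over the version). The same injection pattern in isolation, a self-contained sketch with illustrative names rather than parso's internals:

    def make_tokenizer(version_int):
        """Return a lines -> tokens callable with the version baked in."""
        def tokenize_bound(lines):
            # Stand-in for tokenize_lines(lines, version_int).
            return [(line, version_int) for line in lines]
        return tokenize_bound

    class FakeDiffParser(object):
        def __init__(self, tokenizer):
            self._tokenizer = tokenizer  # injected; version-agnostic from here on

        def update(self, lines):
            return list(self._tokenizer(lines))

    parser = FakeDiffParser(make_tokenizer(36))
    print(parser.update(['x = 1\n']))
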
@@ -286,7 +287,7 @@ class DiffParser(object):
         is_first_token = True
         omitted_first_indent = False
         indents = []
-        tokens = tokenize_lines(lines)
+        tokens = self._tokenizer(lines)
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]
@@ -11,6 +11,7 @@ memory optimizations here.
 """
 from __future__ import absolute_import

+import sys
 import string
 import re
 from collections import namedtuple
@@ -19,12 +20,19 @@ from codecs import BOM_UTF8

 from parso.python.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap,
                                 NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
-from parso._compatibility import py_version, u
+from parso._compatibility import py_version
 from parso.utils import splitlines


+TokenCollection = namedtuple(
+    'TokenCollection',
+    'pseudo_token single_quoted triple_quoted endpats always_break_tokens',
+)
+
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')

+_token_collection_cache = {}
+
 if py_version >= 30:
     # Python 3 has str.isidentifier() to check if a char is a valid identifier
     is_identifier = str.isidentifier
@@ -46,55 +54,24 @@ def group(*choices, **kwargs):
     start += '?:'
     return start + '|'.join(choices) + ')'


 def any(*choices):
     return group(*choices) + '*'


 def maybe(*choices):
     return group(*choices) + '?'

-# Note: we use unicode matching for names ("\w") but ascii matching for
-# number literals.
-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Name = r'\w+'
-
-if py_version >= 36:
-    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
-    Binnumber = r'0[bB](?:_?[01])+'
-    Octnumber = r'0[oO](?:_?[0-7])+'
-    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
-    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
-                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
-    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
-else:
-    Hexnumber = r'0[xX][0-9a-fA-F]+'
-    Binnumber = r'0[bB][01]+'
-    if py_version >= 30:
-        Octnumber = r'0[oO][0-7]+'
-    else:
-        Octnumber = '0[0-7]+'
-    Decnumber = r'(?:0+|[1-9][0-9]*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9]+'
-    Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
-    Expfloat = r'[0-9]+' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
-Number = group(Imagnumber, Floatnumber, Intnumber)
-

 # Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes():
+def _all_string_prefixes(version_int):
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permuations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
     _valid_string_prefixes = ['b', 'r', 'u', 'br']
-    if py_version >= 36:
+    if version_int >= 36:
         _valid_string_prefixes += ['f', 'fr']
-    if py_version <= 27:
+    if version_int <= 27:
         # TODO this is actually not 100% valid. ur is valid in Python 2.7,
         # while ru is not.
         _valid_string_prefixes.append('ur')
@@ -109,12 +86,59 @@ def _all_string_prefixes():
             result.add(''.join(s))
     return result


 def _compile(expr):
     return re.compile(expr, re.UNICODE)


+def _get_token_collection(version_int):
+    try:
+        return _token_collection_cache[version_int]
+    except KeyError:
+        _token_collection_cache[version_int] = result = \
+            _create_token_collection(version_int)
+        return result
+
+
+def _create_token_collection(version_int):
+    # Note: we use unicode matching for names ("\w") but ascii matching for
+    # number literals.
+    Whitespace = r'[ \f\t]*'
+    Comment = r'#[^\r\n]*'
+    Name = r'\w+'
+
+    if version_int >= 36:
+        Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+        Binnumber = r'0[bB](?:_?[01])+'
+        Octnumber = r'0[oO](?:_?[0-7])+'
+        Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+        Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+        Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                           r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+        Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+        Floatnumber = group(Pointfloat, Expfloat)
+        Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+    else:
+        Hexnumber = r'0[xX][0-9a-fA-F]+'
+        Binnumber = r'0[bB][01]+'
+        if version_int >= 30:
+            Octnumber = r'0[oO][0-7]+'
+        else:
+            Octnumber = '0[0-7]+'
+        Decnumber = r'(?:0+|[1-9][0-9]*)'
+        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+        Exponent = r'[eE][-+]?[0-9]+'
+        Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
+        Expfloat = r'[0-9]+' + Exponent
+        Floatnumber = group(Pointfloat, Expfloat)
+        Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
+    Number = group(Imagnumber, Floatnumber, Intnumber)
+
     # Note that since _all_string_prefixes includes the empty string,
     # StringPrefix can be the empty string (making it optional).
-    StringPrefix = group(*_all_string_prefixes())
+    possible_prefixes = _all_string_prefixes(version_int)
+    StringPrefix = group(*possible_prefixes)

     # Tail end of ' string.
     Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
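
Note: _get_token_collection is a plain dict-backed memo: the regex construction in _create_token_collection runs once per version_int and is reused for every later tokenization with that version. The same pattern reduced to a self-contained sketch; the names here are illustrative, not parso's:

    from collections import namedtuple

    Collection = namedtuple('Collection', 'string_prefixes')

    _cache = {}

    def get_collection(version_int):
        """Build the per-version data lazily and memoize it by version."""
        try:
            return _cache[version_int]
        except KeyError:
            _cache[version_int] = result = _create_collection(version_int)
            return result

    def _create_collection(version_int):
        prefixes = ['b', 'r', 'u', 'br']
        if version_int >= 36:
            prefixes += ['f', 'fr']  # f-string prefixes only exist from 3.6 on
        return Collection(string_prefixes=prefixes)

    assert get_collection(36) is get_collection(36)  # second call hits the cache
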
@@ -138,8 +162,6 @@ Bracket = '[][(){}]'
     Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
     Funny = group(Operator, Bracket, Special)

-    PlainToken = group(Number, Funny, Name, capture=True)
-
     # First (or only) line of ' or " string.
     ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                     group("'", r'\\\r?\n'),
@@ -153,7 +175,7 @@ PseudoToken = group(Whitespace, capture=True) + \
     # to match the remainder of that string. _prefix can be empty, for
     # a normal single or triple quoted string (with no prefix).
     endpats = {}
-    for _prefix in _all_string_prefixes():
+    for _prefix in possible_prefixes:
         endpats[_prefix + "'"] = _compile(Single)
         endpats[_prefix + '"'] = _compile(Double)
         endpats[_prefix + "'''"] = _compile(Single3)
@@ -163,16 +185,19 @@ for _prefix in _all_string_prefixes():
     # including the opening quotes.
     single_quoted = set()
     triple_quoted = set()
-    for t in _all_string_prefixes():
+    for t in possible_prefixes:
         for p in (t + '"', t + "'"):
             single_quoted.add(p)
         for p in (t + '"""', t + "'''"):
             triple_quoted.add(p)

     ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
                            'finally', 'while', 'with', 'return')
     pseudo_token_compiled = _compile(PseudoToken)
+    return TokenCollection(
+        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
+        ALWAYS_BREAK_TOKENS
+    )


 class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
@@ -203,13 +228,13 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         return self.start_pos[0], self.start_pos[1] + len(self.string)


-def tokenize(code):
+def tokenize(code, version_int):
     """Generate tokens from a the source code (string)."""
     lines = splitlines(code, keepends=True)
-    return tokenize_lines(lines)
+    return tokenize_lines(lines, version_int)


-def tokenize_lines(lines):
+def tokenize_lines(lines, version_int):
     """
     A heavily modified Python standard library tokenizer.

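
Note: since tokenize and tokenize_lines now take version_int explicitly, the same source can be tokenized under different version rules side by side. A hedged usage sketch; the exact token split for the pre-3.6 case is not verified here, but underscored number literals are only valid from 3.6 on, so the two results should differ:

    from parso.python.tokenize import tokenize

    code = 'x = 1_000\n'
    for version_int in (27, 36):
        print(version_int, [t.string for t in tokenize(code, version_int)])
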
@@ -217,6 +242,8 @@ def tokenize_lines(lines):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
+    pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
+        _get_token_collection(version_int)
     paren_level = 0  # count parentheses
     indents = [0]
     max = 0
@@ -252,7 +279,7 @@ def tokenize_lines(lines):
             continue

         while pos < max:
-            pseudomatch = pseudo_token_compiled.match(line, pos)
+            pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:  # scan for tokens
                 txt = line[pos:]
                 if txt.endswith('\n'):
@@ -329,7 +356,7 @@ def tokenize_lines(lines):
                 else:  # ordinary string
                     yield TokenInfo(STRING, token, spos, prefix)
             elif is_identifier(initial):  # ordinary name
-                if token in ALWAYS_BREAK_TOKENS:
+                if token in always_break_tokens:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
@@ -370,12 +397,16 @@ def tokenize_lines(lines):


 if __name__ == "__main__":
-    import sys
     if len(sys.argv) >= 2:
         path = sys.argv[1]
         with open(path) as f:
-            code = u(f.read())
+            code = f.read()
     else:
-        code = u(sys.stdin.read())
+        code = sys.stdin.read()
+
+    if isinstance(code, bytes):
+        from parso.utils import source_to_unicode
+        code = source_to_unicode(code)

     for token in tokenize(code):
         print(token)
@@ -108,7 +108,7 @@ def _parse_version(version):
     return int(major + minor)


-def version_string_to_int(version):
+def version_string_to_int(version=None):
     """
     Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
     returns a corresponding int that is always two characters long in decimal.
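
Note: making version optional is what lets callers such as the test helper further down ask for the running interpreter's version. A short sketch of the expected behaviour, assuming the two-digit encoding described in the docstring:

    from parso.utils import version_string_to_int

    assert version_string_to_int('3.6') == 36
    assert version_string_to_int('2.7.1') == 27
    # With no argument it should describe the interpreter running this code.
    print(version_string_to_int())
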
@@ -65,7 +65,11 @@ class Differ(object):
     def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
         logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
         lines = splitlines(code, keepends=True)
-        diff_parser = DiffParser(self.grammar._pgen_grammar, self.module)
+        diff_parser = DiffParser(
+            self.grammar._pgen_grammar,
+            self.grammar._tokenizer,
+            self.module,
+        )
         new_module = diff_parser.update(self.lines, lines)
         self.lines = lines
         assert code == new_module.get_code()
@@ -5,7 +5,7 @@ tests of pydocstyle.

 import difflib
 import re
-from _compatibility import total_ordering
+from test._compatibility import total_ordering

 import parso
 from parso.utils import source_to_unicode
@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest

 from parso._compatibility import py_version
-from parso.utils import splitlines
+from parso.utils import splitlines, version_string_to_int
 from parso.python.token import (
     NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
 from parso.python import tokenize
@@ -14,7 +14,9 @@ from parso.python.tokenize import TokenInfo


 def _get_token_list(string):
-    return list(tokenize.tokenize(string))
+    # Load the current version.
+    version_int = version_string_to_int()
+    return list(tokenize.tokenize(string, version_int))


 def test_end_pos_one_line():
@@ -41,8 +43,7 @@ def test_end_pos_multi_line():
 def test_simple_no_whitespace():
     # Test a simple one line string, no preceding whitespace
     simple_docstring = '"""simple one line docstring"""'
-    tokens = tokenize.tokenize(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     _, value, _, prefix = token_list[0]
     assert prefix == ''
     assert value == '"""simple one line docstring"""'
@@ -51,8 +52,7 @@ def test_simple_no_whitespace():
 def test_simple_with_whitespace():
     # Test a simple one line string with preceding whitespace and newline
     simple_docstring = ' """simple one line docstring""" \r\n'
-    tokens = tokenize.tokenize(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     assert token_list[0][0] == INDENT
     typ, value, start_pos, prefix = token_list[1]
     assert prefix == ' '
@@ -71,8 +71,7 @@ def test_function_whitespace():
     if x > 0:
         print(True)
     ''')
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     for _, value, _, prefix in token_list:
         if value == 'test_whitespace':
             assert prefix == ' '
@@ -92,8 +91,7 @@ def test_tokenize_multiline_I():
     # Make sure multiline string having newlines have the end marker on the
     # next line
     fundef = '''""""\n'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
                           TokenInfo(ENDMARKER , '', (2, 0), '')]

@@ -102,8 +100,7 @@ def test_tokenize_multiline_II():
     # Make sure multiline string having no newlines have the end marker on
     # same line
     fundef = '''""""'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (1, 4), '')]

@@ -112,8 +109,7 @@ def test_tokenize_multiline_III():
     # Make sure multiline string having newlines have the end marker on the
     # next line even if several newline
     fundef = '''""""\n\n'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (3, 0), '')]

@@ -123,8 +119,7 @@ def test_identifier_contains_unicode():
     def 我あφ():
         pass
     ''')
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     unicode_token = token_list[1]
     if py_version >= 30:
         assert unicode_token[0] == NAME