28 Commits

Author SHA1 Message Date
Dave Halter
c0ace63a69 For Python 2.7 and 3.4 pytest 5 doesn't work anymore 2019-07-13 15:46:58 +02:00
Dave Halter
399e8e5043 Prepare the 0.5.1 release 2019-07-13 15:39:44 +02:00
Dave Halter
0a5b5f3346 Fix name tokenizing for Python 2 2019-07-13 15:34:23 +02:00
Dave Halter
2b8544021f Fix positioning for names that are interleaved with error tokens 2019-07-13 12:34:49 +02:00
Dave Halter
99dd4a84d4 Merge branch 'master' of github.com:davidhalter/parso 2019-07-12 21:35:06 +02:00
Dave Halter
9501b0bde0 Fixed name tokenizing issues for Tamil characters, fixes davidhalter/jedi#1368 2019-07-12 21:31:49 +02:00
Benjamin Woodruff
ad57a51800 Fix line continuation characters inside f-strings
Line continuation characters are valid inside of strings, but weren't
handled correctly in certain cases with f-strings, due to some small
tokenizer bugs.

This pull request addresses those issues, and adds tests to validate
the new logic.
2019-07-12 21:20:00 +02:00
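For illustration, a minimal check of this fix (assuming parso >= 0.5.1; the assertion mirrors the new test in test_fstring.py further down):

    import parso

    # A single-line f-string containing a backslash line continuation
    # used to mis-tokenize; with this fix it parses cleanly.
    tree = parso.parse('f"abc\\\ndef"', version='3.8', error_recovery=False)
    assert tree.children[0].type == 'fstring'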
Dave Halter
19de3eb5ca Document that the cache uses pickle files 2019-07-10 00:17:28 -07:00
Dave Halter
7441e6b1d2 Fix changelog dates, fixes #77 2019-06-28 02:00:35 -07:00
Dave Halter
df3c494e02 Try to use collections.abc.Mapping instead of collections.Mapping
The latter is deprecated and will be removed in Python 3.9, fixes #76
2019-06-21 10:17:18 +02:00
Dave Halter
59df3fab43 Some small changes to the changelog 2019-06-20 21:15:53 +02:00
Dave Halter
803cb5f25f Make parso work at least somewhat with an older Jedi version 2019-06-20 20:33:14 +02:00
Dave Halter
3fa8630ba9 Use an immutable map for used names, so that it can be used for hashing 2019-06-18 09:12:33 +02:00
Dave Halter
1ca5ae4008 Bump the version number to the next release: 0.5.0 2019-06-13 17:26:08 +02:00
Dave Halter
c3c16169b5 Ignore positional only arguments slash when listing params 2019-06-09 22:55:37 +02:00
Dave Halter
ecbe2b9926 Add positional only arguments to grammar 2019-06-09 21:15:03 +02:00
Dave Halter
1929c144dc Increase the _PICKLE_VERSION to avoid issues with the latest breaking change 2019-06-09 18:11:21 +02:00
Dave Halter
b5d50392a4 comp_for is now called sync_comp_for for all Python versions to be compatible with the Python 3.8 Grammar 2019-06-09 18:00:32 +02:00
Dave Halter
a7aa23a7f0 Parse named expressions 2019-06-02 23:34:37 +02:00
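A quick sketch of what this enables (assuming parso 0.5.x; it mirrors the new test_named_expression added below):

    import parso

    # ':=' is only part of the 3.8 grammar
    parso.parse("(a := 1, a + 1)", version='3.8', error_recovery=False)

    try:
        parso.parse("(a := 1, a + 1)", version='3.7', error_recovery=False)
    except parso.ParserSyntaxError:
        pass  # named expressions are rejected by older grammars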
Dave Halter
5430415d44 Change a test, because it doesn't really matter
The test's behavior changed for Python 3.8 to a syntax error of:

SyntaxError: unexpected EOF while parsing

instead of

SyntaxError: invalid syntax
2019-06-02 22:54:45 +02:00
Dave Halter
6cdd47fe2b f-string syntax in Python 3.8 was enhanced
See e.g. https://twitter.com/raymondh/status/1135253771846471680
2019-06-02 22:48:47 +02:00
Dave Halter
917b4421f3 Fix fstring format spec parsing, fixes #74 2019-06-02 15:18:42 +02:00
Dave Halter
4f5fdd5a70 Add release notes for the next release 0.4.1 2019-06-02 11:28:00 +02:00
prim
93ddf5322a parse long number notation (#72)
* parse long number notation

* parse long number notation
2019-06-02 11:14:15 +02:00
Dave Halter
a9b61149eb Fix get_decorators for async functions 2019-05-27 01:08:42 +02:00
Dave Halter
de416b082e Make it clear that get_last_modified should not raise an exception, but return None if it cannot look up a file 2019-05-22 00:16:26 +02:00
Carl Meyer
4b440159b1 Fix __init__.pyi re-exports. 2019-05-10 09:12:32 +02:00
Carl Meyer
6f2d2362c9 Add type stubs. 2019-05-10 09:12:32 +02:00
33 changed files with 606 additions and 126 deletions

View File

@@ -3,25 +3,39 @@
 Changelog
 ---------

+0.5.1 (2019-07-13)
+++++++++++++++++++
+- Fix: Some unicode identifiers were not correctly tokenized
+- Fix: Line continuations in f-strings are now working
+
+0.5.0 (2019-06-20)
+++++++++++++++++++
+- **Breaking Change** comp_for is now called sync_comp_for for all Python
+  versions to be compatible with the Python 3.8 Grammar
+- Added .pyi stubs for a lot of the parso API
+- Small FileIO changes
+
 0.4.0 (2019-04-05)
 ++++++++++++++++++
 - Python 3.8 support
 - FileIO support, it's now possible to use abstract file IO, support is alpha

-0.3.4 (2018-02-13)
+0.3.4 (2019-02-13)
 +++++++++++++++++++
 - Fix an f-string tokenizer error

-0.3.3 (2018-02-06)
+0.3.3 (2019-02-06)
 +++++++++++++++++++
 - Fix async errors in the diff parser
 - A fix in iter_errors
 - This is a very small bugfix release

-0.3.2 (2018-01-24)
+0.3.2 (2019-01-24)
 +++++++++++++++++++
 - 20+ bugfixes in the diff parser and 3 in the tokenizer

View File

@@ -14,7 +14,7 @@ from parso.utils import parse_version_string
 collect_ignore = ["setup.py"]

 VERSIONS_2 = '2.6', '2.7'
-VERSIONS_3 = '3.3', '3.4', '3.5', '3.6', '3.7'
+VERSIONS_3 = '3.3', '3.4', '3.5', '3.6', '3.7', '3.8'

 @pytest.fixture(scope='session')

@@ -155,3 +155,9 @@ def works_ge_py3(each_version):
 def works_ge_py35(each_version):
     version_info = parse_version_string(each_version)
     return Checker(each_version, version_info >= (3, 5))
+
+
+@pytest.fixture
+def works_ge_py38(each_version):
+    version_info = parse_version_string(each_version)
+    return Checker(each_version, version_info >= (3, 8))

View File

@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
 from parso.utils import split_lines, python_bytes_to_unicode

-__version__ = '0.4.0'
+__version__ = '0.5.1'

 def parse(code=None, **kwargs):

parso/__init__.pyi (new file, 19 lines)
View File

@@ -0,0 +1,19 @@
from typing import Any, Optional, Union

from parso.grammar import Grammar as Grammar, load_grammar as load_grammar
from parso.parser import ParserSyntaxError as ParserSyntaxError
from parso.utils import python_bytes_to_unicode as python_bytes_to_unicode, split_lines as split_lines

__version__: str = ...

def parse(
    code: Optional[Union[str, bytes]],
    *,
    version: Optional[str] = None,
    error_recovery: bool = True,
    path: Optional[str] = None,
    start_symbol: Optional[str] = None,
    cache: bool = False,
    diff_cache: bool = False,
    cache_path: Optional[str] = None,
) -> Any: ...

View File

@@ -18,7 +18,7 @@ from parso._compatibility import FileNotFoundError
 LOG = logging.getLogger(__name__)

-_PICKLE_VERSION = 31
+_PICKLE_VERSION = 32
 """
 Version number (integer) for file system cache.

@@ -82,9 +82,8 @@ def load_module(hashed_grammar, file_io, cache_path=None):
     """
     Returns a module or None, if it fails.
     """
-    try:
-        p_time = file_io.get_last_modified()
-    except FileNotFoundError:
+    p_time = file_io.get_last_modified()
+    if p_time is None:
         return None
     try:

View File

@@ -14,10 +14,13 @@ class FileIO(object):
     def get_last_modified(self):
         """
-        Returns float - timestamp
-        Might raise FileNotFoundError
+        Returns float - timestamp or None, if path doesn't exist.
         """
-        return os.path.getmtime(self.path)
+        try:
+            return os.path.getmtime(self.path)
+        except OSError:
+            # Might raise FileNotFoundError, OSError for Python 2
+            return None

     def __repr__(self):
         return '%s(%s)' % (self.__class__.__name__, self.path)
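A sketch of a FileIO subclass honoring the new contract (the class name and behavior here are illustrative, not part of parso):

    from parso.file_io import FileIO

    class InMemoryFileIO(FileIO):
        def __init__(self, path, content):
            super(InMemoryFileIO, self).__init__(path)
            self._content = content

        def read(self):
            return self._content

        def get_last_modified(self):
            # Nothing exists on disk, so there is no timestamp to report;
            # per the new contract we return None instead of raising.
            return None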

View File

@@ -57,7 +57,8 @@ class Grammar(object):
         :param str path: The path to the file you want to open. Only needed for caching.
         :param bool cache: Keeps a copy of the parser tree in RAM and on disk
             if a path is given. Returns the cached trees if the corresponding
-            files on disk have not changed.
+            files on disk have not changed. Note that this stores pickle files
+            on your file system (e.g. for Linux in ``~/.cache/parso/``).
         :param bool diff_cache: Diffs the cached python module against the new
             code and tries to parse only the parts that have changed. Returns
             the same (changed) module that is found in cache. Using this option
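Usage sketch of the documented behavior (the path is illustrative):

    import parso

    grammar = parso.load_grammar(version='3.6')
    # With cache=True and a path, the parse tree is pickled to the cache
    # directory mentioned above and reused while the file is unchanged.
    module = grammar.parse(path='setup.py', cache=True)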

parso/grammar.pyi (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
from typing import Any, Callable, Generic, Optional, Sequence, TypeVar, Union
from typing_extensions import Literal

from parso.utils import PythonVersionInfo

_Token = Any
_NodeT = TypeVar("_NodeT")

class Grammar(Generic[_NodeT]):
    _default_normalizer_config: Optional[Any] = ...
    _error_normalizer_config: Optional[Any] = None
    _start_nonterminal: str = ...
    _token_namespace: Optional[str] = None
    def __init__(
        self,
        text: str,
        tokenizer: Callable[[Sequence[str], int], Sequence[_Token]],
        parser: Any = ...,
        diff_parser: Any = ...,
    ) -> None: ...
    def parse(
        self,
        code: Union[str, bytes] = ...,
        error_recovery: bool = ...,
        path: Optional[str] = ...,
        start_symbol: Optional[str] = ...,
        cache: bool = ...,
        diff_cache: bool = ...,
        cache_path: Optional[str] = ...,
    ) -> _NodeT: ...

class PythonGrammar(Grammar):
    version_info: PythonVersionInfo
    def __init__(self, version_info: PythonVersionInfo, bnf_text: str) -> None: ...

def load_grammar(
    language: Literal["python"] = "python", version: Optional[str] = ..., path: str = ...
) -> Grammar: ...

View File

@@ -41,8 +41,8 @@ class Normalizer(use_metaclass(_NormalizerMeta)):
         except AttributeError:
             return self.visit_leaf(node)
         else:
             with self.visit_node(node):
                 return ''.join(self.visit(child) for child in children)

     @contextmanager
     def visit_node(self, node):

@@ -147,7 +147,6 @@ class Issue(object):
         return '<%s: %s>' % (self.__class__.__name__, self.code)

 class Rule(object):
     code = None
     message = None

parso/pgen2/__init__.pyi (new file, 1 line)
View File

@@ -0,0 +1 @@
from parso.pgen2.generator import generate_grammar as generate_grammar

parso/pgen2/generator.pyi (new file, 38 lines)
View File

@@ -0,0 +1,38 @@
from typing import Any, Generic, Mapping, Sequence, Set, TypeVar, Union

from parso.pgen2.grammar_parser import NFAState

_TokenTypeT = TypeVar("_TokenTypeT")

class Grammar(Generic[_TokenTypeT]):
    nonterminal_to_dfas: Mapping[str, Sequence[DFAState[_TokenTypeT]]]
    reserved_syntax_strings: Mapping[str, ReservedString]
    start_nonterminal: str
    def __init__(
        self,
        start_nonterminal: str,
        rule_to_dfas: Mapping[str, Sequence[DFAState]],
        reserved_syntax_strings: Mapping[str, ReservedString],
    ) -> None: ...

class DFAPlan:
    next_dfa: DFAState
    dfa_pushes: Sequence[DFAState]

class DFAState(Generic[_TokenTypeT]):
    from_rule: str
    nfa_set: Set[NFAState]
    is_final: bool
    arcs: Mapping[str, DFAState]  # map from all terminals/nonterminals to DFAState
    nonterminal_arcs: Mapping[str, DFAState]
    transitions: Mapping[Union[_TokenTypeT, ReservedString], DFAPlan]
    def __init__(
        self, from_rule: str, nfa_set: Set[NFAState], final: NFAState
    ) -> None: ...

class ReservedString:
    value: str
    def __init__(self, value: str) -> None: ...
    def __repr__(self) -> str: ...

def generate_grammar(bnf_grammar: str, token_namespace: Any) -> Grammar[Any]: ...

parso/pgen2/grammar_parser.pyi (new file, 20 lines)

View File

@@ -0,0 +1,20 @@
from typing import Generator, List, Optional, Tuple

from parso.python.token import TokenType

class GrammarParser:
    generator: Generator[TokenType, None, None]
    def __init__(self, bnf_grammar: str) -> None: ...
    def parse(self) -> Generator[Tuple[NFAState, NFAState], None, None]: ...

class NFAArc:
    next: NFAState
    nonterminal_or_string: Optional[str]
    def __init__(
        self, next_: NFAState, nonterminal_or_string: Optional[str]
    ) -> None: ...

class NFAState:
    from_rule: str
    arcs: List[NFAArc]
    def __init__(self, from_rule: str) -> None: ...

View File

@@ -953,20 +953,17 @@ class _CheckAssignmentRule(SyntaxRule):
             self.add_issue(node, message=message)

-@ErrorFinder.register_rule(type='comp_for')
 @ErrorFinder.register_rule(type='sync_comp_for')
 class _CompForRule(_CheckAssignmentRule):
     message = "asynchronous comprehension outside of an asynchronous function"

     def is_issue(self, node):
-        # Some of the nodes here are already used, so no else
-        if node.type != 'comp_for' or self._normalizer.version < (3, 8):
-            # comp_for was replaced by sync_comp_for in Python 3.8.
-            expr_list = node.children[1 + int(node.children[0] == 'async')]
-            if expr_list.type != 'expr_list':  # Already handled.
-                self._check_assignment(expr_list)
+        expr_list = node.children[1]
+        print(expr_list)
+        if expr_list.type != 'expr_list':  # Already handled.
+            self._check_assignment(expr_list)

-        return node.children[0] == 'async' \
+        return node.parent.children[0] == 'async' \
             and not self._normalizer.context.is_async_funcdef()
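The node layout this rule now relies on, as a small sketch (assuming parso 0.5.x; the navigation path is illustrative):

    import parso

    module = parso.parse('[x for x in y]\n', version='3.6')
    atom = module.children[0].children[0]    # simple_stmt -> atom '[...]'
    comp = atom.children[1].children[1]      # testlist_comp -> sync_comp_for
    assert comp.type == 'sync_comp_for'
    # In 'async for' comprehensions the sync_comp_for is wrapped in a
    # comp_for parent whose first child is the 'async' keyword.
    assert [n.value for n in comp.get_defined_names()] == ['x']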

View File

@@ -107,7 +107,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        NAME | NUMBER | strings)
 strings: STRING+
 listmaker: test ( list_for | (',' test)* [','] )
-testlist_comp: test ( comp_for | (',' test)* [','] )
+testlist_comp: test ( sync_comp_for | (',' test)* [','] )
 lambdef: 'lambda' [varargslist] ':' test
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']

@@ -115,8 +115,8 @@ subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
 sliceop: ':' [test]
 exprlist: expr (',' expr)* [',']
 testlist: test (',' test)* [',']
-dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
-                  (test (comp_for | (',' test)* [','])) )
+dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
+                  (test (sync_comp_for | (',' test)* [','])) )
 classdef: 'class' NAME ['(' [testlist] ')'] ':' suite

@@ -125,14 +125,14 @@ arglist: (argument ',')* (argument [','
                          |'**' test)
 # The reason that keywords are test nodes instead of NAME is that using NAME
 # results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test
+argument: test [sync_comp_for] | test '=' test
 list_iter: list_for | list_if
 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
 list_if: 'if' old_test [list_iter]
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_iter: sync_comp_for | comp_if
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_if: 'if' old_test [comp_iter]
 testlist1: test (',' test)*

View File

@@ -105,15 +105,15 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '{' [dictorsetmaker] '}' |
        NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
 subscript: test | [test] ':' [test] [sliceop]
 sliceop: ':' [test]
 exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
 testlist: test (',' test)* [',']
-dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
-                  (test (comp_for | (',' test)* [','])) )
+dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
+                  (test (sync_comp_for | (',' test)* [','])) )
 classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

@@ -122,9 +122,9 @@ arglist: (argument ',')* (argument [','
                          |'**' test)
 # The reason that keywords are test nodes instead of NAME is that using NAME
 # results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test  # Really [keyword '='] test
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
+argument: test [sync_comp_for] | test '=' test  # Really [keyword '='] test
+comp_iter: sync_comp_for | comp_if
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -105,15 +105,15 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '{' [dictorsetmaker] '}' |
        NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
 subscript: test | [test] ':' [test] [sliceop]
 sliceop: ':' [test]
 exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
 testlist: test (',' test)* [',']
-dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) |
-                  (test (comp_for | (',' test)* [','])) )
+dictorsetmaker: ( (test ':' test (sync_comp_for | (',' test ':' test)* [','])) |
+                  (test (sync_comp_for | (',' test)* [','])) )
 classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

@@ -122,9 +122,9 @@ arglist: (argument ',')* (argument [','
                          |'**' test)
 # The reason that keywords are test nodes instead of NAME is that using NAME
 # results in an ambiguity. ast.c makes sure it's a NAME.
-argument: test [comp_for] | test '=' test  # Really [keyword '='] test
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
+argument: test [sync_comp_for] | test '=' test  # Really [keyword '='] test
+comp_iter: sync_comp_for | comp_if
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -112,7 +112,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '{' [dictorsetmaker] '}' |
        NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
 strings: STRING+
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+testlist_comp: (test|star_expr) ( sync_comp_for | (',' (test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
 subscript: test | [test] ':' [test] [sliceop]

@@ -120,9 +120,9 @@ sliceop: ':' [test]
 exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
 testlist: test (',' test)* [',']
 dictorsetmaker: ( ((test ':' test | '**' expr)
-                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                   (sync_comp_for | (',' (test ':' test | '**' expr))* [','])) |
                   ((test | star_expr)
-                   (comp_for | (',' (test | star_expr))* [','])) )
+                   (sync_comp_for | (',' (test | star_expr))* [','])) )
 classdef: 'class' NAME ['(' [arglist] ')'] ':' suite

@@ -137,13 +137,13 @@ arglist: argument (',' argument)* [',']
 # Illegal combinations and orderings are blocked in ast.c:
 # multiple (test comp_for) arguments are blocked; keyword unpackings
 # that precede iterable unpackings are blocked; etc.
-argument: ( test [comp_for] |
+argument: ( test [sync_comp_for] |
             test '=' test |
             '**' test |
             '*' test )
-comp_iter: comp_for | comp_if
-comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_iter: sync_comp_for | comp_if
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -140,7 +140,8 @@ argument: ( test [comp_for] |
             '*' test )
 comp_iter: comp_for | comp_if
-comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_for: ['async'] sync_comp_for
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -138,7 +138,8 @@ argument: ( test [comp_for] |
             '*' test )
 comp_iter: comp_for | comp_if
-comp_for: ['async'] 'for' exprlist 'in' or_test [comp_iter]
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_for: ['async'] sync_comp_for
 comp_if: 'if' test_nocond [comp_iter]
 # not used in grammar, but may appear in "node" passed from Parser to Compiler

View File

@@ -20,13 +20,25 @@ async_funcdef: 'async' funcdef
 funcdef: 'def' NAME parameters ['->' test] ':' suite
 parameters: '(' [typedargslist] ')'
-typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
+typedargslist: (
+  (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
+  ',' tfpdef ['=' test])* ([',' [
+  '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+  | '**' tfpdef [',']]])
+  | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
+  | '**' tfpdef [',']]] )
+  | (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
   '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
   | '**' tfpdef [',']]]
 | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
 | '**' tfpdef [','])
+)
 tfpdef: NAME [':' test]
-varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
+varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
+  '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+  | '**' vfpdef [',']]]
+  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+  | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
   '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
   | '**' vfpdef [',']]]
 | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
 | '**' vfpdef [','])

@@ -69,8 +81,8 @@ assert_stmt: 'assert' test [',' test]
 compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
 async_stmt: 'async' (funcdef | with_stmt | for_stmt)
-if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
-while_stmt: 'while' test ':' suite ['else' ':' suite]
+if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
 for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
 try_stmt: ('try' ':' suite
            ((except_clause ':' suite)+

@@ -83,6 +95,7 @@ with_item: test ['as' expr]
 except_clause: 'except' [test ['as' NAME]]
 suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

+namedexpr_test: test [':=' test]
 test: or_test ['if' or_test 'else' test] | lambdef
 test_nocond: or_test | lambdef_nocond
 lambdef: 'lambda' [varargslist] ':' test

@@ -108,7 +121,7 @@ atom: ('(' [yield_expr|testlist_comp] ')' |
        '[' [testlist_comp] ']' |
        '{' [dictorsetmaker] '}' |
        NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
 trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
 subscriptlist: subscript (',' subscript)* [',']
 subscript: test | [test] ':' [test] [sliceop]

@@ -134,6 +147,7 @@ arglist: argument (',' argument)* [',']
 # multiple (test comp_for) arguments are blocked; keyword unpackings
 # that precede iterable unpackings are blocked; etc.
 argument: ( test [comp_for] |
+            test ':=' test |
             test '=' test |
             '**' test |
             '*' test )

@@ -153,5 +167,5 @@ strings: (STRING | fstring)+
 fstring: FSTRING_START fstring_content* FSTRING_END
 fstring_content: FSTRING_STRING | fstring_expr
 fstring_conversion: '!' NAME
-fstring_expr: '{' testlist [ fstring_conversion ] [ fstring_format_spec ] '}'
+fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
 fstring_format_spec: ':' fstring_content*

View File

@@ -39,13 +39,13 @@ class Parser(BaseParser):
         'for_stmt': tree.ForStmt,
         'while_stmt': tree.WhileStmt,
         'try_stmt': tree.TryStmt,
-        'comp_for': tree.CompFor,
+        'sync_comp_for': tree.SyncCompFor,
         # Not sure if this is the best idea, but IMO it's the easiest way to
         # avoid extreme amounts of work around the subtle difference of 2/3
         # grammar in list comoprehensions.
-        'list_for': tree.CompFor,
+        'list_for': tree.SyncCompFor,
         # Same here. This just exists in Python 2.6.
-        'gen_for': tree.CompFor,
+        'gen_for': tree.SyncCompFor,
         'decorator': tree.Decorator,
         'lambdef': tree.Lambda,
         'old_lambdef': tree.Lambda,

parso/python/token.pyi (new file, 30 lines)
View File

@@ -0,0 +1,30 @@
from typing import Container, Iterable

class TokenType:
    name: str
    contains_syntax: bool
    def __init__(self, name: str, contains_syntax: bool) -> None: ...

class TokenTypes:
    def __init__(
        self, names: Iterable[str], contains_syntax: Container[str]
    ) -> None: ...

# not an actual class in the source code, but we need this class to type the fields of
# PythonTokenTypes
class _FakePythonTokenTypesClass(TokenTypes):
    STRING: TokenType
    NUMBER: TokenType
    NAME: TokenType
    ERRORTOKEN: TokenType
    NEWLINE: TokenType
    INDENT: TokenType
    DEDENT: TokenType
    ERROR_DEDENT: TokenType
    FSTRING_STRING: TokenType
    FSTRING_START: TokenType
    FSTRING_END: TokenType
    OP: TokenType
    ENDMARKER: TokenType

PythonTokenTypes: _FakePythonTokenTypesClass = ...

View File

@@ -23,6 +23,9 @@ from parso._compatibility import py_version
 from parso.utils import split_lines

+# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
+MAX_UNICODE = '\U0010ffff'
+
 STRING = PythonTokenTypes.STRING
 NAME = PythonTokenTypes.NAME
 NUMBER = PythonTokenTypes.NUMBER

@@ -51,8 +54,13 @@ if py_version >= 30:
     # Python 3 has str.isidentifier() to check if a char is a valid identifier
     is_identifier = str.isidentifier
 else:
-    namechars = string.ascii_letters + '_'
-    is_identifier = lambda s: s in namechars
+    # Python 2 doesn't, but it's not that important anymore and if you tokenize
+    # Python 2 code with this, it's still ok. It's just that parsing Python 3
+    # code with this function is not 100% correct.
+    # This just means that Python 2 code matches a few identifiers too much,
+    # but that doesn't really matter.
+    def is_identifier(s):
+        return True

 def group(*choices, **kwargs):

@@ -118,8 +126,10 @@ def _get_token_collection(version_info):
     return result

-fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
+fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
+fstring_format_spec_multi_line = _compile(r'[^{}]+')

 def _create_token_collection(version_info):

@@ -128,7 +138,16 @@ def _create_token_collection(version_info):
     Whitespace = r'[ \f\t]*'
     whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
-    Name = r'\w+'
+    # Python 2 is pretty much not working properly anymore, we just ignore
+    # parsing unicode properly, which is fine, I guess.
+    if version_info[0] == 2:
+        Name = r'([A-Za-z_0-9]+)'
+    elif sys.version_info[0] == 2:
+        # Unfortunately the regex engine cannot deal with the regex below, so
+        # just use this one.
+        Name = r'(\w+)'
+    else:
+        Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'

     if version_info >= (3, 6):
         Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'

@@ -151,6 +170,8 @@ def _create_token_collection(version_info):
         Octnumber = '0[oO]?[0-7]+'
         Decnumber = r'(?:0+|[1-9][0-9]*)'
     Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+    if version_info[0] < 3:
+        Intnumber += '[lL]?'
     Exponent = r'[eE][-+]?[0-9]+'
     Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
     Expfloat = r'[0-9]+' + Exponent

@@ -186,9 +207,13 @@ def _create_token_collection(version_info):
     Bracket = '[][(){}]'

-    special_args = [r'\r\n?', r'\n', r'[:;.,@]']
+    special_args = [r'\r\n?', r'\n', r'[;.,@]']
     if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
+    if version_info >= (3, 8):
+        special_args.insert(0, ":=?")
+    else:
+        special_args.insert(0, ":")
     Special = group(*special_args)

     Funny = group(Operator, Bracket, Special)

@@ -281,7 +306,10 @@ class FStringNode(object):
         return len(self.quote) == 3

     def is_in_expr(self):
-        return (self.parentheses_count - self.format_spec_count) > 0
+        return self.parentheses_count > self.format_spec_count
+
+    def is_in_format_spec(self):
+        return not self.is_in_expr() and self.format_spec_count

 def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):

@@ -303,10 +331,18 @@ def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_pre
 def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
     tos = fstring_stack[-1]
     allow_multiline = tos.allow_multiline()
-    if allow_multiline:
-        match = fstring_string_multi_line.match(line, pos)
-    else:
-        match = fstring_string_single_line.match(line, pos)
+    if tos.is_in_format_spec():
+        if allow_multiline:
+            regex = fstring_format_spec_multi_line
+        else:
+            regex = fstring_format_spec_single_line
+    else:
+        if allow_multiline:
+            regex = fstring_string_multi_line
+        else:
+            regex = fstring_string_single_line
+    match = regex.match(line, pos)
     if match is None:
         return tos.previous_lines, pos

@@ -321,7 +357,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):
         new_pos = pos
     new_pos += len(string)
-    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+    # even if allow_multiline is False, we still need to check for trailing
+    # newlines, because a single-line f-string can contain line continuations
+    if string.endswith('\n') or string.endswith('\r'):
         tos.previous_lines += string
         string = ''
     else:

@@ -491,6 +529,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
         if (initial in numchars or  # ordinary number
                 (initial == '.' and token != '.' and token != '...')):
             yield PythonToken(NUMBER, token, spos, prefix)
+        elif pseudomatch.group(3) is not None:  # ordinary name
+            if token in always_break_tokens:
+                fstring_stack[:] = []
+                paren_level = 0
+                # We only want to dedent if the token is on a new line.
+                if re.match(r'[ \f\t]*$', line[:start]):
+                    while True:
+                        indent = indents.pop()
+                        if indent > start:
+                            yield PythonToken(DEDENT, '', spos, '')
+                        else:
+                            indents.append(indent)
+                            break
+            if is_identifier(token):
+                yield PythonToken(NAME, token, spos, prefix)
+            else:
+                for t in _split_illegal_unicode_name(token, spos, prefix):
+                    yield t  # yield from Python 2
         elif initial in '\r\n':
             if any(not f.allow_multiline() for f in fstring_stack):
                 # Would use fstring_stack.clear, but that's not available

@@ -545,20 +601,6 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
         elif token in fstring_pattern_map:  # The start of an fstring.
             fstring_stack.append(FStringNode(fstring_pattern_map[token]))
             yield PythonToken(FSTRING_START, token, spos, prefix)
-        elif is_identifier(initial):  # ordinary name
-            if token in always_break_tokens:
-                fstring_stack[:] = []
-                paren_level = 0
-                # We only want to dedent if the token is on a new line.
-                if re.match(r'[ \f\t]*$', line[:start]):
-                    while True:
-                        indent = indents.pop()
-                        if indent > start:
-                            yield PythonToken(DEDENT, '', spos, '')
-                        else:
-                            indents.append(indent)
-                            break
-            yield PythonToken(NAME, token, spos, prefix)
         elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
             additional_prefix += prefix + line[start:]
             break

@@ -575,7 +617,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                 if paren_level:
                     paren_level -= 1
                 elif token == ':' and fstring_stack \
-                        and fstring_stack[-1].parentheses_count == 1:
+                        and fstring_stack[-1].parentheses_count \
+                            - fstring_stack[-1].format_spec_count == 1:
                     fstring_stack[-1].format_spec_count += 1
             yield PythonToken(OP, token, spos, prefix)

@@ -593,6 +636,39 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)

+def _split_illegal_unicode_name(token, start_pos, prefix):
+    def create_token():
+        return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
+
+    found = ''
+    is_illegal = False
+    pos = start_pos
+    for i, char in enumerate(token):
+        if is_illegal:
+            if is_identifier(char):
+                yield create_token()
+                found = char
+                is_illegal = False
+                prefix = ''
+                pos = start_pos[0], start_pos[1] + i
+            else:
+                found += char
+        else:
+            new_found = found + char
+            if is_identifier(new_found):
+                found = new_found
+            else:
+                if found:
+                    yield create_token()
+                    prefix = ''
+                    pos = start_pos[0], start_pos[1] + i
+                found = char
+                is_illegal = True
+
+    if found:
+        yield create_token()
+
 if __name__ == "__main__":
     if len(sys.argv) >= 2:
         path = sys.argv[1]
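Illustrative use of the new name splitting (assuming Python 3; the expected output mirrors the new test cases in test_tokenize.py below):

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    # Superscript characters are not valid identifier characters, so the
    # tokenizer now splits them out as ERRORTOKEN instead of one NAME.
    tokens = list(tokenize(u'ä²ö', parse_version_string('3.6')))
    print([t.type.name for t in tokens])
    # ['NAME', 'ERRORTOKEN', 'NAME', 'ENDMARKER']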

parso/python/tokenize.pyi (new file, 24 lines)
View File

@@ -0,0 +1,24 @@
from typing import Generator, Iterable, NamedTuple, Tuple

from parso.python.token import TokenType
from parso.utils import PythonVersionInfo

class Token(NamedTuple):
    type: TokenType
    string: str
    start_pos: Tuple[int, int]
    prefix: str
    @property
    def end_pos(self) -> Tuple[int, int]: ...

class PythonToken(Token):
    def __repr__(self) -> str: ...

def tokenize(
    code: str, version_info: PythonVersionInfo, start_pos: Tuple[int, int] = (1, 0)
) -> Generator[PythonToken, None, None]: ...
def tokenize_lines(
    lines: Iterable[str],
    version_info: PythonVersionInfo,
    start_pos: Tuple[int, int] = (1, 0),
) -> Generator[PythonToken, None, None]: ...

View File

@@ -43,6 +43,10 @@ Parser Tree Classes
 """

 import re
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping

 from parso._compatibility import utf8_repr, unicode
 from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \

@@ -55,7 +59,7 @@ _FLOW_CONTAINERS = set(['if_stmt', 'while_stmt', 'for_stmt', 'try_stmt',
 _RETURN_STMT_CONTAINERS = set(['suite', 'simple_stmt']) | _FLOW_CONTAINERS
 _FUNC_CONTAINERS = set(['suite', 'simple_stmt', 'decorated']) | _FLOW_CONTAINERS
 _GET_DEFINITION_TYPES = set([
-    'expr_stmt', 'comp_for', 'with_stmt', 'for_stmt', 'import_name',
+    'expr_stmt', 'sync_comp_for', 'with_stmt', 'for_stmt', 'import_name',
     'import_from', 'param'
 ])
 _IMPORTS = set(['import_name', 'import_from'])

@@ -442,7 +446,7 @@ class Module(Scope):
                 recurse(child)

         recurse(self)
-        self._used_names = dct
+        self._used_names = UsedNamesMapping(dct)
         return self._used_names

@@ -466,6 +470,9 @@ class ClassOrFunc(Scope):
         :rtype: list of :class:`Decorator`
         """
         decorated = self.parent
+        if decorated.type == 'async_funcdef':
+            decorated = decorated.parent
         if decorated.type == 'decorated':
             if decorated.children[0].type == 'decorators':
                 return decorated.children[0].children

@@ -545,7 +552,8 @@ def _create_params(parent, argslist_list):
             if param_children[0] == '*' \
                     and (len(param_children) == 1
                          or param_children[1] == ',') \
-                    or check_python2_nested_param(param_children[0]):
+                    or check_python2_nested_param(param_children[0]) \
+                    or param_children[0] == '/':
                 for p in param_children:
                     p.parent = parent
                 new_children += param_children

@@ -1158,6 +1166,13 @@ class Param(PythonBaseNode):
                 index -= 2
         except ValueError:
             pass
+        try:
+            keyword_only_index = self.parent.children.index('/')
+            if index > keyword_only_index:
+                # Skip the ` /, `
+                index -= 2
+        except ValueError:
+            pass
         return index - 1

     def get_parent_function(self):

@@ -1189,8 +1204,8 @@ class Param(PythonBaseNode):
         return '<%s: %s>' % (type(self).__name__, str(self._tfpdef()) + default)

-class CompFor(PythonBaseNode):
-    type = 'comp_for'
+class SyncCompFor(PythonBaseNode):
+    type = 'sync_comp_for'
     __slots__ = ()

     def get_defined_names(self):

@@ -1198,4 +1213,33 @@ class CompFor(PythonBaseNode):
         Returns the a list of `Name` that the comprehension defines.
         """
         # allow async for
-        return _defined_names(self.children[self.children.index('for') + 1])
+        return _defined_names(self.children[1])
+
+
+# This is simply here so an older Jedi version can work with this new parso
+# version. Can be deleted in the next release.
+CompFor = SyncCompFor
+
+
+class UsedNamesMapping(Mapping):
+    """
+    This class exists for the sole purpose of creating an immutable dict.
+    """
+    def __init__(self, dct):
+        self._dict = dct
+
+    def __getitem__(self, key):
+        return self._dict[key]
+
+    def __len__(self):
+        return len(self._dict)
+
+    def __iter__(self):
+        return iter(self._dict)
+
+    def __hash__(self):
+        return id(self)
+
+    def __eq__(self, other):
+        # Comparing these dicts does not make sense.
+        return self is other
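A small sketch of the resulting API behavior (assuming parso 0.5.x):

    import parso

    module = parso.parse('foo = 1\nfoo\n')
    used = module.get_used_names()   # now a UsedNamesMapping
    assert sorted(used) == ['foo']
    assert len(used['foo']) == 2     # the definition and the reference
    hash(used)                       # works; a plain dict would raise TypeError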

parso/utils.pyi (new file, 29 lines)
View File

@@ -0,0 +1,29 @@
from typing import NamedTuple, Optional, Sequence, Union

class Version(NamedTuple):
    major: int
    minor: int
    micro: int

def split_lines(string: str, keepends: bool = ...) -> Sequence[str]: ...
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = ..., errors: str = ...
) -> str: ...
def version_info() -> Version:
    """
    Returns a namedtuple of parso's version, similar to Python's
    ``sys.version_info``.
    """
    ...

class PythonVersionInfo(NamedTuple):
    major: int
    minor: int

def parse_version_string(version: Optional[str]) -> PythonVersionInfo:
    """
    Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
    returns a corresponding version info that is always two characters long in
    decimal.
    """
    ...
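Usage sketch for these helpers (return values follow the docstrings above):

    from parso.utils import parse_version_string, version_info

    assert parse_version_string('3.8') == (3, 8)
    assert parse_version_string('2.7.1') == (2, 7)   # micro part is dropped
    print(version_info())   # e.g. Version(major=0, minor=5, micro=1)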

View File

@@ -146,7 +146,7 @@ FAILING_EXAMPLES = [
     # Now nested parsing
     "f'{continue}'",
     "f'{1;1}'",
-    "f'{a=3}'",
+    "f'{a;}'",
     "f'{b\"\" \"\"}'",
 ]

View File

@@ -974,10 +974,12 @@ def test_random_unicode_characters(differ):
     Those issues were all found with the fuzzer.
     """
     differ.initialize('')
-    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
+                 expect_error_leaves=True)
     differ.parse(u'\r\r', parsers=1)
     differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
-    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1)
+    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1,
+                 expect_error_leaves=sys.version_info[0] == 2)
     s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):'
     differ.parse(s, parsers=1, expect_error_leaves=True)
     differ.parse('')

View File

@@ -7,31 +7,62 @@ from parso.python.tokenize import tokenize

 @pytest.fixture
 def grammar():
-    return load_grammar(version='3.6')
+    return load_grammar(version='3.8')

 @pytest.mark.parametrize(
     'code', [
-        '{1}',
-        '{1:}',
-        '',
-        '{1!a}',
-        '{1!a:1}',
-        '{1:1}',
-        '{1:1.{32}}',
-        '{1::>4}',
-        '{foo} {bar}',
+        # simple cases
+        'f"{1}"',
+        'f"""{1}"""',
+        'f"{foo} {bar}"',
+
+        # empty string
+        'f""',
+        'f""""""',
+
+        # empty format specifier is okay
+        'f"{1:}"',
+
+        # use of conversion options
+        'f"{1!a}"',
+        'f"{1!a:1}"',
+
+        # format specifiers
+        'f"{1:1}"',
+        'f"{1:1.{32}}"',
+        'f"{1::>4}"',
+        'f"{x:{y}}"',
+        'f"{x:{y:}}"',
+        'f"{x:{y:1}}"',

         # Escapes
-        '{{}}',
-        '{{{1}}}',
-        '{{{1}',
-        '1{{2{{3',
-        '}}',
+        'f"{{}}"',
+        'f"{{{1}}}"',
+        'f"{{{1}"',
+        'f"1{{2{{3"',
+        'f"}}"',
+
+        # New Python 3.8 syntax f'{a=}'
+        'f"{a=}"',
+        'f"{a()=}"',
+
+        # multiline f-string
+        'f"""abc\ndef"""',
+        'f"""abc{\n123}def"""',
+
+        # a line continuation inside of an fstring_string
+        'f"abc\\\ndef"',
+        'f"\\\n{123}\\\n"',
+
+        # a line continuation inside of an fstring_expr
+        'f"{\\\n123}"',
+
+        # a line continuation inside of an format spec
+        'f"{123:.2\\\nf}"',
     ]
 )
 def test_valid(code, grammar):
-    code = 'f"""%s"""' % code
     module = grammar.parse(code, error_recovery=False)
     fstring = module.children[0]
     assert fstring.type == 'fstring'

@@ -40,23 +71,34 @@ def test_valid(code, grammar):

 @pytest.mark.parametrize(
     'code', [
-        '}',
-        '{',
-        '{1!{a}}',
-        '{!{a}}',
-        '{}',
-        '{:}',
-        '{:}}}',
-        '{:1}',
-        '{!:}',
-        '{!}',
-        '{!a}',
-        '{1:{}}',
-        '{1:{:}}',
+        # an f-string can't contain unmatched curly braces
+        'f"}"',
+        'f"{"',
+        'f"""}"""',
+        'f"""{"""',
+
+        # invalid conversion characters
+        'f"{1!{a}}"',
+        'f"{!{a}}"',
+
+        # The curly braces must contain an expression
+        'f"{}"',
+        'f"{:}"',
+        'f"{:}}}"',
+        'f"{:1}"',
+        'f"{!:}"',
+        'f"{!}"',
+        'f"{!a}"',
+
+        # invalid (empty) format specifiers
+        'f"{1:{}}"',
+        'f"{1:{:}}"',
+
+        # a newline without a line continuation inside a single-line string
+        'f"abc\ndef"',
     ]
 )
 def test_invalid(code, grammar):
-    code = 'f"""%s"""' % code
     with pytest.raises(ParserSyntaxError):
         grammar.parse(code, error_recovery=False)

@@ -88,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
     """),
     'f"foo',
     'f"""foo',
+    'f"abc\ndef"',
 ]
 )
 def test_roundtrip(grammar, code):

View File

@@ -189,3 +189,22 @@ def test_no_error_nodes(each_version):
             check(child)

     check(parse("if foo:\n bar", version=each_version))
+
+
+def test_named_expression(works_ge_py38):
+    works_ge_py38.parse("(a := 1, a + 1)")
+
+
+@pytest.mark.parametrize(
+    'param_code', [
+        'a=1, /',
+        'a, /',
+        'a=1, /, b=3',
+        'a, /, b',
+        'a, /, b',
+        'a, /, *, b',
+        'a, /, **kwargs',
+    ]
+)
+def test_positional_only_arguments(works_ge_py38, param_code):
+    works_ge_py38.parse("def x(%s): pass" % param_code)

View File

@@ -190,6 +190,19 @@ def test_old_octal_notation(works_in_py2):
     works_in_py2.parse("07")

+
+def test_long_notation(works_in_py2):
+    works_in_py2.parse("0xFl")
+    works_in_py2.parse("0xFL")
+    works_in_py2.parse("0b1l")
+    works_in_py2.parse("0B1L")
+    works_in_py2.parse("0o7l")
+    works_in_py2.parse("0O7L")
+    works_in_py2.parse("0l")
+    works_in_py2.parse("0L")
+    works_in_py2.parse("10l")
+    works_in_py2.parse("10L")
+
 def test_new_binary_notation(each_version):
     _parse("""0b101010""", each_version)
     _invalid_syntax("""0b0101021""", each_version)

View File

@@ -1,5 +1,6 @@
 # -*- coding: utf-8    # This file contains Unicode characters.
+import sys
 from textwrap import dedent

 import pytest

@@ -16,6 +17,7 @@ from parso.python.tokenize import PythonToken
 NAME = PythonTokenTypes.NAME
 NEWLINE = PythonTokenTypes.NEWLINE
 STRING = PythonTokenTypes.STRING
+NUMBER = PythonTokenTypes.NUMBER
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN

@@ -140,7 +142,7 @@ def test_identifier_contains_unicode():
     else:
         # Unicode tokens in Python 2 seem to be identified as operators.
         # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == OP
+        assert unicode_token[0] == ERRORTOKEN

 def test_quoted_strings():

@@ -228,16 +230,29 @@ def test_endmarker_end_pos():
     check('a\\')

+xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')])
+
 @pytest.mark.parametrize(
     ('code', 'types'), [
+        # Indentation
         (' foo', [INDENT, NAME, DEDENT]),
         (' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
         (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
                                NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
         (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
+
+        # Name stuff
+        ('1foo1', [NUMBER, NAME]),
+        pytest.param(
+            u'மெல்லினம்', [NAME],
+            **xfail_py2),
+        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
+        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
+        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
     ]
 )
-def test_indentation(code, types):
+def test_token_types(code, types):
     actual_types = [t.type for t in _get_token_list(code)]
     assert actual_types == types + [ENDMARKER]

@@ -330,13 +345,46 @@ def test_backslash():
     ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
     (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
     (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+
+    # format spec
     (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                              FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
+
+    # multiline f-string
+    ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+    ('f"""abc{\n123}def"""', [
+        FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+        FSTRING_END
+    ]),
+
+    # a line continuation inside of an fstring_string
+    ('f"abc\\\ndef"', [
+        FSTRING_START, FSTRING_STRING, FSTRING_END
+    ]),
+    ('f"\\\n{123}\\\n"', [
+        FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+        FSTRING_END
+    ]),
+
+    # a line continuation inside of an fstring_expr
+    ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
+
+    # a line continuation inside of an format spec
+    ('f"{123:.2\\\nf}"', [
+        FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
+    ]),
+
+    # a newline without a line continuation inside a single-line string is
+    # wrong, and will generate an ERRORTOKEN
+    ('f"abc\ndef"', [
+        FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
+    ]),
+
+    # a more complex example
     (r'print(f"Some {x:.2f}a{y}")', [
         NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
         FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
     ]),
 ]
 )
 def test_fstring(code, types, version_ge_py36):

View File

@@ -4,6 +4,7 @@ envlist = {py26,py27,py33,py34,py35,py36,py37}
 extras = testing
 deps =
     py26,py33: pytest>=3.0.7,<3.3
+    py27,py34: pytest<5
     py26,py33: setuptools<37
     coverage: coverage
 setenv =