Add a release date to the Changelog

Prepare the next release v0.5.2
Merge pull request #93 from yangyangxcf/fstring_tokenize
2025-12-06 21:04:29 +08:00 · 2019-12-15 01:00:38 +01:00 · 2019-12-15 00:55:19 +01:00 · 2019-12-15 00:47:32 +01:00 · 2019-12-15 00:29:41 +01:00 · 2019-12-15 00:13:48 +01:00
19 changed files with 766 additions and 166 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ python:
  - 3.5
  - 3.6
  - 3.7
-  - 3.8-dev
+  - 3.8
  - pypy2.7-6.0
  - pypy3.5-6.0
 matrix:
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -3,6 +3,19 @@
 Changelog
 ---------

+0.5.2 (2019-12-15)
++++++++++++++++++
+
+- Add include_setitem to get_definition/is_definition and get_defined_names (#66)
+- Fix named expression error listing (#89, #90)
+- Fix some f-string tokenizer issues (#93)
+
+0.5.1 (2019-07-13)
++++++++++++++++++
+
+- Fix: Some unicode identifiers were not correctly tokenized
+- Fix: Line continuations in f-strings are now working
+
 0.5.0 (2019-06-20)
 ++++++++++++++++++

@@ -17,19 +30,19 @@ Changelog
 - Python 3.8 support
 - FileIO support, it's now possible to use abstract file IO, support is alpha

-0.3.4 (2018-02-13)
+0.3.4 (2019-02-13)
 +++++++++++++++++++

 - Fix an f-string tokenizer error

-0.3.3 (2018-02-06)
+0.3.3 (2019-02-06)
 +++++++++++++++++++

 - Fix async errors in the diff parser
 - A fix in iter_errors
 - This is a very small bugfix release

-0.3.2 (2018-01-24)
+0.3.2 (2019-01-24)
 +++++++++++++++++++

 - 20+ bugfixes in the diff parser and 3 in the tokenizer
--- a/conftest.py
+++ b/conftest.py
@@ -58,7 +58,9 @@ def pytest_generate_tests(metafunc):
    elif 'each_py3_version' in metafunc.fixturenames:
        metafunc.parametrize('each_py3_version', VERSIONS_3)
    elif 'version_ge_py36' in metafunc.fixturenames:
-        metafunc.parametrize('version_ge_py36', ['3.6', '3.7'])
+        metafunc.parametrize('version_ge_py36', ['3.6', '3.7', '3.8'])
+    elif 'version_ge_py38' in metafunc.fixturenames:
+        metafunc.parametrize('version_ge_py38', ['3.8'])


 class NormalizerIssueCase(object):
--- a/parso/__init__.py
+++ b/parso/__init__.py
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
 from parso.utils import split_lines, python_bytes_to_unicode


-__version__ = '0.5.0'
+__version__ = '0.5.2'


 def parse(code=None, **kwargs):
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -57,7 +57,8 @@ class Grammar(object):
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
-            files on disk have not changed.
+            files on disk have not changed. Note that this stores pickle files
+            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
--- a/parso/pgen2/generator.py
+++ b/parso/pgen2/generator.py
@@ -309,13 +309,39 @@ def _calculate_tree_traversal(nonterminal_to_dfas):
            _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal)

    # Now that we have calculated the first terminals, we are sure that
-    # there is no left recursion or ambiguities.
+    # there is no left recursion.

    for dfas in nonterminal_to_dfas.values():
        for dfa_state in dfas:
+            transitions = dfa_state.transitions
            for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items():
                for transition, pushes in first_plans[nonterminal].items():
-                    dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes)
+                    if transition in transitions:
+                        prev_plan = transitions[transition]
+                        # Make sure these are sorted so that error messages are
+                        # at least deterministic
+                        choices = sorted([
+                            (
+                                prev_plan.dfa_pushes[0].from_rule
+                                if prev_plan.dfa_pushes
+                                else prev_plan.next_dfa.from_rule
+                            ),
+                            (
+                                pushes[0].from_rule
+                                if pushes else next_dfa.from_rule
+                            ),
+                        ])
+                        raise ValueError(
+                            "Rule %s is ambiguous; given a %s token, we "
+                            "can't determine if we should evaluate %s or %s."
+                            % (
+                                (
+                                    dfa_state.from_rule,
+                                    transition,
+                                ) + tuple(choices)
+                            )
+                        )
+                    transitions[transition] = DFAPlan(next_dfa, pushes)


 def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
@@ -345,13 +371,6 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal):
                raise ValueError("left recursion for rule %r" % nonterminal)

        for t, pushes in first_plans2.items():
-            check = new_first_plans.get(t)
-            if check is not None:
-                raise ValueError(
-                    "Rule %s is ambiguous; %s is the"
-                    " start of the rule %s as well as %s."
-                    % (nonterminal, t, nonterminal2, check[-1].from_rule)
-                )
            new_first_plans[t] = [next_] + pushes

    first_plans[nonterminal] = new_first_plans
--- a/parso/pgen2/grammar_parser.py
+++ b/parso/pgen2/grammar_parser.py
@@ -141,6 +141,9 @@ class NFAArc(object):
        self.next = next_
        self.nonterminal_or_string = nonterminal_or_string

+    def __repr__(self):
+        return '<%s: %s>' % (self.__class__.__name__, self.nonterminal_or_string)
+

 class NFAState(object):
    def __init__(self, from_rule):
--- a/parso/python/errors.py
+++ b/parso/python/errors.py
@@ -52,7 +52,7 @@ def _is_future_import(import_from):
    # It looks like a __future__ import that is relative is still a future
    # import. That feels kind of odd, but whatever.
    # if import_from.level != 0:
-        # return False
+    #     return False
    from_names = import_from.get_from_names()
    return [n.value for n in from_names] == ['__future__']

@@ -94,19 +94,32 @@ def _is_future_import_first(import_from):


 def _iter_definition_exprs_from_lists(exprlist):
-    for child in exprlist.children[::2]:
-        if child.type == 'atom' and child.children[0] in ('(', '['):
-            testlist_comp = child.children[0]
-            if testlist_comp.type == 'testlist_comp':
-                for expr in _iter_definition_exprs_from_lists(testlist_comp):
-                    yield expr
-                continue
+    def check_expr(child):
+        if child.type == 'atom':
+            if child.children[0] == '(':
+                testlist_comp = child.children[1]
+                if testlist_comp.type == 'testlist_comp':
+                    for expr in _iter_definition_exprs_from_lists(testlist_comp):
+                        yield expr
+                    return
+                else:
+                    # It's a paren that doesn't do anything, like 1 + (1)
+                    for c in check_expr(testlist_comp):
+                        yield c
+                    return
            elif child.children[0] == '[':
                yield testlist_comp
-                continue
-
+                return
        yield child

+    if exprlist.type in _STAR_EXPR_PARENTS:
+        for child in exprlist.children[::2]:
+            for c in check_expr(child):  # Python 2 sucks
+                yield c
+    else:
+        for c in check_expr(exprlist):  # Python 2 sucks
+            yield c
+

 def _get_expr_stmt_definition_exprs(expr_stmt):
    exprs = []
@@ -120,8 +133,6 @@ def _get_expr_stmt_definition_exprs(expr_stmt):

 def _get_for_stmt_definition_exprs(for_stmt):
    exprlist = for_stmt.children[1]
-    if exprlist.type != 'exprlist':
-        return [exprlist]
    return list(_iter_definition_exprs_from_lists(exprlist))


@@ -478,38 +489,38 @@ class _StringChecks(SyntaxRule):
    message = "bytes can only contain ASCII literal characters."

    def is_issue(self, leaf):
-            string_prefix = leaf.string_prefix.lower()
-            if 'b' in string_prefix \
-                    and self._normalizer.version >= (3, 0) \
-                    and any(c for c in leaf.value if ord(c) > 127):
-                # b'ä'
-                return True
+        string_prefix = leaf.string_prefix.lower()
+        if 'b' in string_prefix \
+                and self._normalizer.version >= (3, 0) \
+                and any(c for c in leaf.value if ord(c) > 127):
+            # b'ä'
+            return True

-            if 'r' not in string_prefix:
-                # Raw strings don't need to be checked if they have proper
-                # escaping.
-                is_bytes = self._normalizer.version < (3, 0)
-                if 'b' in string_prefix:
-                    is_bytes = True
-                if 'u' in string_prefix:
-                    is_bytes = False
+        if 'r' not in string_prefix:
+            # Raw strings don't need to be checked if they have proper
+            # escaping.
+            is_bytes = self._normalizer.version < (3, 0)
+            if 'b' in string_prefix:
+                is_bytes = True
+            if 'u' in string_prefix:
+                is_bytes = False

-                payload = leaf._get_payload()
-                if is_bytes:
-                    payload = payload.encode('utf-8')
-                    func = codecs.escape_decode
-                else:
-                    func = codecs.unicode_escape_decode
+            payload = leaf._get_payload()
+            if is_bytes:
+                payload = payload.encode('utf-8')
+                func = codecs.escape_decode
+            else:
+                func = codecs.unicode_escape_decode

-                try:
-                    with warnings.catch_warnings():
-                        # The warnings from parsing strings are not relevant.
-                        warnings.filterwarnings('ignore')
-                        func(payload)
-                except UnicodeDecodeError as e:
-                    self.add_issue(leaf, message='(unicode error) ' + str(e))
-                except ValueError as e:
-                    self.add_issue(leaf, message='(value error) ' + str(e))
+            try:
+                with warnings.catch_warnings():
+                    # The warnings from parsing strings are not relevant.
+                    warnings.filterwarnings('ignore')
+                    func(payload)
+            except UnicodeDecodeError as e:
+                self.add_issue(leaf, message='(unicode error) ' + str(e))
+            except ValueError as e:
+                self.add_issue(leaf, message='(value error) ' + str(e))


@ErrorFinder.register_rule(value='*')
@@ -586,7 +597,7 @@ class _TrailingImportComma(SyntaxRule):
    message = "trailing comma not allowed without surrounding parentheses"

    def is_issue(self, node):
-        if node.children[-1] == ',':
+        if node.children[-1] == ',' and node.parent.children[-1] != ')':
            return True


@@ -883,7 +894,7 @@ class _FStringRule(SyntaxRule):


 class _CheckAssignmentRule(SyntaxRule):
-    def _check_assignment(self, node, is_deletion=False):
+    def _check_assignment(self, node, is_deletion=False, is_namedexpr=False):
        error = None
        type_ = node.type
        if type_ == 'lambdef':
@@ -907,9 +918,9 @@ class _CheckAssignmentRule(SyntaxRule):
                        # This is not a comprehension, they were handled
                        # further above.
                        for child in second.children[::2]:
-                            self._check_assignment(child, is_deletion)
+                            self._check_assignment(child, is_deletion, is_namedexpr)
                    else:  # Everything handled, must be useless brackets.
-                        self._check_assignment(second, is_deletion)
+                        self._check_assignment(second, is_deletion, is_namedexpr)
        elif type_ == 'keyword':
            if self._normalizer.version < (3, 8):
                error = 'keyword'
@@ -939,17 +950,24 @@ class _CheckAssignmentRule(SyntaxRule):
                assert trailer.type == 'trailer'
                if trailer.children[0] == '(':
                    error = 'function call'
+                elif is_namedexpr and trailer.children[0] == '[':
+                    error = 'subscript'
+                elif is_namedexpr and trailer.children[0] == '.':
+                    error = 'attribute'
        elif type_ in ('testlist_star_expr', 'exprlist', 'testlist'):
            for child in node.children[::2]:
-                self._check_assignment(child, is_deletion)
+                self._check_assignment(child, is_deletion, is_namedexpr)
        elif ('expr' in type_ and type_ != 'star_expr'  # is a substring
              or '_test' in type_
              or type_ in ('term', 'factor')):
            error = 'operator'

        if error is not None:
-            cannot = "can't" if self._normalizer.version < (3, 8) else "cannot"
-            message = ' '.join([cannot, "delete" if is_deletion else "assign to", error])
+            if is_namedexpr:
+                message = 'cannot use named assignment with %s' % error
+            else:
+                cannot = "can't" if self._normalizer.version < (3, 8) else "cannot"
+                message = ' '.join([cannot, "delete" if is_deletion else "assign to", error])
            self.add_issue(node, message=message)


@@ -959,7 +977,6 @@ class _CompForRule(_CheckAssignmentRule):

    def is_issue(self, node):
        expr_list = node.children[1]
-        print(expr_list)
        if expr_list.type != 'expr_list':  # Already handled.
            self._check_assignment(expr_list)

@@ -1009,3 +1026,71 @@ class _ForStmtRule(_CheckAssignmentRule):
        expr_list = for_stmt.children[1]
        if expr_list.type != 'expr_list':  # Already handled.
            self._check_assignment(expr_list)
+
+
+@ErrorFinder.register_rule(type='namedexpr_test')
+class _NamedExprRule(_CheckAssignmentRule):
+    # namedexpr_test: test [':=' test]
+
+    def is_issue(self, namedexpr_test):
+        # assigned name
+        first = namedexpr_test.children[0]
+
+        def search_namedexpr_in_comp_for(node):
+            while True:
+                parent = node.parent
+                if parent is None:
+                    return parent
+                if parent.type == 'sync_comp_for' and parent.children[3] == node:
+                    return parent
+                node = parent
+
+        if search_namedexpr_in_comp_for(namedexpr_test):
+            # [i+1 for i in (i := range(5))]
+            # [i+1 for i in (j := range(5))]
+            # [i+1 for i in (lambda: (j := range(5)))()]
+            message = 'assignment expression cannot be used in a comprehension iterable expression'
+            self.add_issue(namedexpr_test, message=message)
+
+        # defined names
+        exprlist = list()
+
+        def process_comp_for(comp_for):
+            if comp_for.type == 'sync_comp_for':
+                comp = comp_for
+            elif comp_for.type == 'comp_for':
+                comp = comp_for.children[1]
+            exprlist.extend(_get_for_stmt_definition_exprs(comp))
+
+        def search_all_comp_ancestors(node):
+            has_ancestors = False
+            while True:
+                node = search_ancestor(node, 'testlist_comp', 'dictorsetmaker')
+                if node is None:
+                    break
+                for child in node.children:
+                    if child.type in _COMP_FOR_TYPES:
+                        process_comp_for(child)
+                        has_ancestors = True
+                        break
+            return has_ancestors
+
+        # check assignment expressions in comprehensions
+        search_all = search_all_comp_ancestors(namedexpr_test)
+        if search_all:
+            if self._normalizer.context.node.type == 'classdef':
+                message = 'assignment expression within a comprehension ' \
+                          'cannot be used in a class body'
+                self.add_issue(namedexpr_test, message=message)
+
+            namelist = [expr.value for expr in exprlist if expr.type == 'name']
+            if first.type == 'name' and first.value in namelist:
+                # [i := 0 for i, j in range(5)]
+                # [[(i := i) for j in range(5)] for i in range(5)]
+                # [i for i, j in range(5) if True or (i := 1)]
+                # [False and (i := 0) for i, j in range(5)]
+                message = 'assignment expression cannot rebind ' \
+                          'comprehension iteration variable %r' % first.value
+                self.add_issue(namedexpr_test, message=message)
+
+        self._check_assignment(first, is_namedexpr=True)
--- a/parso/python/grammar39.txt
+++ b/parso/python/grammar39.txt
@@ -0,0 +1,171 @@
+# Grammar for Python
+
+# NOTE WELL: You should also follow all the steps listed at
+# https://devguide.python.org/grammar/
+
+# Start symbols for the grammar:
+#       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+file_input: (NEWLINE | stmt)* ENDMARKER
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef)
+
+async_funcdef: 'async' funcdef
+funcdef: 'def' NAME parameters ['->' test] ':' suite
+
+parameters: '(' [typedargslist] ')'
+typedargslist: (
+  (tfpdef ['=' test] (',' tfpdef ['=' test])* ',' '/' [',' [ tfpdef ['=' test] (
+        ',' tfpdef ['=' test])* ([',' [
+        '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+      | '**' tfpdef [',']]])
+  | '*' [tfpdef] (',' tfpdef ['=' test])* ([',' ['**' tfpdef [',']]])
+  | '**' tfpdef [',']]] )
+|  (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
+        '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+      | '**' tfpdef [',']]]
+  | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
+  | '**' tfpdef [','])
+)
+tfpdef: NAME [':' test]
+varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
+        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+      | '**' vfpdef [',']]]
+  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+  | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
+        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+      | '**' vfpdef [',']]]
+  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
+  | '**' vfpdef [',']
+)
+vfpdef: NAME
+
+stmt: simple_stmt | compound_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
+expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+annassign: ':' test ['=' test]
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+# For normal and annotated assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist_star_expr]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+import_stmt: import_name | import_from
+import_name: 'import' dotted_as_names
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names))
+import_as_name: NAME ['as' NAME]
+dotted_as_name: dotted_name ['as' NAME]
+import_as_names: import_as_name (',' import_as_name)* [',']
+dotted_as_names: dotted_as_name (',' dotted_as_name)*
+dotted_name: NAME ('.' NAME)*
+global_stmt: 'global' NAME (',' NAME)*
+nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+async_stmt: 'async' (funcdef | with_stmt | for_stmt)
+if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+           'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)*  ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test ['as' NAME]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+namedexpr_test: test [':=' test]
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401 (which really works :-)
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'@'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power
+power: atom_expr ['**' factor]
+atom_expr: ['await'] atom trailer*
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       NAME | NUMBER | strings | '...' | 'None' | 'True' | 'False')
+testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( ((test ':' test | '**' expr)
+                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                  ((test | star_expr)
+                   (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)*  [',']
+
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+            test ':=' test |
+            test '=' test |
+            '**' test |
+            '*' test )
+
+comp_iter: comp_for | comp_if
+sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
+comp_for: ['async'] sync_comp_for
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist_star_expr
+
+strings: (STRING | fstring)+
+fstring: FSTRING_START fstring_content* FSTRING_END
+fstring_content: FSTRING_STRING | fstring_expr
+fstring_conversion: '!' NAME
+fstring_expr: '{' testlist ['='] [ fstring_conversion ] [ fstring_format_spec ] '}'
+fstring_format_spec: ':' fstring_content*
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -23,6 +23,9 @@ from parso._compatibility import py_version
 from parso.utils import split_lines


+# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111)
+MAX_UNICODE = '\U0010ffff'
+
 STRING = PythonTokenTypes.STRING
 NAME = PythonTokenTypes.NAME
 NUMBER = PythonTokenTypes.NUMBER
@@ -51,8 +54,13 @@ if py_version >= 30:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
 else:
-    namechars = string.ascii_letters + '_'
-    is_identifier = lambda s: s in namechars
+    # Python 2 doesn't, but it's not that important anymore and if you tokenize
+    # Python 2 code with this, it's still ok. It's just that parsing Python 3
+    # code with this function is not 100% correct.
+    # This just means that Python 2 code matches a few too many identifiers,
+    # but that doesn't really matter.
+    def is_identifier(s):
+        return True


 def group(*choices, **kwargs):
@@ -118,9 +126,9 @@ def _get_token_collection(version_info):
        return result


-fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+')
+fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+')
-fstring_format_spec_single_line = _compile(r'[^{}\r\n]+')
+fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+')
 fstring_format_spec_multi_line = _compile(r'[^{}]+')


@@ -130,7 +138,16 @@ def _create_token_collection(version_info):
    Whitespace = r'[ \f\t]*'
    whitespace = _compile(Whitespace)
    Comment = r'#[^\r\n]*'
-    Name = r'\w+'
+    # Python 2 is pretty much not working properly anymore, we just ignore
+    # parsing unicode properly, which is fine, I guess.
+    if version_info[0] == 2:
+        Name = r'([A-Za-z_0-9]+)'
+    elif sys.version_info[0] == 2:
+        # Unfortunately the regex engine cannot deal with the regex below, so
+        # just use this one.
+        Name = r'(\w+)'
+    else:
+        Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)'

    if version_info >= (3, 6):
        Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
@@ -297,17 +314,19 @@ class FStringNode(object):

 def _close_fstring_if_necessary(fstring_stack, string, start_pos, additional_prefix):
    for fstring_stack_index, node in enumerate(fstring_stack):
-        if string.startswith(node.quote):
+        lstripped_string = string.lstrip()
+        len_lstrip = len(string) - len(lstripped_string)
+        if lstripped_string.startswith(node.quote):
            token = PythonToken(
                FSTRING_END,
                node.quote,
                start_pos,
-                prefix=additional_prefix,
+                prefix=additional_prefix+string[:len_lstrip],
            )
            additional_prefix = ''
            assert not node.previous_lines
            del fstring_stack[fstring_stack_index:]
-            return token, '', len(node.quote)
+            return token, '', len(node.quote) + len_lstrip
    return None, additional_prefix, 0


@@ -340,7 +359,9 @@ def _find_fstring_string(endpats, fstring_stack, line, lnum, pos):

    new_pos = pos
    new_pos += len(string)
-    if allow_multiline and (string.endswith('\n') or string.endswith('\r')):
+    # even if allow_multiline is False, we still need to check for trailing
+    # newlines, because a single-line f-string can contain line continuations
+    if string.endswith('\n') or string.endswith('\r'):
        tos.previous_lines += string
        string = ''
    else:
@@ -463,8 +484,20 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    yield fstring_end_token
                    continue

-            pseudomatch = pseudo_token.match(line, pos)
-            if not pseudomatch:                             # scan for tokens
+            # in an f-string, match until the end of the string
+            if fstring_stack:
+                string_line = line
+                for fstring_stack_node in fstring_stack:
+                    quote = fstring_stack_node.quote
+                    end_match = endpats[quote].match(line, pos)
+                    if end_match is not None:
+                        end_match_string = end_match.group(0)
+                        if len(end_match_string) - len(quote) + pos < len(string_line):
+                            string_line = line[:pos] + end_match_string[:-len(quote)]
+                pseudomatch = pseudo_token.match(string_line, pos)
+            else:
+                pseudomatch = pseudo_token.match(line, pos)
+            if not pseudomatch:  # scan for tokens
                match = whitespace.match(line, pos)
                if pos == 0:
                    for t in dedent_if_necessary(match.end()):
@@ -510,6 +543,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            if (initial in numchars or                      # ordinary number
                    (initial == '.' and token != '.' and token != '...')):
                yield PythonToken(NUMBER, token, spos, prefix)
+            elif pseudomatch.group(3) is not None:            # ordinary name
+                if token in always_break_tokens:
+                    fstring_stack[:] = []
+                    paren_level = 0
+                    # We only want to dedent if the token is on a new line.
+                    if re.match(r'[ \f\t]*$', line[:start]):
+                        while True:
+                            indent = indents.pop()
+                            if indent > start:
+                                yield PythonToken(DEDENT, '', spos, '')
+                            else:
+                                indents.append(indent)
+                                break
+                if is_identifier(token):
+                    yield PythonToken(NAME, token, spos, prefix)
+                else:
+                    for t in _split_illegal_unicode_name(token, spos, prefix):
+                        yield t  # yield from Python 2
            elif initial in '\r\n':
                if any(not f.allow_multiline() for f in fstring_stack):
                    # Would use fstring_stack.clear, but that's not available
@@ -523,7 +574,12 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                new_line = True
            elif initial == '#':  # Comments
                assert not token.endswith("\n")
-                additional_prefix = prefix + token
+                if fstring_stack and fstring_stack[-1].is_in_expr():
+                    # `#` is not allowed in f-string expressions
+                    yield PythonToken(ERRORTOKEN, initial, spos, prefix)
+                    pos = start + 1
+                else:
+                    additional_prefix = prefix + token
            elif token in triple_quoted:
                endprog = endpats[token]
                endmatch = endprog.match(line, pos)
@@ -564,20 +620,6 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
            elif token in fstring_pattern_map:  # The start of an fstring.
                fstring_stack.append(FStringNode(fstring_pattern_map[token]))
                yield PythonToken(FSTRING_START, token, spos, prefix)
-            elif is_identifier(initial):                      # ordinary name
-                if token in always_break_tokens:
-                    fstring_stack[:] = []
-                    paren_level = 0
-                    # We only want to dedent if the token is on a new line.
-                    if re.match(r'[ \f\t]*$', line[:start]):
-                        while True:
-                            indent = indents.pop()
-                            if indent > start:
-                                yield PythonToken(DEDENT, '', spos, '')
-                            else:
-                                indents.append(indent)
-                                break
-                yield PythonToken(NAME, token, spos, prefix)
            elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'):  # continued stmt
                additional_prefix += prefix + line[start:]
                break
@@ -593,10 +635,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
                    else:
                        if paren_level:
                            paren_level -= 1
-                elif token == ':' and fstring_stack \
+                elif token.startswith(':') and fstring_stack \
                        and fstring_stack[-1].parentheses_count \
                        - fstring_stack[-1].format_spec_count == 1:
+                    # `:` and `:=` both count
                    fstring_stack[-1].format_spec_count += 1
+                    token = ':'
+                    pos = start + 1

                yield PythonToken(OP, token, spos, prefix)

@@ -613,6 +658,39 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


+def _split_illegal_unicode_name(token, start_pos, prefix):
+    def create_token():
+        return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix)
+
+    found = ''
+    is_illegal = False
+    pos = start_pos
+    for i, char in enumerate(token):
+        if is_illegal:
+            if is_identifier(char):
+                yield create_token()
+                found = char
+                is_illegal = False
+                prefix = ''
+                pos = start_pos[0], start_pos[1] + i
+            else:
+                found += char
+        else:
+            new_found = found + char
+            if is_identifier(new_found):
+                found = new_found
+            else:
+                if found:
+                    yield create_token()
+                    prefix = ''
+                    pos = start_pos[0], start_pos[1] + i
+                found = char
+                is_illegal = True
+
+    if found:
+        yield create_token()
+
+
 if __name__ == "__main__":
    if len(sys.argv) >= 2:
        path = sys.argv[1]
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -43,7 +43,10 @@ Parser Tree Classes
 """

 import re
-from collections import Mapping
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping

 from parso._compatibility import utf8_repr, unicode
 from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
@@ -197,25 +200,22 @@ class Name(_LeafWithoutNewlines):
        return "<%s: %s@%s,%s>" % (type(self).__name__, self.value,
                                   self.line, self.column)

-    def is_definition(self):
+    def is_definition(self, include_setitem=False):
        """
        Returns True if the name is being defined.
        """
-        return self.get_definition() is not None
+        return self.get_definition(include_setitem=include_setitem) is not None

-    def get_definition(self, import_name_always=False):
+    def get_definition(self, import_name_always=False, include_setitem=False):
        """
-        Returns None if there's on definition for a name.
+        Returns None if there's no definition for a name.

-        :param import_name_alway: Specifies if an import name is always a
+        :param import_name_always: Specifies if an import name is always a
            definition. Normally foo in `from foo import bar` is not a
            definition.
        """
        node = self.parent
        type_ = node.type
-        if type_ in ('power', 'atom_expr'):
-            # In `self.x = 3` self is not a definition, but x is.
-            return None

        if type_ in ('funcdef', 'classdef'):
            if self == node.name:
@@ -234,7 +234,7 @@ class Name(_LeafWithoutNewlines):
            if node.type == 'suite':
                return None
            if node.type in _GET_DEFINITION_TYPES:
-                if self in node.get_defined_names():
+                if self in node.get_defined_names(include_setitem):
                    return node
                if import_name_always and node.type in _IMPORTS:
                    return node
@@ -772,8 +772,8 @@ class ForStmt(Flow):
        """
        return self.children[3]

-    def get_defined_names(self):
-        return _defined_names(self.children[1])
+    def get_defined_names(self, include_setitem=False):
+        return _defined_names(self.children[1], include_setitem)


 class TryStmt(Flow):
@@ -796,7 +796,7 @@ class WithStmt(Flow):
    type = 'with_stmt'
    __slots__ = ()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        """
        Returns the a list of `Name` that the with statement defines. The
        defined names are set after `as`.
@@ -805,7 +805,7 @@ class WithStmt(Flow):
        for with_item in self.children[1:-2:2]:
            # Check with items for 'as' names.
            if with_item.type == 'with_item':
-                names += _defined_names(with_item.children[2])
+                names += _defined_names(with_item.children[2], include_setitem)
        return names

    def get_test_node_from_name(self, name):
@@ -846,7 +846,7 @@ class ImportFrom(Import):
    type = 'import_from'
    __slots__ = ()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        """
        Returns the a list of `Name` that the import defines. The
        defined names are set after `import` or in case an alias - `as` - is
@@ -917,7 +917,7 @@ class ImportName(Import):
    type = 'import_name'
    __slots__ = ()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        """
        Returns the a list of `Name` that the import defines. The defined names
        is always the first name after `import` or in case an alias - `as` - is
@@ -1018,7 +1018,7 @@ class YieldExpr(PythonBaseNode):
    __slots__ = ()


-def _defined_names(current):
+def _defined_names(current, include_setitem):
    """
    A helper function to find the defined names in statements, for loops and
    list comprehensions.
@@ -1026,14 +1026,22 @@ def _defined_names(current):
    names = []
    if current.type in ('testlist_star_expr', 'testlist_comp', 'exprlist', 'testlist'):
        for child in current.children[::2]:
-            names += _defined_names(child)
+            names += _defined_names(child, include_setitem)
    elif current.type in ('atom', 'star_expr'):
-        names += _defined_names(current.children[1])
+        names += _defined_names(current.children[1], include_setitem)
    elif current.type in ('power', 'atom_expr'):
        if current.children[-2] != '**':  # Just if there's no operation
            trailer = current.children[-1]
            if trailer.children[0] == '.':
                names.append(trailer.children[1])
+            elif trailer.children[0] == '[' and include_setitem:
+                for node in current.children[-2::-1]:
+                    if node.type == 'trailer':
+                        names.append(node.children[1])
+                        break
+                    if node.type == 'name':
+                        names.append(node)
+                        break
    else:
        names.append(current)
    return names
@@ -1043,18 +1051,18 @@ class ExprStmt(PythonBaseNode, DocstringMixin):
    type = 'expr_stmt'
    __slots__ = ()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        """
        Returns a list of `Name` defined before the `=` sign.
        """
        names = []
        if self.children[1].type == 'annassign':
-            names = _defined_names(self.children[0])
+            names = _defined_names(self.children[0], include_setitem)
        return [
            name
            for i in range(0, len(self.children) - 2, 2)
            if '=' in self.children[i + 1].value
-            for name in _defined_names(self.children[i])
+            for name in _defined_names(self.children[i], include_setitem)
        ] + names

    def get_rhs(self):
@@ -1147,7 +1155,7 @@ class Param(PythonBaseNode):
        else:
            return self._tfpdef()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        return [self.name]

    @property
@@ -1205,12 +1213,12 @@ class SyncCompFor(PythonBaseNode):
    type = 'sync_comp_for'
    __slots__ = ()

-    def get_defined_names(self):
+    def get_defined_names(self, include_setitem=False):
        """
        Returns the a list of `Name` that the comprehension defines.
        """
        # allow async for
-        return _defined_names(self.children[1])
+        return _defined_names(self.children[1], include_setitem)


 # This is simply here so an older Jedi version can work with this new parso
--- a/test/failing_examples.py
+++ b/test/failing_examples.py
@@ -319,3 +319,35 @@ if sys.version_info[:2] < (3, 8):
                    continue
            '''),  # 'continue' not supported inside 'finally' clause"
    ]
+
+if sys.version_info[:2] >= (3, 8):
+    # assignment expressions from issue#89
+    FAILING_EXAMPLES += [
+        # Case 2
+        '(lambda: x := 1)',
+        '((lambda: x) := 1)',
+        # Case 3
+        '(a[i] := x)',
+        '((a[i]) := x)',
+        '(a(i) := x)',
+        # Case 4
+        '(a.b := c)',
+        '[(i.i:= 0) for ((i), j) in range(5)]',
+        # Case 5
+        '[i:= 0 for i, j in range(5)]',
+        '[(i:= 0) for ((i), j) in range(5)]',
+        '[(i:= 0) for ((i), j), in range(5)]',
+        '[(i:= 0) for ((i), j.i), in range(5)]',
+        '[[(i:= i) for j in range(5)] for i in range(5)]',
+        '[i for i, j in range(5) if True or (i:= 1)]',
+        '[False and (i:= 0) for i, j in range(5)]',
+        # Case 6
+        '[i+1 for i in (i:= range(5))]',
+        '[i+1 for i in (j:= range(5))]',
+        '[i+1 for i in (lambda: (j:= range(5)))()]',
+        # Case 7
+        'class Example:\n [(j := i) for i in range(5)]',
+        # Not in that issue
+        '(await a := x)',
+        '((await a) := x)',
+    ]
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -974,10 +974,12 @@ def test_random_unicode_characters(differ):
    Those issues were all found with the fuzzer.
    """
    differ.initialize('')
-    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True)
+    differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1,
+                 expect_error_leaves=True)
    differ.parse(u'\r\r', parsers=1)
    differ.parse(u"˟Ę\x05À\r   rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True)
-    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1)
+    differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1,
+                 expect_error_leaves=sys.version_info[0] == 2)
    s = '        if not (self, "_fi\x02\x0e\x08\n\nle"):'
    differ.parse(s, parsers=1, expect_error_leaves=True)
    differ.parse('')
--- a/test/test_fstring.py
+++ b/test/test_fstring.py
@@ -12,33 +12,57 @@ def grammar():

@pytest.mark.parametrize(
    'code', [
-        '{1}',
-        '{1:}',
-        '',
-        '{1!a}',
-        '{1!a:1}',
-        '{1:1}',
-        '{1:1.{32}}',
-        '{1::>4}',
-        '{foo} {bar}',
-        '{x:{y}}',
-        '{x:{y:}}',
-        '{x:{y:1}}',
+        # simple cases
+        'f"{1}"',
+        'f"""{1}"""',
+        'f"{foo} {bar}"',
+
+        # empty string
+        'f""',
+        'f""""""',
+
+        # empty format specifier is okay
+        'f"{1:}"',
+
+        # use of conversion options
+        'f"{1!a}"',
+        'f"{1!a:1}"',
+
+        # format specifiers
+        'f"{1:1}"',
+        'f"{1:1.{32}}"',
+        'f"{1::>4}"',
+        'f"{x:{y}}"',
+        'f"{x:{y:}}"',
+        'f"{x:{y:1}}"',

        # Escapes
-        '{{}}',
-        '{{{1}}}',
-        '{{{1}',
-        '1{{2{{3',
-        '}}',
+        'f"{{}}"',
+        'f"{{{1}}}"',
+        'f"{{{1}"',
+        'f"1{{2{{3"',
+        'f"}}"',

        # New Python 3.8 syntax f'{a=}'
-        '{a=}',
-        '{a()=}',
+        'f"{a=}"',
+        'f"{a()=}"',
+
+        # multiline f-string
+        'f"""abc\ndef"""',
+        'f"""abc{\n123}def"""',
+
+        # a line continuation inside of an fstring_string
+        'f"abc\\\ndef"',
+        'f"\\\n{123}\\\n"',
+
+        # a line continuation inside of an fstring_expr
+        'f"{\\\n123}"',
+
+        # a line continuation inside of a format spec
+        'f"{123:.2\\\nf}"',
    ]
 )
 def test_valid(code, grammar):
-    code = 'f"""%s"""' % code
    module = grammar.parse(code, error_recovery=False)
    fstring = module.children[0]
    assert fstring.type == 'fstring'
@@ -47,23 +71,34 @@ def test_valid(code, grammar):

@pytest.mark.parametrize(
    'code', [
-        '}',
-        '{',
-        '{1!{a}}',
-        '{!{a}}',
-        '{}',
-        '{:}',
-        '{:}}}',
-        '{:1}',
-        '{!:}',
-        '{!}',
-        '{!a}',
-        '{1:{}}',
-        '{1:{:}}',
+        # an f-string can't contain unmatched curly braces
+        'f"}"',
+        'f"{"',
+        'f"""}"""',
+        'f"""{"""',
+
+        # invalid conversion characters
+        'f"{1!{a}}"',
+        'f"{!{a}}"',
+
+        # The curly braces must contain an expression
+        'f"{}"',
+        'f"{:}"',
+        'f"{:}}}"',
+        'f"{:1}"',
+        'f"{!:}"',
+        'f"{!}"',
+        'f"{!a}"',
+
+        # invalid (empty) format specifiers
+        'f"{1:{}}"',
+        'f"{1:{:}}"',
+
+        # a newline without a line continuation inside a single-line string
+        'f"abc\ndef"',
    ]
 )
 def test_invalid(code, grammar):
-    code = 'f"""%s"""' % code
    with pytest.raises(ParserSyntaxError):
        grammar.parse(code, error_recovery=False)

@@ -95,6 +130,7 @@ def test_tokenize_start_pos(code, positions):
            """),
        'f"foo',
        'f"""foo',
+        'f"abc\ndef"',
    ]
 )
 def test_roundtrip(grammar, code):
--- a/test/test_parser_tree.py
+++ b/test/test_parser_tree.py
@@ -180,3 +180,45 @@ def top_function_three():

    r = get_raise_stmts(code, 2) #  Lists inside try-catch
    assert len(list(r)) == 2
+
+
+@pytest.mark.parametrize(
+    'code, name_index, is_definition, include_setitem', [
+        ('x = 3', 0, True, False),
+        ('x.y = 3', 0, False, False),
+        ('x.y = 3', 1, True, False),
+        ('x.y = u.v = z', 0, False, False),
+        ('x.y = u.v = z', 1, True, False),
+        ('x.y = u.v = z', 2, False, False),
+        ('x.y = u.v, w = z', 3, True, False),
+        ('x.y = u.v, w = z', 4, True, False),
+        ('x.y = u.v, w = z', 5, False, False),
+
+        ('x, y = z', 0, True, False),
+        ('x, y = z', 1, True, False),
+        ('x, y = z', 2, False, False),
+        ('x, y = z', 2, False, False),
+        ('x[0], y = z', 2, False, False),
+        ('x[0] = z', 0, False, False),
+        ('x[0], y = z', 0, False, False),
+        ('x[0], y = z', 2, False, True),
+        ('x[0] = z', 0, True, True),
+        ('x[0], y = z', 0, True, True),
+        ('x: int = z', 0, True, False),
+        ('x: int = z', 1, False, False),
+        ('x: int = z', 2, False, False),
+        ('x: int', 0, True, False),
+        ('x: int', 1, False, False),
+    ]
+)
+def test_is_definition(code, name_index, is_definition, include_setitem):
+    module = parse(code, version='3.8')
+    name = module.get_first_leaf()
+    while True:
+        if name.type == 'name':
+            if name_index == 0:
+                break
+            name_index -= 1
+        name = name.get_next_leaf()
+
+    assert name.is_definition(include_setitem=include_setitem) == is_definition
--- a/test/test_pgen2.py
+++ b/test/test_pgen2.py
@@ -292,12 +292,22 @@ def test_left_recursion():
        generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes)


-def test_ambiguities():
-    with pytest.raises(ValueError, match='ambiguous'):
-        generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes)
-
-    with pytest.raises(ValueError, match='ambiguous'):
-        generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes)
-
-    with pytest.raises(ValueError, match='ambiguous'):
-        generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes)
+@pytest.mark.parametrize(
+    'grammar, error_match', [
+        ['foo: bar | baz\nbar: NAME\nbaz: NAME\n',
+         r"foo is ambiguous.*given a TokenType\(NAME\).*bar or baz"],
+        ['''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''',
+         r"foo is ambiguous.*given a ReservedString\(x\).*bar or baz"],
+        ['''foo: bar | 'x'\nbar: 'x'\n''',
+         r"foo is ambiguous.*given a ReservedString\(x\).*bar or foo"],
+        # An ambiguity with the second (not the first) child of a production
+        ['outer: "a" [inner] "b" "c"\ninner: "b" "c" [inner]\n',
+         r"outer is ambiguous.*given a ReservedString\(b\).*inner or outer"],
+        # An ambiguity hidden by a level of indirection (middle)
+        ['outer: "a" [middle] "b" "c"\nmiddle: inner\ninner: "b" "c" [inner]\n',
+         r"outer is ambiguous.*given a ReservedString\(b\).*middle or outer"],
+    ]
+)
+def test_ambiguities(grammar, error_match):
+    with pytest.raises(ValueError, match=error_match):
+        generate_grammar(grammar, tokenize.PythonTokenTypes)
--- a/test/test_python_errors.py
+++ b/test/test_python_errors.py
@@ -293,6 +293,19 @@ def test_valid_fstrings(code):
    assert not _get_error_list(code, version='3.6')


+@pytest.mark.parametrize(
+    'code', [
+        'a = (b := 1)',
+        '[x4 := x ** 5 for x in range(7)]',
+        '[total := total + v for v in range(10)]',
+        'while chunk := file.read(2):\n pass',
+        'numbers = [y := math.factorial(x), y**2, y**3]',
+    ]
+)
+def test_valid_namedexpr(code):
+    assert not _get_error_list(code, version='3.8')
+
+
@pytest.mark.parametrize(
    ('code', 'message'), [
        ("f'{1+}'", ('invalid syntax')),
@@ -307,3 +320,15 @@ def test_invalid_fstrings(code, message):
    """
    error, = _get_error_list(code, version='3.6')
    assert message in error.message
+
+
+@pytest.mark.parametrize(
+    'code', [
+        "from foo import (\nbar,\n rab,\n)",
+        "from foo import (bar, rab, )",
+    ]
+)
+def test_trailing_comma(code):
+    errors = _get_error_list(code)
+    assert not errors
+
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8    # This file contains Unicode characters.

+import sys
 from textwrap import dedent

 import pytest
@@ -16,6 +17,7 @@ from parso.python.tokenize import PythonToken
 NAME = PythonTokenTypes.NAME
 NEWLINE = PythonTokenTypes.NEWLINE
 STRING = PythonTokenTypes.STRING
+NUMBER = PythonTokenTypes.NUMBER
 INDENT = PythonTokenTypes.INDENT
 DEDENT = PythonTokenTypes.DEDENT
 ERRORTOKEN = PythonTokenTypes.ERRORTOKEN
@@ -140,7 +142,7 @@ def test_identifier_contains_unicode():
    else:
        # Unicode tokens in Python 2 seem to be identified as operators.
        # They will be ignored in the parser, that's ok.
-        assert unicode_token[0] == OP
+        assert unicode_token[0] == ERRORTOKEN


 def test_quoted_strings():
@@ -228,16 +230,29 @@ def test_endmarker_end_pos():
    check('a\\')


+xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')])
+
+
@pytest.mark.parametrize(
    ('code', 'types'), [
+        # Indentation
        (' foo', [INDENT, NAME, DEDENT]),
        ('  foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
        ('  foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME,
                                NEWLINE, ERROR_DEDENT, NAME, DEDENT]),
        (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]),
+
+        # Name stuff
+        ('1foo1', [NUMBER, NAME]),
+        pytest.param(
+            u'மெல்லினம்', [NAME],
+            **xfail_py2),
+        pytest.param(u'²', [ERRORTOKEN], **xfail_py2),
+        pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
+        pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2),
    ]
 )
-def test_indentation(code, types):
+def test_token_types(code, types):
    actual_types = [t.type for t in _get_token_list(code)]
    assert actual_types == types + [ENDMARKER]

@@ -330,15 +345,72 @@ def test_backslash():
        ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]),
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
        (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+
+        # format spec
        (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP,
                                 FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]),
+
+        # multiline f-string
+        ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]),
+        ('f"""abc{\n123}def"""', [
+            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+            FSTRING_END
+        ]),
+
+        # a line continuation inside of an fstring_string
+        ('f"abc\\\ndef"', [
+            FSTRING_START, FSTRING_STRING, FSTRING_END
+        ]),
+        ('f"\\\n{123}\\\n"', [
+            FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING,
+            FSTRING_END
+        ]),
+
+        # a line continuation inside of an fstring_expr
+        ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]),
+
+        # a line continuation inside of a format spec
+        ('f"{123:.2\\\nf}"', [
+            FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END
+        ]),
+
+        # a newline without a line continuation inside a single-line string is
+        # wrong, and will generate an ERRORTOKEN
+        ('f"abc\ndef"', [
+            FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN
+        ]),
+
+        # a more complex example
        (r'print(f"Some {x:.2f}a{y}")', [
            NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP,
            FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP
        ]),
-
+        # issue #86, a string-like in an f-string expression
+        ('f"{ ""}"', [
+            FSTRING_START, OP, FSTRING_END, STRING
+        ]),
+        ('f"{ f""}"', [
+            FSTRING_START, OP, NAME, FSTRING_END, STRING
+        ]),
    ]
 )
 def test_fstring(code, types, version_ge_py36):
    actual_types = [t.type for t in _get_token_list(code, version_ge_py36)]
    assert types + [ENDMARKER] == actual_types
+
+
+@pytest.mark.parametrize(
+    ('code', 'types'), [
+        # issue #87, `:=` in the outermost parentheses should be tokenized
+        # as a format spec marker and part of the format
+        ('f"{x:=10}"', [
+            FSTRING_START, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_END
+        ]),
+        ('f"{(x:=10)}"', [
+            FSTRING_START, OP, OP, NAME, OP, NUMBER, OP, OP, FSTRING_END
+        ]),
+    ]
+)
+def test_fstring_assignment_expression(code, types, version_ge_py38):
+    actual_types = [t.type for t in _get_token_list(code, version_ge_py38)]
+    assert types + [ENDMARKER] == actual_types
--- a/tox.ini
+++ b/tox.ini
@@ -1,9 +1,10 @@
 [tox]
-envlist = {py26,py27,py33,py34,py35,py36,py37}
+envlist = {py26,py27,py33,py34,py35,py36,py37,py38}
 [testenv]
 extras = testing
 deps =
    py26,py33: pytest>=3.0.7,<3.3
+    py27,py34: pytest<3.3
    py26,py33: setuptools<37
    coverage: coverage
 setenv =
Author	SHA1	Message	Date
Dave Halter	fb010f2b5d	Add a release date to the Changelog	2019-12-15 01:00:38 +01:00
Dave Halter	5e12ea5e04	Prepare the next release v0.5.2	2019-12-15 00:55:19 +01:00
Dave Halter	ceb1ee81fa	Merge pull request #93 from yangyangxcf/fstring_tokenize fixed #86 and #87	2019-12-15 00:47:32 +01:00
Dave Halter	bc94293794	Add information about named expressions (#90) to the Changelog	2019-12-15 00:29:41 +01:00
Dave Halter	1122822b7d	Use a lower pytest version so python3.4 is able to pass	2019-12-15 00:13:48 +01:00
Dave Halter	09abe42cce	Use Python 3.8 on travis for testing	2019-12-15 00:12:36 +01:00
Dave Halter	38cdcceba5	Whitespace changes	2019-12-15 00:06:37 +01:00
Dave Halter	753e1999fe	Fix: Add more cases for named expression errors, see #89, #90	2019-12-15 00:04:38 +01:00
Dave Halter	3c475b1e63	Add Python 3.8 to tested environments for tox	2019-12-14 23:59:16 +01:00
Dave Halter	5f04dad9ab	Fix: Catch some additional cases named expr errors, see #89, #90	2019-12-14 23:31:43 +01:00
Dave Halter	dbba1959f7	Make sure that function executions are errors as well, see #90	2019-12-14 23:23:00 +01:00
Dave Halter	5fda85275b	Some minor refactorings for #90 - search_ancestor is now used instead of using node = node.parent - Some lines were too long	2019-12-14 23:12:16 +01:00
Dave Halter	32584ac731	Merge https://github.com/JarryShaw/parso into master	2019-12-14 22:21:22 +01:00
Jarry Shaw	89c4d959e9	* moved all namedexpr_test related rules to `_NamedExprRule` * added valid examples	2019-12-14 09:37:16 +01:00
Jarry Shaw	776e151370	Revised implementation * search ancestors of namedexpr_test directly for comprehensions * added test samples for invalid namedexpr_test syntax	2019-12-13 11:55:53 +08:00
yangyang	53a6d0c17a	spelling	2019-12-06 15:24:33 +08:00
yangyang	b90e5cd758	fixed #86 and #87	2019-12-05 19:22:58 +08:00
Robin Fourcade	e496b07b63	Fix trailing comma error	2019-12-04 22:59:24 +01:00
Jarry Shaw	76fe4792e7	Deal with nested comprehension e.g. `[i for i, j in range(5) for k in range (10) if True or (i := 1)]`	2019-12-01 16:23:18 +08:00
Jarry Shaw	8cae7ed526	Fixing davidhalter/parso#89 [all changes are in parso/python/errors.py] * utility function (`_get_namedexpr`) extracting all assignment expression (`namedexpr_test`) nodes * add `is_namedexpr` parameter to `_CheckAssignmentRule._check_assignment` and special error message for assignment expression related assignment issues (cannot use named assignment with xxx) * add assignment expression check to `_CompForRule` (assignment expression cannot be used in a comprehension iterable expression) * add `_NamedExprRule` for special assignment expression checks - cannot use named assignment with lambda - cannot use named assignment with subscript - cannot use named assignment with attribute - and fallback general checks in `_CheckAssignmentRule._check_assignment` * add `_ComprehensionRule` for special checks on assignment expression in a comprehension - assignment expression within a comprehension cannot be used in a class body - assignment expression cannot rebind comprehension iteration variable 'xxx'	2019-12-01 15:43:17 +08:00
Ian Tabolt	ee2995c110	Remove debug print statement	2019-09-28 11:01:52 +02:00
Naglis	76aaa2ddba	Fix typo (#84)	2019-09-15 19:53:30 +02:00
Dave Halter	3ecd4dddb4	Fix is_definition test	2019-09-05 23:28:46 +02:00
Dave Halter	8f83e9b3c5	Add include_setitem for get_defined_names, is_definition and get_definition	2019-09-04 09:52:55 +02:00
Dave Halter	e8653a49ff	Make is_definition work on setitem modifications, see #66	2019-09-04 09:52:55 +02:00
Thomas A Caswell	3bb46563d4	ENH: update grammar for py39 (#78 ) * ENH: update grammar for py39 Grammar is copied from cpython commit b4e68960b90627422325fdb75f463df1e4153c6e There appears to be 3 new tokens in the grammar (ASYNC, AWAIT, and TYPE_COMMENT) * MNT: revert back to py38 grammar as py39 grammar pt1: comments Looks like upstream has added some comments, remove them * MNT: remove TYPE_COMMENT added upstream * MNT: add string / fstring related changes from parso 38 grammer * MNT: remove changes to support upstream grammar file	2019-07-21 23:45:51 +02:00
Dave Halter	e723b3e74b	Refactor the ambiguity tests a bit, see #70	2019-07-13 20:15:56 +02:00
Benjamin Woodruff	0032bae041	Make pgen2's grammar ambiguity detection handle more cases Under the old implementation, ``` outer: A [inner] B C inner: B C [inner] ``` wouldn't get detected as the ambiguous grammar that it is, whereas ``` outer: A rest rest: [inner] B C inner: B C [inner] ``` would. This would manifest itself as non-determinism in the DFA state generation. See the discussion #62 on for a full explanation. This modifies the ambiguity detection to work on a broader class of issues, so it should now hopefully detect all cases where the given grammar is ambiguous. At some point, we could extend this logic to allow developers to optionally set precedence of grammar productions, which could resolve ambiguities, but that's not a strict requirement for parsing python.	2019-07-13 20:04:32 +02:00
Dave Halter	c0ace63a69	For Python 2.7 and 3.4 pytest 5 doesn't work anymore	2019-07-13 15:46:58 +02:00
Dave Halter	399e8e5043	Prepare the 0.5.1 release	2019-07-13 15:39:44 +02:00
Dave Halter	0a5b5f3346	Fix name tokenizing for Python 2	2019-07-13 15:34:23 +02:00
Dave Halter	2b8544021f	Fix positioning for names that are interleaved with error tokens	2019-07-13 12:34:49 +02:00
Dave Halter	99dd4a84d4	Merge branch 'master' of github.com:davidhalter/parso	2019-07-12 21:35:06 +02:00
Dave Halter	9501b0bde0	Fixed name tokenizing issues for tamil characters, fixes davidhalter/jedi#1368	2019-07-12 21:31:49 +02:00
Benjamin Woodruff	ad57a51800	Fix line continuation characters inside f-strings Line continuation characters are valid inside of strings, but weren't handled correctly in certain cases with f-strings, due to some small tokenizer bugs. This pull request to address those issues, and adds tests to validate the new logic.	2019-07-12 21:20:00 +02:00
Dave Halter	19de3eb5ca	Document that the cache uses pickle files	2019-07-10 00:17:28 -07:00
Dave Halter	7441e6b1d2	Fix changelog dates, fixes #77	2019-06-28 02:00:35 -07:00
Dave Halter	df3c494e02	Try to use collections.abc.Mapping instead of collections.Mapping The latter is deprecated and will be removed in Python 3.9, fixes #76	2019-06-21 10:17:18 +02:00