From 6b391af0717a78d1e8896731402bafd6fe053821 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 16:36:27 +0200 Subject: [PATCH 01/76] Use sets instead of dicts if possible --- parso/pgen2/pgen.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index abfad0a..4efe237 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -58,11 +58,11 @@ class ParserGenerator(object): def _make_first(self, grammar, name): rawfirst = self._first[name] - first = {} + first = set() for label in rawfirst: ilabel = self._make_label(grammar, label) - ##assert ilabel not in first # XXX failed on <> ... != - first[ilabel] = 1 + ##assert ilabel not in first, "%s failed on <> ... !=" % label + first.add(ilabel) return first def _make_label(self, grammar, label): @@ -98,7 +98,6 @@ class ParserGenerator(object): if value in grammar.keywords: return grammar.keywords[value] else: - # TODO this might be an issue?! Using token.NAME here? grammar.labels.append((token.NAME, value)) grammar.keywords[value] = ilabel return ilabel From 23362ec2d3262e0aac76a269c796439db8ae6443 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 16:40:11 +0200 Subject: [PATCH 02/76] Start using the term nonterminal --- parso/pgen2/grammar_parser.py | 12 ++++++------ parso/pgen2/pgen.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py index a8c9694..17aac0c 100644 --- a/parso/pgen2/grammar_parser.py +++ b/parso/pgen2/grammar_parser.py @@ -134,20 +134,20 @@ class GrammarParser(): class NFAArc(object): - def __init__(self, next_, label_or_string): + def __init__(self, next_, nonterminal_or_string): self.next = next_ - self.label_or_string = label_or_string + self.nonterminal_or_string = nonterminal_or_string class NFAState(object): def __init__(self, from_rule): self.from_rule = from_rule - self.arcs = [] # list of (label, NFAState) pairs + self.arcs = [] # List[nonterminal (str), NFAState] - def add_arc(self, next_, label=None): - assert label is None or isinstance(label, str) + def add_arc(self, next_, nonterminal_or_string=None): + assert nonterminal_or_string is None or isinstance(nonterminal_or_string, str) assert isinstance(next_, NFAState) - self.arcs.append(NFAArc(next_, label)) + self.arcs.append(NFAArc(next_, nonterminal_or_string)) def __repr__(self): return '<%s: from %s>' % (self.__class__.__name__, self.from_rule) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 4efe237..15d24e3 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -218,7 +218,7 @@ def _make_dfas(start, finish): return base_nfa_set.add(nfa_state) for nfa_arc in nfa_state.arcs: - if nfa_arc.label_or_string is None: + if nfa_arc.nonterminal_or_string is None: addclosure(nfa_arc.next, base_nfa_set) base_nfa_set = set() @@ -229,14 +229,14 @@ def _make_dfas(start, finish): # Find state transitions and store them in arcs. for nfa_state in state.nfa_set: for nfa_arc in nfa_state.arcs: - if nfa_arc.label_or_string is not None: - nfa_set = arcs.setdefault(nfa_arc.label_or_string, set()) + if nfa_arc.nonterminal_or_string is not None: + nfa_set = arcs.setdefault(nfa_arc.nonterminal_or_string, set()) addclosure(nfa_arc.next, nfa_set) # Now create the dfa's with no None's in arcs anymore. All Nones have # been eliminated and state transitions (arcs) are properly defined, we # just need to create the dfa's. 
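For context, the closure step above can be restated standalone: starting from one NFA state, `addclosure` absorbs every state reachable through None-labeled (epsilon) arcs, and each distinct closure set later becomes one DFAState (which is why `_make_dfas` deduplicates by comparing `nfa_set`). A minimal sketch of the same idea, using toy dict-based arcs instead of parso's NFAState objects:

    # Epsilon closure over a toy NFA. `arcs` maps a state to a list of
    # (label, next_state) pairs; a label of None is an epsilon arc.
    def closure(state, arcs, result=None):
        if result is None:
            result = set()
        if state in result:
            return result
        result.add(state)
        for label, next_state in arcs[state]:
            if label is None:
                closure(next_state, arcs, result)
        return result

    # 0 --eps--> 1 --'a'--> 2 --eps--> 3
    arcs = {0: [(None, 1)], 1: [('a', 2)], 2: [(None, 3)], 3: []}
    assert closure(0, arcs) == {0, 1}
    assert closure(2, arcs) == {2, 3}
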
- for label_or_string, nfa_set in arcs.items(): + for nonterminal_or_string, nfa_set in arcs.items(): for nested_state in states: if nested_state.nfa_set == nfa_set: # The DFA state already exists for this rule. @@ -245,7 +245,7 @@ def _make_dfas(start, finish): nested_state = DFAState(start.from_rule, nfa_set, finish) states.append(nested_state) - state.add_arc(nested_state, label_or_string) + state.add_arc(nested_state, nonterminal_or_string) return states # List of DFAState instances; first one is start @@ -270,8 +270,8 @@ def _dump_dfas(dfas): print("Dump of DFA for", dfas[0].from_rule) for i, state in enumerate(dfas): print(" State", i, state.isfinal and "(final)" or "") - for label, next in state.arcs.items(): - print(" %s -> %d" % (label, dfas.index(next))) + for nonterminal, next in state.arcs.items(): + print(" %s -> %d" % (nonterminal, dfas.index(next))) def generate_grammar(bnf_grammar, token_namespace): From 1f27fa9320a03a896b6f9efc128aeb1082efb2bc Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 17:37:15 +0200 Subject: [PATCH 03/76] Use more nonterminal/terminal terminology --- parso/pgen2/pgen.py | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 15d24e3..d2d9890 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -26,12 +26,12 @@ from parso.pgen2.grammar_parser import GrammarParser, NFAState class ParserGenerator(object): def __init__(self, rule_to_dfas, token_namespace): self._token_namespace = token_namespace - self._rule_to_dfas = rule_to_dfas + self._nonterminal_to_dfas = rule_to_dfas def make_grammar(self, grammar): self._first = {} # map from symbol name to set of tokens - names = list(self._rule_to_dfas.keys()) + names = list(self._nonterminal_to_dfas.keys()) names.sort() for name in names: if name not in self._first: @@ -43,7 +43,7 @@ class ParserGenerator(object): grammar.number2symbol[i] = name for name in names: - dfas = self._rule_to_dfas[name] + dfas = self._nonterminal_to_dfas[name] states = [] for state in dfas: arcs = [] @@ -112,33 +112,37 @@ class ParserGenerator(object): return ilabel def _calcfirst(self, name): - dfa = self._rule_to_dfas[name] + dfas = self._nonterminal_to_dfas[name] self._first[name] = None # dummy to detect left recursion - state = dfa[0] + state = dfas[0] totalset = {} overlapcheck = {} - for label, next in state.arcs.items(): - if label in self._rule_to_dfas: - if label in self._first: - fset = self._first[label] + for nonterminal_or_string, next in state.arcs.items(): + if nonterminal_or_string in self._nonterminal_to_dfas: + # It's a nonterminal and we have either a left recursion issue + # in the grammare or we have to recurse. + try: + fset = self._first[nonterminal_or_string] + except KeyError: + self._calcfirst(nonterminal_or_string) + fset = self._first[nonterminal_or_string] + else: if fset is None: raise ValueError("left recursion for rule %r" % name) - else: - self._calcfirst(label) - fset = self._first[label] totalset.update(fset) - overlapcheck[label] = fset + overlapcheck[nonterminal_or_string] = fset else: - totalset[label] = 1 - overlapcheck[label] = {label: 1} + # It's a string. We have finally found a possible first token. 
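The None placeholder in `_calcfirst` above is the whole left-recursion detector: a rule is marked as in progress before recursing, so re-entering it while it is still None means the grammar recurses into itself without consuming a token. A simplified sketch of that mechanism (rules as plain alternative lists rather than DFAs — an assumption of this sketch, not pgen's representation):

    # FIRST sets with left-recursion detection via a None placeholder.
    # `rules` maps nonterminals to alternatives (tuples of symbols);
    # any symbol not in `rules` counts as a terminal.
    def first(symbol, rules, cache):
        if symbol not in rules:
            return {symbol}               # a terminal's FIRST is itself
        if symbol in cache:
            if cache[symbol] is None:     # still being computed
                raise ValueError("left recursion for rule %r" % symbol)
            return cache[symbol]
        cache[symbol] = None              # dummy to detect left recursion
        result = set()
        for alternative in rules[symbol]:
            result |= first(alternative[0], rules, cache)
        cache[symbol] = result
        return result

    rules = {'stmt': [('if_stmt',), ('NAME',)],
             'if_stmt': [('if', 'stmt')]}
    assert first('stmt', rules, {}) == {'if', 'NAME'}
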
+ totalset[nonterminal_or_string] = 1 + overlapcheck[nonterminal_or_string] = {nonterminal_or_string: 1} inverse = {} - for label, itsfirst in overlapcheck.items(): + for nonterminal_or_string, itsfirst in overlapcheck.items(): for symbol in itsfirst: if symbol in inverse: raise ValueError("rule %s is ambiguous; %s is in the" " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) - inverse[symbol] = label + (name, symbol, nonterminal_or_string, inverse[symbol])) + inverse[symbol] = nonterminal_or_string self._first[name] = totalset @@ -150,7 +154,7 @@ class DFAState(object): self.from_rule = from_rule self.nfa_set = nfa_set self.isfinal = final in nfa_set - self.arcs = {} # map from label to DFAState + self.arcs = {} # map from nonterminals or strings to DFAState def add_arc(self, next_, label): assert isinstance(label, str) From fbed1ecfe001650548e68c208eb779d25c06e522 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 17:45:40 +0200 Subject: [PATCH 04/76] More dict to set --- parso/pgen2/pgen.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index d2d9890..c69c4ed 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -115,7 +115,7 @@ class ParserGenerator(object): dfas = self._nonterminal_to_dfas[name] self._first[name] = None # dummy to detect left recursion state = dfas[0] - totalset = {} + totalset = set() overlapcheck = {} for nonterminal_or_string, next in state.arcs.items(): if nonterminal_or_string in self._nonterminal_to_dfas: @@ -133,8 +133,8 @@ class ParserGenerator(object): overlapcheck[nonterminal_or_string] = fset else: # It's a string. We have finally found a possible first token. - totalset[nonterminal_or_string] = 1 - overlapcheck[nonterminal_or_string] = {nonterminal_or_string: 1} + totalset.add(nonterminal_or_string) + overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) inverse = {} for nonterminal_or_string, itsfirst in overlapcheck.items(): for symbol in itsfirst: From 95e4ecf592ffca6eabd74ce467a034103a08a5b0 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 17:49:43 +0200 Subject: [PATCH 05/76] next -> next_ --- parso/pgen2/pgen.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index c69c4ed..b6896e5 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -47,8 +47,8 @@ class ParserGenerator(object): states = [] for state in dfas: arcs = [] - for label, next in state.arcs.items(): - arcs.append((self._make_label(grammar, label), dfas.index(next))) + for label, next_ in state.arcs.items(): + arcs.append((self._make_label(grammar, label), dfas.index(next_))) if state.isfinal: arcs.append((0, dfas.index(state))) states.append(arcs) @@ -117,7 +117,7 @@ class ParserGenerator(object): state = dfas[0] totalset = set() overlapcheck = {} - for nonterminal_or_string, next in state.arcs.items(): + for nonterminal_or_string, next_ in state.arcs.items(): if nonterminal_or_string in self._nonterminal_to_dfas: # It's a nonterminal and we have either a left recursion issue # in the grammare or we have to recurse. 
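Besides left recursion, the `overlapcheck`/`inverse` bookkeeping from patch 03 rejects a rule whose alternatives can begin with the same terminal, because a parser with one token of lookahead could not decide which alternative to enter. A toy demonstration of that check; the rule names here are made up for illustration:

    # FIRST set per alternative of one hypothetical rule. 'NAME'
    # starting two alternatives makes the rule ambiguous.
    overlapcheck = {'funcdef': {'def'},
                    'expr_stmt': {'NAME'},
                    'annotated': {'NAME'}}
    inverse = {}
    try:
        for alternative, first_set in overlapcheck.items():
            for terminal in first_set:
                if terminal in inverse:
                    raise ValueError(
                        "ambiguous; %r is in the first sets of %s and %s"
                        % (terminal, alternative, inverse[terminal]))
                inverse[terminal] = alternative
    except ValueError as e:
        print(e)  # ambiguous; 'NAME' is in the first sets of ...
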
@@ -163,8 +163,8 @@ class DFAState(object): self.arcs[label] = next_ def unifystate(self, old, new): - for label, next in self.arcs.items(): - if next is old: + for label, next_ in self.arcs.items(): + if next_ is old: self.arcs[label] = new def __eq__(self, other): @@ -176,8 +176,8 @@ class DFAState(object): # would invoke this method recursively, with cycles... if len(self.arcs) != len(other.arcs): return False - for label, next in self.arcs.items(): - if next is not other.arcs.get(label): + for label, next_ in self.arcs.items(): + if next_ is not other.arcs.get(label): return False return True @@ -258,12 +258,12 @@ def _dump_nfa(start, finish): todo = [start] for i, state in enumerate(todo): print(" State", i, state is finish and "(final)" or "") - for label, next in state.arcs: - if next in todo: - j = todo.index(next) + for label, next_ in state.arcs: + if next_ in todo: + j = todo.index(next_) else: j = len(todo) - todo.append(next) + todo.append(next_) if label is None: print(" -> %d" % j) else: @@ -274,8 +274,8 @@ def _dump_dfas(dfas): print("Dump of DFA for", dfas[0].from_rule) for i, state in enumerate(dfas): print(" State", i, state.isfinal and "(final)" or "") - for nonterminal, next in state.arcs.items(): - print(" %s -> %d" % (nonterminal, dfas.index(next))) + for nonterminal, next_ in state.arcs.items(): + print(" %s -> %d" % (nonterminal, dfas.index(next_))) def generate_grammar(bnf_grammar, token_namespace): From b6cbf306d79dd825b34cb05c74717afce521cfbb Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 18:09:12 +0200 Subject: [PATCH 06/76] Use the name first_terminals instead of first --- parso/pgen2/pgen.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index b6896e5..b310333 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -29,19 +29,21 @@ class ParserGenerator(object): self._nonterminal_to_dfas = rule_to_dfas def make_grammar(self, grammar): - self._first = {} # map from symbol name to set of tokens + self._first_terminals = {} # map from symbol name to set of tokens names = list(self._nonterminal_to_dfas.keys()) names.sort() for name in names: - if name not in self._first: - self._calcfirst(name) - #print name, self._first[name].keys() + if name not in self._first_terminals: + self._calculate_first_terminals(name) i = 256 + len(grammar.symbol2number) grammar.symbol2number[name] = i grammar.number2symbol[i] = name + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion or ambiguities. + for name in names: dfas = self._nonterminal_to_dfas[name] states = [] @@ -57,7 +59,7 @@ class ParserGenerator(object): return grammar def _make_first(self, grammar, name): - rawfirst = self._first[name] + rawfirst = self._first_terminals[name] first = set() for label in rawfirst: ilabel = self._make_label(grammar, label) @@ -111,9 +113,11 @@ class ParserGenerator(object): grammar.tokens[itoken] = ilabel return ilabel - def _calcfirst(self, name): + def _calculate_first_terminals(self, name): dfas = self._nonterminal_to_dfas[name] - self._first[name] = None # dummy to detect left recursion + self._first_terminals[name] = None # dummy to detect left recursion + # We only need to check the first dfa. All the following ones are not + # interesting to find first terminals. 
state = dfas[0] totalset = set() overlapcheck = {} @@ -122,10 +126,10 @@ class ParserGenerator(object): # It's a nonterminal and we have either a left recursion issue # in the grammare or we have to recurse. try: - fset = self._first[nonterminal_or_string] + fset = self._first_terminals[nonterminal_or_string] except KeyError: - self._calcfirst(nonterminal_or_string) - fset = self._first[nonterminal_or_string] + self._calculate_first_terminals(nonterminal_or_string) + fset = self._first_terminals[nonterminal_or_string] else: if fset is None: raise ValueError("left recursion for rule %r" % name) @@ -143,7 +147,7 @@ class ParserGenerator(object): " first sets of %s as well as %s" % (name, symbol, nonterminal_or_string, inverse[symbol])) inverse[symbol] = nonterminal_or_string - self._first[name] = totalset + self._first_terminals[name] = totalset class DFAState(object): From 640f544af9d792861734c196d28daf973b2e26a5 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 18:12:13 +0200 Subject: [PATCH 07/76] One instance of symbol -> terminal --- parso/pgen2/pgen.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index b310333..9d0988f 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -139,14 +139,15 @@ class ParserGenerator(object): # It's a string. We have finally found a possible first token. totalset.add(nonterminal_or_string) overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) + inverse = {} - for nonterminal_or_string, itsfirst in overlapcheck.items(): - for symbol in itsfirst: - if symbol in inverse: + for nonterminal_or_string, first_set in overlapcheck.items(): + for terminal in first_set: + if terminal in inverse: raise ValueError("rule %s is ambiguous; %s is in the" " first sets of %s as well as %s" % - (name, symbol, nonterminal_or_string, inverse[symbol])) - inverse[symbol] = nonterminal_or_string + (name, terminal, nonterminal_or_string, inverse[terminal])) + inverse[terminal] = nonterminal_or_string self._first_terminals[name] = totalset From 73ce57428be069d2c34eb1aae0325e0dc7cbd997 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 18:30:20 +0200 Subject: [PATCH 08/76] Try to completely remove the word symbol and use nonterminal The ones that we could not remove are in grammar.py, because that's the public documented API. --- parso/grammar.py | 12 ++++----- parso/parser.py | 14 +++++----- parso/pgen2/grammar.py | 36 +++++++++++++------------- parso/pgen2/parse.py | 10 ++++---- parso/pgen2/pgen.py | 35 ++++++++++++------------- parso/python/diff.py | 10 ++++---- parso/python/parser.py | 58 +++++++++++++++++++++--------------------- 7 files changed, 89 insertions(+), 86 deletions(-) diff --git a/parso/grammar.py b/parso/grammar.py index 6c13f00..2906b5d 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -51,8 +51,8 @@ class Grammar(object): it is invalid, it will be returned as an error node. If disabled, you will get a ParseError when encountering syntax errors in your code. - :param str start_symbol: The grammar symbol that you want to parse. Only - allowed to be used when error_recovery is False. + :param str start_symbol: The grammar rule (nonterminal) that you want + to parse. Only allowed to be used when error_recovery is False. :param str path: The path to the file you want to open. Only needed for caching. :param bool cache: Keeps a copy of the parser tree in RAM and on disk if a path is given. 
Returns the cached trees if the corresponding @@ -88,7 +88,7 @@ class Grammar(object): raise TypeError("Please provide either code or a path.") if start_symbol is None: - start_symbol = self._start_symbol + start_symbol = self._start_nonterminal if error_recovery and start_symbol != 'file_input': raise NotImplementedError("This is currently not implemented.") @@ -136,7 +136,7 @@ class Grammar(object): p = self._parser( self._pgen_grammar, error_recovery=error_recovery, - start_symbol=start_symbol + start_nonterminal=start_symbol ) root_node = p.parse(tokens=tokens) @@ -186,7 +186,7 @@ class Grammar(object): return normalizer.issues def __repr__(self): - labels = self._pgen_grammar.number2symbol.values() + labels = self._pgen_grammar.number2nonterminal.values() txt = ' '.join(list(labels)[:3]) + ' ...' return '<%s:%s>' % (self.__class__.__name__, txt) @@ -194,7 +194,7 @@ class Grammar(object): class PythonGrammar(Grammar): _error_normalizer_config = ErrorFinderConfig() _token_namespace = token - _start_symbol = 'file_input' + _start_nonterminal = 'file_input' def __init__(self, version_info, bnf_text): super(PythonGrammar, self).__init__( diff --git a/parso/parser.py b/parso/parser.py index 555ebc7..c9df89e 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -38,13 +38,13 @@ class BaseParser(object): } default_leaf = tree.Leaf - def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False): + def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False): self._pgen_grammar = pgen_grammar - self._start_symbol = start_symbol + self._start_nonterminal = start_nonterminal self._error_recovery = error_recovery def parse(self, tokens): - start_number = self._pgen_grammar.symbol2number[self._start_symbol] + start_number = self._pgen_grammar.nonterminal2number[self._start_nonterminal] self.pgen_parser = PgenParser( self._pgen_grammar, self.convert_node, self.convert_leaf, self.error_recovery, start_number @@ -64,12 +64,12 @@ class BaseParser(object): raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) def convert_node(self, pgen_grammar, type_, children): - # TODO REMOVE symbol, we don't want type here. - symbol = pgen_grammar.number2symbol[type_] + # TODO REMOVE nonterminal, we don't want type here. + nonterminal = pgen_grammar.number2nonterminal[type_] try: - return self.node_map[symbol](children) + return self.node_map[nonterminal](children) except KeyError: - return self.default_node(symbol, children) + return self.default_node(nonterminal, children) def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos): try: diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 1a2c6e9..00a6e8c 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -28,12 +28,14 @@ class Grammar(object): The instance variables are as follows: - symbol2number -- a dict mapping symbol names to numbers. Symbol - numbers are always 256 or higher, to distinguish - them from token numbers, which are between 0 and - 255 (inclusive). + nonterminal2number -- + A dict mapping nonterminal names to numbers. + Nonterminal numbers are always 256 or higher, to + distinguish them from token numbers, which are between 0 + and 255 (inclusive). - number2symbol -- a dict mapping numbers to symbol names; + number2nonterminal -- + A dict mapping numbers to nonterminal names; these two are each other's inverse. 
states -- a list of DFAs, where each DFA is a list of @@ -44,20 +46,20 @@ class Grammar(object): Final states are represented by a special arc of the form (0, j) where j is its own state number. - dfas -- a dict mapping symbol numbers to (DFA, first) + dfas -- a dict mapping nonterminal numbers to (DFA, first) pairs, where DFA is an item from the states list above, and first is a set of tokens that can begin this grammar rule (represented by a dict whose values are always 1). labels -- a list of (x, y) pairs where x is either a token - number or a symbol number, and y is either None + number or a nonterminal number, and y is either None or a string; the strings are keywords. The label number is the index in this list; label numbers are used to mark state transitions (arcs) in the DFAs. - start -- the number of the grammar's start symbol. + start -- the number of the grammar's start nonterminal. keywords -- a dict mapping keyword strings to arc labels. @@ -65,29 +67,29 @@ class Grammar(object): """ - def __init__(self, bnf_text, start_symbol): - self.symbol2number = {} - self.number2symbol = {} + def __init__(self, bnf_text, start_nonterminal): + self.nonterminal2number = {} + self.number2nonterminal = {} self.states = [] self.dfas = {} self.labels = [(0, "EMPTY")] self.keywords = {} self.tokens = {} - self.symbol2label = {} - self.label2symbol = {} - self.start_symbol = start_symbol + self.nonterminal2label = {} + self.label2nonterminal = {} + self.start_nonterminal = start_nonterminal @property def start(self): - return self.symbol2number[self.start_symbol] + return self.nonterminal2number[self.start_nonterminal] def report(self): """Dump the grammar tables to standard output, for debugging.""" from pprint import pprint print("s2n") - pprint(self.symbol2number) + pprint(self.nonterminal2number) print("n2s") - pprint(self.number2symbol) + pprint(self.number2nonterminal) print("states") pprint(self.states) print("dfas") diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index e2d9593..4e1ad6c 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -118,8 +118,8 @@ class PgenParser(object): up. A concrete syntax tree node is a (type, nodes) tuple, where - type is the node type (a token or symbol number) and nodes - is a list of children for symbols, and None for tokens. + type is the node type (a token or nonterminal number) and nodes + is a list of children for nonterminals, and None for tokens. An abstract syntax tree node may be anything; this is entirely up to the converter function. @@ -184,11 +184,11 @@ class PgenParser(object): # Done with this token return False elif t >= 256: - # See if it's a symbol and if we're in its first set + # See if it's a nonterminal and if we're in its first set itsdfa = _gram.dfas[t] itsstates, itsfirst = itsdfa if ilabel in itsfirst: - # Push a symbol + # Push a nonterminal _push(t, itsdfa, newstate) break # To continue the outer while loop else: @@ -231,7 +231,7 @@ class PgenParser(object): try: # Equal to: # dfa, state, node = self.stack[-1] - # symbol, children = node + # nonterminal, children = node self.stack[-1][2][1].append(newnode) except IndexError: # Stack is empty, set the rootnode. 
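The dense `self.stack[-1][2][1].append(newnode)` decoded by the comment above is easier to see with the three stack operations spelled out. A hypothetical miniature of pgen's classic stack discipline, where each entry is `(dfa, state, node)` and `node` is `(type_number, children)`:

    def shift(stack, newstate, leaf):
        dfa, _state, node = stack[-1]
        node[1].append(leaf)                  # token joins the children
        stack[-1] = (dfa, newstate, node)

    def push(stack, newstate, nested_dfa, number):
        dfa, _state, node = stack[-1]
        stack[-1] = (dfa, newstate, node)     # where to resume later
        stack.append((nested_dfa, 0, (number, [])))

    def pop(stack, convert):
        _dfa, _state, (number, children) = stack.pop()
        new_node = convert(number, children)
        if stack:
            stack[-1][2][1].append(new_node)  # the line quoted above
        else:
            return new_node                   # stack empty: root node

    stack = [('file_dfa', 0, (256, []))]
    push(stack, 1, 'stmt_dfa', 270)
    shift(stack, 1, 'NAME')
    pop(stack, lambda number, children: (number, children))
    assert stack[0][2][1] == [(270, ['NAME'])]
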
diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 9d0988f..de1efcb 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -29,7 +29,8 @@ class ParserGenerator(object): self._nonterminal_to_dfas = rule_to_dfas def make_grammar(self, grammar): - self._first_terminals = {} # map from symbol name to set of tokens + # Map from grammar rule (nonterminal) name to a set of tokens. + self._first_terminals = {} names = list(self._nonterminal_to_dfas.keys()) names.sort() @@ -37,9 +38,9 @@ class ParserGenerator(object): if name not in self._first_terminals: self._calculate_first_terminals(name) - i = 256 + len(grammar.symbol2number) - grammar.symbol2number[name] = i - grammar.number2symbol[i] = name + i = 256 + len(grammar.nonterminal2number) + grammar.nonterminal2number[name] = i + grammar.number2nonterminal[i] = name # Now that we have calculated the first terminals, we are sure that # there is no left recursion or ambiguities. @@ -55,7 +56,7 @@ class ParserGenerator(object): arcs.append((0, dfas.index(state))) states.append(arcs) grammar.states.append(states) - grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name)) + grammar.dfas[grammar.nonterminal2number[name]] = (states, self._make_first(grammar, name)) return grammar def _make_first(self, grammar, name): @@ -71,15 +72,15 @@ class ParserGenerator(object): # XXX Maybe this should be a method on a subclass of converter? ilabel = len(grammar.labels) if label[0].isalpha(): - # Either a symbol name or a named token - if label in grammar.symbol2number: - # A symbol name (a non-terminal) - if label in grammar.symbol2label: - return grammar.symbol2label[label] + # Either a nonterminal name or a named token + if label in grammar.nonterminal2number: + # A nonterminal name (a non-terminal) + if label in grammar.nonterminal2label: + return grammar.nonterminal2label[label] else: - grammar.labels.append((grammar.symbol2number[label], None)) - grammar.symbol2label[label] = ilabel - grammar.label2symbol[ilabel] = label + grammar.labels.append((grammar.nonterminal2number[label], None)) + grammar.nonterminal2label[label] = ilabel + grammar.label2nonterminal[ilabel] = label return ilabel else: # A named token (NAME, NUMBER, STRING) @@ -293,7 +294,7 @@ def generate_grammar(bnf_grammar, token_namespace): own parser. """ rule_to_dfas = {} - start_symbol = None + start_nonterminal = None for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse(): #_dump_nfa(a, z) dfas = _make_dfas(nfa_a, nfa_z) @@ -304,8 +305,8 @@ def generate_grammar(bnf_grammar, token_namespace): rule_to_dfas[nfa_a.from_rule] = dfas #print(nfa_a.from_rule, oldlen, newlen) - if start_symbol is None: - start_symbol = nfa_a.from_rule + if start_nonterminal is None: + start_nonterminal = nfa_a.from_rule p = ParserGenerator(rule_to_dfas, token_namespace) - return p.make_grammar(Grammar(bnf_grammar, start_symbol)) + return p.make_grammar(Grammar(bnf_grammar, start_nonterminal)) diff --git a/parso/python/diff.py b/parso/python/diff.py index f8b73c7..529f06a 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -41,9 +41,9 @@ def _flows_finished(pgen_grammar, stack): if, while, for and try might not be finished, because another part might still be parsed. 
""" - for dfa, newstate, (symbol_number, nodes) in stack: - if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt', - 'for_stmt', 'try_stmt'): + for dfa, newstate, (nonterminal_number, nodes) in stack: + if pgen_grammar.number2nonterminal[nonterminal_number] \ + in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'): return False return True @@ -52,8 +52,8 @@ def suite_or_file_input_is_valid(pgen_grammar, stack): if not _flows_finished(pgen_grammar, stack): return False - for dfa, newstate, (symbol_number, nodes) in reversed(stack): - if pgen_grammar.number2symbol[symbol_number] == 'suite': + for dfa, newstate, (nonterminal_number, nodes) in reversed(stack): + if pgen_grammar.number2nonterminal[nonterminal_number] == 'suite': # If only newline is in the suite, the suite is not valid, yet. return len(nodes) > 1 # Not reaching a suite means that we're dealing with file_input levels diff --git a/parso/python/parser.py b/parso/python/parser.py index b99053b..cb283e8 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -62,8 +62,8 @@ class Parser(BaseParser): FSTRING_END: tree.FStringEnd, } - def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'): - super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery) + def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): + super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery) self.syntax_errors = [] self._omit_dedent_list = [] @@ -81,19 +81,19 @@ class Parser(BaseParser): def parse(self, tokens): if self._error_recovery: - if self._start_symbol != 'file_input': + if self._start_nonterminal != 'file_input': raise NotImplementedError tokens = self._recovery_tokenize(tokens) node = super(Parser, self).parse(tokens) - if self._start_symbol == 'file_input' != node.type: + if self._start_nonterminal == 'file_input' != node.type: # If there's only one statement, we get back a non-module. That's # not what we want, we want a module, so we add it here: node = self.convert_node( self._pgen_grammar, - self._pgen_grammar.symbol2number['file_input'], + self._pgen_grammar.nonterminal2number['file_input'], [node] ) @@ -107,24 +107,24 @@ class Parser(BaseParser): grammar rule produces a new complete node, so that the tree is build strictly bottom-up. """ - # TODO REMOVE symbol, we don't want type here. - symbol = pgen_grammar.number2symbol[type] + # TODO REMOVE nonterminal, we don't want type here. + nonterminal = pgen_grammar.number2nonterminal[type] try: - return self.node_map[symbol](children) + return self.node_map[nonterminal](children) except KeyError: - if symbol == 'suite': + if nonterminal == 'suite': # We don't want the INDENT/DEDENT in our parser tree. Those # leaves are just cancer. They are virtual leaves and not real # ones and therefore have pseudo start/end positions and no # prefixes. Just ignore them. children = [children[0]] + children[2:-1] - elif symbol == 'list_if': + elif nonterminal == 'list_if': # Make transitioning from 2 to 3 easier. - symbol = 'comp_if' - elif symbol == 'listmaker': + nonterminal = 'comp_if' + elif nonterminal == 'listmaker': # Same as list_if above. 
- symbol = 'testlist_comp' - return self.default_node(symbol, children) + nonterminal = 'testlist_comp' + return self.default_node(nonterminal, children) def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): # print('leaf', repr(value), token.tok_name[type]) @@ -138,10 +138,10 @@ class Parser(BaseParser): def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, add_token_callback): - def get_symbol_and_nodes(stack): + def get_nonterminal_and_nodes(stack): for dfa, state, (type_, nodes) in stack: - symbol = pgen_grammar.number2symbol[type_] - yield symbol, nodes + nonterminal = pgen_grammar.number2nonterminal[type_] + yield nonterminal, nodes tos_nodes = stack.get_tos_nodes() if tos_nodes: @@ -149,7 +149,7 @@ class Parser(BaseParser): else: last_leaf = None - if self._start_symbol == 'file_input' and \ + if self._start_nonterminal == 'file_input' and \ (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value): def reduce_stack(states, newstate): # reduce @@ -168,13 +168,13 @@ class Parser(BaseParser): ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) dfa, state, (type_, nodes) = stack[-1] - symbol = pgen_grammar.number2symbol[type_] + nonterminal = pgen_grammar.number2nonterminal[type_] states, first = dfa arcs = states[state] # Look for a state with this label for i, newstate in arcs: if ilabel == i: - if symbol == 'simple_stmt': + if nonterminal == 'simple_stmt': # This is basically shifting stack[-1] = (dfa, newstate, (type_, nodes)) @@ -182,12 +182,12 @@ class Parser(BaseParser): add_token_callback(typ, value, start_pos, prefix) return # Check if we're at the right point - #for symbol, nodes in get_symbol_and_nodes(stack): + #for nonterminal, nodes in get_nonterminal_and_nodes(stack): # self.pgen_parser._pop() #break break - #symbol = pgen_grammar.number2symbol[type_] + #nonterminal = pgen_grammar.number2nonterminal[type_] if not self._error_recovery: return super(Parser, self).error_recovery( @@ -198,21 +198,21 @@ class Parser(BaseParser): # For now just discard everything that is not a suite or # file_input, if we detect an error. one_line_suite = False - for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))): + for index, (nonterminal, nodes) in reversed(list(enumerate(get_nonterminal_and_nodes(stack)))): # `suite` can sometimes be only simple_stmt, not stmt. if one_line_suite: break - elif symbol == 'file_input': + elif nonterminal == 'file_input': break - elif symbol == 'suite': + elif nonterminal == 'suite': if len(nodes) > 1: break elif not nodes: one_line_suite = True # `suite` without an indent are error nodes. - return index, symbol, nodes + return index, nonterminal, nodes - index, symbol, nodes = current_suite(stack) + index, nonterminal, nodes = current_suite(stack) # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index) if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos): @@ -226,11 +226,11 @@ class Parser(BaseParser): error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) stack[-1][2][1].append(error_leaf) - if symbol == 'suite': + if nonterminal == 'suite': dfa, state, node = stack[-1] states, first = dfa arcs = states[state] - intended_label = pgen_grammar.symbol2label['stmt'] + intended_label = pgen_grammar.nonterminal2label['stmt'] # Introduce a proper state transition. We're basically allowing # there to be no valid statements inside a suite. 
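The recovery logic in this parser.py hunk boils down to: walk the stack to the innermost `suite` or `file_input` that can still absorb statements, collapse the broken rules above it (`_stack_removal` bundles their children into an error node), and keep the offending token as an error leaf so no input is dropped. A strongly simplified sketch of the walking-and-recording part, with a made-up node class rather than parso's real ones:

    class Rule(object):
        def __init__(self, nonterminal):
            self.nonterminal = nonterminal
            self.nodes = []

    def recover(stack, typ, value):
        # Pop unfinished rules until a recoverable one is on top, then
        # record the token that could not be parsed as an error leaf.
        while stack[-1].nonterminal not in ('suite', 'file_input'):
            stack.pop()
        stack[-1].nodes.append(('error_leaf', typ, value))

    stack = [Rule('file_input'), Rule('if_stmt')]
    recover(stack, 'operator', '?')
    assert [r.nonterminal for r in stack] == ['file_input']
    assert stack[-1].nodes == [('error_leaf', 'operator', '?')]
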
if [x[0] for x in arcs] == [intended_label]: From a06c3a312971f4631fecdd91c2187c2c9de1ad93 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 17 Jun 2018 23:10:27 +0200 Subject: [PATCH 09/76] name -> nonterminal --- parso/pgen2/pgen.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index de1efcb..4480663 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -32,21 +32,21 @@ class ParserGenerator(object): # Map from grammar rule (nonterminal) name to a set of tokens. self._first_terminals = {} - names = list(self._nonterminal_to_dfas.keys()) - names.sort() - for name in names: - if name not in self._first_terminals: - self._calculate_first_terminals(name) + nonterminals = list(self._nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in self._first_terminals: + self._calculate_first_terminals(nonterminal) i = 256 + len(grammar.nonterminal2number) - grammar.nonterminal2number[name] = i - grammar.number2nonterminal[i] = name + grammar.nonterminal2number[nonterminal] = i + grammar.number2nonterminal[i] = nonterminal # Now that we have calculated the first terminals, we are sure that # there is no left recursion or ambiguities. - for name in names: - dfas = self._nonterminal_to_dfas[name] + for nonterminal in nonterminals: + dfas = self._nonterminal_to_dfas[nonterminal] states = [] for state in dfas: arcs = [] @@ -56,11 +56,11 @@ class ParserGenerator(object): arcs.append((0, dfas.index(state))) states.append(arcs) grammar.states.append(states) - grammar.dfas[grammar.nonterminal2number[name]] = (states, self._make_first(grammar, name)) + grammar.dfas[grammar.nonterminal2number[nonterminal]] = (states, self._make_first(grammar, nonterminal)) return grammar - def _make_first(self, grammar, name): - rawfirst = self._first_terminals[name] + def _make_first(self, grammar, nonterminal): + rawfirst = self._first_terminals[nonterminal] first = set() for label in rawfirst: ilabel = self._make_label(grammar, label) @@ -74,7 +74,7 @@ class ParserGenerator(object): if label[0].isalpha(): # Either a nonterminal name or a named token if label in grammar.nonterminal2number: - # A nonterminal name (a non-terminal) + # A nonterminal name if label in grammar.nonterminal2label: return grammar.nonterminal2label[label] else: @@ -114,9 +114,9 @@ class ParserGenerator(object): grammar.tokens[itoken] = ilabel return ilabel - def _calculate_first_terminals(self, name): - dfas = self._nonterminal_to_dfas[name] - self._first_terminals[name] = None # dummy to detect left recursion + def _calculate_first_terminals(self, nonterminal): + dfas = self._nonterminal_to_dfas[nonterminal] + self._first_terminals[nonterminal] = None # dummy to detect left recursion # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. 
state = dfas[0] @@ -133,7 +133,7 @@ class ParserGenerator(object): fset = self._first_terminals[nonterminal_or_string] else: if fset is None: - raise ValueError("left recursion for rule %r" % name) + raise ValueError("left recursion for rule %r" % nonterminal) totalset.update(fset) overlapcheck[nonterminal_or_string] = fset else: @@ -147,9 +147,9 @@ class ParserGenerator(object): if terminal in inverse: raise ValueError("rule %s is ambiguous; %s is in the" " first sets of %s as well as %s" % - (name, terminal, nonterminal_or_string, inverse[terminal])) + (nonterminal, terminal, nonterminal_or_string, inverse[terminal])) inverse[terminal] = nonterminal_or_string - self._first_terminals[name] = totalset + self._first_terminals[nonterminal] = totalset class DFAState(object): From 453471eeb60479614c73d1f94201fd8bd29034d4 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Mon, 18 Jun 2018 00:15:21 +0200 Subject: [PATCH 10/76] Move some ParserGenerator stuff into the Grammar class --- parso/pgen2/grammar.py | 131 +++++++++++++++++++++++++++++++++++++++- parso/pgen2/pgen.py | 133 +---------------------------------------- 2 files changed, 131 insertions(+), 133 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 00a6e8c..86ed4d9 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,6 +16,8 @@ fallback token code OP, but the parser needs the actual token code. """ +from parso.python import token + class Grammar(object): """Pgen parsing tables conversion class. @@ -67,7 +69,10 @@ class Grammar(object): """ - def __init__(self, bnf_text, start_nonterminal): + def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace): + self._token_namespace = token_namespace + self._nonterminal_to_dfas = rule_to_dfas + self.nonterminal2number = {} self.number2nonterminal = {} self.states = [] @@ -79,6 +84,130 @@ class Grammar(object): self.label2nonterminal = {} self.start_nonterminal = start_nonterminal + self._make_grammar() + + def _make_grammar(self): + # Map from grammar rule (nonterminal) name to a set of tokens. + self._first_terminals = {} + + nonterminals = list(self._nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in self._first_terminals: + self._calculate_first_terminals(nonterminal) + + i = 256 + len(self.nonterminal2number) + self.nonterminal2number[nonterminal] = i + self.number2nonterminal[i] = nonterminal + + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion or ambiguities. + + for nonterminal in nonterminals: + dfas = self._nonterminal_to_dfas[nonterminal] + states = [] + for state in dfas: + arcs = [] + for label, next_ in state.arcs.items(): + arcs.append((self._make_label(label), dfas.index(next_))) + if state.isfinal: + arcs.append((0, dfas.index(state))) + states.append(arcs) + self.states.append(states) + self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal)) + + def _make_first(self, nonterminal): + rawfirst = self._first_terminals[nonterminal] + first = set() + for label in rawfirst: + ilabel = self._make_label(label) + ##assert ilabel not in first, "%s failed on <> ... !=" % label + first.add(ilabel) + return first + + def _make_label(self, label): + # XXX Maybe this should be a method on a subclass of converter? 
+ ilabel = len(self.labels) + if label[0].isalpha(): + # Either a nonterminal name or a named token + if label in self.nonterminal2number: + # A nonterminal name + if label in self.nonterminal2label: + return self.nonterminal2label[label] + else: + self.labels.append((self.nonterminal2number[label], None)) + self.nonterminal2label[label] = ilabel + self.label2nonterminal[ilabel] = label + return ilabel + else: + # A named token (NAME, NUMBER, STRING) + itoken = getattr(self._token_namespace, label, None) + assert isinstance(itoken, int), label + if itoken in self.tokens: + return self.tokens[itoken] + else: + self.labels.append((itoken, None)) + self.tokens[itoken] = ilabel + return ilabel + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + value = eval(label) + if value[0].isalpha(): + # A keyword + if value in self.keywords: + return self.keywords[value] + else: + self.labels.append((token.NAME, value)) + self.keywords[value] = ilabel + return ilabel + else: + # An operator (any non-numeric token) + itoken = self._token_namespace.generate_token_id(value) + if itoken in self.tokens: + return self.tokens[itoken] + else: + self.labels.append((itoken, None)) + self.tokens[itoken] = ilabel + return ilabel + + def _calculate_first_terminals(self, nonterminal): + dfas = self._nonterminal_to_dfas[nonterminal] + self._first_terminals[nonterminal] = None # dummy to detect left recursion + # We only need to check the first dfa. All the following ones are not + # interesting to find first terminals. + state = dfas[0] + totalset = set() + overlapcheck = {} + for nonterminal_or_string, next_ in state.arcs.items(): + if nonterminal_or_string in self._nonterminal_to_dfas: + # It's a nonterminal and we have either a left recursion issue + # in the grammare or we have to recurse. + try: + fset = self._first_terminals[nonterminal_or_string] + except KeyError: + self._calculate_first_terminals(nonterminal_or_string) + fset = self._first_terminals[nonterminal_or_string] + else: + if fset is None: + raise ValueError("left recursion for rule %r" % nonterminal) + totalset.update(fset) + overlapcheck[nonterminal_or_string] = fset + else: + # It's a string. We have finally found a possible first token. + totalset.add(nonterminal_or_string) + overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) + + inverse = {} + for nonterminal_or_string, first_set in overlapcheck.items(): + for terminal in first_set: + if terminal in inverse: + raise ValueError("rule %s is ambiguous; %s is in the" + " first sets of %s as well as %s" % + (nonterminal, terminal, nonterminal_or_string, inverse[terminal])) + inverse[terminal] = nonterminal_or_string + self._first_terminals[nonterminal] = totalset + @property def start(self): return self.nonterminal2number[self.start_nonterminal] diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 4480663..4453577 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -19,139 +19,9 @@ This grammar is self-referencing. """ from parso.pgen2.grammar import Grammar -from parso.python import token from parso.pgen2.grammar_parser import GrammarParser, NFAState -class ParserGenerator(object): - def __init__(self, rule_to_dfas, token_namespace): - self._token_namespace = token_namespace - self._nonterminal_to_dfas = rule_to_dfas - - def make_grammar(self, grammar): - # Map from grammar rule (nonterminal) name to a set of tokens. 
- self._first_terminals = {} - - nonterminals = list(self._nonterminal_to_dfas.keys()) - nonterminals.sort() - for nonterminal in nonterminals: - if nonterminal not in self._first_terminals: - self._calculate_first_terminals(nonterminal) - - i = 256 + len(grammar.nonterminal2number) - grammar.nonterminal2number[nonterminal] = i - grammar.number2nonterminal[i] = nonterminal - - # Now that we have calculated the first terminals, we are sure that - # there is no left recursion or ambiguities. - - for nonterminal in nonterminals: - dfas = self._nonterminal_to_dfas[nonterminal] - states = [] - for state in dfas: - arcs = [] - for label, next_ in state.arcs.items(): - arcs.append((self._make_label(grammar, label), dfas.index(next_))) - if state.isfinal: - arcs.append((0, dfas.index(state))) - states.append(arcs) - grammar.states.append(states) - grammar.dfas[grammar.nonterminal2number[nonterminal]] = (states, self._make_first(grammar, nonterminal)) - return grammar - - def _make_first(self, grammar, nonterminal): - rawfirst = self._first_terminals[nonterminal] - first = set() - for label in rawfirst: - ilabel = self._make_label(grammar, label) - ##assert ilabel not in first, "%s failed on <> ... !=" % label - first.add(ilabel) - return first - - def _make_label(self, grammar, label): - # XXX Maybe this should be a method on a subclass of converter? - ilabel = len(grammar.labels) - if label[0].isalpha(): - # Either a nonterminal name or a named token - if label in grammar.nonterminal2number: - # A nonterminal name - if label in grammar.nonterminal2label: - return grammar.nonterminal2label[label] - else: - grammar.labels.append((grammar.nonterminal2number[label], None)) - grammar.nonterminal2label[label] = ilabel - grammar.label2nonterminal[ilabel] = label - return ilabel - else: - # A named token (NAME, NUMBER, STRING) - itoken = getattr(self._token_namespace, label, None) - assert isinstance(itoken, int), label - if itoken in grammar.tokens: - return grammar.tokens[itoken] - else: - grammar.labels.append((itoken, None)) - grammar.tokens[itoken] = ilabel - return ilabel - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - value = eval(label) - if value[0].isalpha(): - # A keyword - if value in grammar.keywords: - return grammar.keywords[value] - else: - grammar.labels.append((token.NAME, value)) - grammar.keywords[value] = ilabel - return ilabel - else: - # An operator (any non-numeric token) - itoken = self._token_namespace.generate_token_id(value) - if itoken in grammar.tokens: - return grammar.tokens[itoken] - else: - grammar.labels.append((itoken, None)) - grammar.tokens[itoken] = ilabel - return ilabel - - def _calculate_first_terminals(self, nonterminal): - dfas = self._nonterminal_to_dfas[nonterminal] - self._first_terminals[nonterminal] = None # dummy to detect left recursion - # We only need to check the first dfa. All the following ones are not - # interesting to find first terminals. - state = dfas[0] - totalset = set() - overlapcheck = {} - for nonterminal_or_string, next_ in state.arcs.items(): - if nonterminal_or_string in self._nonterminal_to_dfas: - # It's a nonterminal and we have either a left recursion issue - # in the grammare or we have to recurse. 
- try: - fset = self._first_terminals[nonterminal_or_string] - except KeyError: - self._calculate_first_terminals(nonterminal_or_string) - fset = self._first_terminals[nonterminal_or_string] - else: - if fset is None: - raise ValueError("left recursion for rule %r" % nonterminal) - totalset.update(fset) - overlapcheck[nonterminal_or_string] = fset - else: - # It's a string. We have finally found a possible first token. - totalset.add(nonterminal_or_string) - overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) - - inverse = {} - for nonterminal_or_string, first_set in overlapcheck.items(): - for terminal in first_set: - if terminal in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (nonterminal, terminal, nonterminal_or_string, inverse[terminal])) - inverse[terminal] = nonterminal_or_string - self._first_terminals[nonterminal] = totalset - - class DFAState(object): def __init__(self, from_rule, nfa_set, final): assert isinstance(nfa_set, set) @@ -308,5 +178,4 @@ def generate_grammar(bnf_grammar, token_namespace): if start_nonterminal is None: start_nonterminal = nfa_a.from_rule - p = ParserGenerator(rule_to_dfas, token_namespace) - return p.make_grammar(Grammar(bnf_grammar, start_nonterminal)) + return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace) From 55d6a69aada2ec98ac40bb698b31f2a1c39d73d5 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Mon, 18 Jun 2018 01:14:09 +0200 Subject: [PATCH 11/76] Some more renames --- parso/pgen2/grammar.py | 10 +++++----- parso/pgen2/pgen.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 86ed4d9..727dd7d 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -108,8 +108,8 @@ class Grammar(object): states = [] for state in dfas: arcs = [] - for label, next_ in state.arcs.items(): - arcs.append((self._make_label(label), dfas.index(next_))) + for terminal_or_nonterminal, next_ in state.arcs.items(): + arcs.append((self._make_label(terminal_or_nonterminal), dfas.index(next_))) if state.isfinal: arcs.append((0, dfas.index(state))) states.append(arcs) @@ -119,9 +119,9 @@ class Grammar(object): def _make_first(self, nonterminal): rawfirst = self._first_terminals[nonterminal] first = set() - for label in rawfirst: - ilabel = self._make_label(label) - ##assert ilabel not in first, "%s failed on <> ... !=" % label + for terminal_or_nonterminal in rawfirst: + ilabel = self._make_label(terminal_or_nonterminal) + ##assert ilabel not in first, "%s failed on <> ... 
!=" % terminal_or_nonterminal first.add(ilabel) return first diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 4453577..8caff85 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -30,7 +30,7 @@ class DFAState(object): self.from_rule = from_rule self.nfa_set = nfa_set self.isfinal = final in nfa_set - self.arcs = {} # map from nonterminals or strings to DFAState + self.arcs = {} # map from terminals/nonterminals to DFAState def add_arc(self, next_, label): assert isinstance(label, str) From 5712ffb5ca97f1919a301b2545291d5404af8913 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Mon, 18 Jun 2018 20:14:29 +0200 Subject: [PATCH 12/76] Introduce a label cache that is currently not used --- parso/pgen2/grammar.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 727dd7d..8247b26 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -84,6 +84,7 @@ class Grammar(object): self.label2nonterminal = {} self.start_nonterminal = start_nonterminal + self._label_cache = {} self._make_grammar() def _make_grammar(self): @@ -125,6 +126,18 @@ class Grammar(object): first.add(ilabel) return first + def _cache_labels(func): + def wrapper(self, label): + try: + return self._label_cache[label] + except KeyError: + result = func(self, label) + self._label_cache[label] = result + return result + + return wrapper + + #@_cache_labels def _make_label(self, label): # XXX Maybe this should be a method on a subclass of converter? ilabel = len(self.labels) @@ -152,6 +165,7 @@ class Grammar(object): else: # Either a keyword or an operator assert label[0] in ('"', "'"), label + # TODO use literal_eval instead of a simple eval. value = eval(label) if value[0].isalpha(): # A keyword From d691bf0fd1fec2fa4376bd8bbb6d87cbdc62516c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 20 Jun 2018 09:42:21 +0200 Subject: [PATCH 13/76] Some minor changes --- parso/pgen2/parse.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 4e1ad6c..42d2ad5 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -56,6 +56,8 @@ class Stack(list): def token_to_ilabel(grammar, type_, value): # Map from token to label + # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the + # grammar. if type_ == tokenize.NAME: # Check for reserved words (keywords) try: @@ -186,8 +188,8 @@ class PgenParser(object): elif t >= 256: # See if it's a nonterminal and if we're in its first set itsdfa = _gram.dfas[t] - itsstates, itsfirst = itsdfa - if ilabel in itsfirst: + itsstates, first_terminals = itsdfa + if ilabel in first_terminals: # Push a nonterminal _push(t, itsdfa, newstate) break # To continue the outer while loop From d8554d86d1d44f442a07459594560038d8800749 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 18:17:32 +0200 Subject: [PATCH 14/76] A lot of new code to hopefully transition to a better parsing mechanism in the future --- parso/pgen2/grammar.py | 17 +++++++++++++++ parso/pgen2/parse.py | 48 ++++++++++++++++++++++++++++++++++++++++++ parso/pgen2/pgen.py | 1 + 3 files changed, 66 insertions(+) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 8247b26..f8292b7 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -19,6 +19,12 @@ fallback token code OP, but the parser needs the actual token code. 
from parso.python import token +class DFAPlan(object): + def __init__(self, next_dfa, pushes=[]): + self.next_dfa = next_dfa + self.pushes = pushes + + class Grammar(object): """Pgen parsing tables conversion class. @@ -117,6 +123,17 @@ class Grammar(object): self.states.append(states) self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal)) + for dfas in self._nonterminal_to_dfas.values(): + for dfa_state in dfas: + dfa_state.ilabel_to_plan = plans = {} + for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): + if terminal_or_nonterminal in self.nonterminal2number: + for first in self._make_first(terminal_or_nonterminal): + plans[first] = None + else: + ilabel = self._make_label(terminal_or_nonterminal) + plans[ilabel] = DFAPlan(next_dfa) + def _make_first(self, nonterminal): rawfirst = self._first_terminals[nonterminal] first = set() diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 42d2ad5..8c3cf33 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -54,6 +54,16 @@ class Stack(list): return sorted(check()) +class StackNode(object): + def __init__(self, dfa): + self.dfa = dfa + self.nodes = [] + + @property + def nonterminal(self): + return self.dfa.from_rule + + def token_to_ilabel(grammar, type_, value): # Map from token to label # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the @@ -152,6 +162,44 @@ class PgenParser(object): raise InternalParseError("incomplete input", type_, value, start_pos) return self.rootnode + def add_token(self, type_, value, start_pos, prefix): + """Add a token; return True if this is the end of the program.""" + ilabel = token_to_ilabel(self.grammar, type_, value) + stack = self.stack + + while True: + ilabel + try: + plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] + except KeyError: + self.error_recovery(self.grammar, stack, type_, + value, start_pos, prefix, self.add_token) + break + + stack[-1].current_dfa = plan.next_dfa + for push in plan.pushes: + stack.append(StackNode(push.dfa)) + + leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + while stack[-1].current_dfa.is_final: + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(self.grammar, type_, tos.nodes) + + try: + stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. 
+ self.rootnode = new_node + def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" ilabel = token_to_ilabel(self.grammar, type_, value) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 8caff85..cf37e12 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -31,6 +31,7 @@ class DFAState(object): self.nfa_set = nfa_set self.isfinal = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState + self.ilabel_to_plan = {} def add_arc(self, next_, label): assert isinstance(label, str) From 31aecf2d3536ba75b7e6a9dab2d55db9df89dcf1 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 21:10:28 +0200 Subject: [PATCH 15/76] Calculate the first plans in a very messy way --- parso/pgen2/grammar.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index f8292b7..f9d2f53 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -96,6 +96,7 @@ class Grammar(object): def _make_grammar(self): # Map from grammar rule (nonterminal) name to a set of tokens. self._first_terminals = {} + self._first_plans = {} nonterminals = list(self._nonterminal_to_dfas.keys()) nonterminals.sort() @@ -128,8 +129,8 @@ class Grammar(object): dfa_state.ilabel_to_plan = plans = {} for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): if terminal_or_nonterminal in self.nonterminal2number: - for first in self._make_first(terminal_or_nonterminal): - plans[first] = None + for t, plan in self._first_plans[terminal_or_nonterminal].items(): + plans[t] = plan else: ilabel = self._make_label(terminal_or_nonterminal) plans[ilabel] = DFAPlan(next_dfa) @@ -205,6 +206,7 @@ class Grammar(object): def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] self._first_terminals[nonterminal] = None # dummy to detect left recursion + self._first_plans[nonterminal] = {} # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] @@ -213,7 +215,7 @@ class Grammar(object): for nonterminal_or_string, next_ in state.arcs.items(): if nonterminal_or_string in self._nonterminal_to_dfas: # It's a nonterminal and we have either a left recursion issue - # in the grammare or we have to recurse. + # in the grammar or we have to recurse. try: fset = self._first_terminals[nonterminal_or_string] except KeyError: @@ -224,10 +226,18 @@ class Grammar(object): raise ValueError("left recursion for rule %r" % nonterminal) totalset.update(fset) overlapcheck[nonterminal_or_string] = fset + + for t, plan in self._first_plans[nonterminal_or_string].items(): + assert not self._first_plans[nonterminal].get(t) + self._first_plans[nonterminal][t] = DFAPlan( + plan.next_dfa, + [nonterminal_or_string] + plan.pushes + ) else: # It's a string. We have finally found a possible first token. 
totalset.add(nonterminal_or_string) overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) + self._first_plans[nonterminal][nonterminal_or_string] = DFAPlan(next_) inverse = {} for nonterminal_or_string, first_set in overlapcheck.items(): From cc8038966b376d6ca25d509d20aa6b5b835528fa Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 21:11:46 +0200 Subject: [PATCH 16/76] isfinal -> is_final --- parso/pgen2/grammar.py | 2 +- parso/pgen2/pgen.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index f9d2f53..3d7f104 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -118,7 +118,7 @@ class Grammar(object): arcs = [] for terminal_or_nonterminal, next_ in state.arcs.items(): arcs.append((self._make_label(terminal_or_nonterminal), dfas.index(next_))) - if state.isfinal: + if state.is_final: arcs.append((0, dfas.index(state))) states.append(arcs) self.states.append(states) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index cf37e12..4d3c103 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -29,7 +29,7 @@ class DFAState(object): assert isinstance(final, NFAState) self.from_rule = from_rule self.nfa_set = nfa_set - self.isfinal = final in nfa_set + self.is_final = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState self.ilabel_to_plan = {} @@ -47,7 +47,7 @@ class DFAState(object): def __eq__(self, other): # Equality test -- ignore the nfa_set instance variable assert isinstance(other, DFAState) - if self.isfinal != other.isfinal: + if self.is_final != other.is_final: return False # Can't just return self.arcs == other.arcs, because that # would invoke this method recursively, with cycles... @@ -150,7 +150,7 @@ def _dump_nfa(start, finish): def _dump_dfas(dfas): print("Dump of DFA for", dfas[0].from_rule) for i, state in enumerate(dfas): - print(" State", i, state.isfinal and "(final)" or "") + print(" State", i, state.is_final and "(final)" or "") for nonterminal, next_ in state.arcs.items(): print(" %s -> %d" % (nonterminal, dfas.index(next_))) From 12e11b3d161d9fa69714b5a569c549a78319ff32 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 21:14:14 +0200 Subject: [PATCH 17/76] Remove a while loop that is not necessary --- parso/pgen2/parse.py | 62 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 8c3cf33..c0ccb5c 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -167,38 +167,38 @@ class PgenParser(object): ilabel = token_to_ilabel(self.grammar, type_, value) stack = self.stack - while True: - ilabel + try: + plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] + except KeyError: + self.error_recovery(self.grammar, stack, type_, + value, start_pos, prefix, self.add_token) + return False + + stack[-1].current_dfa = plan.next_dfa + for push in plan.pushes: + stack.append(StackNode(push.dfa)) + + leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + while stack[-1].current_dfa.is_final: + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. 
+ if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(self.grammar, type_, tos.nodes) + try: - plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] - except KeyError: - self.error_recovery(self.grammar, stack, type_, - value, start_pos, prefix, self.add_token) - break - - stack[-1].current_dfa = plan.next_dfa - for push in plan.pushes: - stack.append(StackNode(push.dfa)) - - leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) - stack[-1].nodes.append(leaf) - - while stack[-1].current_dfa.is_final: - tos = self.stack.pop() - # If there's exactly one child, return that child instead of - # creating a new node. We still create expr_stmt and - # file_input though, because a lot of Jedi depends on its - # logic. - if len(tos.nodes) == 1: - new_node = tos.nodes[0] - else: - new_node = self.convert_node(self.grammar, type_, tos.nodes) - - try: - stack[-1].nodes.append(new_node) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = new_node + stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = new_node + return True + return False def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" From e6fc739670fc4b424c723b1f1c0193270f00594c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 21:32:05 +0200 Subject: [PATCH 18/76] Get most things ready for plans --- parso/pgen2/parse.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index c0ccb5c..dfe5364 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -147,6 +147,8 @@ class PgenParser(object): newnode = (start, []) stackentry = (self.grammar.dfas[start], 0, newnode) self.stack = Stack([stackentry]) + start_nonterminal = grammar.number2nonterminal[start] + self._stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) self.rootnode = None self.error_recovery = error_recovery @@ -162,15 +164,16 @@ class PgenParser(object): raise InternalParseError("incomplete input", type_, value, start_pos) return self.rootnode - def add_token(self, type_, value, start_pos, prefix): + def _new_add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" ilabel = token_to_ilabel(self.grammar, type_, value) - stack = self.stack + stack = self._stack + grammar = self.grammar try: plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] except KeyError: - self.error_recovery(self.grammar, stack, type_, + self.error_recovery(grammar, stack, type_, value, start_pos, prefix, self.add_token) return False @@ -178,11 +181,11 @@ class PgenParser(object): for push in plan.pushes: stack.append(StackNode(push.dfa)) - leaf = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) + leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) while stack[-1].current_dfa.is_final: - tos = self.stack.pop() + tos = stack.pop() # If there's exactly one child, return that child instead of # creating a new node. 
We still create expr_stmt and # file_input though, because a lot of Jedi depends on its @@ -190,7 +193,7 @@ class PgenParser(object): if len(tos.nodes) == 1: new_node = tos.nodes[0] else: - new_node = self.convert_node(self.grammar, type_, tos.nodes) + new_node = self.convert_node(grammar, type_, tos.nodes) try: stack[-1].nodes.append(new_node) From 2a082d69df2c27ed9517d8aaa69e506681e6045d Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 22:12:26 +0200 Subject: [PATCH 19/76] Add better reprs --- parso/pgen2/grammar.py | 11 +++++++---- parso/pgen2/parse.py | 13 ++++++++----- parso/pgen2/pgen.py | 5 +++++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 3d7f104..208912d 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -20,9 +20,12 @@ from parso.python import token class DFAPlan(object): - def __init__(self, next_dfa, pushes=[]): + def __init__(self, next_dfa, dfa_pushes=[]): self.next_dfa = next_dfa - self.pushes = pushes + self.dfa_pushes = dfa_pushes + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) class Grammar(object): @@ -130,7 +133,7 @@ class Grammar(object): for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): if terminal_or_nonterminal in self.nonterminal2number: for t, plan in self._first_plans[terminal_or_nonterminal].items(): - plans[t] = plan + plans[self._make_label(t)] = plan else: ilabel = self._make_label(terminal_or_nonterminal) plans[ilabel] = DFAPlan(next_dfa) @@ -231,7 +234,7 @@ class Grammar(object): assert not self._first_plans[nonterminal].get(t) self._first_plans[nonterminal][t] = DFAPlan( plan.next_dfa, - [nonterminal_or_string] + plan.pushes + [next_] + plan.dfa_pushes ) else: # It's a string. We have finally found a possible first token. diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index dfe5364..473bc6b 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -63,6 +63,9 @@ class StackNode(object): def nonterminal(self): return self.dfa.from_rule + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes) + def token_to_ilabel(grammar, type_, value): # Map from token to label @@ -171,20 +174,20 @@ class PgenParser(object): grammar = self.grammar try: - plan = stack[-1].current_dfa.ilabel_to_plan[ilabel] + plan = stack[-1].dfa.ilabel_to_plan[ilabel] except KeyError: self.error_recovery(grammar, stack, type_, value, start_pos, prefix, self.add_token) return False - stack[-1].current_dfa = plan.next_dfa - for push in plan.pushes: - stack.append(StackNode(push.dfa)) + stack[-1].dfa = plan.next_dfa + for push in plan.dfa_pushes: + stack.append(StackNode(push)) leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) - while stack[-1].current_dfa.is_final: + while stack[-1].dfa.is_final: tos = stack.pop() # If there's exactly one child, return that child instead of # creating a new node. We still create expr_stmt and diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 4d3c103..e194c3f 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -60,6 +60,11 @@ class DFAState(object): __hash__ = None # For Py3 compatibility. + def __repr__(self): + return '<%s: %s is_final=%s>' % ( + self.__class__.__name__, self.from_rule, self.is_final + ) + def _simplify_dfas(dfas): # This is not theoretically optimal, but works well enough. 
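With DFAPlan, StackNode and the per-state ilabel_to_plan tables in place, the loop that the next patches wire up can be shown in one piece. The sketch below is a self-contained toy under assumed names (the hand-built grammar S: T 'b'; T: 'a', the string labels and the tuple nodes are simplifications, not parso's real API), but the shift/push/reduce mechanics mirror the add_token and _pop logic above:

# Illustrative toy of the plan-driven stack parser; the grammar and
# the simplified API are assumptions for demonstration only.
class DFAState(object):
    def __init__(self, from_rule, is_final=False):
        self.from_rule = from_rule
        self.is_final = is_final
        self.ilabel_to_plan = {}

class DFAPlan(object):
    def __init__(self, next_dfa, dfa_pushes=()):
        self.next_dfa = next_dfa
        self.dfa_pushes = list(dfa_pushes)

class StackNode(object):
    def __init__(self, dfa):
        self.dfa = dfa
        self.nodes = []

def _pop(stack):
    tos = stack.pop()
    # A finished rule with exactly one child collapses into that child.
    if len(tos.nodes) == 1:
        return tos.nodes[0]
    return (tos.dfa.from_rule, tos.nodes)

def add_token(stack, label, value):
    while True:
        try:
            plan = stack[-1].dfa.ilabel_to_plan[label]
            break
        except KeyError:
            if not stack[-1].dfa.is_final:
                raise ValueError('no transition for %r' % label)
            # The rule on top is done: reduce it into its parent.
            new_node = _pop(stack)
            if not stack:
                return new_node  # root node
            stack[-1].nodes.append(new_node)
    # Shift: advance the current DFA and enter the pushed rules.
    stack[-1].dfa = plan.next_dfa
    for push in plan.dfa_pushes:
        stack.append(StackNode(push))
    stack[-1].nodes.append(value)

# S: s0 --T--> s1 --'b'--> s2(final);  T: t0 --'a'--> t1(final)
s0, s1 = DFAState('S'), DFAState('S')
s2 = DFAState('S', is_final=True)
t1 = DFAState('T', is_final=True)
s1.ilabel_to_plan['b'] = DFAPlan(s2)
# The precalculated first plan: 'a' at s0 means "advance S past T and
# continue inside T after its 'a' transition".
s0.ilabel_to_plan['a'] = DFAPlan(s1, [t1])

stack = [StackNode(s0)]
add_token(stack, 'a', 'a')
add_token(stack, 'b', 'b')
print(add_token(stack, 'endmarker', ''))  # ('S', ['a', 'b'])

The final call prints ('S', ['a', 'b']); T never shows up in the tree because of the single-child collapse in _pop, the same shortcut described in the comments above, which exempt only expr_stmt and file_input.
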
From f03a87b876937e2027b6554d36ce84ee2944a672 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 21 Jun 2018 23:56:34 +0200 Subject: [PATCH 20/76] Actually parse some first things with the new approach --- parso/pgen2/grammar.py | 13 ++++------- parso/pgen2/parse.py | 53 ++++++++++++++++++++++++------------------ 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 208912d..988cdd8 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -132,8 +132,8 @@ class Grammar(object): dfa_state.ilabel_to_plan = plans = {} for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): if terminal_or_nonterminal in self.nonterminal2number: - for t, plan in self._first_plans[terminal_or_nonterminal].items(): - plans[self._make_label(t)] = plan + for t, pushes in self._first_plans[terminal_or_nonterminal].items(): + plans[self._make_label(t)] = DFAPlan(next_dfa, pushes) else: ilabel = self._make_label(terminal_or_nonterminal) plans[ilabel] = DFAPlan(next_dfa) @@ -230,17 +230,14 @@ class Grammar(object): totalset.update(fset) overlapcheck[nonterminal_or_string] = fset - for t, plan in self._first_plans[nonterminal_or_string].items(): + for t, pushes in self._first_plans[nonterminal_or_string].items(): assert not self._first_plans[nonterminal].get(t) - self._first_plans[nonterminal][t] = DFAPlan( - plan.next_dfa, - [next_] + plan.dfa_pushes - ) + self._first_plans[nonterminal][t] = [next_] + pushes else: # It's a string. We have finally found a possible first token. totalset.add(nonterminal_or_string) overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) - self._first_plans[nonterminal][nonterminal_or_string] = DFAPlan(next_) + self._first_plans[nonterminal][nonterminal_or_string] = [next_] inverse = {} for nonterminal_or_string, first_set in overlapcheck.items(): diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 473bc6b..2c5e753 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -173,41 +173,48 @@ class PgenParser(object): stack = self._stack grammar = self.grammar - try: - plan = stack[-1].dfa.ilabel_to_plan[ilabel] - except KeyError: - self.error_recovery(grammar, stack, type_, - value, start_pos, prefix, self.add_token) - return False + while True: + try: + plan = stack[-1].dfa.ilabel_to_plan[ilabel] + break + except KeyError: + if stack[-1].dfa.is_final: + tos = stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + # XXX don't use that type + xxx_type = grammar.nonterminal2number[tos.dfa.from_rule] + new_node = self.convert_node(grammar, xxx_type, tos.nodes) + + try: + stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = new_node + return True + else: + self.error_recovery(grammar, stack, type_, + value, start_pos, prefix, self.add_token) + return False stack[-1].dfa = plan.next_dfa + for push in plan.dfa_pushes: stack.append(StackNode(push)) leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) - while stack[-1].dfa.is_final: - tos = stack.pop() - # If there's exactly one child, return that child instead of - # creating a new node. We still create expr_stmt and - # file_input though, because a lot of Jedi depends on its - # logic. 
- if len(tos.nodes) == 1: - new_node = tos.nodes[0] - else: - new_node = self.convert_node(grammar, type_, tos.nodes) - - try: - stack[-1].nodes.append(new_node) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = new_node - return True return False def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" + self._new_add_token(type_, value, start_pos, prefix) ilabel = token_to_ilabel(self.grammar, type_, value) # Loop until the token is shifted; may raise exceptions From 79c7e0b59de53b5cb12ca6b81d87611bd19a31ea Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 00:38:18 +0200 Subject: [PATCH 21/76] Use the new parser. Error recovery is not yet working --- parso/parser.py | 4 +-- parso/pgen2/parse.py | 71 ++++++++++++++++++++++-------------------- parso/python/parser.py | 4 +-- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index c9df89e..600ed79 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -63,9 +63,7 @@ class BaseParser(object): error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix) raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) - def convert_node(self, pgen_grammar, type_, children): - # TODO REMOVE nonterminal, we don't want type here. - nonterminal = pgen_grammar.number2nonterminal[type_] + def convert_node(self, pgen_grammar, nonterminal, children): try: return self.node_map[nonterminal](children) except KeyError: diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 2c5e753..58e52dc 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -147,30 +147,32 @@ class PgenParser(object): # Each stack entry is a tuple: (dfa, state, node). # A node is a tuple: (type, children), # where children is a list of nodes or None - newnode = (start, []) - stackentry = (self.grammar.dfas[start], 0, newnode) - self.stack = Stack([stackentry]) +# newnode = (start, []) +# stackentry = (self.grammar.dfas[start], 0, newnode) +# self.stack = Stack([stackentry]) start_nonterminal = grammar.number2nonterminal[start] - self._stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) + self.stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) self.rootnode = None self.error_recovery = error_recovery def parse(self, tokens): for type_, value, start_pos, prefix in tokens: - if self.add_token(type_, value, start_pos, prefix): - break - else: + self.add_token(type_, value, start_pos, prefix) + + while self.stack and self.stack[-1].dfa.is_final: + self._pop() + + if self.stack: # We never broke out -- EOF is too soon -- Unfinished statement. # However, the error recovery might have added the token again, if # the stack is empty, we're fine. - if self.stack: - raise InternalParseError("incomplete input", type_, value, start_pos) + raise InternalParseError("incomplete input", type_, value, start_pos) return self.rootnode - def _new_add_token(self, type_, value, start_pos, prefix): + def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" ilabel = token_to_ilabel(self.grammar, type_, value) - stack = self._stack + stack = self.stack grammar = self.grammar while True: @@ -179,40 +181,25 @@ class PgenParser(object): break except KeyError: if stack[-1].dfa.is_final: - tos = stack.pop() - # If there's exactly one child, return that child instead of - # creating a new node. 
We still create expr_stmt and - # file_input though, because a lot of Jedi depends on its - # logic. - if len(tos.nodes) == 1: - new_node = tos.nodes[0] - else: - # XXX don't use that type - xxx_type = grammar.nonterminal2number[tos.dfa.from_rule] - new_node = self.convert_node(grammar, xxx_type, tos.nodes) - - try: - stack[-1].nodes.append(new_node) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = new_node - return True + self._pop() else: self.error_recovery(grammar, stack, type_, value, start_pos, prefix, self.add_token) - return False + return + except IndexError: + raise InternalParseError("too much input", type_, value, start_pos) stack[-1].dfa = plan.next_dfa for push in plan.dfa_pushes: + print('insert', push.from_rule) stack.append(StackNode(push)) + print('set next', plan.next_dfa.from_rule) leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) - return False - - def add_token(self, type_, value, start_pos, prefix): + def _old_add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" self._new_add_token(type_, value, start_pos, prefix) ilabel = token_to_ilabel(self.grammar, type_, value) @@ -299,3 +286,21 @@ class PgenParser(object): except IndexError: # Stack is empty, set the rootnode. self.rootnode = newnode + + def _pop(self): + tos = self.stack.pop() + print('pop', tos.nonterminal, tos.nodes) + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(self.grammar, tos.dfa.from_rule, tos.nodes) + + try: + self.stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = new_node diff --git a/parso/python/parser.py b/parso/python/parser.py index cb283e8..16748bc 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -99,7 +99,7 @@ class Parser(BaseParser): return node - def convert_node(self, pgen_grammar, type, children): + def convert_node(self, pgen_grammar, nonterminal, children): """ Convert raw node information to a PythonBaseNode instance. @@ -107,8 +107,6 @@ class Parser(BaseParser): grammar rule produces a new complete node, so that the tree is build strictly bottom-up. """ - # TODO REMOVE nonterminal, we don't want type here. 
- nonterminal = pgen_grammar.number2nonterminal[type] try: return self.node_map[nonterminal](children) except KeyError: From d9264609f2c013d70de2adf03d03dd9cb4b5c493 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 01:56:29 +0200 Subject: [PATCH 22/76] Get quite a bit of the error recovery working --- parso/parser.py | 2 +- parso/python/parser.py | 37 ++++++++++++------------------------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index 600ed79..80ea7b2 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -55,7 +55,7 @@ class BaseParser(object): del self.pgen_parser return node - def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix, add_token_callback): if self._error_recovery: raise NotImplementedError("Error Recovery is not implemented") diff --git a/parso/python/parser.py b/parso/python/parser.py index 16748bc..f05f1a8 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -134,14 +134,14 @@ class Parser(BaseParser): return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) - def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix, add_token_callback): def get_nonterminal_and_nodes(stack): for dfa, state, (type_, nodes) in stack: nonterminal = pgen_grammar.number2nonterminal[type_] yield nonterminal, nodes - tos_nodes = stack.get_tos_nodes() + tos_nodes = stack[-1].nodes if tos_nodes: last_leaf = tos_nodes[-1].get_last_leaf() else: @@ -164,32 +164,19 @@ class Parser(BaseParser): # error recovery. #print('x', pprint.pprint(stack)) ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) - - dfa, state, (type_, nodes) = stack[-1] - nonterminal = pgen_grammar.number2nonterminal[type_] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - if ilabel == i: - if nonterminal == 'simple_stmt': - # This is basically shifting - stack[-1] = (dfa, newstate, (type_, nodes)) - - reduce_stack(states, newstate) - add_token_callback(typ, value, start_pos, prefix) - return - # Check if we're at the right point - #for nonterminal, nodes in get_nonterminal_and_nodes(stack): - # self.pgen_parser._pop() - - #break - break - #nonterminal = pgen_grammar.number2nonterminal[type_] + try: + plan = stack[-1].dfa.ilabel_to_plan[ilabel] + except KeyError: + pass + else: + if plan.next_dfa.is_final and not plan.dfa_pushes: + stack[-1].dfa = plan.next_dfa + add_token_callback(typ, value, start_pos, prefix) + return if not self._error_recovery: return super(Parser, self).error_recovery( - pgen_grammar, stack, arcs, typ, value, start_pos, prefix, + pgen_grammar, stack, typ, value, start_pos, prefix, add_token_callback) def current_suite(stack): From 68eab722293393a91ac01ab6809a0e7fcb74b51c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 01:59:39 +0200 Subject: [PATCH 23/76] Some slight changes to error recovery --- parso/python/parser.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index f05f1a8..32fff51 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -162,17 +162,19 @@ class Parser(BaseParser): # possible (and valid in Python ) that there's no newline at the # end of a file, we have to recover even if the user doesn't want # 
error recovery. - #print('x', pprint.pprint(stack)) - ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) - try: - plan = stack[-1].dfa.ilabel_to_plan[ilabel] - except KeyError: - pass - else: - if plan.next_dfa.is_final and not plan.dfa_pushes: - stack[-1].dfa = plan.next_dfa - add_token_callback(typ, value, start_pos, prefix) - return + if stack[-1].dfa.from_rule == 'simple_stmt': + ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) + try: + plan = stack[-1].dfa.ilabel_to_plan[ilabel] + except KeyError: + pass + else: + if plan.next_dfa.is_final and not plan.dfa_pushes: + # We are ignoring here that the newline would be + # required for a simple_stmt. + stack[-1].dfa = plan.next_dfa + add_token_callback(typ, value, start_pos, prefix) + return if not self._error_recovery: return super(Parser, self).error_recovery( From 9e8066c6fd16f3d9a411e490f0c253037635d0cc Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 09:56:58 +0200 Subject: [PATCH 24/76] Fix a lot of the old error recovery --- parso/pgen2/parse.py | 3 --- parso/python/parser.py | 40 +++++++++++++++++----------------------- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 58e52dc..9c56257 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -192,10 +192,8 @@ class PgenParser(object): stack[-1].dfa = plan.next_dfa for push in plan.dfa_pushes: - print('insert', push.from_rule) stack.append(StackNode(push)) - print('set next', plan.next_dfa.from_rule) leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) @@ -289,7 +287,6 @@ class PgenParser(object): def _pop(self): tos = self.stack.pop() - print('pop', tos.nonterminal, tos.nodes) # If there's exactly one child, return that child instead of # creating a new node. We still create expr_stmt and # file_input though, because a lot of Jedi depends on its diff --git a/parso/python/parser.py b/parso/python/parser.py index 32fff51..9e15d50 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -185,24 +185,23 @@ class Parser(BaseParser): # For now just discard everything that is not a suite or # file_input, if we detect an error. one_line_suite = False - for index, (nonterminal, nodes) in reversed(list(enumerate(get_nonterminal_and_nodes(stack)))): + for until_index, stack_node in reversed(list(enumerate(stack))): # `suite` can sometimes be only simple_stmt, not stmt. if one_line_suite: break - elif nonterminal == 'file_input': + elif stack_node.nonterminal == 'file_input': break - elif nonterminal == 'suite': - if len(nodes) > 1: + elif stack_node.nonterminal == 'suite': + if len(stack_node.nodes) > 1: break - elif not nodes: + elif not stack_node.nodes: one_line_suite = True # `suite` without an indent are error nodes. 
- return index, nonterminal, nodes + return until_index - index, nonterminal, nodes = current_suite(stack) + until_index = current_suite(stack) - # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index) - if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos): + if self._stack_removal(stack, until_index + 1): add_token_callback(typ, value, start_pos, prefix) else: if typ == INDENT: @@ -211,9 +210,10 @@ class Parser(BaseParser): self._omit_dedent_list.append(self._indent_counter) error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) - stack[-1][2][1].append(error_leaf) + stack[-1].nodes.append(error_leaf) - if nonterminal == 'suite': + tos = stack[-1] + if tos.nonterminal == 'suite': dfa, state, node = stack[-1] states, first = dfa arcs = states[state] @@ -224,21 +224,15 @@ class Parser(BaseParser): new_state = arcs[0][1] stack[-1] = dfa, new_state, node - def _stack_removal(self, pgen_grammar, stack, arcs, start_index, value, start_pos): - failed_stack = False - found = False + def _stack_removal(self, stack, start_index): all_nodes = [] - for dfa, state, (type_, nodes) in stack[start_index:]: - if nodes: - found = True - if found: - failed_stack = True - all_nodes += nodes - if failed_stack: - stack[start_index - 1][2][1].append(tree.PythonErrorNode(all_nodes)) + for stack_node in stack[start_index:]: + all_nodes += stack_node.nodes + if all_nodes: + stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes)) stack[start_index:] = [] - return failed_stack + return bool(all_nodes) def _recovery_tokenize(self, tokens): for typ, value, start_pos, prefix in tokens: From a85f5449015a9b47325050a1cb3a4864a40d3d88 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 11:12:10 +0200 Subject: [PATCH 25/76] Fix all tests except diff tests. Mostly error recovery fixes --- parso/python/parser.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index 9e15d50..9283cb1 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -93,7 +93,7 @@ class Parser(BaseParser): # not what we want, we want a module, so we add it here: node = self.convert_node( self._pgen_grammar, - self._pgen_grammar.nonterminal2number['file_input'], + 'file_input', [node] ) @@ -214,15 +214,13 @@ class Parser(BaseParser): tos = stack[-1] if tos.nonterminal == 'suite': - dfa, state, node = stack[-1] - states, first = dfa - arcs = states[state] - intended_label = pgen_grammar.nonterminal2label['stmt'] - # Introduce a proper state transition. We're basically allowing - # there to be no valid statements inside a suite. - if [x[0] for x in arcs] == [intended_label]: - new_state = arcs[0][1] - stack[-1] = dfa, new_state, node + # Need at least one statement in the suite. This happend with the + # error recovery above. + try: + tos.dfa = tos.dfa.arcs['stmt'] + except KeyError: + # We're already in a final state. 
+ pass def _stack_removal(self, stack, start_index): all_nodes = [] From 4e5ba02dbb638a4f2190e5bf988f893b6fa8887c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 11:38:34 +0200 Subject: [PATCH 26/76] Fix the final issues of the new parser --- parso/python/diff.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/parso/python/diff.py b/parso/python/diff.py index 529f06a..742e0eb 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -41,9 +41,8 @@ def _flows_finished(pgen_grammar, stack): if, while, for and try might not be finished, because another part might still be parsed. """ - for dfa, newstate, (nonterminal_number, nodes) in stack: - if pgen_grammar.number2nonterminal[nonterminal_number] \ - in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'): + for stack_node in stack: + if stack_node.nonterminal in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'): return False return True @@ -52,10 +51,10 @@ def suite_or_file_input_is_valid(pgen_grammar, stack): if not _flows_finished(pgen_grammar, stack): return False - for dfa, newstate, (nonterminal_number, nodes) in reversed(stack): - if pgen_grammar.number2nonterminal[nonterminal_number] == 'suite': + for stack_node in reversed(stack): + if stack_node.nonterminal == 'suite': # If only newline is in the suite, the suite is not valid, yet. - return len(nodes) > 1 + return len(stack_node.nodes) > 1 # Not reaching a suite means that we're dealing with file_input levels # where there's no need for a valid statement in it. It can also be empty. return True From 67ca091631c52dcfa5c17a165cb78973f2e34439 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 11:44:42 +0200 Subject: [PATCH 27/76] delete a lot of the old parser code --- parso/pgen2/parse.py | 94 -------------------------------------------- 1 file changed, 94 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 9c56257..952c43e 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -144,12 +144,6 @@ class PgenParser(object): self.convert_node = convert_node self.convert_leaf = convert_leaf - # Each stack entry is a tuple: (dfa, state, node). 
- # A node is a tuple: (type, children), - # where children is a list of nodes or None -# newnode = (start, []) -# stackentry = (self.grammar.dfas[start], 0, newnode) -# self.stack = Stack([stackentry]) start_nonterminal = grammar.number2nonterminal[start] self.stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) self.rootnode = None @@ -197,94 +191,6 @@ class PgenParser(object): leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) - def _old_add_token(self, type_, value, start_pos, prefix): - """Add a token; return True if this is the end of the program.""" - self._new_add_token(type_, value, start_pos, prefix) - ilabel = token_to_ilabel(self.grammar, type_, value) - - # Loop until the token is shifted; may raise exceptions - _gram = self.grammar - _labels = _gram.labels - _push = self._push - _pop = self._pop - _shift = self._shift - while True: - dfa, state, node = self.stack[-1] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - t, v = _labels[i] - if ilabel == i: - # Look it up in the list of labels - assert t < 256 - # Shift a token; we're done with it - _shift(type_, value, newstate, prefix, start_pos) - # Pop while we are in an accept-only state - state = newstate - while states[state] == [(0, state)]: - _pop() - if not self.stack: - # Done parsing! - return True - dfa, state, node = self.stack[-1] - states, first = dfa - # Done with this token - return False - elif t >= 256: - # See if it's a nonterminal and if we're in its first set - itsdfa = _gram.dfas[t] - itsstates, first_terminals = itsdfa - if ilabel in first_terminals: - # Push a nonterminal - _push(t, itsdfa, newstate) - break # To continue the outer while loop - else: - if (0, state) in arcs: - # An accepting state, pop it and try something else - _pop() - if not self.stack: - # Done parsing, but another token is input - raise InternalParseError("too much input", type_, value, start_pos) - else: - self.error_recovery(self.grammar, self.stack, arcs, type_, - value, start_pos, prefix, self.add_token) - break - - def _shift(self, type_, value, newstate, prefix, start_pos): - """Shift a token. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = self.convert_leaf(self.grammar, type_, value, prefix, start_pos) - node[-1].append(newnode) - self.stack[-1] = (dfa, newstate, node) - - def _push(self, type_, newdfa, newstate): - """Push a nonterminal. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = (type_, []) - self.stack[-1] = (dfa, newstate, node) - self.stack.append((newdfa, 0, newnode)) - - def _pop(self): - """Pop a nonterminal. (Internal)""" - popdfa, popstate, (type_, children) = self.stack.pop() - # If there's exactly one child, return that child instead of creating a - # new node. We still create expr_stmt and file_input though, because a - # lot of Jedi depends on its logic. - if len(children) == 1: - newnode = children[0] - else: - newnode = self.convert_node(self.grammar, type_, children) - - try: - # Equal to: - # dfa, state, node = self.stack[-1] - # nonterminal, children = node - self.stack[-1][2][1].append(newnode) - except IndexError: - # Stack is empty, set the rootnode. 
- self.rootnode = newnode - def _pop(self): tos = self.stack.pop() # If there's exactly one child, return that child instead of From 4f0e9c0fd7dd89d6fc99cb761d46901d98a5e9f6 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 11:47:18 +0200 Subject: [PATCH 28/76] Remove old dfas and states from the parser generator --- parso/pgen2/grammar.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 988cdd8..0f7dda9 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -84,8 +84,6 @@ class Grammar(object): self.nonterminal2number = {} self.number2nonterminal = {} - self.states = [] - self.dfas = {} self.labels = [(0, "EMPTY")] self.keywords = {} self.tokens = {} @@ -114,19 +112,6 @@ class Grammar(object): # Now that we have calculated the first terminals, we are sure that # there is no left recursion or ambiguities. - for nonterminal in nonterminals: - dfas = self._nonterminal_to_dfas[nonterminal] - states = [] - for state in dfas: - arcs = [] - for terminal_or_nonterminal, next_ in state.arcs.items(): - arcs.append((self._make_label(terminal_or_nonterminal), dfas.index(next_))) - if state.is_final: - arcs.append((0, dfas.index(state))) - states.append(arcs) - self.states.append(states) - self.dfas[self.nonterminal2number[nonterminal]] = (states, self._make_first(nonterminal)) - for dfas in self._nonterminal_to_dfas.values(): for dfa_state in dfas: dfa_state.ilabel_to_plan = plans = {} @@ -252,18 +237,3 @@ class Grammar(object): @property def start(self): return self.nonterminal2number[self.start_nonterminal] - - def report(self): - """Dump the grammar tables to standard output, for debugging.""" - from pprint import pprint - print("s2n") - pprint(self.nonterminal2number) - print("n2s") - pprint(self.number2nonterminal) - print("states") - pprint(self.states) - print("dfas") - pprint(self.dfas) - print("labels") - pprint(self.labels) - print("start", self.start) From 87299335c4fc9fe2666ccaf8376413a7c2d112a7 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 12:36:48 +0200 Subject: [PATCH 29/76] Remove more unused code --- parso/pgen2/grammar.py | 42 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 0f7dda9..63a214b 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -49,20 +49,6 @@ class Grammar(object): A dict mapping numbers to nonterminal names; these two are each other's inverse. - states -- a list of DFAs, where each DFA is a list of - states, each state is a list of arcs, and each - arc is a (i, j) pair where i is a label and j is - a state number. The DFA number is the index into - this list. (This name is slightly confusing.) - Final states are represented by a special arc of - the form (0, j) where j is its own state number. - - dfas -- a dict mapping nonterminal numbers to (DFA, first) - pairs, where DFA is an item from the states list - above, and first is a set of tokens that can - begin this grammar rule (represented by a dict - whose values are always 1). - labels -- a list of (x, y) pairs where x is either a token number or a nonterminal number, and y is either None or a string; the strings are keywords. 
The label @@ -149,25 +135,17 @@ class Grammar(object): ilabel = len(self.labels) if label[0].isalpha(): # Either a nonterminal name or a named token - if label in self.nonterminal2number: - # A nonterminal name - if label in self.nonterminal2label: - return self.nonterminal2label[label] - else: - self.labels.append((self.nonterminal2number[label], None)) - self.nonterminal2label[label] = ilabel - self.label2nonterminal[ilabel] = label - return ilabel + assert label not in self.nonterminal2number + + # A named token (e.g. NAME, NUMBER, STRING) + itoken = getattr(self._token_namespace, label, None) + assert isinstance(itoken, int), label + if itoken in self.tokens: + return self.tokens[itoken] else: - # A named token (NAME, NUMBER, STRING) - itoken = getattr(self._token_namespace, label, None) - assert isinstance(itoken, int), label - if itoken in self.tokens: - return self.tokens[itoken] - else: - self.labels.append((itoken, None)) - self.tokens[itoken] = ilabel - return ilabel + self.labels.append((itoken, None)) + self.tokens[itoken] = ilabel + return ilabel else: # Either a keyword or an operator assert label[0] in ('"', "'"), label From 878b4b2d3be9f527609c55b5d99fceeac6aa0d91 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 12:47:02 +0200 Subject: [PATCH 30/76] Use nonterminals instead of numbers if possible --- parso/parser.py | 3 +-- parso/pgen2/grammar.py | 4 ---- parso/pgen2/parse.py | 4 ++-- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index 80ea7b2..6b906d2 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -44,10 +44,9 @@ class BaseParser(object): self._error_recovery = error_recovery def parse(self, tokens): - start_number = self._pgen_grammar.nonterminal2number[self._start_nonterminal] self.pgen_parser = PgenParser( self._pgen_grammar, self.convert_node, self.convert_leaf, - self.error_recovery, start_number + self.error_recovery, self._start_nonterminal ) node = self.pgen_parser.parse(tokens) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 63a214b..b6e076e 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -211,7 +211,3 @@ class Grammar(object): (nonterminal, terminal, nonterminal_or_string, inverse[terminal])) inverse[terminal] = nonterminal_or_string self._first_terminals[nonterminal] = totalset - - @property - def start(self): - return self.nonterminal2number[self.start_nonterminal] diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 952c43e..f3d8c88 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -114,7 +114,8 @@ class PgenParser(object): """ - def __init__(self, grammar, convert_node, convert_leaf, error_recovery, start): + def __init__(self, grammar, convert_node, convert_leaf, error_recovery, + start_nonterminal): """Constructor. 
The grammar argument is a grammar.Grammar instance; see the @@ -144,7 +145,6 @@ class PgenParser(object): self.convert_node = convert_node self.convert_leaf = convert_leaf - start_nonterminal = grammar.number2nonterminal[start] self.stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) self.rootnode = None self.error_recovery = error_recovery From 532aef2342b704d60fe14460091c46ff55e1f972 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 22 Jun 2018 12:52:44 +0200 Subject: [PATCH 31/76] Remove nonterminal2number and number2nonterminal, they are no longer used --- parso/grammar.py | 4 ++-- parso/pgen2/grammar.py | 20 ++------------------ parso/python/parser.py | 5 ----- 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/parso/grammar.py b/parso/grammar.py index 2906b5d..acdf286 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -186,8 +186,8 @@ class Grammar(object): return normalizer.issues def __repr__(self): - labels = self._pgen_grammar.number2nonterminal.values() - txt = ' '.join(list(labels)[:3]) + ' ...' + nonterminals = self._pgen_grammar._nonterminal_to_dfas.keys() + txt = ' '.join(list(nonterminals)[:3]) + ' ...' return '<%s:%s>' % (self.__class__.__name__, txt) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index b6e076e..caa7acf 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -39,16 +39,6 @@ class Grammar(object): The instance variables are as follows: - nonterminal2number -- - A dict mapping nonterminal names to numbers. - Nonterminal numbers are always 256 or higher, to - distinguish them from token numbers, which are between 0 - and 255 (inclusive). - - number2nonterminal -- - A dict mapping numbers to nonterminal names; - these two are each other's inverse. - labels -- a list of (x, y) pairs where x is either a token number or a nonterminal number, and y is either None or a string; the strings are keywords. The label @@ -68,8 +58,6 @@ class Grammar(object): self._token_namespace = token_namespace self._nonterminal_to_dfas = rule_to_dfas - self.nonterminal2number = {} - self.number2nonterminal = {} self.labels = [(0, "EMPTY")] self.keywords = {} self.tokens = {} @@ -91,10 +79,6 @@ class Grammar(object): if nonterminal not in self._first_terminals: self._calculate_first_terminals(nonterminal) - i = 256 + len(self.nonterminal2number) - self.nonterminal2number[nonterminal] = i - self.number2nonterminal[i] = nonterminal - # Now that we have calculated the first terminals, we are sure that # there is no left recursion or ambiguities. @@ -102,7 +86,7 @@ class Grammar(object): for dfa_state in dfas: dfa_state.ilabel_to_plan = plans = {} for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): - if terminal_or_nonterminal in self.nonterminal2number: + if terminal_or_nonterminal in self._nonterminal_to_dfas: for t, pushes in self._first_plans[terminal_or_nonterminal].items(): plans[self._make_label(t)] = DFAPlan(next_dfa, pushes) else: @@ -135,7 +119,7 @@ class Grammar(object): ilabel = len(self.labels) if label[0].isalpha(): # Either a nonterminal name or a named token - assert label not in self.nonterminal2number + assert label not in self._nonterminal_to_dfas # A named token (e.g. 
NAME, NUMBER, STRING)
             itoken = getattr(self._token_namespace, label, None)
diff --git a/parso/python/parser.py b/parso/python/parser.py
index 9283cb1..7728121 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -136,11 +136,6 @@ class Parser(BaseParser):
 
     def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix,
                        add_token_callback):
-        def get_nonterminal_and_nodes(stack):
-            for dfa, state, (type_, nodes) in stack:
-                nonterminal = pgen_grammar.number2nonterminal[type_]
-                yield nonterminal, nodes
-
         tos_nodes = stack[-1].nodes
         if tos_nodes:
             last_leaf = tos_nodes[-1].get_last_leaf()

From 1362d4f05d6020560324b4b9e1aa17ebffbf9607 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Fri, 22 Jun 2018 12:53:38 +0200
Subject: [PATCH 32/76] Remove more unused grammar stuff

---
 parso/pgen2/grammar.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py
index caa7acf..8075d5e 100644
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -61,8 +61,6 @@ class Grammar(object):
         self.labels = [(0, "EMPTY")]
         self.keywords = {}
         self.tokens = {}
-        self.nonterminal2label = {}
-        self.label2nonterminal = {}
         self.start_nonterminal = start_nonterminal
         self._label_cache = {}

From a890ddd6ccacc51738754d26966d0fbcb2d942ed Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Fri, 22 Jun 2018 12:54:27 +0200
Subject: [PATCH 33/76] Remove make_first

---
 parso/pgen2/grammar.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py
index 8075d5e..a66f03d 100644
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -91,15 +91,6 @@ class Grammar(object):
                         ilabel = self._make_label(terminal_or_nonterminal)
                         plans[ilabel] = DFAPlan(next_dfa)
 
-    def _make_first(self, nonterminal):
-        rawfirst = self._first_terminals[nonterminal]
-        first = set()
-        for terminal_or_nonterminal in rawfirst:
-            ilabel = self._make_label(terminal_or_nonterminal)
-            ##assert ilabel not in first, "%s failed on <> ... !=" % terminal_or_nonterminal
-            first.add(ilabel)
-        return first
-
     def _cache_labels(func):
         def wrapper(self, label):
             try:

From 33e321a53975f552d406b2104222cdfc8246b20a Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Fri, 22 Jun 2018 13:04:00 +0200
Subject: [PATCH 34/76] Don't set the root node before it's actually defined

---
 parso/pgen2/parse.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py
index f3d8c88..ac925a7 100644
--- a/parso/pgen2/parse.py
+++ b/parso/pgen2/parse.py
@@ -146,7 +146,6 @@ class PgenParser(object):
         self.convert_leaf = convert_leaf
 
         self.stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])])
-        self.rootnode = None
         self.error_recovery = error_recovery
 
     def parse(self, tokens):

From b5378e46028b4a0c070dc77a74fbf7db07d916db Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Sun, 24 Jun 2018 11:28:23 +0200
Subject: [PATCH 35/76] Use token.OP and use reserved words

This change breaks the tokenizer's backwards compatibility a bit. Details
of operators are now part of the parser and not the tokenizer anymore.
The parser does this anyway, so we don't need the complexity in the
tokenizer.
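Concretely, after this change the tokenizer only distinguishes coarse token types, while every quoted string in the grammar ('if' and '(' alike) is interned into one reserved-strings table when the grammar is built; the parser then maps an incoming token to its label with a single dictionary lookup. A rough model of that lookup, with invented helper names rather than parso's actual objects:

# Toy model (assumed names) of the reserved-string lookup; in parso
# the mapping lives on the grammar as reserved_syntax_strings.
reserved_syntax_strings = {}

def intern_reserved(value):
    # Quoted grammar strings like 'if', '(' or '+=' are interned once
    # while the grammar is built.
    return reserved_syntax_strings.setdefault(value, len(reserved_syntax_strings))

for quoted in ('if', 'else', '(', ')', '+='):
    intern_reserved(quoted)

def ilabel_for(token_type, value):
    # Only NAME and OP tokens can carry reserved syntax; other token
    # types (NUMBER, STRING, ...) keep their generic label.
    if token_type in ('NAME', 'OP'):
        try:
            return reserved_syntax_strings[value]
        except KeyError:
            pass  # an ordinary identifier or unknown operator
    return token_type

print(ilabel_for('OP', '+='))    # label interned for '+='
print(ilabel_for('NAME', 'if'))  # keywords resolve through the same table
print(ilabel_for('NAME', 'x'))   # stays a generic NAME token

This is why the diff below can merge the keyword and operator branches of _make_label into one reserved_syntax_strings check and drop the generate_token_id call entirely.
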
--- parso/pgen2/grammar.py | 24 ++++++------------------ parso/pgen2/grammar_parser.py | 12 +++++++----- parso/pgen2/parse.py | 4 ++-- parso/python/parser.py | 2 +- parso/python/tokenize.py | 5 +++-- 5 files changed, 19 insertions(+), 28 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index a66f03d..b941ba1 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -59,7 +59,7 @@ class Grammar(object): self._nonterminal_to_dfas = rule_to_dfas self.labels = [(0, "EMPTY")] - self.keywords = {} + self.reserved_syntax_strings = {} self.tokens = {} self.start_nonterminal = start_nonterminal @@ -104,7 +104,6 @@ class Grammar(object): #@_cache_labels def _make_label(self, label): - # XXX Maybe this should be a method on a subclass of converter? ilabel = len(self.labels) if label[0].isalpha(): # Either a nonterminal name or a named token @@ -124,23 +123,12 @@ class Grammar(object): assert label[0] in ('"', "'"), label # TODO use literal_eval instead of a simple eval. value = eval(label) - if value[0].isalpha(): - # A keyword - if value in self.keywords: - return self.keywords[value] - else: - self.labels.append((token.NAME, value)) - self.keywords[value] = ilabel - return ilabel + if value in self.reserved_syntax_strings: + return self.reserved_syntax_strings[value] else: - # An operator (any non-numeric token) - itoken = self._token_namespace.generate_token_id(value) - if itoken in self.tokens: - return self.tokens[itoken] - else: - self.labels.append((itoken, None)) - self.tokens[itoken] = ilabel - return ilabel + self.labels.append((token.NAME, value)) + self.reserved_syntax_strings[value] = ilabel + return self.reserved_syntax_strings[value] def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py index 17aac0c..80b4e20 100644 --- a/parso/pgen2/grammar_parser.py +++ b/parso/pgen2/grammar_parser.py @@ -30,7 +30,7 @@ class GrammarParser(): # rule: NAME ':' rhs NEWLINE self._current_rule_name = self._expect(token.NAME) - self._expect(token.COLON) + self._expect(token.OP, ':') a, z = self._parse_rhs() self._expect(token.NEWLINE) @@ -60,7 +60,7 @@ class GrammarParser(): def _parse_items(self): # items: item+ a, b = self._parse_item() - while self.type in (token.NAME, token.STRING, token.LPAR, token.LSQB): + while self.type in (token.NAME, token.STRING) or self.value in ('(', '['): c, d = self._parse_item() # Need to end on the next item. b.add_arc(c) @@ -72,7 +72,7 @@ class GrammarParser(): if self.value == "[": self._gettoken() a, z = self._parse_rhs() - self._expect(token.RSQB) + self._expect(token.OP, ']') # Make it also possible that there is no token and change the # state. a.add_arc(z) @@ -97,7 +97,7 @@ class GrammarParser(): if self.value == "(": self._gettoken() a, z = self._parse_rhs() - self._expect(token.RPAR) + self._expect(token.OP, ')') return a, z elif self.type in (token.NAME, token.STRING): a = NFAState(self._current_rule_name) @@ -110,10 +110,12 @@ class GrammarParser(): self._raise_error("expected (...) 
or NAME or STRING, got %s/%s", self.type, self.value) - def _expect(self, type): + def _expect(self, type, value=None): if self.type != type: self._raise_error("expected %s(%s), got %s(%s)", type, token.tok_name[type], self.type, self.value) + if value is not None and self.value != value: + self._raise_error("expected %s, got %s", value, self.value) value = self.value self._gettoken() return value diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index ac925a7..43edc92 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -71,10 +71,10 @@ def token_to_ilabel(grammar, type_, value): # Map from token to label # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the # grammar. - if type_ == tokenize.NAME: + if type_ in (tokenize.NAME, tokenize.OP): # Check for reserved words (keywords) try: - return grammar.keywords[value] + return grammar.reserved_syntax_strings[value] except KeyError: pass diff --git a/parso/python/parser.py b/parso/python/parser.py index 7728121..2ebd63d 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -127,7 +127,7 @@ class Parser(BaseParser): def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): # print('leaf', repr(value), token.tok_name[type]) if type == NAME: - if value in pgen_grammar.keywords: + if value in pgen_grammar.reserved_syntax_strings: return tree.Keyword(value, start_pos, prefix) else: return tree.Name(value, start_pos, prefix) diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 0ac8a8d..1d6e981 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -21,7 +21,7 @@ from codecs import BOM_UTF8 from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap, NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT, ERROR_DEDENT, FSTRING_STRING, FSTRING_START, - FSTRING_END) + FSTRING_END, OP) from parso._compatibility import py_version from parso.utils import split_lines @@ -574,7 +574,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): try: # This check is needed in any case to check if it's a valid # operator or just some random unicode character. - typ = opmap[token] + opmap[token] + typ = OP except KeyError: typ = ERRORTOKEN yield PythonToken(typ, token, spos, prefix) From 6098d891504421583437d10dc12b2c2fa33ef77e Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 13:03:07 +0200 Subject: [PATCH 36/76] Add PythonTokens to get rid of a lot of the token module eventually --- parso/python/token.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/parso/python/token.py b/parso/python/token.py index 6f7ad5a..9571364 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -113,3 +113,30 @@ def generate_token_id(string): except KeyError: pass return globals()[string] + + +class Token(object): + def __init__(self, name): + self.name = name + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.name) + + +class Tokens(object): + """ + Basically an enum, but Python 2 doesn't have enums in the standard library. 
+ """ + def __init__(self, names, contains_syntax): + for name in names: + setattr(self, name, Token(name)) + + self.contains_syntax = [getattr(self, name) for name in contains_syntax] + + +PythonTokens = Tokens(( + 'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT', + 'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP', + 'ENDMARKER'), + contains_syntax=('NAME', 'OP'), +) From 03de9cebb8691c31775c45c003baf276da54b0c6 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 16:24:09 +0200 Subject: [PATCH 37/76] Introduce TokenTypes --- parso/grammar.py | 4 +-- parso/pgen2/grammar.py | 6 +--- parso/pgen2/grammar_parser.py | 33 ++++++++++---------- parso/pgen2/parse.py | 11 +++---- parso/python/diff.py | 31 ++++++++++--------- parso/python/errors.py | 4 +-- parso/python/parser.py | 36 +++++++++++----------- parso/python/token.py | 58 +++++------------------------------ parso/python/tokenize.py | 54 +++++++++++++++----------------- parso/python/tree.py | 2 +- parso/tree.py | 9 +++--- test/test_tokenize.py | 38 +++++++++++------------ 12 files changed, 117 insertions(+), 169 deletions(-) diff --git a/parso/grammar.py b/parso/grammar.py index acdf286..981a0fc 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -6,7 +6,7 @@ from parso.pgen2.pgen import generate_grammar from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize -from parso.python import token +from parso.python.token import PythonTokenTypes from parso.cache import parser_cache, load_module, save_module from parso.parser import BaseParser from parso.python.parser import Parser as PythonParser @@ -193,7 +193,7 @@ class Grammar(object): class PythonGrammar(Grammar): _error_normalizer_config = ErrorFinderConfig() - _token_namespace = token + _token_namespace = PythonTokenTypes _start_nonterminal = 'file_input' def __init__(self, version_info, bnf_text): diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index b941ba1..453298a 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,9 +16,6 @@ fallback token code OP, but the parser needs the actual token code. """ -from parso.python import token - - class DFAPlan(object): def __init__(self, next_dfa, dfa_pushes=[]): self.next_dfa = next_dfa @@ -111,7 +108,6 @@ class Grammar(object): # A named token (e.g. NAME, NUMBER, STRING) itoken = getattr(self._token_namespace, label, None) - assert isinstance(itoken, int), label if itoken in self.tokens: return self.tokens[itoken] else: @@ -126,7 +122,7 @@ class Grammar(object): if value in self.reserved_syntax_strings: return self.reserved_syntax_strings[value] else: - self.labels.append((token.NAME, value)) + self.labels.append(('XXX', value)) self.reserved_syntax_strings[value] = ilabel return self.reserved_syntax_strings[value] diff --git a/parso/pgen2/grammar_parser.py b/parso/pgen2/grammar_parser.py index 80b4e20..623a455 100644 --- a/parso/pgen2/grammar_parser.py +++ b/parso/pgen2/grammar_parser.py @@ -5,9 +5,9 @@ # Copyright David Halter and Contributors # Modifications are dual-licensed: MIT and PSF. 
-from parso.python import tokenize +from parso.python.tokenize import tokenize from parso.utils import parse_version_string -from parso.python import token +from parso.python.token import PythonTokenTypes class GrammarParser(): @@ -16,7 +16,7 @@ class GrammarParser(): """ def __init__(self, bnf_grammar): self._bnf_grammar = bnf_grammar - self.generator = tokenize.tokenize( + self.generator = tokenize( bnf_grammar, version_info=parse_version_string('3.6') ) @@ -24,16 +24,16 @@ class GrammarParser(): def parse(self): # grammar: (NEWLINE | rule)* ENDMARKER - while self.type != token.ENDMARKER: - while self.type == token.NEWLINE: + while self.type != PythonTokenTypes.ENDMARKER: + while self.type == PythonTokenTypes.NEWLINE: self._gettoken() # rule: NAME ':' rhs NEWLINE - self._current_rule_name = self._expect(token.NAME) - self._expect(token.OP, ':') + self._current_rule_name = self._expect(PythonTokenTypes.NAME) + self._expect(PythonTokenTypes.OP, ':') a, z = self._parse_rhs() - self._expect(token.NEWLINE) + self._expect(PythonTokenTypes.NEWLINE) yield a, z @@ -60,7 +60,8 @@ class GrammarParser(): def _parse_items(self): # items: item+ a, b = self._parse_item() - while self.type in (token.NAME, token.STRING) or self.value in ('(', '['): + while self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING) \ + or self.value in ('(', '['): c, d = self._parse_item() # Need to end on the next item. b.add_arc(c) @@ -72,7 +73,7 @@ class GrammarParser(): if self.value == "[": self._gettoken() a, z = self._parse_rhs() - self._expect(token.OP, ']') + self._expect(PythonTokenTypes.OP, ']') # Make it also possible that there is no token and change the # state. a.add_arc(z) @@ -97,9 +98,9 @@ class GrammarParser(): if self.value == "(": self._gettoken() a, z = self._parse_rhs() - self._expect(token.OP, ')') + self._expect(PythonTokenTypes.OP, ')') return a, z - elif self.type in (token.NAME, token.STRING): + elif self.type in (PythonTokenTypes.NAME, PythonTokenTypes.STRING): a = NFAState(self._current_rule_name) z = NFAState(self._current_rule_name) # Make it clear that the state transition requires that value. @@ -110,10 +111,10 @@ class GrammarParser(): self._raise_error("expected (...) or NAME or STRING, got %s/%s", self.type, self.value) - def _expect(self, type, value=None): - if self.type != type: - self._raise_error("expected %s(%s), got %s(%s)", - type, token.tok_name[type], self.type, self.value) + def _expect(self, type_, value=None): + if self.type != type_: + self._raise_error("expected %s, got %s [%s]", + type_, self.type, self.value) if value is not None and self.value != value: self._raise_error("expected %s, got %s", value, self.value) value = self.value diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 43edc92..b22ffa1 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -14,8 +14,6 @@ See Parser/parser.c in the Python distribution for additional info on how this parsing engine works. """ -from parso.python import tokenize - class InternalParseError(Exception): """ @@ -24,9 +22,9 @@ class InternalParseError(Exception): wrong. 
""" - def __init__(self, msg, type, value, start_pos): + def __init__(self, msg, type_, value, start_pos): Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % - (msg, tokenize.tok_name[type], value, start_pos)) + (msg, type_.name, value, start_pos)) self.msg = msg self.type = type self.value = value @@ -69,9 +67,7 @@ class StackNode(object): def token_to_ilabel(grammar, type_, value): # Map from token to label - # TODO this is not good, shouldn't use tokenize.NAME, but somehow use the - # grammar. - if type_ in (tokenize.NAME, tokenize.OP): + if type_.contains_syntax: # Check for reserved words (keywords) try: return grammar.reserved_syntax_strings[value] @@ -196,6 +192,7 @@ class PgenParser(object): # creating a new node. We still create expr_stmt and # file_input though, because a lot of Jedi depends on its # logic. + print(tos.nodes) if len(tos.nodes) == 1: new_node = tos.nodes[0] else: diff --git a/parso/python/diff.py b/parso/python/diff.py index 742e0eb..3b7eee5 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -13,8 +13,8 @@ import logging from parso.utils import split_lines from parso.python.parser import Parser from parso.python.tree import EndMarker -from parso.python.tokenize import (NEWLINE, PythonToken, ERROR_DEDENT, - ENDMARKER, INDENT, DEDENT) +from parso.python.tokenize import PythonToken +from parso.python.token import PythonTokenTypes LOG = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def _get_last_line(node_or_leaf): def _ends_with_newline(leaf, suffix=''): if leaf.type == 'error_leaf': - typ = leaf.original_type + typ = leaf.token_type.lower() else: typ = leaf.type @@ -167,8 +167,7 @@ class DiffParser(object): def _enabled_debugging(self, old_lines, lines_new): if self._module.get_code() != ''.join(lines_new): - LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), - ''.join(lines_new)) + LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines), ''.join(lines_new)) def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new): copied_nodes = [None] @@ -272,7 +271,6 @@ class DiffParser(object): # memoryview? parsed_until_line = self._nodes_stack.parsed_until_line lines_after = self._parser_lines_new[parsed_until_line:] - #print('parse_content', parsed_until_line, lines_after, until_line) tokens = self._diff_tokenize( lines_after, until_line, @@ -292,7 +290,7 @@ class DiffParser(object): stack = self._active_parser.pgen_parser.stack for typ, string, start_pos, prefix in tokens: start_pos = start_pos[0] + line_offset, start_pos[1] - if typ == INDENT: + if typ == PythonTokenTypes.INDENT: indents.append(start_pos[1]) if is_first_token: omitted_first_indent = True @@ -305,8 +303,9 @@ class DiffParser(object): # In case of omitted_first_indent, it might not be dedented fully. # However this is a sign for us that a dedent happened. 
- if typ == DEDENT \ - or typ == ERROR_DEDENT and omitted_first_indent and len(indents) == 1: + if typ == PythonTokenTypes.DEDENT \ + or typ == PythonTokenTypes.ERROR_DEDENT \ + and omitted_first_indent and len(indents) == 1: indents.pop() if omitted_first_indent and not indents: # We are done here, only thing that can come now is an @@ -316,18 +315,22 @@ class DiffParser(object): prefix = re.sub(r'(<=\n)[^\n]+$', '', prefix) else: prefix = '' - yield PythonToken(ENDMARKER, '', (start_pos[0] + line_offset, 0), prefix) + yield PythonToken( + PythonTokenTypes.ENDMARKER, '', + (start_pos[0] + line_offset, 0), + prefix + ) break - elif typ == NEWLINE and start_pos[0] >= until_line: + elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line: yield PythonToken(typ, string, start_pos, prefix) # Check if the parser is actually in a valid suite state. if suite_or_file_input_is_valid(self._pgen_grammar, stack): start_pos = start_pos[0] + 1, 0 while len(indents) > int(omitted_first_indent): indents.pop() - yield PythonToken(DEDENT, '', start_pos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '') - yield PythonToken(ENDMARKER, '', start_pos, '') + yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '') break else: continue @@ -536,7 +539,7 @@ class _NodesStack(object): line_offset_index = -2 elif (new_nodes[-1].type in ('error_leaf', 'error_node') or - _is_flow_node(new_nodes[-1])): + _is_flow_node(new_nodes[-1])): # Error leafs/nodes don't have a defined start/end. Error # nodes might not end with a newline (e.g. if there's an # open `(`). Therefore ignore all of them unless they are diff --git a/parso/python/errors.py b/parso/python/errors.py index cfb8380..92fdef1 100644 --- a/parso/python/errors.py +++ b/parso/python/errors.py @@ -306,12 +306,12 @@ class ErrorFinder(Normalizer): def visit_leaf(self, leaf): if leaf.type == 'error_leaf': - if leaf.original_type in ('indent', 'error_dedent'): + if leaf.token_type in ('INDENT', 'ERROR_DEDENT'): # Indents/Dedents itself never have a prefix. They are just # "pseudo" tokens that get removed by the syntax tree later. # Therefore in case of an error we also have to check for this. 
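                 # The offending whitespace therefore lives in the prefix of
                 # the *next* leaf, which is why the spacing is taken from
                 # get_next_leaf() below.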
spacing = list(leaf.get_next_leaf()._split_prefix())[-1] - if leaf.original_type == 'indent': + if leaf.token_type == 'INDENT': message = 'unexpected indent' else: message = 'unindent does not match any outer indentation level' diff --git a/parso/python/parser.py b/parso/python/parser.py index 2ebd63d..d2ae0f9 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -1,7 +1,5 @@ from parso.python import tree -from parso.python.token import (DEDENT, INDENT, ENDMARKER, NEWLINE, NUMBER, - STRING, tok_name, NAME, FSTRING_STRING, - FSTRING_START, FSTRING_END) +from parso.python.token import PythonTokenTypes from parso.parser import BaseParser from parso.pgen2.parse import token_to_ilabel @@ -53,17 +51,18 @@ class Parser(BaseParser): # Names/Keywords are handled separately _leaf_map = { - STRING: tree.String, - NUMBER: tree.Number, - NEWLINE: tree.Newline, - ENDMARKER: tree.EndMarker, - FSTRING_STRING: tree.FStringString, - FSTRING_START: tree.FStringStart, - FSTRING_END: tree.FStringEnd, + PythonTokenTypes.STRING: tree.String, + PythonTokenTypes.NUMBER: tree.Number, + PythonTokenTypes.NEWLINE: tree.Newline, + PythonTokenTypes.ENDMARKER: tree.EndMarker, + PythonTokenTypes.FSTRING_STRING: tree.FStringString, + PythonTokenTypes.FSTRING_START: tree.FStringStart, + PythonTokenTypes.FSTRING_END: tree.FStringEnd, } def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'): - super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery) + super(Parser, self).__init__(pgen_grammar, start_nonterminal, + error_recovery=error_recovery) self.syntax_errors = [] self._omit_dedent_list = [] @@ -126,7 +125,7 @@ class Parser(BaseParser): def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): # print('leaf', repr(value), token.tok_name[type]) - if type == NAME: + if type == PythonTokenTypes.NAME: if value in pgen_grammar.reserved_syntax_strings: return tree.Keyword(value, start_pos, prefix) else: @@ -143,7 +142,8 @@ class Parser(BaseParser): last_leaf = None if self._start_nonterminal == 'file_input' and \ - (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value): + (typ == PythonTokenTypes.ENDMARKER or + typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value): def reduce_stack(states, newstate): # reduce state = newstate @@ -158,7 +158,7 @@ class Parser(BaseParser): # end of a file, we have to recover even if the user doesn't want # error recovery. if stack[-1].dfa.from_rule == 'simple_stmt': - ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value) + ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value) try: plan = stack[-1].dfa.ilabel_to_plan[ilabel] except KeyError: @@ -199,12 +199,12 @@ class Parser(BaseParser): if self._stack_removal(stack, until_index + 1): add_token_callback(typ, value, start_pos, prefix) else: - if typ == INDENT: + if typ == PythonTokenTypes.INDENT: # For every deleted INDENT we have to delete a DEDENT as well. # Otherwise the parser will get into trouble and DEDENT too early. 
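                 # Sketch of the bookkeeping: the current _indent_counter is
                 # remembered here, and _recovery_tokenize below skips exactly
                 # one DEDENT once the counter drops back to that value.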
self._omit_dedent_list.append(self._indent_counter) - error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix) + error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix) stack[-1].nodes.append(error_leaf) tos = stack[-1] @@ -230,7 +230,7 @@ class Parser(BaseParser): def _recovery_tokenize(self, tokens): for typ, value, start_pos, prefix in tokens: # print(tok_name[typ], repr(value), start_pos, repr(prefix)) - if typ == DEDENT: + if typ == PythonTokenTypes.DEDENT: # We need to count indents, because if we just omit any DEDENT, # we might omit them in the wrong place. o = self._omit_dedent_list @@ -239,6 +239,6 @@ class Parser(BaseParser): continue self._indent_counter -= 1 - elif typ == INDENT: + elif typ == PythonTokenTypes.INDENT: self._indent_counter += 1 yield typ, value, start_pos, prefix diff --git a/parso/python/token.py b/parso/python/token.py index 9571364..3e4e17b 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -1,47 +1,4 @@ from __future__ import absolute_import -from itertools import count -from token import * - -from parso._compatibility import py_version - -# Don't mutate the standard library dict -tok_name = tok_name.copy() - -_counter = count(N_TOKENS) -# Never want to see this thing again. -del N_TOKENS - -COMMENT = next(_counter) -tok_name[COMMENT] = 'COMMENT' - -NL = next(_counter) -tok_name[NL] = 'NL' - -# Sets the attributes that don't exist in these tok_name versions. -if py_version >= 30: - BACKQUOTE = next(_counter) - tok_name[BACKQUOTE] = 'BACKQUOTE' -else: - RARROW = next(_counter) - tok_name[RARROW] = 'RARROW' - ELLIPSIS = next(_counter) - tok_name[ELLIPSIS] = 'ELLIPSIS' - -if py_version < 35: - ATEQUAL = next(_counter) - tok_name[ATEQUAL] = 'ATEQUAL' - -ERROR_DEDENT = next(_counter) -tok_name[ERROR_DEDENT] = 'ERROR_DEDENT' - -FSTRING_START = next(_counter) -tok_name[FSTRING_START] = 'FSTRING_START' -FSTRING_END = next(_counter) -tok_name[FSTRING_END] = 'FSTRING_END' -FSTRING_STRING = next(_counter) -tok_name[FSTRING_STRING] = 'FSTRING_STRING' -EXCLAMATION = next(_counter) -tok_name[EXCLAMATION] = 'EXCLAMATION' # Map from operator to number (since tokenize doesn't do this) @@ -100,7 +57,7 @@ opmap_raw = """\ opmap = {} for line in opmap_raw.splitlines(): op, name = line.split() - opmap[op] = globals()[name] + opmap[op] = name def generate_token_id(string): @@ -115,26 +72,25 @@ def generate_token_id(string): return globals()[string] -class Token(object): - def __init__(self, name): +class TokenType(object): + def __init__(self, name, contains_syntax=False): self.name = name + self.contains_syntax = contains_syntax def __repr__(self): return '%s(%s)' % (self.__class__.__name__, self.name) -class Tokens(object): +class TokenTypes(object): """ Basically an enum, but Python 2 doesn't have enums in the standard library. 
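+
+    A minimal usage sketch (illustrative only)::
+
+        types = TokenTypes(('NAME', 'OP'), contains_syntax=('NAME',))
+        types.NAME                   # TokenType(NAME)
+        types.NAME.contains_syntax   # True
+        types.OP.contains_syntax     # False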
""" def __init__(self, names, contains_syntax): for name in names: - setattr(self, name, Token(name)) - - self.contains_syntax = [getattr(self, name) for name in contains_syntax] + setattr(self, name, TokenType(name, contains_syntax=name in contains_syntax)) -PythonTokens = Tokens(( +PythonTokenTypes = TokenTypes(( 'STRING', 'NUMBER', 'NAME', 'ERRORTOKEN', 'NEWLINE', 'INDENT', 'DEDENT', 'ERROR_DEDENT', 'FSTRING_STRING', 'FSTRING_START', 'FSTRING_END', 'OP', 'ENDMARKER'), diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 1d6e981..1061672 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -18,10 +18,7 @@ from collections import namedtuple import itertools as _itertools from codecs import BOM_UTF8 -from parso.python.token import (tok_name, ENDMARKER, STRING, NUMBER, opmap, - NAME, ERRORTOKEN, NEWLINE, INDENT, DEDENT, - ERROR_DEDENT, FSTRING_STRING, FSTRING_START, - FSTRING_END, OP) +from parso.python.token import PythonTokenTypes, opmap from parso._compatibility import py_version from parso.utils import split_lines @@ -242,12 +239,9 @@ class Token(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): class PythonToken(Token): - def _get_type_name(self, exact=True): - return tok_name[self.type] - def __repr__(self): return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' % - self._replace(type=self._get_type_name())) + self._replace(type=self.type.name)) class FStringNode(object): @@ -396,7 +390,9 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): endmatch = endprog.match(line) if endmatch: pos = endmatch.end(0) - yield PythonToken(STRING, contstr + line[:pos], contstr_start, prefix) + yield PythonToken( + PythonTokenTypes.STRING, contstr + line[:pos], + contstr_start, prefix) contstr = '' contline = None else: @@ -409,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) if string: yield PythonToken( - FSTRING_STRING, string, + PythonTokenTypes.FSTRING_STRING, string, fstring_stack[-1].last_string_start_pos, # Never has a prefix because it can start anywhere and # include whitespace. @@ -426,7 +422,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if fstring_index is not None: yield PythonToken( - FSTRING_END, + PythonTokenTypes.FSTRING_END, fstring_stack[fstring_index].quote, (lnum, pos), prefix=additional_prefix, @@ -443,7 +439,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): match = whitespace.match(line, pos) pos = match.end() yield PythonToken( - ERRORTOKEN, line[pos:], (lnum, pos), + PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos), additional_prefix + match.group(0) ) additional_prefix = '' @@ -471,24 +467,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # TODO don't we need to change spos as well? 
start -= 1 if start > indents[-1]: - yield PythonToken(INDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.INDENT, '', spos, '') indents.append(start) while start < indents[-1]: if start > indents[-2]: - yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '') + yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '') break - yield PythonToken(DEDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') indents.pop() if fstring_stack: fstring_index, end = _check_fstring_ending(fstring_stack, token) if fstring_index is not None: if end != 0: - yield PythonToken(ERRORTOKEN, token[:end], spos, prefix) + yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix) prefix = '' yield PythonToken( - FSTRING_END, + PythonTokenTypes.FSTRING_END, fstring_stack[fstring_index].quote, (lnum, spos[1] + 1), prefix=prefix @@ -499,7 +495,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if (initial in numchars or # ordinary number (initial == '.' and token != '.' and token != '...')): - yield PythonToken(NUMBER, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix) elif initial in '\r\n': if any(not f.allow_multiline() for f in fstring_stack): # Would use fstring_stack.clear, but that's not available @@ -507,7 +503,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): fstring_stack[:] = [] if not new_line and paren_level == 0 and not fstring_stack: - yield PythonToken(NEWLINE, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix) else: additional_prefix = prefix + token new_line = True @@ -520,7 +516,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] - yield PythonToken(STRING, token, spos, prefix) + yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) else: contstr_start = (lnum, start) # multiple lines contstr = line[start:] @@ -537,10 +533,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): contline = line break else: # ordinary string - yield PythonToken(STRING, token, spos, prefix) + yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) elif token in fstring_pattern_map: # The start of an fstring. fstring_stack.append(FStringNode(fstring_pattern_map[token])) - yield PythonToken(FSTRING_START, token, spos, prefix) + yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix) elif is_identifier(initial): # ordinary name if token in always_break_tokens: fstring_stack[:] = [] @@ -548,11 +544,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): while True: indent = indents.pop() if indent > start: - yield PythonToken(DEDENT, '', spos, '') + yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') else: indents.append(indent) break - yield PythonToken(NAME, token, spos, prefix) + yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix) elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt additional_prefix += prefix + line[start:] break @@ -575,13 +571,13 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # This check is needed in any case to check if it's a valid # operator or just some random unicode character. 
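                 # E.g. opmap['+'] or opmap['**='] succeed, while a character
                 # like '$' is not in the table and ends up as ERRORTOKEN.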
opmap[token] - typ = OP + typ = PythonTokenTypes.OP except KeyError: - typ = ERRORTOKEN + typ = PythonTokenTypes.ERRORTOKEN yield PythonToken(typ, token, spos, prefix) if contstr: - yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) + yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix) if contstr.endswith('\n'): new_line = True @@ -589,8 +585,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # As the last position we just take the maximally possible position. We # remove -1 for the last new line. for indent in indents[1:]: - yield PythonToken(DEDENT, '', end_pos, '') - yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) + yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '') + yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix) if __name__ == "__main__": diff --git a/parso/python/tree.py b/parso/python/tree.py index f6b4dd3..70de59e 100644 --- a/parso/python/tree.py +++ b/parso/python/tree.py @@ -124,7 +124,7 @@ class PythonLeaf(PythonMixin, Leaf): # indent error leafs somehow? No idea how, though. previous_leaf = self.get_previous_leaf() if previous_leaf is not None and previous_leaf.type == 'error_leaf' \ - and previous_leaf.original_type in ('indent', 'error_dedent'): + and previous_leaf.token_type in ('INDENT', 'ERROR_DEDENT'): previous_leaf = previous_leaf.get_previous_leaf() if previous_leaf is None: diff --git a/parso/tree.py b/parso/tree.py index 5316795..9e7ab2f 100644 --- a/parso/tree.py +++ b/parso/tree.py @@ -229,6 +229,7 @@ class Leaf(NodeOrLeaf): class TypedLeaf(Leaf): __slots__ = ('type',) + def __init__(self, type, value, start_pos, prefix=''): super(TypedLeaf, self).__init__(value, start_pos, prefix) self.type = type @@ -351,13 +352,13 @@ class ErrorLeaf(Leaf): A leaf that is either completely invalid in a language (like `$` in Python) or is invalid at that position. Like the star in `1 +* 1`. """ - __slots__ = ('original_type',) + __slots__ = ('token_type',) type = 'error_leaf' - def __init__(self, original_type, value, start_pos, prefix=''): + def __init__(self, token_type, value, start_pos, prefix=''): super(ErrorLeaf, self).__init__(value, start_pos, prefix) - self.original_type = original_type + self.token_type = token_type def __repr__(self): return "<%s: %s:%s, %s>" % \ - (type(self).__name__, self.original_type, repr(self.value), self.start_pos) + (type(self).__name__, self.token_type, repr(self.value), self.start_pos) diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 08590a6..6593ff8 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 # This file contains Unicode characters. from textwrap import dedent -import tokenize as stdlib_tokenize import pytest from parso._compatibility import py_version from parso.utils import split_lines, parse_version_string -from parso.python.token import ( - NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER, ERROR_DEDENT, - FSTRING_START) +from parso.python.token import PythonTokenTypes from parso.python import tokenize from parso import parse from parso.python.tokenize import PythonToken +# To make it easier to access some of the token types, just put them here. 
+NAME = PythonTokenTypes.NAME +NEWLINE = PythonTokenTypes.NEWLINE +STRING = PythonTokenTypes.STRING +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +ENDMARKER = PythonTokenTypes.ENDMARKER +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START + + def _get_token_list(string): # Load the current version. version_info = parse_version_string() @@ -128,7 +137,7 @@ def test_identifier_contains_unicode(): else: # Unicode tokens in Python 2 seem to be identified as operators. # They will be ignored in the parser, that's ok. - assert unicode_token[0] == tokenize.ERRORTOKEN + assert unicode_token[0] == ERRORTOKEN def test_quoted_strings(): @@ -188,17 +197,17 @@ def test_ur_literals(): def test_error_literal(): error_token, endmarker = _get_token_list('"\n') - assert error_token.type == tokenize.ERRORTOKEN + assert error_token.type == ERRORTOKEN assert endmarker.prefix == '' assert error_token.string == '"\n' - assert endmarker.type == tokenize.ENDMARKER + assert endmarker.type == ENDMARKER assert endmarker.prefix == '' bracket, error_token, endmarker = _get_token_list('( """') - assert error_token.type == tokenize.ERRORTOKEN + assert error_token.type == ERRORTOKEN assert error_token.prefix == ' ' assert error_token.string == '"""' - assert endmarker.type == tokenize.ENDMARKER + assert endmarker.type == ENDMARKER assert endmarker.prefix == '' @@ -236,14 +245,3 @@ def test_error_string(): assert t1.prefix == ' ' assert t1.string == '"\n' assert endmarker.string == '' - -def test_tok_name_copied(): - # Make sure parso doesn't mutate the standard library - tok_len = len(stdlib_tokenize.tok_name) - correct_len = stdlib_tokenize.N_TOKENS - if 'N_TOKENS' in stdlib_tokenize.tok_name.values(): # Python 3.7 - correct_len += 1 - if 'NT_OFFSET' in stdlib_tokenize.tok_name.values(): # Not there in PyPy - correct_len += 1 - - assert tok_len == correct_len From 34ab35558ff491afd5f9f05d4b57f57caa08fd71 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 16:31:58 +0200 Subject: [PATCH 38/76] Remove a lot of the old token code --- parso/python/token.py | 71 ---------------------------------------- parso/python/tokenize.py | 11 ++----- 2 files changed, 2 insertions(+), 80 deletions(-) diff --git a/parso/python/token.py b/parso/python/token.py index 3e4e17b..bb86ec9 100644 --- a/parso/python/token.py +++ b/parso/python/token.py @@ -1,76 +1,5 @@ from __future__ import absolute_import -# Map from operator to number (since tokenize doesn't do this) - -opmap_raw = """\ -( LPAR -) RPAR -[ LSQB -] RSQB -: COLON -, COMMA -; SEMI -+ PLUS -- MINUS -* STAR -/ SLASH -| VBAR -& AMPER -< LESS -> GREATER -= EQUAL -. DOT -% PERCENT -` BACKQUOTE -{ LBRACE -} RBRACE -@ AT -== EQEQUAL -!= NOTEQUAL -<> NOTEQUAL -<= LESSEQUAL ->= GREATEREQUAL -~ TILDE -^ CIRCUMFLEX -<< LEFTSHIFT ->> RIGHTSHIFT -** DOUBLESTAR -+= PLUSEQUAL --= MINEQUAL -*= STAREQUAL -/= SLASHEQUAL -%= PERCENTEQUAL -&= AMPEREQUAL -|= VBAREQUAL -@= ATEQUAL -^= CIRCUMFLEXEQUAL -<<= LEFTSHIFTEQUAL ->>= RIGHTSHIFTEQUAL -**= DOUBLESTAREQUAL -// DOUBLESLASH -//= DOUBLESLASHEQUAL --> RARROW -... ELLIPSIS -! EXCLAMATION -""" - -opmap = {} -for line in opmap_raw.splitlines(): - op, name = line.split() - opmap[op] = name - - -def generate_token_id(string): - """ - Uses a token in the grammar (e.g. `'+'` or `'and'`returns the corresponding - ID for it. The strings are part of the grammar file. 
- """ - try: - return opmap[string] - except KeyError: - pass - return globals()[string] - class TokenType(object): def __init__(self, name, contains_syntax=False): diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 1061672..4273625 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -18,7 +18,7 @@ from collections import namedtuple import itertools as _itertools from codecs import BOM_UTF8 -from parso.python.token import PythonTokenTypes, opmap +from parso.python.token import PythonTokenTypes from parso._compatibility import py_version from parso.utils import split_lines @@ -567,14 +567,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): and fstring_stack[-1].parentheses_count == 1: fstring_stack[-1].format_spec_count += 1 - try: - # This check is needed in any case to check if it's a valid - # operator or just some random unicode character. - opmap[token] - typ = PythonTokenTypes.OP - except KeyError: - typ = PythonTokenTypes.ERRORTOKEN - yield PythonToken(typ, token, spos, prefix) + yield PythonToken(PythonTokenTypes.OP, token, spos, prefix) if contstr: yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix) From e958b241c7fad1d4a37f6bccf6f530ec0a942b05 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 16:39:48 +0200 Subject: [PATCH 39/76] Use some tokenize names directly --- parso/python/tokenize.py | 54 +++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 4273625..bceb8ee 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -23,6 +23,20 @@ from parso._compatibility import py_version from parso.utils import split_lines +STRING = PythonTokenTypes.STRING +NAME = PythonTokenTypes.NAME +NUMBER = PythonTokenTypes.NUMBER +OP = PythonTokenTypes.OP +NEWLINE = PythonTokenTypes.NEWLINE +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT +ENDMARKER = PythonTokenTypes.ENDMARKER +ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT +FSTRING_START = PythonTokenTypes.FSTRING_START +FSTRING_STRING = PythonTokenTypes.FSTRING_STRING +FSTRING_END = PythonTokenTypes.FSTRING_END + TokenCollection = namedtuple( 'TokenCollection', 'pseudo_token single_quoted triple_quoted endpats whitespace ' @@ -391,7 +405,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if endmatch: pos = endmatch.end(0) yield PythonToken( - PythonTokenTypes.STRING, contstr + line[:pos], + STRING, contstr + line[:pos], contstr_start, prefix) contstr = '' contline = None @@ -405,7 +419,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): string, pos = _find_fstring_string(fstring_stack, line, lnum, pos) if string: yield PythonToken( - PythonTokenTypes.FSTRING_STRING, string, + FSTRING_STRING, string, fstring_stack[-1].last_string_start_pos, # Never has a prefix because it can start anywhere and # include whitespace. 
@@ -422,7 +436,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if fstring_index is not None: yield PythonToken( - PythonTokenTypes.FSTRING_END, + FSTRING_END, fstring_stack[fstring_index].quote, (lnum, pos), prefix=additional_prefix, @@ -439,7 +453,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): match = whitespace.match(line, pos) pos = match.end() yield PythonToken( - PythonTokenTypes.ERRORTOKEN, line[pos:], (lnum, pos), + ERRORTOKEN, line[pos:], (lnum, pos), additional_prefix + match.group(0) ) additional_prefix = '' @@ -467,24 +481,24 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # TODO don't we need to change spos as well? start -= 1 if start > indents[-1]: - yield PythonToken(PythonTokenTypes.INDENT, '', spos, '') + yield PythonToken(INDENT, '', spos, '') indents.append(start) while start < indents[-1]: if start > indents[-2]: - yield PythonToken(PythonTokenTypes.ERROR_DEDENT, '', (lnum, 0), '') + yield PythonToken(ERROR_DEDENT, '', (lnum, 0), '') break - yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') + yield PythonToken(DEDENT, '', spos, '') indents.pop() if fstring_stack: fstring_index, end = _check_fstring_ending(fstring_stack, token) if fstring_index is not None: if end != 0: - yield PythonToken(PythonTokenTypes.ERRORTOKEN, token[:end], spos, prefix) + yield PythonToken(ERRORTOKEN, token[:end], spos, prefix) prefix = '' yield PythonToken( - PythonTokenTypes.FSTRING_END, + FSTRING_END, fstring_stack[fstring_index].quote, (lnum, spos[1] + 1), prefix=prefix @@ -495,7 +509,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if (initial in numchars or # ordinary number (initial == '.' and token != '.' and token != '...')): - yield PythonToken(PythonTokenTypes.NUMBER, token, spos, prefix) + yield PythonToken(NUMBER, token, spos, prefix) elif initial in '\r\n': if any(not f.allow_multiline() for f in fstring_stack): # Would use fstring_stack.clear, but that's not available @@ -503,7 +517,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): fstring_stack[:] = [] if not new_line and paren_level == 0 and not fstring_stack: - yield PythonToken(PythonTokenTypes.NEWLINE, token, spos, prefix) + yield PythonToken(NEWLINE, token, spos, prefix) else: additional_prefix = prefix + token new_line = True @@ -516,7 +530,7 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): if endmatch: # all on one line pos = endmatch.end(0) token = line[start:pos] - yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) + yield PythonToken(STRING, token, spos, prefix) else: contstr_start = (lnum, start) # multiple lines contstr = line[start:] @@ -533,10 +547,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): contline = line break else: # ordinary string - yield PythonToken(PythonTokenTypes.STRING, token, spos, prefix) + yield PythonToken(STRING, token, spos, prefix) elif token in fstring_pattern_map: # The start of an fstring. 
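             # E.g. an opening f' token maps to its closing quote here, and
             # the FStringNode keeps that quote until FSTRING_END is emitted.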
fstring_stack.append(FStringNode(fstring_pattern_map[token])) - yield PythonToken(PythonTokenTypes.FSTRING_START, token, spos, prefix) + yield PythonToken(FSTRING_START, token, spos, prefix) elif is_identifier(initial): # ordinary name if token in always_break_tokens: fstring_stack[:] = [] @@ -544,11 +558,11 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): while True: indent = indents.pop() if indent > start: - yield PythonToken(PythonTokenTypes.DEDENT, '', spos, '') + yield PythonToken(DEDENT, '', spos, '') else: indents.append(indent) break - yield PythonToken(PythonTokenTypes.NAME, token, spos, prefix) + yield PythonToken(NAME, token, spos, prefix) elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n'): # continued stmt additional_prefix += prefix + line[start:] break @@ -567,10 +581,10 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): and fstring_stack[-1].parentheses_count == 1: fstring_stack[-1].format_spec_count += 1 - yield PythonToken(PythonTokenTypes.OP, token, spos, prefix) + yield PythonToken(OP, token, spos, prefix) if contstr: - yield PythonToken(PythonTokenTypes.ERRORTOKEN, contstr, contstr_start, prefix) + yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix) if contstr.endswith('\n'): new_line = True @@ -578,8 +592,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)): # As the last position we just take the maximally possible position. We # remove -1 for the last new line. for indent in indents[1:]: - yield PythonToken(PythonTokenTypes.DEDENT, '', end_pos, '') - yield PythonToken(PythonTokenTypes.ENDMARKER, '', end_pos, additional_prefix) + yield PythonToken(DEDENT, '', end_pos, '') + yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) if __name__ == "__main__": From c5d141bf60ab02d545c5d53adffc3e863901789c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 16:43:01 +0200 Subject: [PATCH 40/76] Make some more things faster --- parso/pgen2/parse.py | 1 - parso/python/parser.py | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index b22ffa1..f2690b1 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -192,7 +192,6 @@ class PgenParser(object): # creating a new node. We still create expr_stmt and # file_input though, because a lot of Jedi depends on its # logic. 
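             # E.g. a lone NAME that matched a single-element rule is kept as
             # the leaf itself instead of being wrapped in a one-child node.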
- print(tos.nodes) if len(tos.nodes) == 1: new_node = tos.nodes[0] else: diff --git a/parso/python/parser.py b/parso/python/parser.py index d2ae0f9..cf4298f 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -4,6 +4,11 @@ from parso.parser import BaseParser from parso.pgen2.parse import token_to_ilabel +NAME = PythonTokenTypes.NAME +INDENT = PythonTokenTypes.INDENT +DEDENT = PythonTokenTypes.DEDENT + + class Parser(BaseParser): """ This class is used to parse a Python file, it then divides them into a @@ -125,7 +130,7 @@ class Parser(BaseParser): def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos): # print('leaf', repr(value), token.tok_name[type]) - if type == PythonTokenTypes.NAME: + if type == NAME: if value in pgen_grammar.reserved_syntax_strings: return tree.Keyword(value, start_pos, prefix) else: @@ -143,7 +148,7 @@ class Parser(BaseParser): if self._start_nonterminal == 'file_input' and \ (typ == PythonTokenTypes.ENDMARKER or - typ == PythonTokenTypes.DEDENT and '\n' not in last_leaf.value): + typ == DEDENT and '\n' not in last_leaf.value): def reduce_stack(states, newstate): # reduce state = newstate @@ -199,7 +204,7 @@ class Parser(BaseParser): if self._stack_removal(stack, until_index + 1): add_token_callback(typ, value, start_pos, prefix) else: - if typ == PythonTokenTypes.INDENT: + if typ == INDENT: # For every deleted INDENT we have to delete a DEDENT as well. # Otherwise the parser will get into trouble and DEDENT too early. self._omit_dedent_list.append(self._indent_counter) @@ -230,7 +235,7 @@ class Parser(BaseParser): def _recovery_tokenize(self, tokens): for typ, value, start_pos, prefix in tokens: # print(tok_name[typ], repr(value), start_pos, repr(prefix)) - if typ == PythonTokenTypes.DEDENT: + if typ == DEDENT: # We need to count indents, because if we just omit any DEDENT, # we might omit them in the wrong place. o = self._omit_dedent_list @@ -239,6 +244,6 @@ class Parser(BaseParser): continue self._indent_counter -= 1 - elif typ == PythonTokenTypes.INDENT: + elif typ == INDENT: self._indent_counter += 1 yield typ, value, start_pos, prefix From 71003bc20ec2e71a85f4f73dd6d58c0b8dfafd96 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 17:29:03 +0200 Subject: [PATCH 41/76] Just use caching instead of strange transitions --- parso/pgen2/grammar.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 453298a..a57e74d 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,6 +16,7 @@ fallback token code OP, but the parser needs the actual token code. """ + class DFAPlan(object): def __init__(self, next_dfa, dfa_pushes=[]): self.next_dfa = next_dfa @@ -99,7 +100,7 @@ class Grammar(object): return wrapper - #@_cache_labels + @_cache_labels def _make_label(self, label): ilabel = len(self.labels) if label[0].isalpha(): @@ -108,23 +109,17 @@ class Grammar(object): # A named token (e.g. NAME, NUMBER, STRING) itoken = getattr(self._token_namespace, label, None) - if itoken in self.tokens: - return self.tokens[itoken] - else: - self.labels.append((itoken, None)) - self.tokens[itoken] = ilabel - return ilabel + self.labels.append((itoken, None)) + self.tokens[itoken] = ilabel + return ilabel else: # Either a keyword or an operator assert label[0] in ('"', "'"), label # TODO use literal_eval instead of a simple eval. 
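             # (The labels come straight from the grammar files, e.g. "'if'"
             # or '"+"', so eval only ever sees quoted string literals here.)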
value = eval(label) - if value in self.reserved_syntax_strings: - return self.reserved_syntax_strings[value] - else: - self.labels.append(('XXX', value)) - self.reserved_syntax_strings[value] = ilabel - return self.reserved_syntax_strings[value] + self.labels.append(('XXX', value)) + self.reserved_syntax_strings[value] = ilabel + return self.reserved_syntax_strings[value] def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] From 530a32464305d7a3b6d1d3d18b16db90ee4a65ec Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 17:55:22 +0200 Subject: [PATCH 42/76] Remove labels --- parso/pgen2/grammar.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index a57e74d..b286f27 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,6 +16,8 @@ fallback token code OP, but the parser needs the actual token code. """ +from ast import literal_eval + class DFAPlan(object): def __init__(self, next_dfa, dfa_pushes=[]): @@ -26,6 +28,11 @@ class DFAPlan(object): return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) +class ReservedString(object): + def __init__(self, value): + self.value = value + + class Grammar(object): """Pgen parsing tables conversion class. @@ -37,13 +44,6 @@ class Grammar(object): The instance variables are as follows: - labels -- a list of (x, y) pairs where x is either a token - number or a nonterminal number, and y is either None - or a string; the strings are keywords. The label - number is the index in this list; label numbers - are used to mark state transitions (arcs) in the - DFAs. - start -- the number of the grammar's start nonterminal. keywords -- a dict mapping keyword strings to arc labels. @@ -56,7 +56,6 @@ class Grammar(object): self._token_namespace = token_namespace self._nonterminal_to_dfas = rule_to_dfas - self.labels = [(0, "EMPTY")] self.reserved_syntax_strings = {} self.tokens = {} self.start_nonterminal = start_nonterminal @@ -102,24 +101,24 @@ class Grammar(object): @_cache_labels def _make_label(self, label): - ilabel = len(self.labels) if label[0].isalpha(): # Either a nonterminal name or a named token assert label not in self._nonterminal_to_dfas # A named token (e.g. NAME, NUMBER, STRING) - itoken = getattr(self._token_namespace, label, None) - self.labels.append((itoken, None)) - self.tokens[itoken] = ilabel - return ilabel + token_type = getattr(self._token_namespace, label, None) + self.tokens[token_type] = token_type + return token_type else: # Either a keyword or an operator assert label[0] in ('"', "'"), label # TODO use literal_eval instead of a simple eval. 
- value = eval(label) - self.labels.append(('XXX', value)) - self.reserved_syntax_strings[value] = ilabel - return self.reserved_syntax_strings[value] + value = literal_eval(label) + try: + return self.reserved_syntax_strings[value] + except KeyError: + r = self.reserved_syntax_strings[value] = ReservedString(value) + return r def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] From 2a9d8632fe2bca9474f776eadd6ecd234c08f992 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 17:56:13 +0200 Subject: [PATCH 43/76] Remove label caching --- parso/pgen2/grammar.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index b286f27..51c329a 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -60,7 +60,6 @@ class Grammar(object): self.tokens = {} self.start_nonterminal = start_nonterminal - self._label_cache = {} self._make_grammar() def _make_grammar(self): @@ -88,18 +87,6 @@ class Grammar(object): ilabel = self._make_label(terminal_or_nonterminal) plans[ilabel] = DFAPlan(next_dfa) - def _cache_labels(func): - def wrapper(self, label): - try: - return self._label_cache[label] - except KeyError: - result = func(self, label) - self._label_cache[label] = result - return result - - return wrapper - - @_cache_labels def _make_label(self, label): if label[0].isalpha(): # Either a nonterminal name or a named token From 309033ae2dae1535b98cca46550b501802a275a9 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 17:59:32 +0200 Subject: [PATCH 44/76] Work with token types whenever possible --- parso/pgen2/grammar.py | 11 ----------- parso/pgen2/parse.py | 5 +---- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 51c329a..ae5e7de 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -41,15 +41,6 @@ class Grammar(object): accesses the instance variables directly. The class here does not provide initialization of the tables; several subclasses exist to do this (see the conv and pgen modules). - - The instance variables are as follows: - - start -- the number of the grammar's start nonterminal. - - keywords -- a dict mapping keyword strings to arc labels. - - tokens -- a dict mapping token numbers to arc labels. - """ def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace): @@ -57,7 +48,6 @@ class Grammar(object): self._nonterminal_to_dfas = rule_to_dfas self.reserved_syntax_strings = {} - self.tokens = {} self.start_nonterminal = start_nonterminal self._make_grammar() @@ -94,7 +84,6 @@ class Grammar(object): # A named token (e.g. 
NAME, NUMBER, STRING) token_type = getattr(self._token_namespace, label, None) - self.tokens[token_type] = token_type return token_type else: # Either a keyword or an operator diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index f2690b1..646eb58 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -74,10 +74,7 @@ def token_to_ilabel(grammar, type_, value): except KeyError: pass - try: - return grammar.tokens[type_] - except KeyError: - return None + return type_ class PgenParser(object): From 43d4a8a83456c256a8418b0c7b1cbc62dd8f818a Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Sun, 24 Jun 2018 23:45:18 +0200 Subject: [PATCH 45/76] Don't use a function that doesn't work --- parso/python/parser.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index cf4298f..a11cf70 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -1,7 +1,6 @@ from parso.python import tree from parso.python.token import PythonTokenTypes from parso.parser import BaseParser -from parso.pgen2.parse import token_to_ilabel NAME = PythonTokenTypes.NAME @@ -163,9 +162,8 @@ class Parser(BaseParser): # end of a file, we have to recover even if the user doesn't want # error recovery. if stack[-1].dfa.from_rule == 'simple_stmt': - ilabel = token_to_ilabel(pgen_grammar, PythonTokenTypes.NEWLINE, value) try: - plan = stack[-1].dfa.ilabel_to_plan[ilabel] + plan = stack[-1].dfa.ilabel_to_plan[PythonTokenTypes.NEWLINE] except KeyError: pass else: From da5aa8a2abb2d7d2243ade247ed08b6d09be8c34 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Mon, 25 Jun 2018 01:56:02 +0200 Subject: [PATCH 46/76] Better detection of ambiguities --- parso/pgen2/grammar.py | 14 ++++++++++---- test/test_pgen2.py | 12 ++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index ae5e7de..c8aaaad 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -99,7 +99,7 @@ class Grammar(object): def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] self._first_terminals[nonterminal] = None # dummy to detect left recursion - self._first_plans[nonterminal] = {} + first_plans = self._first_plans[nonterminal] = {} # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] @@ -121,13 +121,19 @@ class Grammar(object): overlapcheck[nonterminal_or_string] = fset for t, pushes in self._first_plans[nonterminal_or_string].items(): - assert not self._first_plans[nonterminal].get(t) - self._first_plans[nonterminal][t] = [next_] + pushes + check = first_plans.get(t) + if check is not None: + raise ValueError( + "Rule %s is ambiguous; %s is the" + " start of the rule %s as well as %s." + % (nonterminal, t, nonterminal_or_string, check[-1].from_rule) + ) + first_plans[t] = [next_] + pushes else: # It's a string. We have finally found a possible first token. 
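                # E.g. for a rule like `bar: 'x'` the only possible first
                # transition is the reserved string 'x' itself.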
totalset.add(nonterminal_or_string) overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) - self._first_plans[nonterminal][nonterminal_or_string] = [next_] + first_plans[nonterminal_or_string] = [next_] inverse = {} for nonterminal_or_string, first_set in overlapcheck.items(): diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 8c6e90f..88f6591 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -12,6 +12,8 @@ import pytest from parso import load_grammar from parso import ParserSyntaxError +from parso.pgen2.pgen import generate_grammar +from parso.python import tokenize def _parse(code, version=None): @@ -270,3 +272,13 @@ def py_br(each_version): def test_py3_rb(works_ge_py3): works_ge_py3.parse("rb'1'") works_ge_py3.parse("RB'1'") + + +def test_left_recursion(): + with pytest.raises(ValueError, match='left recursion'): + generate_grammar('foo: foo NAME\n', tokenize.PythonTokenTypes) + + +def test_ambiguities(): + with pytest.raises(ValueError, match='ambiguous'): + generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes) From a46ecbb49912153c67e484b13da21bb2909bd109 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 00:58:19 +0200 Subject: [PATCH 47/76] Fix an ambiguity issue Unfortunately had to refactor most of the transition generation --- parso/pgen2/grammar.py | 97 ++++++++++++++---------------------------- parso/pgen2/pgen.py | 46 +++++++++++++++++++- test/test_pgen2.py | 3 ++ 3 files changed, 80 insertions(+), 66 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index c8aaaad..9409c4a 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -16,8 +16,6 @@ fallback token code OP, but the parser needs the actual token code. """ -from ast import literal_eval - class DFAPlan(object): def __init__(self, next_dfa, dfa_pushes=[]): @@ -28,11 +26,6 @@ class DFAPlan(object): return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) -class ReservedString(object): - def __init__(self, value): - self.value = value - - class Grammar(object): """Pgen parsing tables conversion class. @@ -43,11 +36,10 @@ class Grammar(object): do this (see the conv and pgen modules). """ - def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace): - self._token_namespace = token_namespace + def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, reserved_syntax_strings): self._nonterminal_to_dfas = rule_to_dfas - self.reserved_syntax_strings = {} + self.reserved_syntax_strings = reserved_syntax_strings self.start_nonterminal = start_nonterminal self._make_grammar() @@ -68,33 +60,10 @@ class Grammar(object): for dfas in self._nonterminal_to_dfas.values(): for dfa_state in dfas: - dfa_state.ilabel_to_plan = plans = {} - for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): - if terminal_or_nonterminal in self._nonterminal_to_dfas: - for t, pushes in self._first_plans[terminal_or_nonterminal].items(): - plans[self._make_label(t)] = DFAPlan(next_dfa, pushes) - else: - ilabel = self._make_label(terminal_or_nonterminal) - plans[ilabel] = DFAPlan(next_dfa) - - def _make_label(self, label): - if label[0].isalpha(): - # Either a nonterminal name or a named token - assert label not in self._nonterminal_to_dfas - - # A named token (e.g. 
NAME, NUMBER, STRING) - token_type = getattr(self._token_namespace, label, None) - return token_type - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - # TODO use literal_eval instead of a simple eval. - value = literal_eval(label) - try: - return self.reserved_syntax_strings[value] - except KeyError: - r = self.reserved_syntax_strings[value] = ReservedString(value) - return r + for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): + for transition, pushes in self._first_plans[nonterminal].items(): + dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) + #print(dfa_state.from_rule, dfa_state.ilabel_to_plan) def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] @@ -105,35 +74,35 @@ class Grammar(object): state = dfas[0] totalset = set() overlapcheck = {} - for nonterminal_or_string, next_ in state.arcs.items(): - if nonterminal_or_string in self._nonterminal_to_dfas: - # It's a nonterminal and we have either a left recursion issue - # in the grammar or we have to recurse. - try: - fset = self._first_terminals[nonterminal_or_string] - except KeyError: - self._calculate_first_terminals(nonterminal_or_string) - fset = self._first_terminals[nonterminal_or_string] - else: - if fset is None: - raise ValueError("left recursion for rule %r" % nonterminal) - totalset.update(fset) - overlapcheck[nonterminal_or_string] = fset + for transition, next_ in state.ilabel_to_plan.items(): + # It's a string. We have finally found a possible first token. + totalset.add(transition) + #overlapcheck[nonterminal] = set([transition]) + first_plans[transition] = [next_.next_dfa] - for t, pushes in self._first_plans[nonterminal_or_string].items(): - check = first_plans.get(t) - if check is not None: - raise ValueError( - "Rule %s is ambiguous; %s is the" - " start of the rule %s as well as %s." - % (nonterminal, t, nonterminal_or_string, check[-1].from_rule) - ) - first_plans[t] = [next_] + pushes + for nonterminal2, next_ in state.nonterminal_arcs.items(): + # It's a nonterminal and we have either a left recursion issue + # in the grammar or we have to recurse. + try: + fset = self._first_terminals[nonterminal2] + except KeyError: + self._calculate_first_terminals(nonterminal2) + fset = self._first_terminals[nonterminal2] else: - # It's a string. We have finally found a possible first token. - totalset.add(nonterminal_or_string) - overlapcheck[nonterminal_or_string] = set([nonterminal_or_string]) - first_plans[nonterminal_or_string] = [next_] + if fset is None: + raise ValueError("left recursion for rule %r" % nonterminal) + totalset.update(fset) + overlapcheck[nonterminal2] = fset + + for t, pushes in self._first_plans[nonterminal2].items(): + check = first_plans.get(t) + if check is not None: + raise ValueError( + "Rule %s is ambiguous; %s is the" + " start of the rule %s as well as %s." + % (nonterminal, t, nonterminal2, check[-1].from_rule) + ) + first_plans[t] = [next_] + pushes inverse = {} for nonterminal_or_string, first_set in overlapcheck.items(): diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index e194c3f..3dd9f2a 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -18,7 +18,9 @@ Specifying grammars in pgen is possible with this grammar:: This grammar is self-referencing. 
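 
 The meta-grammar is parsed with the hand-written GrammarParser (e.g. the
 rule `rule: NAME ':' rhs NEWLINE`); this module then turns the resulting
 NFAs into the DFAs the parser runs on.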
""" -from parso.pgen2.grammar import Grammar +from ast import literal_eval + +from parso.pgen2.grammar import Grammar, DFAPlan from parso.pgen2.grammar_parser import GrammarParser, NFAState @@ -32,6 +34,7 @@ class DFAState(object): self.is_final = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState self.ilabel_to_plan = {} + self.nonterminal_arcs = {} def add_arc(self, next_, label): assert isinstance(label, str) @@ -66,6 +69,14 @@ class DFAState(object): ) +class ReservedString(object): + def __init__(self, value): + self.value = value + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, self.value) + + def _simplify_dfas(dfas): # This is not theoretically optimal, but works well enough. # Algorithm: repeatedly look for two states that have the same @@ -184,4 +195,35 @@ def generate_grammar(bnf_grammar, token_namespace): if start_nonterminal is None: start_nonterminal = nfa_a.from_rule - return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, token_namespace) + reserved_strings = {} + for nonterminal, dfas in rule_to_dfas.items(): + for dfa_state in dfas: + for terminal_or_nonterminal, next_dfa in dfa_state.arcs.items(): + if terminal_or_nonterminal in rule_to_dfas: + dfa_state.nonterminal_arcs[terminal_or_nonterminal] = next_dfa + else: + transition = _make_transition( + token_namespace, + reserved_strings, + terminal_or_nonterminal + ) + dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa) + + return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, reserved_strings) + + +def _make_transition(token_namespace, reserved_syntax_strings, label): + if label[0].isalpha(): + # A named token (e.g. NAME, NUMBER, STRING) + return getattr(token_namespace, label) + else: + # Either a keyword or an operator + assert label[0] in ('"', "'"), label + assert not label.startswith('"""') and not label.startswith("'''") + # TODO use literal_eval instead of a simple eval. + value = literal_eval(label) + try: + return reserved_syntax_strings[value] + except KeyError: + r = reserved_syntax_strings[value] = ReservedString(value) + return r diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 88f6591..4dba172 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -282,3 +282,6 @@ def test_left_recursion(): def test_ambiguities(): with pytest.raises(ValueError, match='ambiguous'): generate_grammar('foo: bar | baz\nbar: NAME\nbaz: NAME\n', tokenize.PythonTokenTypes) + + with pytest.raises(ValueError, match='ambiguous'): + generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes) From e9fde82512c8ac9e729f477548ba64b4335b2f0d Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 01:00:06 +0200 Subject: [PATCH 48/76] Remove the overlapcheck, it's probably not needed anymore --- parso/pgen2/grammar.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 9409c4a..1a1a450 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -63,7 +63,6 @@ class Grammar(object): for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): for transition, pushes in self._first_plans[nonterminal].items(): dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) - #print(dfa_state.from_rule, dfa_state.ilabel_to_plan) def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] @@ -73,11 +72,9 @@ class Grammar(object): # interesting to find first terminals. 
state = dfas[0] totalset = set() - overlapcheck = {} for transition, next_ in state.ilabel_to_plan.items(): # It's a string. We have finally found a possible first token. totalset.add(transition) - #overlapcheck[nonterminal] = set([transition]) first_plans[transition] = [next_.next_dfa] for nonterminal2, next_ in state.nonterminal_arcs.items(): @@ -92,7 +89,6 @@ class Grammar(object): if fset is None: raise ValueError("left recursion for rule %r" % nonterminal) totalset.update(fset) - overlapcheck[nonterminal2] = fset for t, pushes in self._first_plans[nonterminal2].items(): check = first_plans.get(t) @@ -104,12 +100,4 @@ class Grammar(object): ) first_plans[t] = [next_] + pushes - inverse = {} - for nonterminal_or_string, first_set in overlapcheck.items(): - for terminal in first_set: - if terminal in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (nonterminal, terminal, nonterminal_or_string, inverse[terminal])) - inverse[terminal] = nonterminal_or_string self._first_terminals[nonterminal] = totalset From 5d46c3e18b5b2b0d340956f8c51e08784be578a9 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 01:04:22 +0200 Subject: [PATCH 49/76] Trying to reduce the amount of variables used in first sets --- parso/pgen2/grammar.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 1a1a450..74f9af4 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -71,10 +71,8 @@ class Grammar(object): # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] - totalset = set() for transition, next_ in state.ilabel_to_plan.items(): # It's a string. We have finally found a possible first token. - totalset.add(transition) first_plans[transition] = [next_.next_dfa] for nonterminal2, next_ in state.nonterminal_arcs.items(): @@ -84,11 +82,9 @@ class Grammar(object): fset = self._first_terminals[nonterminal2] except KeyError: self._calculate_first_terminals(nonterminal2) - fset = self._first_terminals[nonterminal2] else: if fset is None: raise ValueError("left recursion for rule %r" % nonterminal) - totalset.update(fset) for t, pushes in self._first_plans[nonterminal2].items(): check = first_plans.get(t) @@ -100,4 +96,4 @@ class Grammar(object): ) first_plans[t] = [next_] + pushes - self._first_terminals[nonterminal] = totalset + self._first_terminals[nonterminal] = 1 From 7b7b66eb3c208dd8e2719acda6991217fc644980 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 09:48:13 +0200 Subject: [PATCH 50/76] Get rid of the first_terminal variable in the grammar generator --- parso/pgen2/grammar.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index 74f9af4..c1c4aff 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -46,13 +46,12 @@ class Grammar(object): def _make_grammar(self): # Map from grammar rule (nonterminal) name to a set of tokens. 
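+        # More precisely, _first_plans maps each nonterminal to its possible
+        # first transitions, each paired with the list of DFAs to push.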
- self._first_terminals = {} self._first_plans = {} nonterminals = list(self._nonterminal_to_dfas.keys()) nonterminals.sort() for nonterminal in nonterminals: - if nonterminal not in self._first_terminals: + if nonterminal not in self._first_plans: self._calculate_first_terminals(nonterminal) # Now that we have calculated the first terminals, we are sure that @@ -66,34 +65,35 @@ class Grammar(object): def _calculate_first_terminals(self, nonterminal): dfas = self._nonterminal_to_dfas[nonterminal] - self._first_terminals[nonterminal] = None # dummy to detect left recursion - first_plans = self._first_plans[nonterminal] = {} + new_first_plans = {} + self._first_plans[nonterminal] = None # dummy to detect left recursion # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] - for transition, next_ in state.ilabel_to_plan.items(): - # It's a string. We have finally found a possible first token. - first_plans[transition] = [next_.next_dfa] - for nonterminal2, next_ in state.nonterminal_arcs.items(): # It's a nonterminal and we have either a left recursion issue # in the grammar or we have to recurse. try: - fset = self._first_terminals[nonterminal2] + first_plans2 = self._first_plans[nonterminal2] except KeyError: - self._calculate_first_terminals(nonterminal2) + first_plans2 = self._calculate_first_terminals(nonterminal2) else: - if fset is None: + if first_plans2 is None: raise ValueError("left recursion for rule %r" % nonterminal) - for t, pushes in self._first_plans[nonterminal2].items(): - check = first_plans.get(t) + for t, pushes in first_plans2.items(): + check = new_first_plans.get(t) if check is not None: raise ValueError( "Rule %s is ambiguous; %s is the" " start of the rule %s as well as %s." % (nonterminal, t, nonterminal2, check[-1].from_rule) ) - first_plans[t] = [next_] + pushes + new_first_plans[t] = [next_] + pushes - self._first_terminals[nonterminal] = 1 + for transition, next_ in state.ilabel_to_plan.items(): + # It's a string. We have finally found a possible first token. + new_first_plans[transition] = [next_.next_dfa] + + self._first_plans[nonterminal] = new_first_plans + return new_first_plans From c1675da0cbddf4e664e326a5fe94c3d752d562fe Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 09:56:49 +0200 Subject: [PATCH 51/76] Make nonterminal_to_dfas public --- parso/pgen2/grammar.py | 10 +++++----- parso/pgen2/parse.py | 2 +- parso/pgen2/pgen.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py index c1c4aff..2d7948d 100644 --- a/parso/pgen2/grammar.py +++ b/parso/pgen2/grammar.py @@ -36,8 +36,8 @@ class Grammar(object): do this (see the conv and pgen modules). """ - def __init__(self, bnf_grammar, start_nonterminal, rule_to_dfas, reserved_syntax_strings): - self._nonterminal_to_dfas = rule_to_dfas + def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): + self.nonterminal_to_dfas = rule_to_dfas self.reserved_syntax_strings = reserved_syntax_strings self.start_nonterminal = start_nonterminal @@ -48,7 +48,7 @@ class Grammar(object): # Map from grammar rule (nonterminal) name to a set of tokens. 
self._first_plans = {} - nonterminals = list(self._nonterminal_to_dfas.keys()) + nonterminals = list(self.nonterminal_to_dfas.keys()) nonterminals.sort() for nonterminal in nonterminals: if nonterminal not in self._first_plans: @@ -57,14 +57,14 @@ class Grammar(object): # Now that we have calculated the first terminals, we are sure that # there is no left recursion or ambiguities. - for dfas in self._nonterminal_to_dfas.values(): + for dfas in self.nonterminal_to_dfas.values(): for dfa_state in dfas: for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): for transition, pushes in self._first_plans[nonterminal].items(): dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) def _calculate_first_terminals(self, nonterminal): - dfas = self._nonterminal_to_dfas[nonterminal] + dfas = self.nonterminal_to_dfas[nonterminal] new_first_plans = {} self._first_plans[nonterminal] = None # dummy to detect left recursion # We only need to check the first dfa. All the following ones are not diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 646eb58..3965d2d 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -138,7 +138,7 @@ class PgenParser(object): self.convert_node = convert_node self.convert_leaf = convert_leaf - self.stack = Stack([StackNode(grammar._nonterminal_to_dfas[start_nonterminal][0])]) + self.stack = Stack([StackNode(grammar.nonterminal_to_dfas[start_nonterminal][0])]) self.error_recovery = error_recovery def parse(self, tokens): diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 3dd9f2a..d211941 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -209,7 +209,7 @@ def generate_grammar(bnf_grammar, token_namespace): ) dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa) - return Grammar(bnf_grammar, start_nonterminal, rule_to_dfas, reserved_strings) + return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) def _make_transition(token_namespace, reserved_syntax_strings, label): From 30cf491b4f7daa790ba8300bf05bf5557ff69359 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 10:08:44 +0200 Subject: [PATCH 52/76] Move the Grammar to the pgen module --- parso/pgen2/grammar.py | 99 ------------------------------------------ parso/pgen2/pgen.py | 84 ++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 100 deletions(-) delete mode 100644 parso/pgen2/grammar.py diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py deleted file mode 100644 index 2d7948d..0000000 --- a/parso/pgen2/grammar.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright David Halter and Contributors -# Modifications are dual-licensed: MIT and PSF. - -"""This module defines the data structures used to represent a grammar. - -These are a bit arcane because they are derived from the data -structures used by Python's 'pgen' parser generator. - -There's also a table here mapping operators to their names in the -token module; the Python tokenize module reports all operators as the -fallback token code OP, but the parser needs the actual token code. - -""" - - -class DFAPlan(object): - def __init__(self, next_dfa, dfa_pushes=[]): - self.next_dfa = next_dfa - self.dfa_pushes = dfa_pushes - - def __repr__(self): - return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) - - -class Grammar(object): - """Pgen parsing tables conversion class. 
- - Once initialized, this class supplies the grammar tables for the - parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). - """ - - def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): - self.nonterminal_to_dfas = rule_to_dfas - - self.reserved_syntax_strings = reserved_syntax_strings - self.start_nonterminal = start_nonterminal - - self._make_grammar() - - def _make_grammar(self): - # Map from grammar rule (nonterminal) name to a set of tokens. - self._first_plans = {} - - nonterminals = list(self.nonterminal_to_dfas.keys()) - nonterminals.sort() - for nonterminal in nonterminals: - if nonterminal not in self._first_plans: - self._calculate_first_terminals(nonterminal) - - # Now that we have calculated the first terminals, we are sure that - # there is no left recursion or ambiguities. - - for dfas in self.nonterminal_to_dfas.values(): - for dfa_state in dfas: - for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): - for transition, pushes in self._first_plans[nonterminal].items(): - dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) - - def _calculate_first_terminals(self, nonterminal): - dfas = self.nonterminal_to_dfas[nonterminal] - new_first_plans = {} - self._first_plans[nonterminal] = None # dummy to detect left recursion - # We only need to check the first dfa. All the following ones are not - # interesting to find first terminals. - state = dfas[0] - for nonterminal2, next_ in state.nonterminal_arcs.items(): - # It's a nonterminal and we have either a left recursion issue - # in the grammar or we have to recurse. - try: - first_plans2 = self._first_plans[nonterminal2] - except KeyError: - first_plans2 = self._calculate_first_terminals(nonterminal2) - else: - if first_plans2 is None: - raise ValueError("left recursion for rule %r" % nonterminal) - - for t, pushes in first_plans2.items(): - check = new_first_plans.get(t) - if check is not None: - raise ValueError( - "Rule %s is ambiguous; %s is the" - " start of the rule %s as well as %s." - % (nonterminal, t, nonterminal2, check[-1].from_rule) - ) - new_first_plans[t] = [next_] + pushes - - for transition, next_ in state.ilabel_to_plan.items(): - # It's a string. We have finally found a possible first token. - new_first_plans[transition] = [next_.next_dfa] - - self._first_plans[nonterminal] = new_first_plans - return new_first_plans diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index d211941..9a7d239 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -6,6 +6,8 @@ # Modifications are dual-licensed: MIT and PSF. """ +This module defines the data structures used to represent a grammar. + Specifying grammars in pgen is possible with this grammar:: grammar: (NEWLINE | rule)* ENDMARKER @@ -20,10 +22,90 @@ This grammar is self-referencing. from ast import literal_eval -from parso.pgen2.grammar import Grammar, DFAPlan from parso.pgen2.grammar_parser import GrammarParser, NFAState +class DFAPlan(object): + def __init__(self, next_dfa, dfa_pushes=[]): + self.next_dfa = next_dfa + self.dfa_pushes = dfa_pushes + + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) + + +class Grammar(object): + """Pgen parsing tables conversion class. 
+ + Once initialized, this class supplies the grammar tables for the + parsing engine implemented by parse.py. The parsing engine + accesses the instance variables directly. The class here does not + provide initialization of the tables; several subclasses exist to + do this (see the conv and pgen modules). + """ + + def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): + self.nonterminal_to_dfas = rule_to_dfas # Dict[str, List[DFAState]] + self.reserved_syntax_strings = reserved_syntax_strings + self.start_nonterminal = start_nonterminal + + self._make_grammar() + + def _make_grammar(self): + # Map from grammar rule (nonterminal) name to a set of tokens. + self._first_plans = {} + + nonterminals = list(self.nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in self._first_plans: + self._calculate_first_terminals(nonterminal) + + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion or ambiguities. + + for dfas in self.nonterminal_to_dfas.values(): + for dfa_state in dfas: + for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): + for transition, pushes in self._first_plans[nonterminal].items(): + dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) + + def _calculate_first_terminals(self, nonterminal): + dfas = self.nonterminal_to_dfas[nonterminal] + new_first_plans = {} + self._first_plans[nonterminal] = None # dummy to detect left recursion + # We only need to check the first dfa. All the following ones are not + # interesting to find first terminals. + state = dfas[0] + for nonterminal2, next_ in state.nonterminal_arcs.items(): + # It's a nonterminal and we have either a left recursion issue + # in the grammar or we have to recurse. + try: + first_plans2 = self._first_plans[nonterminal2] + except KeyError: + first_plans2 = self._calculate_first_terminals(nonterminal2) + else: + if first_plans2 is None: + raise ValueError("left recursion for rule %r" % nonterminal) + + for t, pushes in first_plans2.items(): + check = new_first_plans.get(t) + if check is not None: + raise ValueError( + "Rule %s is ambiguous; %s is the" + " start of the rule %s as well as %s." + % (nonterminal, t, nonterminal2, check[-1].from_rule) + ) + new_first_plans[t] = [next_] + pushes + + for transition, next_ in state.ilabel_to_plan.items(): + # It's a string. We have finally found a possible first token. 
+ new_first_plans[transition] = [next_.next_dfa] + + self._first_plans[nonterminal] = new_first_plans + return new_first_plans + + class DFAState(object): def __init__(self, from_rule, nfa_set, final): assert isinstance(nfa_set, set) From 4cf198285adbfea1a152712ce7bc0e516a8adea2 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 10:15:31 +0200 Subject: [PATCH 53/76] Move things out of the grammar class --- parso/pgen2/pgen.py | 128 ++++++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 9a7d239..e64577e 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -25,18 +25,8 @@ from ast import literal_eval from parso.pgen2.grammar_parser import GrammarParser, NFAState -class DFAPlan(object): - def __init__(self, next_dfa, dfa_pushes=[]): - self.next_dfa = next_dfa - self.dfa_pushes = dfa_pushes - - def __repr__(self): - return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) - - class Grammar(object): - """Pgen parsing tables conversion class. - + """ Once initialized, this class supplies the grammar tables for the parsing engine implemented by parse.py. The parsing engine accesses the instance variables directly. The class here does not @@ -49,61 +39,14 @@ class Grammar(object): self.reserved_syntax_strings = reserved_syntax_strings self.start_nonterminal = start_nonterminal - self._make_grammar() - def _make_grammar(self): - # Map from grammar rule (nonterminal) name to a set of tokens. - self._first_plans = {} +class DFAPlan(object): + def __init__(self, next_dfa, dfa_pushes=[]): + self.next_dfa = next_dfa + self.dfa_pushes = dfa_pushes - nonterminals = list(self.nonterminal_to_dfas.keys()) - nonterminals.sort() - for nonterminal in nonterminals: - if nonterminal not in self._first_plans: - self._calculate_first_terminals(nonterminal) - - # Now that we have calculated the first terminals, we are sure that - # there is no left recursion or ambiguities. - - for dfas in self.nonterminal_to_dfas.values(): - for dfa_state in dfas: - for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): - for transition, pushes in self._first_plans[nonterminal].items(): - dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) - - def _calculate_first_terminals(self, nonterminal): - dfas = self.nonterminal_to_dfas[nonterminal] - new_first_plans = {} - self._first_plans[nonterminal] = None # dummy to detect left recursion - # We only need to check the first dfa. All the following ones are not - # interesting to find first terminals. - state = dfas[0] - for nonterminal2, next_ in state.nonterminal_arcs.items(): - # It's a nonterminal and we have either a left recursion issue - # in the grammar or we have to recurse. - try: - first_plans2 = self._first_plans[nonterminal2] - except KeyError: - first_plans2 = self._calculate_first_terminals(nonterminal2) - else: - if first_plans2 is None: - raise ValueError("left recursion for rule %r" % nonterminal) - - for t, pushes in first_plans2.items(): - check = new_first_plans.get(t) - if check is not None: - raise ValueError( - "Rule %s is ambiguous; %s is the" - " start of the rule %s as well as %s." - % (nonterminal, t, nonterminal2, check[-1].from_rule) - ) - new_first_plans[t] = [next_] + pushes - - for transition, next_ in state.ilabel_to_plan.items(): - # It's a string. We have finally found a possible first token. 
- new_first_plans[transition] = [next_.next_dfa] - - self._first_plans[nonterminal] = new_first_plans - return new_first_plans + def __repr__(self): + return '%s(%s, %s)' % (self.__class__.__name__, self.next_dfa, self.dfa_pushes) class DFAState(object): @@ -291,6 +234,7 @@ def generate_grammar(bnf_grammar, token_namespace): ) dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa) + _calculate_tree_traversal(rule_to_dfas) return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) @@ -309,3 +253,59 @@ def _make_transition(token_namespace, reserved_syntax_strings, label): except KeyError: r = reserved_syntax_strings[value] = ReservedString(value) return r + + +def _calculate_tree_traversal(nonterminal_to_dfas): + # Map from grammar rule (nonterminal) name to a set of tokens. + first_plans = {} + + nonterminals = list(nonterminal_to_dfas.keys()) + nonterminals.sort() + for nonterminal in nonterminals: + if nonterminal not in first_plans: + _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal) + + # Now that we have calculated the first terminals, we are sure that + # there is no left recursion or ambiguities. + + for dfas in nonterminal_to_dfas.values(): + for dfa_state in dfas: + for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): + for transition, pushes in first_plans[nonterminal].items(): + dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) + + +def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): + dfas = nonterminal_to_dfas[nonterminal] + new_first_plans = {} + first_plans[nonterminal] = None # dummy to detect left recursion + # We only need to check the first dfa. All the following ones are not + # interesting to find first terminals. + state = dfas[0] + for nonterminal2, next_ in state.nonterminal_arcs.items(): + # It's a nonterminal and we have either a left recursion issue + # in the grammar or we have to recurse. + try: + first_plans2 = first_plans[nonterminal2] + except KeyError: + first_plans2 = _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal2) + else: + if first_plans2 is None: + raise ValueError("left recursion for rule %r" % nonterminal) + + for t, pushes in first_plans2.items(): + check = new_first_plans.get(t) + if check is not None: + raise ValueError( + "Rule %s is ambiguous; %s is the" + " start of the rule %s as well as %s." + % (nonterminal, t, nonterminal2, check[-1].from_rule) + ) + new_first_plans[t] = [next_] + pushes + + for transition, next_ in state.ilabel_to_plan.items(): + # It's a string. We have finally found a possible first token. + new_first_plans[transition] = [next_.next_dfa] + + first_plans[nonterminal] = new_first_plans + return new_first_plans From e20f2069baf153987f4641ebb5778b6d1cc4ab22 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 10:20:05 +0200 Subject: [PATCH 54/76] Move the grammar to a fitting file. 
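
For reference, the moved generator can be exercised through the package root
after this change. A minimal sketch (illustrative only, not part of the
original commit; the attribute names are the ones that exist at this point in
the series):

    from parso.pgen2 import generate_grammar
    from parso.python import tokenize

    # Build the DFAs for a tiny two-rule grammar, using the same token
    # namespace as the tests in test_pgen2.py.
    grammar = generate_grammar("foo: 'x' bar\nbar: NAME\n",
                               tokenize.PythonTokenTypes)

    # The first DFA state of a nonterminal holds one DFAPlan per possible
    # first token: the DFA to advance to and the nonterminal DFAs to push.
    first_dfa = grammar.nonterminal_to_dfas['foo'][0]
    for transition, plan in first_dfa.ilabel_to_plan.items():
        print(transition, plan)  # e.g. ReservedString(x) DFAPlan(...)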
--- parso/grammar.py | 2 +- parso/pgen2/__init__.py | 2 ++ parso/pgen2/{pgen.py => generator.py} | 0 test/test_pgen2.py | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) rename parso/pgen2/{pgen.py => generator.py} (100%) diff --git a/parso/grammar.py b/parso/grammar.py index 981a0fc..e77d83c 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -2,7 +2,7 @@ import hashlib import os from parso._compatibility import FileNotFoundError, is_pypy -from parso.pgen2.pgen import generate_grammar +from parso.pgen2 import generate_grammar from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize diff --git a/parso/pgen2/__init__.py b/parso/pgen2/__init__.py index 5e4cfc0..d4d9dcd 100644 --- a/parso/pgen2/__init__.py +++ b/parso/pgen2/__init__.py @@ -6,3 +6,5 @@ # Licensed to PSF under a Contributor Agreement. # Copyright 2014 David Halter and Contributors # Modifications are dual-licensed: MIT and PSF. + +from parso.pgen2.generator import generate_grammar diff --git a/parso/pgen2/pgen.py b/parso/pgen2/generator.py similarity index 100% rename from parso/pgen2/pgen.py rename to parso/pgen2/generator.py diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 4dba172..6ea96a4 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -12,7 +12,7 @@ import pytest from parso import load_grammar from parso import ParserSyntaxError -from parso.pgen2.pgen import generate_grammar +from parso.pgen2 import generate_grammar from parso.python import tokenize From 91d864b23d8a599a92b247a8f0215f8b027c954f Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 10:22:38 +0200 Subject: [PATCH 55/76] Make it clearer which things are public in pgen --- parso/parser.py | 2 +- parso/pgen2/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/parso/parser.py b/parso/parser.py index 6b906d2..fb37d6f 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -16,7 +16,7 @@ complexity of the ``Parser`` (there's another parser sitting inside ``Statement``, which produces ``Array`` and ``Call``). """ from parso import tree -from parso.pgen2.parse import PgenParser +from parso.pgen2 import PgenParser class ParserSyntaxError(Exception): diff --git a/parso/pgen2/__init__.py b/parso/pgen2/__init__.py index d4d9dcd..921a1ce 100644 --- a/parso/pgen2/__init__.py +++ b/parso/pgen2/__init__.py @@ -8,3 +8,4 @@ # Modifications are dual-licensed: MIT and PSF. from parso.pgen2.generator import generate_grammar +from parso.pgen2.parse import PgenParser From f66e47c5407c197cc2ea11c930cbc190814e3674 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 22:53:02 +0200 Subject: [PATCH 56/76] Check better for more transitions --- parso/pgen2/generator.py | 8 ++++---- test/test_pgen2.py | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index e64577e..d5355d3 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -282,6 +282,10 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] + for transition, next_ in state.ilabel_to_plan.items(): + # It's a string. We have finally found a possible first token. 
+ new_first_plans[transition] = [next_.next_dfa] + for nonterminal2, next_ in state.nonterminal_arcs.items(): # It's a nonterminal and we have either a left recursion issue # in the grammar or we have to recurse. @@ -303,9 +307,5 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): ) new_first_plans[t] = [next_] + pushes - for transition, next_ in state.ilabel_to_plan.items(): - # It's a string. We have finally found a possible first token. - new_first_plans[transition] = [next_.next_dfa] - first_plans[nonterminal] = new_first_plans return new_first_plans diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 6ea96a4..fe22d5b 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -285,3 +285,6 @@ def test_ambiguities(): with pytest.raises(ValueError, match='ambiguous'): generate_grammar('''foo: bar | baz\nbar: 'x'\nbaz: "x"\n''', tokenize.PythonTokenTypes) + + with pytest.raises(ValueError, match='ambiguous'): + generate_grammar('''foo: bar | 'x'\nbar: 'x'\n''', tokenize.PythonTokenTypes) From e4efebc9f3dfd861a3ccfaaa6e18716ac1aab74c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Tue, 26 Jun 2018 23:05:04 +0200 Subject: [PATCH 57/76] s/ilabel/transition/g --- parso/pgen2/generator.py | 8 ++++---- parso/pgen2/parse.py | 6 +++--- parso/python/parser.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index d5355d3..2704336 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -58,7 +58,7 @@ class DFAState(object): self.nfa_set = nfa_set self.is_final = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState - self.ilabel_to_plan = {} + self.transition_to_plan = {} self.nonterminal_arcs = {} def add_arc(self, next_, label): @@ -232,7 +232,7 @@ def generate_grammar(bnf_grammar, token_namespace): reserved_strings, terminal_or_nonterminal ) - dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa) + dfa_state.transition_to_plan[transition] = DFAPlan(next_dfa) _calculate_tree_traversal(rule_to_dfas) return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) @@ -272,7 +272,7 @@ def _calculate_tree_traversal(nonterminal_to_dfas): for dfa_state in dfas: for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): for transition, pushes in first_plans[nonterminal].items(): - dfa_state.ilabel_to_plan[transition] = DFAPlan(next_dfa, pushes) + dfa_state.transition_to_plan[transition] = DFAPlan(next_dfa, pushes) def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): @@ -282,7 +282,7 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] - for transition, next_ in state.ilabel_to_plan.items(): + for transition, next_ in state.transition_to_plan.items(): # It's a string. We have finally found a possible first token. 
new_first_plans[transition] = [next_.next_dfa] diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index 3965d2d..fd85714 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -65,7 +65,7 @@ class StackNode(object): return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes) -def token_to_ilabel(grammar, type_, value): +def _token_to_transition(grammar, type_, value): # Map from token to label if type_.contains_syntax: # Check for reserved words (keywords) @@ -157,13 +157,13 @@ class PgenParser(object): def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" - ilabel = token_to_ilabel(self.grammar, type_, value) + ilabel = _token_to_transition(self.grammar, type_, value) stack = self.stack grammar = self.grammar while True: try: - plan = stack[-1].dfa.ilabel_to_plan[ilabel] + plan = stack[-1].dfa.transition_to_plan[ilabel] break except KeyError: if stack[-1].dfa.is_final: diff --git a/parso/python/parser.py b/parso/python/parser.py index a11cf70..be13490 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -163,7 +163,7 @@ class Parser(BaseParser): # error recovery. if stack[-1].dfa.from_rule == 'simple_stmt': try: - plan = stack[-1].dfa.ilabel_to_plan[PythonTokenTypes.NEWLINE] + plan = stack[-1].dfa.transition_to_plan[PythonTokenTypes.NEWLINE] except KeyError: pass else: From 8407894b25ecbc05e970915831ad1a3a53c0d623 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 27 Jun 2018 00:15:00 +0200 Subject: [PATCH 58/76] Fix python 2 tests --- test/test_error_recovery.py | 11 +++++++---- test/test_tokenize.py | 3 ++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/test/test_error_recovery.py b/test/test_error_recovery.py index 0d90907..af6137b 100644 --- a/test/test_error_recovery.py +++ b/test/test_error_recovery.py @@ -11,16 +11,19 @@ def test_with_stmt(): assert module.children[2].type == 'name' -def test_one_line_function(): - module = parse('def x(): f.') +def test_one_line_function(each_version): + module = parse('def x(): f.', version=each_version) assert module.children[0].type == 'funcdef' def_, name, parameters, colon, f = module.children[0].children assert f.type == 'error_node' - module = parse('def x(a:') + module = parse('def x(a:', version=each_version) func = module.children[0] assert func.type == 'error_node' - assert func.children[-1] == ':' + if each_version.startswith('2'): + assert func.children[-1].value == 'a' + else: + assert func.children[-1] == ':' def test_if_stmt(): diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 6593ff8..bab6439 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -19,6 +19,7 @@ STRING = PythonTokenTypes.STRING INDENT = PythonTokenTypes.INDENT DEDENT = PythonTokenTypes.DEDENT ERRORTOKEN = PythonTokenTypes.ERRORTOKEN +OP = PythonTokenTypes.OP ENDMARKER = PythonTokenTypes.ENDMARKER ERROR_DEDENT = PythonTokenTypes.ERROR_DEDENT FSTRING_START = PythonTokenTypes.FSTRING_START @@ -137,7 +138,7 @@ def test_identifier_contains_unicode(): else: # Unicode tokens in Python 2 seem to be identified as operators. # They will be ignored in the parser, that's ok. 
- assert unicode_token[0] == ERRORTOKEN + assert unicode_token[0] == OP def test_quoted_strings(): From b14f5183066ac5ca1eece5ae4a7b4f3b06a5b3b7 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 27 Jun 2018 00:18:27 +0200 Subject: [PATCH 59/76] Rename the last usage of ilabel to transition --- parso/pgen2/parse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py index fd85714..2b679b5 100644 --- a/parso/pgen2/parse.py +++ b/parso/pgen2/parse.py @@ -157,13 +157,13 @@ class PgenParser(object): def add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" - ilabel = _token_to_transition(self.grammar, type_, value) + transition = _token_to_transition(self.grammar, type_, value) stack = self.stack grammar = self.grammar while True: try: - plan = stack[-1].dfa.transition_to_plan[ilabel] + plan = stack[-1].dfa.transition_to_plan[transition] break except KeyError: if stack[-1].dfa.is_final: From a9e40eb57843b19832515290ec2398595678cb7e Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Wed, 27 Jun 2018 22:21:17 +0200 Subject: [PATCH 60/76] Simplify error recovery for suites --- parso/python/parser.py | 18 +++++++----------- test/test_error_recovery.py | 29 ++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index be13490..7d4592a 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -182,19 +182,16 @@ class Parser(BaseParser): def current_suite(stack): # For now just discard everything that is not a suite or # file_input, if we detect an error. - one_line_suite = False for until_index, stack_node in reversed(list(enumerate(stack))): # `suite` can sometimes be only simple_stmt, not stmt. - if one_line_suite: - break - elif stack_node.nonterminal == 'file_input': + if stack_node.nonterminal == 'file_input': break elif stack_node.nonterminal == 'suite': - if len(stack_node.nodes) > 1: + # In the case where we just have a newline we don't want to + # do error recovery here. In all other cases, we want to do + # error recovery. + if len(stack_node.nodes) != 1: break - elif not stack_node.nodes: - one_line_suite = True - # `suite` without an indent are error nodes. return until_index until_index = current_suite(stack) @@ -221,9 +218,8 @@ class Parser(BaseParser): pass def _stack_removal(self, stack, start_index): - all_nodes = [] - for stack_node in stack[start_index:]: - all_nodes += stack_node.nodes + all_nodes = [node for stack_node in stack[start_index:] for node in stack_node.nodes] + if all_nodes: stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes)) diff --git a/test/test_error_recovery.py b/test/test_error_recovery.py index af6137b..f8dcd94 100644 --- a/test/test_error_recovery.py +++ b/test/test_error_recovery.py @@ -26,13 +26,36 @@ def test_one_line_function(each_version): assert func.children[-1] == ':' +def test_if_else(): + module = parse('if x:\n f.\nelse:\n g(') + if_stmt = module.children[0] + if_, test, colon, suite1, else_, colon, suite2 = if_stmt.children + f = suite1.children[1] + assert f.type == 'error_node' + assert f.children[0].value == 'f' + assert f.children[1].value == '.' 
+
+    g = suite2.children[1]
+    assert g.children[0].value == 'g'
+    assert g.children[1].value == '('
+
+
 def test_if_stmt():
-    module = parse('if x: f.')# \nelse: g(
+    module = parse('if x: f.\nelse: g(')
     if_stmt = module.children[0]
     assert if_stmt.type == 'if_stmt'
     if_, test, colon, f = if_stmt.children
     assert f.type == 'error_node'
     assert f.children[0].value == 'f'
     assert f.children[1].value == '.'
-    #assert g.children[0].value == 'g'
-    #assert g.children[1].value == '('
+
+    assert module.children[1].type == 'newline'
+    assert module.children[1].value == '\n'
+    assert module.children[2].type == 'error_leaf'
+    assert module.children[2].value == 'else'
+    assert module.children[3].type == 'error_leaf'
+    assert module.children[3].value == ':'
+
+    in_else_stmt = module.children[4]
+    assert in_else_stmt.type == 'error_node'
+    assert in_else_stmt.children[0].value == 'g'
+    assert in_else_stmt.children[1].value == '('

From edce279dee2848e9c35959221e43d5f0e34c1947 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Wed, 27 Jun 2018 23:19:57 +0200
Subject: [PATCH 61/76] Remove a function that was no longer used

---
 parso/python/parser.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/parso/python/parser.py b/parso/python/parser.py
index 7d4592a..a18309c 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -148,15 +148,6 @@ class Parser(BaseParser):
         if self._start_nonterminal == 'file_input' and \
                 (typ == PythonTokenTypes.ENDMARKER or
                  typ == DEDENT and '\n' not in last_leaf.value):
-            def reduce_stack(states, newstate):
-                # reduce
-                state = newstate
-                while states[state] == [(0, state)]:
-                    self.pgen_parser._pop()
-
-                    dfa, state, (type_, nodes) = stack[-1]
-                    states, first = dfa
-
             # In Python, statements need to end with a newline. But since it's
             # possible (and valid in Python) that there's no newline at the
             # end of a file, we have to recover even if the user doesn't want

From f7d3d4e82f7d362bfad843e7a4f7b0cff14def38 Mon Sep 17 00:00:00 2001
From: Dave Halter
Date: Wed, 27 Jun 2018 23:45:04 +0200
Subject: [PATCH 62/76] Merge the PgenParser and our own parser

---
 parso/parser.py         | 150 ++++++++++++++++++++++++++++--
 parso/pgen2/__init__.py |   1 -
 parso/pgen2/parse.py    | 201 ----------------------------------------
 parso/python/diff.py    |   2 +-
 4 files changed, 142 insertions(+), 212 deletions(-)
 delete mode 100644 parso/pgen2/parse.py

diff --git a/parso/parser.py b/parso/parser.py
index fb37d6f..d9f0029 100644
--- a/parso/parser.py
+++ b/parso/parser.py
@@ -1,3 +1,11 @@
+# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+# Modifications:
+# Copyright David Halter and Contributors
+# Modifications are dual-licensed: MIT and PSF.
+# 99% of the code is different from pgen2, now.
+
 """
 The ``Parser`` tries to convert the available Python code in an easy to read
 format, something like an abstract syntax tree. The classes who represent this
@@ -16,7 +24,6 @@ complexity of the ``Parser`` (there's another parser sitting inside
 ``Statement``, which produces ``Array`` and ``Call``).
 """
 from parso import tree
-from parso.pgen2 import PgenParser
 
 
 class ParserSyntaxError(Exception):
@@ -30,7 +37,81 @@ class ParserSyntaxError(Exception):
         self.error_leaf = error_leaf
 
 
+class InternalParseError(Exception):
+    """
+    Exception to signal the parser is stuck and error recovery didn't help.
+    Basically this shouldn't happen. It's a sign that something is really
+    wrong.
+    """
+
+    def __init__(self, msg, type_, value, start_pos):
+        Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
+                           (msg, type_.name, value, start_pos))
+        self.msg = msg
+        self.type = type_
+        self.value = value
+        self.start_pos = start_pos
+
+
+class Stack(list):
+    def get_tos_nodes(self):
+        tos = self[-1]
+        return tos[2][1]
+
+    def get_tos_first_tokens(self, grammar):
+        tos = self[-1]
+        inv_tokens = dict((v, k) for k, v in grammar.tokens.items())
+        inv_keywords = dict((v, k) for k, v in grammar.keywords.items())
+        dfa, state, nodes = tos
+
+        def check():
+            for first in dfa[1]:
+                try:
+                    yield inv_keywords[first]
+                except KeyError:
+                    yield tokenize.tok_name[inv_tokens[first]]
+
+        return sorted(check())
+
+
+class StackNode(object):
+    def __init__(self, dfa):
+        self.dfa = dfa
+        self.nodes = []
+
+    @property
+    def nonterminal(self):
+        return self.dfa.from_rule
+
+    def __repr__(self):
+        return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
+
+
+def _token_to_transition(grammar, type_, value):
+    # Map from token to label
+    if type_.contains_syntax:
+        # Check for reserved words (keywords)
+        try:
+            return grammar.reserved_syntax_strings[value]
+        except KeyError:
+            pass
+
+    return type_
+
+
+
 class BaseParser(object):
+    """Parser engine.
+
+    A Parser instance contains state pertaining to the current token
+    sequence, and should not be used concurrently by different threads
+    to parse separate token sequences.
+
+    See python/tokenize.py for how to get input tokens from a string.
+
+    When a syntax error occurs, error_recovery() is called.
+    """
+
     node_map = {}
     default_node = tree.Node
 
@@ -44,15 +125,21 @@ class BaseParser(object):
         self._error_recovery = error_recovery
 
     def parse(self, tokens):
-        self.pgen_parser = PgenParser(
-            self._pgen_grammar, self.convert_node, self.convert_leaf,
-            self.error_recovery, self._start_nonterminal
-        )
+        first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
+        self.stack = Stack([StackNode(first_dfa)])
 
-        node = self.pgen_parser.parse(tokens)
-        # The stack is empty now, we don't need it anymore.
-        del self.pgen_parser
-        return node
+        for type_, value, start_pos, prefix in tokens:
+            self.add_token(type_, value, start_pos, prefix)
+
+        while self.stack and self.stack[-1].dfa.is_final:
+            self._pop()
+
+        if self.stack:
+            # We never broke out -- EOF is too soon -- Unfinished statement.
+            # However, the error recovery might have added the token again, if
+            # the stack is empty, we're fine.
+ raise InternalParseError("incomplete input", type_, value, start_pos) + return self.rootnode def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix, add_token_callback): @@ -73,3 +160,48 @@ class BaseParser(object): return self.leaf_map[type_](value, start_pos, prefix) except KeyError: return self.default_leaf(value, start_pos, prefix) + + def add_token(self, type_, value, start_pos, prefix): + """Add a token; return True if this is the end of the program.""" + grammar = self._pgen_grammar + stack = self.stack + transition = _token_to_transition(grammar, type_, value) + + while True: + try: + plan = stack[-1].dfa.transition_to_plan[transition] + break + except KeyError: + if stack[-1].dfa.is_final: + self._pop() + else: + self.error_recovery(grammar, stack, type_, + value, start_pos, prefix, self.add_token) + return + except IndexError: + raise InternalParseError("too much input", type_, value, start_pos) + + stack[-1].dfa = plan.next_dfa + + for push in plan.dfa_pushes: + stack.append(StackNode(push)) + + leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) + stack[-1].nodes.append(leaf) + + def _pop(self): + tos = self.stack.pop() + # If there's exactly one child, return that child instead of + # creating a new node. We still create expr_stmt and + # file_input though, because a lot of Jedi depends on its + # logic. + if len(tos.nodes) == 1: + new_node = tos.nodes[0] + else: + new_node = self.convert_node(self._pgen_grammar, tos.dfa.from_rule, tos.nodes) + + try: + self.stack[-1].nodes.append(new_node) + except IndexError: + # Stack is empty, set the rootnode. + self.rootnode = new_node diff --git a/parso/pgen2/__init__.py b/parso/pgen2/__init__.py index 921a1ce..d4d9dcd 100644 --- a/parso/pgen2/__init__.py +++ b/parso/pgen2/__init__.py @@ -8,4 +8,3 @@ # Modifications are dual-licensed: MIT and PSF. from parso.pgen2.generator import generate_grammar -from parso.pgen2.parse import PgenParser diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py deleted file mode 100644 index 2b679b5..0000000 --- a/parso/pgen2/parse.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright David Halter and Contributors -# Modifications are dual-licensed: MIT and PSF. - -""" -Parser engine for the grammar tables generated by pgen. - -The grammar table must be loaded first. - -See Parser/parser.c in the Python distribution for additional info on -how this parsing engine works. -""" - - -class InternalParseError(Exception): - """ - Exception to signal the parser is stuck and error recovery didn't help. - Basically this shouldn't happen. It's a sign that something is really - wrong. 
- """ - - def __init__(self, msg, type_, value, start_pos): - Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" % - (msg, type_.name, value, start_pos)) - self.msg = msg - self.type = type - self.value = value - self.start_pos = start_pos - - -class Stack(list): - def get_tos_nodes(self): - tos = self[-1] - return tos[2][1] - - def get_tos_first_tokens(self, grammar): - tos = self[-1] - inv_tokens = dict((v, k) for k, v in grammar.tokens.items()) - inv_keywords = dict((v, k) for k, v in grammar.keywords.items()) - dfa, state, nodes = tos - - def check(): - for first in dfa[1]: - try: - yield inv_keywords[first] - except KeyError: - yield tokenize.tok_name[inv_tokens[first]] - - return sorted(check()) - - -class StackNode(object): - def __init__(self, dfa): - self.dfa = dfa - self.nodes = [] - - @property - def nonterminal(self): - return self.dfa.from_rule - - def __repr__(self): - return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes) - - -def _token_to_transition(grammar, type_, value): - # Map from token to label - if type_.contains_syntax: - # Check for reserved words (keywords) - try: - return grammar.reserved_syntax_strings[value] - except KeyError: - pass - - return type_ - - -class PgenParser(object): - """Parser engine. - - The proper usage sequence is: - - p = Parser(grammar, [converter]) # create instance - p.setup([start]) # prepare for parsing - : - if p.add_token(...): # parse a token - break - root = p.rootnode # root of abstract syntax tree - - A Parser instance may be reused by calling setup() repeatedly. - - A Parser instance contains state pertaining to the current token - sequence, and should not be used concurrently by different threads - to parse separate token sequences. - - See driver.py for how to get input tokens by tokenizing a file or - string. - - Parsing is complete when add_token() returns True; the root of the - abstract syntax tree can then be retrieved from the rootnode - instance variable. When a syntax error occurs, error_recovery() - is called. There is no error recovery; the parser cannot be used - after a syntax error was reported (but it can be reinitialized by - calling setup()). - - """ - - def __init__(self, grammar, convert_node, convert_leaf, error_recovery, - start_nonterminal): - """Constructor. - - The grammar argument is a grammar.Grammar instance; see the - grammar module for more information. - - The parser is not ready yet for parsing; you must call the - setup() method to get it started. - - The optional convert argument is a function mapping concrete - syntax tree nodes to abstract syntax tree nodes. If not - given, no conversion is done and the syntax tree produced is - the concrete syntax tree. If given, it must be a function of - two arguments, the first being the grammar (a grammar.Grammar - instance), and the second being the concrete syntax tree node - to be converted. The syntax tree is converted from the bottom - up. - - A concrete syntax tree node is a (type, nodes) tuple, where - type is the node type (a token or nonterminal number) and nodes - is a list of children for nonterminals, and None for tokens. - - An abstract syntax tree node may be anything; this is entirely - up to the converter function. 
- - """ - self.grammar = grammar - self.convert_node = convert_node - self.convert_leaf = convert_leaf - - self.stack = Stack([StackNode(grammar.nonterminal_to_dfas[start_nonterminal][0])]) - self.error_recovery = error_recovery - - def parse(self, tokens): - for type_, value, start_pos, prefix in tokens: - self.add_token(type_, value, start_pos, prefix) - - while self.stack and self.stack[-1].dfa.is_final: - self._pop() - - if self.stack: - # We never broke out -- EOF is too soon -- Unfinished statement. - # However, the error recovery might have added the token again, if - # the stack is empty, we're fine. - raise InternalParseError("incomplete input", type_, value, start_pos) - return self.rootnode - - def add_token(self, type_, value, start_pos, prefix): - """Add a token; return True if this is the end of the program.""" - transition = _token_to_transition(self.grammar, type_, value) - stack = self.stack - grammar = self.grammar - - while True: - try: - plan = stack[-1].dfa.transition_to_plan[transition] - break - except KeyError: - if stack[-1].dfa.is_final: - self._pop() - else: - self.error_recovery(grammar, stack, type_, - value, start_pos, prefix, self.add_token) - return - except IndexError: - raise InternalParseError("too much input", type_, value, start_pos) - - stack[-1].dfa = plan.next_dfa - - for push in plan.dfa_pushes: - stack.append(StackNode(push)) - - leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) - stack[-1].nodes.append(leaf) - - def _pop(self): - tos = self.stack.pop() - # If there's exactly one child, return that child instead of - # creating a new node. We still create expr_stmt and - # file_input though, because a lot of Jedi depends on its - # logic. - if len(tos.nodes) == 1: - new_node = tos.nodes[0] - else: - new_node = self.convert_node(self.grammar, tos.dfa.from_rule, tos.nodes) - - try: - self.stack[-1].nodes.append(new_node) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = new_node diff --git a/parso/python/diff.py b/parso/python/diff.py index 3b7eee5..2197548 100644 --- a/parso/python/diff.py +++ b/parso/python/diff.py @@ -287,7 +287,7 @@ class DiffParser(object): omitted_first_indent = False indents = [] tokens = self._tokenizer(lines, (1, 0)) - stack = self._active_parser.pgen_parser.stack + stack = self._active_parser.stack for typ, string, start_pos, prefix in tokens: start_pos = start_pos[0] + line_offset, start_pos[1] if typ == PythonTokenTypes.INDENT: From 692436ba129d0e23cb7ff4a7f255451b96c168cb Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 00:01:47 +0200 Subject: [PATCH 63/76] Don't use grammar as an argument anymore, because it's already there --- parso/parser.py | 19 ++++++++----------- parso/python/parser.py | 34 +++++++++++++++------------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index d9f0029..3eb57ab 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -99,7 +99,6 @@ def _token_to_transition(grammar, type_, value): return type_ - class BaseParser(object): """Parser engine. 
@@ -129,7 +128,7 @@ class BaseParser(object): self.stack = Stack([StackNode(first_dfa)]) for type_, value, start_pos, prefix in tokens: - self.add_token(type_, value, start_pos, prefix) + self._add_token(type_, value, start_pos, prefix) while self.stack and self.stack[-1].dfa.is_final: self._pop() @@ -141,27 +140,26 @@ class BaseParser(object): raise InternalParseError("incomplete input", type_, value, start_pos) return self.rootnode - def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix, - add_token_callback): + def error_recovery(self, typ, value, start_pos, prefix): if self._error_recovery: raise NotImplementedError("Error Recovery is not implemented") else: error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix) raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) - def convert_node(self, pgen_grammar, nonterminal, children): + def convert_node(self, nonterminal, children): try: return self.node_map[nonterminal](children) except KeyError: return self.default_node(nonterminal, children) - def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos): + def convert_leaf(self, type_, value, prefix, start_pos): try: return self.leaf_map[type_](value, start_pos, prefix) except KeyError: return self.default_leaf(value, start_pos, prefix) - def add_token(self, type_, value, start_pos, prefix): + def _add_token(self, type_, value, start_pos, prefix): """Add a token; return True if this is the end of the program.""" grammar = self._pgen_grammar stack = self.stack @@ -175,8 +173,7 @@ class BaseParser(object): if stack[-1].dfa.is_final: self._pop() else: - self.error_recovery(grammar, stack, type_, - value, start_pos, prefix, self.add_token) + self.error_recovery(type_, value, start_pos, prefix) return except IndexError: raise InternalParseError("too much input", type_, value, start_pos) @@ -186,7 +183,7 @@ class BaseParser(object): for push in plan.dfa_pushes: stack.append(StackNode(push)) - leaf = self.convert_leaf(grammar, type_, value, prefix, start_pos) + leaf = self.convert_leaf(type_, value, prefix, start_pos) stack[-1].nodes.append(leaf) def _pop(self): @@ -198,7 +195,7 @@ class BaseParser(object): if len(tos.nodes) == 1: new_node = tos.nodes[0] else: - new_node = self.convert_node(self._pgen_grammar, tos.dfa.from_rule, tos.nodes) + new_node = self.convert_node(tos.dfa.from_rule, tos.nodes) try: self.stack[-1].nodes.append(new_node) diff --git a/parso/python/parser.py b/parso/python/parser.py index a18309c..2d0c84c 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -95,14 +95,13 @@ class Parser(BaseParser): # If there's only one statement, we get back a non-module. That's # not what we want, we want a module, so we add it here: node = self.convert_node( - self._pgen_grammar, 'file_input', [node] ) return node - def convert_node(self, pgen_grammar, nonterminal, children): + def convert_node(self, nonterminal, children): """ Convert raw node information to a PythonBaseNode instance. 
@@ -127,19 +126,18 @@ class Parser(BaseParser):
             nonterminal = 'testlist_comp'
         return self.default_node(nonterminal, children)
 
-    def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
+    def convert_leaf(self, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
         if type == NAME:
-            if value in pgen_grammar.reserved_syntax_strings:
+            if value in self._pgen_grammar.reserved_syntax_strings:
                 return tree.Keyword(value, start_pos, prefix)
             else:
                 return tree.Name(value, start_pos, prefix)
 
         return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
 
-    def error_recovery(self, pgen_grammar, stack, typ, value, start_pos, prefix,
-                       add_token_callback):
-        tos_nodes = stack[-1].nodes
+    def error_recovery(self, typ, value, start_pos, prefix):
+        tos_nodes = self.stack[-1].nodes
         if tos_nodes:
             last_leaf = tos_nodes[-1].get_last_leaf()
         else:
@@ -152,23 +150,21 @@ class Parser(BaseParser):
         # possible (and valid in Python) that there's no newline at the
         # end of a file, we have to recover even if the user doesn't want
         # error recovery.
-        if stack[-1].dfa.from_rule == 'simple_stmt':
+        if self.stack[-1].dfa.from_rule == 'simple_stmt':
             try:
-                plan = stack[-1].dfa.transition_to_plan[PythonTokenTypes.NEWLINE]
+                plan = self.stack[-1].dfa.transition_to_plan[PythonTokenTypes.NEWLINE]
             except KeyError:
                 pass
             else:
                 if plan.next_dfa.is_final and not plan.dfa_pushes:
                     # We are ignoring here that the newline would be
                     # required for a simple_stmt.
-                    stack[-1].dfa = plan.next_dfa
-                    add_token_callback(typ, value, start_pos, prefix)
+                    self.stack[-1].dfa = plan.next_dfa
+                    self._add_token(typ, value, start_pos, prefix)
                     return
 
         if not self._error_recovery:
-            return super(Parser, self).error_recovery(
-                pgen_grammar, stack, typ, value, start_pos, prefix,
-                add_token_callback)
+            return super(Parser, self).error_recovery(typ, value, start_pos, prefix)
 
         def current_suite(stack):
             # For now just discard everything that is not a suite or
@@ -185,10 +181,10 @@ class Parser(BaseParser):
                     break
             return until_index
 
-        until_index = current_suite(stack)
+        until_index = current_suite(self.stack)
 
-        if self._stack_removal(stack, until_index + 1):
-            add_token_callback(typ, value, start_pos, prefix)
+        if self._stack_removal(self.stack, until_index + 1):
+            self._add_token(typ, value, start_pos, prefix)
         else:
             if typ == INDENT:
                 # For every deleted INDENT we have to delete a DEDENT as well.
                 # Otherwise the parser will get into trouble and DEDENT too early.
                 self._omit_dedent_list.append(self._indent_counter)
 
             error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
-            stack[-1].nodes.append(error_leaf)
+            self.stack[-1].nodes.append(error_leaf)
 
-        tos = stack[-1]
+        tos = self.stack[-1]
         if tos.nonterminal == 'suite':
             # Need at least one statement in the suite. This happened with the
             # error recovery above.
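
Note on the state of the series at this point (an illustrative aside, not one
of the patches): with PgenParser merged into BaseParser and the stack kept on
self, error recovery runs end to end through the public API, which is exactly
what the updated tests assert:

    import parso

    # The dangling `else:` does not raise; it survives as error leaves,
    # and the unfinished `g(` becomes an error_node.
    module = parso.parse('if x: f.\nelse: g(')
    print([child.type for child in module.children])
    # e.g. ['if_stmt', 'newline', 'error_leaf', 'error_leaf', 'error_node', 'endmarker']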
From 7686273287d5ab37270f5b5603256e16bed5ea70 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 00:12:18 +0200 Subject: [PATCH 64/76] Use the stack from the parser itself --- parso/python/parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index 2d0c84c..845797f 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -183,7 +183,7 @@ class Parser(BaseParser): until_index = current_suite(self.stack) - if self._stack_removal(self.stack, until_index + 1): + if self._stack_removal(until_index + 1): self._add_token(typ, value, start_pos, prefix) else: if typ == INDENT: @@ -204,13 +204,13 @@ class Parser(BaseParser): # We're already in a final state. pass - def _stack_removal(self, stack, start_index): - all_nodes = [node for stack_node in stack[start_index:] for node in stack_node.nodes] + def _stack_removal(self, start_index): + all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes] if all_nodes: - stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes)) + self.stack[start_index - 1].nodes.append(tree.PythonErrorNode(all_nodes)) - stack[start_index:] = [] + self.stack[start_index:] = [] return bool(all_nodes) def _recovery_tokenize(self, tokens): From 603b67ee6de67b6924165b312966b42d25c16c65 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 00:18:44 +0200 Subject: [PATCH 65/76] Just always pass token objects to the tokenizer --- parso/python/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index 845797f..5fcf86f 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -214,7 +214,8 @@ class Parser(BaseParser): return bool(all_nodes) def _recovery_tokenize(self, tokens): - for typ, value, start_pos, prefix in tokens: + for token in tokens: + typ = token[0] # print(tok_name[typ], repr(value), start_pos, repr(prefix)) if typ == DEDENT: # We need to count indents, because if we just omit any DEDENT, @@ -227,4 +228,4 @@ class Parser(BaseParser): self._indent_counter -= 1 elif typ == INDENT: self._indent_counter += 1 - yield typ, value, start_pos, prefix + yield token From 97cdb448d4985a6fbb66237ddff14bfa03a85c80 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 00:33:22 +0200 Subject: [PATCH 66/76] Pass tokens around and not all the different token values --- parso/parser.py | 16 +++++++++------- parso/python/parser.py | 13 +++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index 3eb57ab..685c7cb 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -127,8 +127,8 @@ class BaseParser(object): first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0] self.stack = Stack([StackNode(first_dfa)]) - for type_, value, start_pos, prefix in tokens: - self._add_token(type_, value, start_pos, prefix) + for token in tokens: + self._add_token(token) while self.stack and self.stack[-1].dfa.is_final: self._pop() @@ -137,14 +137,15 @@ class BaseParser(object): # We never broke out -- EOF is too soon -- Unfinished statement. # However, the error recovery might have added the token again, if # the stack is empty, we're fine. 
- raise InternalParseError("incomplete input", type_, value, start_pos) + raise InternalParseError("incomplete input", token.type, token.value, token.start_pos) return self.rootnode - def error_recovery(self, typ, value, start_pos, prefix): + def error_recovery(self, token): if self._error_recovery: raise NotImplementedError("Error Recovery is not implemented") else: - error_leaf = tree.ErrorLeaf('TODO %s' % typ, value, start_pos, prefix) + type_, value, start_pos, prefix = token + error_leaf = tree.ErrorLeaf('TODO %s' % type_, value, start_pos, prefix) raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf) def convert_node(self, nonterminal, children): @@ -159,10 +160,11 @@ class BaseParser(object): except KeyError: return self.default_leaf(value, start_pos, prefix) - def _add_token(self, type_, value, start_pos, prefix): + def _add_token(self, token): """Add a token; return True if this is the end of the program.""" grammar = self._pgen_grammar stack = self.stack + type_, value, start_pos, prefix = token transition = _token_to_transition(grammar, type_, value) while True: @@ -173,7 +175,7 @@ class BaseParser(object): if stack[-1].dfa.is_final: self._pop() else: - self.error_recovery(type_, value, start_pos, prefix) + self.error_recovery(token) return except IndexError: raise InternalParseError("too much input", type_, value, start_pos) diff --git a/parso/python/parser.py b/parso/python/parser.py index 5fcf86f..05487b8 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -136,7 +136,7 @@ class Parser(BaseParser): return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix) - def error_recovery(self, typ, value, start_pos, prefix): + def error_recovery(self, token): tos_nodes = self.stack[-1].nodes if tos_nodes: last_leaf = tos_nodes[-1].get_last_leaf() @@ -144,8 +144,8 @@ class Parser(BaseParser): last_leaf = None if self._start_nonterminal == 'file_input' and \ - (typ == PythonTokenTypes.ENDMARKER or - typ == DEDENT and '\n' not in last_leaf.value): + (token.type == PythonTokenTypes.ENDMARKER or + token.type == DEDENT and '\n' not in last_leaf.value): # In Python statements need to end with a newline. But since it's # possible (and valid in Python ) that there's no newline at the # end of a file, we have to recover even if the user doesn't want @@ -160,11 +160,11 @@ class Parser(BaseParser): # We are ignoring here that the newline would be # required for a simple_stmt. self.stack[-1].dfa = plan.next_dfa - self._add_token(typ, value, start_pos, prefix) + self._add_token(token) return if not self._error_recovery: - return super(Parser, self).error_recovery(typ, value, start_pos, prefix) + return super(Parser, self).error_recovery(token) def current_suite(stack): # For now just discard everything that is not a suite or @@ -184,8 +184,9 @@ class Parser(BaseParser): until_index = current_suite(self.stack) if self._stack_removal(until_index + 1): - self._add_token(typ, value, start_pos, prefix) + self._add_token(token) else: + typ, value, start_pos, prefix = token if typ == INDENT: # For every deleted INDENT we have to delete a DEDENT as well. # Otherwise the parser will get into trouble and DEDENT too early. From 52fc8fc5697437f38b8e42d56e624497fc7993c5 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 00:59:55 +0200 Subject: [PATCH 67/76] Finish the stack in a way we want to. 
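
The visible effect (a short sketch for illustration, not part of the original
commit message): because the root DFA is now finished through convert_node(),
the root node is always the start nonterminal, which is why the file_input
wrapping in the Python subclass below can go away:

    import parso

    # Even a single statement comes back as the start nonterminal instead
    # of being collapsed to its only child.
    module = parso.parse('x\n')
    print(module.type)  # file_input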
--- parso/parser.py | 27 ++++++++++++++------------- parso/python/parser.py | 12 +----------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index 685c7cb..e33b8fb 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -130,15 +130,20 @@ class BaseParser(object): for token in tokens: self._add_token(token) - while self.stack and self.stack[-1].dfa.is_final: - self._pop() + while True: + tos = self.stack[-1] + if not tos.dfa.is_final: + # We never broke out -- EOF is too soon -- Unfinished statement. + # However, the error recovery might have added the token again, if + # the stack is empty, we're fine. + raise InternalParseError( + "incomplete input", token.type, token.value, token.start_pos + ) - if self.stack: - # We never broke out -- EOF is too soon -- Unfinished statement. - # However, the error recovery might have added the token again, if - # the stack is empty, we're fine. - raise InternalParseError("incomplete input", token.type, token.value, token.start_pos) - return self.rootnode + if len(self.stack) > 1: + self._pop() + else: + return self.convert_node(tos.nonterminal, tos.nodes) def error_recovery(self, token): if self._error_recovery: @@ -199,8 +204,4 @@ class BaseParser(object): else: new_node = self.convert_node(tos.dfa.from_rule, tos.nodes) - try: - self.stack[-1].nodes.append(new_node) - except IndexError: - # Stack is empty, set the rootnode. - self.rootnode = new_node + self.stack[-1].nodes.append(new_node) diff --git a/parso/python/parser.py b/parso/python/parser.py index 05487b8..9762f54 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -89,17 +89,7 @@ class Parser(BaseParser): tokens = self._recovery_tokenize(tokens) - node = super(Parser, self).parse(tokens) - - if self._start_nonterminal == 'file_input' != node.type: - # If there's only one statement, we get back a non-module. That's - # not what we want, we want a module, so we add it here: - node = self.convert_node( - 'file_input', - [node] - ) - - return node + return super(Parser, self).parse(tokens) From 8e118c913ca760f0ef074adc07b2a4c55b01e902 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 01:01:46 +0200 Subject: [PATCH 68/76] Remove note about print / absolute import detection. This is probably not going to happen anymore; Python 2 is pretty much end-of-life. --- parso/python/parser.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/parso/python/parser.py b/parso/python/parser.py index 9762f54..a5e2632 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -72,16 +72,6 @@ class Parser(BaseParser): self._omit_dedent_list = [] self._indent_counter = 0 - # TODO do print absolute import detection here. - # try: - # del python_grammar_no_print_statement.keywords["print"] - # except KeyError: - # pass # Doesn't exist in the Python 3 grammar.
- - # if self.options["print_function"]: - # python_grammar = pygram.python_grammar_no_print_statement - # else: From badb2fe0102d4d94262055fa4904e4c8a57309fe Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 09:42:37 +0200 Subject: [PATCH 69/76] Rename transition_to_plan to transitions --- parso/parser.py | 2 +- parso/pgen2/generator.py | 8 ++++---- parso/python/parser.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index e33b8fb..b56251a 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -174,7 +174,7 @@ class BaseParser(object): while True: try: - plan = stack[-1].dfa.transition_to_plan[transition] + plan = stack[-1].dfa.transitions[transition] break except KeyError: if stack[-1].dfa.is_final: diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index 2704336..53a56fa 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -58,7 +58,7 @@ class DFAState(object): self.nfa_set = nfa_set self.is_final = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState - self.transition_to_plan = {} + self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan] self.nonterminal_arcs = {} def add_arc(self, next_, label): @@ -232,7 +232,7 @@ def generate_grammar(bnf_grammar, token_namespace): reserved_strings, terminal_or_nonterminal ) - dfa_state.transition_to_plan[transition] = DFAPlan(next_dfa) + dfa_state.transitions[transition] = DFAPlan(next_dfa) _calculate_tree_traversal(rule_to_dfas) return Grammar(start_nonterminal, rule_to_dfas, reserved_strings) @@ -272,7 +272,7 @@ def _calculate_tree_traversal(nonterminal_to_dfas): for dfa_state in dfas: for nonterminal, next_dfa in dfa_state.nonterminal_arcs.items(): for transition, pushes in first_plans[nonterminal].items(): - dfa_state.transition_to_plan[transition] = DFAPlan(next_dfa, pushes) + dfa_state.transitions[transition] = DFAPlan(next_dfa, pushes) def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): @@ -282,7 +282,7 @@ def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): # We only need to check the first dfa. All the following ones are not # interesting to find first terminals. state = dfas[0] - for transition, next_ in state.transition_to_plan.items(): + for transition, next_ in state.transitions.items(): # It's a string. We have finally found a possible first token. new_first_plans[transition] = [next_.next_dfa] diff --git a/parso/python/parser.py b/parso/python/parser.py index a5e2632..550bc4c 100644 --- a/parso/python/parser.py +++ b/parso/python/parser.py @@ -132,7 +132,7 @@ class Parser(BaseParser): # error recovery.
if self.stack[-1].dfa.from_rule == 'simple_stmt': try: - plan = self.stack[-1].dfa.transition_to_plan[PythonTokenTypes.NEWLINE] + plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE] except KeyError: pass else: From 375ebf21817815a5fa044c46dccbdc9f9064da7a Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 09:49:35 +0200 Subject: [PATCH 70/76] Better documentation of the parser generator --- parso/pgen2/generator.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index 53a56fa..c74d088 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -18,6 +18,12 @@ Specifying grammars in pgen is possible with this grammar:: atom: '(' rhs ')' | NAME | STRING This grammar is self-referencing. + +This parser generator (pgen2) was created by Guido van Rossum and used for +lib2to3. Most of the code has been refactored to make it more Pythonic. Since +this was a "copy" of the CPython parser generator "pgen", there was some work +needed to make it more readable. It should also be slightly faster than the +original pgen2, because we made some optimizations. """ from ast import literal_eval @@ -56,11 +62,16 @@ class DFAState(object): assert isinstance(final, NFAState) self.from_rule = from_rule self.nfa_set = nfa_set - self.is_final = final in nfa_set self.arcs = {} # map from terminals/nonterminals to DFAState - self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan] + # In an intermediary step we set these nonterminal arcs (which has the + # same structure as arcs). These don't contain terminals anymore. self.nonterminal_arcs = {} + # Transitions and is_final are basically the only things that the + # parser uses. Everything else is purely here to create a parser. + self.transitions = {} #: Dict[Union[TokenType, ReservedString], DFAPlan] + self.is_final = final in nfa_set + def add_arc(self, next_, label): assert isinstance(label, str) assert label not in self.arcs From ecdb90d9bcb9b239c34b0630342ab20d7767a762 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 10:08:09 +0200 Subject: [PATCH 71/76] Way better documentation for the DFA generator --- parso/pgen2/generator.py | 55 +++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/parso/pgen2/generator.py b/parso/pgen2/generator.py index c74d088..f85c219 100644 --- a/parso/pgen2/generator.py +++ b/parso/pgen2/generator.py @@ -35,9 +35,10 @@ class Grammar(object): """ Once initialized, this class supplies the grammar tables for the parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). + accesses the instance variables directly. + + The only important parts of this parser are the dfas and the transitions + between dfas. """ def __init__(self, start_nonterminal, rule_to_dfas, reserved_syntax_strings): @@ -47,6 +48,10 @@ class Grammar(object): class DFAPlan(object): + """ + Plans are used for the parser to create stack nodes and do the proper + DFA state transitions. + """ def __init__(self, next_dfa, dfa_pushes=[]): self.next_dfa = next_dfa self.dfa_pushes = dfa_pushes @@ -56,6 +61,15 @@ class DFAPlan(object): class DFAState(object): + """ + The DFAState object is the core class for pretty much anything.
DFAStates + are the vertices of a directed graph, while arcs and transitions are the + edges. + + Arcs are the initial edges, where most DFAStates are not yet connected. + Transitions are then calculated to connect the DFA state machines of the + different nonterminals. + """ def __init__(self, from_rule, nfa_set, final): assert isinstance(nfa_set, set) assert isinstance(next(iter(nfa_set)), NFAState) @@ -106,6 +120,12 @@ class DFAState(object): class ReservedString(object): + """ + Most grammars will have certain keywords and operators that are mentioned + in the grammar as strings (e.g. "if") and not token types (e.g. NUMBER). + This class basically represents the former. + """ + def __init__(self, value): self.value = value @@ -114,12 +134,14 @@ class ReservedString(object): def _simplify_dfas(dfas): - # This is not theoretically optimal, but works well enough. - # Algorithm: repeatedly look for two states that have the same - # set of arcs (same labels pointing to the same nodes) and - # unify them, until things stop changing. + """ + This is not theoretically optimal, but works well enough. + Algorithm: repeatedly look for two states that have the same + set of arcs (same labels pointing to the same nodes) and + unify them, until things stop changing. - # dfas is a list of DFAState instances + dfas is a list of DFAState instances + """ changes = True while changes: changes = False @@ -137,7 +159,10 @@ def _simplify_dfas(dfas): def _make_dfas(start, finish): """ - This is basically doing what the powerset construction algorithm is doing. + Uses the powerset construction algorithm to create DFA states from sets of + NFA states. + + Also does state reduction if some states are not needed. """ # To turn an NFA into a DFA, we define the states of the DFA # to correspond to *sets* of states of the NFA. Then do some @@ -250,6 +275,10 @@ def generate_grammar(bnf_grammar, token_namespace): def _make_transition(token_namespace, reserved_syntax_strings, label): + """ + Creates a reserved string ("if", "for", "*", ...) or returns the token type + (NUMBER, STRING, ...) for a given grammar terminal. + """ if label[0].isalpha(): # A named token (e.g. NAME, NUMBER, STRING) return getattr(token_namespace, label) @@ -267,6 +296,10 @@ def _make_transition(token_namespace, reserved_syntax_strings, label): def _calculate_tree_traversal(nonterminal_to_dfas): + """ + By this point we know how dfas can move around within a stack node, but we + don't know how we can add a new stack node (nonterminal transitions). + """ # Map from grammar rule (nonterminal) name to a set of tokens. first_plans = {} @@ -287,6 +320,10 @@ def _calculate_tree_traversal(nonterminal_to_dfas): def _calculate_first_plans(nonterminal_to_dfas, first_plans, nonterminal): + """ + Calculates the first plan in the first_plans dictionary for every given + nonterminal. This is going to be used to know when to create stack nodes.
+ """ dfas = nonterminal_to_dfas[nonterminal] new_first_plans = {} first_plans[nonterminal] = None # dummy to detect left recursion From 98c9a1ec7fce7c84156c079acfd948d96f201698 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Thu, 28 Jun 2018 10:11:44 +0200 Subject: [PATCH 72/76] Better documentation for _add_token --- parso/parser.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/parso/parser.py b/parso/parser.py index b56251a..ebf8554 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -166,7 +166,11 @@ class BaseParser(object): return self.default_leaf(value, start_pos, prefix) def _add_token(self, token): - """Add a token; return True if this is the end of the program.""" + """ + This is the only core function for parsing. Here happens basically + everything. Everything is well prepared by the parser generator and we + only apply the necessary steps here. + """ grammar = self._pgen_grammar stack = self.stack type_, value, start_pos, prefix = token From c8bf23b787db64640fb0af5b315a1b6afe9cc07c Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 29 Jun 2018 00:00:09 +0200 Subject: [PATCH 73/76] Remove get_tos_nodes and get_tos_first_tokens, because they are not used (not even in Jedi) --- parso/parser.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/parso/parser.py b/parso/parser.py index ebf8554..df4aea4 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -54,24 +54,7 @@ class InternalParseError(Exception): class Stack(list): - def get_tos_nodes(self): - tos = self[-1] - return tos[2][1] - - def get_tos_first_tokens(self, grammar): - tos = self[-1] - inv_tokens = dict((v, k) for k, v in grammar.tokens.items()) - inv_keywords = dict((v, k) for k, v in grammar.keywords.items()) - dfa, state, nodes = tos - - def check(): - for first in dfa[1]: - try: - yield inv_keywords[first] - except KeyError: - yield tokenize.tok_name[inv_tokens[first]] - - return sorted(check()) + pass class StackNode(object): From 55d5d39c53cda6f26b89daac575e9220a74837d4 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 29 Jun 2018 10:04:54 +0200 Subject: [PATCH 74/76] Add a private API for jedi to work with the parser stack --- parso/parser.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/parso/parser.py b/parso/parser.py index df4aea4..d96f1ee 100644 --- a/parso/parser.py +++ b/parso/parser.py @@ -24,6 +24,7 @@ complexity of the ``Parser`` (there's another parser sitting inside ``Statement``, which produces ``Array`` and ``Call``). """ from parso import tree +from parso.pgen2.generator import ReservedString class ParserSyntaxError(Exception): @@ -54,7 +55,20 @@ class InternalParseError(Exception): class Stack(list): - pass + def _allowed_transition_names_and_token_types(self): + def iterate(): + # An API just for Jedi. 
+ for stack_node in reversed(self): + for transition in stack_node.dfa.transitions: + if isinstance(transition, ReservedString): + yield transition.value + else: + yield transition # A token type + + if not stack_node.dfa.is_final: + break + + return list(iterate()) class StackNode(object): From fce3ead82984d6851bac62405f0af99af4ac60c4 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 29 Jun 2018 18:04:55 +0200 Subject: [PATCH 75/76] Bump version to 0.3.0 --- parso/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parso/__init__.py b/parso/__init__.py index 9654389..a656b8e 100644 --- a/parso/__init__.py +++ b/parso/__init__.py @@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar from parso.utils import split_lines, python_bytes_to_unicode -__version__ = '0.2.1' +__version__ = '0.3.0' def parse(code=None, **kwargs): From 1af5d9d46bf3855d205e285d56398e8b5daf8e05 Mon Sep 17 00:00:00 2001 From: Dave Halter Date: Fri, 29 Jun 2018 18:13:53 +0200 Subject: [PATCH 76/76] Add a changelog for 0.3.0 --- CHANGELOG.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 59a4222..b2da513 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,11 @@ Changelog --------- +0.3.0 (2018-07-30) ++++++++++++++++++++ + +- Rewrote the pgen2 parser generator. + 0.2.1 (2018-05-21) +++++++++++++++++++
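A closing illustration for the first plans documented in PATCH 71: the sketch
below uses an invented, simplified grammar representation (each rule mapped
directly to its possible first elements) instead of parso's real DFAs, and it
omits the left-recursion check that the real code performs, but it shows how
each possible first terminal gets mapped to the chain of stack-node pushes
that a DFAPlan records:

    # Toy sketch of the "first plans" idea from _calculate_first_plans.
    grammar = {
        'file_input': ['stmt'],
        'stmt': ['if', 'NAME'],  # a stmt starts with the keyword 'if' or a NAME
    }

    def first_plans(rules, nonterminal, pushes=()):
        plans = {}
        for first in rules[nonterminal]:
            if first in rules:
                # A nonterminal: recurse and remember the stack node push.
                plans.update(first_plans(rules, first, pushes + (first,)))
            else:
                # A terminal: we finally found a possible first token.
                plans[first] = pushes
        return plans

    print(first_plans(grammar, 'file_input'))
    # -> {'if': ('stmt',), 'NAME': ('stmt',)}
    # In other words: when the parser sits in file_input and sees 'if' or a
    # NAME token, it first pushes a stmt stack node -- exactly what DFAPlan's
    # dfa_pushes encodes.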