From 73ce57428be069d2c34eb1aae0325e0dc7cbd997 Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Sun, 17 Jun 2018 18:30:20 +0200
Subject: [PATCH] Try to completely remove the word symbol and use nonterminal

The remaining uses of the word "symbol" are in grammar.py; they could not be removed because they are part of the public, documented API.
---
 parso/grammar.py       | 12 ++++-----
 parso/parser.py        | 14 +++++-----
 parso/pgen2/grammar.py | 36 +++++++++++++-------------
 parso/pgen2/parse.py   | 10 ++++----
 parso/pgen2/pgen.py    | 35 ++++++++++++-------------
 parso/python/diff.py   | 10 ++++----
 parso/python/parser.py | 58 +++++++++++++++++++++---------------------
 7 files changed, 89 insertions(+), 86 deletions(-)

diff --git a/parso/grammar.py b/parso/grammar.py
index 6c13f00..2906b5d 100644
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -51,8 +51,8 @@ class Grammar(object):
             it is invalid, it will be returned as an error node. If disabled,
             you will get a ParseError when encountering syntax errors in your
             code.
-        :param str start_symbol: The grammar symbol that you want to parse. Only
-            allowed to be used when error_recovery is False.
+        :param str start_symbol: The grammar rule (nonterminal) that you want
+            to parse. Only allowed to be used when error_recovery is False.
         :param str path: The path to the file you want to open. Only needed for caching.
         :param bool cache: Keeps a copy of the parser tree in RAM and on disk
             if a path is given. Returns the cached trees if the corresponding
@@ -88,7 +88,7 @@ class Grammar(object):
             raise TypeError("Please provide either code or a path.")
 
         if start_symbol is None:
-            start_symbol = self._start_symbol
+            start_symbol = self._start_nonterminal
 
         if error_recovery and start_symbol != 'file_input':
             raise NotImplementedError("This is currently not implemented.")
@@ -136,7 +136,7 @@ class Grammar(object):
         p = self._parser(
             self._pgen_grammar,
             error_recovery=error_recovery,
-            start_symbol=start_symbol
+            start_nonterminal=start_symbol
         )
         root_node = p.parse(tokens=tokens)
 
@@ -186,7 +186,7 @@ class Grammar(object):
         return normalizer.issues
 
     def __repr__(self):
-        labels = self._pgen_grammar.number2symbol.values()
+        labels = self._pgen_grammar.number2nonterminal.values()
         txt = ' '.join(list(labels)[:3]) + ' ...'
         return '<%s:%s>' % (self.__class__.__name__, txt)
 
@@ -194,7 +194,7 @@ class Grammar(object):
 class PythonGrammar(Grammar):
     _error_normalizer_config = ErrorFinderConfig()
     _token_namespace = token
-    _start_symbol = 'file_input'
+    _start_nonterminal = 'file_input'
 
     def __init__(self, version_info, bnf_text):
         super(PythonGrammar, self).__init__(
diff --git a/parso/parser.py b/parso/parser.py
index 555ebc7..c9df89e 100644
--- a/parso/parser.py
+++ b/parso/parser.py
@@ -38,13 +38,13 @@ class BaseParser(object):
     }
     default_leaf = tree.Leaf
 
-    def __init__(self, pgen_grammar, start_symbol='file_input', error_recovery=False):
+    def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
         self._pgen_grammar = pgen_grammar
-        self._start_symbol = start_symbol
+        self._start_nonterminal = start_nonterminal
         self._error_recovery = error_recovery
 
     def parse(self, tokens):
-        start_number = self._pgen_grammar.symbol2number[self._start_symbol]
+        start_number = self._pgen_grammar.nonterminal2number[self._start_nonterminal]
         self.pgen_parser = PgenParser(
             self._pgen_grammar, self.convert_node, self.convert_leaf,
             self.error_recovery, start_number
@@ -64,12 +64,12 @@ class BaseParser(object):
             raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
 
     def convert_node(self, pgen_grammar, type_, children):
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type_]
+        # TODO REMOVE nonterminal, we don't want type here.
+        nonterminal = pgen_grammar.number2nonterminal[type_]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            return self.default_node(symbol, children)
+            return self.default_node(nonterminal, children)
 
     def convert_leaf(self, pgen_grammar, type_, value, prefix, start_pos):
         try:
diff --git a/parso/pgen2/grammar.py b/parso/pgen2/grammar.py
index 1a2c6e9..00a6e8c 100644
--- a/parso/pgen2/grammar.py
+++ b/parso/pgen2/grammar.py
@@ -28,12 +28,14 @@ class Grammar(object):
 
     The instance variables are as follows:
 
-    symbol2number -- a dict mapping symbol names to numbers.  Symbol
-                     numbers are always 256 or higher, to distinguish
-                     them from token numbers, which are between 0 and
-                     255 (inclusive).
+    nonterminal2number --
+                     A dict mapping nonterminal names to numbers.
+                     Nonterminal numbers are always 256 or higher, to
+                     distinguish them from token numbers, which are between 0
+                     and 255 (inclusive).
 
-    number2symbol -- a dict mapping numbers to symbol names;
+    number2nonterminal --
+                     A dict mapping numbers to nonterminal names;
                      these two are each other's inverse.
 
     states        -- a list of DFAs, where each DFA is a list of
@@ -44,20 +46,20 @@ class Grammar(object):
                      Final states are represented by a special arc of
                      the form (0, j) where j is its own state number.
 
-    dfas          -- a dict mapping symbol numbers to (DFA, first)
+    dfas          -- a dict mapping nonterminal numbers to (DFA, first)
                      pairs, where DFA is an item from the states list
                      above, and first is a set of tokens that can
                      begin this grammar rule (represented by a dict
                      whose values are always 1).
 
     labels        -- a list of (x, y) pairs where x is either a token
-                     number or a symbol number, and y is either None
+                     number or a nonterminal number, and y is either None
                      or a string; the strings are keywords.  The label
                      number is the index in this list; label numbers
                      are used to mark state transitions (arcs) in the
                      DFAs.
 
-    start         -- the number of the grammar's start symbol.
+    start         -- the number of the grammar's start nonterminal.
 
     keywords      -- a dict mapping keyword strings to arc labels.
 
@@ -65,29 +67,29 @@ class Grammar(object):
 
     """
 
-    def __init__(self, bnf_text, start_symbol):
-        self.symbol2number = {}
-        self.number2symbol = {}
+    def __init__(self, bnf_text, start_nonterminal):
+        self.nonterminal2number = {}
+        self.number2nonterminal = {}
         self.states = []
         self.dfas = {}
         self.labels = [(0, "EMPTY")]
         self.keywords = {}
         self.tokens = {}
-        self.symbol2label = {}
-        self.label2symbol = {}
-        self.start_symbol = start_symbol
+        self.nonterminal2label = {}
+        self.label2nonterminal = {}
+        self.start_nonterminal = start_nonterminal
 
     @property
     def start(self):
-        return self.symbol2number[self.start_symbol]
+        return self.nonterminal2number[self.start_nonterminal]
 
     def report(self):
         """Dump the grammar tables to standard output, for debugging."""
         from pprint import pprint
         print("s2n")
-        pprint(self.symbol2number)
+        pprint(self.nonterminal2number)
         print("n2s")
-        pprint(self.number2symbol)
+        pprint(self.number2nonterminal)
         print("states")
         pprint(self.states)
         print("dfas")
diff --git a/parso/pgen2/parse.py b/parso/pgen2/parse.py
index e2d9593..4e1ad6c 100644
--- a/parso/pgen2/parse.py
+++ b/parso/pgen2/parse.py
@@ -118,8 +118,8 @@ class PgenParser(object):
         up.
 
         A concrete syntax tree node is a (type, nodes) tuple, where
-        type is the node type (a token or symbol number) and nodes
-        is a list of children for symbols, and None for tokens.
+        type is the node type (a token or nonterminal number) and nodes
+        is a list of children for nonterminals, and None for tokens.
 
         An abstract syntax tree node may be anything; this is entirely
         up to the converter function.
@@ -184,11 +184,11 @@ class PgenParser(object):
                     # Done with this token
                     return False
                 elif t >= 256:
-                    # See if it's a symbol and if we're in its first set
+                    # See if it's a nonterminal and if we're in its first set
                     itsdfa = _gram.dfas[t]
                     itsstates, itsfirst = itsdfa
                     if ilabel in itsfirst:
-                        # Push a symbol
+                        # Push a nonterminal
                         _push(t, itsdfa, newstate)
                         break  # To continue the outer while loop
             else:
@@ -231,7 +231,7 @@ class PgenParser(object):
         try:
             # Equal to:
             # dfa, state, node = self.stack[-1]
-            # symbol, children = node
+            # nonterminal, children = node
             self.stack[-1][2][1].append(newnode)
         except IndexError:
             # Stack is empty, set the rootnode.
diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py
index 9d0988f..de1efcb 100644
--- a/parso/pgen2/pgen.py
+++ b/parso/pgen2/pgen.py
@@ -29,7 +29,8 @@ class ParserGenerator(object):
         self._nonterminal_to_dfas = rule_to_dfas
 
     def make_grammar(self, grammar):
-        self._first_terminals = {}  # map from symbol name to set of tokens
+        # Map from grammar rule (nonterminal) name to a set of tokens.
+        self._first_terminals = {}
 
         names = list(self._nonterminal_to_dfas.keys())
         names.sort()
@@ -37,9 +38,9 @@ class ParserGenerator(object):
             if name not in self._first_terminals:
                 self._calculate_first_terminals(name)
 
-            i = 256 + len(grammar.symbol2number)
-            grammar.symbol2number[name] = i
-            grammar.number2symbol[i] = name
+            i = 256 + len(grammar.nonterminal2number)
+            grammar.nonterminal2number[name] = i
+            grammar.number2nonterminal[i] = name
 
         # Now that we have calculated the first terminals, we are sure that
         # there is no left recursion or ambiguities.
@@ -55,7 +56,7 @@ class ParserGenerator(object):
                     arcs.append((0, dfas.index(state)))
                 states.append(arcs)
             grammar.states.append(states)
-            grammar.dfas[grammar.symbol2number[name]] = (states, self._make_first(grammar, name))
+            grammar.dfas[grammar.nonterminal2number[name]] = (states, self._make_first(grammar, name))
         return grammar
 
     def _make_first(self, grammar, name):
@@ -71,15 +72,15 @@ class ParserGenerator(object):
         # XXX Maybe this should be a method on a subclass of converter?
         ilabel = len(grammar.labels)
         if label[0].isalpha():
-            # Either a symbol name or a named token
-            if label in grammar.symbol2number:
-                # A symbol name (a non-terminal)
-                if label in grammar.symbol2label:
-                    return grammar.symbol2label[label]
+            # Either a nonterminal name or a named token
+            if label in grammar.nonterminal2number:
+                # A nonterminal name (the name of a grammar rule)
+                if label in grammar.nonterminal2label:
+                    return grammar.nonterminal2label[label]
                 else:
-                    grammar.labels.append((grammar.symbol2number[label], None))
-                    grammar.symbol2label[label] = ilabel
-                    grammar.label2symbol[ilabel] = label
+                    grammar.labels.append((grammar.nonterminal2number[label], None))
+                    grammar.nonterminal2label[label] = ilabel
+                    grammar.label2nonterminal[ilabel] = label
                     return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
@@ -293,7 +294,7 @@ def generate_grammar(bnf_grammar, token_namespace):
     own parser.
     """
     rule_to_dfas = {}
-    start_symbol = None
+    start_nonterminal = None
     for nfa_a, nfa_z in GrammarParser(bnf_grammar).parse():
         #_dump_nfa(a, z)
         dfas = _make_dfas(nfa_a, nfa_z)
@@ -304,8 +305,8 @@ def generate_grammar(bnf_grammar, token_namespace):
         rule_to_dfas[nfa_a.from_rule] = dfas
         #print(nfa_a.from_rule, oldlen, newlen)
 
-        if start_symbol is None:
-            start_symbol = nfa_a.from_rule
+        if start_nonterminal is None:
+            start_nonterminal = nfa_a.from_rule
 
     p = ParserGenerator(rule_to_dfas, token_namespace)
-    return p.make_grammar(Grammar(bnf_grammar, start_symbol))
+    return p.make_grammar(Grammar(bnf_grammar, start_nonterminal))
diff --git a/parso/python/diff.py b/parso/python/diff.py
index f8b73c7..529f06a 100644
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -41,9 +41,9 @@ def _flows_finished(pgen_grammar, stack):
     if, while, for and try might not be finished, because another part might
     still be parsed.
     """
-    for dfa, newstate, (symbol_number, nodes) in stack:
-        if pgen_grammar.number2symbol[symbol_number] in ('if_stmt', 'while_stmt',
-                                                    'for_stmt', 'try_stmt'):
+    for dfa, newstate, (nonterminal_number, nodes) in stack:
+        if pgen_grammar.number2nonterminal[nonterminal_number] \
+                in ('if_stmt', 'while_stmt', 'for_stmt', 'try_stmt'):
             return False
     return True
 
@@ -52,8 +52,8 @@ def suite_or_file_input_is_valid(pgen_grammar, stack):
     if not _flows_finished(pgen_grammar, stack):
         return False
 
-    for dfa, newstate, (symbol_number, nodes) in reversed(stack):
-        if pgen_grammar.number2symbol[symbol_number] == 'suite':
+    for dfa, newstate, (nonterminal_number, nodes) in reversed(stack):
+        if pgen_grammar.number2nonterminal[nonterminal_number] == 'suite':
             # If only newline is in the suite, the suite is not valid, yet.
             return len(nodes) > 1
     # Not reaching a suite means that we're dealing with file_input levels
diff --git a/parso/python/parser.py b/parso/python/parser.py
index b99053b..cb283e8 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -62,8 +62,8 @@ class Parser(BaseParser):
         FSTRING_END: tree.FStringEnd,
     }
 
-    def __init__(self, pgen_grammar, error_recovery=True, start_symbol='file_input'):
-        super(Parser, self).__init__(pgen_grammar, start_symbol, error_recovery=error_recovery)
+    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
+        super(Parser, self).__init__(pgen_grammar, start_nonterminal, error_recovery=error_recovery)
 
         self.syntax_errors = []
         self._omit_dedent_list = []
@@ -81,19 +81,19 @@ class Parser(BaseParser):
 
     def parse(self, tokens):
         if self._error_recovery:
-            if self._start_symbol != 'file_input':
+            if self._start_nonterminal != 'file_input':
                 raise NotImplementedError
 
             tokens = self._recovery_tokenize(tokens)
 
         node = super(Parser, self).parse(tokens)
 
-        if self._start_symbol == 'file_input' != node.type:
+        if self._start_nonterminal == 'file_input' != node.type:
             # If there's only one statement, we get back a non-module. That's
             # not what we want, we want a module, so we add it here:
             node = self.convert_node(
                 self._pgen_grammar,
-                self._pgen_grammar.symbol2number['file_input'],
+                self._pgen_grammar.nonterminal2number['file_input'],
                 [node]
             )
 
@@ -107,24 +107,24 @@ class Parser(BaseParser):
         grammar rule produces a new complete node, so that the tree is build
         strictly bottom-up.
         """
-        # TODO REMOVE symbol, we don't want type here.
-        symbol = pgen_grammar.number2symbol[type]
+        # TODO REMOVE nonterminal, we don't want type here.
+        nonterminal = pgen_grammar.number2nonterminal[type]
         try:
-            return self.node_map[symbol](children)
+            return self.node_map[nonterminal](children)
         except KeyError:
-            if symbol == 'suite':
+            if nonterminal == 'suite':
                 # We don't want the INDENT/DEDENT in our parser tree. Those
                 # leaves are just cancer. They are virtual leaves and not real
                 # ones and therefore have pseudo start/end positions and no
                 # prefixes. Just ignore them.
                 children = [children[0]] + children[2:-1]
-            elif symbol == 'list_if':
+            elif nonterminal == 'list_if':
                 # Make transitioning from 2 to 3 easier.
-                symbol = 'comp_if'
-            elif symbol == 'listmaker':
+                nonterminal = 'comp_if'
+            elif nonterminal == 'listmaker':
                 # Same as list_if above.
-                symbol = 'testlist_comp'
-            return self.default_node(symbol, children)
+                nonterminal = 'testlist_comp'
+            return self.default_node(nonterminal, children)
 
     def convert_leaf(self, pgen_grammar, type, value, prefix, start_pos):
         # print('leaf', repr(value), token.tok_name[type])
@@ -138,10 +138,10 @@ class Parser(BaseParser):
 
     def error_recovery(self, pgen_grammar, stack, arcs, typ, value, start_pos, prefix,
                        add_token_callback):
-        def get_symbol_and_nodes(stack):
+        def get_nonterminal_and_nodes(stack):
             for dfa, state, (type_, nodes) in stack:
-                symbol = pgen_grammar.number2symbol[type_]
-                yield symbol, nodes
+                nonterminal = pgen_grammar.number2nonterminal[type_]
+                yield nonterminal, nodes
 
         tos_nodes = stack.get_tos_nodes()
         if tos_nodes:
@@ -149,7 +149,7 @@ class Parser(BaseParser):
         else:
             last_leaf = None
 
-        if self._start_symbol == 'file_input' and \
+        if self._start_nonterminal == 'file_input' and \
                 (typ == ENDMARKER or typ == DEDENT and '\n' not in last_leaf.value):
             def reduce_stack(states, newstate):
                 # reduce
@@ -168,13 +168,13 @@ class Parser(BaseParser):
             ilabel = token_to_ilabel(pgen_grammar, NEWLINE, value)
 
             dfa, state, (type_, nodes) = stack[-1]
-            symbol = pgen_grammar.number2symbol[type_]
+            nonterminal = pgen_grammar.number2nonterminal[type_]
             states, first = dfa
             arcs = states[state]
             # Look for a state with this label
             for i, newstate in arcs:
                 if ilabel == i:
-                    if symbol == 'simple_stmt':
+                    if nonterminal == 'simple_stmt':
                         # This is basically shifting
                         stack[-1] = (dfa, newstate, (type_, nodes))
 
@@ -182,12 +182,12 @@ class Parser(BaseParser):
                         add_token_callback(typ, value, start_pos, prefix)
                         return
                     # Check if we're at the right point
-                    #for symbol, nodes in get_symbol_and_nodes(stack):
+                    #for nonterminal, nodes in get_nonterminal_and_nodes(stack):
                     #        self.pgen_parser._pop()
 
                             #break
                     break
-            #symbol = pgen_grammar.number2symbol[type_]
+            #nonterminal = pgen_grammar.number2nonterminal[type_]
 
         if not self._error_recovery:
             return super(Parser, self).error_recovery(
@@ -198,21 +198,21 @@ class Parser(BaseParser):
             # For now just discard everything that is not a suite or
             # file_input, if we detect an error.
             one_line_suite = False
-            for index, (symbol, nodes) in reversed(list(enumerate(get_symbol_and_nodes(stack)))):
+            for index, (nonterminal, nodes) in reversed(list(enumerate(get_nonterminal_and_nodes(stack)))):
                 # `suite` can sometimes be only simple_stmt, not stmt.
                 if one_line_suite:
                     break
-                elif symbol == 'file_input':
+                elif nonterminal == 'file_input':
                     break
-                elif symbol == 'suite':
+                elif nonterminal == 'suite':
                     if len(nodes) > 1:
                         break
                     elif not nodes:
                         one_line_suite = True
                     # `suite` without an indent are error nodes.
-            return index, symbol, nodes
+            return index, nonterminal, nodes
 
-        index, symbol, nodes = current_suite(stack)
+        index, nonterminal, nodes = current_suite(stack)
 
         # print('err', token.tok_name[typ], repr(value), start_pos, len(stack), index)
         if self._stack_removal(pgen_grammar, stack, arcs, index + 1, value, start_pos):
@@ -226,11 +226,11 @@ class Parser(BaseParser):
             error_leaf = tree.PythonErrorLeaf(tok_name[typ].lower(), value, start_pos, prefix)
             stack[-1][2][1].append(error_leaf)
 
-        if symbol == 'suite':
+        if nonterminal == 'suite':
             dfa, state, node = stack[-1]
             states, first = dfa
             arcs = states[state]
-            intended_label = pgen_grammar.symbol2label['stmt']
+            intended_label = pgen_grammar.nonterminal2label['stmt']
             # Introduce a proper state transition. We're basically allowing
             # there to be no valid statements inside a suite.
             if [x[0] for x in arcs] == [intended_label]: