Add a more API-focused grammar.

This makes it so we don't have to expose all the details of a pgen grammar to the user.
This commit is contained in:
Dave Halter
2017-05-21 17:02:15 -04:00
parent 973f7c5f61
commit 23d1470618
8 changed files with 81 additions and 57 deletions

View File

@@ -1,11 +1,57 @@
import os
from parso._compatibility import FileNotFoundError
from parso.parser import ParserSyntaxError
from parso.pgen2.pgen import generate_grammar
from parso import python
from parso import grammar
from parso.tokenize import generate_tokens
from parso.python.parser import Parser
# Version string of this package.
__version__ = '0.0.1'
# Cache of already loaded Python grammars, keyed by the grammar file's path.
_loaded_grammars = {}
def parse(grammar, code):
    """
    Parse ``code`` with the given ``grammar``.

    This entry point is a stub: it is not implemented yet and always raises.

    :param grammar: The grammar to parse with (currently unused).
    :param code: The source code to parse (currently unused).
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError
def create_grammar(text, tokenizer=generate_tokens):
    """
    Build a ``grammar.Grammar`` object from a BNF description.

    :param text: A BNF representation of your grammar.
    :param tokenizer: The tokenizer handed to the grammar object; defaults
        to ``generate_tokens``.
    :return: A ``grammar.Grammar`` instance, created with ``parser=None``.
    """
    grammar_class = grammar.Grammar
    return grammar_class(text, tokenizer, parser=None)
def load_python_grammar(version=None):
    """
    Loads a Python grammar. The default version is always the latest.

    If you need support for a specific version, please use e.g.
    `version='3.3'`.

    Versions '3.2' and '3.3' are served by the 3.4 grammar file and '2.6' by
    the 2.7 grammar file. Loaded grammars are cached per grammar file path.
    If no grammar file exists for the requested version, the default grammar
    is loaded instead.

    :param version: Python version as a string such as '3.4', or None for
        the default version.
    :return: The (possibly cached) grammar created by `create_grammar`.
    :raises FileNotFoundError: If even the default grammar file is missing.
    """
    default_version = '3.6'
    if version is None:
        version = default_version

    if version in ('3.2', '3.3'):
        version = '3.4'
    elif version == '2.6':
        version = '2.7'

    relative_path = 'python/grammar' + version + '.txt'
    path = os.path.join(os.path.dirname(__file__), relative_path)
    try:
        return _loaded_grammars[path]
    except KeyError:
        pass

    try:
        with open(path) as f:
            bnf_text = f.read()
    except FileNotFoundError:
        if version == default_version:
            # The default grammar itself is missing; falling back to the
            # default again would recurse without end, so report the error.
            raise
        # Just load the default if the file does not exist.
        return load_python_grammar()

    loaded = create_grammar(bnf_text)
    return _loaded_grammars.setdefault(path, loaded)

17
parso/grammar.py Normal file
View File

@@ -0,0 +1,17 @@
import hashlib
from parso.pgen2.pgen import generate_grammar
class Grammar(object):
    """
    API-level grammar object wrapping a generated pgen grammar.

    It keeps the pgen grammar together with the tokenizer and parser objects
    it was created with, plus a hash of the BNF text it was generated from.
    """

    def __init__(self, bnf_text, tokenizer, parser, diff_parser=None):
        """
        :param bnf_text: The BNF text passed to `generate_grammar`; it is
            also hashed into `sha256`.
        :param tokenizer: Stored unchanged as `_tokenizer`.
        :param parser: Stored unchanged as `_parser`.
        :param diff_parser: Stored unchanged as `_diff_parser`.
        """
        self._pgen_grammar = generate_grammar(bnf_text)
        self._parser = parser
        self._tokenizer = tokenizer
        self._diff_parser = diff_parser
        # Hex SHA-256 digest of the UTF-8 encoded BNF text.
        self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()

    def __repr__(self):
        """Return a short representation listing the first three symbols."""
        # In pgen2 grammar tables `symbol2number` maps symbol names to their
        # integer numbers, so the names (keys) are joined here; joining the
        # integer values would raise a TypeError.
        names = list(self._pgen_grammar.symbol2number)[:3]
        txt = ' '.join(names) + ' ...'
        return '<%s:%s>' % (self.__class__.__name__, txt)

View File

@@ -17,8 +17,6 @@ fallback token code OP, but the parser needs the actual token code.
"""
import pickle
import hashlib
class Grammar(object):
@@ -85,7 +83,6 @@ class Grammar(object):
self.tokens = {}
self.symbol2label = {}
self.start = 256
self.sha256 = hashlib.sha256(bnf_text.encode("utf-8")).hexdigest()
def dump(self, filename):
"""Dump the grammar tables to a pickle file."""

View File

@@ -1,52 +1,13 @@
"""
Parsers for Python
"""
import os
from parso.utils import splitlines, source_to_unicode
from parso._compatibility import FileNotFoundError
from parso.pgen2.pgen import generate_grammar
from parso.python.parser import Parser, remove_last_newline
from parso.python.diff import DiffParser
from parso.tokenize import generate_tokens
from parso.cache import parser_cache, load_module, save_module
# Cache of already loaded grammars, keyed by the grammar file's path.
_loaded_grammars = {}
def load_grammar(version=None):
    """
    Loads a Python grammar. The default version is always the latest.

    If you need support for a specific version, please use e.g.
    `version='3.3'`.

    Versions '3.2' and '3.3' are served by the 3.4 grammar file and '2.6' by
    the 2.7 grammar file. Loaded grammars are cached per grammar file path.
    If no grammar file exists for the requested version, the default grammar
    is loaded instead.

    :param version: Python version as a string such as '3.4', or None for
        the default version.
    :return: The (possibly cached) grammar produced by `generate_grammar`.
    :raises FileNotFoundError: If even the default grammar file is missing.
    """
    default_version = '3.6'
    if version is None:
        version = default_version

    if version in ('3.2', '3.3'):
        version = '3.4'
    elif version == '2.6':
        version = '2.7'

    grammar_file = 'grammar' + version + '.txt'
    path = os.path.join(os.path.dirname(__file__), grammar_file)
    try:
        return _loaded_grammars[path]
    except KeyError:
        pass

    try:
        with open(path) as f:
            bnf_text = f.read()
    except FileNotFoundError:
        if version == default_version:
            # The default grammar itself is missing; falling back to the
            # default again would recurse without end, so report the error.
            raise
        # Just load the default if the file does not exist.
        return load_grammar()

    pgen_grammar = generate_grammar(bnf_text)
    return _loaded_grammars.setdefault(path, pgen_grammar)
def parse(code=None, **kwargs):
"""
If you want to parse a Python file you want to start here, most likely.
@@ -78,7 +39,8 @@ def parse(code=None, **kwargs):
raise TypeError("Please provide either code or a path.")
if grammar is None:
grammar = load_grammar()
from parso import load_python_grammar
grammar = load_python_grammar()
if cache and code is None and path is not None:
# With the current architecture we cannot load from cache if the
@@ -124,7 +86,7 @@ def parse(code=None, **kwargs):
tokens = generate_tokens(tokenize_lines, use_exact_op_types=True)
p = Parser(grammar, error_recovery=error_recovery, start_symbol=start_symbol)
p = Parser(grammar._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
root_node = p.parse(tokens=tokens)
if added_newline:
remove_last_newline(root_node)