Mirror of https://github.com/davidhalter/parso.git, synced 2025-12-07 05:14:29 +08:00
Compare commits
9 Commits
| SHA1 |
|---|
| 6f385bdba1 |
| 4fc31c58b3 |
| 689decc66c |
| c2eacdb81c |
| ac0bf4fcdd |
| 948f9ccecc |
| f20106d88e |
| f4912f6c17 |
| bf5a4b7c2c |
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,6 @@ sudo: false
 python:
   - 2.6
   - 2.7
-  - 3.3
   - 3.4
   - 3.5
   - 3.6
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -3,6 +3,12 @@
 Changelog
 ---------
 
+0.2.1 (2018-05-21)
++++++++++++++++++++
+
+- A bugfix for the diff parser.
+- Grammar files can now be loaded from a specific path.
+
 0.2.0 (2018-04-15)
 +++++++++++++++++++
 
--- a/parso/__init__.py
+++ b/parso/__init__.py
@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
 from parso.utils import split_lines, python_bytes_to_unicode
 
 
-__version__ = '0.2.0'
+__version__ = '0.2.1'
 
 
 def parse(code=None, **kwargs):
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -20,7 +20,7 @@ class Grammar(object):
     """
     :py:func:`parso.load_grammar` returns instances of this class.
 
-    Creating custom grammars by calling this is not supported, yet.
+    Creating custom none-python grammars by calling this is not supported, yet.
     """
     #:param text: A BNF representation of your grammar.
     _error_normalizer_config = None
@@ -219,12 +219,13 @@ def load_grammar(**kwargs):
     version.
 
     :param str version: A python version string, e.g. ``version='3.3'``.
+    :param str path: A path to a grammar file
     """
-    def load_grammar(language='python', version=None):
+    def load_grammar(language='python', version=None, path=None):
         if language == 'python':
             version_info = parse_version_string(version)
 
-            file = os.path.join(
+            file = path or os.path.join(
                 'python',
                 'grammar%s%s.txt' % (version_info.major, version_info.minor)
             )
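The `load_grammar` change above is what the changelog's "Grammar files can now be loaded from a specific path" entry refers to. A minimal sketch of the new keyword, where the path below is a hypothetical copy of one of parso's bundled grammar files:

```python
import parso

# Default behaviour, unchanged: resolve the grammar bundled for a version.
grammar = parso.load_grammar(version='3.6')

# New in 0.2.1: point at a grammar file directly. '/tmp/grammar36.txt'
# is a hypothetical copy of a bundled grammar<major><minor>.txt file.
custom = parso.load_grammar(path='/tmp/grammar36.txt')
module = custom.parse('x = 1\n')
print(module.children[0].type)  # 'simple_stmt'
```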
--- a/parso/python/diff.py
+++ b/parso/python/diff.py
@@ -490,6 +490,9 @@ class _NodesStack(object):
 
         new_tos = tos
         for node in nodes:
+            if node.start_pos[0] > until_line:
+                break
+
             if node.type == 'endmarker':
                 # We basically removed the endmarker, but we are not allowed to
                 # remove the newline at the end of the line, otherwise it's
@@ -501,8 +504,6 @@ class _NodesStack(object):
                 # Endmarkers just distort all the checks below. Remove them.
                 break
 
-            if node.start_pos[0] > until_line:
-                break
-
             # TODO this check might take a bit of time for large files. We
             # might want to change this to do more intelligent guessing or
             # binary search.
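Moving the `until_line` check ahead of the endmarker handling is the diff-parser bugfix named in the changelog: nodes that start past the requested line must be skipped before any endmarker bookkeeping runs. A rough sketch of the scenario covered by the new `test_newlines_at_end` test, driven through the public API (assuming `diff_cache=True` keeps the previous tree in parso's in-memory parser cache even without a file path):

```python
import parso

grammar = parso.load_grammar()
# The first parse fills the cache; the second re-parses incrementally,
# copying nodes from the old tree while the trailing newlines shrink.
grammar.parse('a\n\n', diff_cache=True)
module = grammar.parse('a\n', diff_cache=True)
assert module.children[-1].type == 'endmarker'
```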
--- a/parso/python/tokenize.py
+++ b/parso/python/tokenize.py
@@ -28,7 +28,8 @@ from parso.utils import split_lines
 
 TokenCollection = namedtuple(
     'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
+    'fstring_pattern_map always_break_tokens',
 )
 
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -114,6 +115,7 @@ def _create_token_collection(version_info):
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
     Name = r'\w+'
 
@@ -225,7 +227,7 @@ def _create_token_collection(version_info):
     pseudo_token_compiled = _compile(PseudoToken)
     return TokenCollection(
         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        fstring_pattern_map, ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
     )
 
 
@@ -244,7 +246,7 @@ class PythonToken(Token):
         return tok_name[self.type]
 
     def __repr__(self):
-        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
+        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                 self._replace(type=self._get_type_name()))
 
 
@@ -354,7 +356,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
-    pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
+    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
+        fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
    paren_level = 0  # count parentheses
    indents = [0]
@@ -435,10 +438,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
 
         pseudomatch = pseudo_token.match(line, pos)
         if not pseudomatch:  # scan for tokens
-            txt = line[pos:]
-            if txt.endswith('\n'):
+            if line.endswith('\n'):
                 new_line = True
-            yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+            match = whitespace.match(line, pos)
+            pos = match.end()
+            yield PythonToken(
+                ERRORTOKEN, line[pos:], (lnum, pos),
+                additional_prefix + match.group(0)
+            )
             additional_prefix = ''
             break
 
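The net effect of threading the compiled `whitespace` pattern through `TokenCollection` into `tokenize_lines`: when a line cannot be matched as any real token, its leading whitespace now lands in the ERRORTOKEN's prefix rather than in its string. A small sketch against the tokenizer module (the version string is chosen arbitrarily):

```python
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string

# ' "\n' is an unterminated string preceded by a space; the tokenizer
# emits an ERRORTOKEN for it. After this change the space is prefix.
error, endmarker = tokenize(' "\n', parse_version_string('3.6'))
print(repr(error.prefix))  # ' '
print(repr(error.string))  # '"\n'
```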
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -60,7 +60,6 @@ _GET_DEFINITION_TYPES = set([
 _IMPORTS = set(['import_name', 'import_from'])
 
 
-
 class DocstringMixin(object):
     __slots__ = ()
 
@@ -133,7 +132,6 @@ class PythonLeaf(PythonMixin, Leaf):
         return previous_leaf.end_pos
 
 
-
 class _LeafWithoutNewlines(PythonLeaf):
     """
     Simply here to optimize performance.
@@ -166,6 +164,10 @@ class EndMarker(_LeafWithoutNewlines):
     __slots__ = ()
     type = 'endmarker'
 
+    @utf8_repr
+    def __repr__(self):
+        return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix))
+
 
 class Newline(PythonLeaf):
     """Contains NEWLINE and ENDMARKER tokens."""
@@ -235,7 +237,6 @@ class Name(_LeafWithoutNewlines):
         return None
 
 
-
 class Literal(PythonLeaf):
     __slots__ = ()
 
@@ -653,6 +654,7 @@ class Function(ClassOrFunc):
         except IndexError:
             return None
 
+
 class Lambda(Function):
     """
     Lambdas are basically trimmed functions, so give it the same interface.
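The new `EndMarker.__repr__` makes the endmarker's prefix visible, which is where trailing comments and blank lines are stored and is exactly what the diff-parser fix above has to reason about. A quick sketch:

```python
import parso

module = parso.parse('x = 1\n# trailing comment\n')
endmarker = module.children[-1]
# Previously the repr hid the prefix; now it shows it:
print(repr(endmarker))  # <EndMarker: prefix='# trailing comment\n'>
```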
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -502,3 +502,8 @@ def test_endmarker_newline(differ):
 
     differ.initialize(code1)
     differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+
+
+def test_newlines_at_end(differ):
+    differ.initialize('a\n\n')
+    differ.parse('a\n', copies=1)
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -227,3 +227,11 @@ def test_endmarker_end_pos():
 def test_indentation(code, types):
     actual_types = [t.type for t in _get_token_list(code)]
     assert actual_types == types + [ENDMARKER]
+
+
+def test_error_string():
+    t1, endmarker = _get_token_list(' "\n')
+    assert t1.type == ERRORTOKEN
+    assert t1.prefix == ' '
+    assert t1.string == '"\n'
+    assert endmarker.string == ''