9 Commits

Author          SHA1        Date                        Message
Dave Halter     6f385bdba1  2018-05-21 12:55:49 +02:00  Not testing Python 3.3 anymore on travis. It seems to be broken
Dave Halter     4fc31c58b3  2018-05-21 12:48:31 +02:00  Add a changelog for 0.2.1
Dave Halter     689decc66c  2018-05-21 12:44:10 +02:00  Push the version
Dave Halter     c2eacdb81c  2018-05-20 19:13:50 +02:00  The diff parser was slighly off with prefixes, fixes #1121
Dave Halter     ac0bf4fcdd  2018-05-17 09:56:16 +02:00  A better repr for the endmarker
Dave Halter     948f9ccecc  2018-04-23 23:42:11 +02:00  Merge branch 'master' of github.com:davidhalter/parso
Dave Halter     f20106d88e  2018-04-22 19:28:30 +02:00  Fix a prefix issue with error leafs.
Aaron Meurer    f4912f6c17  2018-04-19 22:20:23 +02:00  Use the correct field name in the PythonToken repr
Jonas Tranberg  bf5a4b7c2c  2018-04-19 10:16:33 +02:00  Added path param to load_grammar for loading custom grammar files
9 changed files with 46 additions and 17 deletions

.travis.yml

@@ -3,7 +3,6 @@ sudo: false
 python:
   - 2.6
   - 2.7
-  - 3.3
   - 3.4
   - 3.5
   - 3.6

CHANGELOG.rst

@@ -3,6 +3,12 @@
 Changelog
 ---------
 
+0.2.1 (2018-05-21)
++++++++++++++++++++
+
+- A bugfix for the diff parser.
+- Grammar files can now be loaded from a specific path.
+
 0.2.0 (2018-04-15)
 +++++++++++++++++++

parso/__init__.py

@@ -43,7 +43,7 @@ from parso.grammar import Grammar, load_grammar
 from parso.utils import split_lines, python_bytes_to_unicode
 
-__version__ = '0.2.0'
+__version__ = '0.2.1'
 
 
 def parse(code=None, **kwargs):

parso/grammar.py

@@ -20,7 +20,7 @@ class Grammar(object):
     """
     :py:func:`parso.load_grammar` returns instances of this class.
 
-    Creating custom grammars by calling this is not supported, yet.
+    Creating custom none-python grammars by calling this is not supported, yet.
     """
 
     #:param text: A BNF representation of your grammar.
     _error_normalizer_config = None
@@ -219,12 +219,13 @@ def load_grammar(**kwargs):
         version.
 
     :param str version: A python version string, e.g. ``version='3.3'``.
+    :param str path: A path to a grammar file
     """
-    def load_grammar(language='python', version=None):
+    def load_grammar(language='python', version=None, path=None):
         if language == 'python':
             version_info = parse_version_string(version)
-            file = os.path.join(
+            file = path or os.path.join(
                 'python',
                 'grammar%s%s.txt' % (version_info.major, version_info.minor)
             )
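
For callers, the new keyword means a grammar definition can be read from an arbitrary file instead of the bundled grammarXY.txt files. A minimal sketch of both call styles (the custom path below is a hypothetical placeholder; the file must contain a grammar in the same format as the bundled ones):

    import parso

    # Unchanged default: resolve the bundled grammar for a Python version.
    grammar = parso.load_grammar(version='3.6')

    # New in 0.2.1: load a grammar file from an explicit path instead.
    # '/tmp/custom_grammar.txt' is a placeholder, not a file shipped with parso.
    custom = parso.load_grammar(path='/tmp/custom_grammar.txt')
    module = custom.parse('x = 1\n')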

parso/python/diff.py

@@ -490,6 +490,9 @@ class _NodesStack(object):
         new_tos = tos
         for node in nodes:
+            if node.start_pos[0] > until_line:
+                break
+
             if node.type == 'endmarker':
                 # We basically removed the endmarker, but we are not allowed to
                 # remove the newline at the end of the line, otherwise it's
@@ -501,8 +504,6 @@ class _NodesStack(object):
                 # Endmarkers just distort all the checks below. Remove them.
                 break
 
-            if node.start_pos[0] > until_line:
-                break
             # TODO this check might take a bit of time for large files. We
             # might want to change this to do more intelligent guessing or
             # binary search.
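
The fix is purely an ordering change: the until_line cut-off now runs before the endmarker special-casing, so a node that starts past the copied range can no longer be consumed by the endmarker branch first. A toy sketch of the control flow (simplified, not parso's actual code):

    def copy_nodes(nodes, until_line):
        # Simplified shape of the loop above: stop at the line limit first,
        # then handle endmarkers; doing these checks in the reverse order is
        # what threw the prefixes off in issue #1121.
        copied = []
        for node in nodes:
            if node.start_pos[0] > until_line:
                break
            if node.type == 'endmarker':
                break
            copied.append(node)
        return copied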

parso/python/tokenize.py

@@ -28,7 +28,8 @@ from parso.utils import split_lines
 
 TokenCollection = namedtuple(
     'TokenCollection',
-    'pseudo_token single_quoted triple_quoted endpats fstring_pattern_map always_break_tokens',
+    'pseudo_token single_quoted triple_quoted endpats whitespace '
+    'fstring_pattern_map always_break_tokens',
 )
 
 BOM_UTF8_STRING = BOM_UTF8.decode('utf-8')
@@ -114,6 +115,7 @@ def _create_token_collection(version_info):
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
+    whitespace = _compile(Whitespace)
     Comment = r'#[^\r\n]*'
     Name = r'\w+'
@@ -225,7 +227,7 @@ def _create_token_collection(version_info):
     pseudo_token_compiled = _compile(PseudoToken)
     return TokenCollection(
         pseudo_token_compiled, single_quoted, triple_quoted, endpats,
-        fstring_pattern_map, ALWAYS_BREAK_TOKENS
+        whitespace, fstring_pattern_map, ALWAYS_BREAK_TOKENS
     )
@@ -244,7 +246,7 @@ class PythonToken(Token):
         return tok_name[self.type]
 
     def __repr__(self):
-        return ('TokenInfo(type=%s, string=%r, start=%r, prefix=%r)' %
+        return ('TokenInfo(type=%s, string=%r, start_pos=%r, prefix=%r)' %
                 self._replace(type=self._get_type_name()))
@@ -354,7 +356,8 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
-    pseudo_token, single_quoted, triple_quoted, endpats, fstring_pattern_map, always_break_tokens, = \
+    pseudo_token, single_quoted, triple_quoted, endpats, whitespace, \
+        fstring_pattern_map, always_break_tokens, = \
         _get_token_collection(version_info)
     paren_level = 0  # count parentheses
     indents = [0]
@@ -435,10 +438,14 @@ def tokenize_lines(lines, version_info, start_pos=(1, 0)):
             pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:                             # scan for tokens
-                txt = line[pos:]
-                if txt.endswith('\n'):
+                if line.endswith('\n'):
                     new_line = True
-                yield PythonToken(ERRORTOKEN, txt, (lnum, pos), additional_prefix)
+                match = whitespace.match(line, pos)
+                pos = match.end()
+                yield PythonToken(
+                    ERRORTOKEN, line[pos:], (lnum, pos),
+                    additional_prefix + match.group(0)
+                )
                 additional_prefix = ''
                 break
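
The observable effect of the last hunk is that leading whitespace in front of an otherwise unmatchable token now lands in the error token's prefix instead of its string, and the repr fix means PythonToken prints the start_pos field it actually carries. A minimal sketch (mirrors the new test_error_string below; output shown as a comment is approximate):

    from parso.python.tokenize import tokenize
    from parso.utils import parse_version_string

    # An unterminated string preceded by a space: the space is now prefix,
    # not part of the ERRORTOKEN's string.
    error_token = list(tokenize(' "\n', parse_version_string('3.6')))[0]
    print(repr(error_token))
    # TokenInfo(type=ERRORTOKEN, string='"\n', start_pos=(1, 1), prefix=' ')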

parso/python/tree.py

@@ -60,7 +60,6 @@ _GET_DEFINITION_TYPES = set([
 _IMPORTS = set(['import_name', 'import_from'])
 
-
 class DocstringMixin(object):
     __slots__ = ()
@@ -133,7 +132,6 @@ class PythonLeaf(PythonMixin, Leaf):
         return previous_leaf.end_pos
 
-
 class _LeafWithoutNewlines(PythonLeaf):
     """
     Simply here to optimize performance.
@@ -166,6 +164,10 @@ class EndMarker(_LeafWithoutNewlines):
     __slots__ = ()
     type = 'endmarker'
 
+    @utf8_repr
+    def __repr__(self):
+        return "<%s: prefix=%s>" % (type(self).__name__, repr(self.prefix))
+
 
 class Newline(PythonLeaf):
     """Contains NEWLINE and ENDMARKER tokens."""
@@ -235,7 +237,6 @@ class Name(_LeafWithoutNewlines):
         return None
 
-
 class Literal(PythonLeaf):
     __slots__ = ()
@@ -653,6 +654,7 @@ class Function(ClassOrFunc):
         except IndexError:
             return None
 
+
 class Lambda(Function):
     """
     Lambdas are basically trimmed functions, so give it the same interface.
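
With the new __repr__, the otherwise invisible trailing prefix becomes visible while debugging. A small sketch (a trailing comment with no final newline ends up as the endmarker's prefix):

    import parso

    module = parso.parse('x = 1\n# trailing comment')
    endmarker = module.children[-1]
    print(repr(endmarker))  # e.g. <EndMarker: prefix='# trailing comment'>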

test/test_diff_parser.py

@@ -502,3 +502,8 @@ def test_endmarker_newline(differ):
     differ.initialize(code1)
     differ.parse(code2, parsers=2, copies=2, expect_error_leaves=True)
+
+
+def test_newlines_at_end(differ):
+    differ.initialize('a\n\n')
+    differ.parse('a\n', copies=1)
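
Through the public API, roughly the same scenario can be reproduced with the grammar's diff cache; a minimal sketch, assuming diff_cache is the incremental-reparse switch in parso 0.2.x (the differ fixture above wraps approximately this):

    import parso

    grammar = parso.load_grammar()
    # The first parse fills the cache; the second reuses it and runs the
    # diff parser on the shrunk source, the case fixed in #1121.
    grammar.parse('a\n\n', diff_cache=True)
    module = grammar.parse('a\n', diff_cache=True)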

test/test_tokenize.py

@@ -227,3 +227,11 @@ def test_endmarker_end_pos():
 def test_indentation(code, types):
     actual_types = [t.type for t in _get_token_list(code)]
     assert actual_types == types + [ENDMARKER]
+
+
+def test_error_string():
+    t1, endmarker = _get_token_list(' "\n')
+    assert t1.type == ERRORTOKEN
+    assert t1.prefix == ' '
+    assert t1.string == '"\n'
+    assert endmarker.string == ''