diff --git a/parso/normalizer.py b/parso/normalizer.py
index 83db7a3..c5d843f 100644
--- a/parso/normalizer.py
+++ b/parso/normalizer.py
@@ -9,6 +9,11 @@ class Normalizer(object):
         >>> class MyRule(Rule):
         >>>     error_code = 42
         """
+        try:
+            rules = cls.rules
+        except AttributeError:
+            rules = cls.rules = []
+        rules.append(rule)
         return rule
 
     def normalize(self, leaf):
diff --git a/parso/python/normalizer.py b/parso/python/normalizer.py
index 9857d9b..647b1ce 100644
--- a/parso/python/normalizer.py
+++ b/parso/python/normalizer.py
@@ -18,3 +18,7 @@ class PEP8Normalizer(Normalizer):
 
     def iter_errors(self, leaf):
         return iter([])
+
+
+class Rule(object):
+    pass
diff --git a/parso/python/prefix.py b/parso/python/prefix.py
new file mode 100644
index 0000000..f6d3142
--- /dev/null
+++ b/parso/python/prefix.py
@@ -0,0 +1,50 @@
+import re
+
+from parso.tokenize import group
+
+
+class PrefixToken(object):
+    def __init__(self, value, start_pos):
+        self.value = value
+        self.start_pos = start_pos
+
+    @property
+    def end_pos(self):
+        if '\n' in self.value:
+            return self.start_pos[0] + 1, 0
+        return self.start_pos[0], self.start_pos[1] + len(self.value)
+
+
+_comment = r'#[^\n\r\f]*'
+_backslash = r'\\\r?\n?'
+_whitespace = r' +'
+_newline = r'\r?\n'
+_form_feed = r'\f'
+
+_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed)
+_regex = re.compile(_regex)
+
+
+_types = {
+    ' ': 'spaces',
+    '#': 'comment',
+    '\\': 'backslash',
+    '\f': 'form_feed',
+    '\n': 'newline',
+    '\r': 'newline'
+}
+
+
+def split_prefix(prefix, start_pos):
+    """Yield a PrefixToken for each whitespace/comment piece of *prefix*."""
+    line, column = start_pos
+    start = 0
+    while start != len(prefix):
+        match = _regex.match(prefix, start)
+        value = match.group(0)
+        yield PrefixToken(value, (line, column + start))
+
+        start = match.end(0)
+        if '\n' in value:
+            line += 1
+            column = -start
diff --git a/parso/python/tree.py b/parso/python/tree.py
index dbf6855..9d6fa50 100644
--- a/parso/python/tree.py
+++ b/parso/python/tree.py
@@ -29,6 +29,7 @@ from parso._compatibility import utf8_repr, unicode
 from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \
     search_ancestor
 from parso.python import normalizer
+from parso.python.prefix import split_prefix
 
 
 class DocstringMixin(object):
@@ -99,6 +100,9 @@ class PythonMixin(object):
 class PythonLeaf(PythonMixin, Leaf):
     __slots__ = ()
 
+    def _split_prefix(self):
+        return split_prefix(self.prefix, self.start_pos)
+
 
 class _LeafWithoutNewlines(PythonLeaf):
     """
diff --git a/test/test_prefix.py b/test/test_prefix.py
new file mode 100644
index 0000000..c0883b5
--- /dev/null
+++ b/test/test_prefix.py
@@ -0,0 +1,24 @@
+import pytest
+import parso
+
+
+@pytest.mark.parametrize(('string', 'tokens'), [
+    ('#', ['#']),
+    (' # ', [' ', '# ']),
+    (' # \n', [' ', '# ', '\n']),
+    (' # \f\n', [' ', '# ', '\f', '\n']),
+    (' \n', [' ', '\n']),
+    (' \n ', [' ', '\n', ' ']),
+    (' \f ', [' ', '\f', ' ']),
+    (' \f ', [' ', '\f', ' ']),
+    (' \r\n', [' ', '\r\n']),
+    ('\\', ['\\']),
+    ('\\\n', ['\\\n']),
+    ('\\\r\n', ['\\\r\n']),
+])
+def test_simple_prefix_splitting(string, tokens):
+    tree = parso.parse(string)
+    leaf = tree.children[0]
+    assert leaf.type == 'endmarker'
+    parsed_tokens = list(leaf._split_prefix())
+    assert [t.value for t in parsed_tokens] == tokens