Forked from VimPlug/jedi

Trying to move the indent/dedent logic back into the tokenizer.

This commit is contained in:
Dave Halter
2014-11-28 02:04:04 +01:00
parent 97516eb26b
commit e1d6511f2f
4 changed files with 14 additions and 18 deletions

View File

@@ -310,6 +310,7 @@ class Script(object):
         if not isinstance(stmt, (pr.ExprStmt, pr.KeywordStatement)):
             raise NotFoundError()
+        # TODO remove?
         user_stmt = self._parser.user_stmt()
         if user_stmt is None:
             # Set the start_pos to a pseudo position, that doesn't exist but works
@@ -318,7 +319,7 @@ class Script(object):
         else:
             pos = user_stmt.start_pos
-        stmt.move(pos[0] - 1, pos[1])
+        #stmt.move(pos[0] - 1, pos[1])
         stmt.parent = self._parser.user_scope()
         return stmt

View File

@@ -208,13 +208,13 @@ class Parser(object):
""" """
# For now just discard everything that is not a suite or # For now just discard everything that is not a suite or
# file_input, if we detect an error. # file_input, if we detect an error.
for i, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))): for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
# `suite` can sometimes be only simple_stmt, not stmt. # `suite` can sometimes be only simple_stmt, not stmt.
symbol = grammar.number2symbol[_type] symbol = grammar.number2symbol[_type]
if symbol in ('file_input', 'suite'): if symbol in ('file_input', 'suite'):
index = i
break break
# No success finding a transition # No success finding a transition
print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
self._stack_removal(grammar, stack, index + 1, value, start_pos) self._stack_removal(grammar, stack, index + 1, value, start_pos)
def _stack_removal(self, grammar, stack, start_index, value, start_pos): def _stack_removal(self, grammar, stack, start_index, value, start_pos):

View File

@@ -96,7 +96,6 @@ class Parser(object):
self.stack = [stackentry] self.stack = [stackentry]
self.rootnode = None self.rootnode = None
self.error_recovery = error_recovery self.error_recovery = error_recovery
indent_errors = [] # TODO generate those.
def tokenize(self, tokenizer): def tokenize(self, tokenizer):
""" """
@@ -104,20 +103,7 @@ class Parser(object):
parse function a normal tokenizer (e.g. the lib2to3 one). But if we use parse function a normal tokenizer (e.g. the lib2to3 one). But if we use
the parser stack we are able to do error recovery from wrong indents. the parser stack we are able to do error recovery from wrong indents.
""" """
indents = [0]
new_line = False
for type, value, prefix, start_pos in tokenizer: for type, value, prefix, start_pos in tokenizer:
if type == token.NEWLINE:
new_line = True
elif new_line:
indent = start_pos[1]
if indent > indents[-1]:
yield token.INDENT, '', '', start_pos
indents.append(indent)
while indent < indents[-1]:
yield token.DEDENT, '', '', start_pos
indents.pop()
new_line = False
yield type, value, prefix, start_pos yield type, value, prefix, start_pos
def parse(self, tokenizer): def parse(self, tokenizer):

View File

@@ -15,7 +15,7 @@ import string
import re import re
from io import StringIO from io import StringIO
from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
ERRORTOKEN, NEWLINE) ERRORTOKEN, NEWLINE, INDENT, DEDENT)
from jedi._compatibility import u from jedi._compatibility import u
@@ -224,6 +224,7 @@ def generate_tokens(readline, line_offset=0):
The original stdlib Python version with minor modifications. The original stdlib Python version with minor modifications.
Modified to not care about dedents. Modified to not care about dedents.
""" """
indents = [0]
lnum = line_offset lnum = line_offset
numchars = '0123456789' numchars = '0123456789'
contstr = '' contstr = ''
@@ -272,6 +273,12 @@ def generate_tokens(readline, line_offset=0):
if new_line and initial not in '\r\n#': if new_line and initial not in '\r\n#':
new_line = False new_line = False
if start > indents[-1]:
yield Token(INDENT, '', spos, '')
indents.append(start)
while start < indents[-1]:
yield Token(DEDENT, '', spos, '')
indents.pop()
if (initial in numchars or # ordinary number if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')): (initial == '.' and token != '.' and token != '...')):
@@ -314,4 +321,6 @@ def generate_tokens(readline, line_offset=0):
else: else:
yield Token(OP, token, spos, prefix) yield Token(OP, token, spos, prefix)
for indent in indents[1:]:
yield Token(DEDENT, '', (lnum, 0), '')
yield Token(ENDMARKER, '', (lnum, 0), prefix) yield Token(ENDMARKER, '', (lnum, 0), prefix)