forked from VimPlug/jedi
Trying to move the indent/dedent logic back into the tokenizer.
@@ -310,6 +310,7 @@ class Script(object):
         if not isinstance(stmt, (pr.ExprStmt, pr.KeywordStatement)):
             raise NotFoundError()
 
+        # TODO remove?
         user_stmt = self._parser.user_stmt()
         if user_stmt is None:
             # Set the start_pos to a pseudo position, that doesn't exist but works
@@ -318,7 +319,7 @@ class Script(object):
         else:
             pos = user_stmt.start_pos
 
-        stmt.move(pos[0] - 1, pos[1])
+        #stmt.move(pos[0] - 1, pos[1])
         stmt.parent = self._parser.user_scope()
         return stmt
 
@@ -208,13 +208,13 @@ class Parser(object):
         """
         # For now just discard everything that is not a suite or
        # file_input, if we detect an error.
-        for i, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
+        for index, (dfa, state, (_type, _)) in reversed(list(enumerate(stack))):
             # `suite` can sometimes be only simple_stmt, not stmt.
             symbol = grammar.number2symbol[_type]
             if symbol in ('file_input', 'suite'):
-                index = i
                 break
         # No success finding a transition
+        print('err', tokenize.tok_name[typ], repr(value), start_pos, len(stack), index)
         self._stack_removal(grammar, stack, index + 1, value, start_pos)
 
     def _stack_removal(self, grammar, stack, start_index, value, start_pos):
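A note on the loop change in the hunk above: renaming `i` to `index` lets the `for` target itself carry the cut position after `break`, so the separate `index = i` assignment can go. A standalone sketch of the idiom, using a hypothetical stack of plain symbol names in place of jedi's real (dfa, state, node) stack entries:

    # Hypothetical stand-in; the real stack holds (dfa, state, (type, nodes)) triples.
    stack = ['file_input', 'stmt', 'expr_stmt', 'testlist']

    for index, symbol in reversed(list(enumerate(stack))):
        # Walk from the top of the stack down to the first node that is
        # safe to cut back to.
        if symbol in ('file_input', 'suite'):
            break

    # Everything above the matching node is discarded, mirroring the
    # _stack_removal(..., index + 1, ...) call.
    assert stack[:index + 1] == ['file_input']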
@@ -96,7 +96,6 @@ class Parser(object):
         self.stack = [stackentry]
         self.rootnode = None
         self.error_recovery = error_recovery
-        indent_errors = [] # TODO generate those.
 
     def tokenize(self, tokenizer):
         """
@@ -104,20 +103,7 @@ class Parser(object):
         parse function a normal tokenizer (e.g. the lib2to3 one). But if we use
         the parser stack we are able to do error recovery from wrong indents.
         """
-        indents = [0]
-        new_line = False
         for type, value, prefix, start_pos in tokenizer:
-            if type == token.NEWLINE:
-                new_line = True
-            elif new_line:
-                indent = start_pos[1]
-                if indent > indents[-1]:
-                    yield token.INDENT, '', '', start_pos
-                    indents.append(indent)
-                while indent < indents[-1]:
-                    yield token.DEDENT, '', '', start_pos
-                    indents.pop()
-                new_line = False
             yield type, value, prefix, start_pos
 
     def parse(self, tokenizer):
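The thirteen lines deleted above are the classic indent-stack algorithm: a NEWLINE arms a flag, and the next token's start column is compared against a stack of open indentation levels, emitting one INDENT when the column grows and one DEDENT per level it closes. The hunks below re-add the same bookkeeping inside generate_tokens. A minimal runnable sketch of the technique, with simplified (type, value, start_column) tuples standing in for jedi's Token objects (blank lines and comments are ignored here for brevity; the tokenizer version below handles them via the '\r\n#' check):

    def add_indents(tokens):
        # Stack of currently open indentation columns; 0 is module level.
        indents = [0]
        new_line = False
        for type, value, start_col in tokens:
            if type == 'NEWLINE':
                new_line = True
            elif new_line:
                if start_col > indents[-1]:     # deeper: open one level
                    yield 'INDENT', '', start_col
                    indents.append(start_col)
                while start_col < indents[-1]:  # shallower: close levels
                    yield 'DEDENT', '', start_col
                    indents.pop()
                new_line = False
            yield type, value, start_col

    tokens = [('NAME', 'if', 0), ('NEWLINE', '\n', 10),
              ('NAME', 'pass', 4), ('NEWLINE', '\n', 8),
              ('NAME', 'x', 0)]
    print([t[0] for t in add_indents(tokens)])
    # ['NAME', 'NEWLINE', 'INDENT', 'NAME', 'NEWLINE', 'DEDENT', 'NAME']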
@@ -15,7 +15,7 @@ import string
 import re
 from io import StringIO
 from token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP,
-                   ERRORTOKEN, NEWLINE)
+                   ERRORTOKEN, NEWLINE, INDENT, DEDENT)
 
 from jedi._compatibility import u
 
@@ -224,6 +224,7 @@ def generate_tokens(readline, line_offset=0):
     The original stdlib Python version with minor modifications.
     Modified to not care about dedents.
     """
+    indents = [0]
     lnum = line_offset
     numchars = '0123456789'
     contstr = ''
@@ -272,6 +273,12 @@ def generate_tokens(readline, line_offset=0):
 
             if new_line and initial not in '\r\n#':
                 new_line = False
+                if start > indents[-1]:
+                    yield Token(INDENT, '', spos, '')
+                    indents.append(start)
+                while start < indents[-1]:
+                    yield Token(DEDENT, '', spos, '')
+                    indents.pop()
 
             if (initial in numchars or # ordinary number
                     (initial == '.' and token != '.' and token != '...')):
@@ -314,4 +321,6 @@ def generate_tokens(readline, line_offset=0):
             else:
                 yield Token(OP, token, spos, prefix)
 
+    for indent in indents[1:]:
+        yield Token(DEDENT, '', (lnum, 0), '')
     yield Token(ENDMARKER, '', (lnum, 0), prefix)
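For comparison, CPython's stdlib tokenizer produces the same kind of stream that the modified generate_tokens now emits, including the dedent flush before ENDMARKER that the final hunk adds. This snippet only illustrates the expected token sequence with the stdlib module; it does not touch jedi's fork:

    import io
    import tokenize

    source = "def f():\n    if x:\n        y\n"
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # Both nested blocks are closed by two DEDENT tokens emitted right
    # before ENDMARKER, matching the `for indent in indents[1:]` loop above.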