diff --git a/jedi/api/__init__.py b/jedi/api/__init__.py
index 9ce7a8dc..42dd09a2 100644
--- a/jedi/api/__init__.py
+++ b/jedi/api/__init__.py
@@ -15,6 +15,7 @@ from itertools import chain
 
 from jedi._compatibility import next, unicode, builtins
 from jedi.parser import Parser
+from jedi.parser.tokenize import NoErrorTokenizer
 from jedi.parser import representation as pr
 from jedi.parser.user_context import UserContext, UserContextParser
 from jedi import debug
@@ -227,8 +228,8 @@ class Script(object):
         return scopes
 
     def _get_under_cursor_stmt(self, cursor_txt):
-        offset = self._pos[0] - 1, self._pos[1]
-        r = Parser(cursor_txt, no_docstr=True, offset=offset)
+        tokenizer = NoErrorTokenizer(cursor_txt, self._pos[0] - 1)
+        r = Parser(cursor_txt, no_docstr=True, tokenizer=tokenizer)
         try:
             stmt = r.module.statements[0]
         except IndexError:
diff --git a/jedi/parser/__init__.py b/jedi/parser/__init__.py
index 85482ed2..3e38690b 100644
--- a/jedi/parser/__init__.py
+++ b/jedi/parser/__init__.py
@@ -17,7 +17,7 @@ complexity of the ``Parser`` (there's another parser sitting inside
 """
 import keyword
 
-from jedi._compatibility import next, StringIO
+from jedi._compatibility import next
 from jedi import debug
 from jedi import common
 from jedi.parser import representation as pr
@@ -35,23 +35,21 @@ class Parser(object):
     :param module_path: The path of the module in the file system, may be None.
     :type module_path: str
    :param no_docstr: If True, a string at the beginning is not a docstr.
-    :param is_fast_parser: -> for fast_parser
+    :param tokenizer: A tokenizer to use instead of the default ``NoErrorTokenizer``.
     :param top_module: Use this module as a parent instead of `self.module`.
     """
     def __init__(self, source, module_path=None, no_docstr=False,
-                 offset=(0, 0), is_fast_parser=None, top_module=None):
+                 tokenizer=None, top_module=None, offset=0, is_fast=False):
         self.no_docstr = no_docstr
 
-        self.start_pos = self.end_pos = 1 + offset[0], offset[1]
+        self.start_pos = self.end_pos = 1 + offset, 0
         # initialize global Scope
         self.module = pr.SubModule(module_path, self.start_pos, top_module)
         self._scope = self.module
         self._current = (None, None)
 
-        source = source + '\n'  # end with \n, because the parser needs it
-        buf = StringIO(source)
-        self._gen = tokenize.NoErrorTokenizer(buf.readline, offset, is_fast_parser)
-        self.top_module = top_module or self.module
+        self._gen = tokenizer or tokenize.NoErrorTokenizer(source, offset, is_fast)
+        self._top_module = top_module or self.module
         try:
             self._parse()
         except (common.MultiLevelStopIteration, StopIteration):
@@ -386,7 +384,7 @@ class Parser(object):
                           as_names=as_names,
                           names_are_set_vars=names_are_set_vars)
 
-        stmt.parent = self.top_module
+        stmt.parent = self._top_module
         self._check_user_stmt(stmt)
 
         if tok in always_break + not_first_break:
@@ -455,9 +453,10 @@ class Parser(object):
                 and not isinstance(self._scope, pr.SubModule):
             self._scope = self.module
 
-        use_as_parent_scope = self.top_module if isinstance(
-            self._scope, pr.SubModule
-        ) else self._scope
+        if isinstance(self._scope, pr.SubModule):
+            use_as_parent_scope = self._top_module
+        else:
+            use_as_parent_scope = self._scope
         first_pos = self.start_pos
         if tok == 'def':
             func = self._parse_function()
@@ -630,7 +629,7 @@ class Parser(object):
                 else:
                     if token_type not in [tokenize.COMMENT, tokenize.INDENT,
                                           tokenize.NEWLINE, tokenize.NL]:
-                        debug.warning('token not classified %s %s %s', tok,
-                                      token_type, self.start_pos[0])
+                        debug.warning('Token not used: %s %s %s', tok,
+                                      tokenize.tok_name[token_type], self.start_pos)
                     continue
                 self.no_docstr = False
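
Note on the two hunks above: `Parser` no longer builds its own tokenizer from an `offset`/`readline` pair; callers construct one and pass it in, with a `NoErrorTokenizer` created internally as a fallback. A minimal sketch of the new calling convention, assuming only the signatures visible in this diff (the sample source string is illustrative):

    from jedi.parser import Parser
    from jedi.parser.tokenize import NoErrorTokenizer

    source = "a = 1"

    # Explicit tokenizer; line_offset shifts the reported line numbers,
    # which is what _get_under_cursor_stmt() uses it for above.
    tokenizer = NoErrorTokenizer(source, line_offset=0)
    p = Parser(source, no_docstr=True, tokenizer=tokenizer)
    stmt = p.module.statements[0]  # the statement `a = 1`

    # Without a tokenizer argument, the Parser falls back to building
    # NoErrorTokenizer(source, offset, is_fast) itself.
    p = Parser(source)
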
diff --git a/jedi/parser/fast.py b/jedi/parser/fast.py
index a0f7a435..c3eee157 100644
--- a/jedi/parser/fast.py
+++ b/jedi/parser/fast.py
@@ -362,9 +362,10 @@ class FastParser(use_metaclass(CachedFastParser)):
                 if nodes[index].code != code:
                     raise ValueError()
             except ValueError:
-                p = Parser(parser_code, self.module_path, offset=(line_offset, 0),
-                           is_fast_parser=True, top_module=self.module,
-                           no_docstr=no_docstr)
+                tokenizer = tokenize.NoErrorTokenizer(parser_code, line_offset, True)
+                p = Parser(parser_code, self.module_path, tokenizer=tokenizer,
+                           top_module=self.module, no_docstr=no_docstr,
+                           is_fast=True, offset=line_offset)
                 p.module.parent = self.module
         else:
             if nodes[index] != self.current_node:
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index 65da85d3..482678f9 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -11,6 +11,7 @@ from __future__ import absolute_import
 import string
 import re
+from jedi._compatibility import StringIO
 from token import *
 import collections
 
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -142,7 +143,14 @@ del _compile
 
 tabsize = 8
 
+def source_tokens(source, line_offset=0):
+    source = source + '\n'  # end with \n, because the parser needs it
+    readline = StringIO(source).readline
+    return generate_tokens(readline, line_offset)
+
+
 def generate_tokens(readline, line_offset=0):
+    """The original stdlib Python version with minor modifications"""
     lnum = line_offset
     parenlev = 0
     continued = False
@@ -291,9 +299,9 @@ FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
 
 
 class NoErrorTokenizer(object):
-    def __init__(self, readline, offset=(0, 0), is_fast_parser=False):
-        self.readline = readline
-        self.gen = generate_tokens(readline, offset[0])
+    def __init__(self, source, line_offset=0, is_fast_parser=False):
+        self.source = source
+        self.gen = source_tokens(source, line_offset)
         self.closed = False
         self.is_first = True
         self.push_backs = []
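
The tokenize changes split the responsibilities: `source_tokens()` owns the buffer setup (appending the trailing newline and wrapping the source in a `StringIO`), `generate_tokens()` remains the modified stdlib generator, and `NoErrorTokenizer` is now constructed from a source string instead of a `readline` callable. A rough sketch of the raw token stream, assuming the four-tuple form that jedi's generator yields (type, string, start position, end position):

    from jedi.parser.tokenize import source_tokens, tok_name

    for token_type, tok, start, end in source_tokens("x = 1", line_offset=0):
        # prints e.g.: NAME 'x' (1, 0) (1, 1)
        print(tok_name[token_type], repr(tok), start, end)
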
diff --git a/jedi/parser/user_context.py b/jedi/parser/user_context.py
index d482a08a..904bf063 100644
--- a/jedi/parser/user_context.py
+++ b/jedi/parser/user_context.py
@@ -1,6 +1,5 @@
 import re
 import os
-import sys
 
 from jedi import cache
 from jedi.parser import tokenize
@@ -64,45 +63,42 @@ class UserContext(object):
         level = 0
         force_point = False
         last_type = None
-        try:
-            for token_type, tok, start, end in gen:
-                # print 'tok', token_type, tok, force_point
-                if last_type == token_type == tokenize.NAME:
-                    string += ' '
+        for token_type, tok, start, end in gen:
+            # print 'tok', token_type, tok, force_point
+            if last_type == token_type == tokenize.NAME:
+                string += ' '
 
-                if level > 0:
-                    if tok in close_brackets:
-                        level += 1
-                    if tok in open_brackets:
-                        level -= 1
-                elif tok == '.':
-                    force_point = False
-                elif force_point:
-                    # it is reversed, therefore a number is getting recognized
-                    # as a floating point number
-                    if token_type == tokenize.NUMBER and tok[0] == '.':
-                        force_point = False
-                    else:
-                        break
-                elif tok in close_brackets:
+            if level > 0:
+                if tok in close_brackets:
                     level += 1
-                elif token_type in [tokenize.NAME, tokenize.STRING]:
-                    force_point = True
-                elif token_type == tokenize.NUMBER:
-                    pass
+                if tok in open_brackets:
+                    level -= 1
+            elif tok == '.':
+                force_point = False
+            elif force_point:
+                # it is reversed, therefore a number is getting recognized
+                # as a floating point number
+                if token_type == tokenize.NUMBER and tok[0] == '.':
+                    force_point = False
                 else:
-                    self._column_temp = self._line_length - end[1]
                     break
-
-                x = start_pos[0] - end[0] + 1
-                l = self.get_line(x)
-                l = self._first_line if x == start_pos[0] else l
-                start_cursor = x, len(l) - end[1]
+            elif tok in close_brackets:
+                level += 1
+            elif token_type in [tokenize.NAME, tokenize.STRING]:
+                force_point = True
+            elif token_type == tokenize.NUMBER:
+                pass
+            else:
                 self._column_temp = self._line_length - end[1]
-                string += tok
-                last_type = token_type
-        except tokenize.TokenError:
-            debug.warning("Tokenize couldn't finish: %s", sys.exc_info)
+                break
+
+            x = start_pos[0] - end[0] + 1
+            l = self.get_line(x)
+            l = self._first_line if x == start_pos[0] else l
+            start_cursor = x, len(l) - end[1]
+            self._column_temp = self._line_length - end[1]
+            string += tok
+            last_type = token_type
 
         # string can still contain spaces at the end
         return string[::-1].strip(), start_cursor
@@ -215,9 +211,9 @@ class UserContextParser(object):
         # or `abs( ` where the cursor is out in the whitespace.
         if self._user_context.get_path_under_cursor():
             # We really should have a user_stmt, but the parser couldn't
-            # process it - probably a Syntax Error.
-            debug.warning('Something is probably wrong with the syntax under the cursor.')
-            return None
+            # process it - probably a Syntax Error (or in a comment).
+            debug.warning('No statement under the cursor.')
+            return
         pos = next(self._user_context.get_context(yield_positions=True))
         user_stmt = self.module().get_statement_for_position(pos, include_imports=True)
         return user_stmt
diff --git a/test/test_parser/test_token.py b/test/test_parser/test_token.py
index 9b7b1a3f..3bbcbca4 100644
--- a/test/test_parser/test_token.py
+++ b/test/test_parser/test_token.py
@@ -12,7 +12,7 @@ class TokenTest(unittest.TestCase):
 def testit():
     a = "huhu"
 ''')
-        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        tok = parsed.module.subscopes[0].statements[0].token_list[2]
         self.assertEqual(tok.end_pos, (3, 14))
 
     def test_end_pos_multi_line(self):
@@ -21,5 +21,5 @@ def testit():
     a = """huhu
 asdfasdf""" + "h"
 ''')
-        tok = parsed.top_module.subscopes[0].statements[0].token_list[2]
+        tok = parsed.module.subscopes[0].statements[0].token_list[2]
         self.assertEqual(tok.end_pos, (4, 11))
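
Finally, the test updates track the attribute change on `Parser`: the parsed module is reached through `parsed.module`, since `top_module` is now the private `_top_module`. A sketch of the access path the tests exercise, assuming the `parser.Parser` import style of jedi's test suite (the flush-left string mirrors the test source above):

    from jedi import parser

    parsed = parser.Parser('''
def testit():
    a = "huhu"
''')
    stmt = parsed.module.subscopes[0].statements[0]  # the statement a = "huhu"
    print(stmt.token_list[2].end_pos)  # (3, 14), as asserted above
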