From c3c07c4ec2680c7deea5e2f900521b184c1d0f10 Mon Sep 17 00:00:00 2001 From: Savor d'Isavano Date: Fri, 16 Jan 2015 18:45:34 +0800 Subject: [PATCH] Fixed issue #526. --- AUTHORS.txt | 1 + jedi/parser/tokenize.py | 10 ++++++++-- test/test_parser/test_tokenize.py | 24 +++++++++++++++++++++++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/AUTHORS.txt b/AUTHORS.txt index 376078eb..0fb69e6b 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -29,5 +29,6 @@ Syohei Yoshida (@syohex) ppalucky (@ppalucky) immerrr (@immerrr) immerrr@gmail.com Albertas Agejevas (@alga) +Savor d'Isavano (@KenetJervet) Note: (@user) means a github user name. diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index 6765f0b0..b82b31e8 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -16,6 +16,7 @@ import re from io import StringIO from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) +from jedi._compatibility import is_py3 cookie_re = re.compile("coding[:=]\s*([-\w.]+)") @@ -26,7 +27,12 @@ cookie_re = re.compile("coding[:=]\s*([-\w.]+)") FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally'] -namechars = string.ascii_letters + '_' +if is_py3: + # Python 3 has str.isidentifier() to check if a char is a valid identifier + is_identifier = str.isidentifier +else: + namechars = string.ascii_letters + '_' + is_identifier = lambda s: s in namechars COMMENT = N_TOKENS @@ -248,7 +254,7 @@ def generate_tokens(readline, line_offset=0): break else: # ordinary string yield STRING, token, spos, prefix - elif initial in namechars: # ordinary name + elif is_identifier(initial): # ordinary name if token in ALWAYS_BREAK_TOKEN: paren_level = 0 while True: diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py index 03686206..84e117be 100644 --- a/test/test_parser/test_tokenize.py +++ b/test/test_parser/test_tokenize.py @@ -1,9 +1,13 @@ +# -*- coding: utf-8 # This file contains Unicode characters. + from io import StringIO from token import NEWLINE, STRING -from jedi._compatibility import u +from jedi._compatibility import u, is_py3 +from jedi.parser.token import NAME from jedi import parser + from ..helpers import unittest @@ -73,6 +77,24 @@ asdfasdf""" + "h" if value == 'if': self.assertEqual(prefix, ' ') + def test_identifier_contains_unicode(self): + fundef = u(''' +def 我あφ(): + pass +''') + fundef_io = StringIO(fundef) + if is_py3: + tokens = parser.tokenize.generate_tokens(fundef_io.readline) + token_list = list(tokens) + identifier_token = next( + (token for token in token_list if token[1] == '我あφ'), + None + ) + self.assertIsNotNone(identifier_token) + self.assertEqual(identifier_token[0], NAME) + else: + pass + def test_tokenizer_with_string_literal_backslash(): import jedi