Merged the tokenize is_identifier changes.

2015-02-01 20:32:01 +01:00
parent 9a0f1363e3 86391268a7
commit e913872192
3 changed files with 32 additions and 3 deletions
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -29,5 +29,6 @@ Syohei Yoshida (@syohex) <syohex@gmail.com>
 ppalucky (@ppalucky)
 immerrr (@immerrr) immerrr@gmail.com
 Albertas Agejevas (@alga)
 Savor d'Isavano (@KenetJervet) <newelevenken@163.com>
 Note: (@user) means a GitHub user name.
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -16,12 +16,18 @@ import re
 from io import StringIO
 from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER,
                               NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
 from jedi._compatibility import is_py3
 # Matches an encoding declaration ("coding:" or "coding=") and captures the
 # encoding name in group 1.
 # NOTE(review): the pattern is not a raw string, so "\s" and "\w" rely on
 # Python passing unrecognised escapes through unchanged -- consider r"...".
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 # Choose the predicate the tokenizer uses to decide whether the initial
 # character of a token starts an ordinary name. The choice depends on is_py3
 # from jedi._compatibility.
 if is_py3:
    # Python 3 has str.isidentifier() to check if a char is a valid identifier
    is_identifier = str.isidentifier
 else:
    # Fallback (presumably Python 2 -- confirm against is_py3): only ASCII
    # letters and the underscore are accepted. `in` on a string is a substring
    # test, so this only behaves as a character check when it is given a
    # single character.
    namechars = string.ascii_letters + '_'
    is_identifier = lambda s: s in namechars
 # COMMENT takes the value of N_TOKENS as imported from jedi.parser.token.
 COMMENT = N_TOKENS
@@ -247,7 +253,7 @@ def generate_tokens(readline, line_offset=0):
                    break
                else:                                       # ordinary string
                    yield STRING, token, spos, prefix
-            elif initial in namechars:                      # ordinary name
+            elif is_identifier(initial):                      # ordinary name
                if token in ALWAYS_BREAK_TOKEN:
                    paren_level = 0
                    while True:
--- a/test/test_parser/test_tokenize.py
+++ b/test/test_parser/test_tokenize.py
@@ -1,9 +1,13 @@
 # -*- coding: utf-8    # This file contains Unicode characters.
 from io import StringIO
 from token import NEWLINE, STRING
-from jedi._compatibility import u
+from jedi._compatibility import u, is_py3
 from jedi.parser.token import NAME
 from jedi import parser
 from ..helpers import unittest
@@ -73,6 +77,24 @@ asdfasdf""" + "h"
            if value == 'if':
                self.assertEqual(prefix, '    ')
    def test_identifier_contains_unicode(self):
        """Check that a non-ASCII function name is tokenized as one NAME token.

        The assertions only run when ``is_py3`` is true; otherwise the test
        body does nothing.
        """
        fundef = u('''
 def 我あφ():
    pass
 ''')
        fundef_io = StringIO(fundef)
        if is_py3:
            tokens = parser.tokenize.generate_tokens(fundef_io.readline)
            token_list = list(tokens)
            # Find the first token whose text (index 1) equals the Unicode
            # name; the default of None means no such token was produced.
            identifier_token = next(
                (token for token in token_list if token[1] == '我あφ'),
                None
            )
            self.assertIsNotNone(identifier_token)
            # Index 0 of a token is its type.
            self.assertEqual(identifier_token[0], NAME)
        else:
            # NOTE(review): when is_py3 is false the test passes silently
            # instead of being reported as skipped.
            pass
 def test_tokenizer_with_string_literal_backslash():
    import jedi