From c3c07c4ec2680c7deea5e2f900521b184c1d0f10 Mon Sep 17 00:00:00 2001
From: Savor d'Isavano <anohig_isavay@163.com>
Date: Fri, 16 Jan 2015 18:45:34 +0800
Subject: [PATCH] Fixed issue #526.

---
 AUTHORS.txt                       |  1 +
 jedi/parser/tokenize.py           | 10 ++++++++--
 test/test_parser/test_tokenize.py | 24 +++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/AUTHORS.txt b/AUTHORS.txt
index 376078eb..0fb69e6b 100644
--- a/AUTHORS.txt
+++ b/AUTHORS.txt
@@ -29,5 +29,6 @@ Syohei Yoshida (@syohex) <syohex@gmail.com>
 ppalucky (@ppalucky)
 immerrr (@immerrr) immerrr@gmail.com
 Albertas Agejevas (@alga)
+Savor d'Isavano (@KenetJervet) <newelevenken@163.com>
 
 Note: (@user) means a github user name.
diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py
index 6765f0b0..b82b31e8 100644
--- a/jedi/parser/tokenize.py
+++ b/jedi/parser/tokenize.py
@@ -16,6 +16,7 @@ import re
 from io import StringIO
 from jedi.parser.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER,
                                NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT)
+from jedi._compatibility import is_py3
 
 
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
@@ -26,7 +27,12 @@ cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 FLOWS = ['if', 'else', 'elif', 'while', 'with', 'try', 'except', 'finally']
 
 
-namechars = string.ascii_letters + '_'
+if is_py3:
+    # Python 3 allows non-ASCII identifiers, so defer to str.isidentifier().
+    is_identifier = str.isidentifier
+else:
+    namechars = string.ascii_letters + '_'
+    is_identifier = lambda s: s in namechars
 
 
 COMMENT = N_TOKENS
@@ -248,7 +254,7 @@ def generate_tokens(readline, line_offset=0):
                     break
                 else:                                       # ordinary string
                     yield STRING, token, spos, prefix
-            elif initial in namechars:                      # ordinary name
+            elif is_identifier(initial):                      # ordinary name
                 if token in ALWAYS_BREAK_TOKEN:
                     paren_level = 0
                     while True:
diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py
index 03686206..84e117be 100644
--- a/test/test_parser/test_tokenize.py
+++ b/test/test_parser/test_tokenize.py
@@ -1,9 +1,13 @@
+# -*- coding: utf-8 -*-    # This file contains Unicode characters.
+
 from io import StringIO
 from token import NEWLINE, STRING
 
-from jedi._compatibility import u
+from jedi._compatibility import u, is_py3
+from jedi.parser.token import NAME
 from jedi import parser
 
+
 from ..helpers import unittest
 
 
@@ -73,6 +77,24 @@ asdfasdf""" + "h"
             if value == 'if':
                 self.assertEqual(prefix, '    ')
 
+    def test_identifier_contains_unicode(self):
+        """A non-ASCII identifier must come back as a single NAME token."""
+        if not is_py3:
+            # Report a skip instead of a silent pass: nothing is checked here.
+            self.skipTest('Unicode identifiers require Python 3')
+        fundef = u('''
+def 我あφ():
+    pass
+''')
+        fundef_io = StringIO(fundef)
+        tokens = list(parser.tokenize.generate_tokens(fundef_io.readline))
+        identifier_token = next(
+            (token for token in tokens if token[1] == '我あφ'),
+            None
+        )
+        self.assertIsNotNone(identifier_token)
+        self.assertEqual(identifier_token[0], NAME)
+
 
 def test_tokenizer_with_string_literal_backslash():
     import jedi