diff --git a/parso/grammar.py b/parso/grammar.py
index 2bf8b10..87b57e6 100644
--- a/parso/grammar.py
+++ b/parso/grammar.py
@@ -96,7 +96,9 @@ class Grammar(object):
                 if old_lines == lines:
                     return module_node
 
-                new_node = self._diff_parser(self._pgen_grammar, module_node).update(
+                new_node = self._diff_parser(
+                    self._pgen_grammar, self._tokenizer, module_node
+                ).update(
                     old_lines=old_lines,
                     new_lines=lines
                 )
@@ -106,7 +108,11 @@ class Grammar(object):
 
         tokens = self._tokenizer(lines)
 
-        p = self._parser(self._pgen_grammar, error_recovery=error_recovery, start_symbol=start_symbol)
+        p = self._parser(
+            self._pgen_grammar,
+            error_recovery=error_recovery,
+            start_symbol=start_symbol
+        )
         root_node = p.parse(tokens=tokens)
 
         if cache or diff_cache:
@@ -120,6 +126,20 @@ class Grammar(object):
         return '<%s:%s>' % (self.__class__.__name__, txt)
 
 
+class PythonGrammar(Grammar):
+    def __init__(self, version_int, bnf_text):
+        super(PythonGrammar, self).__init__(
+            bnf_text,
+            tokenizer=self._tokenize_lines,
+            parser=PythonParser,
+            diff_parser=DiffParser
+        )
+        self._version_int = version_int
+
+    def _tokenize_lines(self, lines):
+        return tokenize_lines(lines, self._version_int)
+
+
 def load_grammar(version=None):
     """
     Loads a Python grammar. The default version is the current Python version.
@@ -147,12 +167,7 @@ def load_grammar(version=None):
         with open(path) as f:
             bnf_text = f.read()
 
-        grammar = Grammar(
-            bnf_text,
-            tokenizer=tokenize_lines,
-            parser=PythonParser,
-            diff_parser=DiffParser
-        )
+        grammar = PythonGrammar(version_int, bnf_text)
         return _loaded_grammars.setdefault(path, grammar)
     except FileNotFoundError:
         message = "Python version %s is currently not supported." % version
diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py
index 60e4838..75bd2ea 100644
--- a/parso/pgen2/pgen.py
+++ b/parso/pgen2/pgen.py
@@ -13,7 +13,7 @@ from parso.python import tokenize
 class ParserGenerator(object):
     def __init__(self, bnf_text):
         self._bnf_text = bnf_text
-        self.generator = tokenize.tokenize(bnf_text)
+        self.generator = tokenize.tokenize(bnf_text, version_int=36)
         self._gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self._parse()
         self.first = {}  # map from symbol name to set of tokens
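
The grammar.py hunks above thread the grammar's own tokenizer into both the regular parser and the diff parser, and the new PythonGrammar subclass binds a version_int to that tokenizer; pgen.py pins version_int=36 when tokenizing the bundled BNF text, presumably because any recent rule set can tokenize the grammar files. A rough usage sketch of the resulting API (a sketch only, assuming grammar files for both versions ship with the package):

    import parso

    # load_grammar() now hands back a PythonGrammar whose tokenizer is bound
    # to the requested version, so 2.7 and 3.6 sources get different rules.
    grammar36 = parso.load_grammar(version='3.6')
    grammar27 = parso.load_grammar(version='2.7')

    # Underscore grouping in number literals is only part of the 3.6 token set.
    module = grammar36.parse('million = 1_000_000\n')
    print(module.get_code())
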
""" - def __init__(self, pgen_grammar, module): + def __init__(self, pgen_grammar, tokenizer, module): self._pgen_grammar = pgen_grammar + self._tokenizer = tokenizer self._module = module def _reset(self): @@ -286,7 +287,7 @@ class DiffParser(object): is_first_token = True omitted_first_indent = False indents = [] - tokens = tokenize_lines(lines) + tokens = self._tokenizer(lines) stack = self._active_parser.pgen_parser.stack for typ, string, start_pos, prefix in tokens: start_pos = start_pos[0] + line_offset, start_pos[1] diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index 19a3a8f..43f4547 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -11,6 +11,7 @@ memory optimizations here. """ from __future__ import absolute_import +import sys import string import re from collections import namedtuple @@ -19,12 +20,19 @@ from codecs import BOM_UTF8 from parso.python.token import (tok_name, N_TOKENS, ENDMARKER, STRING, NUMBER, opmap, NAME, OP, ERRORTOKEN, NEWLINE, INDENT, DEDENT) -from parso._compatibility import py_version, u +from parso._compatibility import py_version from parso.utils import splitlines +TokenCollection = namedtuple( + 'TokenCollection', + 'pseudo_token single_quoted triple_quoted endpats always_break_tokens', +) + BOM_UTF8_STRING = BOM_UTF8.decode('utf-8') +_token_collection_cache = {} + if py_version >= 30: # Python 3 has str.isidentifier() to check if a char is a valid identifier is_identifier = str.isidentifier @@ -46,55 +54,24 @@ def group(*choices, **kwargs): start += '?:' return start + '|'.join(choices) + ')' + def any(*choices): return group(*choices) + '*' + def maybe(*choices): return group(*choices) + '?' -# Note: we use unicode matching for names ("\w") but ascii matching for -# number literals. -Whitespace = r'[ \f\t]*' -Comment = r'#[^\r\n]*' -Name = r'\w+' - -if py_version >= 36: - Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' - Binnumber = r'0[bB](?:_?[01])+' - Octnumber = r'0[oO](?:_?[0-7])+' - Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*' - Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', - r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) - Expfloat = r'[0-9](?:_?[0-9])*' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') -else: - Hexnumber = r'0[xX][0-9a-fA-F]+' - Binnumber = r'0[bB][01]+' - if py_version >= 30: - Octnumber = r'0[oO][0-7]+' - else: - Octnumber = '0[0-7]+' - Decnumber = r'(?:0+|[1-9][0-9]*)' - Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber) - Exponent = r'[eE][-+]?[0-9]+' - Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent) - Expfloat = r'[0-9]+' + Exponent - Floatnumber = group(Pointfloat, Expfloat) - Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]') -Number = group(Imagnumber, Floatnumber, Intnumber) # Return the empty string, plus all of the valid string prefixes. -def _all_string_prefixes(): +def _all_string_prefixes(version_int): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permuations (include 'fr', but not # 'rf'). The various permutations will be generated. _valid_string_prefixes = ['b', 'r', 'u', 'br'] - if py_version >= 36: + if version_int >= 36: _valid_string_prefixes += ['f', 'fr'] - if py_version <= 27: + if version_int <= 27: # TODO this is actually not 100% valid. ur is valid in Python 2.7, # while ru is not. 
@@ -46,55 +54,24 @@ def group(*choices, **kwargs):
         start += '?:'
     return start + '|'.join(choices) + ')'
 
+
 def any(*choices):
     return group(*choices) + '*'
 
+
 def maybe(*choices):
     return group(*choices) + '?'
 
-# Note: we use unicode matching for names ("\w") but ascii matching for
-# number literals.
-Whitespace = r'[ \f\t]*'
-Comment = r'#[^\r\n]*'
-Name = r'\w+'
-
-if py_version >= 36:
-    Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
-    Binnumber = r'0[bB](?:_?[01])+'
-    Octnumber = r'0[oO](?:_?[0-7])+'
-    Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
-    Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
-                       r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
-    Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
-else:
-    Hexnumber = r'0[xX][0-9a-fA-F]+'
-    Binnumber = r'0[bB][01]+'
-    if py_version >= 30:
-        Octnumber = r'0[oO][0-7]+'
-    else:
-        Octnumber = '0[0-7]+'
-    Decnumber = r'(?:0+|[1-9][0-9]*)'
-    Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
-    Exponent = r'[eE][-+]?[0-9]+'
-    Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
-    Expfloat = r'[0-9]+' + Exponent
-    Floatnumber = group(Pointfloat, Expfloat)
-    Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
-Number = group(Imagnumber, Floatnumber, Intnumber)
 
 # Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes():
+def _all_string_prefixes(version_int):
     # The valid string prefixes. Only contain the lower case versions,
     #   and don't contain any permuations (include 'fr', but not
     #   'rf'). The various permutations will be generated.
     _valid_string_prefixes = ['b', 'r', 'u', 'br']
-    if py_version >= 36:
+    if version_int >= 36:
         _valid_string_prefixes += ['f', 'fr']
-    if py_version <= 27:
+    if version_int <= 27:
         # TODO this is actually not 100% valid. ur is valid in Python 2.7,
         # while ru is not.
         _valid_string_prefixes.append('ur')
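
With the version passed in explicitly, the set of string prefixes is decided by the target grammar rather than by the interpreter running parso: 'f' and 'fr' only appear for 3.6 and above, and 'ur' only for 2.7 and below. A hedged sketch of just that selection step (the real function goes on to generate every ordering and casing of these):

    def base_string_prefixes(version_int):
        # Mirrors the lists in _all_string_prefixes() above.
        prefixes = ['b', 'r', 'u', 'br']
        if version_int >= 36:
            prefixes += ['f', 'fr']   # f-strings exist from 3.6 on
        if version_int <= 27:
            prefixes += ['ur']        # only the 2.x grammar accepts ur
        return prefixes

    print(base_string_prefixes(36))   # ['b', 'r', 'u', 'br', 'f', 'fr']
    print(base_string_prefixes(27))   # ['b', 'r', 'u', 'br', 'ur']
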
@@ -109,70 +86,118 @@ def _all_string_prefixes():
                 result.add(''.join(s))
     return result
 
+
 def _compile(expr):
     return re.compile(expr, re.UNICODE)
 
-# Note that since _all_string_prefixes includes the empty string,
-# StringPrefix can be the empty string (making it optional).
-StringPrefix = group(*_all_string_prefixes())
-
-# Tail end of ' string.
-Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
-# Tail end of " string.
-Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
-# Tail end of ''' string.
-Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
-# Tail end of """ string.
-Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group(StringPrefix + "'''", StringPrefix + '"""')
-
-# Because of leftmost-then-longest match semantics, be sure to put the
-# longest operators first (e.g., if = came before ==, == would get
-# recognized as two instances of =).
-Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
-                 r"//=?", r"->",
-                 r"[+\-*/%&@|^=<>]=?",
-                 r"~")
-
-Bracket = '[][(){}]'
-Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
-Funny = group(Operator, Bracket, Special)
-
-PlainToken = group(Number, Funny, Name, capture=True)
-
-# First (or only) line of ' or " string.
-ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
-                group("'", r'\\\r?\n'),
-                StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
-                group('"', r'\\\r?\n'))
-PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
-PseudoToken = group(Whitespace, capture=True) + \
-    group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
-
-# For a given string prefix plus quotes, endpats maps it to a regex
-# to match the remainder of that string. _prefix can be empty, for
-# a normal single or triple quoted string (with no prefix).
-endpats = {}
-for _prefix in _all_string_prefixes():
-    endpats[_prefix + "'"] = _compile(Single)
-    endpats[_prefix + '"'] = _compile(Double)
-    endpats[_prefix + "'''"] = _compile(Single3)
-    endpats[_prefix + '"""'] = _compile(Double3)
-
-# A set of all of the single and triple quoted string prefixes,
-# including the opening quotes.
-single_quoted = set()
-triple_quoted = set()
-for t in _all_string_prefixes():
-    for p in (t + '"', t + "'"):
-        single_quoted.add(p)
-    for p in (t + '"""', t + "'''"):
-        triple_quoted.add(p)
 
+def _get_token_collection(version_int):
+    try:
+        return _token_collection_cache[version_int]
+    except KeyError:
+        _token_collection_cache[version_int] = result = \
+            _create_token_collection(version_int)
+        return result
 
-ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
-                       'finally', 'while', 'with', 'return')
-pseudo_token_compiled = _compile(PseudoToken)
 
+def _create_token_collection(version_int):
+    # Note: we use unicode matching for names ("\w") but ascii matching for
+    # number literals.
+    Whitespace = r'[ \f\t]*'
+    Comment = r'#[^\r\n]*'
+    Name = r'\w+'
+
+    if version_int >= 36:
+        Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
+        Binnumber = r'0[bB](?:_?[01])+'
+        Octnumber = r'0[oO](?:_?[0-7])+'
+        Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
+        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+        Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
+        Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
+                           r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
+        Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
+        Floatnumber = group(Pointfloat, Expfloat)
+        Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
+    else:
+        Hexnumber = r'0[xX][0-9a-fA-F]+'
+        Binnumber = r'0[bB][01]+'
+        if version_int >= 30:
+            Octnumber = r'0[oO][0-7]+'
+        else:
+            Octnumber = '0[0-7]+'
+        Decnumber = r'(?:0+|[1-9][0-9]*)'
+        Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
+        Exponent = r'[eE][-+]?[0-9]+'
+        Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
+        Expfloat = r'[0-9]+' + Exponent
+        Floatnumber = group(Pointfloat, Expfloat)
+        Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
+    Number = group(Imagnumber, Floatnumber, Intnumber)
+
+    # Note that since _all_string_prefixes includes the empty string,
+    # StringPrefix can be the empty string (making it optional).
+    possible_prefixes = _all_string_prefixes(version_int)
+    StringPrefix = group(*possible_prefixes)
+
+    # Tail end of ' string.
+    Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+    # Tail end of " string.
+    Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+    # Tail end of ''' string.
+    Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+    # Tail end of """ string.
+    Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
+    Triple = group(StringPrefix + "'''", StringPrefix + '"""')
+
+    # Because of leftmost-then-longest match semantics, be sure to put the
+    # longest operators first (e.g., if = came before ==, == would get
+    # recognized as two instances of =).
+    Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
+                     r"//=?", r"->",
+                     r"[+\-*/%&@|^=<>]=?",
+                     r"~")
+
+    Bracket = '[][(){}]'
+    Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
+    Funny = group(Operator, Bracket, Special)
+
+    # First (or only) line of ' or " string.
+    ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+                    group("'", r'\\\r?\n'),
+                    StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                    group('"', r'\\\r?\n'))
+    PseudoExtras = group(r'\\\r?\n|\Z', Comment, Triple)
+    PseudoToken = group(Whitespace, capture=True) + \
+        group(PseudoExtras, Number, Funny, ContStr, Name, capture=True)
+
+    # For a given string prefix plus quotes, endpats maps it to a regex
+    # to match the remainder of that string. _prefix can be empty, for
+    # a normal single or triple quoted string (with no prefix).
+    endpats = {}
+    for _prefix in possible_prefixes:
+        endpats[_prefix + "'"] = _compile(Single)
+        endpats[_prefix + '"'] = _compile(Double)
+        endpats[_prefix + "'''"] = _compile(Single3)
+        endpats[_prefix + '"""'] = _compile(Double3)
+
+    # A set of all of the single and triple quoted string prefixes,
+    # including the opening quotes.
+    single_quoted = set()
+    triple_quoted = set()
+    for t in possible_prefixes:
+        for p in (t + '"', t + "'"):
+            single_quoted.add(p)
+        for p in (t + '"""', t + "'''"):
+            triple_quoted.add(p)
+
+    ALWAYS_BREAK_TOKENS = (';', 'import', 'class', 'def', 'try', 'except',
+                           'finally', 'while', 'with', 'return')
+    pseudo_token_compiled = _compile(PseudoToken)
+    return TokenCollection(
+        pseudo_token_compiled, single_quoted, triple_quoted, endpats,
+        ALWAYS_BREAK_TOKENS
+    )
 
 
 class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
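
Everything the tokenizer loop needs (the compiled pseudo-token regex, the quote sets, the endpats table and the break keywords) is now built inside _create_token_collection() from the version-specific pieces above. The endpats table maps a string prefix plus its opening quote to a regex for the rest of the literal; a small hedged demo of how such a tail pattern behaves, with the pattern copied from the Single definition above:

    import re

    # Everything up to an unescaped closing single quote.
    single_tail = re.compile(r"[^'\\]*(?:\\.[^'\\]*)*'")

    source_line = "'a \\'quoted\\' word' + tail"
    m = single_tail.match(source_line, 1)   # start right after the opening quote
    print(source_line[:m.end()])            # prints: 'a \'quoted\' word'
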
@@ -203,13 +228,13 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         return self.start_pos[0], self.start_pos[1] + len(self.string)
 
 
-def tokenize(code):
+def tokenize(code, version_int):
     """Generate tokens from a the source code (string)."""
     lines = splitlines(code, keepends=True)
-    return tokenize_lines(lines)
+    return tokenize_lines(lines, version_int)
 
 
-def tokenize_lines(lines):
+def tokenize_lines(lines, version_int):
     """
     A heavily modified Python standard library tokenizer.
 
@@ -217,6 +242,8 @@
     token. This idea comes from lib2to3. The prefix contains all information
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
+    pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
+        _get_token_collection(version_int)
     paren_level = 0  # count parentheses
     indents = [0]
     max = 0
@@ -252,7 +279,7 @@ def tokenize_lines(lines):
                 continue
 
         while pos < max:
-            pseudomatch = pseudo_token_compiled.match(line, pos)
+            pseudomatch = pseudo_token.match(line, pos)
             if not pseudomatch:                             # scan for tokens
                 txt = line[pos:]
                 if txt.endswith('\n'):
@@ -329,7 +356,7 @@ def tokenize_lines(lines):
                 else:                                       # ordinary string
                     yield TokenInfo(STRING, token, spos, prefix)
             elif is_identifier(initial):                    # ordinary name
-                if token in ALWAYS_BREAK_TOKENS:
+                if token in always_break_tokens:
                     paren_level = 0
                     while True:
                         indent = indents.pop()
@@ -370,12 +397,16 @@ def tokenize_lines(lines):
 
 
 if __name__ == "__main__":
-    import sys
     if len(sys.argv) >= 2:
         path = sys.argv[1]
         with open(path) as f:
-            code = u(f.read())
+            code = f.read()
     else:
-        code = u(sys.stdin.read())
+        code = sys.stdin.read()
+
+    if isinstance(code, bytes):
+        from parso.utils import source_to_unicode
+        code = source_to_unicode(code)
+
     for token in tokenize(code):
         print(token)
diff --git a/parso/utils.py b/parso/utils.py
index 047643a..5d2abc0 100644
--- a/parso/utils.py
+++ b/parso/utils.py
@@ -108,7 +108,7 @@ def _parse_version(version):
     return int(major + minor)
 
 
-def version_string_to_int(version):
+def version_string_to_int(version=None):
     """
     Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
     returns a corresponding int that is always two characters long in decimal.
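
At the call-site level the change is simply that tokenize() and tokenize_lines() now take the target version as a second argument, with version_string_to_int() (which now defaults to the running interpreter's version) producing that value. A hedged sketch of the new call contract:

    from parso.utils import version_string_to_int
    from parso.python.tokenize import tokenize

    code = "f'{x}'\n"
    for token in tokenize(code, version_string_to_int('3.6')):
        print(token)

    # Under version_int=27 the same source is tokenized without 'f' being
    # recognized as a string prefix, since 'f' is only added for 3.6+.
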
diff --git a/test/test_diff_parser.py b/test/test_diff_parser.py
index ceca69b..e0985fa 100644
--- a/test/test_diff_parser.py
+++ b/test/test_diff_parser.py
@@ -65,7 +65,11 @@ class Differ(object):
     def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
         logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
         lines = splitlines(code, keepends=True)
-        diff_parser = DiffParser(self.grammar._pgen_grammar, self.module)
+        diff_parser = DiffParser(
+            self.grammar._pgen_grammar,
+            self.grammar._tokenizer,
+            self.module,
+        )
         new_module = diff_parser.update(self.lines, lines)
         self.lines = lines
         assert code == new_module.get_code()
diff --git a/test/test_normalizer_issues_files.py b/test/test_normalizer_issues_files.py
index 8519187..b0672c7 100644
--- a/test/test_normalizer_issues_files.py
+++ b/test/test_normalizer_issues_files.py
@@ -5,7 +5,7 @@ tests of pydocstyle.
 import difflib
 import re
 
-from _compatibility import total_ordering
+from test._compatibility import total_ordering
 
 import parso
 from parso.utils import source_to_unicode
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index 9ae56e9..885dfff 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest
 
 from parso._compatibility import py_version
-from parso.utils import splitlines
+from parso.utils import splitlines, version_string_to_int
 from parso.python.token import (
     NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
 from parso.python import tokenize
@@ -14,7 +14,9 @@ from parso.python.tokenize import TokenInfo
 
 
 def _get_token_list(string):
-    return list(tokenize.tokenize(string))
+    # Load the current version.
+    version_int = version_string_to_int()
+    return list(tokenize.tokenize(string, version_int))
 
 
 def test_end_pos_one_line():
@@ -41,8 +43,7 @@ def test_end_pos_multi_line():
 def test_simple_no_whitespace():
     # Test a simple one line string, no preceding whitespace
     simple_docstring = '"""simple one line docstring"""'
-    tokens = tokenize.tokenize(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     _, value, _, prefix = token_list[0]
     assert prefix == ''
     assert value == '"""simple one line docstring"""'
@@ -51,8 +52,7 @@ def test_simple_no_whitespace():
 def test_simple_with_whitespace():
     # Test a simple one line string with preceding whitespace and newline
     simple_docstring = ' """simple one line docstring""" \r\n'
-    tokens = tokenize.tokenize(simple_docstring)
-    token_list = list(tokens)
+    token_list = _get_token_list(simple_docstring)
     assert token_list[0][0] == INDENT
     typ, value, start_pos, prefix = token_list[1]
     assert prefix == ' '
@@ -71,8 +71,7 @@ def test_function_whitespace():
         if x > 0:
             print(True)
     ''')
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     for _, value, _, prefix in token_list:
         if value == 'test_whitespace':
             assert prefix == ' '
@@ -92,8 +91,7 @@ def test_tokenize_multiline_I():
     # Make sure multiline string having newlines have the end marker on the
     # next line
     fundef = '''""""\n'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
                           TokenInfo(ENDMARKER , '', (2, 0), '')]
 
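
The tests now route tokenization through the _get_token_list() helper above, which resolves the running interpreter's version once per call. A possible extension, not part of this patch, would be to parametrize the same assertions over several target versions:

    import pytest
    from parso.python import tokenize
    from parso.utils import version_string_to_int

    @pytest.mark.parametrize('version', ['2.7', '3.3', '3.6'])
    def test_name_token_per_version(version):
        tokens = list(tokenize.tokenize('abc\n', version_string_to_int(version)))
        assert tokens[0].string == 'abc'
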
@@ -102,8 +100,7 @@ def test_tokenize_multiline_II():
     # Make sure multiline string having no newlines have the end marker on
     # same line
     fundef = '''""""'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (1, 4), '')]
 
@@ -112,8 +109,7 @@ def test_tokenize_multiline_III():
     # Make sure multiline string having newlines have the end marker on the
     # next line even if several newline
     fundef = '''""""\n\n'''
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (3, 0), '')]
 
@@ -123,8 +119,7 @@ def test_identifier_contains_unicode():
     def 我あφ():
         pass
     ''')
-    tokens = tokenize.tokenize(fundef)
-    token_list = list(tokens)
+    token_list = _get_token_list(fundef)
     unicode_token = token_list[1]
     if py_version >= 30:
         assert unicode_token[0] == NAME