Mirror of https://github.com/davidhalter/parso.git
Rename source_tokens to tokenize and generate_tokens to tokenize_lines.
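
In short, the tokenizer entry points are renamed: source_tokens (tokenize a source string) becomes tokenize, and generate_tokens (tokenize pre-split lines) becomes tokenize_lines. A minimal before/after sketch, assuming the module layout shown in the diff below (parso.tokenize, parso.utils):

    from parso import tokenize
    from parso.utils import splitlines

    code = "x = 1\n"

    # Old names (before this commit):
    #   tokens = tokenize.source_tokens(code)
    #   tokens = tokenize.generate_tokens(splitlines(code, keepends=True))

    # New names (after this commit):
    tokens = list(tokenize.tokenize(code))
    same = list(tokenize.tokenize_lines(splitlines(code, keepends=True)))
    assert tokens == same  # tokenize() splits the string and delegates to tokenize_lines()
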
@@ -8,7 +8,7 @@ from parso.pgen2.pgen import generate_grammar
 from parso.utils import splitlines, source_to_unicode
 from parso.python.parser import remove_last_newline
 from parso.python.diff import DiffParser
-from parso.tokenize import generate_tokens
+from parso.tokenize import tokenize_lines
 from parso.cache import parser_cache, load_module, save_module
 from parso.parser import BaseParser
 from parso.python.parser import Parser as PythonParser
@@ -22,7 +22,7 @@ class Grammar(object):
 
     :param text: A BNF representation of your grammar.
     """
-    def __init__(self, text, parser=BaseParser, tokenizer=generate_tokens,
+    def __init__(self, text, tokenizer, parser=BaseParser,
                  diff_parser=None):
         self._pgen_grammar = generate_grammar(text)
         self._parser = parser
@@ -188,7 +188,12 @@ def load_grammar(version=None):
         with open(path) as f:
             bnf_text = f.read()
 
-        grammar = Grammar(bnf_text, parser=PythonParser, diff_parser=DiffParser)
+        grammar = Grammar(
+            bnf_text,
+            tokenizer=tokenize_lines,
+            parser=PythonParser,
+            diff_parser=DiffParser
+        )
         return _loaded_grammars.setdefault(path, grammar)
     except FileNotFoundError:
         message = "Python version %s is currently not supported." % version
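
Note on the Grammar change above: the tokenizer no longer defaults to the Python tokenizer, so callers pass it explicitly, as load_grammar now does. A minimal construction sketch, assuming Grammar lives in parso.grammar (the module path is not shown in this diff) and using a placeholder grammar-file path:

    from parso.grammar import Grammar        # assumed module path for the Grammar class above
    from parso.tokenize import tokenize_lines
    from parso.python.parser import Parser as PythonParser
    from parso.python.diff import DiffParser

    path = "..."                              # placeholder: path to a BNF grammar file
    with open(path) as f:
        bnf_text = f.read()

    # tokenizer is now a required argument; no Python-specific default remains.
    grammar = Grammar(bnf_text, tokenizer=tokenize_lines,
                      parser=PythonParser, diff_parser=DiffParser)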

@@ -13,7 +13,7 @@ from parso import tokenize
 class ParserGenerator(object):
     def __init__(self, bnf_text):
         self._bnf_text = bnf_text
-        self.generator = tokenize.source_tokens(bnf_text)
+        self.generator = tokenize.tokenize(bnf_text)
         self._gettoken() # Initialize lookahead
         self.dfas, self.startsymbol = self._parse()
         self.first = {} # map from symbol name to set of tokens

@@ -13,7 +13,7 @@ import logging
 from parso.utils import splitlines
 from parso.python.parser import Parser, remove_last_newline
 from parso.python.tree import EndMarker
-from parso.tokenize import (generate_tokens, NEWLINE, TokenInfo,
+from parso.tokenize import (tokenize_lines, NEWLINE, TokenInfo,
                             ENDMARKER, INDENT, DEDENT)
 
 
@@ -308,7 +308,7 @@ class DiffParser(object):
         is_first_token = True
         omitted_first_indent = False
         indents = []
-        tokens = generate_tokens(lines)
+        tokens = tokenize_lines(lines)
         stack = self._active_parser.pgen_parser.stack
         for typ, string, start_pos, prefix in tokens:
             start_pos = start_pos[0] + line_offset, start_pos[1]

@@ -204,13 +204,13 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         return self.start_pos[0], self.start_pos[1] + len(self.string)
 
 
-def source_tokens(source):
+def tokenize(code):
     """Generate tokens from a the source code (string)."""
-    lines = splitlines(source, keepends=True)
-    return generate_tokens(lines)
+    lines = splitlines(code, keepends=True)
+    return tokenize_lines(lines)
 
 
-def generate_tokens(lines):
+def tokenize_lines(lines):
     """
     A heavily modified Python standard library tokenizer.
 
@@ -361,5 +361,5 @@ if __name__ == "__main__":
         code = u(f.read())
     else:
         code = u(sys.stdin.read())
-    for token in source_tokens(code):
+    for token in tokenize(code):
         print(token)
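
The TokenInfo namedtuple itself is unchanged (fields: type, string, start_pos, prefix); only the generator functions are renamed. A small iteration sketch against the renamed API, mirroring the __main__ block above and the DiffParser loop:

    from parso import tokenize

    for typ, string, start_pos, prefix in tokenize.tokenize("def f():\n    pass\n"):
        # start_pos is a (line, column) tuple; prefix holds the whitespace and
        # comments preceding the token (see the prefix assertions in the tests below).
        print(typ, repr(string), start_pos, repr(prefix))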

@@ -10,7 +10,7 @@ from parso import parse
 from parso.tokenize import TokenInfo
 
 def _get_token_list(string):
-    return list(tokenize.source_tokens(string))
+    return list(tokenize.tokenize(string))
 
 
 def test_end_pos_one_line():
@@ -37,7 +37,7 @@ def test_end_pos_multi_line():
 def test_simple_no_whitespace():
     # Test a simple one line string, no preceding whitespace
     simple_docstring = '"""simple one line docstring"""'
-    tokens = tokenize.source_tokens(simple_docstring)
+    tokens = tokenize.tokenize(simple_docstring)
     token_list = list(tokens)
     _, value, _, prefix = token_list[0]
     assert prefix == ''
@@ -47,7 +47,7 @@ def test_simple_no_whitespace():
 def test_simple_with_whitespace():
     # Test a simple one line string with preceding whitespace and newline
     simple_docstring = ' """simple one line docstring""" \r\n'
-    tokens = tokenize.source_tokens(simple_docstring)
+    tokens = tokenize.tokenize(simple_docstring)
     token_list = list(tokens)
     assert token_list[0][0] == INDENT
     typ, value, start_pos, prefix = token_list[1]
@@ -67,7 +67,7 @@ def test_function_whitespace():
         if x > 0:
             print(True)
     ''')
-    tokens = tokenize.source_tokens(fundef)
+    tokens = tokenize.tokenize(fundef)
     token_list = list(tokens)
     for _, value, _, prefix in token_list:
         if value == 'test_whitespace':
@@ -88,7 +88,7 @@ def test_tokenize_multiline_I():
     # Make sure multiline string having newlines have the end marker on the
     # next line
     fundef = '''""""\n'''
-    tokens = tokenize.source_tokens(fundef)
+    tokens = tokenize.tokenize(fundef)
     token_list = list(tokens)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n', (1, 0), ''),
                           TokenInfo(ENDMARKER , '', (2, 0), '')]
@@ -98,7 +98,7 @@ def test_tokenize_multiline_II():
     # Make sure multiline string having no newlines have the end marker on
     # same line
     fundef = '''""""'''
-    tokens = tokenize.source_tokens(fundef)
+    tokens = tokenize.tokenize(fundef)
     token_list = list(tokens)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (1, 4), '')]
@@ -108,7 +108,7 @@ def test_tokenize_multiline_III():
     # Make sure multiline string having newlines have the end marker on the
     # next line even if several newline
     fundef = '''""""\n\n'''
-    tokens = tokenize.source_tokens(fundef)
+    tokens = tokenize.tokenize(fundef)
     token_list = list(tokens)
     assert token_list == [TokenInfo(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                           TokenInfo(ENDMARKER, '', (3, 0), '')]
@@ -119,7 +119,7 @@ def test_identifier_contains_unicode():
     def 我あφ():
         pass
     ''')
-    tokens = tokenize.source_tokens(fundef)
+    tokens = tokenize.tokenize(fundef)
     token_list = list(tokens)
     unicode_token = token_list[1]
     if py_version >= 30: