Refactor the version info to use a tuple, always.

Dave Halter
2017-07-19 09:09:33 +02:00
parent dc3b3158eb
commit 78c371f73a
7 changed files with 74 additions and 45 deletions

View File

@@ -3,7 +3,7 @@ import os
 from parso._compatibility import FileNotFoundError
 from parso.pgen2.pgen import generate_grammar
-from parso.utils import splitlines, source_to_unicode, version_string_to_int
+from parso.utils import splitlines, source_to_unicode, parse_version_string
 from parso.python.diff import DiffParser
 from parso.python.tokenize import tokenize_lines, tokenize
 from parso.cache import parser_cache, load_module, save_module
@@ -127,14 +127,14 @@ class Grammar(object):
 class PythonGrammar(Grammar):
-    def __init__(self, version_int, bnf_text):
+    def __init__(self, version_info, bnf_text):
         super(PythonGrammar, self).__init__(
             bnf_text,
             tokenizer=self._tokenize_lines,
             parser=PythonParser,
             diff_parser=DiffParser
         )
-        self._version_int = version_int
+        self._version_int = version_info

     def _tokenize_lines(self, lines):
         return tokenize_lines(lines, self._version_int)
@@ -152,16 +152,16 @@ def load_grammar(**kwargs):
     `version='3.3'`.
     """

     def load_grammar(version=None):
-        version_int = version_string_to_int(version)
+        version_info = parse_version_string(version)
         # For these versions we use the same grammar files, because nothing
         # changed.
-        if version_int == 33:
-            version_int = 34
-        elif version_int == 26:
-            version_int = 27
+        if version_info == (3, 3):
+            version_info = parse_version_string('3.4')
+        elif version_info == (2, 6):
+            version_info = parse_version_string('2.7')

-        file = 'python/grammar' + str(version_int) + '.txt'
+        file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor)

         global _loaded_grammars
         path = os.path.join(os.path.dirname(__file__), file)
@@ -172,7 +172,7 @@ def load_grammar(**kwargs):
                 with open(path) as f:
                     bnf_text = f.read()
-                grammar = PythonGrammar(version_int, bnf_text)
+                grammar = PythonGrammar(version_info, bnf_text)
                 return _loaded_grammars.setdefault(path, grammar)
             except FileNotFoundError:
                 message = "Python version %s is currently not supported." % version
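Taken together, the changes in this file mean a version string now resolves to a major/minor pair instead of a two-digit int. A minimal sketch of the resulting lookup behaviour (not part of the commit, assuming parse_version_string() from parso.utils as added later in this commit):

# Minimal sketch, not part of the commit: how the tuple-based lookup behaves.
from parso.utils import parse_version_string

version_info = parse_version_string('3.3')
assert version_info == (3, 3)        # plain-tuple comparison works

# 3.3 shares its grammar file with 3.4, so the file name is now built from
# .major/.minor instead of str(33).
if version_info == (3, 3):
    version_info = parse_version_string('3.4')
file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor)
assert file == 'python/grammar34.txt'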

View File

@@ -8,12 +8,16 @@
 from parso.pgen2 import grammar
 from parso.python import token
 from parso.python import tokenize
+from parso.utils import parse_version_string


 class ParserGenerator(object):
     def __init__(self, bnf_text):
         self._bnf_text = bnf_text
-        self.generator = tokenize.tokenize(bnf_text, version_int=36)
+        self.generator = tokenize.tokenize(
+            bnf_text,
+            version_info=parse_version_string('3.6')
+        )
         self._gettoken()  # Initialize lookahead
         self.dfas, self.startsymbol = self._parse()
         self.first = {}  # map from symbol name to set of tokens
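The magic int 36 disappears here as well; the generator now builds its token stream with an explicit version info. A small usage sketch of the equivalent call outside the class, using only the names that appear in the diff above:

# Sketch only: the same tokenize() call as in ParserGenerator.__init__,
# shown standalone. version_info replaces the old version_int=36.
from parso.utils import parse_version_string
from parso.python import tokenize

token_stream = tokenize.tokenize(
    'file_input: stmt* ENDMARKER\n',              # pgen feeds its BNF text here
    version_info=parse_version_string('3.6')
)
first_token = next(token_stream)                  # lazily yields TokenInfo tuples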

View File

@@ -64,14 +64,14 @@ def maybe(*choices):

 # Return the empty string, plus all of the valid string prefixes.
-def _all_string_prefixes(version_int):
+def _all_string_prefixes(version_info):
     # The valid string prefixes. Only contain the lower case versions,
     # and don't contain any permuations (include 'fr', but not
     # 'rf'). The various permutations will be generated.
     _valid_string_prefixes = ['b', 'r', 'u', 'br']
-    if version_int >= 36:
+    if version_info >= (3, 6):
         _valid_string_prefixes += ['f', 'fr']
-    if version_int <= 27:
+    if version_info <= (2, 7):
         # TODO this is actually not 100% valid. ur is valid in Python 2.7,
         # while ru is not.
         _valid_string_prefixes.append('ur')
@@ -91,23 +91,23 @@ def _compile(expr):
     return re.compile(expr, re.UNICODE)


-def _get_token_collection(version_int):
+def _get_token_collection(version_info):
     try:
-        return _token_collection_cache[version_int]
+        return _token_collection_cache[tuple(version_info)]
     except KeyError:
-        _token_collection_cache[version_int] = result = \
-            _create_token_collection(version_int)
+        _token_collection_cache[tuple(version_info)] = result = \
+            _create_token_collection(version_info)
         return result


-def _create_token_collection(version_int):
+def _create_token_collection(version_info):
     # Note: we use unicode matching for names ("\w") but ascii matching for
     # number literals.
     Whitespace = r'[ \f\t]*'
     Comment = r'#[^\r\n]*'
     Name = r'\w+'

-    if version_int >= 36:
+    if version_info >= (3, 6):
         Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
         Binnumber = r'0[bB](?:_?[01])+'
         Octnumber = r'0[oO](?:_?[0-7])+'
@@ -122,7 +122,7 @@ def _create_token_collection(version_int):
     else:
         Hexnumber = r'0[xX][0-9a-fA-F]+'
         Binnumber = r'0[bB][01]+'
-        if version_int >= 30:
+        if version_info >= (3, 0):
            Octnumber = r'0[oO][0-7]+'
         else:
            Octnumber = '0[oO]?[0-7]+'
@@ -137,7 +137,7 @@ def _create_token_collection(version_int):

     # Note that since _all_string_prefixes includes the empty string,
     # StringPrefix can be the empty string (making it optional).
-    possible_prefixes = _all_string_prefixes(version_int)
+    possible_prefixes = _all_string_prefixes(version_info)
     StringPrefix = group(*possible_prefixes)

     # Tail end of ' string.
@@ -161,7 +161,7 @@ def _create_token_collection(version_int):
     Bracket = '[][(){}]'

     special_args = [r'\r?\n', r'[:;.,@]']
-    if version_int >= 30:
+    if version_info >= (3, 0):
         special_args.insert(0, r'\.\.\.')
     Special = group(*special_args)
@@ -233,13 +233,13 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])):
         return self.start_pos[0], self.start_pos[1] + len(self.string)


-def tokenize(code, version_int):
+def tokenize(code, version_info):
     """Generate tokens from a the source code (string)."""
     lines = splitlines(code, keepends=True)
-    return tokenize_lines(lines, version_int)
+    return tokenize_lines(lines, version_info)


-def tokenize_lines(lines, version_int):
+def tokenize_lines(lines, version_info):
     """
     A heavily modified Python standard library tokenizer.
@@ -248,7 +248,7 @@ def tokenize_lines(lines, version_int):
     that is irrelevant for the parser like newlines in parentheses or comments.
     """
     pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \
-        _get_token_collection(version_int)
+        _get_token_collection(version_info)
     paren_level = 0  # count parentheses
     indents = [0]
     max = 0
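All version checks in the tokenizer now compare against (major, minor) tuples, which is what gates the 3.6-only string prefixes and number literals above. A rough illustration of the observable difference, assuming the public tokenize() shown in this file; the token types named in the comments are an expectation, not something asserted by the commit:

# Rough illustration, not from the commit: under (3, 6) the 'f' prefix is a
# valid string prefix, so f"x" should come back as a single STRING token;
# under (2, 7) the 'f' should instead fall out as a separate NAME token.
from parso.utils import parse_version_string
from parso.python import tokenize

def token_types(code, version):
    return [t.type for t in tokenize.tokenize(code, parse_version_string(version))]

py36_tokens = token_types('f"x"\n', '3.6')
py27_tokens = token_types('f"x"\n', '2.7')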

View File

@@ -3,7 +3,10 @@ import re
 import sys
 from ast import literal_eval

-from parso._compatibility import unicode
+from parso._compatibility import unicode, total_ordering
+
+
+Version = namedtuple('Version', 'major, minor, micro')


 def splitlines(string, keepends=False):
@@ -82,7 +85,6 @@ def version_info():
     Returns a namedtuple of parso's version, similar to Python's
     ``sys.version_info``.
     """
-    Version = namedtuple('Version', 'major, minor, micro')
     from parso import __version__
     tupl = re.findall(r'[a-z]+|\d+', __version__)
     return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)])
@@ -94,24 +96,48 @@ def _parse_version(version):
         raise ValueError('The given version is not in the right format. '
                          'Use something like "3.2" or "3".')

-    major = match.group(1)
+    major = int(match.group(1))
     minor = match.group(2)
     if minor is None:
         # Use the latest Python in case it's not exactly defined, because the
         # grammars are typically backwards compatible?
-        if major == "2":
+        if major == 2:
             minor = "7"
-        elif major == "3":
+        elif major == 3:
             minor = "6"
         else:
             raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
-    return int(major + minor)
+    minor = int(minor)
+    return PythonVersionInfo(major, minor)


-def version_string_to_int(version=None):
+@total_ordering
+class PythonVersionInfo(namedtuple('Version', 'major, minor')):
+    def __gt__(self, other):
+        if isinstance(other, tuple):
+            if len(other) != 2:
+                raise ValueError("Can only compare to tuples of length 2.")
+            return (self.major, self.minor) > other
+
+        super(PythonVersionInfo, self).__gt__(other)
+        return (self.major, self.minor)
+
+    def __eq__(self, other):
+        if isinstance(other, tuple):
+            if len(other) != 2:
+                raise ValueError("Can only compare to tuples of length 2.")
+            return (self.major, self.minor) == other
+
+        super(PythonVersionInfo, self).__eq__(other)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+
+def parse_version_string(version=None):
     """
     Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and
-    returns a corresponding int that is always two characters long in decimal.
+    returns a corresponding version info that is always two characters long in
+    decimal.
     """
     if version is None:
         version = '%s.%s' % sys.version_info[:2]
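PythonVersionInfo is the core of the refactoring: a two-field namedtuple whose comparison methods accept plain tuples of length 2 and reject anything else, so the rest of the code can write comparisons like version_info >= (3, 0). A small usage sketch of the comparisons the class is meant to support, mirroring how the other files in this commit use it:

# Usage sketch, not part of the commit.
from parso.utils import parse_version_string

v = parse_version_string('3.6')
assert v.major == 3 and v.minor == 6
assert v == (3, 6)                   # __eq__ against a length-2 tuple
assert v >= (3, 0)                   # >= is derived by @total_ordering from __gt__/__eq__
assert not v <= (2, 7)

try:
    v == (3, 6, 0)                   # anything but a length-2 tuple raises
except ValueError:
    pass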

View File

@@ -14,7 +14,7 @@ def test_load_inexisting_grammar():

 @pytest.mark.parametrize(('string', 'result'), [
-    ('2', 27), ('3', 36), ('1.1', 11), ('1.1.1', 11), ('300.1.31', 3001)
+    ('2', (2, 7)), ('3', (3, 6)), ('1.1', (1, 1)), ('1.1.1', (1, 1)), ('300.1.31', (300, 1))
 ])
 def test_parse_version(string, result):
     assert utils._parse_version(string) == result

View File

@@ -10,10 +10,9 @@ from textwrap import dedent

 import pytest

-from parso._compatibility import py_version
 from parso import load_grammar
 from parso import ParserSyntaxError
-from parso.utils import version_string_to_int
+from parso.utils import parse_version_string


 class Checker():
@@ -32,8 +31,8 @@ def works_in_py2(each_version):

 @pytest.fixture
 def works_ge_py3(each_version):
-    version_int = version_string_to_int(each_version)
-    return Checker(each_version, version_int >= 30)
+    version_info = parse_version_string(each_version)
+    return Checker(each_version, version_info >= (3, 0))


 @pytest.fixture
@@ -41,8 +40,8 @@ def works_ge_py35(each_version):
     """
     Works only greater equal Python 3.3.
     """
-    version_int = version_string_to_int(each_version)
-    return Checker(each_version, version_int >= 35)
+    version_info = parse_version_string(each_version)
+    return Checker(each_version, version_info >= (3, 5))


 def _parse(code, version=None):

View File

@@ -5,7 +5,7 @@ from textwrap import dedent
 import pytest

 from parso._compatibility import py_version
-from parso.utils import splitlines, version_string_to_int
+from parso.utils import splitlines, parse_version_string
 from parso.python.token import (
     NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER)
 from parso.python import tokenize
@@ -15,8 +15,8 @@ from parso.python.tokenize import TokenInfo

 def _get_token_list(string):
     # Load the current version.
-    version_int = version_string_to_int()
-    return list(tokenize.tokenize(string, version_int))
+    version_info = parse_version_string()
+    return list(tokenize.tokenize(string, version_info))


 def test_end_pos_one_line():