diff --git a/parso/grammar.py b/parso/grammar.py index a92faa5..22f04f6 100644 --- a/parso/grammar.py +++ b/parso/grammar.py @@ -3,7 +3,7 @@ import os from parso._compatibility import FileNotFoundError from parso.pgen2.pgen import generate_grammar -from parso.utils import splitlines, source_to_unicode, version_string_to_int +from parso.utils import splitlines, source_to_unicode, parse_version_string from parso.python.diff import DiffParser from parso.python.tokenize import tokenize_lines, tokenize from parso.cache import parser_cache, load_module, save_module @@ -127,14 +127,14 @@ class Grammar(object): class PythonGrammar(Grammar): - def __init__(self, version_int, bnf_text): + def __init__(self, version_info, bnf_text): super(PythonGrammar, self).__init__( bnf_text, tokenizer=self._tokenize_lines, parser=PythonParser, diff_parser=DiffParser ) - self._version_int = version_int + self._version_int = version_info def _tokenize_lines(self, lines): return tokenize_lines(lines, self._version_int) @@ -152,16 +152,16 @@ def load_grammar(**kwargs): `version='3.3'`. """ def load_grammar(version=None): - version_int = version_string_to_int(version) + version_info = parse_version_string(version) # For these versions we use the same grammar files, because nothing # changed. 
- if version_int == 33: - version_int = 34 - elif version_int == 26: - version_int = 27 + if version_info == (3, 3): + version_info = parse_version_string('3.4') + elif version_info == (2, 6): + version_info = parse_version_string('2.7') - file = 'python/grammar' + str(version_int) + '.txt' + file = 'python/grammar%s%s.txt' % (version_info.major, version_info.minor) global _loaded_grammars path = os.path.join(os.path.dirname(__file__), file) @@ -172,7 +172,7 @@ def load_grammar(**kwargs): with open(path) as f: bnf_text = f.read() - grammar = PythonGrammar(version_int, bnf_text) + grammar = PythonGrammar(version_info, bnf_text) return _loaded_grammars.setdefault(path, grammar) except FileNotFoundError: message = "Python version %s is currently not supported." % version diff --git a/parso/pgen2/pgen.py b/parso/pgen2/pgen.py index 029466d..fe92bca 100644 --- a/parso/pgen2/pgen.py +++ b/parso/pgen2/pgen.py @@ -8,12 +8,16 @@ from parso.pgen2 import grammar from parso.python import token from parso.python import tokenize +from parso.utils import parse_version_string class ParserGenerator(object): def __init__(self, bnf_text): self._bnf_text = bnf_text - self.generator = tokenize.tokenize(bnf_text, version_int=36) + self.generator = tokenize.tokenize( + bnf_text, + version_info=parse_version_string('3.6') + ) self._gettoken() # Initialize lookahead self.dfas, self.startsymbol = self._parse() self.first = {} # map from symbol name to set of tokens diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py index a98f28b..5895729 100644 --- a/parso/python/tokenize.py +++ b/parso/python/tokenize.py @@ -64,14 +64,14 @@ def maybe(*choices): # Return the empty string, plus all of the valid string prefixes. -def _all_string_prefixes(version_int): +def _all_string_prefixes(version_info): # The valid string prefixes. Only contain the lower case versions, # and don't contain any permuations (include 'fr', but not # 'rf'). The various permutations will be generated. 
_valid_string_prefixes = ['b', 'r', 'u', 'br'] - if version_int >= 36: + if version_info >= (3, 6): _valid_string_prefixes += ['f', 'fr'] - if version_int <= 27: + if version_info <= (2, 7): # TODO this is actually not 100% valid. ur is valid in Python 2.7, # while ru is not. _valid_string_prefixes.append('ur') @@ -91,23 +91,23 @@ def _compile(expr): return re.compile(expr, re.UNICODE) -def _get_token_collection(version_int): +def _get_token_collection(version_info): try: - return _token_collection_cache[version_int] + return _token_collection_cache[tuple(version_info)] except KeyError: - _token_collection_cache[version_int] = result = \ - _create_token_collection(version_int) + _token_collection_cache[tuple(version_info)] = result = \ + _create_token_collection(version_info) return result -def _create_token_collection(version_int): +def _create_token_collection(version_info): # Note: we use unicode matching for names ("\w") but ascii matching for # number literals. Whitespace = r'[ \f\t]*' Comment = r'#[^\r\n]*' Name = r'\w+' - if version_int >= 36: + if version_info >= (3, 6): Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' Binnumber = r'0[bB](?:_?[01])+' Octnumber = r'0[oO](?:_?[0-7])+' @@ -122,7 +122,7 @@ def _create_token_collection(version_int): else: Hexnumber = r'0[xX][0-9a-fA-F]+' Binnumber = r'0[bB][01]+' - if version_int >= 30: + if version_info >= (3, 0): Octnumber = r'0[oO][0-7]+' else: Octnumber = '0[oO]?[0-7]+' @@ -137,7 +137,7 @@ def _create_token_collection(version_int): # Note that since _all_string_prefixes includes the empty string, # StringPrefix can be the empty string (making it optional). - possible_prefixes = _all_string_prefixes(version_int) + possible_prefixes = _all_string_prefixes(version_info) StringPrefix = group(*possible_prefixes) # Tail end of ' string. 
@@ -161,7 +161,7 @@ def _create_token_collection(version_int): Bracket = '[][(){}]' special_args = [r'\r?\n', r'[:;.,@]'] - if version_int >= 30: + if version_info >= (3, 0): special_args.insert(0, r'\.\.\.') Special = group(*special_args) @@ -233,13 +233,13 @@ class TokenInfo(namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])): return self.start_pos[0], self.start_pos[1] + len(self.string) -def tokenize(code, version_int): +def tokenize(code, version_info): """Generate tokens from a the source code (string).""" lines = splitlines(code, keepends=True) - return tokenize_lines(lines, version_int) + return tokenize_lines(lines, version_info) -def tokenize_lines(lines, version_int): +def tokenize_lines(lines, version_info): """ A heavily modified Python standard library tokenizer. @@ -248,7 +248,7 @@ def tokenize_lines(lines, version_int): that is irrelevant for the parser like newlines in parentheses or comments. """ pseudo_token, single_quoted, triple_quoted, endpats, always_break_tokens, = \ - _get_token_collection(version_int) + _get_token_collection(version_info) paren_level = 0 # count parentheses indents = [0] max = 0 diff --git a/parso/utils.py b/parso/utils.py index 5d2abc0..bfbb2a1 100644 --- a/parso/utils.py +++ b/parso/utils.py @@ -3,7 +3,10 @@ import re import sys from ast import literal_eval -from parso._compatibility import unicode +from parso._compatibility import unicode, total_ordering + + +Version = namedtuple('Version', 'major, minor, micro') def splitlines(string, keepends=False): @@ -82,7 +85,6 @@ def version_info(): Returns a namedtuple of parso's version, similar to Python's ``sys.version_info``. """ - Version = namedtuple('Version', 'major, minor, micro') from parso import __version__ tupl = re.findall(r'[a-z]+|\d+', __version__) return Version(*[x if i == 3 else int(x) for i, x in enumerate(tupl)]) @@ -94,24 +96,48 @@ def _parse_version(version): raise ValueError('The given version is not in the right format. 
' 'Use something like "3.2" or "3".') - major = match.group(1) + major = int(match.group(1)) minor = match.group(2) if minor is None: # Use the latest Python in case it's not exactly defined, because the # grammars are typically backwards compatible? - if major == "2": + if major == 2: minor = "7" - elif major == "3": + elif major == 3: minor = "6" else: raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.") - return int(major + minor) + minor = int(minor) + return PythonVersionInfo(major, minor) -def version_string_to_int(version=None): +@total_ordering +class PythonVersionInfo(namedtuple('Version', 'major, minor')): + def __gt__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) > other + # Not a tuple: defer to the inherited comparison so Python can try the + # reflected operation or raise TypeError. + return super(PythonVersionInfo, self).__gt__(other) + + def __eq__(self, other): + if isinstance(other, tuple): + if len(other) != 2: + raise ValueError("Can only compare to tuples of length 2.") + return (self.major, self.minor) == other + return super(PythonVersionInfo, self).__eq__(other) + + def __ne__(self, other): + return not self.__eq__(other) + + +def parse_version_string(version=None): """ Checks for a valid version number (e.g. `3.2` or `2.7.1` or `3`) and - returns a corresponding int that is always two characters long in decimal. + returns the corresponding ``PythonVersionInfo``, which holds only the major + and minor version. 
""" if version is None: version = '%s.%s' % sys.version_info[:2] diff --git a/test/test_load_grammar.py b/test/test_load_grammar.py index 105dc7a..70dd807 100644 --- a/test/test_load_grammar.py +++ b/test/test_load_grammar.py @@ -14,7 +14,7 @@ def test_load_inexisting_grammar(): @pytest.mark.parametrize(('string', 'result'), [ - ('2', 27), ('3', 36), ('1.1', 11), ('1.1.1', 11), ('300.1.31', 3001) + ('2', (2, 7)), ('3', (3, 6)), ('1.1', (1, 1)), ('1.1.1', (1, 1)), ('300.1.31', (300, 1)) ]) def test_parse_version(string, result): assert utils._parse_version(string) == result diff --git a/test/test_pgen2.py b/test/test_pgen2.py index 9febd72..b967fd6 100644 --- a/test/test_pgen2.py +++ b/test/test_pgen2.py @@ -10,10 +10,9 @@ from textwrap import dedent import pytest -from parso._compatibility import py_version from parso import load_grammar from parso import ParserSyntaxError -from parso.utils import version_string_to_int +from parso.utils import parse_version_string class Checker(): @@ -32,8 +31,8 @@ def works_in_py2(each_version): @pytest.fixture def works_ge_py3(each_version): - version_int = version_string_to_int(each_version) - return Checker(each_version, version_int >= 30) + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 0)) @pytest.fixture @@ -41,8 +40,8 @@ def works_ge_py35(each_version): """ Works only greater equal Python 3.3. 
""" - version_int = version_string_to_int(each_version) - return Checker(each_version, version_int >= 35) + version_info = parse_version_string(each_version) + return Checker(each_version, version_info >= (3, 5)) def _parse(code, version=None): diff --git a/test/test_tokenize.py b/test/test_tokenize.py index 885dfff..f0101bb 100644 --- a/test/test_tokenize.py +++ b/test/test_tokenize.py @@ -5,7 +5,7 @@ from textwrap import dedent import pytest from parso._compatibility import py_version -from parso.utils import splitlines, version_string_to_int +from parso.utils import splitlines, parse_version_string from parso.python.token import ( NAME, NEWLINE, STRING, INDENT, DEDENT, ERRORTOKEN, ENDMARKER) from parso.python import tokenize @@ -15,8 +15,8 @@ from parso.python.tokenize import TokenInfo def _get_token_list(string): # Load the current version. - version_int = version_string_to_int() - return list(tokenize.tokenize(string, version_int)) + version_info = parse_version_string() + return list(tokenize.tokenize(string, version_info)) def test_end_pos_one_line():