From 3747b009bf2ffc33910667e3c1d6dd36810ca24d Mon Sep 17 00:00:00 2001 From: farhad Date: Sun, 14 Sep 2014 11:34:27 +0400 Subject: [PATCH 1/3] fix tokenization of code containing unicode strings --- jedi/parser/tokenize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jedi/parser/tokenize.py b/jedi/parser/tokenize.py index d988135c..08099e79 100644 --- a/jedi/parser/tokenize.py +++ b/jedi/parser/tokenize.py @@ -137,7 +137,7 @@ double = r'[^"\\]*(?:\\.[^"\\]*)*"' single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" # Tail end of """ string. double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""') +triple = group("[uUbB]?[rR]?'''", '[uUbB]?[rR]?"""') # Single-line ' or " string. # Because of leftmost-then-longest match semantics, be sure to put the @@ -193,7 +193,7 @@ single_quoted = {} for t in ("'", '"', "r'", 'r"', "R'", 'R"', "b'", 'b"', "B'", 'B"', - "u'", 'u""', "U'", 'U"', + "u'", 'u"', "U'", 'U"', "br'", 'br"', "Br'", 'Br"', "bR'", 'bR"', "BR'", 'BR"'): single_quoted[t] = t From 80719fc8216b64e6b16f6e14304f08db6ebf1410 Mon Sep 17 00:00:00 2001 From: farhad Date: Fri, 6 Mar 2015 11:54:01 +0400 Subject: [PATCH 2/3] added test for quoted strings parsing --- test/test_parser/test_tokenize.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/test_parser/test_tokenize.py b/test/test_parser/test_tokenize.py index 04a9115f..ffe2dd91 100644 --- a/test/test_parser/test_tokenize.py +++ b/test/test_parser/test_tokenize.py @@ -1,5 +1,6 @@ from jedi._compatibility import u from jedi import parser +from token import STRING from ..helpers import unittest @@ -22,6 +23,24 @@ asdfasdf""" + "h" tok = parsed.module.subscopes[0].statements[0]._token_list[2] self.assertEqual(tok.end_pos, (4, 11)) + def test_quoted_strings(self): + + string_tokens = [ + 'u"test"', + 'u"""test"""', + 'U"""test"""', + "u'''test'''", + "U'''test'''", + ] + + for s in string_tokens: + parsed = parser.Parser(u('''a = %s\n''' % s)) + tok_list = parsed.module.statements[0]._token_list + self.assertEqual(len(tok_list), 3) + tok = tok_list[2] + self.assertIsInstance(tok, parser.tokenize.Token) + self.assertEqual(tok.type, STRING) + def test_tokenizer_with_string_literal_backslash(): import jedi From f9c104348e4a9cb0a9b253a957c463594b7b1595 Mon Sep 17 00:00:00 2001 From: farhad Date: Fri, 6 Mar 2015 11:55:16 +0400 Subject: [PATCH 3/3] added myself to AUTHORS.txt --- AUTHORS.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.txt b/AUTHORS.txt index 376078eb..10bff213 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -29,5 +29,6 @@ Syohei Yoshida (@syohex) ppalucky (@ppalucky) immerrr (@immerrr) immerrr@gmail.com Albertas Agejevas (@alga) +Farkhad Khatamov (@hatamov) Note: (@user) means a github user name.