From b367058af6ce2f5f28d67293622fc549a4efd9be Mon Sep 17 00:00:00 2001
From: Dave Halter <davidhalter88@gmail.com>
Date: Wed, 31 May 2017 08:59:49 +0200
Subject: [PATCH] Temporary work on carriage returns.

---
 parso/python/parser.py | 36 ++++++++++++++++++++++++++----------
 parso/python/prefix.py | 10 ++++++----
 test/test_get_code.py  | 14 ++++++++++++++
 test/test_prefix.py    |  2 ++
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/parso/python/parser.py b/parso/python/parser.py
index 44871b3..4300a8a 100644
--- a/parso/python/parser.py
+++ b/parso/python/parser.py
@@ -196,6 +196,18 @@ class Parser(BaseParser):
 
 
 def remove_last_newline(node):
+    def calculate_end_pos(leaf, text):
+        """Compute the (line, column) where ``text`` ends after ``leaf``."""
+        # Without a preceding leaf, counting starts at the top of the file.
+        start = (1, 0) if leaf is None else leaf.end_pos
+        pieces = splitlines(text, keepends=True)
+        if len(pieces) != 1:
+            # The text spans several lines, so the column restarts on the
+            # last one.
+            return start[0] + len(pieces) - 1, len(pieces[-1])
+        # Everything stays on the line where the previous leaf ended.
+        return start[0], start[1] + len(pieces[0])
+
     endmarker = node.children[-1]
     # The newline is either in the endmarker as a prefix or the previous
     # leaf as a newline token.
@@ -213,20 +225,24 @@ def remove_last_newline(node):
         raise ValueError("There's no newline at the end, cannot remove it.")
 
     text = text[:-1]
+    if text and text[-1] == '\r':
+        # By adding an artificial newline this creates weird side effects for
+        # \r at the end of files that would normally be error leafs. Try to
+        # correct that here.
+        text = text[:-1]
+        start_pos = calculate_end_pos(leaf, text)
+        error_token = tree.PythonErrorLeaf('errortoken', '\r', start_pos, prefix=text)
+        node.children.insert(-2, error_token)
+
+        # Cleanup
+        leaf = error_token
+        text = ''
+
     if prefix:
         endmarker.prefix = text
 
-        if leaf is None:
-            end_pos = (1, 0)
-        else:
-            end_pos = leaf.end_pos
 
-        lines = splitlines(text, keepends=True)
-        if len(lines) == 1:
-            end_pos = end_pos[0], end_pos[1] + len(lines[0])
-        else:
-            end_pos = end_pos[0] + len(lines) - 1,  len(lines[-1])
-        endmarker.start_pos = end_pos
+        endmarker.start_pos = calculate_end_pos(leaf, text)
     else:
         leaf.value = text
         endmarker.start_pos = leaf.end_pos
diff --git a/parso/python/prefix.py b/parso/python/prefix.py
index 6ba3eb6..06bbf53 100644
--- a/parso/python/prefix.py
+++ b/parso/python/prefix.py
@@ -3,7 +3,7 @@ import re
 from parso.tokenize import group
 
 
-class PrefixToken(object):
+class PrefixPart(object):
     def __init__(self, typ, value, start_pos):
         self.type = typ
         self.value = value
@@ -19,10 +19,11 @@ class PrefixToken(object):
 _comment = r'#[^\n\r\f]*'
 _backslash = r'\\\r?\n?'
 _whitespace = r' +'
+_tabs = r'\t+'
 _newline = r'\r?\n'
 _form_feed = r'\f'
 
-_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed)
+_regex = group(_comment, _backslash, _whitespace, _newline, _form_feed, _tabs)
 _regex = re.compile(_regex)
 
 
@@ -32,7 +33,8 @@ _types = {
     '\\': 'backslash',
     '\f': 'formfeed',
     '\n': 'newline',
-    '\r': 'newline'
+    '\r': 'newline',
+    '\t': 'tabs',
 }
 
 
@@ -43,7 +45,7 @@ def split_prefix(prefix, start_pos):
         match =_regex.match(prefix, start)
         value = match.group(0)
         typ = _types[value[0]]
-        yield PrefixToken(typ, value, (line, column + start))
+        yield PrefixPart(typ, value, (line, column + start))
 
         start = match.end(0)
         if value.endswith('\n'):
diff --git a/test/test_get_code.py b/test/test_get_code.py
index 6a54385..5bb789f 100644
--- a/test/test_get_code.py
+++ b/test/test_get_code.py
@@ -104,3 +104,17 @@ def test_end_newlines():
     test('def a():\n pass', (2, 5))
 
     test('def a(', (1, 6))
+
+
+@pytest.mark.parametrize(('code', 'types'), [
+    ('\r', ['error_leaf', 'endmarker']),
+    ('\n\r', ['error_leaf', 'endmarker'])
+])
+def test_carriage_return_at_end(code, types):
+    r"""
+    Adding an artificial newline creates weird side effects for a \r at
+    the end of a file, which would normally become an error leaf.
+    """
+    tree = parse(code)
+    assert tree.get_code() == code
+    assert [c.type for c in tree.children] == types
diff --git a/test/test_prefix.py b/test/test_prefix.py
index 95eea4b..ab0d1d4 100644
--- a/test/test_prefix.py
+++ b/test/test_prefix.py
@@ -15,6 +15,7 @@ import parso
     ('\\', ['\\']),
     ('\\\n', ['\\\n']),
     ('\\\r\n', ['\\\r\n']),
+    ('\t\t\n\t', ['\t\t', '\n', '\t']),
 ])
 def test_simple_prefix_splitting(string, tokens):
     tree = parso.parse(string)
@@ -42,6 +43,7 @@ def test_simple_prefix_splitting(string, tokens):
     ('\r\n', ['newline']),
     ('\f', ['formfeed']),
     ('\\\n', ['backslash']),
+    ('\r', ['newline']),
 ])
 def test_prefix_splitting_types(string, types):
     tree = parso.parse(string)