support unicode in Python 2 for difflib (#2055)

Fixes #1961.

I mostly just replaced all str annotations with Text, including in return types. This is
only broadly correct; diffing a str and a unicode sequence actually results in a mixed
output of str and unicode. We could also keep the return types as str if using Text
causes errors in real code. For callbacks that take str, I introduced a Union alias
because a callable taking a str would not be compatible with a parameter of type
Callable[[Text], bool].

I also fixed the return type of difflib.restore, which was annotated as Iterator[int] and is now Iterator[Text].
This commit is contained in:
Jelle Zijlstra
2018-04-27 14:39:18 -07:00
committed by Matthias Kramm
parent f60ffe47a2
commit 46f0bb8b91

View File

@@ -1,14 +1,13 @@
# Based on https://docs.python.org/2.7/library/difflib.html and https://docs.python.org/3.2/library/difflib.html
# TODO: Support unicode in Python 2?
import sys
from typing import (
TypeVar, Callable, Iterable, Iterator, List, NamedTuple, Sequence, Tuple,
Generic, Optional
Generic, Optional, Text, Union
)
_T = TypeVar('_T')
_JunkCallback = Union[Callable[[Text], bool], Callable[[str], bool]]
Match = NamedTuple('Match', [
('a', int),
@@ -37,36 +36,35 @@ def get_close_matches(word: Sequence[_T], possibilities: Iterable[Sequence[_T]],
n: int = ..., cutoff: float = ...) -> List[Sequence[_T]]: ...
class Differ:
def __init__(self, linejunk: Callable[[str], bool] = ...,
charjunk: Callable[[str], bool] = ...) -> None: ...
def compare(self, a: Sequence[str], b: Sequence[str]) -> Iterator[str]: ...
def __init__(self, linejunk: _JunkCallback = ..., charjunk: _JunkCallback = ...) -> None: ...
def compare(self, a: Sequence[Text], b: Sequence[Text]) -> Iterator[Text]: ...
def IS_LINE_JUNK(str) -> bool: ...
def IS_CHARACTER_JUNK(str) -> bool: ...
def unified_diff(a: Sequence[str], b: Sequence[str], fromfile: str = ...,
tofile: str = ..., fromfiledate: str = ..., tofiledate: str = ...,
n: int = ..., lineterm: str = ...) -> Iterator[str]: ...
def context_diff(a: Sequence[str], b: Sequence[str], fromfile: str=...,
tofile: str = ..., fromfiledate: str = ..., tofiledate: str = ...,
n: int = ..., lineterm: str = ...) -> Iterator[str]: ...
def ndiff(a: Sequence[str], b: Sequence[str],
linejunk: Callable[[str], bool] = ...,
charjunk: Callable[[str], bool] = ...
) -> Iterator[str]: ...
def IS_LINE_JUNK(line: Text) -> bool: ...
def IS_CHARACTER_JUNK(line: Text) -> bool: ...
def unified_diff(a: Sequence[Text], b: Sequence[Text], fromfile: Text = ...,
tofile: Text = ..., fromfiledate: Text = ..., tofiledate: Text = ...,
n: int = ..., lineterm: Text = ...) -> Iterator[Text]: ...
def context_diff(a: Sequence[Text], b: Sequence[Text], fromfile: Text=...,
tofile: Text = ..., fromfiledate: Text = ..., tofiledate: Text = ...,
n: int = ..., lineterm: Text = ...) -> Iterator[Text]: ...
def ndiff(a: Sequence[Text], b: Sequence[Text],
linejunk: _JunkCallback = ...,
charjunk: _JunkCallback = ...
) -> Iterator[Text]: ...
class HtmlDiff(object):
def __init__(self, tabsize: int = ..., wrapcolumn: int = ...,
linejunk: Callable[[str], bool] = ...,
charjunk: Callable[[str], bool] = ...
linejunk: _JunkCallback = ...,
charjunk: _JunkCallback = ...
) -> None: ...
def make_file(self, fromlines: Sequence[str], tolines: Sequence[str],
fromdesc: str = ..., todesc: str = ..., context: bool = ...,
numlines: int = ...) -> str: ...
def make_table(self, fromlines: Sequence[str], tolines: Sequence[str],
fromdesc: str = ..., todesc: str = ..., context: bool = ...,
numlines: int = ...) -> str: ...
def make_file(self, fromlines: Sequence[Text], tolines: Sequence[Text],
fromdesc: Text = ..., todesc: Text = ..., context: bool = ...,
numlines: int = ...) -> Text: ...
def make_table(self, fromlines: Sequence[Text], tolines: Sequence[Text],
fromdesc: Text = ..., todesc: Text = ..., context: bool = ...,
numlines: int = ...) -> Text: ...
def restore(delta: Iterable[str], which: int) -> Iterator[int]: ...
def restore(delta: Iterable[Text], which: int) -> Iterator[Text]: ...
if sys.version_info >= (3, 5):
def diff_bytes(