From 46f0bb8b91f622bdb31485f1ad891fb3577dc7fb Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 27 Apr 2018 14:39:18 -0700 Subject: [PATCH] support unicode in Python 2 for difflib (#2055) Fixes #1961. I mostly just replaced all str annotations with Text, including in return types. This is only broadly correct; diffing a str and a unicode sequence actually results in a mixed output of str and unicode. We could also keep the return types as str if using Text causes errors in real code. For callbacks that take str, I introduced a Union alias because a callable taking a str would not be compatible with a parameter of type Callable[[Text], bool]. I also fixed the return type of difflib.restore. --- stdlib/2and3/difflib.pyi | 52 +++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/stdlib/2and3/difflib.pyi b/stdlib/2and3/difflib.pyi index f62864fc2..2727b36f2 100644 --- a/stdlib/2and3/difflib.pyi +++ b/stdlib/2and3/difflib.pyi @@ -1,14 +1,13 @@ # Based on https://docs.python.org/2.7/library/difflib.html and https://docs.python.org/3.2/library/difflib.html -# TODO: Support unicode in Python 2? - import sys from typing import ( TypeVar, Callable, Iterable, Iterator, List, NamedTuple, Sequence, Tuple, - Generic, Optional + Generic, Optional, Text, Union ) _T = TypeVar('_T') +_JunkCallback = Union[Callable[[Text], bool], Callable[[str], bool]] Match = NamedTuple('Match', [ ('a', int), @@ -37,36 +36,35 @@ def get_close_matches(word: Sequence[_T], possibilities: Iterable[Sequence[_T]], n: int = ..., cutoff: float = ...) -> List[Sequence[_T]]: ... class Differ: - def __init__(self, linejunk: Callable[[str], bool] = ..., - charjunk: Callable[[str], bool] = ...) -> None: ... - def compare(self, a: Sequence[str], b: Sequence[str]) -> Iterator[str]: ... + def __init__(self, linejunk: _JunkCallback = ..., charjunk: _JunkCallback = ...) -> None: ... 
+ def compare(self, a: Sequence[Text], b: Sequence[Text]) -> Iterator[Text]: ... -def IS_LINE_JUNK(str) -> bool: ... -def IS_CHARACTER_JUNK(str) -> bool: ... -def unified_diff(a: Sequence[str], b: Sequence[str], fromfile: str = ..., - tofile: str = ..., fromfiledate: str = ..., tofiledate: str = ..., - n: int = ..., lineterm: str = ...) -> Iterator[str]: ... -def context_diff(a: Sequence[str], b: Sequence[str], fromfile: str=..., - tofile: str = ..., fromfiledate: str = ..., tofiledate: str = ..., - n: int = ..., lineterm: str = ...) -> Iterator[str]: ... -def ndiff(a: Sequence[str], b: Sequence[str], - linejunk: Callable[[str], bool] = ..., - charjunk: Callable[[str], bool] = ... - ) -> Iterator[str]: ... +def IS_LINE_JUNK(line: Text) -> bool: ... +def IS_CHARACTER_JUNK(line: Text) -> bool: ... +def unified_diff(a: Sequence[Text], b: Sequence[Text], fromfile: Text = ..., + tofile: Text = ..., fromfiledate: Text = ..., tofiledate: Text = ..., + n: int = ..., lineterm: Text = ...) -> Iterator[Text]: ... +def context_diff(a: Sequence[Text], b: Sequence[Text], fromfile: Text=..., + tofile: Text = ..., fromfiledate: Text = ..., tofiledate: Text = ..., + n: int = ..., lineterm: Text = ...) -> Iterator[Text]: ... +def ndiff(a: Sequence[Text], b: Sequence[Text], + linejunk: _JunkCallback = ..., + charjunk: _JunkCallback = ... + ) -> Iterator[Text]: ... class HtmlDiff(object): def __init__(self, tabsize: int = ..., wrapcolumn: int = ..., - linejunk: Callable[[str], bool] = ..., - charjunk: Callable[[str], bool] = ... + linejunk: _JunkCallback = ..., + charjunk: _JunkCallback = ... ) -> None: ... - def make_file(self, fromlines: Sequence[str], tolines: Sequence[str], - fromdesc: str = ..., todesc: str = ..., context: bool = ..., - numlines: int = ...) -> str: ... - def make_table(self, fromlines: Sequence[str], tolines: Sequence[str], - fromdesc: str = ..., todesc: str = ..., context: bool = ..., - numlines: int = ...) -> str: ... 
+ def make_file(self, fromlines: Sequence[Text], tolines: Sequence[Text], + fromdesc: Text = ..., todesc: Text = ..., context: bool = ..., + numlines: int = ...) -> Text: ... + def make_table(self, fromlines: Sequence[Text], tolines: Sequence[Text], + fromdesc: Text = ..., todesc: Text = ..., context: bool = ..., + numlines: int = ...) -> Text: ... -def restore(delta: Iterable[str], which: int) -> Iterator[int]: ... +def restore(delta: Iterable[Text], which: int) -> Iterator[Text]: ... if sys.version_info >= (3, 5): def diff_bytes(