# Recovered from the patch "Script for modular typeshed migration (#4259)",
# which adds this module as scripts/migrate_script.py.
"""
Ad-hoc script to migrate typeshed to a new directory structure proposed in
https://github.com/python/typeshed/issues/2491#issuecomment-611607557

All input paths (stdlib/..., third_party/...) are relative, so the script has
to be run from the directory that contains them. Nothing is modified in place:
the reshuffled tree is written under OUTPUT_DIR.
"""

import ast
import os
import os.path
import shutil

from dataclasses import dataclass
from typing import Optional, List, Set, Tuple

# These names may be still discussed so I make them constants.
STDLIB_NAMESPACE = "stdlib"
THIRD_PARTY_NAMESPACE = "stubs"
DEFAULT_VERSION = "0.1"
DEFAULT_PY3_VERSION = "3.5"
PY2_NAMESPACE = "python2"
OUTPUT_DIR = "out"

# Third party imports (type ignored) of missing stubs.
MISSING_WHITELIST = {
    "thrift",
}

# Manually collected special cases where distribution name and
# package name are different.
package_to_distribution = {
    "_pytest": "pytest",
    "yaml": "PyYAML",
    "typing_extensions": "typing-extensions",
    "mypy_extensions": "mypy-extensions",
    "pyre_extensions": "pyre-extensions",
    "attr": "attrs",
    "concurrent": "futures",
    "Crypto": "pycrypto",
    "datetimerange": "DateTimeRange",
    "dateutil": "python-dateutil",
    "enum": "enum34",
    "flask": "Flask",
    "gflags": "python-gflags",
    "google": "protobuf",
    "jinja2": "Jinja2",
    "markupsafe": "MarkupSafe",
    "OpenSSL": "openssl-python",
    "pymysql": "PyMySQL",
    "pyVmomi": "pyvmomi",
    "routes": "Routes",
    "typed_ast": "typed-ast",
    "werkzeug": "Werkzeug",
}

# Distributions whose stub version is not DEFAULT_VERSION.
known_versions = {
    "mypy-extensions": "0.4",
    "typing-extensions": "3.7",
    "typed-ast": "1.4",
}


# Classes with "Package" in name represent both packages and modules.
# The latter two are distinguished by is_dir flag.
class PackageBase:
    """Common attributes for packages/modules"""
    path: str  # full initial path like stdlib/2and3/argparse.pyi
    is_dir: bool  # True for a package directory, False for a single .pyi module

    @property
    def name(self) -> str:
        """Return the importable name: directory name, or file name without .pyi."""
        _, tail = os.path.split(self.path)
        if self.is_dir:
            assert not tail.endswith(".pyi"), f"Directory looks like a stub file: {self.path}"
            return tail
        assert tail.endswith(".pyi"), f"Not a stub file: {self.path}"
        name, _ = os.path.splitext(tail)
        return name


@dataclass
class StdLibPackage(PackageBase):
    """Package/module in standard library."""
    path: str
    py_version: Optional[str]  # Can be omitted for Python 2 only packages.
    is_dir: bool


@dataclass
class ThirdPartyPackage(PackageBase):
    """Package/module in a third party distribution."""
    path: str
    py2_compatible: bool
    py3_compatible: bool
    is_dir: bool
    requires: List[str]  # distributions this depends on


def add_stdlib_packages_from(subdir: str, packages: List[StdLibPackage],
                             py_version: Optional[str]) -> None:
    """Add standard library packages/modules from a given stdlib/xxx subdirectory.

    Append to packages list in-place, use py_version as the minimal supported version.
    """
    # os.listdir() returns entries in arbitrary order; sort to keep runs reproducible.
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        packages.append(StdLibPackage(path, py_version, is_dir=os.path.isdir(path)))


def collect_stdlib_packages() -> Tuple[List[StdLibPackage], List[StdLibPackage]]:
    """Collect standard library packages/modules from all current stdlib/xxx sub-directories.

    Return a pair (packages for the common namespace, Python 2 only packages).
    """
    stdlib: List[StdLibPackage] = []
    py2_stdlib: List[StdLibPackage] = []
    # These will go to a separate subdirectory.
    add_stdlib_packages_from("stdlib/2", py2_stdlib, None)
    add_stdlib_packages_from("stdlib/2and3", stdlib, "2.7")
    # Use oldest currently supported version for Python 3 packages/modules.
    add_stdlib_packages_from("stdlib/3", stdlib, DEFAULT_PY3_VERSION)
    for version in ("3.6", "3.7", "3.8", "3.9"):
        subdir = os.path.join("stdlib", version)
        # Version-specific directories are optional.
        if os.path.isdir(subdir):
            add_stdlib_packages_from(subdir, stdlib, version)
    return stdlib, py2_stdlib


def add_third_party_packages_from(subdir: str, packages: List[ThirdPartyPackage],
                                  py2_compatible: bool, py3_compatible: bool) -> None:
    """Add third party packages/modules from a given third_party/xxx subdirectory.

    Append to packages list in-place; requirements are left empty and filled in
    later by populate_requirements().
    """
    # Sorted for the same reproducibility reason as in add_stdlib_packages_from().
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        packages.append(ThirdPartyPackage(path, py2_compatible, py3_compatible,
                                          requires=[], is_dir=os.path.isdir(path)))


def collect_third_party_packages() -> Tuple[List[ThirdPartyPackage], List[ThirdPartyPackage]]:
    """Collect third party packages/modules from all current third_party/xxx sub-directories.

    Return a pair (regular packages, Python 2 counterparts of packages that also
    exist in third_party/3).
    """
    third_party: List[ThirdPartyPackage] = []
    py2_third_party: List[ThirdPartyPackage] = []
    add_third_party_packages_from("third_party/3", third_party,
                                  py2_compatible=False, py3_compatible=True)
    add_third_party_packages_from("third_party/2and3", third_party,
                                  py2_compatible=True, py3_compatible=True)
    # We special-case Python 2 for third party packages like six.
    subdir = "third_party/2"
    py3_packages = set(os.listdir("third_party/3"))
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        package = ThirdPartyPackage(path, py2_compatible=True, py3_compatible=False,
                                    requires=[], is_dir=os.path.isdir(path))
        if name in py3_packages:
            # If there is a package with the same name in /2 and /3, we add the former to
            # a separate list, packages from there will be put into /python2 sub-directories.
            py2_third_party.append(package)
        else:
            third_party.append(package)
    return third_party, py2_third_party


def get_top_imported_names(file: str) -> Set[str]:
    """Collect names imported in given file.

    We only collect top-level names, i.e. `from foo.bar import baz`
    will only add `foo` to the list. Files that are not `.pyi` stubs are
    ignored and produce an empty set.
    """
    if not file.endswith(".pyi"):
        return set()
    with open(file, "rb") as f:
        content = f.read()
    # Pass the file name so that a syntax error points at the offending stub.
    parsed = ast.parse(content, filename=file)
    top_imported: Set[str] = set()
    for node in ast.walk(parsed):
        if isinstance(node, ast.Import):
            for alias in node.names:
                top_imported.add(alias.name.split('.')[0])
        elif isinstance(node, ast.ImportFrom):
            if node.level > 0:
                # Relative imports always refer to the current package.
                continue
            assert node.module
            top_imported.add(node.module.split('.')[0])
    return top_imported


def populate_requirements(package: ThirdPartyPackage,
                          stdlib: List[str], py2_stdlib: List[str],
                          known_distributions: Set[str]) -> None:
    """Generate requirements using imports found in a package.

    stdlib and py2_stdlib are the module names available in the Python 3 and
    Python 2 standard library respectively; known_distributions are the names of
    all third party distributions. The result is stored, sorted, in
    package.requires. Must be called at most once per package.
    """
    assert not package.requires, "Populate must be called once"
    if not package.is_dir:
        all_top_imports = get_top_imported_names(package.path)
    else:
        all_top_imports = set()
        for dir_path, _, file_names in os.walk(package.path):
            for file_name in file_names:
                all_top_imports |= get_top_imported_names(os.path.join(dir_path, file_name))

    # Membership is tested for every import, so use sets for the lookups.
    stdlib_names = set(stdlib)
    py2_stdlib_names = set(py2_stdlib)

    # Generate dependencies using collected imports.
    requirements: Set[str] = set()
    for name in all_top_imports:
        # Note: dependencies are between distributions, not packages.
        distribution = package_to_distribution.get(name, name)
        if package.py3_compatible and name not in stdlib_names:
            if distribution in known_distributions:
                requirements.add(distribution)
            else:
                # Likely a conditional import. The stdlib lists contain module
                # names, so they must be checked against the imported name and
                # not against the (possibly different) distribution name.
                assert name in py2_stdlib_names or distribution in MISSING_WHITELIST, (
                    f"Unknown import {name!r} in {package.path}"
                )
        if package.py2_compatible and name not in py2_stdlib_names:
            if distribution in known_distributions:
                requirements.add(distribution)
            else:
                # Likely a conditional import (see the note above).
                assert name in stdlib_names or distribution in MISSING_WHITELIST, (
                    f"Unknown import {name!r} in {package.path}"
                )
    # Remove dependency to itself generated by absolute imports.
    current_distribution = package_to_distribution.get(package.name, package.name)
    package.requires = sorted(requirements - {current_distribution})


def generate_versions(packages: List[StdLibPackage]) -> str:
    """Generate the stdlib/VERSIONS file for packages/modules.

    Return sorted "name: version" lines joined by newlines, without a trailing
    newline. Every package must have py_version set.
    """
    lines = []
    for package in packages:
        assert package.py_version is not None
        lines.append(f"{package.name}: {package.py_version}")
    return "\n".join(sorted(lines))


def _copy_package(package: PackageBase, destination_dir: str) -> None:
    """Copy a module file, or a whole package tree, into destination_dir."""
    if package.is_dir:
        shutil.copytree(package.path, os.path.join(destination_dir, package.name))
    else:
        shutil.copy(package.path, destination_dir)


def copy_stdlib(packages: List[StdLibPackage], py2_packages: List[StdLibPackage]) -> None:
    """Refactor the standard library part using collected metadata.

    Write the VERSIONS file and copy packages under OUTPUT_DIR/STDLIB_NAMESPACE;
    Python 2 only packages go to the nested PY2_NAMESPACE directory.
    """
    stdlib_dir = os.path.join(OUTPUT_DIR, STDLIB_NAMESPACE)
    os.makedirs(stdlib_dir, exist_ok=True)

    # Write version metadata.
    with open(os.path.join(stdlib_dir, "VERSIONS"), "w") as f:
        f.write(generate_versions(packages))
        f.write("\n")

    # Copy stdlib/2and3 and stdlib/3 packages/modules.
    for package in packages:
        _copy_package(package, stdlib_dir)

    # Copy stdlib/2 packages/modules to a nested python2 namespace.
    if py2_packages:
        py2_stdlib_dir = os.path.join(stdlib_dir, PY2_NAMESPACE)
        os.makedirs(py2_stdlib_dir, exist_ok=True)
        for package in py2_packages:
            _copy_package(package, py2_stdlib_dir)


def generate_metadata(package: ThirdPartyPackage, py2_packages: List[str]) -> str:
    """Generate METADATA.toml for a given package.

    Only add compatibility flags if they are different from default values:
    python2 = false, python3 = true.

    Note: the metadata should be generated per distribution, but we just use
    an arbitrary package to populate it, since it should be the same for all
    packages.

    py2_packages are the names of packages that have a separate Python 2
    version of stubs. The returned text has no trailing newline.
    """
    version = known_versions.get(
        package_to_distribution.get(package.name, package.name),
        DEFAULT_VERSION,
    )
    lines = [f'version = "{version}"']
    if package.py2_compatible or package.name in py2_packages:
        # Note: for packages like six that appear in both normal and Python 2 only
        # lists we force set python2 = true.
        lines.append("python2 = true")
    if not package.py3_compatible:
        lines.append("python3 = false")
    if package.requires:
        # The lookup maps package names; names that are not keys of the table
        # (such as distribution names) pass through unchanged.
        distributions = [f'"types-{package_to_distribution.get(dep, dep)}"'
                         for dep in package.requires]
        lines.append(f"requires = [{', '.join(distributions)}]")
    return "\n".join(lines)


def copy_third_party(packages: List[ThirdPartyPackage],
                     py2_packages: List[ThirdPartyPackage]) -> None:
    """Refactor the third party part using collected metadata.

    Every distribution gets its own directory under
    OUTPUT_DIR/THIRD_PARTY_NAMESPACE with a METADATA.toml and the stubs of all
    its packages; Python 2 counterparts go to a nested PY2_NAMESPACE directory.
    """
    third_party_dir = os.path.join(OUTPUT_DIR, THIRD_PARTY_NAMESPACE)
    os.makedirs(third_party_dir, exist_ok=True)

    # The same list is needed for every package, so build it only once.
    py2_package_names = [py2_package.name for py2_package in py2_packages]

    # Note: these include Python 3 versions of packages like six.
    for package in packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution)
        os.makedirs(distribution_dir, exist_ok=True)
        metadata_file = os.path.join(distribution_dir, "METADATA.toml")
        if not os.path.isfile(metadata_file):
            # Write metadata once.
            # TODO: check consistency between different packages in same distribution?
            with open(metadata_file, "w") as f:
                f.write(generate_metadata(package, py2_package_names))
                f.write("\n")
        _copy_package(package, distribution_dir)

    # Add Python 2 counterparts of packages like six (with different stubs) to nested
    # namespaces like six/python2/six.
    for package in py2_packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution, PY2_NAMESPACE)
        os.makedirs(distribution_dir, exist_ok=True)
        _copy_package(package, distribution_dir)


def main() -> None:
    """Collect metadata for all stubs and write the new layout under OUTPUT_DIR."""
    # Collect metadata for Python 2 and 3, and Python 2 only standard library
    # packages/modules. The latter will go to a separate nested namespace.
    stdlib, py2_stdlib = collect_stdlib_packages()
    third_party, py2_third_party = collect_third_party_packages()

    # Collect standard library names to filter out from dependencies.
    stdlib_names = [package.name for package in stdlib]
    py2_stdlib_names = [package.name for package in py2_stdlib]
    py2_stdlib_names += [package.name for package in stdlib if package.py_version == "2.7"]

    # Collect all known distributions (for sanity checks).
    known_distributions = {package_to_distribution.get(package.name, package.name)
                           for package in third_party + py2_third_party}

    # Compute dependencies between third party packages/modules to populate metadata.
    for package in third_party + py2_third_party:
        populate_requirements(package, stdlib_names, py2_stdlib_names, known_distributions)

    # Copy the files to a separate location (to not clobber the root directory).
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    copy_stdlib(stdlib, py2_stdlib)
    copy_third_party(third_party, py2_third_party)


if __name__ == "__main__":
    main()