# Provenance: recovered from the git patch
#   From 8971c242cb22b9982a6636e43de65ee28e30deaa Mon Sep 17 00:00:00 2001
#   From: Sebastian Rittau
#   Date: Fri, 14 May 2021 18:33:57 +0200
#   Subject: [PATCH] Remove migration script; migration has happened (#5446)
# which deleted scripts/migrate_script.py (343 lines, mode 100644,
# index ff5bdf013..000000000).
"""
Ad-hoc script to migrate typeshed to a new directory structure proposed in
https://github.com/python/typeshed/issues/2491#issuecomment-611607557
"""

import ast
import os
import os.path
import shutil
from dataclasses import dataclass
from typing import List, Optional, Set, Tuple

# These names may be still discussed so I make them constants.
STDLIB_NAMESPACE = "stdlib"
THIRD_PARTY_NAMESPACE = "stubs"
DEFAULT_VERSION = "0.1"
DEFAULT_PY3_VERSION = "3.6"
PY2_NAMESPACE = "@python2"
OUTPUT_DIR = "out"

# Third party imports (type ignored) of missing stubs.
MISSING_WHITELIST = {
    "thrift",
}

# Manually collected special cases where distribution name and
# package name are different.
package_to_distribution = {
    "_pytest": "pytest",
    "yaml": "PyYAML",
    "typing_extensions": "typing-extensions",
    "mypy_extensions": "mypy-extensions",
    "pyre_extensions": "pyre-extensions",
    "attr": "attrs",
    "concurrent": "futures",
    "click_spinner": "click-spinner",
    "Crypto": "pycrypto",
    "datetimerange": "DateTimeRange",
    "dateutil": "python-dateutil",
    "deprecated": "Deprecated",
    "enum": "enum34",
    "flask": "Flask",
    "gflags": "python-gflags",
    "google": "protobuf",
    "jack": "JACK-Client",
    "jinja2": "Jinja2",
    "markdown": "Markdown",
    "markupsafe": "MarkupSafe",
    "OpenSSL": "openssl-python",
    "pymysql": "PyMySQL",
    "pyrfc3339": "pyRFC3339",
    "pyVmomi": "pyvmomi",
    "routes": "Routes",
    "typed_ast": "typed-ast",
    "slugify": "python-slugify",
    "werkzeug": "Werkzeug",
}

# Distributions whose METADATA version differs from DEFAULT_VERSION.
known_versions = {
    "mypy-extensions": "0.4",
    "typing-extensions": "3.7",
    "typed-ast": "1.4",
}


# Classes with "Package" in name represent both packages and modules.
# The latter two are distinguished by is_dir flag.
class PackageBase:
    """Common attributes for packages/modules"""

    path: str  # full initial path like stdlib/2and3/argparse.pyi
    is_dir: bool

    @property
    def name(self) -> str:
        """Return the importable name: the directory name, or the file name without ".pyi"."""
        tail = os.path.basename(self.path)
        if self.is_dir:
            assert not tail.endswith(".pyi"), f"directory named like a stub file: {self.path}"
            return tail
        assert tail.endswith(".pyi"), f"not a .pyi stub file: {self.path}"
        return os.path.splitext(tail)[0]


@dataclass
class StdLibPackage(PackageBase):
    """Package/module in standard library."""

    path: str
    py_version: Optional[str]  # Can be omitted for Python 2 only packages.
    is_dir: bool


@dataclass
class ThirdPartyPackage(PackageBase):
    """Package/module from the third_party tree, with its compatibility flags."""

    path: str
    py2_compatible: bool
    py3_compatible: bool
    is_dir: bool
    requires: List[str]  # distributions this depends on


def add_stdlib_packages_from(subdir: str, packages: List[StdLibPackage], py_version: Optional[str]) -> None:
    """Add standard library packages/modules from a given stdlib/xxx subdirectory.

    Append to packages list in-place, use py_version as the minimal supported version.
    Entries are visited in sorted order so that repeated runs give the same result
    (os.listdir() itself returns entries in arbitrary order).
    """
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        packages.append(StdLibPackage(path, py_version, is_dir=os.path.isdir(path)))


def collect_stdlib_packages() -> Tuple[List[StdLibPackage], List[StdLibPackage]]:
    """Collect standard library packages/modules from all current stdlib/xxx sub-directories.

    Returns a pair (stdlib, py2_stdlib); the second list holds what was found in stdlib/2.
    """
    stdlib: List[StdLibPackage] = []
    py2_stdlib: List[StdLibPackage] = []
    # These will go to a separate subdirectory.
    add_stdlib_packages_from("stdlib/2", py2_stdlib, None)
    add_stdlib_packages_from("stdlib/2and3", stdlib, "2.7")
    # Use oldest currently supported version for Python 3 packages/modules.
    add_stdlib_packages_from("stdlib/3", stdlib, DEFAULT_PY3_VERSION)
    for version in ("3.7", "3.8", "3.9"):
        subdir = os.path.join("stdlib", version)
        if os.path.isdir(subdir):
            add_stdlib_packages_from(subdir, stdlib, version)
    return stdlib, py2_stdlib


def add_third_party_packages_from(
    subdir: str, packages: List[ThirdPartyPackage], py2_compatible: bool, py3_compatible: bool
) -> None:
    """Add third party packages/modules from a given third_party/xxx subdirectory.

    Append to packages list in-place; entries are visited in sorted order for
    reproducible output.
    """
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        packages.append(ThirdPartyPackage(path, py2_compatible, py3_compatible, requires=[], is_dir=os.path.isdir(path)))


def collect_third_party_packages() -> Tuple[List[ThirdPartyPackage], List[ThirdPartyPackage]]:
    """Collect third party packages/modules from all current third_party/xxx sub-directories.

    Returns a pair (third_party, py2_third_party); the second list holds entries of
    third_party/2 whose name also exists in third_party/3.
    """
    third_party: List[ThirdPartyPackage] = []
    py2_third_party: List[ThirdPartyPackage] = []
    add_third_party_packages_from("third_party/3", third_party, py2_compatible=False, py3_compatible=True)
    add_third_party_packages_from("third_party/2and3", third_party, py2_compatible=True, py3_compatible=True)
    # We special-case Python 2 for third party packages like six.
    subdir = "third_party/2"
    py3_packages = set(os.listdir("third_party/3"))
    for name in sorted(os.listdir(subdir)):
        path = os.path.join(subdir, name)
        package = ThirdPartyPackage(path, py2_compatible=True, py3_compatible=False, requires=[], is_dir=os.path.isdir(path))
        if name in py3_packages:
            # If there is a package with the same name in /2 and /3, we add the former to
            # a separate list, packages from there will be put into PY2_NAMESPACE
            # sub-directories.
            py2_third_party.append(package)
        else:
            third_party.append(package)
    return third_party, py2_third_party


def get_top_imported_names(file: str) -> Set[str]:
    """Collect names imported in given file.

    We only collect top-level names, i.e. `from foo.bar import baz`
    will only add `foo` to the list. Files that are not ".pyi" stubs
    are not opened and yield an empty set.
    """
    if not file.endswith(".pyi"):
        return set()
    with open(file, "rb") as f:
        content = f.read()
    parsed = ast.parse(content)
    top_imported: Set[str] = set()
    for node in ast.walk(parsed):
        if isinstance(node, ast.Import):
            for alias in node.names:
                top_imported.add(alias.name.split(".")[0])
        elif isinstance(node, ast.ImportFrom):
            if node.level > 0:
                # Relative imports always refer to the current package.
                continue
            assert node.module, f"absolute 'from' import without a module in {file}"
            top_imported.add(node.module.split(".")[0])
    return top_imported


def populate_requirements(
    package: ThirdPartyPackage, stdlib: List[str], py2_stdlib: List[str], known_distributions: Set[str]
) -> None:
    """Generate requirements using imports found in a package.

    Sets package.requires to the sorted distribution names that the package's
    stubs import, leaving out standard library names and the package's own
    distribution. An import that is neither standard library, a known
    distribution, nor listed in MISSING_WHITELIST trips an assertion.
    """
    assert not package.requires, "Populate must be called once"
    if package.is_dir:
        all_top_imports: Set[str] = set()
        for dir_path, _, file_names in os.walk(package.path):
            for file_name in file_names:
                all_top_imports |= get_top_imported_names(os.path.join(dir_path, file_name))
    else:
        all_top_imports = get_top_imported_names(package.path)

    # Build the lookup sets once instead of scanning the lists for every import.
    py3_stdlib_names = set(stdlib)
    py2_stdlib_names = set(py2_stdlib)

    # Generate dependencies using collected imports.
    requirements: Set[str] = set()
    for name in all_top_imports:
        # Note: dependencies are between distributions, not packages.
        distribution = package_to_distribution.get(name, name)
        if package.py3_compatible and name not in py3_stdlib_names:
            if distribution in known_distributions:
                requirements.add(distribution)
            else:
                # Likely a conditional import.
                assert (
                    distribution in py2_stdlib_names or distribution in MISSING_WHITELIST
                ), f"unknown import {name!r} in {package.path}"
        if package.py2_compatible and name not in py2_stdlib_names:
            if distribution in known_distributions:
                requirements.add(distribution)
            else:
                # Likely a conditional import.
                assert (
                    distribution in py3_stdlib_names or distribution in MISSING_WHITELIST
                ), f"unknown import {name!r} in {package.path}"
    # Remove dependency to itself generated by absolute imports.
    current_distribution = package_to_distribution.get(package.name, package.name)
    package.requires = sorted(requirements - {current_distribution})


def generate_versions(packages: List[StdLibPackage]) -> str:
    """Generate the stdlib/VERSIONS file for packages/modules.

    Returns sorted "name: version" lines joined by newlines, without a trailing
    newline. Every package must have py_version set.
    """
    lines = []
    for package in packages:
        assert package.py_version is not None, f"no Python version recorded for {package.path}"
        lines.append(f"{package.name}: {package.py_version}")
    return "\n".join(sorted(lines))


def _copy_package(package: PackageBase, destination_dir: str) -> None:
    """Copy a module file, or a whole package directory tree, into destination_dir."""
    if package.is_dir:
        shutil.copytree(package.path, os.path.join(destination_dir, package.name))
    else:
        shutil.copy(package.path, destination_dir)


def copy_stdlib(packages: List[StdLibPackage], py2_packages: List[StdLibPackage]) -> None:
    """Refactor the standard library part using collected metadata.

    Writes OUTPUT_DIR/STDLIB_NAMESPACE/VERSIONS, copies `packages` next to it and
    copies `py2_packages` into the nested PY2_NAMESPACE directory.
    """
    stdlib_dir = os.path.join(OUTPUT_DIR, STDLIB_NAMESPACE)
    os.makedirs(stdlib_dir, exist_ok=True)

    # Write version metadata.
    with open(os.path.join(stdlib_dir, "VERSIONS"), "w", encoding="utf-8") as f:
        f.write(generate_versions(packages))
        f.write("\n")

    # Copy stdlib/2and3 and stdlib/3 packages/modules.
    for package in packages:
        _copy_package(package, stdlib_dir)

    # Copy stdlib/2 packages/modules to the nested PY2_NAMESPACE directory.
    if py2_packages:
        py2_stdlib_dir = os.path.join(stdlib_dir, PY2_NAMESPACE)
        os.makedirs(py2_stdlib_dir, exist_ok=True)
        for package in py2_packages:
            _copy_package(package, py2_stdlib_dir)


def generate_metadata(package: ThirdPartyPackage, py2_packages: List[str]) -> str:
    """Generate METADATA.toml for a given package.

    Only add compatibility flags if they are different from default values:
    python2 = false, python3 = true.

    Note: the metadata should be generated per distribution, but we just use
    an arbitrary package to populate it, since it should be the same for all
    packages.
    """
    version = known_versions.get(
        package_to_distribution.get(package.name, package.name),
        DEFAULT_VERSION,
    )
    lines = [f'version = "{version}"']
    if package.py2_compatible or package.name in py2_packages:
        # Note: for packages like six that appear in both normal and Python 2 only
        # lists we force set python2 = true.
        lines.append("python2 = true")
    if not package.py3_compatible:
        lines.append("python3 = false")
    if package.requires:
        # populate_requirements() already stores distribution names, so this lookup
        # is redundant for its output; it is kept so that raw package names are
        # still mapped.
        distributions = [f'"types-{package_to_distribution.get(dep, dep)}"' for dep in package.requires]
        lines.append(f"requires = [{', '.join(distributions)}]")
    return "\n".join(lines)


def copy_third_party(packages: List[ThirdPartyPackage], py2_packages: List[ThirdPartyPackage]) -> None:
    """Refactor the third party part using collected metadata.

    Each package is copied to OUTPUT_DIR/THIRD_PARTY_NAMESPACE/<distribution>/, with a
    METADATA.toml written the first time a distribution directory is populated.
    `py2_packages` are copied into a PY2_NAMESPACE directory below their distribution.
    """
    third_party_dir = os.path.join(OUTPUT_DIR, THIRD_PARTY_NAMESPACE)
    os.makedirs(third_party_dir, exist_ok=True)

    # Computed once; every METADATA.toml needs the same list.
    py2_names = [py2_package.name for py2_package in py2_packages]

    # Note: these include Python 3 versions of packages like six.
    for package in packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution)
        os.makedirs(distribution_dir, exist_ok=True)
        metadata_file = os.path.join(distribution_dir, "METADATA.toml")
        if not os.path.isfile(metadata_file):
            # Write metadata once.
            # TODO: check consistency between different packages in same distribution?
            with open(metadata_file, "w", encoding="utf-8") as f:
                f.write(generate_metadata(package, py2_names))
                f.write("\n")
        _copy_package(package, distribution_dir)

    # Add Python 2 counterparts of packages like six (with different stubs) to nested
    # namespaces like six/@python2/six.
    for package in py2_packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution, PY2_NAMESPACE)
        os.makedirs(distribution_dir, exist_ok=True)
        _copy_package(package, distribution_dir)


def main() -> None:
    """Collect all stubs from the current layout and write the new layout under OUTPUT_DIR."""
    # Collect metadata for Python 2 and 3, and Python 2 only standard library
    # packages/modules. The latter will go to a separate nested namespace.
    stdlib, py2_stdlib = collect_stdlib_packages()
    third_party, py2_third_party = collect_third_party_packages()

    # Collect standard library names to filter out from dependencies.
    stdlib_names = [package.name for package in stdlib]
    py2_stdlib_names = [package.name for package in py2_stdlib]
    py2_stdlib_names += [package.name for package in stdlib if package.py_version == "2.7"]

    # Collect all known distributions (for sanity checks).
    known_distributions = {package_to_distribution.get(package.name, package.name) for package in third_party + py2_third_party}

    # Compute dependencies between third party packages/modules to populate metadata.
    for package in third_party + py2_third_party:
        populate_requirements(package, stdlib_names, py2_stdlib_names, known_distributions)

    # Copy the files to a separate location (to not clobber the root directory).
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    copy_stdlib(stdlib, py2_stdlib)
    copy_third_party(third_party, py2_third_party)


if __name__ == "__main__":
    main()