Script for modular typeshed migration (#4259)

This reshuffles directory structure according to the specification in https://github.com/python/typeshed/issues/2491#issuecomment-611607557
This commit is contained in:
Ivan Levkivskyi
2020-07-01 12:36:23 +01:00
committed by GitHub
parent 3efb675dba
commit 5c7cf9f75c

343
scripts/migrate_script.py Normal file
View File

@@ -0,0 +1,343 @@
"""
Ad-hoc script to migrate typeshed to a new directory structure proposed in
https://github.com/python/typeshed/issues/2491#issuecomment-611607557
"""
import ast
import os
import os.path
import shutil
from dataclasses import dataclass
from typing import Optional, List, Set, Tuple
# These names may be still discussed so I make them constants.
STDLIB_NAMESPACE = "stdlib"
THIRD_PARTY_NAMESPACE = "stubs"
DEFAULT_VERSION = "0.1"
DEFAULT_PY3_VERSION = "3.5"
PY2_NAMESPACE = "python2"
OUTPUT_DIR = "out"
# Third party imports (type ignored) of missing stubs.
MISSING_WHITELIST = {
"thrift",
}
# Manually collected special cases where distribution name and
# package name are different.
package_to_distribution = {
"_pytest": "pytest",
"yaml": "PyYAML",
"typing_extensions": "typing-extensions",
"mypy_extensions": "mypy-extensions",
"pyre_extensions": "pyre-extensions",
"attr": "attrs",
"concurrent": "futures",
"Crypto": "pycrypto",
"datetimerange": "DateTimeRange",
"dateutil": "python-dateutil",
"enum": "enum34",
"flask": "Flask",
"gflags": "python-gflags",
"google": "protobuf",
"jinja2": "Jinja2",
"markupsafe": "MarkupSafe",
"OpenSSL": "openssl-python",
"pymysql": "PyMySQL",
"pyVmomi": "pyvmomi",
"routes": "Routes",
"typed_ast": "typed-ast",
"werkzeug": "Werkzeug",
}
known_versions = {
"mypy-extensions": "0.4",
"typing-extensions": "3.7",
"typed-ast": "1.4",
}
# Classes with "Package" in name represent both packages and modules.
# The latter two are distinguished by is_dir flag.
class PackageBase:
"""Common attributes for packages/modules"""
path: str # full initial path like stdlib/2and3/argparse.pyi
is_dir: bool
@property
def name(self) -> str:
_, tail = os.path.split(self.path)
if self.is_dir:
assert not tail.endswith(".pyi")
return tail
assert tail.endswith(".pyi")
name, _ = os.path.splitext(tail)
return name
@dataclass
class StdLibPackage(PackageBase):
"""Package/module in standard library."""
path: str
py_version: Optional[str] # Can be omitted for Python 2 only packages.
is_dir: bool
@dataclass
class ThirdPartyPackage(PackageBase):
path: str
py2_compatible: bool
py3_compatible: bool
is_dir: bool
requires: List[str] # distributions this depends on
def add_stdlib_packages_from(subdir: str, packages: List[StdLibPackage],
py_version: Optional[str]) -> None:
"""Add standard library packages/modules from a given stdlib/xxx subdirectory.
Append to packages list in-place, use py_version as the minimal supported version.
"""
for name in os.listdir(subdir):
path = os.path.join(subdir, name)
packages.append(StdLibPackage(path, py_version, is_dir=os.path.isdir(path)))
def collect_stdlib_packages() -> Tuple[List[StdLibPackage], List[StdLibPackage]]:
"""Collect standard library packages/modules from all current stdlib/xxx sub-directories."""
stdlib: List[StdLibPackage] = []
py2_stdlib: List[StdLibPackage] = []
# These will go to a separate subdirectory.
add_stdlib_packages_from("stdlib/2", py2_stdlib, None)
add_stdlib_packages_from("stdlib/2and3", stdlib, "2.7")
# Use oldest currently supported version for Python 3 packages/modules.
add_stdlib_packages_from("stdlib/3", stdlib, DEFAULT_PY3_VERSION)
for version in ("3.6", "3.7", "3.8", "3.9"):
subdir = os.path.join("stdlib", version)
if os.path.isdir(subdir):
add_stdlib_packages_from(subdir, stdlib, version)
return stdlib, py2_stdlib
def add_third_party_packages_from(subdir: str, packages: List[ThirdPartyPackage],
py2_compatible: bool, py3_compatible: bool) -> None:
"""Add third party packages/modules from a given third_party/xxx subdirectory."""
for name in os.listdir(subdir):
path = os.path.join(subdir, name)
packages.append(ThirdPartyPackage(path, py2_compatible, py3_compatible,
requires=[], is_dir=os.path.isdir(path)))
def collect_third_party_packages() -> Tuple[List[ThirdPartyPackage], List[ThirdPartyPackage]]:
"""Collect third party packages/modules from all current third_party/xxx sub-directories."""
third_party: List[ThirdPartyPackage] = []
py2_third_party: List[ThirdPartyPackage] = []
add_third_party_packages_from("third_party/3", third_party,
py2_compatible=False, py3_compatible=True)
add_third_party_packages_from("third_party/2and3", third_party,
py2_compatible=True, py3_compatible=True)
# We special-case Python 2 for third party packages like six.
subdir = "third_party/2"
py3_packages = os.listdir("third_party/3")
for name in os.listdir(subdir):
path = os.path.join(subdir, name)
package = ThirdPartyPackage(path, py2_compatible=True, py3_compatible=False,
requires=[], is_dir=os.path.isdir(path))
if name in py3_packages:
# If there is a package with the same name in /2 and /3, we add the former to
# a separate list, packages from there will be put into /python2 sub-directories.
py2_third_party.append(package)
else:
third_party.append(package)
return third_party, py2_third_party
def get_top_imported_names(file: str) -> Set[str]:
"""Collect names imported in given file.
We only collect top-level names, i.e. `from foo.bar import baz`
will only add `foo` to the list.
"""
if not file.endswith(".pyi"):
return set()
with open(os.path.join(file), "rb") as f:
content = f.read()
parsed = ast.parse(content)
top_imported = set()
for node in ast.walk(parsed):
if isinstance(node, ast.Import):
for name in node.names:
top_imported.add(name.name.split('.')[0])
elif isinstance(node, ast.ImportFrom):
if node.level > 0:
# Relative imports always refer to the current package.
continue
assert node.module
top_imported.add(node.module.split('.')[0])
return top_imported
def populate_requirements(package: ThirdPartyPackage,
stdlib: List[str], py2_stdlib: List[str],
known_distributions: Set[str]) -> None:
"""Generate requirements using imports found in a package."""
assert not package.requires, "Populate must be called once"
if not package.is_dir:
all_top_imports = get_top_imported_names(package.path)
else:
all_top_imports = set()
for dir_path, _, file_names in os.walk(package.path):
for file_name in file_names:
all_top_imports |= get_top_imported_names(os.path.join(dir_path, file_name))
# Generate dependencies using collected imports.
requirements = set()
for name in all_top_imports:
# Note: dependencies are between distributions, not packages.
distribution = package_to_distribution.get(name, name)
if package.py3_compatible and name not in stdlib:
if distribution in known_distributions:
requirements.add(distribution)
else:
# Likely a conditional import.
assert distribution in py2_stdlib or distribution in MISSING_WHITELIST
if package.py2_compatible and name not in py2_stdlib:
if distribution in known_distributions:
requirements.add(distribution)
else:
# Likely a conditional import.
assert distribution in stdlib or distribution in MISSING_WHITELIST
# Remove dependency to itself generated by absolute imports.
current_distribution = package_to_distribution.get(package.name, package.name)
package.requires = sorted(requirements - {current_distribution})
def generate_versions(packages: List[StdLibPackage]) -> str:
"""Generate the stdlib/VERSIONS file for packages/modules."""
lines = []
for package in packages:
assert package.py_version is not None
lines.append(f"{package.name}: {package.py_version}")
return "\n".join(sorted(lines))
def copy_stdlib(packages: List[StdLibPackage], py2_packages: List[StdLibPackage]) -> None:
"""Refactor the standard library part using collected metadata."""
stdlib_dir = os.path.join(OUTPUT_DIR, STDLIB_NAMESPACE)
os.makedirs(stdlib_dir, exist_ok=True)
# Write version metadata.
with open(os.path.join(stdlib_dir, "VERSIONS"), "w") as f:
f.write(generate_versions(packages))
f.write("\n")
# Copy stdlib/2and3 and stdlib/3 packages/modules.
for package in packages:
if not package.is_dir:
shutil.copy(package.path, stdlib_dir)
else:
shutil.copytree(package.path, os.path.join(stdlib_dir, package.name))
# Copy stdlib/2 packages/modules to a nested /python namespace.
if py2_packages:
py2_stdlib_dir = os.path.join(stdlib_dir, PY2_NAMESPACE)
os.makedirs(py2_stdlib_dir, exist_ok=True)
for package in py2_packages:
if not package.is_dir:
shutil.copy(package.path, py2_stdlib_dir)
else:
shutil.copytree(package.path, os.path.join(py2_stdlib_dir, package.name))
def generate_metadata(package: ThirdPartyPackage, py2_packages: List[str]) -> str:
"""Generate METADATA.toml for a given package.
Only add compatibility flags if they are different from default values:
python2 = false, python3 = true.
Note: the metadata should be generated per distribution, but we just use
an arbitrary package to populate it, since it should be the same for all
packages.
"""
version = known_versions.get(
package_to_distribution.get(package.name, package.name),
DEFAULT_VERSION,
)
lines = [f'version = "{version}"']
if package.py2_compatible or package.name in py2_packages:
# Note: for packages like six that appear in both normal and Python 2 only
# lists we force set python2 = true.
lines.append("python2 = true")
if not package.py3_compatible:
lines.append("python3 = false")
if package.requires:
distributions = [f'"types-{package_to_distribution.get(dep, dep)}"'
for dep in package.requires]
lines.append(f"requires = [{', '.join(distributions)}]")
return "\n".join(lines)
def copy_third_party(packages: List[ThirdPartyPackage],
py2_packages: List[ThirdPartyPackage]) -> None:
"""Refactor the third party part using collected metadata."""
third_party_dir = os.path.join(OUTPUT_DIR, THIRD_PARTY_NAMESPACE)
os.makedirs(third_party_dir, exist_ok=True)
# Note: these include Python 3 versions of packages like six.
for package in packages:
distribution = package_to_distribution.get(package.name, package.name)
distribution_dir = os.path.join(third_party_dir, distribution)
os.makedirs(distribution_dir, exist_ok=True)
metadata_file = os.path.join(distribution_dir, "METADATA.toml")
if not os.path.isfile(metadata_file):
# Write metadata once.
# TODO: check consistency between different packages in same distribution?
with open(metadata_file, "w") as f:
f.write(generate_metadata(package, [package.name for package in py2_packages]))
f.write("\n")
if not package.is_dir:
shutil.copy(package.path, distribution_dir)
else:
shutil.copytree(package.path, os.path.join(distribution_dir, package.name))
# Add Python 2 counterparts of packages like six (with different stubs) to nested
# namespaces like six/python2/six.
for package in py2_packages:
distribution = package_to_distribution.get(package.name, package.name)
distribution_dir = os.path.join(third_party_dir, distribution, PY2_NAMESPACE)
os.makedirs(distribution_dir, exist_ok=True)
if not package.is_dir:
shutil.copy(package.path, distribution_dir)
else:
shutil.copytree(package.path, os.path.join(distribution_dir, package.name))
def main() -> None:
# Collect metadata for Python 2 and 3, and Python 2 only standard library
# packages/modules. The latter will go to a separate nested namespace.
stdlib, py2_stdlib = collect_stdlib_packages()
third_party, py2_third_party = collect_third_party_packages()
# Collect standard library names to filter out from dependencies.
stdlib_names = [package.name for package in stdlib]
py2_stdlib_names = [package.name for package in py2_stdlib]
py2_stdlib_names += [package.name for package in stdlib if package.py_version == "2.7"]
# Collect all known distributions (for sanity checks).
known_distributions = {package_to_distribution.get(package.name, package.name)
for package in third_party + py2_third_party}
# Compute dependencies between third party packages/modules to populate metadata.
for package in third_party + py2_third_party:
populate_requirements(package, stdlib_names, py2_stdlib_names, known_distributions)
# Copy the files to a separate location (to not clobber the root directory).
if not os.path.isdir(OUTPUT_DIR):
os.mkdir(OUTPUT_DIR)
copy_stdlib(stdlib, py2_stdlib)
copy_third_party(third_party, py2_third_party)
if __name__ == "__main__":
main()