mirror of
https://github.com/davidhalter/typeshed.git
synced 2025-12-06 20:24:30 +08:00
Script for modular typeshed migration (#4259)
This reshuffles directory structure according to the specification in https://github.com/python/typeshed/issues/2491#issuecomment-611607557
This commit is contained in:
343
scripts/migrate_script.py
Normal file
343
scripts/migrate_script.py
Normal file
@@ -0,0 +1,343 @@
|
||||
"""
|
||||
Ad-hoc script to migrate typeshed to a new directory structure proposed in
|
||||
https://github.com/python/typeshed/issues/2491#issuecomment-611607557
|
||||
"""
|
||||
|
||||
import ast
|
||||
import os
|
||||
import os.path
|
||||
import shutil
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List, Set, Tuple
|
||||
|
||||
# These names may still be discussed, so they are kept as constants.
# Directory (inside OUTPUT_DIR) that receives the standard library stubs.
STDLIB_NAMESPACE = "stdlib"
# Directory (inside OUTPUT_DIR) that receives one sub-directory per third party distribution.
THIRD_PARTY_NAMESPACE = "stubs"
# Version written to METADATA.toml for distributions not listed in known_versions.
DEFAULT_VERSION = "0.1"
# Minimal version recorded for packages/modules collected from stdlib/3.
DEFAULT_PY3_VERSION = "3.5"
# Name of the nested sub-directory that holds Python 2 only stubs.
PY2_NAMESPACE = "python2"
# Root of the generated directory layout.
OUTPUT_DIR = "out"

# Third party imports (type ignored) of missing stubs.
# populate_requirements() tolerates these names even though they are neither
# a known distribution nor a standard library package/module.
MISSING_WHITELIST = {
    "thrift",
}
|
||||
|
||||
# Manually collected special cases where distribution name and
# package name are different.
# Keys are importable package/module names, values are distribution names.
# Every lookup in this script uses `.get(name, name)`, so a package that is
# not listed here is assumed to live in a distribution of the same name.
package_to_distribution = {
    "_pytest": "pytest",
    "yaml": "PyYAML",
    "typing_extensions": "typing-extensions",
    "mypy_extensions": "mypy-extensions",
    "pyre_extensions": "pyre-extensions",
    "attr": "attrs",
    "concurrent": "futures",
    "Crypto": "pycrypto",
    "datetimerange": "DateTimeRange",
    "dateutil": "python-dateutil",
    "enum": "enum34",
    "flask": "Flask",
    "gflags": "python-gflags",
    "google": "protobuf",
    "jinja2": "Jinja2",
    "markupsafe": "MarkupSafe",
    # NOTE(review): the PyPI distribution is usually spelled "pyOpenSSL" -- confirm.
    "OpenSSL": "openssl-python",
    "pymysql": "PyMySQL",
    "pyVmomi": "pyvmomi",
    "routes": "Routes",
    "typed_ast": "typed-ast",
    "werkzeug": "Werkzeug",
}
|
||||
|
||||
# Versions to record in METADATA.toml, keyed by distribution name.
# Distributions that are not listed here get DEFAULT_VERSION.
known_versions = {
    "mypy-extensions": "0.4",
    "typing-extensions": "3.7",
    "typed-ast": "1.4",
}
|
||||
|
||||
|
||||
# Classes with "Package" in name represent both packages and modules.
|
||||
# The two are distinguished by the is_dir flag.
|
||||
class PackageBase:
    """Attributes shared by every package/module record.

    Subclasses provide the actual values for ``path`` and ``is_dir``.
    """
    path: str  # full initial path like stdlib/2and3/argparse.pyi
    is_dir: bool  # True for a package directory, False for a single .pyi module

    @property
    def name(self) -> str:
        """Return the importable name: the last path component, minus ``.pyi`` for modules.

        Raises AssertionError when the ``.pyi`` suffix does not agree with
        ``is_dir`` (directories must not have it, modules must have it).
        """
        last_component = os.path.basename(self.path)
        looks_like_stub = last_component.endswith(".pyi")
        if self.is_dir:
            # A package directory is named exactly like the package.
            assert not looks_like_stub
            return last_component
        # A module is a single stub file; drop its extension.
        assert looks_like_stub
        return os.path.splitext(last_component)[0]
|
||||
|
||||
|
||||
@dataclass
class StdLibPackage(PackageBase):
    """Package/module in standard library."""
    # Full initial path of the stub file or package directory.
    path: str
    # Minimal supported Python version; this is what ends up in the VERSIONS file.
    py_version: Optional[str]  # Can be omitted for Python 2 only packages.
    # True for a package (directory), False for a single-file module.
    is_dir: bool
|
||||
|
||||
|
||||
@dataclass
class ThirdPartyPackage(PackageBase):
    """Package/module that belongs to a third party distribution."""
    # Full initial path of the stub file or package directory.
    path: str
    # Compatibility flags, derived from the third_party/xxx sub-directory
    # the package/module was collected from.
    py2_compatible: bool
    py3_compatible: bool
    # True for a package (directory), False for a single-file module.
    is_dir: bool
    # Starts empty; filled in later by populate_requirements().
    requires: List[str]  # distributions this depends on
|
||||
|
||||
|
||||
def add_stdlib_packages_from(subdir: str, packages: List[StdLibPackage],
                             py_version: Optional[str]) -> None:
    """Record every entry of a stdlib/xxx subdirectory as a standard library package/module.

    The records are appended to ``packages`` in-place; ``py_version`` is stored
    on each of them as the minimal supported version.
    """
    entry_paths = (os.path.join(subdir, entry) for entry in os.listdir(subdir))
    packages.extend(
        StdLibPackage(entry_path, py_version, is_dir=os.path.isdir(entry_path))
        for entry_path in entry_paths
    )
|
||||
|
||||
|
||||
def collect_stdlib_packages() -> Tuple[List[StdLibPackage], List[StdLibPackage]]:
    """Scan all current stdlib/xxx sub-directories for packages/modules.

    Returns a pair ``(stdlib, py2_stdlib)``: the first list holds everything
    from stdlib/2and3, stdlib/3 and the versioned sub-directories, the second
    one holds the Python 2 only stubs from stdlib/2.
    """
    # Python 2 only stubs are kept apart, they will go to a separate subdirectory.
    py2_only: List[StdLibPackage] = []
    add_stdlib_packages_from("stdlib/2", py2_only, None)

    shared: List[StdLibPackage] = []
    always_present = (
        ("stdlib/2and3", "2.7"),
        # Use oldest currently supported version for Python 3 packages/modules.
        ("stdlib/3", DEFAULT_PY3_VERSION),
    )
    for subdir, minimal_version in always_present:
        add_stdlib_packages_from(subdir, shared, minimal_version)

    # Versioned sub-directories are optional, skip the ones that do not exist.
    for version in ("3.6", "3.7", "3.8", "3.9"):
        versioned_dir = os.path.join("stdlib", version)
        if not os.path.isdir(versioned_dir):
            continue
        add_stdlib_packages_from(versioned_dir, shared, version)

    return shared, py2_only
|
||||
|
||||
|
||||
def add_third_party_packages_from(subdir: str, packages: List[ThirdPartyPackage],
                                  py2_compatible: bool, py3_compatible: bool) -> None:
    """Record every entry of a third_party/xxx subdirectory as a third party package/module.

    The records are appended to ``packages`` in-place with the given
    compatibility flags and an empty list of requirements.
    """
    for entry in os.listdir(subdir):
        entry_path = os.path.join(subdir, entry)
        record = ThirdPartyPackage(
            path=entry_path,
            py2_compatible=py2_compatible,
            py3_compatible=py3_compatible,
            is_dir=os.path.isdir(entry_path),
            requires=[],
        )
        packages.append(record)
|
||||
|
||||
|
||||
def collect_third_party_packages() -> Tuple[List[ThirdPartyPackage], List[ThirdPartyPackage]]:
    """Scan all current third_party/xxx sub-directories for packages/modules.

    Returns a pair ``(third_party, py2_third_party)``. The second list only
    holds Python 2 stubs whose name also exists in third_party/3; everything
    else ends up in the first list.
    """
    regular: List[ThirdPartyPackage] = []
    py2_counterparts: List[ThirdPartyPackage] = []

    for source_dir, works_on_py2 in (("third_party/3", False), ("third_party/2and3", True)):
        add_third_party_packages_from(source_dir, regular,
                                      py2_compatible=works_on_py2, py3_compatible=True)

    # We special-case Python 2 for third party packages like six.
    py2_dir = "third_party/2"
    py3_names = os.listdir("third_party/3")
    for entry in os.listdir(py2_dir):
        entry_path = os.path.join(py2_dir, entry)
        record = ThirdPartyPackage(entry_path, py2_compatible=True, py3_compatible=False,
                                   requires=[], is_dir=os.path.isdir(entry_path))
        # A name present in both /2 and /3 means two different sets of stubs: the
        # Python 2 one goes to a separate list and later into a /python2 sub-directory.
        destination = py2_counterparts if entry in py3_names else regular
        destination.append(record)

    return regular, py2_counterparts
|
||||
|
||||
|
||||
def get_top_imported_names(file: str) -> Set[str]:
    """Return the top-level names imported anywhere in the given stub file.

    Only the first component of a dotted name is kept, i.e.
    `from foo.bar import baz` contributes just `foo`. Files that are not
    `.pyi` stubs are not opened at all and yield an empty set.
    """
    if not file.endswith(".pyi"):
        return set()

    with open(file, "rb") as stream:
        tree = ast.parse(stream.read())

    def first_component(dotted_name: str) -> str:
        return dotted_name.partition(".")[0]

    found = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.update(first_component(alias.name) for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            # Relative imports (level > 0) always refer to the current package,
            # so only absolute `from ... import ...` statements get here.
            assert node.module
            found.add(first_component(node.module))
    return found
|
||||
|
||||
|
||||
def populate_requirements(package: ThirdPartyPackage,
                          stdlib: List[str], py2_stdlib: List[str],
                          known_distributions: Set[str]) -> None:
    """Fill ``package.requires`` from the imports found in the package's stubs.

    ``stdlib`` and ``py2_stdlib`` are the standard library names to filter out
    for Python 3 and Python 2 respectively, ``known_distributions`` are all
    third party distributions that have stubs. The result is stored as
    a sorted list of distribution names, without the package's own distribution.

    Raises AssertionError if requirements were already populated, or if
    an import cannot be explained (not a known distribution, not in
    the other Python version's standard library, not whitelisted).
    """
    assert not package.requires, "Populate must be called once"

    # Gather top-level imports from the single module or from the whole package tree.
    if package.is_dir:
        imported = set()
        for root, _, files in os.walk(package.path):
            for file_name in files:
                imported.update(get_top_imported_names(os.path.join(root, file_name)))
    else:
        imported = get_top_imported_names(package.path)

    # Generate dependencies using collected imports.
    needed = set()
    for name in imported:
        # Note: dependencies are between distributions, not packages.
        distribution = package_to_distribution.get(name, name)
        has_stubs = distribution in known_distributions
        whitelisted = distribution in MISSING_WHITELIST

        if package.py3_compatible and name not in stdlib:
            if has_stubs:
                needed.add(distribution)
            else:
                # Likely a conditional import.
                assert distribution in py2_stdlib or whitelisted

        if package.py2_compatible and name not in py2_stdlib:
            if has_stubs:
                needed.add(distribution)
            else:
                # Likely a conditional import.
                assert distribution in stdlib or whitelisted

    # Remove dependency to itself generated by absolute imports.
    needed.discard(package_to_distribution.get(package.name, package.name))
    package.requires = sorted(needed)
|
||||
|
||||
|
||||
def generate_versions(packages: List[StdLibPackage]) -> str:
    """Build the content of the stdlib/VERSIONS file for packages/modules.

    Returns one ``name: version`` line per package, sorted and joined by
    newlines (without a trailing newline). Raises AssertionError for
    a package that has no minimal version.
    """
    entries = []
    for pkg in packages:
        minimal_version = pkg.py_version
        assert minimal_version is not None
        entries.append(f"{pkg.name}: {minimal_version}")
    entries.sort()
    return "\n".join(entries)
|
||||
|
||||
|
||||
def copy_stdlib(packages: List[StdLibPackage], py2_packages: List[StdLibPackage]) -> None:
    """Write the standard library part of the new layout using collected metadata.

    Creates the stdlib directory inside the output directory, writes its
    VERSIONS file, and copies ``packages`` into it. ``py2_packages`` are copied
    into a nested Python 2 sub-directory, which is only created when needed.
    """
    def copy_all(items: List[StdLibPackage], target_dir: str) -> None:
        # Modules are single files, packages are whole directory trees.
        for item in items:
            if item.is_dir:
                shutil.copytree(item.path, os.path.join(target_dir, item.name))
            else:
                shutil.copy(item.path, target_dir)

    stdlib_dir = os.path.join(OUTPUT_DIR, STDLIB_NAMESPACE)
    os.makedirs(stdlib_dir, exist_ok=True)

    # Write version metadata.
    with open(os.path.join(stdlib_dir, "VERSIONS"), "w") as versions_file:
        versions_file.write(generate_versions(packages) + "\n")

    # Copy stdlib/2and3 and stdlib/3 packages/modules.
    copy_all(packages, stdlib_dir)

    # Copy stdlib/2 packages/modules to a nested /python2 namespace.
    if py2_packages:
        py2_stdlib_dir = os.path.join(stdlib_dir, PY2_NAMESPACE)
        os.makedirs(py2_stdlib_dir, exist_ok=True)
        copy_all(py2_packages, py2_stdlib_dir)
|
||||
|
||||
|
||||
def generate_metadata(package: ThirdPartyPackage, py2_packages: List[str]) -> str:
    """Build the content of METADATA.toml for a given package.

    ``py2_packages`` are the names of packages that have separate Python 2
    stubs. Compatibility flags are only emitted if they are different from
    default values: python2 = false, python3 = true.

    Note: the metadata should be generated per distribution, but we just use
    an arbitrary package to populate it, since it should be the same for all
    packages.
    """
    own_distribution = package_to_distribution.get(package.name, package.name)
    version = known_versions.get(own_distribution, DEFAULT_VERSION)
    lines = [f'version = "{version}"']

    # Note: for packages like six that appear in both normal and Python 2 only
    # lists we force set python2 = true.
    supports_py2 = package.py2_compatible or package.name in py2_packages
    if supports_py2:
        lines.append("python2 = true")
    if not package.py3_compatible:
        lines.append("python3 = false")

    if package.requires:
        quoted = []
        for dep in package.requires:
            dep_distribution = package_to_distribution.get(dep, dep)
            quoted.append(f'"types-{dep_distribution}"')
        joined = ", ".join(quoted)
        lines.append(f"requires = [{joined}]")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def copy_third_party(packages: List[ThirdPartyPackage],
                     py2_packages: List[ThirdPartyPackage]) -> None:
    """Write the third party part of the new layout using collected metadata.

    Every package from ``packages`` is copied into the directory of its
    distribution, next to a METADATA.toml that is written once per
    distribution. ``py2_packages`` are copied into a nested Python 2
    sub-directory of their distribution.
    """
    def copy_one(item: ThirdPartyPackage, target_dir: str) -> None:
        # Modules are single files, packages are whole directory trees.
        if item.is_dir:
            shutil.copytree(item.path, os.path.join(target_dir, item.name))
        else:
            shutil.copy(item.path, target_dir)

    third_party_dir = os.path.join(OUTPUT_DIR, THIRD_PARTY_NAMESPACE)
    os.makedirs(third_party_dir, exist_ok=True)

    # Note: these include Python 3 versions of packages like six.
    for package in packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution)
        os.makedirs(distribution_dir, exist_ok=True)

        metadata_file = os.path.join(distribution_dir, "METADATA.toml")
        if not os.path.isfile(metadata_file):
            # Write metadata once, the first package of a distribution wins.
            # TODO: check consistency between different packages in same distribution?
            with open(metadata_file, "w") as metadata:
                py2_names = [py2_package.name for py2_package in py2_packages]
                metadata.write(generate_metadata(package, py2_names) + "\n")

        copy_one(package, distribution_dir)

    # Add Python 2 counterparts of packages like six (with different stubs) to nested
    # namespaces like six/python2/six.
    for package in py2_packages:
        distribution = package_to_distribution.get(package.name, package.name)
        py2_dir = os.path.join(third_party_dir, distribution, PY2_NAMESPACE)
        os.makedirs(py2_dir, exist_ok=True)
        copy_one(package, py2_dir)
|
||||
|
||||
|
||||
def main() -> None:
    """Collect metadata from the current layout and write the new one into the output directory."""
    # Collect metadata for Python 2 and 3, and Python 2 only standard library
    # packages/modules. The latter will go to a separate nested namespace.
    stdlib, py2_stdlib = collect_stdlib_packages()
    third_party, py2_third_party = collect_third_party_packages()
    every_third_party = third_party + py2_third_party

    # Collect standard library names to filter out from dependencies.
    # Everything available since 2.7 counts as Python 2 standard library too.
    stdlib_names = [pkg.name for pkg in stdlib]
    py2_stdlib_names = [pkg.name for pkg in py2_stdlib]
    py2_stdlib_names.extend(pkg.name for pkg in stdlib if pkg.py_version == "2.7")

    # Collect all known distributions (for sanity checks).
    known_distributions = set()
    for package in every_third_party:
        known_distributions.add(package_to_distribution.get(package.name, package.name))

    # Compute dependencies between third party packages/modules to populate metadata.
    for package in every_third_party:
        populate_requirements(package, stdlib_names, py2_stdlib_names, known_distributions)

    # Copy the files to a separate location (to not clobber the root directory).
    if not os.path.isdir(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)
    copy_stdlib(stdlib, py2_stdlib)
    copy_third_party(third_party, py2_third_party)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user