# Mirror of https://github.com/davidhalter/typeshed.git
# Synced 2025-12-06 04:04:25 +08:00
# 338 lines, 13 KiB, Python
"""
Ad-hoc script to migrate typeshed to a new directory structure proposed in
https://github.com/python/typeshed/issues/2491#issuecomment-611607557
"""
|
|
|
|
import ast
|
|
import os
|
|
import os.path
|
|
import shutil
|
|
from dataclasses import dataclass
|
|
from typing import List, Optional, Set, Tuple
|
|
|
|
# These names may still be discussed, so they are kept as constants.
# Sub-directory of OUTPUT_DIR that receives the standard library stubs.
STDLIB_NAMESPACE = "stdlib"
# Sub-directory of OUTPUT_DIR that receives the third party distributions.
THIRD_PARTY_NAMESPACE = "stubs"
# Version written to METADATA.toml when a distribution is not in known_versions.
DEFAULT_VERSION = "0.1"
# Minimal version recorded for modules that come from stdlib/3.
DEFAULT_PY3_VERSION = "3.6"
# Nested directory name used for Python 2 only stubs.
PY2_NAMESPACE = "python2"
# Root of the generated tree (kept separate to not clobber the root directory).
OUTPUT_DIR = "out"
|
|
|
|
# Third party imports (type ignored) of missing stubs. Names listed here are
# accepted by the sanity checks even though no distribution provides them.
MISSING_WHITELIST = {
    "thrift",
}
|
|
|
|
# Manually collected special cases where distribution name and
# package name are different. Keys are top-level package/module names,
# values are the distribution names; anything not listed here is treated
# as having identical package and distribution names.
package_to_distribution = {
    "_pytest": "pytest",
    "yaml": "PyYAML",
    "typing_extensions": "typing-extensions",
    "mypy_extensions": "mypy-extensions",
    "pyre_extensions": "pyre-extensions",
    "attr": "attrs",
    "concurrent": "futures",
    "Crypto": "pycrypto",
    "datetimerange": "DateTimeRange",
    "dateutil": "python-dateutil",
    "enum": "enum34",
    "flask": "Flask",
    "gflags": "python-gflags",
    "google": "protobuf",
    "jinja2": "Jinja2",
    "markupsafe": "MarkupSafe",
    "OpenSSL": "openssl-python",
    "pymysql": "PyMySQL",
    "pyVmomi": "pyvmomi",
    "routes": "Routes",
    "typed_ast": "typed-ast",
    "werkzeug": "Werkzeug",
}
|
|
|
|
# Versions to record in METADATA.toml, keyed by distribution name.
# Distributions not listed here fall back to DEFAULT_VERSION.
known_versions = {
    "mypy-extensions": "0.4",
    "typing-extensions": "3.7",
    "typed-ast": "1.4",
}
|
|
|
|
|
|
# Classes with "Package" in name represent both packages and modules.
# The two are distinguished by the is_dir flag.
class PackageBase:
    """Common attributes for packages/modules."""

    path: str  # full initial path like stdlib/2and3/argparse.pyi
    is_dir: bool  # True for a package directory, False for a single .pyi module

    @property
    def name(self) -> str:
        """Return the package/module name derived from the last component of ``path``.

        For a directory the last path component is returned unchanged; for a
        module the ``.pyi`` extension is stripped.

        Raises:
            AssertionError: if a directory name ends with ``.pyi`` or a
                module file name does not.
        """
        base = os.path.basename(self.path)
        if self.is_dir:
            assert not base.endswith(".pyi")
            return base
        assert base.endswith(".pyi")
        stem, _ = os.path.splitext(base)
        return stem
|
|
|
|
|
|
@dataclass
class StdLibPackage(PackageBase):
    """Package/module in standard library."""

    # Field order is part of the constructor signature: (path, py_version, is_dir).
    path: str  # full initial path of the stub file or directory
    py_version: Optional[str]  # Minimal supported version. Can be omitted for Python 2 only packages.
    is_dir: bool  # True for a package directory, False for a single .pyi module
|
|
|
|
|
|
@dataclass
class ThirdPartyPackage(PackageBase):
    """Third party package/module together with its compatibility metadata."""

    # Field order is part of the constructor signature:
    # (path, py2_compatible, py3_compatible, is_dir, requires).
    path: str  # full initial path of the stub file or directory
    py2_compatible: bool  # stubs are usable with Python 2
    py3_compatible: bool  # stubs are usable with Python 3
    is_dir: bool  # True for a package directory, False for a single .pyi module
    requires: List[str]  # distributions this depends on
|
|
|
|
|
|
def add_stdlib_packages_from(subdir: str, packages: List[StdLibPackage], py_version: Optional[str]) -> None:
    """Add standard library packages/modules from a given stdlib/xxx subdirectory.

    Append to packages list in-place, use py_version as the minimal supported version.

    Args:
        subdir: directory whose entries are added; every entry is taken, no filtering.
        packages: list that receives one StdLibPackage per directory entry.
        py_version: version recorded on each added package (None is allowed).
    """
    # os.listdir() returns entries in arbitrary order; sort for reproducible runs.
    for entry in sorted(os.listdir(subdir)):
        entry_path = os.path.join(subdir, entry)
        packages.append(StdLibPackage(entry_path, py_version, is_dir=os.path.isdir(entry_path)))
|
|
|
|
|
|
def collect_stdlib_packages() -> Tuple[List[StdLibPackage], List[StdLibPackage]]:
    """Collect standard library packages/modules from all current stdlib/xxx sub-directories.

    Paths are relative, so the current working directory must contain ``stdlib``.

    Returns:
        A pair ``(stdlib, py2_stdlib)``: packages from stdlib/2and3, stdlib/3
        and the versioned stdlib/3.x directories, and packages from stdlib/2.
    """
    stdlib: List[StdLibPackage] = []
    py2_stdlib: List[StdLibPackage] = []

    # These will go to a separate subdirectory; no minimal version is recorded.
    add_stdlib_packages_from("stdlib/2", py2_stdlib, None)
    add_stdlib_packages_from("stdlib/2and3", stdlib, "2.7")
    # Use oldest currently supported version for Python 3 packages/modules.
    add_stdlib_packages_from("stdlib/3", stdlib, DEFAULT_PY3_VERSION)

    # Version-specific directories are optional and skipped when absent.
    for version in ("3.7", "3.8", "3.9"):
        versioned_dir = os.path.join("stdlib", version)
        if os.path.isdir(versioned_dir):
            add_stdlib_packages_from(versioned_dir, stdlib, version)

    return stdlib, py2_stdlib
|
|
|
|
|
|
def add_third_party_packages_from(
    subdir: str, packages: List[ThirdPartyPackage], py2_compatible: bool, py3_compatible: bool
) -> None:
    """Add third party packages/modules from a given third_party/xxx subdirectory.

    Append to packages list in-place. Every directory entry is added with the
    given compatibility flags and an empty ``requires`` list.
    """
    # os.listdir() returns entries in arbitrary order; sort so that later
    # first-writer-wins steps do not depend on file system ordering.
    for entry in sorted(os.listdir(subdir)):
        entry_path = os.path.join(subdir, entry)
        packages.append(
            ThirdPartyPackage(
                entry_path,
                py2_compatible,
                py3_compatible,
                requires=[],
                is_dir=os.path.isdir(entry_path),
            )
        )
|
|
|
|
|
|
def collect_third_party_packages() -> Tuple[List[ThirdPartyPackage], List[ThirdPartyPackage]]:
    """Collect third party packages/modules from all current third_party/xxx sub-directories.

    Paths are relative, so the current working directory must contain ``third_party``.

    Returns:
        A pair ``(third_party, py2_third_party)``. The second list holds the
        entries of third_party/2 whose name also exists in third_party/3;
        all other entries go to the first list.
    """
    third_party: List[ThirdPartyPackage] = []
    py2_third_party: List[ThirdPartyPackage] = []
    add_third_party_packages_from("third_party/3", third_party, py2_compatible=False, py3_compatible=True)
    add_third_party_packages_from("third_party/2and3", third_party, py2_compatible=True, py3_compatible=True)

    # We special-case Python 2 for third party packages like six.
    subdir = "third_party/2"
    # A set gives O(1) membership tests. Entries are compared by their
    # directory-entry name exactly as listed (including any .pyi suffix).
    py3_entries = set(os.listdir("third_party/3"))
    for entry in sorted(os.listdir(subdir)):
        entry_path = os.path.join(subdir, entry)
        package = ThirdPartyPackage(
            entry_path,
            py2_compatible=True,
            py3_compatible=False,
            requires=[],
            is_dir=os.path.isdir(entry_path),
        )
        # If there is a package with the same name in /2 and /3, we add the former to
        # a separate list, packages from there will be put into /python2 sub-directories.
        destination = py2_third_party if entry in py3_entries else third_party
        destination.append(package)
    return third_party, py2_third_party
|
|
|
|
|
|
def get_top_imported_names(file: str) -> Set[str]:
    """Collect names imported in given file.

    We only collect top-level names, i.e. `from foo.bar import baz`
    will only add `foo` to the set. Imports anywhere in the file are seen
    (the whole syntax tree is walked), relative imports are ignored.

    Args:
        file: path to the file to inspect.

    Returns:
        The set of top-level imported names; an empty set if ``file`` does
        not end with ``.pyi`` (such files are not opened at all).

    Raises:
        SyntaxError: if the stub cannot be parsed.
    """
    if not file.endswith(".pyi"):
        return set()

    with open(file, "rb") as f:
        source = f.read()
    # Passing the file name makes syntax errors point at the offending stub.
    tree = ast.parse(source, filename=file)

    top_imported: Set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            top_imported.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.level > 0:
                # Relative imports always refer to the current package.
                continue
            assert node.module
            top_imported.add(node.module.split(".")[0])
    return top_imported
|
|
|
|
|
|
def populate_requirements(
    package: ThirdPartyPackage, stdlib: List[str], py2_stdlib: List[str], known_distributions: Set[str]
) -> None:
    """Generate requirements using imports found in a package.

    The result is stored in ``package.requires`` as a sorted list of
    distribution names, excluding the package's own distribution.

    Args:
        package: package to analyse; its ``requires`` must still be empty.
        stdlib: names of standard library packages/modules for Python 3.
        py2_stdlib: names of standard library packages/modules for Python 2.
        known_distributions: all third party distribution names (for sanity checks).

    Raises:
        AssertionError: if called twice for the same package, or if an import
            is neither a known distribution, nor in the other Python version's
            standard library list, nor in MISSING_WHITELIST.
    """
    assert not package.requires, "Populate must be called once"

    all_top_imports: Set[str]
    if package.is_dir:
        all_top_imports = set()
        for dir_path, _, file_names in os.walk(package.path):
            for file_name in file_names:
                all_top_imports |= get_top_imported_names(os.path.join(dir_path, file_name))
    else:
        all_top_imports = get_top_imported_names(package.path)

    # One row per supported Python major version, Python 3 first:
    # (is the package compatible, stdlib of that version, stdlib of the other one).
    version_checks = (
        (package.py3_compatible, stdlib, py2_stdlib),
        (package.py2_compatible, py2_stdlib, stdlib),
    )

    # Generate dependencies using collected imports.
    requirements: Set[str] = set()
    for name in all_top_imports:
        # Note: dependencies are between distributions, not packages.
        distribution = package_to_distribution.get(name, name)
        for compatible, own_stdlib, other_stdlib in version_checks:
            if not compatible or name in own_stdlib:
                continue
            if distribution in known_distributions:
                requirements.add(distribution)
            else:
                # Likely a conditional import.
                assert distribution in other_stdlib or distribution in MISSING_WHITELIST

    # Remove dependency to itself generated by absolute imports.
    current_distribution = package_to_distribution.get(package.name, package.name)
    package.requires = sorted(requirements - {current_distribution})
|
|
|
|
|
|
def generate_versions(packages: List[StdLibPackage]) -> str:
    """Generate the stdlib/VERSIONS file for packages/modules.

    Returns:
        One ``name: version`` line per package, sorted as strings and joined
        with newlines (no trailing newline).

    Raises:
        AssertionError: if a package has no ``py_version``.
    """
    entries = []
    for package in packages:
        assert package.py_version is not None
        entries.append(f"{package.name}: {package.py_version}")
    entries.sort()
    return "\n".join(entries)
|
|
|
|
|
|
def copy_stdlib(packages: List[StdLibPackage], py2_packages: List[StdLibPackage]) -> None:
    """Refactor the standard library part using collected metadata.

    Writes the VERSIONS file and copies every package/module below
    OUTPUT_DIR/STDLIB_NAMESPACE; Python 2 only ones go to the nested
    PY2_NAMESPACE directory, which is created only when there are any.
    """

    def copy_all(group: List[StdLibPackage], target_dir: str) -> None:
        # Modules are single files, packages are whole directory trees.
        for package in group:
            if package.is_dir:
                shutil.copytree(package.path, os.path.join(target_dir, package.name))
            else:
                shutil.copy(package.path, target_dir)

    stdlib_dir = os.path.join(OUTPUT_DIR, STDLIB_NAMESPACE)
    os.makedirs(stdlib_dir, exist_ok=True)

    # Write version metadata.
    with open(os.path.join(stdlib_dir, "VERSIONS"), "w") as f:
        f.write(generate_versions(packages))
        f.write("\n")

    # Copy stdlib/2and3 and stdlib/3 packages/modules.
    copy_all(packages, stdlib_dir)

    # Copy stdlib/2 packages/modules to a nested /python2 namespace.
    if py2_packages:
        py2_stdlib_dir = os.path.join(stdlib_dir, PY2_NAMESPACE)
        os.makedirs(py2_stdlib_dir, exist_ok=True)
        copy_all(py2_packages, py2_stdlib_dir)
|
|
|
|
|
|
def generate_metadata(package: ThirdPartyPackage, py2_packages: List[str]) -> str:
    """Generate METADATA.toml for a given package.

    Only add compatibility flags if they are different from default values:
    python2 = false, python3 = true.

    Note: the metadata should be generated per distribution, but we just use
    an arbitrary package to populate it, since it should be the same for all
    packages.

    Args:
        package: package whose flags and requirements are written out.
        py2_packages: names of packages that also have separate Python 2 stubs.

    Returns:
        The file content as newline-joined lines (no trailing newline).
    """
    distribution = package_to_distribution.get(package.name, package.name)
    version = known_versions.get(distribution, DEFAULT_VERSION)
    lines = [f'version = "{version}"']

    # Note: for packages like six that appear in both normal and Python 2 only
    # lists we force set python2 = true.
    if package.py2_compatible or package.name in py2_packages:
        lines.append("python2 = true")
    if not package.py3_compatible:
        lines.append("python3 = false")

    if package.requires:
        # Stub distributions are published with a "types-" prefix.
        quoted = ", ".join(f'"types-{package_to_distribution.get(dep, dep)}"' for dep in package.requires)
        lines.append(f"requires = [{quoted}]")
    return "\n".join(lines)
|
|
|
|
|
|
def copy_third_party(packages: List[ThirdPartyPackage], py2_packages: List[ThirdPartyPackage]) -> None:
    """Refactor the third party part using collected metadata.

    Each package is copied to OUTPUT_DIR/THIRD_PARTY_NAMESPACE/<distribution>,
    and a METADATA.toml is written there by the first package seen for that
    distribution. Packages from ``py2_packages`` are copied into a nested
    PY2_NAMESPACE directory of their distribution and write no metadata.
    """

    def copy_package(package: ThirdPartyPackage, target_dir: str) -> None:
        # Modules are single files, packages are whole directory trees.
        if package.is_dir:
            shutil.copytree(package.path, os.path.join(target_dir, package.name))
        else:
            shutil.copy(package.path, target_dir)

    third_party_dir = os.path.join(OUTPUT_DIR, THIRD_PARTY_NAMESPACE)
    os.makedirs(third_party_dir, exist_ok=True)

    # Loop-invariant: names of packages that have separate Python 2 stubs.
    py2_names = [py2_package.name for py2_package in py2_packages]

    # Note: these include Python 3 versions of packages like six.
    for package in packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution)
        os.makedirs(distribution_dir, exist_ok=True)
        metadata_file = os.path.join(distribution_dir, "METADATA.toml")
        if not os.path.isfile(metadata_file):
            # Write metadata once.
            # TODO: check consistency between different packages in same distribution?
            with open(metadata_file, "w") as f:
                f.write(generate_metadata(package, py2_names))
                f.write("\n")
        copy_package(package, distribution_dir)

    # Add Python 2 counterparts of packages like six (with different stubs) to nested
    # namespaces like six/python2/six.
    for package in py2_packages:
        distribution = package_to_distribution.get(package.name, package.name)
        distribution_dir = os.path.join(third_party_dir, distribution, PY2_NAMESPACE)
        os.makedirs(distribution_dir, exist_ok=True)
        copy_package(package, distribution_dir)
|
|
|
|
|
|
def main() -> None:
    """Run the migration: collect stubs, compute requirements, write the new tree.

    Reads ``stdlib`` and ``third_party`` relative to the current working
    directory and writes the result below OUTPUT_DIR.
    """
    # Collect metadata for Python 2 and 3, and Python 2 only standard library
    # packages/modules. The latter will go to a separate nested namespace.
    stdlib, py2_stdlib = collect_stdlib_packages()
    third_party, py2_third_party = collect_third_party_packages()
    all_third_party = third_party + py2_third_party

    # Collect standard library names to filter out from dependencies.
    stdlib_names = [package.name for package in stdlib]
    py2_stdlib_names = [package.name for package in py2_stdlib]
    # Modules shared by both major versions are recorded with version "2.7".
    py2_stdlib_names += [package.name for package in stdlib if package.py_version == "2.7"]

    # Collect all known distributions (for sanity checks).
    known_distributions = {package_to_distribution.get(package.name, package.name) for package in all_third_party}

    # Compute dependencies between third party packages/modules to populate metadata.
    for package in all_third_party:
        populate_requirements(package, stdlib_names, py2_stdlib_names, known_distributions)

    # Copy the files to a separate location (to not clobber the root directory).
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    copy_stdlib(stdlib, py2_stdlib)
    copy_third_party(third_party, py2_third_party)
|
|
|
|
|
|
# Run the migration only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|