Skip to content

Commit

Permalink
Add ability to delete all versions except the most recent one
Browse files Browse the repository at this point in the history
The most recent version is the one containing the most recently created files.
Fix the file name parsing logic
Update README and documentation

fixes #33
  • Loading branch information
arcivanov committed Jun 24, 2024
1 parent ea0e8e4 commit f410467
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 17 deletions.
57 changes: 52 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ Authentication password may be passed via environment variable

Authentication with TOTP is supported.

Examples:
### Examples:

```bash
$ pypi-cleanup --help
usage: pypi-cleanup [-h] -u USERNAME -p PACKAGE [-t URL] [-r PATTERNS] [--do-it] [-y] [-v]
usage: pypi-cleanup [-h] [-u USERNAME] -p PACKAGE [-t URL] [-r PATTERNS | --leave-most-recent-only] [--query-only] [--do-it] [-y] [-d DAYS] [-v]

PyPi Package Cleanup Utility
PyPi Package Cleanup Utility v0.1.7.dev20240624230606

optional arguments:
options:
-h, --help show this help message and exit
-u USERNAME, --username USERNAME
authentication username (default: None)
Expand All @@ -51,11 +51,16 @@ optional arguments:
-t URL, --host URL PyPI <proto>://<host> prefix (default: https://pypi.org/)
-r PATTERNS, --version-regex PATTERNS
regex to use to match package versions to be deleted (default: None)
--leave-most-recent-only
delete all releases except the *most recent* one, i.e. the one containing the most recently created files (default: False)
--query-only only queries and processes the package, no login required (default: False)
--do-it actually perform the destructive delete (default: False)
-y, --yes confirm extremely dangerous destructive delete (default: False)
-d DAYS, --days DAYS only delete releases **matching specified patterns** where all files are older than X days (default: 0)
-v, --verbose be verbose (default: 0)
```

#### Regular Cleanup of Development Artifacts
```bash
$ pypi-cleanup -u arcivanov -p pybuilder
Password:
Expand All @@ -66,11 +71,53 @@ INFO:root:Deleting pybuilder version 0.12.3.dev20200421010857
INFO:root:Deleted pybuilder version 0.12.3.dev20200421010857
```

#### Using Custom Regex Pattern
```bash
$ pypi-cleanup -u arcivanov -p geventmp -n -r '.*\\.dev1$'
$ pypi-cleanup -u arcivanov -p geventmp -r '.*\\.dev1$'
WARNING:root:
WARNING:
You're using custom patterns: [re.compile('.*\\\\.dev1$')].
If you make a mistake in your patterns you can potentially wipe critical versions irrecoverably.
Make sure to test your patterns before running the destructive cleanup.
Once you're satisfied the patterns are correct re-run with `-y`/`--yes` to confirm you know what you're doing.
Goodbye.
$ pypi-cleanup -u arcivanov -p geventmp -r '.*\\.dev1$' -y
Password:
INFO:root:Running in DRY RUN mode
INFO:root:Will use the following patterns [re.compile('.*\\.dev1$')] on package 'geventmp'
Authentication code: 123456
INFO:root:Deleting geventmp version 0.0.1.dev1
```
#### Deleting All Versions Except The Most Recent One
```bash
$ pypi-cleanup -p pypi-cleanup --leave-most-recent-only
WARNING:root:
WARNING:
You're trying to delete ALL versions of the package EXCEPT for the *most recent one*, i.e.
the one with the most recent (by the wall clock) files, disregarding the actual version numbers
or versioning schemes!

You can potentially wipe critical versions irrecoverably.
Make sure this is what you really want before running the destructive cleanup.
Once you're sure you want to delete all versions except the most recent one,
re-run with `-y`/`--yes` to confirm you know what you're doing.
Goodbye.
$ pypi-cleanup -p pypi-cleanup --leave-most-recent-only -y --query-only
INFO:root:Running in DRY RUN mode
INFO:root:Will only leave the MOST RECENT version of the package 'pypi-cleanup'
INFO:root:Leaving the MOST RECENT package version: 0.1.7.dev20240624221535 - 2024-06-24T22:15:52.778775+0000
INFO:root:Found the following releases to delete:
INFO:root: 0.0.1
INFO:root: 0.0.2
INFO:root: 0.0.3
INFO:root: 0.1.0
INFO:root: 0.1.1
INFO:root: 0.1.2
INFO:root: 0.1.3
INFO:root: 0.1.4
INFO:root: 0.1.5
INFO:root: 0.1.6
INFO:root:Query-only mode - exiting
```
61 changes: 49 additions & 12 deletions src/main/python/pypi_cleanup/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def handle_endtag(self, tag):


class PypiCleanup:
def __init__(self, url, username, package, do_it, patterns, verbose, days, query_only, **_):
def __init__(self, url, username, package, do_it, patterns, verbose, days, query_only, leave_most_recent_only, **_):
self.url = urlparse(url).geturl()
if self.url[-1] == "/":
self.url = self.url[:-1]
Expand All @@ -84,6 +84,7 @@ def __init__(self, url, username, package, do_it, patterns, verbose, days, query
self.patterns = patterns or DEFAULT_PATTERNS
self.verbose = verbose
self.query_only = query_only
self.leave_most_recent_only = leave_most_recent_only
self.date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)

def run(self):
Expand All @@ -97,7 +98,10 @@ def run(self):
else:
logging.info("Running in DRY RUN mode")

logging.info(f"Will use the following patterns {self.patterns} on package {self.package!r}")
if not self.leave_most_recent_only:
logging.info(f"Will use the following patterns {self.patterns} on package {self.package!r}")
else:
logging.info(f"Will only leave the MOST RECENT version of the package {self.package!r}")

with requests.Session() as s:
s.headers.update({"User-Agent": f"pypi-cleanup/{__version__} (requests/{requests_version})"})
Expand All @@ -112,21 +116,34 @@ def run(self):

project_info = r.json()
releases_by_date = {}

def package_matches_file(p, v, f):
    """Tell whether file entry ``f`` belongs to package ``p`` at version ``v``.

    :param p: package name as supplied by the user (any letter case; dashes allowed)
    :param v: version string taken from the project's version list
    :param f: file entry mapping; only its ``"filename"`` key is read
    :return: ``True`` if the file name identifies exactly this package/version
    """
    # Compare case-insensitively on BOTH sides: the file name is lower-cased,
    # so the name and version must be lower-cased as well or mixed-case
    # package names would never match.
    filename = f["filename"].lower()
    name = p.lower()
    escaped_name = name.replace("-", "_")
    version = v.lower()

    if filename.endswith((".whl", ".egg", ".src.rpm")):
        # Built distributions carry further tags after the version; requiring
        # the trailing "-" keeps version "1.0" from also matching "1.0.1".
        return filename.startswith(f"{escaped_name}-{version}-")

    # Source archives have nothing after the version, so match the full name.
    # Accept both the dash and the underscore spelling of the project name,
    # since newer build tools emit the underscore form for sdists too.
    return filename in {
        f"{candidate}-{version}{ext}"
        for candidate in (name, escaped_name)
        for ext in (".tar.gz", ".zip")
    }

for version in project_info["versions"]:
releases_by_date[version] = max(
[datetime.datetime.strptime(f["upload-time"], '%Y-%m-%dT%H:%M:%S.%f%z')
[datetime.datetime.strptime(f["upload-time"], "%Y-%m-%dT%H:%M:%S.%f%z")
for f in project_info["files"]
if f["filename"].lower().startswith(f"{self.package}-{version}") or
f["filename"].lower().startswith(f"{self.package.replace('-', '_')}-{version}")])
if package_matches_file(self.package, version, f)])

if not releases_by_date:
logging.info(f"No releases for package {self.package!r} have been found")
return

pkg_vers = list(filter(lambda k:
any(filter(lambda rex: rex.match(k),
self.patterns)) and releases_by_date[k] < self.date,
releases_by_date.keys()))
if self.leave_most_recent_only:
leave_release = max(releases_by_date, key=releases_by_date.get)
logging.info(
f"Leaving the MOST RECENT package version: {leave_release} - {releases_by_date[leave_release].strftime("%Y-%m-%dT%H:%M:%S.%f%z")}")
pkg_vers = list(r for r in releases_by_date if r != leave_release)
else:
pkg_vers = list(filter(lambda k:
any(filter(lambda rex: rex.match(k),
self.patterns)) and releases_by_date[k] < self.date,
releases_by_date.keys()))

if not pkg_vers:
logging.info(f"No releases were found matching specified patterns "
Expand Down Expand Up @@ -262,16 +279,21 @@ def main():
parser.add_argument("-p", "--package", required=True, help="PyPI package name")
parser.add_argument("-t", "--host", default="https://pypi.org/", dest="url",
help="PyPI <proto>://<host> prefix")
parser.add_argument("-r", "--version-regex", type=re.compile, action="append",
dest="patterns", help="regex to use to match package versions to be deleted")
g = parser.add_mutually_exclusive_group()
g.add_argument("-r", "--version-regex", type=re.compile, action="append",
dest="patterns", help="regex to use to match package versions to be deleted")
g.add_argument("--leave-most-recent-only", action="store_true", default=False,
help="delete all releases except the *most recent* one, i.e. the one containing "
"the most recently created files")
parser.add_argument("--query-only", action="store_true", default=False,
help="only queries and processes the package, no login required")
parser.add_argument("--do-it", action="store_true", default=False,
help="actually perform the destructive delete")
parser.add_argument("-y", "--yes", action="store_true", default=False, dest="confirm",
help="confirm extremely dangerous destructive delete")
parser.add_argument("-d", "--days", type=int, default=0,
help="only delete releases where all files are older than X days")
help="only delete releases **matching specified patterns** where all files are "
"older than X days")
parser.add_argument("-v", "--verbose", action="store_const", const=1, default=0, help="be verbose")

args = parser.parse_args()
Expand All @@ -286,6 +308,21 @@ def main():
\t"""))
return 3

if args.leave_most_recent_only and not args.confirm and not args.do_it:
logging.warning(dedent(f"""
WARNING:
\tYou're trying to delete ALL versions of the package EXCEPT for the *most recent one*, i.e.
\tthe one with the most recent (by the wall clock) files, disregarding the actual version numbers
\tor versioning schemes!
\t
\tYou can potentially wipe critical versions irrecoverably.
\tMake sure this is what you really want before running the destructive cleanup.
\tOnce you're sure you want to delete all versions except the most recent one,
\tre-run with `-y`/`--yes` to confirm you know what you're doing.
\tGoodbye.
\t"""))
return 3

return PypiCleanup(**vars(args)).run()
finally:
logging.shutdown()
Expand Down

0 comments on commit f410467

Please sign in to comment.