Skip to content

Commit

Permalink
Merge pull request #2 from sonatype-nexus-community/optimize-compute
Browse files Browse the repository at this point in the history
update compute platform only if main has updates
  • Loading branch information
hboutemy authored Oct 9, 2024
2 parents 12e271c + 1799192 commit 122294c
Showing 1 changed file with 55 additions and 24 deletions.
79 changes: 55 additions & 24 deletions update-pytorch.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
#!/bin/bash

# old non-updated compute platforms:
# cu75 cu80 cu90 cu91 cu92 cu100 cu101 cu102 cu110 cu111 cu113 cu115 cu116 cu117 cu117_pypi_cudnn
# rocm3.10 rocm3.7 rocm3.8 rocm4.0.1 rocm4.1 rocm4.2 rocm4.3.1 rocm4.5.2 rocm5.0 rocm5.1.1 rocm5.2 rocm5.3 rocm5.4.2 rocm5.5 rocm5.6 rocm5.7
for d in . cpu cpu-cxx11-abi cpu_pypi_pkg cu118 cu121 cu124 rocm6.0 rocm6.1
do
d="whl/$d"
[ "$d" == "whl/." ] && d="whl"
# Update a PyPI index mirrored from PyTorch (https://download.pytorch.org/$d, with d=whl or whl/<compute platform>).
# This copies the content (the main url, then follows the links to each project) and applies 2 changes:
# 1. the content is copied into a "simple/" sub-directory, to match the PEP 503 "simple" convention (which allows other APIs in parallel)
# 2. the links to binary packages are updated by adding the "https://download.pytorch.org" prefix to the PyTorch-provided "/whl/*" paths, so that they link back to the PyTorch binaries in their home location
function updateIndex() {
local d=$1

# will copy source $d to $d/simple
mkdir -p $d/simple
dir="$(pwd)"
local dir="$(pwd)"
cd $d/simple

# projects list
curl -s https://download.pytorch.org/$d/ | grep -v 'TIMESTAMP 1' > index.html
count="$(cat index.html | cut -d '>' -f 2 | cut -d '<' -f 1 | grep -cve '^\s*$')"
local count="$(cat index.html | cut -d '>' -f 2 | cut -d '<' -f 1 | grep -cve '^\s*$')"

echo "https://download.pytorch.org/$d/ $count => $d/simple/"
if [ $count -lt 40 ]
then
echo "failing because low packages count for $d: $count (intermittent download failure?)"
exit 1
fi
echo "https://download.pytorch.org/$d/ => $d/simple/"
checkCount $d $count 40

i=0
# copy also content of each project
local i=0
for p in `cat index.html | cut -d '>' -f 2 | cut -d '<' -f 1`
do
mkdir -p $p
Expand All @@ -31,20 +31,51 @@ do
| grep -v 'TIMESTAMP 1' \
> index.html

count="$(cat index.html | grep -c 'https://download.pytorch.org/whl/')"
echo "$i $d/$p/ => $d/simple/$p/ $count"
if [ $count -lt 1 ]
then
echo "failing because low packages count for $d/$p: $count (intermittent download failure?)"
exit 1
fi
local pcount="$(cat index.html | grep -c 'https://download.pytorch.org/whl/')"
printf "%5d / $count $d/$p/ => $d/simple/$p/ $pcount\n" $i
checkCount $d/$p $count 1
cd ..
done
echo
cd "$dir"
}

# Abort the script when a counted number of entries is below the expected minimum.
#
# Arguments:
#   $1 - label of the content that was counted (only used in the failure message)
#   $2 - number of entries that were counted
#   $3 - minimum number of entries required
# Outputs:
#   on failure, one diagnostic line on stdout
# Exit:
#   calls `exit 1` (ending the calling shell, not just this function) when $2 is
#   lower than $3, or when $2 is not a non-negative integer; returns 0 otherwise
function checkCount() {
  local content="$1"
  local count="$2"
  local minimum="$3"

  # An empty or non-numeric count would make `[ ... -lt ... ]` raise a syntax error
  # (status 2), which the `if` would treat as "count is fine": reject such values
  # explicitly so that a broken count never passes the check silently.
  if ! [[ "$count" =~ ^[0-9]+$ ]] || [ "$count" -lt "$minimum" ]
  then
    echo "!!! failing because low packages count for $content: $count (probably intermittent download failure)"
    exit 1
  fi
}

# update main PyTorch index, that contains everything, whatever the compute platform
updateIndex "whl"

# see resulting updates: refresh git's index first, then list the paths that differ from HEAD
git update-index -q --refresh
git diff-index --name-status HEAD

# `git diff-index --quiet` prints nothing and reports through its exit status
# (0 = no difference with HEAD), so the command is tested directly rather than
# being wrapped in a command substitution whose output would be run as a command.
if git diff-index --quiet HEAD
then
  echo "no update found in PyTorch main index."
  exit 0
fi
echo "updates found in PyTorch main index: updating also compute platform specific ones..."

# update compute-platform specific indexes
# ignore old non-updated ones:
# - cu75 cu80 cu90 cu91 cu92 cu100 cu101 cu102 cu110 cu111 cu113 cu115 cu116 cu117 cu117_pypi_cudnn
# - rocm3.10 rocm3.7 rocm3.8 rocm4.0.1 rocm4.1 rocm4.2 rocm4.3.1 rocm4.5.2 rocm5.0 rocm5.1.1 rocm5.2 rocm5.3 rocm5.4.2 rocm5.5 rocm5.6 rocm5.7
for d in cpu cpu-cxx11-abi cpu_pypi_pkg cu118 cu121 cu124 rocm6.0 rocm6.1
do
  updateIndex "whl/$d"
done

# report the disk usage of each entry directly under whl/
du -sh whl/*

# write one "<number of entries> <directory>" line per simple/ directory into summary.txt;
# the `wc -l` output goes through xargs so that any padding blanks around the number are removed
for d in whl/simple whl/*/simple ; do echo "$(ls "$d" | wc -l | xargs) $d" ; done > summary.txt
cat summary.txt

0 comments on commit 122294c

Please sign in to comment.