diff --git a/hpc/Makefile b/hpc/Makefile index 3617429..5757bea 100644 --- a/hpc/Makefile +++ b/hpc/Makefile @@ -1,6 +1,7 @@ -all: build-load-balancer - -load-balancer-files = LoadBalancer.cpp LoadBalancer.hpp ../lib/httplib.h ../lib/json.hpp ../lib/umbridge.h +all: build-load-balancer build-testmodel build-load-balancer: - - g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o load-balancer -pthread + - g++ -O3 -Wno-unused-result -std=c++17 -I../lib/ LoadBalancer.cpp -o load-balancer -pthread + +build-testmodel: + - g++ -O3 -Wno-unused-result -std=c++17 -I../lib/ ../models/testmodel/minimal-server.cpp -o testmodel -pthread diff --git a/hpc/README.md b/hpc/README.md index e12eec5..038cbb7 100644 --- a/hpc/README.md +++ b/hpc/README.md @@ -5,19 +5,19 @@ This load balancer allows any scaling up UM-Bridge applications to HPC systems. ## Installation 1. **Build the load balancer** - + Clone the UM-Bridge repository. - + ``` git clone https://github.com/UM-Bridge/umbridge.git ``` - + Then navigate to the `hpc` directory. ``` cd umbridge/hpc ``` - + Finally, compile the load balancer. Depending on your HPC system, you likely have to load a module providing a recent c++ compiler. ``` @@ -25,7 +25,7 @@ This load balancer allows any scaling up UM-Bridge applications to HPC systems. ``` 2. **Download HyperQueue** - + Download HyperQueue from the most recent release at https://github.com/It4innovations/hyperqueue/releases and place the `hq` binary in the `hpc` directory next to the load balancer. ## Usage @@ -35,7 +35,7 @@ The load balancer is primarily intended to run on a login node. 1. **Configure resource allocation** The load balancer instructs HyperQueue to allocate batches of resources on the HPC system, depending on demand for model evaluations. HyperQueue will submit SLURM or PBS jobs on the HPC system when needed, scheduling requested model runs within those jobs. When demand decreases, HyperQueue will cancel some of those jobs again. - + Adapt the configuration in ``hpc/hq_scripts/allocation_queue.sh`` to your needs. For example, when running a very fast UM-Bridge model on an HPC cluster, it is advisable to choose medium-sized jobs for resource allocation. That will avoid submitting large numbers of jobs to the HPC system's scheduler, while HyperQueue itself will handle large numbers of small model runs within those allocated jobs. @@ -44,8 +44,7 @@ The load balancer is primarily intended to run on a login node. Adapt the configuration in ``hpc/hq_scripts/job.sh`` to your needs: * Specify what UM-Bridge model server to run, - * set `#HQ` variables at the top to specify what resources each instance should receive, - * and set the directory of your load balancer binary in `load_balancer_dir`. + * and set `#HQ` variables at the top to specify what resources each instance should receive. Importantly, the UM-Bridge model server must serve its models at the port specified by the environment variable `PORT`. The value of `PORT` is automatically determined by `job.sh`, avoiding potential conflicts if multiple servers run on the same compute node. diff --git a/hpc/hq_scripts/job.sh b/hpc/hq_scripts/job.sh index 94e7b0b..03cb381 100755 --- a/hpc/hq_scripts/job.sh +++ b/hpc/hq_scripts/job.sh @@ -30,9 +30,8 @@ port=$(get_avaliable_port) export PORT=$port # Assume that server sets the port according to the environment variable 'PORT'. -/your/model/server/call & # CHANGE ME! - -load_balancer_dir="/load/balancer/directory" # CHANGE ME! +# Otherwise the job script will be stuck waiting for model server's response. +./testmodel & # CHANGE ME! host=$(hostname -I | awk '{print $1}') @@ -44,6 +43,7 @@ done echo "Model server responded" # Write server URL to file identified by HQ job ID. +load_balancer_dir="." mkdir -p "$load_balancer_dir/urls" echo "http://$host:$port" > "$load_balancer_dir/urls/url-$HQ_JOB_ID.txt" diff --git a/models/testmodel/minimal-server.cpp b/models/testmodel/minimal-server.cpp index e0d0ad4..6e83a16 100644 --- a/models/testmodel/minimal-server.cpp +++ b/models/testmodel/minimal-server.cpp @@ -4,7 +4,7 @@ #include // Needed for HTTPS, implies the need for openssl, may be omitted if HTTP suffices -#define CPPHTTPLIB_OPENSSL_SUPPORT +// #define CPPHTTPLIB_OPENSSL_SUPPORT #include "umbridge.h"