From 7843dd79937e02c65cb1f071279862daa1eaf96f Mon Sep 17 00:00:00 2001 From: Kilian Lieret Date: Sun, 10 Mar 2024 12:36:07 -0400 Subject: [PATCH] Config for training current best model --- .../strict-mutant-agouti-legacy-metric.yaml | 111 ++++++++++++++++++ .../full_detector/submit_continue_oc.slurm | 2 +- scripts/pixel/run_oc_animation.py | 2 + 3 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 scripts/full_detector/configs/fixed-all-in-one/strict-mutant-agouti-legacy-metric.yaml diff --git a/scripts/full_detector/configs/fixed-all-in-one/strict-mutant-agouti-legacy-metric.yaml b/scripts/full_detector/configs/fixed-all-in-one/strict-mutant-agouti-legacy-metric.yaml new file mode 100644 index 0000000..a720fd6 --- /dev/null +++ b/scripts/full_detector/configs/fixed-all-in-one/strict-mutant-agouti-legacy-metric.yaml @@ -0,0 +1,111 @@ +# pytorch_lightning==2.1.1 +data: + identifier: point_clouds_10 + train: + dirs: + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_1 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_2 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_3 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_4 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_5 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_6 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_7 + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_8 + sample_size: 450 + val: + dirs: + - /scratch/gpfs/IOJALVO/gnn-tracking/object_condensation/point_clouds_v10/part_9 + stop: 5 + test: null + cpus: 6 +model: + class_path: gnn_tracking.training.tc.TCModule + init_args: + loss_fct: + class_path: gnn_tracking.metrics.losses.metric_learning.OldGraphConstructionHingeEmbeddingLoss + init_args: + lw_repulsive: 0.006 + attr_pt_thld: 0.9 + max_num_neighbors: 256 + p_attr: 2 + p_rep: 2 + r_emb: 1 + cluster_scanner: + class_path: gnn_tracking.postprocessing.dbscanscanner.DBSCANHyperParamScanner + init_args: + eps_range: + - 0 + - 1 + min_samples_range: + - 1 + - 4 + n_trials: 60 + keep_best: 30 + n_jobs: 6 + guide: double_majority_pt0.9 + pt_thlds: + - 0.0 + - 0.5 + - 0.9 + - 1.5 + max_eta: 4.0 + model: + class_path: gnn_tracking.models.track_condensation_networks.GraphTCNForMLGCPipeline + init_args: + node_indim: 38 + edge_indim: 76 + h_dim: 192 + e_dim: 192 + h_outdim: 24 + hidden_dim: 192 + L_hc: 4 + alpha_hc: 0.5 + ec: null + feed_edge_weights: false + ec_threshold: 0.5 + mask_orphan_nodes: false + use_ec_embeddings_for_hc: false + alpha_latent: 0.5 + n_embedding_coords: 24 + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 7.0e-05 + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + weight_decay: 0.0 + amsgrad: false + foreach: null + maximize: false + capturable: false + differentiable: false + fused: null + scheduler: + class_path: torch.optim.lr_scheduler.ExponentialLR + init_args: + gamma: 0.933 + last_epoch: -1 + verbose: false + preproc: + class_path: gnn_tracking.models.graph_construction.MLGraphConstructionFromChkpt + init_args: + ml_chkpt_path: /home/kl5675/Documents/23/git_sync/hyperparameter_optimization2/scripts/full_detector/lightning_logs/amber-gibbon-of-joy/checkpoints/epoch=78-step=71100.compat_newcompatible.ckpt + + ec_chkpt_path: "" + ml_class_name: gnn_tracking.training.ml.MLModule + ec_class_name: gnn_tracking.training.ec.ECModule + ml_model_only: true + ec_model_only: true + max_radius: 1.0 + max_num_neighbors: 25 + use_embedding_features: true + ratio_of_false: null + build_edge_features: true + ec_threshold: null + ml_freeze: true + ec_freeze: true + embedding_slice: + - null + - null diff --git a/scripts/full_detector/submit_continue_oc.slurm b/scripts/full_detector/submit_continue_oc.slurm index f9b094e..5552c35 100644 --- a/scripts/full_detector/submit_continue_oc.slurm +++ b/scripts/full_detector/submit_continue_oc.slurm @@ -4,7 +4,7 @@ #SBATCH --output=slurm_logs/continue-full-oc-%j.log #SBATCH --nodes=1 #SBATCH --ntasks-per-node=1 -#SBATCH --time=48:00:00 +#SBATCH --time=24:00:00 #SBATCH --cpus-per-task 6 #SBATCH --gres=gpu:1 #SBATCH --mem-per-cpu=30G diff --git a/scripts/pixel/run_oc_animation.py b/scripts/pixel/run_oc_animation.py index 569a809..f30cdeb 100644 --- a/scripts/pixel/run_oc_animation.py +++ b/scripts/pixel/run_oc_animation.py @@ -28,7 +28,9 @@ def cli_main(): RichProgressBar(leave=True), PrintValidationMetrics(), ModelCheckpoint( + dirpath="/scratch/gpfs/kl5675/checkpoints/animation", every_n_train_steps=50, + save_top_k=-1, ), LearningRateMonitor(logging_interval="step", log_momentum=True), ],