From a2fa2dad0789b3ec3e6764927871dc721bda4a28 Mon Sep 17 00:00:00 2001
From: Divij <67919237+divijghose@users.noreply.github.com>
Date: Fri, 20 Sep 2024 16:58:41 +0530
Subject: [PATCH] Hyperparameter optimization with Optuna (#24)

* Main file for hyperparameter tuning:
1. Running with a .yaml config file will lead to usual execution.
2. Running with the --optimized flag will tune hyperparameters with optuna. No input file needed.

* Module for hyperparameter tuning.
1. objective.py defines the objective function for tuning. Contains the fastvpinns object returning metric for tuning.
2. optuna_tuner.py manages the hyperparameter tuning process.

* Black formatting for hyperparameter optimization files.

* Black formatting for hyperparameter tuning files.

* Objective function that accepts number of training iterations as an argument.

* Changes to main file to incorporate hyperparameter tuning using Optuna
1. Accept number of trials and number of training iteration for each trial as an argument.

* Changes to geometry fle:
1. Accept an is_optimized argument, True if hyperparameter optimization with Optuna is being used.
2. If is_optimized is True, geometry module doesn't print out the test mesh and VTK file for each trial.
3. Backward compatibility - default value of is_optimized is False, existing code with config file should work as is.

* Parallel runs with optuna tuner
1. Creates an SQLite database if it doesn't exist. Can be used for stalled runs or parallel implementation.
2. Lists available number of GPUs and divides jobs.

* Files for hyperparameter tuning tests

* Black formatting for main file.

* Black formatting.
---
 .gitignore                                    |   5 +
 docker_initialise.py                          |  91 +++--
 docs/conf.py                                  |   5 +-
 .../uniform_mesh/poisson_2d/sin_cos.py        |   4 +-
 fastvpinns/Geometry/geometry_2d.py            |   5 +-
 fastvpinns/hyperparameter_tuning/__init__.py  |   0
 fastvpinns/hyperparameter_tuning/objective.py | 145 +++++++
 .../hyperparameter_tuning/optuna_tuner.py     |  65 ++++
 input.yaml                                    |  60 +++
 main.py                                       | 365 ++++++++++++++++++
 sin_cos.py                                    |  89 +++++
 utility.py                                    | 113 ++++++
 12 files changed, 918 insertions(+), 29 deletions(-)
 create mode 100644 fastvpinns/hyperparameter_tuning/__init__.py
 create mode 100644 fastvpinns/hyperparameter_tuning/objective.py
 create mode 100644 fastvpinns/hyperparameter_tuning/optuna_tuner.py
 create mode 100644 input.yaml
 create mode 100644 main.py
 create mode 100644 sin_cos.py
 create mode 100644 utility.py

diff --git a/.gitignore b/.gitignore
index 195240c..6c069d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,6 +30,8 @@ examples/**/output/
 examples/**/output_*
 examples/**/__pycache__/
 
+output/*
+
 
 # pytest 
 *.vtk
@@ -41,4 +43,7 @@ sensor_points.png
 # examples/**/*.png # as the README.md uses png image files
 examples/**/sensor_points.png
 
+# Optuna files
+*.sqlite3
+*.db
 
diff --git a/docker_initialise.py b/docker_initialise.py
index 4e00894..1b0694b 100644
--- a/docker_initialise.py
+++ b/docker_initialise.py
@@ -2,8 +2,9 @@
 import subprocess
 import re
 
+
 def get_version_from_toml():
-    try: 
+    try:
         with open("pyproject.toml", "r") as file:
             content = file.read()
         version_match = re.search(r'version = "([^"]+)"', content)
@@ -12,6 +13,7 @@ def get_version_from_toml():
     except Exception:
         return "Unknown"
 
+
 def check_tensorflow():
     """
     To check tensorflow version and GPU Support and number of GPUs available
@@ -19,12 +21,32 @@ def check_tensorflow():
     tensor_flow_version = "Not Found"
     gpu_support = "Not Found"
     number_of_gpus = "Not Found"
-    
-    tensorflow_version = subprocess.run(["python3", "-c", "import tensorflow as tf; print(tf.__version__)"], capture_output=True, text=True)
-    gpu_support = subprocess.run(["python3", "-c", "import tensorflow as tf; print(tf.test.is_gpu_available())"], capture_output=True, text=True)
-    number_of_gpus = subprocess.run(["python3", "-c", "import tensorflow as tf; print(len(tf.config.experimental.list_physical_devices('GPU')))"], capture_output=True, text=True)
 
-    return tensorflow_version.stdout.strip(), gpu_support.stdout.strip(), number_of_gpus.stdout.strip()
+    tensorflow_version = subprocess.run(
+        ["python3", "-c", "import tensorflow as tf; print(tf.__version__)"],
+        capture_output=True,
+        text=True,
+    )
+    gpu_support = subprocess.run(
+        ["python3", "-c", "import tensorflow as tf; print(tf.test.is_gpu_available())"],
+        capture_output=True,
+        text=True,
+    )
+    number_of_gpus = subprocess.run(
+        [
+            "python3",
+            "-c",
+            "import tensorflow as tf; print(len(tf.config.experimental.list_physical_devices('GPU')))",
+        ],
+        capture_output=True,
+        text=True,
+    )
+
+    return (
+        tensorflow_version.stdout.strip(),
+        gpu_support.stdout.strip(),
+        number_of_gpus.stdout.strip(),
+    )
 
 
 def get_cuda_cudnn_nvidia_versions():
@@ -34,7 +56,11 @@ def get_cuda_cudnn_nvidia_versions():
     # Get CUDA version
     try:
         cuda_version = subprocess.run(['nvcc', '--version'], capture_output=True, text=True)
-        cuda_version = re.search(r'release (\d+\.\d+)', cuda_version.stdout).group(1) if cuda_version.stdout else 'Not found'
+        cuda_version = (
+            re.search(r'release (\d+\.\d+)', cuda_version.stdout).group(1)
+            if cuda_version.stdout
+            else 'Not found'
+        )
     except Exception:
         pass
 
@@ -42,57 +68,74 @@ def get_cuda_cudnn_nvidia_versions():
     try:
         with open('/usr/local/cuda/include/cudnn_version.h', 'r') as f:
             cudnn_version = f.read()
-        cudnn_version = re.search(r'#define CUDNN_MAJOR (\d+)\n#define CUDNN_MINOR (\d+)\n#define CUDNN_PATCHLEVEL (\d+)', cudnn_version)
-        cudnn_version = f"{cudnn_version.group(1)}.{cudnn_version.group(2)}.{cudnn_version.group(3)}" if cudnn_version else 'Not found'
+        cudnn_version = re.search(
+            r'#define CUDNN_MAJOR (\d+)\n#define CUDNN_MINOR (\d+)\n#define CUDNN_PATCHLEVEL (\d+)',
+            cudnn_version,
+        )
+        cudnn_version = (
+            f"{cudnn_version.group(1)}.{cudnn_version.group(2)}.{cudnn_version.group(3)}"
+            if cudnn_version
+            else 'Not found'
+        )
     except Exception:
         cudnn_version = 'Not found'
 
     # Get NVIDIA driver version
     nvidia_driver_version = 'Not found'
     try:
-        nvidia_driver_version = subprocess.run(['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'], capture_output=True, text=True)
-        nvidia_driver_version = nvidia_driver_version.stdout.strip() if nvidia_driver_version.stdout else 'Not found'
+        nvidia_driver_version = subprocess.run(
+            ['nvidia-smi', '--query-gpu=driver_version', '--format=csv,noheader'],
+            capture_output=True,
+            text=True,
+        )
+        nvidia_driver_version = (
+            nvidia_driver_version.stdout.strip() if nvidia_driver_version.stdout else 'Not found'
+        )
     except Exception:
         pass
 
     return cuda_version, cudnn_version, nvidia_driver_version.split('\n')[0]
 
 
-
 def main():
     version = get_version_from_toml()
     # run the ascii-image-converter in subprocess
-    subprocess.run(["ascii-image-converter", "Fastvpinns_logo.png", "--braille", "-d" , "70,10"])
+    subprocess.run(["ascii-image-converter", "Fastvpinns_logo.png", "--braille", "-d", "70,10"])
     print("**********************************************************")
     print(f"Official Docker Image for FastVPINNs - Version {version}")
     print(f"URL: https://cmgcds.github.io/fastvpinns/")
     print("Docker Image Author : Thivin Anandh")
     print("**********************************************************\n")
-    
+
     # Execute any additional command passed to the Docker container
     # Should be a security risk, so commented out
     # if len(sys.argv) > 1:
     #     subprocess.run(sys.argv[1:])
-    
+
     # obtain the cuda versions
     cuda_version, cudnn_version, nvidia_driver_version = get_cuda_cudnn_nvidia_versions()
     if cuda_version != 'Not found' and nvidia_driver_version != 'Not found':
         print(f"\033[92mGPU Checks Passed - GPU Acceleration is Available \033[0m")
-    else :
+    else:
         print(f"\033[91mGPU Checks Failed - Execution is available on CPU only\033[0m")
 
-
     # get tensorflow versions
     tensor_flow_version, gpu_support, number_of_gpus = check_tensorflow()
 
     column_width = 10
-    print("-----------------------------------------------------------------------------------------------------")
-    print(f"| CUDA Version:       {cuda_version:<{column_width}} || cuDNN Version: {cudnn_version:<{column_width}}   || NVIDIA Driver Version: {nvidia_driver_version:<{column_width}} |")
-    print(f"| Tensorflow Version: {tensor_flow_version:<{column_width}} || GPU Support: {gpu_support:<{column_width}}     || Number of GPUs: {number_of_gpus:<{column_width}}        |")
-    print("-----------------------------------------------------------------------------------------------------")
-
+    print(
+        "-----------------------------------------------------------------------------------------------------"
+    )
+    print(
+        f"| CUDA Version:       {cuda_version:<{column_width}} || cuDNN Version: {cudnn_version:<{column_width}}   || NVIDIA Driver Version: {nvidia_driver_version:<{column_width}} |"
+    )
+    print(
+        f"| Tensorflow Version: {tensor_flow_version:<{column_width}} || GPU Support: {gpu_support:<{column_width}}     || Number of GPUs: {number_of_gpus:<{column_width}}        |"
+    )
+    print(
+        "-----------------------------------------------------------------------------------------------------"
+    )
 
-    
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/docs/conf.py b/docs/conf.py
index 3fc0279..7c8d303 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -26,6 +26,7 @@
 
 import os
 import sys
+
 sys.path.insert(0, os.path.abspath('../fastvpinns'))
 sys.path.insert(0, os.path.abspath('../../fastvpinns'))
 import fastvpinns
@@ -46,7 +47,7 @@
     'sphinx.ext.viewcode',
     'sphinx.ext.mathjax',
     'sphinx.ext.intersphinx',
-    'sphinx_copybutton', 
+    'sphinx_copybutton',
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -84,4 +85,4 @@
     'logo_only': True,
     'display_version': True,
     'prev_next_buttons_location': 'bottom',
-}
\ No newline at end of file
+}
diff --git a/examples/forward_problems_2d/uniform_mesh/poisson_2d/sin_cos.py b/examples/forward_problems_2d/uniform_mesh/poisson_2d/sin_cos.py
index a4aa5fb..2b5ced3 100644
--- a/examples/forward_problems_2d/uniform_mesh/poisson_2d/sin_cos.py
+++ b/examples/forward_problems_2d/uniform_mesh/poisson_2d/sin_cos.py
@@ -48,7 +48,7 @@ def rhs(x, y):
     omegaX = 4.0 * np.pi
     omegaY = 4.0 * np.pi
     f_temp = -2.0 * (omegaX**2) * (np.sin(omegaX * x) * np.sin(omegaY * y))
-    
+
     return f_temp
 
 
@@ -57,7 +57,7 @@ def exact_solution(x, y):
     This function will return the exact solution at a given point
     """
     # If the exact Solution does not have an analytical expression, leave the value as 0(zero)
-    # it can be set using `np.ones_like(x) * 0.0` and then ignore the errors and the error plots generated. 
+    # it can be set using `np.ones_like(x) * 0.0` and then ignore the errors and the error plots generated.
 
     omegaX = 4.0 * np.pi
     omegaY = 4.0 * np.pi
diff --git a/fastvpinns/Geometry/geometry_2d.py b/fastvpinns/Geometry/geometry_2d.py
index b93f968..6791eba 100644
--- a/fastvpinns/Geometry/geometry_2d.py
+++ b/fastvpinns/Geometry/geometry_2d.py
@@ -43,6 +43,7 @@ def __init__(
         n_test_points_x: int,
         n_test_points_y: int,
         output_folder: str,
+        is_optimized: bool = False,
     ):
         """
         Constructor for Geometry_2D class.
@@ -64,6 +65,7 @@ def __init__(
         self.n_test_points_x = n_test_points_x
         self.n_test_points_y = n_test_points_y
         self.output_folder = output_folder
+        self.is_optimized = is_optimized
 
         if self.mesh_generation_method not in ["internal", "external"]:
             print(
@@ -338,7 +340,8 @@ def _temp_bd_func(start, end, num_pts):
         self.bd_dict = bd_points
 
         # generate vtk
-        self.generate_vtk_for_test()
+        if not self.is_optimized:
+            self.generate_vtk_for_test()
 
         return self.cell_points, self.bd_dict
 
diff --git a/fastvpinns/hyperparameter_tuning/__init__.py b/fastvpinns/hyperparameter_tuning/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fastvpinns/hyperparameter_tuning/objective.py b/fastvpinns/hyperparameter_tuning/objective.py
new file mode 100644
index 0000000..707ddba
--- /dev/null
+++ b/fastvpinns/hyperparameter_tuning/objective.py
@@ -0,0 +1,145 @@
+"""
+This file contains the objective function for hyperparameter tuning of the FastVPINN model.
+
+The objective function defines the search space for hyperparameters and evaluates the model's
+performance using the suggested hyperparameter values. It sets up the geometry, finite element
+space, data handler, and model based on the trial's suggestions. The model is then trained for
+a fixed number of epochs, and its performance is evaluated using the relative L2 error.
+
+Author: Divij Ghose
+
+Changelog: 9/9/24 - Initial implementation of the objective function for hyperparameter tuning
+
+
+Known issues: None
+
+Dependencies: optuna, tensorflow, fastvpinns
+"""
+
+# objective.py
+import optuna
+import tensorflow as tf
+import os
+
+from fastvpinns.Geometry.geometry_2d import Geometry_2D
+from fastvpinns.FE.fespace2d import Fespace2D
+from fastvpinns.data.datahandler2d import DataHandler2D
+from fastvpinns.model.model import DenseModel
+from fastvpinns.physics.poisson2d import pde_loss_poisson
+from fastvpinns.utils.compute_utils import compute_errors_combined
+from sin_cos import *  # Import your example-specific functions
+
+
+def objective(trial, num_epochs):
+    # Suggest values for hyperparameters
+    config = {
+        "geometry": {
+            "internal_mesh_params": {
+                "n_cells_x": trial.suggest_int("n_cells_x", 2, 10),
+                "n_cells_y": trial.suggest_int("n_cells_y", 2, 10),
+                "n_boundary_points": trial.suggest_int("n_boundary_points", 100, 1000),
+            }
+        },
+        "fe": {
+            "fe_order": trial.suggest_int("fe_order", 2, 8),
+            "fe_type": trial.suggest_categorical("fe_type", ["legendre", "jacobi"]),
+            "quad_order": trial.suggest_int("quad_order", 3, 15),
+            "quad_type": trial.suggest_categorical("quad_type", ["gauss-legendre", "gauss-jacobi"]),
+        },
+        "model": {
+            "model_architecture": [2]
+            + [
+                trial.suggest_int(f"layer_{i}", 10, 100)
+                for i in range(trial.suggest_int("n_layers", 1, 5))
+            ]
+            + [1],
+            "activation": "tanh",
+            "use_attention": False,
+            "learning_rate": {
+                "initial_learning_rate": trial.suggest_loguniform(
+                    "initial_learning_rate", 1e-5, 1e-2
+                ),
+                "use_lr_scheduler": True,
+                "decay_steps": trial.suggest_int("decay_steps", 1000, 10000),
+                "decay_rate": trial.suggest_uniform("decay_rate", 0.9, 0.99),
+            },
+        },
+        "pde": {"beta": 10},
+    }
+
+    # Set up your model and training process using the suggested hyperparameters
+
+    output_temp_dir = "output_temp"
+    if not os.path.exists(output_temp_dir):
+        os.makedirs(output_temp_dir)
+
+    gpus = tf.config.list_physical_devices('GPU')
+    if gpus:
+        try:
+            tf.config.experimental.set_memory_growth(gpus[0], True)
+        except RuntimeError as e:
+            print(e)
+
+    domain = Geometry_2D("quadrilateral", "internal", 100, 100, output_temp_dir, is_optimized=True)
+    cells, boundary_points = domain.generate_quad_mesh_internal(
+        x_limits=[0, 1],
+        y_limits=[0, 1],
+        n_cells_x=config["geometry"]["internal_mesh_params"]["n_cells_x"],
+        n_cells_y=config["geometry"]["internal_mesh_params"]["n_cells_y"],
+        num_boundary_points=config["geometry"]["internal_mesh_params"]["n_boundary_points"],
+    )
+
+    fespace = Fespace2D(
+        mesh=domain.mesh,
+        cells=cells,
+        boundary_points=boundary_points,
+        cell_type=domain.mesh_type,
+        fe_order=config["fe"]["fe_order"],
+        fe_type=config["fe"]["fe_type"],
+        quad_order=config["fe"]["quad_order"],
+        quad_type=config["fe"]["quad_type"],
+        fe_transformation_type="bilinear",
+        bound_function_dict=get_boundary_function_dict(),
+        bound_condition_dict=get_bound_cond_dict(),
+        forcing_function=rhs,
+        output_path="output_temp",
+        generate_mesh_plot=False,
+    )
+
+    datahandler = DataHandler2D(fespace, domain, dtype=tf.float32)
+
+    params_dict = {"n_cells": fespace.n_cells}
+    train_dirichlet_input, train_dirichlet_output = datahandler.get_dirichlet_input()
+    bilinear_params_dict = datahandler.get_bilinear_params_dict_as_tensors(get_bilinear_params_dict)
+
+    model = DenseModel(
+        layer_dims=config["model"]["model_architecture"],
+        learning_rate_dict=config["model"]["learning_rate"],
+        params_dict=params_dict,
+        loss_function=pde_loss_poisson,
+        input_tensors_list=[datahandler.x_pde_list, train_dirichlet_input, train_dirichlet_output],
+        orig_factor_matrices=[
+            datahandler.shape_val_mat_list,
+            datahandler.grad_x_mat_list,
+            datahandler.grad_y_mat_list,
+        ],
+        force_function_list=datahandler.forcing_function_list,
+        tensor_dtype=tf.float32,
+        use_attention=config["model"]["use_attention"],
+        activation=config["model"]["activation"],
+        hessian=False,
+    )
+
+    # Train the model for a fixed number of epochs
+    beta = tf.constant(config["pde"]["beta"], dtype=tf.float32)
+
+    for epoch in range(num_epochs):
+        loss = model.train_step(beta=beta, bilinear_params_dict=bilinear_params_dict)
+    # remove output_temp directory using os
+    test_points = domain.get_test_points()
+    y_exact = exact_solution(test_points[:, 0], test_points[:, 1])
+    y_pred = model(test_points).numpy().reshape(-1)
+
+    _, _, l2_error_relative, _, _, _ = compute_errors_combined(y_exact, y_pred)
+
+    return l2_error_relative  # Return the relative L2 error as the objective to minimize
diff --git a/fastvpinns/hyperparameter_tuning/optuna_tuner.py b/fastvpinns/hyperparameter_tuning/optuna_tuner.py
new file mode 100644
index 0000000..af4ebcb
--- /dev/null
+++ b/fastvpinns/hyperparameter_tuning/optuna_tuner.py
@@ -0,0 +1,65 @@
+# optuna_tuner.py
+"""
+Optuna-based hyperparameter tuner for FastVPINNs.
+
+This module provides the OptunaTuner class, which implements hyperparameter
+tuning using the Optuna optimization framework. It allows for efficient
+exploration of the hyperparameter space to find optimal configurations
+for FastVPINNs models.
+
+Classes:
+    OptunaTuner: Manages the hyperparameter tuning process using Optuna.
+
+Usage:
+    tuner = OptunaTuner(n_trials=100, study_name="my_optimization")
+    best_params = tuner.run()
+
+Note:
+    This module requires the 'optuna' package to be installed.
+"""
+
+import optuna
+from .objective import objective
+import tensorflow as tf
+import os
+
+
+class OptunaTuner:
+    def __init__(
+        self, n_trials=100, study_name="fastvpinns_optimization", n_jobs=-1, n_epochs=5000
+    ):
+        self.n_trials = n_trials
+        self.study_name = study_name
+        self.n_jobs = n_jobs
+        self.n_epochs = n_epochs
+        self.gpus = tf.config.list_physical_devices('GPU')
+        print(f"Available GPUs: {len(self.gpus)}")
+
+    def objective_wrapper(self, trial):
+        """
+        Wrapper function to run the objective function on a specific GPU.
+        """
+
+        gpu_id = trial.number % len(self.gpus)
+        with tf.device(f'/device:GPU:{gpu_id}'):
+            return objective(trial, self.n_epochs)
+
+    def run(self):
+        study = optuna.create_study(
+            study_name=self.study_name,
+            direction="minimize",
+            storage="sqlite:///fastvpinns_optuna.db",
+            load_if_exists=True,
+        )
+        study.optimize(
+            self.objective_wrapper, n_trials=self.n_trials, n_jobs=min(len(self.gpus), self.n_jobs)
+        )
+
+        print("Best trial:")
+        trial = study.best_trial
+        print("  Value: ", trial.value)
+        print("  Params: ")
+        for key, value in trial.params.items():
+            print("    {}: {}".format(key, value))
+
+        return study.best_params
diff --git a/input.yaml b/input.yaml
new file mode 100644
index 0000000..65985c7
--- /dev/null
+++ b/input.yaml
@@ -0,0 +1,60 @@
+# This YAML file contains configuration parameters for a variational physics-informed neural network (VarPINN) experimentation.
+
+experimentation:
+  output_path: "output/poisson2d/1"  # Path to the output directory where the results will be saved.
+
+geometry:
+  mesh_generation_method: "internal"  # Method for generating the mesh. Can be "internal" or "external".
+  generate_mesh_plot: True  # Flag indicating whether to generate a plot of the mesh.
+  
+  # internal mesh generated quadrilateral mesh, depending on the parameters specified below.
+
+  internal_mesh_params:  # Parameters for internal mesh generation method.
+    x_min: 0  # Minimum x-coordinate of the domain.
+    x_max: 1  # Maximum x-coordinate of the domain.
+    y_min: 0  # Minimum y-coordinate of the domain.
+    y_max: 1  # Maximum y-coordinate of the domain.
+    n_cells_x: 4  # Number of cells in the x-direction.
+    n_cells_y: 4  # Number of cells in the y-direction.
+    n_boundary_points: 400  # Number of boundary points.
+    n_test_points_x: 100  # Number of test points in the x-direction.
+    n_test_points_y: 100  # Number of test points in the y-direction.
+  
+  exact_solution:
+    exact_solution_generation: "internal" # whether the exact solution needs to be read from external file.
+    exact_solution_file_name: "" # External solution file name (if exists from FEM)
+
+  mesh_type: "quadrilateral"  # Type of mesh. Can be "quadrilateral" or other supported types.
+  
+  external_mesh_params:  # Parameters for external mesh generation method.
+    mesh_file_name: "meshes/hemker.mesh"  # Path to the external mesh file (should be a .mesh file).
+    boundary_refinement_level: 4  # Level of refinement for the boundary.
+    boundary_sampling_method: "uniform"  # Method for sampling the boundary. Can be "uniform"
+
+fe:
+  fe_order: 6 # Order of the finite element basis functions.
+  fe_type: "legendre"  # Type of finite element basis functions. Can be "jacobi" or other supported types.
+  quad_order: 10  # Order of the quadrature rule.
+  quad_type: "gauss-jacobi"  # Type of quadrature rule. Can be "gauss-jacobi" or other supported types.
+
+pde:
+  beta: 10  # Parameter for the PDE.
+
+model:
+  model_architecture: [2, 50,50,50,50, 1]  # Architecture of the neural network model.
+  activation: "tanh"  # Activation function used in the neural network.
+  use_attention: False  # Flag indicating whether to use attention mechanism in the model.
+  epochs: 10000  # Number of training epochs.
+  dtype: "float32"  # Data type used for computations.
+  set_memory_growth: False  # Flag indicating whether to set memory growth for GPU.
+  
+  learning_rate:  # Parameters for learning rate scheduling.
+    initial_learning_rate: 0.001  # Initial learning rate.
+    use_lr_scheduler: False  # Flag indicating whether to use learning rate scheduler.
+    decay_steps: 1000  # Number of steps between each learning rate decay.
+    decay_rate: 0.99  # Decay rate for the learning rate.
+    staircase: False  # Flag indicating whether to use staircase decay.
+  
+
+logging:
+  update_console_output: 5000  # Number of steps between each update of the console output.
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..2e93710
--- /dev/null
+++ b/main.py
@@ -0,0 +1,365 @@
+import numpy as np
+import pandas as pd
+import pytest
+import tensorflow as tf
+from pathlib import Path
+from tqdm import tqdm
+import yaml
+import sys
+import copy
+from tensorflow.keras import layers
+from tensorflow.keras import initializers
+from rich.console import Console
+import copy
+import time
+import optuna
+import argparse
+
+
+from fastvpinns.Geometry.geometry_2d import Geometry_2D
+from fastvpinns.FE.fespace2d import Fespace2D
+from fastvpinns.data.datahandler2d import DataHandler2D
+from fastvpinns.model.model import DenseModel
+from fastvpinns.physics.poisson2d import pde_loss_poisson
+from fastvpinns.utils.plot_utils import plot_contour, plot_loss_function, plot_test_loss_function
+from fastvpinns.utils.compute_utils import compute_errors_combined
+from fastvpinns.utils.print_utils import print_table
+
+# import the example file
+from sin_cos import *
+
+# import all files from utility
+from utility import *
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Run FastVPINNs with YAML config or optimized hyperparameters"
+    )
+    parser.add_argument(
+        "config",
+        nargs="?",
+        default="input.yaml",
+        help="Path to YAML config file (default: input.yaml)",
+    )
+    parser.add_argument("--optimized", action="store_true", help="Use optimized hyperparameters")
+    parser.add_argument("--n-trials", type=int, default=100, help="Number of optimization trials")
+    parser.add_argument(
+        "--n-epochs",
+        type=int,
+        default=5000,
+        help="Number of epochs to train each model in the hyperparameter optimization",
+    )
+    args = parser.parse_args()
+
+    gpus = tf.config.list_physical_devices('GPU')
+
+    if args.optimized:
+        from fastvpinns.hyperparameter_tuning.optuna_tuner import OptunaTuner
+
+        print("Running with optimized hyperparameters")
+        print("This may take a while...")
+        print("Running OptunaTuner...")
+
+        tuner = OptunaTuner(n_trials=args.n_trials, n_jobs=len(gpus), n_epochs=args.n_epochs)
+        best_params = tuner.run()
+        # Convert best_params to the format expected by your code
+        # config = convert_best_params_to_config(best_params)
+        print("OptunaTuner completed")
+        print("Best hyperparameters:")
+        for key, value in best_params.items():
+            print(f"{key}: {value}")
+        sys.exit(0)
+    elif args.config:
+        config_path = Path(args.config)
+        if not config_path.exists():
+            print(f"Config file not found: {config_path}")
+            sys.exit(1)
+
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+    else:
+        print("Please provide either a config file or use --optimized flag")
+        sys.exit(1)
+
+    console = Console()
+
+    # # check input arguments
+    # if len(sys.argv) != 2:
+    #     print("Usage: python main.py <input file>")
+    #     sys.exit(1)
+
+    # # Read the YAML file
+    # with open(sys.argv[1], 'r') as f:
+    #     config = yaml.safe_load(f)
+
+    # Extract the values from the YAML file
+    # Values that are not hyperparameters:
+    i_output_path = config['experimentation']['output_path']
+    i_mesh_generation_method = config['geometry']['mesh_generation_method']
+    i_generate_mesh_plot = config['geometry']['generate_mesh_plot']
+    i_mesh_type = config['geometry']['mesh_type']
+    i_x_min = config['geometry']['internal_mesh_params']['x_min']
+    i_x_max = config['geometry']['internal_mesh_params']['x_max']
+    i_y_min = config['geometry']['internal_mesh_params']['y_min']
+    i_y_max = config['geometry']['internal_mesh_params']['y_max']
+    i_n_test_points_x = config['geometry']['internal_mesh_params']['n_test_points_x']
+    i_n_test_points_y = config['geometry']['internal_mesh_params']['n_test_points_y']
+    i_exact_solution_generation = config['geometry']['exact_solution']['exact_solution_generation']
+    i_exact_solution_file_name = config['geometry']['exact_solution']['exact_solution_file_name']
+
+    i_mesh_file_name = config['geometry']['external_mesh_params']['mesh_file_name']
+    i_boundary_refinement_level = config['geometry']['external_mesh_params'][
+        'boundary_refinement_level'
+    ]
+    i_boundary_sampling_method = config['geometry']['external_mesh_params'][
+        'boundary_sampling_method'
+    ]
+    i_epochs = config['model']['epochs']
+    i_set_memory_growth = config['model']['set_memory_growth']
+    i_update_console_output = config['logging']['update_console_output']
+    i_dtype = config['model']['dtype']
+    if i_dtype == "float64":
+        i_dtype = tf.float64
+    elif i_dtype == "float32":
+        i_dtype = tf.float32
+    else:
+        print("[ERROR] The given dtype is not a valid tensorflow dtype")
+        raise ValueError("The given dtype is not a valid tensorflow dtype")
+
+    # Values that are hyperparameters:
+    i_n_cells_x = config['geometry']['internal_mesh_params']['n_cells_x']
+    i_n_cells_y = config['geometry']['internal_mesh_params']['n_cells_y']
+    i_n_boundary_points = config['geometry']['internal_mesh_params']['n_boundary_points']
+
+    i_fe_order = config['fe']['fe_order']
+    i_fe_type = config['fe']['fe_type']
+    i_quad_order = config['fe']['quad_order']
+    i_quad_type = config['fe']['quad_type']
+
+    i_model_architecture = config['model']['model_architecture']
+    i_activation = config['model']['activation']
+    i_use_attention = config['model']['use_attention']
+
+    i_learning_rate_dict = config['model']['learning_rate']
+
+    i_beta = config['pde']['beta']
+
+    # use pathlib to create the folder,if it does not exist
+    folder = Path(i_output_path)
+    # create the folder if it does not exist
+    if not folder.exists():
+        folder.mkdir(parents=True, exist_ok=True)
+
+    # get the boundary function dictionary from example file
+    bound_function_dict, bound_condition_dict = get_boundary_function_dict(), get_bound_cond_dict()
+
+    # Initiate a Geometry_2D object
+    domain = Geometry_2D(
+        i_mesh_type, i_mesh_generation_method, i_n_test_points_x, i_n_test_points_y, i_output_path
+    )
+
+    # load the mesh
+    cells, boundary_points = domain.generate_quad_mesh_internal(
+        x_limits=[i_x_min, i_x_max],
+        y_limits=[i_y_min, i_y_max],
+        n_cells_x=i_n_cells_x,
+        n_cells_y=i_n_cells_y,
+        num_boundary_points=i_n_boundary_points,
+    )
+
+    # get the boundary function dictionary from example file
+    bound_function_dict, bound_condition_dict = get_boundary_function_dict(), get_bound_cond_dict()
+
+    fespace = Fespace2D(
+        mesh=domain.mesh,
+        cells=cells,
+        boundary_points=boundary_points,
+        cell_type=domain.mesh_type,
+        fe_order=i_fe_order,
+        fe_type=i_fe_type,
+        quad_order=i_quad_order,
+        quad_type=i_quad_type,
+        fe_transformation_type="bilinear",
+        bound_function_dict=bound_function_dict,
+        bound_condition_dict=bound_condition_dict,
+        forcing_function=rhs,
+        output_path=i_output_path,
+        generate_mesh_plot=i_generate_mesh_plot,
+    )
+
+    # instantiate data handler
+    datahandler = DataHandler2D(fespace, domain, dtype=i_dtype)
+
+    params_dict = {}
+    params_dict['n_cells'] = fespace.n_cells
+
+    # get the input data for the PDE
+    train_dirichlet_input, train_dirichlet_output = datahandler.get_dirichlet_input()
+
+    # get bilinear parameters
+    # this function will obtain the values of the bilinear parameters from the model
+    # and convert them into tensors of desired dtype
+    bilinear_params_dict = datahandler.get_bilinear_params_dict_as_tensors(get_bilinear_params_dict)
+
+    model = DenseModel(
+        layer_dims=[2, 30, 30, 30, 1],
+        learning_rate_dict=i_learning_rate_dict,
+        params_dict=params_dict,
+        loss_function=pde_loss_poisson,
+        input_tensors_list=[datahandler.x_pde_list, train_dirichlet_input, train_dirichlet_output],
+        orig_factor_matrices=[
+            datahandler.shape_val_mat_list,
+            datahandler.grad_x_mat_list,
+            datahandler.grad_y_mat_list,
+        ],
+        force_function_list=datahandler.forcing_function_list,
+        tensor_dtype=i_dtype,
+        use_attention=i_use_attention,
+        activation=i_activation,
+        hessian=False,
+    )
+
+    test_points = domain.get_test_points()
+    print(f"[bold]Number of Test Points = [/bold] {test_points.shape[0]}")
+    y_exact = exact_solution(test_points[:, 0], test_points[:, 1])
+
+    # save points for plotting
+    X = test_points[:, 0].reshape(i_n_test_points_x, i_n_test_points_y)
+    Y = test_points[:, 1].reshape(i_n_test_points_x, i_n_test_points_y)
+    Y_Exact_Matrix = y_exact.reshape(i_n_test_points_x, i_n_test_points_y)
+
+    # plot the exact solution
+    plot_contour(
+        x=X,
+        y=Y,
+        z=Y_Exact_Matrix,
+        output_path=i_output_path,
+        filename="exact_solution",
+        title="Exact Solution",
+    )
+
+    num_epochs = i_epochs  # num_epochs
+    progress_bar = tqdm(
+        total=num_epochs,
+        desc='Training',
+        unit='epoch',
+        bar_format="{l_bar}{bar:40}{r_bar}{bar:-10b}",
+        colour="green",
+        ncols=100,
+    )
+    loss_array = []  # total loss
+    test_loss_array = []  # test loss
+    time_array = []  # time per epoc
+    # beta - boundary loss parameters
+    beta = tf.constant(i_beta, dtype=i_dtype)
+
+    # ---------------------------------------------------------------#
+    # ------------- TRAINING LOOP ---------------------------------- #
+    # ---------------------------------------------------------------#
+    for epoch in range(num_epochs):
+
+        # Train the model
+        batch_start_time = time.time()
+        loss = model.train_step(beta=beta, bilinear_params_dict=bilinear_params_dict)
+        elapsed = time.time() - batch_start_time
+
+        # print(elapsed)
+        time_array.append(elapsed)
+
+        loss_array.append(loss['loss'])
+
+        # ------ Intermediate results update ------ #
+        if (epoch + 1) % i_update_console_output == 0 or epoch == num_epochs - 1:
+            y_pred = model(test_points).numpy()
+            y_pred = y_pred.reshape(-1)
+
+            error = np.abs(y_exact - y_pred)
+
+            # get errors
+            (
+                l2_error,
+                linf_error,
+                l2_error_relative,
+                linf_error_relative,
+                l1_error,
+                l1_error_relative,
+            ) = compute_errors_combined(y_exact, y_pred)
+
+            loss_pde = float(loss['loss_pde'].numpy())
+            loss_dirichlet = float(loss['loss_dirichlet'].numpy())
+            total_loss = float(loss['loss'].numpy())
+
+            # Append test loss
+            test_loss_array.append(l1_error)
+
+            console.print(f"\nEpoch [bold]{epoch+1}/{num_epochs}[/bold]")
+            console.print("[bold]--------------------[/bold]")
+            console.print("[bold]Beta : [/bold]", beta.numpy(), end=" ")
+            console.print(
+                f"Variational Losses || Pde Loss : [red]{loss_pde:.3e}[/red] Dirichlet Loss : [red]{loss_dirichlet:.3e}[/red] Total Loss : [red]{total_loss:.3e}[/red]"
+            )
+            console.print(
+                f"Test Losses        || L1 Error : {l1_error:.3e} L2 Error : {l2_error:.3e} Linf Error : {linf_error:.3e}"
+            )
+
+            plot_results(
+                loss_array,
+                test_loss_array,
+                y_pred,
+                X,
+                Y,
+                Y_Exact_Matrix,
+                i_output_path,
+                epoch,
+                i_n_test_points_x,
+                i_n_test_points_y,
+            )
+
+        progress_bar.update(1)
+
+    # Save the model
+    model.save_weights(str(Path(i_output_path) / "model_weights"))
+
+    # print the Error values in table
+    print_table(
+        "Error Values",
+        ["Error Type", "Value"],
+        [
+            "L2 Error",
+            "Linf Error",
+            "Relative L2 Error",
+            "Relative Linf Error",
+            "L1 Error",
+            "Relative L1 Error",
+        ],
+        [l2_error, linf_error, l2_error_relative, linf_error_relative, l1_error, l1_error_relative],
+    )
+
+    # print the time values in table
+    print_table(
+        "Time Values",
+        ["Time Type", "Value"],
+        [
+            "Time per Epoch(s) - Median",
+            "Time per Epoch(s) IQR-25% ",
+            "Time per Epoch(s) IQR-75% ",
+            "Mean (s)",
+            "Epochs per second",
+            "Total Train Time",
+        ],
+        [
+            np.median(time_array),
+            np.percentile(time_array, 25),
+            np.percentile(time_array, 75),
+            np.mean(time_array),
+            int(i_epochs / np.sum(time_array)),
+            np.sum(time_array),
+        ],
+    )
+
+    # save all the arrays as numpy arrays
+    np.savetxt(str(Path(i_output_path) / "loss_function.txt"), np.array(loss_array))
+    np.savetxt(str(Path(i_output_path) / "prediction.txt"), y_pred)
+    np.savetxt(str(Path(i_output_path) / "exact.txt"), y_exact)
+    np.savetxt(str(Path(i_output_path) / "error.txt"), error)
+    np.savetxt(str(Path(i_output_path) / "time_per_epoch.txt"), np.array(time_array))
diff --git a/sin_cos.py b/sin_cos.py
new file mode 100644
index 0000000..2b5ced3
--- /dev/null
+++ b/sin_cos.py
@@ -0,0 +1,89 @@
+# Example file for the poisson problem
+# Path: examples/poisson.py
+# file contains the exact solution, rhs and boundary conditions for the poisson problem
+import numpy as np
+import tensorflow as tf
+
+
+def left_boundary(x, y):
+    """
+    This function will return the boundary value for given component of a boundary
+    """
+    val = 0.0
+    return np.ones_like(x) * val
+
+
+def right_boundary(x, y):
+    """
+    This function will return the boundary value for given component of a boundary
+    """
+    val = 0.0
+    return np.ones_like(x) * val
+
+
+def top_boundary(x, y):
+    """
+    This function will return the boundary value for given component of a boundary
+    """
+    val = 0.0
+    return np.ones_like(x) * val
+
+
+def bottom_boundary(x, y):
+    """
+    This function will return the boundary value for given component of a boundary
+    """
+    val = 0.0
+    return np.ones_like(x) * val
+
+
+def rhs(x, y):
+    """
+    This function will return the value of the rhs at a given point
+    """
+    # f_temp =  32 * (x  * (1 - x) + y * (1 - y))
+    # f_temp = 1
+    # For a Laplace Equation, Make the f_temp = np.ones_like(x) * 0.0
+
+    omegaX = 4.0 * np.pi
+    omegaY = 4.0 * np.pi
+    f_temp = -2.0 * (omegaX**2) * (np.sin(omegaX * x) * np.sin(omegaY * y))
+
+    return f_temp
+
+
+def exact_solution(x, y):
+    """
+    This function will return the exact solution at a given point
+    """
+    # If the exact Solution does not have an analytical expression, leave the value as 0(zero)
+    # it can be set using `np.ones_like(x) * 0.0` and then ignore the errors and the error plots generated.
+
+    omegaX = 4.0 * np.pi
+    omegaY = 4.0 * np.pi
+    val = -1.0 * np.sin(omegaX * x) * np.sin(omegaY * y)
+
+    return val
+
+
+def get_boundary_function_dict():
+    """
+    This function will return a dictionary of boundary functions
+    """
+    return {1000: bottom_boundary, 1001: right_boundary, 1002: top_boundary, 1003: left_boundary}
+
+
+def get_bound_cond_dict():
+    """
+    This function will return a dictionary of boundary conditions
+    """
+    return {1000: "dirichlet", 1001: "dirichlet", 1002: "dirichlet", 1003: "dirichlet"}
+
+
+def get_bilinear_params_dict():
+    """
+    This function will return a dictionary of bilinear parameters
+    """
+    eps = 1.0
+
+    return {"eps": eps}
diff --git a/utility.py b/utility.py
new file mode 100644
index 0000000..794a0ea
--- /dev/null
+++ b/utility.py
@@ -0,0 +1,113 @@
+import sys
+import yaml
+import numpy as np
+
+from fastvpinns.utils.plot_utils import plot_contour, plot_loss_function, plot_test_loss_function
+from fastvpinns.utils.compute_utils import compute_errors_combined
+
+
+def get_errors(
+    model,
+    console,
+    y_pred,
+    y_exact,
+    Y_Exact_Matrix,
+    i_n_test_points_x,
+    i_n_test_points_y,
+    i_output_path,
+    epoch,
+    loss,
+    num_epochs,
+):
+    """
+    Calculate and return various error metrics and loss values.
+
+    Args:
+        model (object): The trained model.
+        console (object): The console object for printing messages.
+        y_exact (array): The exact solution.
+        Y_Exact_Matrix (array): The matrix of exact solutions.
+        i_n_test_points_x (int): The number of test points in the x-direction.
+        i_n_test_points_y (int): The number of test points in the y-direction.
+        i_output_path (str): The output path for saving plots.
+        epoch (int): The current epoch number.
+        loss (dict): The dictionary containing different loss values.
+        num_epochs (int): The total number of epochs.
+
+    Returns:
+        dict: A dictionary containing various error metrics and loss values.
+    """
+
+    # Compute error metrics
+    l2_error, linf_error, l2_error_relative, linf_error_relative, l1_error, l1_error_relative = (
+        compute_errors_combined(y_exact, y_pred)
+    )
+
+    # Print epoch information
+    console.print(f"\nEpoch [bold]{epoch+1}/{num_epochs}[/bold]")
+    console.print("[bold]--------------------[/bold]")
+    console.print(
+        f"Variational Losses || Pde Loss : [red]{loss_pde:.3e}[/red] Dirichlet Loss : [red]{loss_dirichlet:.3e}[/red] Total Loss : [red]{total_loss:.3e}[/red]"
+    )
+    console.print(f"Test Losses        || L1 Error : {l1_error:.3e}", end=" ")
+    console.print(f"L2 Error : {l2_error:.3e}", end=" ")
+    console.print(f"Linf Error : {linf_error:.3e}", end="\n")
+
+    return {
+        'l2_error': l2_error,
+        'linf_error': linf_error,
+        'l2_error_relative': l2_error_relative,
+        'linf_error_relative': linf_error_relative,
+        'l1_error': l1_error,
+        'l1_error_relative': l1_error_relative,
+        'loss_pde': loss_pde,
+        'loss_dirichlet': loss_dirichlet,
+        'total_loss': total_loss,
+    }
+
+
+def plot_results(
+    loss_array,
+    test_loss_array,
+    y_pred,
+    X,
+    Y,
+    Y_Exact_Matrix,
+    i_output_path,
+    epoch,
+    i_n_test_points_x,
+    i_n_test_points_y,
+):
+    """
+    Plot the loss function, test loss function, solution, and error.
+
+    Args:
+        loss_array (array): Array of loss values during training.
+        test_loss_array (array): Array of test loss values during training.
+        y_pred (array): Predicted solution.
+        X (array): X-coordinates of the grid.
+        Y (array): Y-coordinates of the grid.
+        Y_Exact_Matrix (array): Matrix of exact solutions.
+        i_output_path (str): Output path for saving plots.
+        epoch (int): Current epoch number.
+        i_n_test_points_x (int): Number of test points in the x-direction.
+        i_n_test_points_y (int): Number of test points in the y-direction.
+    """
+    # plot loss
+    plot_loss_function(loss_array, i_output_path)  # plots NN loss
+    plot_test_loss_function(test_loss_array, i_output_path)  # plots test loss
+
+    # plot solution
+    y_pred = y_pred.reshape(i_n_test_points_x, i_n_test_points_y)
+    error = np.abs(Y_Exact_Matrix - y_pred)
+    plot_contour(
+        x=X,
+        y=Y,
+        z=y_pred,
+        output_path=i_output_path,
+        filename=f"prediction_{epoch+1}",
+        title="Prediction",
+    )
+    plot_contour(
+        x=X, y=Y, z=error, output_path=i_output_path, filename=f"error_{epoch+1}", title="Error"
+    )