From b1dcf3d1e274597b16cebfe8117472fc941d8af9 Mon Sep 17 00:00:00 2001 From: alok Date: Thu, 3 Oct 2024 19:02:39 +0530 Subject: [PATCH 1/4] feat: Add feature to export explanation table to CSV --- samples/sample4/evaluation.json | 13 ++++--------- src/entry.py | 3 ++- src/evaluation.py | 33 +++++++++++++++++++++++++++++--- src/schemas/evaluation_schema.py | 1 + src/utils/file.py | 7 +++++++ 5 files changed, 44 insertions(+), 13 deletions(-) diff --git a/samples/sample4/evaluation.json b/samples/sample4/evaluation.json index ec9b5071..d7e71cd2 100644 --- a/samples/sample4/evaluation.json +++ b/samples/sample4/evaluation.json @@ -1,9 +1,7 @@ { "source_type": "custom", "options": { - "questions_in_order": [ - "q1..11" - ], + "questions_in_order": ["q1..11"], "answers_in_order": [ "B", "D", @@ -11,17 +9,14 @@ "B", "D", "C", - [ - "B", - "C", - "BC" - ], + ["B", "C", "BC"], "A", "C", "D", "C" ], - "should_explain_scoring": true + "should_explain_scoring": true, + "enable_evaluation_table_to_csv": true }, "marking_schemes": { "DEFAULT": { diff --git a/src/entry.py b/src/entry.py index 298bb235..cd744b3f 100644 --- a/src/entry.py +++ b/src/entry.py @@ -45,6 +45,7 @@ def print_config_summary( evaluation_config, args, ): + print(args) logger.info("") table = Table(title="Current Configurations", show_header=False, show_lines=False) table.add_column("Key", style="cyan", no_wrap=True) @@ -273,7 +274,7 @@ def process_files( score = 0 if evaluation_config is not None: - score = evaluate_concatenated_response(omr_response, evaluation_config) + score = evaluate_concatenated_response(omr_response, evaluation_config,file_id) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" ) diff --git a/src/evaluation.py b/src/evaluation.py index 67cdb40c..1545268c 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,5 +1,6 @@ import ast import os +from csv import QUOTE_NONNUMERIC import re from copy import deepcopy @@ -197,6 +198,7 @@ def __init__(self, curr_dir, evaluation_path, template, tuning_config): self.should_explain_scoring = options.get("should_explain_scoring", False) self.has_non_default_section = False self.exclude_files = [] + self.explanation_table_data_for_csv = [] if source_type == "csv": csv_path = curr_dir.joinpath(options["answer_key_csv_path"]) @@ -352,10 +354,34 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self): + def conditionally_print_explanation(self,file_id): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") + self.explanation_to_csv(file_id) + self.explanation_table_data_for_csv = [] + + # Explanation Table to CSV + def explanation_to_csv(self,file_id): + evaluation_json = open_evaluation_with_validation(self.path) + + if evaluation_json['options'].get('enable_evaluation_table_to_csv', False): + explanation_table_header = [] + cols = self.explanation_table.columns + processed_img_name,ext = file_id.split('.') + + for col in cols: + explanation_table_header.append(col.header) + + output_dir = os.path.join(os.path.dirname(os.getcwd()),f'OMRChecker/outputs/Evaluation/{processed_img_name}.csv') + + pd.DataFrame(self.explanation_table_data_for_csv,columns=explanation_table_header,dtype=str).to_csv( + output_dir, + mode="a", + quoting=QUOTE_NONNUMERIC, + index=False, + ) + def get_should_explain_scoring(self): return self.should_explain_scoring @@ -505,9 +531,10 @@ def conditionally_add_explanation( if item is not None ] 
self.explanation_table.add_row(*row) + self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config): +def evaluate_concatenated_response(concatenated_response, evaluation_config,file_id): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: @@ -517,6 +544,6 @@ def evaluate_concatenated_response(concatenated_response, evaluation_config): ) current_score += delta - evaluation_config.conditionally_print_explanation() + evaluation_config.conditionally_print_explanation(file_id) return current_score diff --git a/src/schemas/evaluation_schema.py b/src/schemas/evaluation_schema.py index 8de1af76..091a7625 100644 --- a/src/schemas/evaluation_schema.py +++ b/src/schemas/evaluation_schema.py @@ -138,6 +138,7 @@ ] }, "questions_in_order": ARRAY_OF_STRINGS, + "enable_evaluation_table_to_csv": {"type": "boolean", "default": False} }, } } diff --git a/src/utils/file.py b/src/utils/file.py index 5513381f..3f6eca0d 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -1,6 +1,7 @@ import argparse import json import os +import shutil from csv import QUOTE_NONNUMERIC from time import localtime, strftime @@ -25,6 +26,7 @@ def __init__(self, output_dir): self.save_marked_dir = output_dir.joinpath("CheckedOMRs") self.results_dir = output_dir.joinpath("Results") self.manual_dir = output_dir.joinpath("Manual") + self.evaluation_dir = output_dir.joinpath("Evaluation") self.errors_dir = self.manual_dir.joinpath("ErrorFiles") self.multi_marked_dir = self.manual_dir.joinpath("MultiMarkedFiles") @@ -44,6 +46,11 @@ def setup_dirs_for_paths(paths): logger.info(f"Created : {save_output_dir}") os.makedirs(save_output_dir) + for save_output_dir in [paths.evaluation_dir]: + if os.path.exists(save_output_dir): + shutil.rmtree(save_output_dir) + os.makedirs(save_output_dir) + for save_output_dir in [paths.multi_marked_dir, paths.errors_dir]: if not os.path.exists(save_output_dir): logger.info(f"Created : {save_output_dir}") From 5bcaac1c671518670530c161922d5c31a2d842d3 Mon Sep 17 00:00:00 2001 From: alok Date: Sun, 6 Oct 2024 12:05:18 +0530 Subject: [PATCH 2/4] Remove print args statement --- samples/sample4/evaluation.json | 10 ++++++-- src/core.py | 2 +- src/entry.py | 19 ++++++++------- src/evaluation.py | 41 +++++++++++++++++++------------- src/schemas/evaluation_schema.py | 5 +++- 5 files changed, 47 insertions(+), 30 deletions(-) diff --git a/samples/sample4/evaluation.json b/samples/sample4/evaluation.json index d7e71cd2..50db0fbe 100644 --- a/samples/sample4/evaluation.json +++ b/samples/sample4/evaluation.json @@ -1,7 +1,9 @@ { "source_type": "custom", "options": { - "questions_in_order": ["q1..11"], + "questions_in_order": [ + "q1..11" + ], "answers_in_order": [ "B", "D", @@ -9,7 +11,11 @@ "B", "D", "C", - ["B", "C", "BC"], + [ + "B", + "C", + "BC" + ], "A", "C", "D", diff --git a/src/core.py b/src/core.py index b4246789..d475105f 100644 --- a/src/core.py +++ b/src/core.py @@ -243,7 +243,7 @@ def read_omr_response(self, template, image, name, save_dir=None): global_thr, _, _ = self.get_global_threshold(all_q_vals, looseness=4) logger.info( - f"Thresholding:\tglobal_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 255) else ''}" + f"Thresholding: \tglobal_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 
255) else ''}" ) # plt.show() # hist = getPlotImg() diff --git a/src/entry.py b/src/entry.py index cd744b3f..cc0877dc 100644 --- a/src/entry.py +++ b/src/entry.py @@ -45,7 +45,6 @@ def print_config_summary( evaluation_config, args, ): - print(args) logger.info("") table = Table(title="Current Configurations", show_header=False, show_lines=False) table.add_column("Key", style="cyan", no_wrap=True) @@ -274,7 +273,9 @@ def process_files( score = 0 if evaluation_config is not None: - score = evaluate_concatenated_response(omr_response, evaluation_config,file_id) + score = evaluate_concatenated_response( + omr_response, evaluation_config, file_id + ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" ) @@ -343,25 +344,25 @@ def print_stats(start_time, files_counter, tuning_config): time_checking = max(1, round(time() - start_time, 2)) log = logger.info log("") - log(f"{'Total file(s) moved':<27}: {STATS.files_moved}") - log(f"{'Total file(s) not moved':<27}: {STATS.files_not_moved}") + log(f"{'Total file(s) moved': <27}: {STATS.files_moved}") + log(f"{'Total file(s) not moved': <27}: {STATS.files_not_moved}") log("--------------------------------") log( - f"{'Total file(s) processed':<27}: {files_counter} ({'Sum Tallied!' if files_counter == (STATS.files_moved + STATS.files_not_moved) else 'Not Tallying!'})" + f"{'Total file(s) processed': <27}: {files_counter} ({'Sum Tallied!' if files_counter == (STATS.files_moved + STATS.files_not_moved) else 'Not Tallying!'})" ) if tuning_config.outputs.show_image_level <= 0: log( - f"\nFinished Checking {files_counter} file(s) in {round(time_checking, 1)} seconds i.e. ~{round(time_checking/60, 1)} minute(s)." + f"\nFinished Checking {files_counter} file(s) in {round(time_checking, 1)} seconds i.e. ~{round(time_checking / 60, 1)} minute(s)." 
) log( - f"{'OMR Processing Rate':<27}:\t ~ {round(time_checking/files_counter,2)} seconds/OMR" + f"{'OMR Processing Rate': <27}: \t ~ {round(time_checking / files_counter, 2)} seconds/OMR" ) log( - f"{'OMR Processing Speed':<27}:\t ~ {round((files_counter * 60) / time_checking, 2)} OMRs/minute" + f"{'OMR Processing Speed': <27}: \t ~ {round((files_counter * 60) / time_checking, 2)} OMRs/minute" ) else: - log(f"\n{'Total script time':<27}: {time_checking} seconds") + log(f"\n{'Total script time': <27}: {time_checking} seconds") if tuning_config.outputs.show_image_level <= 1: log( diff --git a/src/evaluation.py b/src/evaluation.py index 1545268c..7f735427 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,8 +1,8 @@ import ast import os -from csv import QUOTE_NONNUMERIC import re from copy import deepcopy +from csv import QUOTE_NONNUMERIC import cv2 import pandas as pd @@ -354,33 +354,40 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self,file_id): + def conditionally_print_explanation(self, file_id): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") self.explanation_to_csv(file_id) self.explanation_table_data_for_csv = [] - # Explanation Table to CSV - def explanation_to_csv(self,file_id): + # Explanation Table to CSV + def explanation_to_csv(self, file_id): evaluation_json = open_evaluation_with_validation(self.path) - - if evaluation_json['options'].get('enable_evaluation_table_to_csv', False): + + if evaluation_json["options"].get("enable_evaluation_table_to_csv", False): explanation_table_header = [] cols = self.explanation_table.columns - processed_img_name,ext = file_id.split('.') + processed_img_name, ext = file_id.split(".") for col in cols: explanation_table_header.append(col.header) - - output_dir = os.path.join(os.path.dirname(os.getcwd()),f'OMRChecker/outputs/Evaluation/{processed_img_name}.csv') - - pd.DataFrame(self.explanation_table_data_for_csv,columns=explanation_table_header,dtype=str).to_csv( - output_dir, - mode="a", - quoting=QUOTE_NONNUMERIC, - index=False, - ) + + output_dir = os.path.join( + os.path.dirname(os.getcwd()), + f"OMRChecker/outputs/Evaluation/{processed_img_name}.csv", + ) + + pd.DataFrame( + self.explanation_table_data_for_csv, + columns=explanation_table_header, + dtype=str, + ).to_csv( + output_dir, + mode="a", + quoting=QUOTE_NONNUMERIC, + index=False, + ) def get_should_explain_scoring(self): return self.should_explain_scoring @@ -534,7 +541,7 @@ def conditionally_add_explanation( self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config,file_id): +def evaluate_concatenated_response(concatenated_response, evaluation_config, file_id): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: diff --git a/src/schemas/evaluation_schema.py b/src/schemas/evaluation_schema.py index 091a7625..5f2237fd 100644 --- a/src/schemas/evaluation_schema.py +++ b/src/schemas/evaluation_schema.py @@ -138,7 +138,10 @@ ] }, "questions_in_order": ARRAY_OF_STRINGS, - "enable_evaluation_table_to_csv": {"type": "boolean", "default": False} + "enable_evaluation_table_to_csv": { + "type": "boolean", + "default": False, + }, }, } } From 32d9079819c4c0d4544411b2fc4ca1f709f965bd Mon Sep 17 00:00:00 2001 From: alok Date: Tue, 8 Oct 2024 23:21:41 +0530 Subject: [PATCH 3/4] Refactored code to 
improve reusability --- src/entry.py | 5 ++++- src/evaluation.py | 40 +++++++++++++++------------------------- src/utils/file.py | 7 +++---- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/entry.py b/src/entry.py index cc0877dc..d1593116 100644 --- a/src/entry.py +++ b/src/entry.py @@ -209,6 +209,9 @@ def process_files( for file_path in omr_files: files_counter += 1 file_name = file_path.name + evaluation_path = os.path.join( + outputs_namespace.paths.evaluation_dir, file_path.stem + ) in_omr = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE) @@ -274,7 +277,7 @@ def process_files( score = 0 if evaluation_config is not None: score = evaluate_concatenated_response( - omr_response, evaluation_config, file_id + omr_response, evaluation_config, evaluation_path ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" diff --git a/src/evaluation.py b/src/evaluation.py index 7f735427..360bb3bc 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -198,7 +198,9 @@ def __init__(self, curr_dir, evaluation_path, template, tuning_config): self.should_explain_scoring = options.get("should_explain_scoring", False) self.has_non_default_section = False self.exclude_files = [] - self.explanation_table_data_for_csv = [] + self.enable_evaluation_table_to_csv = options.get( + "enable_evaluation_table_to_csv", False + ) if source_type == "csv": csv_path = curr_dir.joinpath(options["answer_key_csv_path"]) @@ -354,35 +356,21 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self, file_id): + def conditionally_print_explanation(self): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") - self.explanation_to_csv(file_id) - self.explanation_table_data_for_csv = [] - # Explanation Table to CSV - def explanation_to_csv(self, file_id): - evaluation_json = open_evaluation_with_validation(self.path) - - if evaluation_json["options"].get("enable_evaluation_table_to_csv", False): - explanation_table_header = [] - cols = self.explanation_table.columns - processed_img_name, ext = file_id.split(".") - - for col in cols: - explanation_table_header.append(col.header) + def conditionally_save_explanation_csv(self, evaluation_path): + if self.enable_evaluation_table_to_csv: + data = {col.header: col._cells for col in self.explanation_table.columns} output_dir = os.path.join( - os.path.dirname(os.getcwd()), - f"OMRChecker/outputs/Evaluation/{processed_img_name}.csv", + os.getcwd(), + f"{evaluation_path}.csv", ) - pd.DataFrame( - self.explanation_table_data_for_csv, - columns=explanation_table_header, - dtype=str, - ).to_csv( + pd.DataFrame(data, dtype=str).to_csv( output_dir, mode="a", quoting=QUOTE_NONNUMERIC, @@ -538,10 +526,11 @@ def conditionally_add_explanation( if item is not None ] self.explanation_table.add_row(*row) - self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config, file_id): +def evaluate_concatenated_response( + concatenated_response, evaluation_config, evaluation_path +): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: @@ -551,6 +540,7 @@ def evaluate_concatenated_response(concatenated_response, evaluation_config, fil ) current_score += delta - evaluation_config.conditionally_print_explanation(file_id) + evaluation_config.conditionally_print_explanation() + 
evaluation_config.conditionally_save_explanation_csv(evaluation_path) return current_score diff --git a/src/utils/file.py b/src/utils/file.py index 3f6eca0d..40ed6bd4 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -1,7 +1,6 @@ import argparse import json import os -import shutil from csv import QUOTE_NONNUMERIC from time import localtime, strftime @@ -47,9 +46,9 @@ def setup_dirs_for_paths(paths): os.makedirs(save_output_dir) for save_output_dir in [paths.evaluation_dir]: - if os.path.exists(save_output_dir): - shutil.rmtree(save_output_dir) - os.makedirs(save_output_dir) + if not os.path.exists(save_output_dir): + logger.info(f"Created : {save_output_dir}") + os.makedirs(save_output_dir) for save_output_dir in [paths.multi_marked_dir, paths.errors_dir]: if not os.path.exists(save_output_dir): From fb0dd759448f9ca09b5ac3634125e23dfbc6e84d Mon Sep 17 00:00:00 2001 From: alok Date: Sun, 13 Oct 2024 17:27:52 +0530 Subject: [PATCH 4/4] Refactor code in file.py, evaluation.py and entry.py --- src/entry.py | 5 +---- src/evaluation.py | 14 +++++++------- src/utils/file.py | 7 +------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/entry.py b/src/entry.py index d1593116..5cfcfd95 100644 --- a/src/entry.py +++ b/src/entry.py @@ -209,9 +209,6 @@ def process_files( for file_path in omr_files: files_counter += 1 file_name = file_path.name - evaluation_path = os.path.join( - outputs_namespace.paths.evaluation_dir, file_path.stem - ) in_omr = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE) @@ -277,7 +274,7 @@ def process_files( score = 0 if evaluation_config is not None: score = evaluate_concatenated_response( - omr_response, evaluation_config, evaluation_path + omr_response, evaluation_config, file_path, outputs_namespace.paths.evaluation_dir ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" diff --git a/src/evaluation.py b/src/evaluation.py index 360bb3bc..0b567169 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -361,17 +361,17 @@ def conditionally_print_explanation(self): console.print(self.explanation_table, justify="center") # Explanation Table to CSV - def conditionally_save_explanation_csv(self, evaluation_path): + def conditionally_save_explanation_csv(self, file_path, evaluation_output_dir): if self.enable_evaluation_table_to_csv: data = {col.header: col._cells for col in self.explanation_table.columns} - output_dir = os.path.join( - os.getcwd(), - f"{evaluation_path}.csv", + output_path = os.path.join( + evaluation_output_dir, + f"{file_path.stem}_evaluation.csv", ) pd.DataFrame(data, dtype=str).to_csv( - output_dir, + output_path, mode="a", quoting=QUOTE_NONNUMERIC, index=False, @@ -529,7 +529,7 @@ def conditionally_add_explanation( def evaluate_concatenated_response( - concatenated_response, evaluation_config, evaluation_path + concatenated_response, evaluation_config, file_path, evaluation_output_dir ): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 @@ -541,6 +541,6 @@ def evaluate_concatenated_response( current_score += delta evaluation_config.conditionally_print_explanation() - evaluation_config.conditionally_save_explanation_csv(evaluation_path) + evaluation_config.conditionally_save_explanation_csv(file_path, evaluation_output_dir) return current_score diff --git a/src/utils/file.py b/src/utils/file.py index 40ed6bd4..08536ff1 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -40,12 +40,7 @@ def setup_dirs_for_paths(paths): 
os.mkdir(save_output_dir.joinpath("_MULTI_")) os.mkdir(save_output_dir.joinpath("_MULTI_", "stack")) - for save_output_dir in [paths.manual_dir, paths.results_dir]: - if not os.path.exists(save_output_dir): - logger.info(f"Created : {save_output_dir}") - os.makedirs(save_output_dir) - - for save_output_dir in [paths.evaluation_dir]: + for save_output_dir in [paths.manual_dir, paths.results_dir, paths.evaluation_dir]: if not os.path.exists(save_output_dir): logger.info(f"Created : {save_output_dir}") os.makedirs(save_output_dir)
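A minimal, self-contained sketch of the CSV-export flow this series converges on in patch 4, useful for trying the behaviour outside the full pipeline. It assumes Rich and pandas are installed; save_table_as_csv, the demo table contents, and the sample paths below are illustrative stand-ins rather than part of the patches. As in conditionally_save_explanation_csv, it reads the private Column._cells attribute of the Rich table and appends rows to a per-image CSV named after the input file's stem.

import os
from csv import QUOTE_NONNUMERIC
from pathlib import Path

import pandas as pd
from rich.table import Table


def save_table_as_csv(table: Table, file_path: Path, evaluation_output_dir: str) -> str:
    # Mirror the patch: one CSV per processed image, written into the
    # Evaluation output directory as "<stem>_evaluation.csv".
    data = {col.header: list(col._cells) for col in table.columns}  # _cells is a private Rich attribute
    output_path = os.path.join(evaluation_output_dir, f"{file_path.stem}_evaluation.csv")
    os.makedirs(evaluation_output_dir, exist_ok=True)  # setup_dirs_for_paths handles this in the project
    pd.DataFrame(data, dtype=str).to_csv(
        output_path,
        mode="a",  # append, as in the patch; rerunning on the same image appends duplicate rows
        quoting=QUOTE_NONNUMERIC,
        index=False,
    )
    return output_path


if __name__ == "__main__":
    # Illustrative explanation table; the real headers come from the
    # explanation table built in src/evaluation.py.
    demo = Table(title="Evaluation Explanation")
    for header in ("Question", "Marked", "Answer", "Verdict", "Delta", "Score"):
        demo.add_column(header)
    demo.add_row("q1", "B", "B", "Correct", "3.0", "3.0")
    demo.add_row("q2", "C", "D", "Incorrect", "0.0", "3.0")
    print(save_table_as_csv(demo, Path("sample_omr_001.jpg"), "outputs/Evaluation"))

Because the schema defaults enable_evaluation_table_to_csv to False, the export only runs for directories whose evaluation.json opts in, as samples/sample4/evaluation.json does in patch 1.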