From b1dcf3d1e274597b16cebfe8117472fc941d8af9 Mon Sep 17 00:00:00 2001 From: alok Date: Thu, 3 Oct 2024 19:02:39 +0530 Subject: [PATCH 1/4] feat: Add feature to export explanation table to CSV --- samples/sample4/evaluation.json | 13 ++++--------- src/entry.py | 3 ++- src/evaluation.py | 33 +++++++++++++++++++++++++++++--- src/schemas/evaluation_schema.py | 1 + src/utils/file.py | 7 +++++++ 5 files changed, 44 insertions(+), 13 deletions(-) diff --git a/samples/sample4/evaluation.json b/samples/sample4/evaluation.json index ec9b5071..d7e71cd2 100644 --- a/samples/sample4/evaluation.json +++ b/samples/sample4/evaluation.json @@ -1,9 +1,7 @@ { "source_type": "custom", "options": { - "questions_in_order": [ - "q1..11" - ], + "questions_in_order": ["q1..11"], "answers_in_order": [ "B", "D", @@ -11,17 +9,14 @@ "B", "D", "C", - [ - "B", - "C", - "BC" - ], + ["B", "C", "BC"], "A", "C", "D", "C" ], - "should_explain_scoring": true + "should_explain_scoring": true, + "enable_evaluation_table_to_csv": true }, "marking_schemes": { "DEFAULT": { diff --git a/src/entry.py b/src/entry.py index 298bb235..cd744b3f 100644 --- a/src/entry.py +++ b/src/entry.py @@ -45,6 +45,7 @@ def print_config_summary( evaluation_config, args, ): + print(args) logger.info("") table = Table(title="Current Configurations", show_header=False, show_lines=False) table.add_column("Key", style="cyan", no_wrap=True) @@ -273,7 +274,7 @@ def process_files( score = 0 if evaluation_config is not None: - score = evaluate_concatenated_response(omr_response, evaluation_config) + score = evaluate_concatenated_response(omr_response, evaluation_config,file_id) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" ) diff --git a/src/evaluation.py b/src/evaluation.py index 67cdb40c..1545268c 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,5 +1,6 @@ import ast import os +from csv import QUOTE_NONNUMERIC import re from copy import deepcopy @@ -197,6 +198,7 @@ def __init__(self, curr_dir, evaluation_path, template, tuning_config): self.should_explain_scoring = options.get("should_explain_scoring", False) self.has_non_default_section = False self.exclude_files = [] + self.explanation_table_data_for_csv = [] if source_type == "csv": csv_path = curr_dir.joinpath(options["answer_key_csv_path"]) @@ -352,10 +354,34 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self): + def conditionally_print_explanation(self,file_id): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") + self.explanation_to_csv(file_id) + self.explanation_table_data_for_csv = [] + + # Explanation Table to CSV + def explanation_to_csv(self,file_id): + evaluation_json = open_evaluation_with_validation(self.path) + + if evaluation_json['options'].get('enable_evaluation_table_to_csv', False): + explanation_table_header = [] + cols = self.explanation_table.columns + processed_img_name,ext = file_id.split('.') + + for col in cols: + explanation_table_header.append(col.header) + + output_dir = os.path.join(os.path.dirname(os.getcwd()),f'OMRChecker/outputs/Evaluation/{processed_img_name}.csv') + + pd.DataFrame(self.explanation_table_data_for_csv,columns=explanation_table_header,dtype=str).to_csv( + output_dir, + mode="a", + quoting=QUOTE_NONNUMERIC, + index=False, + ) + def get_should_explain_scoring(self): return self.should_explain_scoring @@ -505,9 +531,10 @@ def conditionally_add_explanation( if item is not None ] 
self.explanation_table.add_row(*row) + self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config): +def evaluate_concatenated_response(concatenated_response, evaluation_config,file_id): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: @@ -517,6 +544,6 @@ def evaluate_concatenated_response(concatenated_response, evaluation_config): ) current_score += delta - evaluation_config.conditionally_print_explanation() + evaluation_config.conditionally_print_explanation(file_id) return current_score diff --git a/src/schemas/evaluation_schema.py b/src/schemas/evaluation_schema.py index 8de1af76..091a7625 100644 --- a/src/schemas/evaluation_schema.py +++ b/src/schemas/evaluation_schema.py @@ -138,6 +138,7 @@ ] }, "questions_in_order": ARRAY_OF_STRINGS, + "enable_evaluation_table_to_csv": {"type": "boolean", "default": False} }, } } diff --git a/src/utils/file.py b/src/utils/file.py index 5513381f..3f6eca0d 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -1,6 +1,7 @@ import argparse import json import os +import shutil from csv import QUOTE_NONNUMERIC from time import localtime, strftime @@ -25,6 +26,7 @@ def __init__(self, output_dir): self.save_marked_dir = output_dir.joinpath("CheckedOMRs") self.results_dir = output_dir.joinpath("Results") self.manual_dir = output_dir.joinpath("Manual") + self.evaluation_dir = output_dir.joinpath("Evaluation") self.errors_dir = self.manual_dir.joinpath("ErrorFiles") self.multi_marked_dir = self.manual_dir.joinpath("MultiMarkedFiles") @@ -44,6 +46,11 @@ def setup_dirs_for_paths(paths): logger.info(f"Created : {save_output_dir}") os.makedirs(save_output_dir) + for save_output_dir in [paths.evaluation_dir]: + if os.path.exists(save_output_dir): + shutil.rmtree(save_output_dir) + os.makedirs(save_output_dir) + for save_output_dir in [paths.multi_marked_dir, paths.errors_dir]: if not os.path.exists(save_output_dir): logger.info(f"Created : {save_output_dir}") From 5bcaac1c671518670530c161922d5c31a2d842d3 Mon Sep 17 00:00:00 2001 From: alok Date: Sun, 6 Oct 2024 12:05:18 +0530 Subject: [PATCH 2/4] Remove print args statement --- samples/sample4/evaluation.json | 10 ++++++-- src/core.py | 2 +- src/entry.py | 19 ++++++++------- src/evaluation.py | 41 +++++++++++++++++++------------- src/schemas/evaluation_schema.py | 5 +++- 5 files changed, 47 insertions(+), 30 deletions(-) diff --git a/samples/sample4/evaluation.json b/samples/sample4/evaluation.json index d7e71cd2..50db0fbe 100644 --- a/samples/sample4/evaluation.json +++ b/samples/sample4/evaluation.json @@ -1,7 +1,9 @@ { "source_type": "custom", "options": { - "questions_in_order": ["q1..11"], + "questions_in_order": [ + "q1..11" + ], "answers_in_order": [ "B", "D", @@ -9,7 +11,11 @@ "B", "D", "C", - ["B", "C", "BC"], + [ + "B", + "C", + "BC" + ], "A", "C", "D", diff --git a/src/core.py b/src/core.py index b4246789..d475105f 100644 --- a/src/core.py +++ b/src/core.py @@ -243,7 +243,7 @@ def read_omr_response(self, template, image, name, save_dir=None): global_thr, _, _ = self.get_global_threshold(all_q_vals, looseness=4) logger.info( - f"Thresholding:\tglobal_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 255) else ''}" + f"Thresholding: \tglobal_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 
255) else ''}" ) # plt.show() # hist = getPlotImg() diff --git a/src/entry.py b/src/entry.py index cd744b3f..cc0877dc 100644 --- a/src/entry.py +++ b/src/entry.py @@ -45,7 +45,6 @@ def print_config_summary( evaluation_config, args, ): - print(args) logger.info("") table = Table(title="Current Configurations", show_header=False, show_lines=False) table.add_column("Key", style="cyan", no_wrap=True) @@ -274,7 +273,9 @@ def process_files( score = 0 if evaluation_config is not None: - score = evaluate_concatenated_response(omr_response, evaluation_config,file_id) + score = evaluate_concatenated_response( + omr_response, evaluation_config, file_id + ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" ) @@ -343,25 +344,25 @@ def print_stats(start_time, files_counter, tuning_config): time_checking = max(1, round(time() - start_time, 2)) log = logger.info log("") - log(f"{'Total file(s) moved':<27}: {STATS.files_moved}") - log(f"{'Total file(s) not moved':<27}: {STATS.files_not_moved}") + log(f"{'Total file(s) moved': <27}: {STATS.files_moved}") + log(f"{'Total file(s) not moved': <27}: {STATS.files_not_moved}") log("--------------------------------") log( - f"{'Total file(s) processed':<27}: {files_counter} ({'Sum Tallied!' if files_counter == (STATS.files_moved + STATS.files_not_moved) else 'Not Tallying!'})" + f"{'Total file(s) processed': <27}: {files_counter} ({'Sum Tallied!' if files_counter == (STATS.files_moved + STATS.files_not_moved) else 'Not Tallying!'})" ) if tuning_config.outputs.show_image_level <= 0: log( - f"\nFinished Checking {files_counter} file(s) in {round(time_checking, 1)} seconds i.e. ~{round(time_checking/60, 1)} minute(s)." + f"\nFinished Checking {files_counter} file(s) in {round(time_checking, 1)} seconds i.e. ~{round(time_checking / 60, 1)} minute(s)." 
) log( - f"{'OMR Processing Rate':<27}:\t ~ {round(time_checking/files_counter,2)} seconds/OMR" + f"{'OMR Processing Rate': <27}: \t ~ {round(time_checking / files_counter, 2)} seconds/OMR" ) log( - f"{'OMR Processing Speed':<27}:\t ~ {round((files_counter * 60) / time_checking, 2)} OMRs/minute" + f"{'OMR Processing Speed': <27}: \t ~ {round((files_counter * 60) / time_checking, 2)} OMRs/minute" ) else: - log(f"\n{'Total script time':<27}: {time_checking} seconds") + log(f"\n{'Total script time': <27}: {time_checking} seconds") if tuning_config.outputs.show_image_level <= 1: log( diff --git a/src/evaluation.py b/src/evaluation.py index 1545268c..7f735427 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -1,8 +1,8 @@ import ast import os -from csv import QUOTE_NONNUMERIC import re from copy import deepcopy +from csv import QUOTE_NONNUMERIC import cv2 import pandas as pd @@ -354,33 +354,40 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self,file_id): + def conditionally_print_explanation(self, file_id): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") self.explanation_to_csv(file_id) self.explanation_table_data_for_csv = [] - # Explanation Table to CSV - def explanation_to_csv(self,file_id): + # Explanation Table to CSV + def explanation_to_csv(self, file_id): evaluation_json = open_evaluation_with_validation(self.path) - - if evaluation_json['options'].get('enable_evaluation_table_to_csv', False): + + if evaluation_json["options"].get("enable_evaluation_table_to_csv", False): explanation_table_header = [] cols = self.explanation_table.columns - processed_img_name,ext = file_id.split('.') + processed_img_name, ext = file_id.split(".") for col in cols: explanation_table_header.append(col.header) - - output_dir = os.path.join(os.path.dirname(os.getcwd()),f'OMRChecker/outputs/Evaluation/{processed_img_name}.csv') - - pd.DataFrame(self.explanation_table_data_for_csv,columns=explanation_table_header,dtype=str).to_csv( - output_dir, - mode="a", - quoting=QUOTE_NONNUMERIC, - index=False, - ) + + output_dir = os.path.join( + os.path.dirname(os.getcwd()), + f"OMRChecker/outputs/Evaluation/{processed_img_name}.csv", + ) + + pd.DataFrame( + self.explanation_table_data_for_csv, + columns=explanation_table_header, + dtype=str, + ).to_csv( + output_dir, + mode="a", + quoting=QUOTE_NONNUMERIC, + index=False, + ) def get_should_explain_scoring(self): return self.should_explain_scoring @@ -534,7 +541,7 @@ def conditionally_add_explanation( self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config,file_id): +def evaluate_concatenated_response(concatenated_response, evaluation_config, file_id): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: diff --git a/src/schemas/evaluation_schema.py b/src/schemas/evaluation_schema.py index 091a7625..5f2237fd 100644 --- a/src/schemas/evaluation_schema.py +++ b/src/schemas/evaluation_schema.py @@ -138,7 +138,10 @@ ] }, "questions_in_order": ARRAY_OF_STRINGS, - "enable_evaluation_table_to_csv": {"type": "boolean", "default": False} + "enable_evaluation_table_to_csv": { + "type": "boolean", + "default": False, + }, }, } } From 32d9079819c4c0d4544411b2fc4ca1f709f965bd Mon Sep 17 00:00:00 2001 From: alok Date: Tue, 8 Oct 2024 23:21:41 +0530 Subject: [PATCH 3/4] Refactored code to 
improve reusability --- src/entry.py | 5 ++++- src/evaluation.py | 40 +++++++++++++++------------------------- src/utils/file.py | 7 +++---- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/src/entry.py b/src/entry.py index cc0877dc..d1593116 100644 --- a/src/entry.py +++ b/src/entry.py @@ -209,6 +209,9 @@ def process_files( for file_path in omr_files: files_counter += 1 file_name = file_path.name + evaluation_path = os.path.join( + outputs_namespace.paths.evaluation_dir, file_path.stem + ) in_omr = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE) @@ -274,7 +277,7 @@ def process_files( score = 0 if evaluation_config is not None: score = evaluate_concatenated_response( - omr_response, evaluation_config, file_id + omr_response, evaluation_config, evaluation_path ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" diff --git a/src/evaluation.py b/src/evaluation.py index 7f735427..360bb3bc 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -198,7 +198,9 @@ def __init__(self, curr_dir, evaluation_path, template, tuning_config): self.should_explain_scoring = options.get("should_explain_scoring", False) self.has_non_default_section = False self.exclude_files = [] - self.explanation_table_data_for_csv = [] + self.enable_evaluation_table_to_csv = options.get( + "enable_evaluation_table_to_csv", False + ) if source_type == "csv": csv_path = curr_dir.joinpath(options["answer_key_csv_path"]) @@ -354,35 +356,21 @@ def match_answer_for_question(self, current_score, question, marked_answer): ) return delta - def conditionally_print_explanation(self, file_id): + def conditionally_print_explanation(self): if self.should_explain_scoring: console.print(self.explanation_table, justify="center") - self.explanation_to_csv(file_id) - self.explanation_table_data_for_csv = [] - # Explanation Table to CSV - def explanation_to_csv(self, file_id): - evaluation_json = open_evaluation_with_validation(self.path) - - if evaluation_json["options"].get("enable_evaluation_table_to_csv", False): - explanation_table_header = [] - cols = self.explanation_table.columns - processed_img_name, ext = file_id.split(".") - - for col in cols: - explanation_table_header.append(col.header) + def conditionally_save_explanation_csv(self, evaluation_path): + if self.enable_evaluation_table_to_csv: + data = {col.header: col._cells for col in self.explanation_table.columns} output_dir = os.path.join( - os.path.dirname(os.getcwd()), - f"OMRChecker/outputs/Evaluation/{processed_img_name}.csv", + os.getcwd(), + f"{evaluation_path}.csv", ) - pd.DataFrame( - self.explanation_table_data_for_csv, - columns=explanation_table_header, - dtype=str, - ).to_csv( + pd.DataFrame(data, dtype=str).to_csv( output_dir, mode="a", quoting=QUOTE_NONNUMERIC, @@ -538,10 +526,11 @@ def conditionally_add_explanation( if item is not None ] self.explanation_table.add_row(*row) - self.explanation_table_data_for_csv.append(row) -def evaluate_concatenated_response(concatenated_response, evaluation_config, file_id): +def evaluate_concatenated_response( + concatenated_response, evaluation_config, evaluation_path +): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 for question in evaluation_config.questions_in_order: @@ -551,6 +540,7 @@ def evaluate_concatenated_response(concatenated_response, evaluation_config, fil ) current_score += delta - evaluation_config.conditionally_print_explanation(file_id) + evaluation_config.conditionally_print_explanation() + 
evaluation_config.conditionally_save_explanation_csv(evaluation_path) return current_score diff --git a/src/utils/file.py b/src/utils/file.py index 3f6eca0d..40ed6bd4 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -1,7 +1,6 @@ import argparse import json import os -import shutil from csv import QUOTE_NONNUMERIC from time import localtime, strftime @@ -47,9 +46,9 @@ def setup_dirs_for_paths(paths): os.makedirs(save_output_dir) for save_output_dir in [paths.evaluation_dir]: - if os.path.exists(save_output_dir): - shutil.rmtree(save_output_dir) - os.makedirs(save_output_dir) + if not os.path.exists(save_output_dir): + logger.info(f"Created : {save_output_dir}") + os.makedirs(save_output_dir) for save_output_dir in [paths.multi_marked_dir, paths.errors_dir]: if not os.path.exists(save_output_dir): From fb0dd759448f9ca09b5ac3634125e23dfbc6e84d Mon Sep 17 00:00:00 2001 From: alok Date: Sun, 13 Oct 2024 17:27:52 +0530 Subject: [PATCH 4/4] Refactor code in file.py, evaluation.py and entry.py --- src/entry.py | 5 +---- src/evaluation.py | 14 +++++++------- src/utils/file.py | 7 +------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/entry.py b/src/entry.py index d1593116..5cfcfd95 100644 --- a/src/entry.py +++ b/src/entry.py @@ -209,9 +209,6 @@ def process_files( for file_path in omr_files: files_counter += 1 file_name = file_path.name - evaluation_path = os.path.join( - outputs_namespace.paths.evaluation_dir, file_path.stem - ) in_omr = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE) @@ -277,7 +274,7 @@ def process_files( score = 0 if evaluation_config is not None: score = evaluate_concatenated_response( - omr_response, evaluation_config, evaluation_path + omr_response, evaluation_config, file_path, outputs_namespace.paths.evaluation_dir ) logger.info( f"(/{files_counter}) Graded with score: {round(score, 2)}\t for file: '{file_id}'" diff --git a/src/evaluation.py b/src/evaluation.py index 360bb3bc..0b567169 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -361,17 +361,17 @@ def conditionally_print_explanation(self): console.print(self.explanation_table, justify="center") # Explanation Table to CSV - def conditionally_save_explanation_csv(self, evaluation_path): + def conditionally_save_explanation_csv(self, file_path, evaluation_output_dir): if self.enable_evaluation_table_to_csv: data = {col.header: col._cells for col in self.explanation_table.columns} - output_dir = os.path.join( - os.getcwd(), - f"{evaluation_path}.csv", + output_path = os.path.join( + evaluation_output_dir, + f"{file_path.stem}_evaluation.csv", ) pd.DataFrame(data, dtype=str).to_csv( - output_dir, + output_path, mode="a", quoting=QUOTE_NONNUMERIC, index=False, @@ -529,7 +529,7 @@ def conditionally_add_explanation( def evaluate_concatenated_response( - concatenated_response, evaluation_config, evaluation_path + concatenated_response, evaluation_config, file_path, evaluation_output_dir ): evaluation_config.prepare_and_validate_omr_response(concatenated_response) current_score = 0.0 @@ -541,6 +541,6 @@ def evaluate_concatenated_response( current_score += delta evaluation_config.conditionally_print_explanation() - evaluation_config.conditionally_save_explanation_csv(evaluation_path) + evaluation_config.conditionally_save_explanation_csv(file_path, evaluation_output_dir) return current_score diff --git a/src/utils/file.py b/src/utils/file.py index 40ed6bd4..08536ff1 100644 --- a/src/utils/file.py +++ b/src/utils/file.py @@ -40,12 +40,7 @@ def setup_dirs_for_paths(paths): 
os.mkdir(save_output_dir.joinpath("_MULTI_")) os.mkdir(save_output_dir.joinpath("_MULTI_", "stack")) - for save_output_dir in [paths.manual_dir, paths.results_dir]: - if not os.path.exists(save_output_dir): - logger.info(f"Created : {save_output_dir}") - os.makedirs(save_output_dir) - - for save_output_dir in [paths.evaluation_dir]: + for save_output_dir in [paths.manual_dir, paths.results_dir, paths.evaluation_dir]: if not os.path.exists(save_output_dir): logger.info(f"Created : {save_output_dir}") os.makedirs(save_output_dir)
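A minimal, self-contained sketch of the CSV-export flow this series converges on in patch 4, useful for trying the behaviour outside the full pipeline. It assumes Rich and pandas are installed; save_table_as_csv, the demo table contents, and the sample paths below are illustrative stand-ins rather than part of the patches. As in conditionally_save_explanation_csv, it reads the private Column._cells attribute of the Rich table and appends rows to a per-image CSV named after the input file's stem.

import os
from csv import QUOTE_NONNUMERIC
from pathlib import Path

import pandas as pd
from rich.table import Table


def save_table_as_csv(table: Table, file_path: Path, evaluation_output_dir: str) -> str:
    # Mirror the patch: one CSV per processed image, written into the
    # Evaluation output directory as "<stem>_evaluation.csv".
    data = {col.header: list(col._cells) for col in table.columns}  # _cells is a private Rich attribute
    output_path = os.path.join(evaluation_output_dir, f"{file_path.stem}_evaluation.csv")
    os.makedirs(evaluation_output_dir, exist_ok=True)  # setup_dirs_for_paths handles this in the project
    pd.DataFrame(data, dtype=str).to_csv(
        output_path,
        mode="a",  # append, as in the patch; rerunning on the same image appends duplicate rows
        quoting=QUOTE_NONNUMERIC,
        index=False,
    )
    return output_path


if __name__ == "__main__":
    # Illustrative explanation table; the real headers come from the
    # explanation table built in src/evaluation.py.
    demo = Table(title="Evaluation Explanation")
    for header in ("Question", "Marked", "Answer", "Verdict", "Delta", "Score"):
        demo.add_column(header)
    demo.add_row("q1", "B", "B", "Correct", "3.0", "3.0")
    demo.add_row("q2", "C", "D", "Incorrect", "0.0", "3.0")
    print(save_table_as_csv(demo, Path("sample_omr_001.jpg"), "outputs/Evaluation"))

Because the schema defaults enable_evaluation_table_to_csv to False, the export only runs for directories whose evaluation.json opts in, as samples/sample4/evaluation.json does in patch 1.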