diff --git a/.gitignore b/.gitignore index 57b2ec85..2e0eec35 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ worked_examples/**/workshops_*/**/*.png worked_examples/**/workshops_*/**/*.csv worked_examples/**/workshops_*/**/*.geojson worked_examples/**/workshops_*/**/*.xlsx + +# test outputs +tmp_checkpoints diff --git a/mapreader/annotate/annotator.py b/mapreader/annotate/annotator.py index f04251ea..bd4f90f2 100644 --- a/mapreader/annotate/annotator.py +++ b/mapreader/annotate/annotator.py @@ -3,6 +3,7 @@ import functools import hashlib import json +import logging import os import random import string @@ -20,12 +21,16 @@ from ..load.loader import load_patches +# Ignore warnings warnings.filterwarnings("ignore", category=UserWarning) _CENTER_LAYOUT = widgets.Layout( display="flex", flex_flow="column", align_items="center" ) +# Set up logging +logger = logging.getLogger(__name__) + class Annotator: """ @@ -218,7 +223,7 @@ def __init__( # Test for existing patch annotation file if os.path.exists(annotations_file): - print("[INFO] Loading existing patch annotations.") + logger.info("Loading existing patch annotations.") patch_df = self._load_annotations( patch_df=patch_df, annotations_file=annotations_file, @@ -330,9 +335,9 @@ def _load_dataframes( A tuple containing the parent dataframe and patch dataframe. """ if patch_paths: - print(f"[INFO] Loading patches from {patch_paths}.") + logger.info(f"Loading patches from {patch_paths}.") if parent_paths: - print(f"[INFO] Loading parents from {parent_paths}.") + logger.info(f"Loading parents from {parent_paths}.") maps = load_patches(patch_paths=patch_paths, parent_paths=parent_paths) # Add pixel stats @@ -340,10 +345,10 @@ def _load_dataframes( try: maps.add_metadata(metadata_path, delimiter=delimiter) - print(f"[INFO] Adding metadata from {metadata_path}.") + logger.info(f"Adding metadata from {metadata_path}.") except ValueError: raise FileNotFoundError( - f"[INFO] Metadata file at {metadata_path} not found. Please specify the correct file path using the ``metadata_path`` argument." + f"[ERROR] Metadata file at {metadata_path} not found. Please specify the correct file path using the ``metadata_path`` argument." 
) parent_df, patch_df = maps.convert_images() @@ -745,7 +750,7 @@ def _annotate( self._queue = self.get_queue() if self._filter_for is not None: - print(f"[INFO] Filtering for: {self._filter_for}") + logger.info(f"Filtering for: {self._filter_for}") self.out = widgets.Output(layout=_CENTER_LAYOUT) display(self.box) diff --git a/mapreader/annotate/utils.py b/mapreader/annotate/utils.py index a0c216d1..1fc79433 100644 --- a/mapreader/annotate/utils.py +++ b/mapreader/annotate/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import annotations +import logging import os import random import sys @@ -25,10 +26,14 @@ from mapreader import load_patches, loader +# Ignore warnings warnings.filterwarnings("ignore") -# warnings.filterwarnings( +# ( # "ignore", message="Pandas doesn't allow columns to be created via a new attribute name") +# Set up logging +logger = logging.getLogger(__name__) + def prepare_data( df: pd.DataFrame, @@ -77,10 +82,10 @@ def prepare_data( col_names = ["image_path", "parent_id"] if (label_col_name in list(df.columns)) and (not redo): already_annotated = len(df[~df[label_col_name].isnull()]) - print(f"Number of already annotated images: {already_annotated}") + logger.info(f"Number of already annotated images: {already_annotated}") # only annotate those patches that have not been already annotated df = df[df[label_col_name].isnull()] - print(f"Number of images to be annotated (total): {len(df)}") + logger.info(f"Number of images to be annotated (total): {len(df)}") else: # if redo = True or "label" column does not exist # annotate all patches in the pandas dataframe @@ -100,10 +105,10 @@ def prepare_data( else: df = df.groupby("pixel_groups").sample(n=10, random_state=random_state) except Exception: - print(f"[INFO] len(df) = {len(df)}, .sample method is deactivated.") + logger.info(f"len(df) = {len(df)}, .sample method is deactivated.") df = df.iloc[:num_samples] else: - print(f"[WARNING] could not find {tar_param} in columns.") + logger.warning(f"could not find {tar_param} in columns.") df = df.iloc[:num_samples] data = [] @@ -117,7 +122,7 @@ def prepare_data( data.append(cols2add) row_counter += 1 - print(f"Number of images to annotate (current batch): {len(data)}") + logger.info(f"Number of images to annotate (current batch): {len(data)}") return data @@ -300,9 +305,9 @@ def display_record(record: tuple[str, str, str, int, int]) -> None: plt.tight_layout() plt.show() - print(20 * "-") - print("Additional info:") - print(f"Counter: {record[-1]}") + logger.info(20 * "-") + logger.info("Additional info:") + logger.info(f"Counter: {record[-1]}") if url_main: try: map_id = record[2].split("_")[-1].split(".")[0] @@ -311,8 +316,7 @@ def display_record(record: tuple[str, str, str, int, int]) -> None: # the page exists response = requests.get(url, stream=True) assert response.status_code < 400 - print() - print(f"URL: {url}") + logger.info(f"URL: {url}") except: url = False pass @@ -619,7 +623,7 @@ def prepare_annotation( ) if len(data2annotate) == 0: - print("No image to annotate!") + logger.info("No image to annotate!") else: annotation = annotation_interface( data2annotate, @@ -665,7 +669,7 @@ def save_annotation( annotation_tasks = yaml.load(f, Loader=yaml.FullLoader) if annotation_set not in annotation_tasks["paths"].keys(): - print(f"{annotation_set} could not be found in {annotation_tasks_file}") + logger.info(f"{annotation_set} could not be found in {annotation_tasks_file}") else: annot_file = os.path.join( 
annotation_tasks["paths"][annotation_set]["annot_dir"], @@ -704,8 +708,8 @@ def save_annotation( if len(image_df) > 0: # image_df = image_df.set_index("image_id") image_df.to_csv(annot_file, mode="w") - print(f"[INFO] Save {newly_annotated} new annotations to {annot_file}") - print(f"[INFO] {new_labels} labels were not already stored") - print(f"[INFO] Total number of saved annotations: {len(image_df)}") + logger.info(f"Save {newly_annotated} new annotations to {annot_file}") + logger.info(f"{new_labels} labels were not already stored") + logger.info(f"Total number of saved annotations: {len(image_df)}") else: - print("[INFO] No annotations to save!") + logger.info("No annotations to save!") diff --git a/mapreader/classify/classifier.py b/mapreader/classify/classifier.py index e8d1af8e..cf7fbd0a 100644 --- a/mapreader/classify/classifier.py +++ b/mapreader/classify/classifier.py @@ -2,9 +2,9 @@ from __future__ import annotations import copy +import logging import os import random -import socket import sys import time from collections.abc import Hashable, Iterable @@ -29,6 +29,9 @@ # from tqdm.autonotebook import tqdm # from torch.nn.modules.module import _addindent +# Set up logging +logger = logging.getLogger(__name__) + class ClassifierContainer: def __init__( @@ -117,7 +120,7 @@ def __init__( self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") else: self.device = device - print(f"[INFO] Device is set to {self.device}") + logger.info(f"Device is set to {self.device}") # check if loading an pre-existing object if load_path: @@ -147,7 +150,7 @@ def __init__( self.labels_map = labels_map # set up model and move to device - print("[INFO] Initializing model.") + logger.info("Initializing model.") if isinstance(model, nn.Module): self.model = model.to(self.device) self.input_size = input_size @@ -171,14 +174,11 @@ def __init__( f"./tmp_checkpoints/tmp_{random.randint(0, int(1e10))}_checkpoint.pkl" ) - # add colors for printing/logging - self._set_up_print_colors() - # add dataloaders and labels_map self.dataloaders = dataloaders if dataloaders else {} for set_name, dataloader in self.dataloaders.items(): - print(f'[INFO] Loaded "{set_name}" with {len(dataloader.dataset)} items.') + logger.info(f'Loaded "{set_name}" with {len(dataloader.dataset)} items.') def generate_layerwise_lrs( self, @@ -437,7 +437,7 @@ def add_criterion( '[ERROR] At present, if passing ``criterion`` as a string, criterion can only be "cross entropy" or "ce" (cross-entropy), "bce" (binary cross-entropy) or "mse" (mean squared error).' 
) - print(f'[INFO] Using "{criterion}" as criterion.') + logger.info(f'Using "{criterion}" as criterion.') elif not isinstance(criterion, nn.modules.loss._Loss): raise ValueError( @@ -733,12 +733,12 @@ def train( print_info_batch_freq=print_info_batch_freq, ) except KeyboardInterrupt: - print("[INFO] Exiting...") + logger.info("Exiting...") if os.path.isfile(self.tmp_save_filename): - print(f'[INFO] Loading "{self.tmp_save_filename}" as model.') + logger.info(f'Loading "{self.tmp_save_filename}" as model.') self.load(self.tmp_save_filename, remove_after_load=remove_after_load) else: - print("[INFO] No checkpoint file found - model has not been updated.") + logger.info("No checkpoint file found - model has not been updated.") def train_core( self, @@ -799,7 +799,7 @@ def train_core( if phases is None: phases = ["train", "val"] - print(f"[INFO] Each step will pass: {phases}.") + logger.info(f"Each step will pass: {phases}.") for phase in phases: if phase not in self.dataloaders.keys(): @@ -831,10 +831,10 @@ def train_core( tboard_writer = SummaryWriter(tensorboard_path) except ImportError: - print( - "[WARNING] Could not import ``SummaryWriter`` from torch.utils.tensorboard" # noqa + logger.warning( + "Could not import `SummaryWriter` from torch.utils.tensorboard" # noqa ) - print("[WARNING] Continuing without tensorboard.") + logger.warning("Continuing without tensorboard.") tensorboard_path = None start_epoch = self.last_epoch + 1 @@ -958,12 +958,12 @@ def train_core( if phase.lower() in valid_phase_names: epoch_msg += f"Loss: {loss.data:.3f}" - self.cprint("[INFO]", "dred", epoch_msg) + logger.info(epoch_msg) elif phase.lower() in train_phase_names: epoch_msg += f"Loss: {loss.data:.3f}" - self.cprint("[INFO]", "dgreen", epoch_msg) + logger.info(epoch_msg) else: - self.cprint("[INFO]", "dgreen", epoch_msg) + logger.info(epoch_msg) # --- END: one batch # scheduler @@ -996,9 +996,9 @@ def train_core( epoch_msg = self._gen_epoch_msg(phase, epoch_msg) if phase.lower() in valid_phase_names: - self.cprint("[INFO]", "dred", epoch_msg + "\n") + logger.info(epoch_msg) else: - self.cprint("[INFO]", "dgreen", epoch_msg) + logger.info(epoch_msg) # labels/confidence self.pred_conf.extend(running_pred_conf) @@ -1013,12 +1013,9 @@ def train_core( if phase.lower() in valid_phase_names: if epoch % tmp_file_save_freq == 0: - tmp_str = f'[INFO] Checkpoint file saved to "{self.tmp_save_filename}".' # noqa - print( - self._print_colors["lgrey"] - + tmp_str - + self._print_colors["reset"] - ) + logger.info( + f'Checkpoint file saved to "{self.tmp_save_filename}".' + ) # noqa self.last_epoch = epoch self.save(self.tmp_save_filename, force=True) @@ -1030,7 +1027,7 @@ def train_core( ] time_elapsed = time.time() - since - print(f"[INFO] Total time: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s") + logger.info(f"Total time: {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s") # load best model weights self.model.load_state_dict(best_model_wts) @@ -1045,10 +1042,10 @@ def train_core( with open(os.path.join(save_model_dir, "info.txt"), "a+") as fio: fio.writelines(f"{save_filename},{self.best_loss:.5f}\n") - print( - f"[INFO] Model at epoch {self.best_epoch} has least valid loss ({self.best_loss:.4f}) so will be saved.\n\ -[INFO] Path: {save_model_path}" + logger.info( + f"Model at epoch {self.best_epoch} has least valid loss ({self.best_loss:.4f}) so will be saved." 
) + logger.info(f"Path: {save_model_path}") @staticmethod def _get_logits(out): @@ -1320,8 +1317,8 @@ def plot_metric( for i, one_item in enumerate(y_axis): if one_item not in self.metrics.keys(): - print( - f"[WARNING] requested item: {one_item} not in keys: {self.metrics.keys}" # noqa + logger.warning( + f"Requested item: {one_item} not in keys: {self.metrics.keys}" # noqa ) continue @@ -1518,8 +1515,8 @@ def show_sample( num_batches = int(np.ceil(len(dataloader.dataset) / dataloader.batch_size)) if min(num_batches, batch_number) != batch_number: - print( - f'[INFO] "{set_name}" only contains {num_batches}.\n\ + logger.info( + f'"{set_name}" only contains {num_batches}.\n\ Output will show batch number {num_batches}.' ) batch_number = num_batches @@ -1561,12 +1558,10 @@ def print_batch_info(self, set_name: str | None = "train") -> None: num_samples = len(self.dataloaders[set_name].dataset) num_batches = int(np.ceil(num_samples / batch_size)) - print( - f"[INFO] dataset: {set_name}\n\ - - items: {num_samples}\n\ - - batch size: {batch_size}\n\ - - batches: {num_batches}" - ) + logger.info(f"Dataset: {set_name}") + logger.info(f"- items: {num_samples}") + logger.info(f"- batch size: {batch_size}") + logger.info(f"- batches: {num_batches}") @staticmethod def _imshow( @@ -1782,7 +1777,7 @@ def save_predictions( if save_path is None: save_path = f"{set_name}_predictions_patch_df.csv" patch_df.to_csv(save_path, sep=delimiter) - print(f"[INFO] Saved predictions to {save_path}.") + logger.info(f"Saved predictions to {save_path}.") def load_dataset( self, @@ -1812,7 +1807,7 @@ def load_dataset( The number of worker threads to use for loading data, by default 0. """ if sampler and shuffle: - print("[INFO] ``sampler`` is defined so train dataset will be unshuffled.") + logger.info("`sampler` is defined so train dataset will be unshuffled.") dataloader = DataLoader( dataset, @@ -1861,7 +1856,7 @@ def load( if not os.path.isfile(load_path): raise FileNotFoundError(f'[ERROR] "{load_path}" cannot be found.') - print(f'[INFO] Loading "{load_path}".') + logger.info(f'Loading "{load_path}".') with open(load_path, "rb") as myfile: # objPickle = pickle.load(myfile) @@ -1885,40 +1880,6 @@ def load( except: pass - def _set_up_print_colors(self): - """Private function, setting color attributes on the object.""" - self._print_colors = {} - - # color - self._print_colors["lgrey"] = "\033[1;90m" - self._print_colors["grey"] = "\033[90m" # boring information - self._print_colors["yellow"] = "\033[93m" # FYI - self._print_colors["orange"] = "\033[0;33m" # Warning - - self._print_colors["lred"] = "\033[1;31m" # there is smoke - self._print_colors["red"] = "\033[91m" # fire! 
- self._print_colors["dred"] = "\033[2;31m" # Everything is on fire - - self._print_colors["lblue"] = "\033[1;34m" - self._print_colors["blue"] = "\033[94m" - self._print_colors["dblue"] = "\033[2;34m" - - self._print_colors["lgreen"] = "\033[1;32m" # all is normal - self._print_colors["green"] = "\033[92m" # something else - self._print_colors["dgreen"] = "\033[2;32m" # even more interesting - - self._print_colors["lmagenta"] = "\033[1;35m" - self._print_colors["magenta"] = "\033[95m" # for title - self._print_colors["dmagenta"] = "\033[2;35m" - - self._print_colors["cyan"] = "\033[96m" # system time - self._print_colors["white"] = "\033[97m" # final time - self._print_colors["black"] = "\033[0;30m" - - self._print_colors["reset"] = "\033[0m" - self._print_colors["bold"] = "\033[1m" - self._print_colors["under"] = "\033[4m" - def _get_dtime(self) -> str: """ Get the current date and time as a formatted string. @@ -1931,38 +1892,6 @@ def _get_dtime(self) -> str: dtime = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") return dtime - def cprint(self, type_info: str, bc_color: str, text: str) -> None: - """ - Print colored text with additional information. - - Parameters - ---------- - type_info : str - The type of message to display. - bc_color : str - The color to use for the message text. - text : str - The text to display. - - Returns - ------- - None - The colored message is displayed on the standard output stream. - """ - host_name = socket.gethostname().split(".")[0][:10] - - print( - self._print_colors["green"] - + self._get_dtime() - + self._print_colors["reset"], - self._print_colors["magenta"] + host_name + self._print_colors["reset"], - self._print_colors["bold"] - + self._print_colors["grey"] - + type_info - + self._print_colors["reset"], - self._print_colors[bc_color] + text + self._print_colors["reset"], - ) - def update_progress( self, progress: float | int, diff --git a/mapreader/classify/datasets.py b/mapreader/classify/datasets.py index 9535bcf6..48717012 100644 --- a/mapreader/classify/datasets.py +++ b/mapreader/classify/datasets.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import annotations +import logging import os from ast import literal_eval from itertools import product @@ -13,14 +14,18 @@ from torch.utils.data import DataLoader, Dataset from torchvision import transforms +# Set up logging +logger = logging.getLogger(__name__) + + # Import parhugin try: from parhugin import multiFunc parhugin_installed = True except ImportError: - print( - "[WARNING] parhugin (https://github.com/kasra-hosseini/parhugin) is not installed, continue without it." # noqa + logger.warning( + "parhugin (https://github.com/kasra-hosseini/parhugin) is not installed, continue without it." # noqa ) parhugin_installed = False @@ -104,7 +109,7 @@ def __init__( elif isinstance(patch_df, str): if os.path.isfile(patch_df): - print(f'[INFO] Reading "{patch_df}".') + logger.info(f'Reading "{patch_df}".') patch_df = pd.read_csv(patch_df, sep=delimiter) # ensure tuple/list columns are read as such patch_df = self._eval_df(patch_df) @@ -141,8 +146,8 @@ def __init__( if self.label_index_col: if self.label_index_col not in self.patch_df.columns: if self.label_col: - print( - f"[INFO] Label index column ({label_index_col}) not in dataframe. Creating column." + logger.info( + f"Label index column ({label_index_col}) not in dataframe. Creating column." 
) self.patch_df[self.label_index_col] = self.patch_df[ self.label_col @@ -462,7 +467,7 @@ def __init__( elif isinstance(patch_df, str): if os.path.isfile(patch_df): - print(f'[INFO] Reading "{patch_df}".') + logger.info(f'Reading "{patch_df}".') patch_df = pd.read_csv(patch_df, sep=delimiter) patch_df = self._eval_df(patch_df) self.patch_df = patch_df @@ -512,8 +517,8 @@ def __init__( if self.label_index_col: if self.label_index_col not in self.patch_df.columns: - print( - f"[INFO] Label index column ({label_index_col}) not in dataframe. Creating column." + logger.info( + f"Label index column ({label_index_col}) not in dataframe. Creating column." ) self.patch_df[self.label_index_col] = self.patch_df[ self.label_col @@ -581,7 +586,7 @@ def save_context( ] ) - print(f"Total number of jobs: {len(list_jobs)}") + logger.info(f"Total number of jobs: {len(list_jobs)}") # and then adding them to my_proc my_proc.add_list_jobs(list_jobs) my_proc.run_jobs() diff --git a/mapreader/classify/load_annotations.py b/mapreader/classify/load_annotations.py index 6292b211..46b21301 100644 --- a/mapreader/classify/load_annotations.py +++ b/mapreader/classify/load_annotations.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import annotations +import logging import os from decimal import Decimal from typing import Callable @@ -16,6 +17,9 @@ from .datasets import PatchContextDataset, PatchDataset +# Set up logging +logger = logging.getLogger(__name__) + class AnnotationsLoader: def __init__(self): @@ -85,16 +89,16 @@ def load( if not self.patch_paths_col: self.patch_paths_col = patch_paths_col elif self.patch_paths_col != patch_paths_col: - print( - f'[WARNING] Patch paths column was previously "{self.patch_paths_col}, but will now be set to {patch_paths_col}.' + logger.warning( + f'Patch paths column was previously "{self.patch_paths_col}, but will now be set to {patch_paths_col}.' ) self.patch_paths_col = patch_paths_col if not self.label_col: self.label_col = label_col elif self.label_col != label_col: - print( - f'[WARNING] Label column was previously "{self.label_col}, but will now be set to {label_col}.' + logger.warning( + f'Label column was previously "{self.label_col}, but will now be set to {label_col}.' ) self.label_col = label_col @@ -169,7 +173,7 @@ def _load_annotations_csv( """ if os.path.isfile(annotations): - print(f'[INFO] Reading "{annotations}"') + logger.info(f'Reading "{annotations}"') annotations = pd.read_csv(annotations, sep=delimiter, index_col=0) else: raise ValueError(f'[ERROR] "{annotations}" cannot be found.') @@ -218,9 +222,9 @@ def _check_patch_paths( for broken_path in broken_paths: f.write(f"{broken_path}\n") - print( - f"[WARNING] {len(broken_paths)} files cannot be found.\n\ -Check '{os.path.abspath('broken_paths.txt')}' for more details and, if possible, update your file paths using the 'images_dir' argument." + logger.warning(f"{len(broken_paths)} files cannot be found.") + logger.warning( + f"Check '{os.path.abspath('broken_paths.txt')}' for more details and, if possible, update your file paths using the 'images_dir' argument." ) if remove_broken: @@ -230,15 +234,15 @@ def _check_patch_paths( Please check your files exist and, if possible, update your file paths using the 'images_dir' argument." 
) else: - print( - f"[INFO] Annotations with broken file paths have been removed.\n\ -Number of annotations remaining: {len(self.annotations)}" + logger.info("Annotations with broken file paths have been removed.") + logger.info( + f"Number of annotations remaining: {len(self.annotations)}" ) else: # raise error for 'remove_broken=False' if ignore_broken: - print( - f"[WARNING] Continuing with {len(broken_paths)} broken file paths." + logger.warning( + f"Continuing with {len(broken_paths)} broken file paths." ) else: raise ValueError( @@ -288,7 +292,7 @@ def print_unique_labels(self) -> None: if len(self.annotations) == 0: raise ValueError("[ERROR] No annotations loaded.") - print(f"[INFO] Unique labels: {self.unique_labels}") + logger.info(f"Unique labels: {self.unique_labels}") def review_labels( self, @@ -367,9 +371,9 @@ def review_labels( image_idx = 0 while image_idx < len(annots2review): - print('[INFO] Type "exit", "end" or "stop" to exit.') - print( - f"[INFO] Showing {image_idx}-{image_idx+chunks} out of {len(annots2review)}." # noqa + logger.info('Type "exit", "end" or "stop" to exit.') + logger.info( + f"Showing {image_idx}-{image_idx+chunks} out of {len(annots2review)}." # noqa ) plt.figure(figsize=(num_cols * 3, (chunks // num_cols) * 3)) counter = 1 @@ -406,7 +410,7 @@ def review_labels( image_idx += 1 plt.show() - print(f"[INFO] IDs of current patches: {iter_ids}") + logger.info(f"IDs of current patches: {iter_ids}") q = "\nEnter IDs, comma separated (or press enter to continue): " user_input_ids = input(q) @@ -417,13 +421,13 @@ def review_labels( "stop", ]: list_input_ids = user_input_ids.split(",") - print( - f"[INFO] Options for labels:{list(self.annotations[self.label_col].unique())}" + logger.info( + f"Options for labels:{list(self.annotations[self.label_col].unique())}" ) input_label = input("Enter new label: ") if input_label not in list(self.annotations[self.label_col].unique()): - print( - f'[ERROR] Label "{input_label}" not found in the annotations. Please enter a valid label.' + logger.error( + f'Label "{input_label}" not found in the annotations. Please enter a valid label.' ) continue @@ -443,8 +447,8 @@ def review_labels( self.annotations[self.label_col].value_counts().tolist() == self.annotations["label_index"].value_counts().tolist() ) - print( - f'[INFO] Image {input_id} has been relabelled as "{input_label}"' + logger.info( + f'Image {input_id} has been relabelled as "{input_label}"' ) user_input_ids = input(q) @@ -452,7 +456,7 @@ def review_labels( if user_input_ids.lower() in ["exit", "end", "stop"]: break - print("[INFO] Exited.") + logger.info("Exited.") def show_sample(self, label_to_show: str, num_samples: int | None = 9) -> None: """Show a random sample of images with the specified label (tar_label). @@ -629,9 +633,9 @@ def create_datasets( self.datasets = datasets self.dataset_sizes = dataset_sizes - print("[INFO] Number of annotations in each set:") + logger.info("Number of annotations in each set:") for set_name in datasets.keys(): - print(f" - {set_name}: {dataset_sizes[set_name]}") + logger.info(f" - {set_name}: {dataset_sizes[set_name]}") def create_patch_datasets( self, train_transform, val_transform, test_transform, df_train, df_val, df_test @@ -752,8 +756,8 @@ def create_dataloaders( ``sampler`` will only be applied to the training dataset (datasets["train"]). """ if not self.datasets: - print( - "[INFO] Creating datasets using default train/val/test split of 0.7:0.15:0.15 and default transformations." 
+ logger.info( + "Creating datasets using default train/val/test split of 0.7:0.15:0.15 and default transformations." ) self.create_datasets() @@ -761,7 +765,7 @@ def create_dataloaders( if isinstance(sampler, str): if sampler == "default": - print("[INFO] Using default sampler.") + logger.info("Using default sampler.") sampler = self._define_sampler() else: raise ValueError( @@ -769,7 +773,7 @@ def create_dataloaders( ) if sampler and shuffle: - print("[INFO] ``sampler`` is defined so train dataset will be un-shuffled.") + logger.info("``sampler`` is defined so train dataset will be un-shuffled.") dataloaders = { set_name: DataLoader( diff --git a/mapreader/download/downloader.py b/mapreader/download/downloader.py index 0005fb71..14779331 100644 --- a/mapreader/download/downloader.py +++ b/mapreader/download/downloader.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import shutil import urllib @@ -12,6 +13,9 @@ from .tile_loading import DEFAULT_TEMP_FOLDER, TileDownloader from .tile_merging import TileMerger +# Set up logging +logger = logging.getLogger(__name__) + class Downloader: """ @@ -43,8 +47,7 @@ def __init__( self.download_url = my_ts def __str__(self) -> str: - info = f"[INFO] Downloading from {self.download_url}." - return info + return f"[INFO] Downloading from {self.download_url}." def _initialise_downloader(self): """ @@ -83,8 +86,8 @@ def _check_map_exists(self, grid_bb: GridBoundingBox, map_name: str | None) -> b map_name = self.merger._get_output_name(grid_bb) path_save = self.merger.output_folder if os.path.exists(f"{path_save}{map_name}.png"): - print( - f'[INFO] "{path_save}{map_name}.png" already exists. Skipping download.' + logger.info( + f'"{path_save}{map_name}.png" already exists. Skipping download.' ) return True return False @@ -145,9 +148,9 @@ def _download_map( self.downloader.download_tiles(grid_bb, download_in_parallel=False) success = self.merger.merge(grid_bb, map_name) if success: - print(f'[INFO] Downloaded "{map_name}.png"') + logger.info(f'Downloaded "{map_name}.png"') else: - print(f'[WARNING] Download of "{map_name}.png" was unsuccessful.') + logger.warning(f'Download of "{map_name}.png" was unsuccessful.') shutil.rmtree(DEFAULT_TEMP_FOLDER) return success diff --git a/mapreader/download/sheet_downloader.py b/mapreader/download/sheet_downloader.py index 3a852bba..063124cc 100644 --- a/mapreader/download/sheet_downloader.py +++ b/mapreader/download/sheet_downloader.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging import os import re import shutil @@ -23,6 +24,9 @@ from .tile_loading import DEFAULT_TEMP_FOLDER, TileDownloader from .tile_merging import TileMerger +# Set up logging +logger = logging.getLogger(__name__) + class SheetDownloader: """ @@ -61,7 +65,7 @@ def __init__( with open(metadata_path) as f: self.metadata = json.load(f) self.features = self.metadata["features"] - print(self.__str__()) + logger.info(self.__str__()) else: raise ValueError("[ERROR] Metadata file not found.") @@ -81,8 +85,7 @@ def __init__( self.crs = crs_string.to_string() def __str__(self) -> str: - info = f"[INFO] Metadata file has {self.__len__()} item(s)." - return info + return f"[INFO] Metadata file has {self.__len__()} item(s)." def __len__(self) -> int: return len(self.features) @@ -95,7 +98,9 @@ def get_polygons(self) -> None: polygon = shape(feature["geometry"]) map_name = feature["properties"]["IMAGE"] if len(polygon.geoms) != 1: - f"[WARNING] Multiple geometries found in map {map_name}. 
Using first instance." + logger.warning( + f"Multiple geometries found in map {map_name}. Using first instance." + ) feature["polygon"] = polygon.geoms[0] self.polygons = True @@ -178,7 +183,7 @@ def extract_published_dates( ) from err if published_date == "": # missing date is fine - print(f"[WARNING] No published date detected in {map_name}.") + logger.warning(f"No published date detected in {map_name}.") feature["properties"]["published_date"] = [] else: @@ -200,14 +205,14 @@ if len(published_date) > 0: # if date is found if len(published_date) > 1: - print( - f"[WARNING] Multiple published dates detected in map {map_name}. Using first date." + logger.warning( + f"Multiple published dates detected in map {map_name}. Using first date." ) feature["properties"]["published_date"] = int(published_date[0]) else: - print(f"[WARNING] No published date detected in map {map_name}.") + logger.warning(f"No published date detected in map {map_name}.") feature["properties"]["published_date"] = [] self.published_dates = True @@ -227,16 +232,15 @@ def get_merged_polygon(self) -> None: def get_minmax_latlon(self) -> None: """ - Prints minimum and maximum latitudes and longitudes of all maps in metadata. + Returns minimum and maximum latitudes and longitudes of all maps in metadata. """ if self.merged_polygon is None: self.get_merged_polygon() min_x, min_y, max_x, max_y = self.merged_polygon.bounds - print( - f"[INFO] Min lat: {min_y}, max lat: {max_y} \n\ -[INFO] Min lon: {min_x}, max lon: {max_x}" - ) + logger.info(f"Min lat: {min_y}, max lat: {max_y}") + logger.info(f"Min lon: {min_x}, max lon: {max_x}") + return (min_x, min_y, max_x, max_y) ## queries def query_map_sheets_by_wfs_ids( @@ -506,16 +512,16 @@ def print_found_queries(self) -> None: self.get_polygons() if len(self.found_queries) == 0: - print("[INFO] No query results found/saved.") + logger.info("No query results found/saved.") else: divider = 14 * "=" - print(f"{divider}\nQuery results:\n{divider}") + logger.info(f"{divider}\nQuery results:\n{divider}") for feature in self.found_queries: map_url = feature["properties"]["IMAGEURL"] map_bounds = feature["polygon"].bounds - print(f"URL: \t{map_url}") - print(f"coordinates (bounds): \t{map_bounds}") - print(20 * "-") + logger.info(f"URL: \t{map_url}") + logger.info(f"coordinates (bounds): \t{map_bounds}") + logger.info(20 * "-") ## download def _initialise_downloader(self): @@ -618,9 +624,9 @@ def _download_map( ) if img_path is not False: - print(f'[INFO] Downloaded "{img_path}"') + logger.info(f'Downloaded "{img_path}"') else: - print(f'[WARNING] Download of "{img_path}" was unsuccessful.') + logger.warning(f'Download of "{img_path}" was unsuccessful.') shutil.rmtree(DEFAULT_TEMP_FOLDER) return img_path @@ -787,7 +793,7 @@ def _download_map_sheets( if ( not overwrite and existing_id is not False ): # if map already exists and overwrite is False then skip - print(f'[INFO] "{existing_id}" already exists. Skipping download.') + logger.info(f'"{existing_id}" already exists. Skipping download.') continue img_path = self._download_map( feature, @@ -1298,8 +1304,8 @@ def plot_features_on_map( Whether to add an ID (WFS ID number) to each map sheet, by default True. """ if self.crs != "EPSG:4326": - print( - "[WARNING] This method assumes your coordinates are projected using EPSG 4326. The plot may therefore be incorrect."
+ logger.warning( + "This method assumes your coordinates are projected using EPSG 4326. The plot may therefore be incorrect." ) if add_id: @@ -1350,8 +1356,8 @@ def plot_features_on_map( ) except ImportError: - print( - "[WARNING] Cartopy is not installed. \ + logger.warning( + "Cartopy is not installed. \ If you would like to install it, please follow instructions at https://scitools.org.uk/cartopy/docs/latest/installing.html" ) diff --git a/mapreader/download/tile_loading.py b/mapreader/download/tile_loading.py index 40f257e8..dc1c3c8a 100644 --- a/mapreader/download/tile_loading.py +++ b/mapreader/download/tile_loading.py @@ -10,8 +10,10 @@ from .data_structures import GridBoundingBox, GridIndex +# Set up logging logger = logging.getLogger(__name__) +# Default values (constants) DEFAULT_TEMP_FOLDER = "_tile_cache/" # must end with a "/" DEFAULT_IMG_DOWNLOAD_FORMAT = "png" @@ -212,4 +214,4 @@ def _trigger_download(url: str, file_path: str): f.write(data) except: - print(f"[WARNING] {url} not found.") + logger.warning(f"{url} not found.") diff --git a/mapreader/download/tile_merging.py b/mapreader/download/tile_merging.py index 461e5b78..ddbef46d 100644 --- a/mapreader/download/tile_merging.py +++ b/mapreader/download/tile_merging.py @@ -10,8 +10,10 @@ from .data_structures import GridBoundingBox, GridIndex from .tile_loading import DEFAULT_IMG_DOWNLOAD_FORMAT, DEFAULT_TEMP_FOLDER +# Set up logging logger = logging.getLogger(__name__) +# Default values (constants) DEFAULT_OUT_FOLDER = "./" DEFAULT_IMG_STORE_FORMAT = ("png", "PNG") diff --git a/mapreader/load/geo_utils.py b/mapreader/load/geo_utils.py index 47f3c888..7c67c741 100644 --- a/mapreader/load/geo_utils.py +++ b/mapreader/load/geo_utils.py @@ -1,11 +1,16 @@ #!/usr/bin/env python from __future__ import annotations +import logging + import numpy as np import rasterio from geopy.distance import geodesic, great_circle from pyproj import Transformer +# Set up logging +logger = logging.getLogger(__name__) + def extractGeoInfo(image_path): """Extract geographic information (shape, CRS and coordinates) from GeoTiff files @@ -33,8 +38,9 @@ def extractGeoInfo(image_path): tiff_proj = tiff_src.crs.to_string() tiff_coord = tuple(tiff_src.bounds) - print(f"[INFO] Shape: {tiff_shape}. 
\n[INFO] CRS: {tiff_proj}.") - print("[INFO] Coordinates: {:.4f} {:.4f} {:.4f} {:.4f}".format(*tiff_coord)) + logger.info(f"Shape: {tiff_shape}.") + logger.info(f"CRS: {tiff_proj}.") + logger.info("Coordinates: {:.4f} {:.4f} {:.4f} {:.4f}".format(*tiff_coord)) return tiff_shape, tiff_proj, tiff_coord @@ -61,8 +67,8 @@ def reproject_geo_info(image_path, target_crs="EPSG:4326", calc_size_in_m=False) # Coordinate transformation: proj1 ---> proj2 transformer = Transformer.from_crs(tiff_proj, target_crs, always_xy=True) coord = transformer.transform_bounds(*tiff_coord) - print(f"[INFO] New CRS: {target_crs}") - print("[INFO] Reprojected coordinates: {:.4f} {:.4f} {:.4f} {:.4f}".format(*coord)) + logger.info(f"New CRS: {target_crs}") + logger.info("Reprojected coordinates: {:.4f} {:.4f} {:.4f} {:.4f}".format(*coord)) height, width, _ = tiff_shape @@ -91,10 +97,10 @@ def reproject_geo_info(image_path, target_crs="EPSG:4326", calc_size_in_m=False) mean_pixel_height = np.mean([right / height, left / height]) mean_pixel_width = np.mean([bottom / width, top / width]) - print( - f"[INFO] Size in meters of left/bottom/right/top: {left:.2f}/{bottom:.2f}/{right:.2f}/{top:.2f}" + logger.info( + f"Size in meters of left/bottom/right/top: {left:.2f}/{bottom:.2f}/{right:.2f}/{top:.2f}" ) - print( + logger.info( f"Each pixel is ~{mean_pixel_height:.3f} X {mean_pixel_width:.3f} meters (height x width)." ) # noqa diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 513db9d1..574459ca 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -5,6 +5,7 @@ except ImportError: pass +import logging import os import random import re @@ -37,6 +38,9 @@ # Ignore warnings warnings.filterwarnings("ignore") +# Set up logging +logger = logging.getLogger(__name__) + class MapImages: """ @@ -196,8 +200,8 @@ def _images_constructor( if tree_level == "parent": if parent_path: - print( - "[WARNING] Ignoring `parent_path` as `tree_level` is set to 'parent'." + logger.warning( + "Ignoring `parent_path` as `tree_level` is set to 'parent'." ) parent_path = None parent_id = None @@ -378,8 +382,8 @@ def add_metadata( if "name" in columns: image_id_col = "name" if "image_id" in columns: - print( - "[WARNING] Both 'name' and 'image_id' columns exist! Using 'name' as index" # noqa + logger.warning( + "Both 'name' and 'image_id' columns exist! Using 'name' as index" # noqa ) elif "image_id" in columns: image_id_col = "image_id" @@ -389,8 +393,8 @@ def add_metadata( ) if any(metadata_df.duplicated(subset=image_id_col)): - print( - "[WARNING] Duplicates found in metadata. Keeping only first instance of each duplicated value" + logger.warning( + "Duplicates found in metadata. Keeping only first instance of each duplicated value" ) metadata_df.drop_duplicates(subset=image_id_col, inplace=True, keep="first") @@ -511,21 +515,21 @@ def add_shape(self, tree_level: str | None = "parent") -> None: The method runs :meth:`mapreader.load.images.MapImages._add_shape_id` for each image present at the ``tree_level`` provided. 
""" - print(f"[INFO] Add shape, tree level: {tree_level}") + logger.info(f"Add shape, tree level: {tree_level}") image_ids = list(self.images[tree_level].keys()) for image_id in image_ids: self._add_shape_id(image_id=image_id) def add_coords_from_grid_bb(self, verbose: bool = False) -> None: - print("[INFO] Adding coordinates, tree level: parent") + logger.info("Adding coordinates, tree level: parent") parent_list = self.list_parents() for parent_id in parent_list: if "grid_bb" not in self.parents[parent_id].keys(): - print( - f"[WARNING] No grid bounding box found for {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa + logger.warning( + f"No grid bounding box found for {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa ) continue @@ -552,14 +556,14 @@ def add_coord_increments(self, verbose: bool | None = False) -> None: pixel-wise delta longitude (``dlon``) and delta latitude (``dlat``) for the image and adds the data to it. """ - print("[INFO] Add coord-increments, tree level: parent") + logger.info("Add coord-increments, tree level: parent") parent_list = self.list_parents() for parent_id in parent_list: if "coordinates" not in self.parents[parent_id].keys(): - print( - f"[WARNING] No coordinates found for {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa + logger.warning( + f"No coordinates found for {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa ) continue @@ -619,7 +623,7 @@ def add_center_coord( central longitude and latitude (``center_lon`` and ``center_lat``) for the image and adds the data to it. """ - print(f"[INFO] Add center coordinates, tree level: {tree_level}") + logger.info(f"Add center coordinates, tree level: {tree_level}") image_ids = list(self.images[tree_level].keys()) @@ -629,8 +633,8 @@ def add_center_coord( for image_id in image_ids: if tree_level == "parent": if "coordinates" not in self.parents[image_id].keys(): - print( - f"[WARNING] 'coordinates' could not be found in {image_id}. Suggestion: run add_metadata or add_geo_info" # noqa + logger.warning( + f"'coordinates' could not be found in {image_id}. Suggestion: run add_metadata or add_geo_info" # noqa ) continue @@ -639,8 +643,8 @@ def add_center_coord( if "coordinates" not in self.parents[parent_id].keys(): if parent_id not in already_checked_parent_ids: - print( - f"[WARNING] 'coordinates' could not be found in {parent_id} so center coordinates cannot be calculated for it's patches. Suggestion: run add_metadata or add_geo_info." # noqa + logger.warning( + f"'coordinates' could not be found in {parent_id} so center coordinates cannot be calculated for it's patches. Suggestion: run add_metadata or add_geo_info." # noqa ) already_checked_parent_ids.append(parent_id) continue @@ -758,9 +762,10 @@ def _add_coord_increments_id( """ if "coordinates" not in self.parents[image_id].keys(): - self._print_if_verbose( - f"[WARNING]'coordinates' could not be found in {image_id}. Suggestion: run add_metadata or add_geo_info.", + self._log_if_verbose( + f"'coordinates' could not be found in {image_id}. Suggestion: run add_metadata or add_geo_info.", verbose, + type="warning", ) return @@ -797,9 +802,10 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: parent_id = self.patches[image_id]["parent_id"] if "coordinates" not in self.parents[parent_id].keys(): - self._print_if_verbose( - f"[WARNING] No coordinates found in {parent_id} (parent of {image_id}). 
Suggestion: run add_metadata or add_geo_info.", + self._log_if_verbose( + f"No coordinates found in {parent_id} (parent of {image_id}). Suggestion: run add_metadata or add_geo_info.", verbose, + type="warning", ) return @@ -873,9 +879,10 @@ def _add_center_coord_id( if "coordinates" not in self.images[tree_level][image_id].keys(): if tree_level == "parent": - self._print_if_verbose( - f"[WARNING] No coordinates found for {image_id}. Suggestion: run add_metadata or add_geo_info.", + self._log_if_verbose( + f"No coordinates found for {image_id}. Suggestion: run add_metadata or add_geo_info.", verbose, + type="warning", ) return @@ -883,8 +890,8 @@ self._add_patch_coords_id(image_id, verbose) if "coordinates" in self.images[tree_level][image_id].keys(): - self._print_if_verbose( - f"[INFO] Reading 'coordinates' from {image_id}.", verbose + self._log_if_verbose( + f"Reading 'coordinates' from {image_id}.", verbose, type="info" ) min_x, min_y, max_x, max_y = self.images[tree_level][image_id][ @@ -933,8 +940,8 @@ def _calc_pixel_height_width( """ if "coordinates" not in self.parents[parent_id].keys(): - print( - f"[WARNING] 'coordinates' could not be found in {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa + logger.warning( + f"'coordinates' could not be found in {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa ) return @@ -967,13 +974,15 @@ mean_pixel_height = np.mean([right / height, left / height]) mean_pixel_width = np.mean([bottom / width, top / width]) - self._print_if_verbose( - f"[INFO] Size in meters of left/bottom/right/top: {left:.2f}/{bottom:.2f}/{right:.2f}/{top:.2f}", + self._log_if_verbose( + f"Size in meters of left/bottom/right/top: {left:.2f}/{bottom:.2f}/{right:.2f}/{top:.2f}", verbose, + type="info", ) - self._print_if_verbose( + self._log_if_verbose( f"Each pixel is ~{mean_pixel_height:.3f} X {mean_pixel_width:.3f} meters (height x width).", verbose, + type="info", ) # noqa return size_in_m, mean_pixel_height, mean_pixel_width @@ -1039,17 +1048,17 @@ def patchify_all( if path_save is None: path_save = f"patches_{patch_size}_{method}" - print(f'[INFO] Saving patches in directory named "{path_save}".') + logger.info(f'Saving patches in directory named "{path_save}".') for image_id in tqdm(image_ids): image_path = self.images[tree_level][image_id]["image_path"] try: - full_path = print(os.path.relpath(image_path)) + full_path = os.path.relpath(image_path) except ValueError: # if no rel path (e.g. mounted on different drives) - full_path = print(os.path.abspath(image_path)) + full_path = os.path.abspath(image_path) - self._print_if_verbose(f"[INFO] Patchifying {full_path}", verbose) + self._log_if_verbose(f"Patchifying {full_path}", verbose, type="info") # make sure the dir exists self._make_dir(path_save) @@ -1066,8 +1079,8 @@ ) ## check this is correct - should patch be different size in x and y? if square_cuts: - print( - "[WARNING] Square cuts is deprecated as of version 1.1.3 and will soon be removed."
) self._patchify_by_pixel_square( @@ -1159,8 +1172,8 @@ def _patchify_by_pixel( patch_path = os.path.abspath(patch_path) if os.path.isfile(patch_path) and not rewrite: - self._print_if_verbose( - f"[INFO] File already exists: {patch_path}.", verbose + self._log_if_verbose( + f"File already exists: {patch_path}.", verbose, type="info" ) else: @@ -1261,14 +1274,15 @@ def _patchify_by_pixel_square( patch_path = os.path.abspath(patch_path) if os.path.isfile(patch_path) and not rewrite: - self._print_if_verbose( - f"[INFO] File already exists: {patch_path}.", verbose + self._log_if_verbose( + f"File already exists: {patch_path}.", verbose, type="info" ) else: - self._print_if_verbose( - f'[INFO] Creating "{patch_id}". Number of pixels in x,y: {max_x - min_x},{max_y - min_y}.', + self._log_if_verbose( + f'Creating "{patch_id}". Number of pixels in x,y: {max_x - min_x},{max_y - min_y}.', verbose, + type="info", ) patch = img.crop((min_x, min_y, max_x, max_y)) @@ -1380,13 +1394,14 @@ def calc_pixel_stats( parent_ids = [parent_id] for parent_id in tqdm(parent_ids): - self._print_if_verbose( - f"\n[INFO] Calculating pixel stats for patches of image: {parent_id}", + self._log_if_verbose( + f"\nCalculating pixel stats for patches of image: {parent_id}", verbose, + type="info", ) if "patches" not in self.parents[parent_id]: - print(f"[WARNING] No patches found for: {parent_id}") + logger.warning(f"No patches found for: {parent_id}") continue list_patches = self.parents[parent_id]["patches"] @@ -1468,14 +1483,14 @@ def convert_images( if save: if save_format == "csv": parent_df.to_csv("parent_df.csv", sep=delimiter) - print('[INFO] Saved parent dataframe as "parent_df.csv"') + logger.info('Saved parent dataframe as "parent_df.csv"') patch_df.to_csv("patch_df.csv", sep=delimiter) - print('[INFO] Saved patch dataframe as "patch_df.csv"') + logger.info('Saved patch dataframe as "patch_df.csv"') elif save_format in ["excel", "xlsx"]: parent_df.to_excel("parent_df.xlsx") - print('[INFO] Saved parent dataframe as "parent_df.xlsx"') + logger.info('Saved parent dataframe as "parent_df.xlsx"') patch_df.to_excel("patch_df.xlsx") - print('[INFO] Saved patch dataframe as "patch_df.xslx"') + logger.info('Saved patch dataframe as "patch_df.xslx"') else: raise ValueError( @@ -1627,14 +1642,14 @@ def show( plt.imshow(img, zorder=1) if column_to_plot: - print( - "[WARNING] Values are only plotted on patches. If you'd like to plot values on all patches of a parent image, use ``show_parent`` instead." + logger.warning( + "Values are only plotted on patches. If you'd like to plot values on all patches of a parent image, use ``show_parent`` instead." ) if save_kml_dir: if "coordinates" not in self.parents[image_id].keys(): - print( - f"[WARNING] 'coordinates' could not be found in {image_id} so no KML file can be created/saved." # noqa + logger.warning( + f"'coordinates' could not be found in {image_id} so no KML file can be created/saved." # noqa ) continue else: @@ -1667,7 +1682,7 @@ def show( parent_id = self.patches[image_id].get("parent_id", None) if parent_id is None: - print(f"[WARNING] {image_id} has no parent. Skipping.") + logger.warning(f"{image_id} has no parent. 
Skipping.") continue if parent_id not in parent_images.keys(): @@ -1897,7 +1912,7 @@ def load_patches( for patch_file in tqdm(patch_files): if not os.path.isfile(patch_file): - print(f"[WARNING] File does not exist: {patch_file}") + logger.warning(f"File does not exist: {patch_file}") continue self._check_image_mode(patch_file) @@ -2022,7 +2037,7 @@ def load_parents( for file in tqdm(files): if not os.path.isfile(file): - print(f"[WARNING] File does not exist: {file}") + logger.warning(f"File does not exist: {file}") continue self._check_image_mode(file) @@ -2217,9 +2232,10 @@ def _add_geo_info_id( # Check whether coordinates are present if isinstance(tiff_src.crs, type(None)): - self._print_if_verbose( + self._log_if_verbose( f"No coordinates found in {image_id}. Try add_metadata instead.", verbose, + type="warning", ) # noqa return @@ -2234,12 +2250,18 @@ def _add_geo_info_id( self.parents[image_id]["crs"] = target_crs @staticmethod - def _print_if_verbose(msg: str, verbose: bool) -> None: + def _log_if_verbose(msg: str, verbose: bool, type: str) -> None: """ Print message if verbose is True. """ if verbose: - print(msg) + if type == "warning": + logger.warning(msg) + elif type == "info": + logger.info(msg) + else: + # default to info for now (we might want to change this at some point) + logger.info(msg) def _get_tree_level(self, image_id: str) -> str: """Identify tree level of an image from image_id. @@ -2323,14 +2345,15 @@ def _save_parent_as_geotiff( if os.path.isfile(f"{geotiff_path}"): if not rewrite: - self._print_if_verbose( - f"[INFO] File already exists: {geotiff_path}.", verbose + self._log_if_verbose( + f"File already exists: {geotiff_path}.", verbose, type="info" ) return - self._print_if_verbose( - f"[INFO] Creating: {geotiff_path}.", + self._log_if_verbose( + f"Creating: {geotiff_path}.", verbose, + type="info", ) if "shape" not in self.parents[parent_id].keys(): @@ -2338,7 +2361,7 @@ def _save_parent_as_geotiff( height, width, channels = self.parents[parent_id]["shape"] if "coordinates" not in self.parents[parent_id].keys(): - print(self.parents[parent_id].keys()) + logger.error(self.parents[parent_id].keys()) raise ValueError(f"[ERROR] Cannot locate coordinates for {parent_id}") coords = self.parents[parent_id]["coordinates"] @@ -2434,14 +2457,17 @@ def _save_patch_as_geotiff( if os.path.isfile(f"{geotiff_path}"): if not rewrite: - self._print_if_verbose( - f"[INFO] File already exists: {geotiff_path}.", verbose + self._log_if_verbose( + f"File already exists: {geotiff_path}.", + verbose, + type="info", ) return - self._print_if_verbose( - f"[INFO] Creating: {geotiff_path}.", + self._log_if_verbose( + f"Creating: {geotiff_path}.", verbose, + type="info", ) # get shape @@ -2508,8 +2534,8 @@ def save_patches_to_geojson( """ if os.path.isfile(geojson_fname): if not rewrite: - print( - f"[WARNING] File already exists: {geojson_fname}. Use ``rewrite=True`` to overwrite." + logger.warning( + f"File already exists: {geojson_fname}. Use ``rewrite=True`` to overwrite." 
) return @@ -2577,9 +2603,9 @@ def readPatches(self, try: metadata_df = pd.read_csv(metadata) except: - print(f"[WARNING] could not find metadata file: {metadata}") # noqa + logger.warning(f"could not find metadata file: {metadata}") # noqa else: - print(f"format cannot be recognized: {metadata_fmt}") + logger.warning(f"format cannot be recognized: {metadata_fmt}") include_metadata = False if include_metadata: metadata_df['rd_index_id'] = metadata_df[metadata_index_column].apply(lambda x: os.path.basename(x)) diff --git a/mapreader/process/post_process.py b/mapreader/process/post_process.py index 16b518f0..0c390532 100644 --- a/mapreader/process/post_process.py +++ b/mapreader/process/post_process.py @@ -1,12 +1,16 @@ #!/usr/bin/env python from __future__ import annotations +import logging from ast import literal_eval from itertools import product import pandas as pd from tqdm import tqdm +# Set up logging +logger = logging.getLogger(__name__) + class PostProcessor: """A class for post-processing predictions on patches using the surrounding context. @@ -50,8 +54,8 @@ def __init__( if all( [col in patch_df.columns for col in ["min_x", "min_y", "max_x", "max_y"]] ): - print( - "[INFO] Using existing pixel bounds columns (min_x, min_y, max_x, max_y)." + logger.info( + "Using existing pixel bounds columns (min_x, min_y, max_x, max_y)." ) else: patch_df[["min_x", "min_y", "max_x", "max_y"]] = [*patch_df["pixel_bounds"]] @@ -159,10 +163,8 @@ def update_preds(self, remap: dict, conf: float = 0.7, inplace: bool = False): # add new label to labels_map if not already present (assume label index is next in sequence) for new_label in remap.values(): if new_label not in self.labels_map.values(): - print( - [ - f"[INFO] Adding {new_label} to labels_map at index {len(self.labels_map)}." - ] + logger.info( + f"Adding {new_label} to labels_map at index {len(self.labels_map)}." 
) self.labels_map[len(self.labels_map)] = new_label diff --git a/mapreader/process/process.py b/mapreader/process/process.py index ab38bd26..1764c386 100644 --- a/mapreader/process/process.py +++ b/mapreader/process/process.py @@ -13,10 +13,14 @@ pass import distutils.spawn +import logging import os import subprocess from glob import glob +# Set up logging +logger = logging.getLogger(__name__) + def preprocess_all( image_paths: list[str] | str, save_preproc_dir: str, **kwds @@ -50,7 +54,7 @@ def preprocess_all( saved_paths = [] for one_path in all_paths: - print(f"Preprocessing: {one_path}") + logger.info(f"Preprocessing: {one_path}") saved_path = preprocess(one_path, save_preproc_dir, **kwds) saved_paths.append(saved_path) return saved_paths @@ -119,7 +123,7 @@ def preprocess( ) if os.path.isfile(path2save_resample): - print(f"{path2save_resample} already exists!") + logger.warning(f"{path2save_resample} already exists!") return "False" with rasterio.open(image_path) as src: @@ -173,7 +177,7 @@ def preprocess( cropped = True except Exception as e: - print(e) + logger.exception(e) # if not cropped correctly, clean up and exit if not cropped: @@ -196,7 +200,7 @@ def preprocess( subprocess.run(gdal_command, shell=True) resampled = True except Exception as e: - print(e) + logger.exception(e) if remove_reproj_file: os.remove(path2save_reproj) diff --git a/mapreader/spot_text/deepsolo_runner.py b/mapreader/spot_text/deepsolo_runner.py index 5fdb8944..0ee23cf0 100644 --- a/mapreader/spot_text/deepsolo_runner.py +++ b/mapreader/spot_text/deepsolo_runner.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import pathlib import pickle @@ -29,6 +30,9 @@ from .runner_base import Runner +# Set up logging +logger = logging.getLogger(__name__) + class DeepSoloRunner(Runner): def __init__( diff --git a/mapreader/spot_text/dptext_detr_runner.py b/mapreader/spot_text/dptext_detr_runner.py index f353c784..ecc87dd8 100644 --- a/mapreader/spot_text/dptext_detr_runner.py +++ b/mapreader/spot_text/dptext_detr_runner.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import pathlib try: @@ -28,6 +29,9 @@ from .runner_base import Runner +# Set up logging +logger = logging.getLogger(__name__) + class DPTextDETRRunner(Runner): def __init__( diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py index 616954c0..aa7b7b28 100644 --- a/mapreader/spot_text/runner_base.py +++ b/mapreader/spot_text/runner_base.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import pathlib from itertools import combinations @@ -13,6 +14,9 @@ from shapely import Polygon from tqdm.auto import tqdm +# Set up logging +logger = logging.getLogger(__name__) + class Runner: def __init__() -> None: @@ -324,7 +328,7 @@ def convert_to_coords( raise ValueError("[ERROR] Please provide a `parent_df`") if self.parent_predictions == {}: - print("[INFO] Converting patch pixel bounds to parent pixel bounds.") + logger.info("Converting patch pixel bounds to parent pixel bounds.") _ = self.convert_to_parent_pixel_bounds() for parent_id, prediction in self.parent_predictions.items(): diff --git a/tests/test_load/test_images.py b/tests/test_load/test_images.py index 9f7f24de..49a5f143 100644 --- a/tests/test_load/test_images.py +++ b/tests/test_load/test_images.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os from pathlib import Path from random import randint @@ -14,6 +15,10 @@ from mapreader.load.images import MapImages +# Set up 
logging +logger = logging.getLogger(__name__) +logging.basicConfig(format="%(levelname)s:%(message)s") + @pytest.fixture def sample_dir(): @@ -160,7 +165,8 @@ def test_init_geotiff(sample_dir): assert isinstance(geotiffs, MapImages) -def test_init_parent_path(sample_dir, image_id, capfd): +def test_init_parent_path(sample_dir, image_id, caplog): + caplog.set_level(logging.INFO) maps = MapImages( f"{sample_dir}/{image_id}", tree_level="patch", @@ -170,11 +176,9 @@ def test_init_parent_path(sample_dir, image_id, capfd): assert len(maps.list_patches()) == 1 # without passing tree level should get warning + maps = MapImages(f"{sample_dir}/{image_id}", parent_path=f"{sample_dir}/{image_id}") - out, _ = capfd.readouterr() - assert ( - "[WARNING] Ignoring `parent_path` as `tree_level` is set to 'parent'." in out - ) + assert "Ignoring `parent_path` as `tree_level` is set to 'parent'." in caplog.text assert len(maps.list_parents()) == 1 assert len(maps.list_patches()) == 0 @@ -549,16 +553,16 @@ def test_calc_coords_from_grid_bb(sample_dir, image_id): ) -def test_calc_coords_from_grid_bb_warning(sample_dir, image_id, capfd): +def test_calc_coords_from_grid_bb_warning(sample_dir, image_id, caplog): + caplog.set_level(logging.INFO) maps = MapImages(f"{sample_dir}/{image_id}") assert all([x not in maps.parents[image_id] for x in ["coordinates", "grid_bb"]]) maps.add_coords_from_grid_bb() - out, _ = capfd.readouterr() - assert "[WARNING] No grid bounding box" in out + assert "No grid bounding box" in caplog.text assert "coordinates" not in maps.parents[image_id] -def test_calc_coords_from_grid_bb_error(sample_dir, image_id, capfd): +def test_calc_coords_from_grid_bb_error(sample_dir, image_id): maps = MapImages(f"{sample_dir}/{image_id}") maps.add_metadata( f"{sample_dir}/ts_downloaded_maps.csv", columns=["name", "grid_bb", "crs"] @@ -569,7 +573,8 @@ def test_calc_coords_from_grid_bb_error(sample_dir, image_id, capfd): maps.add_coords_from_grid_bb() -def test_coord_functions(init_maps, image_id, sample_dir, capfd): +def test_coord_functions(init_maps, image_id, sample_dir, caplog): + caplog.set_level(logging.INFO) # test for png with added metadata maps, _, patch_list = init_maps maps.add_center_coord() @@ -591,8 +596,7 @@ def test_coord_functions(init_maps, image_id, sample_dir, capfd): keys = list(tiffs.parents[image_id].keys()) tiffs.add_coord_increments() tiffs.add_center_coord(tree_level="parent") - out, _ = capfd.readouterr() - assert "[WARNING] 'coordinates' could not be found" in out + assert "'coordinates' could not be found" in caplog.text assert list(tiffs.parents[image_id].keys()) == keys @@ -652,7 +656,8 @@ def test_save_patches_as_geotiffs_grayscale(sample_dir, tmp_path): assert os.path.isfile(maps.patches[patch_id]["geotiff_path"]) -def test_save_to_geojson(init_maps, tmp_path, capfd): +def test_save_to_geojson(init_maps, tmp_path, caplog): + caplog.set_level(logging.INFO) maps, _, _ = init_maps maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") assert os.path.exists(f"{tmp_path}/patches.geojson") @@ -662,8 +667,7 @@ def test_save_to_geojson(init_maps, tmp_path, capfd): assert isinstance(geo_df["geometry"][0], Polygon) maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") - out, _ = capfd.readouterr() - assert "[WARNING] File already exists" in out + assert "File already exists" in caplog.text def test_save_to_geojson_missing_data(sample_dir, image_id, tmp_path): diff --git a/tests/test_sheet_downloader.py b/tests/test_sheet_downloader.py index 
c2b19e97..c3ce3614 100644 --- a/tests/test_sheet_downloader.py +++ b/tests/test_sheet_downloader.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os from ast import literal_eval from pathlib import Path @@ -15,6 +16,9 @@ from mapreader.download.tile_loading import TileDownloader from mapreader.download.tile_merging import TileMerger +# Set up logging +logger = logging.getLogger(__name__) + @pytest.fixture def sample_dir(): @@ -38,7 +42,9 @@ def test_init(sheet_downloader): def test_init_errors(sample_dir): test_json = f"{sample_dir}/test_json.json" # crs changed to EPSG:3857 (note: coordinates are wrong in file) - download_url = "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png" + download_url = ( + "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png" + ) with pytest.raises(ValueError, match="file not found"): SheetDownloader("fake_file.json", download_url) with pytest.raises(ValueError, match="string or list of strings"): @@ -52,6 +58,11 @@ def test_get_polygons(sheet_downloader): assert (isinstance(sd.features[i]["polygon"], Polygon) for i in sd.features) +# TODO: add a test for when there are multiple geometries in one of the map's +# features, i.e. when get_polygons is run and only one of the feature's +# polygons is selected => logger warning for "Multiple geometries found in map" + + def test_get_grid_bb(sheet_downloader): sd = sheet_downloader sd.get_grid_bb() @@ -67,7 +78,9 @@ def test_get_grid_bb(sheet_downloader): def test_get_grid_bb_errors(sample_dir): test_json = f"{sample_dir}/test_json_epsg3857.json" # crs changed to EPSG:3857 (note: coordinates are wrong in file) - download_url = "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png" + download_url = ( + "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png" + ) sd = SheetDownloader(test_json, download_url) with pytest.raises(NotImplementedError, match="EPSG:4326"): sd.get_grid_bb() @@ -120,14 +133,12 @@ def test_get_merged_polygon(sheet_downloader): assert isinstance(sd.merged_polygon, MultiPolygon) -def test_get_minmax_latlon(sheet_downloader, capfd): +def test_get_minmax_latlon(sheet_downloader, caplog): + caplog.set_level(logging.INFO) sd = sheet_downloader sd.get_minmax_latlon() - out, _ = capfd.readouterr() - assert ( - out - == "[INFO] Min lat: 51.49344796, max lat: 54.2089733 \n[INFO] Min lon: -4.7682, max lon: -0.16093917\n" - ) + assert "Min lat: 51.49344796, max lat: 54.2089733" in caplog.text + assert "Min lon: -4.7682, max lon: -0.16093917" in caplog.text # queries @@ -469,7 +480,9 @@ def test_download_by_wfs_ids(sheet_downloader, tmp_path, mock_response): assert df.loc[1, "name"] == "map_101602038.png" -def test_download_same_image_names(sheet_downloader, tmp_path, capfd, mock_response): + +def test_download_same_image_names(sheet_downloader, tmp_path, caplog): + caplog.set_level(logging.INFO) sd = sheet_downloader sd.get_grid_bb(14) maps_path = tmp_path / "test_maps/" @@ -495,10 +508,9 @@ def test_download_same_image_names(sheet_downloader, tmp_path, capfd, mock_respo # run again, nothing should happen sd.download_map_sheets_by_wfs_ids([107, 116], maps_path, metadata_fname, force=True) - out, _ = capfd.readouterr() - assert out.endswith( - '[INFO] "map_101603986.png" already exists. Skipping download.\n[INFO] "map_101603986_1.png" already exists. Skipping download.\n' - ) + assert '"map_101603986.png" already exists. Skipping download.' 
in caplog.text + assert '"map_101603986_1.png" already exists. Skipping download.' in caplog.text + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) assert len(df) == 2
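Note on usage (an illustration, not part of the diff): with the print() calls replaced by module-level loggers, the former [INFO] messages become silent by default, since Python's root logger only emits WARNING and above and the library itself no longer prints. A minimal sketch of how a downstream script or notebook could opt back in; the "mapreader" logger name is implied by the logging.getLogger(__name__) calls in the modules above:

    import logging

    # Show INFO-level messages from all MapReader modules. Their loggers are
    # named after the module paths, so they all sit under the "mapreader" namespace.
    logging.basicConfig(format="%(levelname)s:%(name)s:%(message)s")
    logging.getLogger("mapreader").setLevel(logging.INFO)

On the library side, the Python logging documentation recommends attaching a logging.NullHandler() to a package's top-level logger; mapreader/__init__.py is untouched in this diff, so that would be a possible follow-up rather than something this change already does.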