From 6a6c43c0b5ee54d16bf383a35929f879b123e447 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:24:07 +0100 Subject: [PATCH 01/37] :boom: Refactored conf --- melusine/__init__.py | 63 ++++++-- melusine/_config.py | 143 ++++++++++++++++++ .../conf/detectors/emergency_detector.yaml | 3 + melusine/conf/detectors/reply_detector.yaml | 3 + melusine/conf/detectors/thanks_detector.yaml | 3 + .../conf/detectors/transfer_detector.yaml | 4 + .../detectors/vacation_reply_detector.yaml | 3 + melusine/conf/global.yaml | 5 + melusine/conf/models.yaml | 4 + melusine/conf/pipelines/demo_pipeline.yaml | 23 +++ .../conf/pipelines/expeditor_pipeline.yaml | 8 + melusine/conf/pipelines/my_pipeline.yaml | 17 +++ .../conf/pipelines/pipeline_selection.yaml | 18 +++ .../pipelines/preprocessing_pipeline.yaml | 35 +++++ .../conf/pipelines/recipients_pipeline.yaml | 8 + melusine/conf/pipelines/reply_pipeline.yaml | 8 + melusine/conf/pipelines/thanks_pipeline.yaml | 8 + .../conf/pipelines/transfer_pipeline.yaml | 8 + .../pipelines/vacation_reply_pipeline.yaml | 8 + melusine/conf/processors/cleaner.yaml | 31 ++++ melusine/conf/processors/content_tagger.yaml | 9 ++ melusine/conf/processors/normalizer.yaml | 30 ++++ melusine/conf/processors/segmenter.yaml | 5 + melusine/conf/processors/text_extractor.yaml | 9 ++ melusine/conf/processors/text_flagger.yaml | 19 +++ melusine/conf/processors/tokenizer.yaml | 21 +++ .../conf/processors/tokens_extractor.yaml | 6 + .../transferred_email_processor.yaml | 8 + melusine/conf/regex/complex_regex.yaml | 17 +++ melusine/conf/regex/regex.yaml | 1 + melusine/conf/shared.yaml | 1 + 31 files changed, 513 insertions(+), 16 deletions(-) create mode 100644 melusine/_config.py create mode 100644 melusine/conf/detectors/emergency_detector.yaml create mode 100644 melusine/conf/detectors/reply_detector.yaml create mode 100644 melusine/conf/detectors/thanks_detector.yaml create mode 100644 melusine/conf/detectors/transfer_detector.yaml create mode 
100644 melusine/conf/detectors/vacation_reply_detector.yaml create mode 100644 melusine/conf/global.yaml create mode 100644 melusine/conf/models.yaml create mode 100644 melusine/conf/pipelines/demo_pipeline.yaml create mode 100644 melusine/conf/pipelines/expeditor_pipeline.yaml create mode 100644 melusine/conf/pipelines/my_pipeline.yaml create mode 100644 melusine/conf/pipelines/pipeline_selection.yaml create mode 100644 melusine/conf/pipelines/preprocessing_pipeline.yaml create mode 100644 melusine/conf/pipelines/recipients_pipeline.yaml create mode 100644 melusine/conf/pipelines/reply_pipeline.yaml create mode 100644 melusine/conf/pipelines/thanks_pipeline.yaml create mode 100644 melusine/conf/pipelines/transfer_pipeline.yaml create mode 100644 melusine/conf/pipelines/vacation_reply_pipeline.yaml create mode 100644 melusine/conf/processors/cleaner.yaml create mode 100644 melusine/conf/processors/content_tagger.yaml create mode 100644 melusine/conf/processors/normalizer.yaml create mode 100644 melusine/conf/processors/segmenter.yaml create mode 100644 melusine/conf/processors/text_extractor.yaml create mode 100644 melusine/conf/processors/text_flagger.yaml create mode 100644 melusine/conf/processors/tokenizer.yaml create mode 100644 melusine/conf/processors/tokens_extractor.yaml create mode 100644 melusine/conf/processors/transferred_email_processor.yaml create mode 100644 melusine/conf/regex/complex_regex.yaml create mode 100644 melusine/conf/regex/regex.yaml create mode 100644 melusine/conf/shared.yaml diff --git a/melusine/__init__.py b/melusine/__init__.py index e0c9a87..40022fe 100644 --- a/melusine/__init__.py +++ b/melusine/__init__.py @@ -1,16 +1,47 @@ -"""Top-level package for melusine.""" - -__author__ = """Sacha Samama & Tom Stringer & Hugo Perrier""" -__email__ = ["tiphaine.fabre@maif.fr", "hperrier@quantmetry.com"] -__version__ = "2.3.6" - -from .data.data_loader import load_email_data -from .config.config import config - -__all__ = [ - "__author__", 
- "__email__", - "__version__", - "load_email_data", - "config", -] +""" +Top-level package. +""" +from ctypes import CDLL, cdll +from typing import Any, Optional + +import pandas as pd + +from melusine._config import config + +__all__ = ["config"] + +VERSION = (3, 0, 0) +__version__ = ".".join(map(str, VERSION)) + + +# ------------------------------- # +# MONKEY PATCH +# ------------------------------- # + +# Monkey patch for pandas DataFrame memory leaking on linux OS (pandas issue #2659) +try: + # Try executing linux malloc_trim function (release free memory) + cdll.LoadLibrary("libc.so.6") + libc: Optional[CDLL] = CDLL("libc.so.6") + if libc is not None: + libc.malloc_trim(0) +except (OSError, AttributeError): # pragma: no cover + # Incompatible OS: this monkey patch is not needed + libc = None + +# Store the standard pandas method +__std_del: Optional[Any] = getattr(pd.DataFrame, "__del__", None) + + +# Prepare a new __del__ method +def __fixed_del(self: Any) -> None: # pragma: no cover + """Override DataFrame's __del__ method: call the standard method + release free memory with malloc_trim.""" + if __std_del is not None: + __std_del(self) + if libc is not None: + libc.malloc_trim(0) + + +# Override standard pandas method if needed +if libc is not None: + pd.DataFrame.__del__ = __fixed_del diff --git a/melusine/_config.py b/melusine/_config.py new file mode 100644 index 0000000..a21458a --- /dev/null +++ b/melusine/_config.py @@ -0,0 +1,143 @@ +""" +Module which handles the package configuration. +""" +import copy +import logging +import os +from collections import UserDict +from pathlib import Path +from typing import Any, Dict, List, Optional, cast, no_type_check + +from omegaconf import OmegaConf + +logger = logging.getLogger(__name__) +CONST_ENV_MELUSINE_CONFIG_DIR = "MELUSINE_CONFIG_DIR" + + +class MelusineConfig(UserDict): + """ + The MelusineConfig class acts as a dict containing configurations. 
+ The configurations can be changed dynamically using the `reset` method. + """ + + ENV_MELUSINE_CONFIG_DIR = "MELUSINE_CONFIG_DIR" + LOG_MESSAGE_DEFAULT_CONFIG = "Using default configurations." + LOG_MESSAGE_CONFIG_FROM_ENV_VARIABLE = f"Using config_path from env variable {ENV_MELUSINE_CONFIG_DIR}." + LOG_MESSAGE_CONFIG_PATH = "Using config_path : {config_path}." + DEFAULT_CONFIG_PATH = str(Path(__file__).parent.resolve() / "conf") + + @no_type_check + def pop(self, s: Any = None) -> None: + """ + Prevent MelusineConfig modification. + """ + raise MelusineConfigError() + + @no_type_check + def popitem(self, s: Any = None) -> None: + """ + Prevent MelusineConfig modification. + """ + raise MelusineConfigError() + + def __setitem__(self, key: str, value: Any) -> None: + """ + Prevent MelusineConfig modification. + """ + raise MelusineConfigError() + + def dict(self) -> Dict[str, Any]: + """ + Return a copy of the config dict. + """ + return copy.deepcopy(self.data) + + @staticmethod + def _load_from_path(config_path: str) -> Dict[str, Any]: + """ + Load yaml config files, merge them and return a config dict. + """ + yaml_conf_file_list = list(Path(config_path).rglob("*.yaml")) + list(Path(config_path).rglob("*.yml")) + omega_conf = OmegaConf.unsafe_merge(*[OmegaConf.load(conf_file) for conf_file in yaml_conf_file_list]) + return cast(Dict[str, Any], OmegaConf.to_object(omega_conf)) + + def reset(self, config_dict: Optional[Dict[str, Any]] = None, config_path: Optional[str] = None) -> None: + """ + Function to reset the Melusine configuration using a dict or a path. + + Parameters + ---------- + config_dict: Dict containing the new config. + config_path: path to directory containing YAML config files.
+ """ + config_path_from_env = os.getenv(self.ENV_MELUSINE_CONFIG_DIR) + + if not config_dict and not config_path: + if config_path_from_env: + logger.info(self.LOG_MESSAGE_CONFIG_FROM_ENV_VARIABLE) + config_path = config_path_from_env + else: + logger.warning(self.LOG_MESSAGE_DEFAULT_CONFIG) + config_dict = self._load_from_path(self.DEFAULT_CONFIG_PATH) + + if config_path: + logger.info(self.LOG_MESSAGE_CONFIG_PATH.format(config_path=config_path)) + config_dict = self._load_from_path(config_path) + + if config_dict is None: + raise MelusineConfigError() # pragma no cover + + self.data = config_dict + + def export_default_config(self, path: str) -> List[str]: + """ + Export the default Melusine configurations to a directory. + + Parameters + ---------- + path: Destination path + + Returns + ------- + _: + """ + from distutils.dir_util import copy_tree + + source = self.DEFAULT_CONFIG_PATH + file_list: List[str] = copy_tree(source, path) + + return file_list + + +# Load Melusine configuration +config = MelusineConfig() +config.reset() + + +class MelusineConfigError(Exception): + """ + Exception raised when encountering config related errors. + """ + + CONST_CONFIG_ERROR_MESSAGE = f"""To modify the config use the `reset` method: + - Using a dict: + > from melusine import config + > config.reset(config_dict=my_dict) + - Using the path to a directory containing YAML files: + > from melusine import config + > config.reset(config_path=my_config_path) + - Reset to default configurations: + > from melusine import config + > config.reset() + - Using the {MelusineConfig.ENV_MELUSINE_CONFIG_DIR} environment variable: + > import os + > os.environ["{MelusineConfig.ENV_MELUSINE_CONFIG_DIR}"] = "/path/to/config/dir" + > from melusine import config + > config.reset() + """ + + def __init__(self, msg: str = CONST_CONFIG_ERROR_MESSAGE, *args: Any) -> None: + """ + Initialize with a default error message. 
+ """ + super().__init__(msg, *args) diff --git a/melusine/conf/detectors/emergency_detector.yaml b/melusine/conf/detectors/emergency_detector.yaml new file mode 100644 index 0000000..5c40cd8 --- /dev/null +++ b/melusine/conf/detectors/emergency_detector.yaml @@ -0,0 +1,3 @@ +emergency_detector: + name: emergency + text_column: det_normalized_last_body diff --git a/melusine/conf/detectors/reply_detector.yaml b/melusine/conf/detectors/reply_detector.yaml new file mode 100644 index 0000000..f2d3379 --- /dev/null +++ b/melusine/conf/detectors/reply_detector.yaml @@ -0,0 +1,3 @@ +reply_detector: + name: reply + header_column: det_clean_header diff --git a/melusine/conf/detectors/thanks_detector.yaml b/melusine/conf/detectors/thanks_detector.yaml new file mode 100644 index 0000000..68b4341 --- /dev/null +++ b/melusine/conf/detectors/thanks_detector.yaml @@ -0,0 +1,3 @@ +thanks_detector: + name: thanks + messages_column: messages diff --git a/melusine/conf/detectors/transfer_detector.yaml b/melusine/conf/detectors/transfer_detector.yaml new file mode 100644 index 0000000..cdd48ab --- /dev/null +++ b/melusine/conf/detectors/transfer_detector.yaml @@ -0,0 +1,4 @@ +transfer_detector: + name: transfer + header_column: det_clean_header + messages_column: messages diff --git a/melusine/conf/detectors/vacation_reply_detector.yaml b/melusine/conf/detectors/vacation_reply_detector.yaml new file mode 100644 index 0000000..32362bf --- /dev/null +++ b/melusine/conf/detectors/vacation_reply_detector.yaml @@ -0,0 +1,3 @@ +vacation_reply_detector: + name: vacation_reply + messages_column: messages diff --git a/melusine/conf/global.yaml b/melusine/conf/global.yaml new file mode 100644 index 0000000..9ecbf1f --- /dev/null +++ b/melusine/conf/global.yaml @@ -0,0 +1,5 @@ +global: + test_shared_variable: ${TEST_VAR} # For demonstration purpose +message: + str_line_length: 120 + str_tag_name_length: 22 diff --git a/melusine/conf/models.yaml b/melusine/conf/models.yaml new file mode 100644 
index 0000000..077bf71 --- /dev/null +++ b/melusine/conf/models.yaml @@ -0,0 +1,4 @@ +models: + dummy_model: + inference_mode: LOCAL + version: null diff --git a/melusine/conf/pipelines/demo_pipeline.yaml b/melusine/conf/pipelines/demo_pipeline.yaml new file mode 100644 index 0000000..0b13b1f --- /dev/null +++ b/melusine/conf/pipelines/demo_pipeline.yaml @@ -0,0 +1,23 @@ +demo_pipeline: + steps: + - class_name: Cleaner + config_key: body_cleaner + module: melusine.processors + - class_name: Cleaner + config_key: header_cleaner + module: melusine.processors + - class_name: Segmenter + config_key: segmenter + module: melusine.processors + - class_name: ContentTagger + config_key: content_tagger + module: melusine.processors + - class_name: TextExtractor + config_key: text_extractor + module: melusine.processors + - class_name: Normalizer + config_key: demo_normalizer + module: melusine.processors + - class_name: EmergencyDetector + config_key: emergency_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/expeditor_pipeline.yaml b/melusine/conf/pipelines/expeditor_pipeline.yaml new file mode 100644 index 0000000..b193587 --- /dev/null +++ b/melusine/conf/pipelines/expeditor_pipeline.yaml @@ -0,0 +1,8 @@ +expeditor_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: ExpeditorDetector + config_key: expeditor_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/my_pipeline.yaml b/melusine/conf/pipelines/my_pipeline.yaml new file mode 100644 index 0000000..e37248d --- /dev/null +++ b/melusine/conf/pipelines/my_pipeline.yaml @@ -0,0 +1,17 @@ +my_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: ThanksDetector + config_key: thanks_detector + module: melusine.detectors + - class_name: VacationReplyDetector + config_key: vacation_reply_detector + module: 
melusine.detectors + - class_name: ReplyDetector + config_key: reply_detector + module: melusine.detectors + - class_name: TransferDetector + config_key: transfer_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/pipeline_selection.yaml b/melusine/conf/pipelines/pipeline_selection.yaml new file mode 100644 index 0000000..04d2ebf --- /dev/null +++ b/melusine/conf/pipelines/pipeline_selection.yaml @@ -0,0 +1,18 @@ +pipeline_selection: +- attr_1: value_1 + attr_2: value_2 + attr_3: value_3 + pipeline_conf_key: pipeline_abc +- attr_1: value_1 + attr_2: value_2 + pipeline_conf_key: pipeline_def +- attr_1: value_1 + pipeline_conf_key: pipeline_ghi +- attr_2: value_2 + pipeline_conf_key: pipeline_jkl +- attr_1: value_1 + attr_2: value_21 + pipeline_conf_key: pipeline_mno +- attr_1: value_1 + attr_3: value_3 + pipeline_conf_key: pipeline_pqr diff --git a/melusine/conf/pipelines/preprocessing_pipeline.yaml b/melusine/conf/pipelines/preprocessing_pipeline.yaml new file mode 100644 index 0000000..cec8380 --- /dev/null +++ b/melusine/conf/pipelines/preprocessing_pipeline.yaml @@ -0,0 +1,35 @@ +preprocessing_pipeline: + steps: + - class_name: Cleaner + config_key: body_cleaner + module: melusine.processors + - class_name: Cleaner + config_key: header_cleaner + module: melusine.processors + - class_name: Segmenter + config_key: segmenter + module: melusine.processors + - class_name: ContentTagger + config_key: content_tagger + module: melusine.processors + - class_name: TransferredEmailProcessor + config_key: transferred_email_processor + module: melusine.processors + - class_name: TextExtractor + config_key: text_extractor + module: melusine.processors + - class_name: DeterministicTextFlagger + config_key: text_flagger + module: melusine.processors + - class_name: DeterministicTextFlagger + config_key: header_flagger + module: melusine.processors + - class_name: RegexTokenizer + config_key: tokenizer + module: melusine.processors + - class_name: 
RegexTokenizer + config_key: header_tokenizer + module: melusine.processors + - class_name: TokensExtractor + config_key: tokens_extractor + module: melusine.processors diff --git a/melusine/conf/pipelines/recipients_pipeline.yaml b/melusine/conf/pipelines/recipients_pipeline.yaml new file mode 100644 index 0000000..a712efe --- /dev/null +++ b/melusine/conf/pipelines/recipients_pipeline.yaml @@ -0,0 +1,8 @@ +recipients_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: RecipientsDetector + config_key: recipients_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/reply_pipeline.yaml b/melusine/conf/pipelines/reply_pipeline.yaml new file mode 100644 index 0000000..ef08b2e --- /dev/null +++ b/melusine/conf/pipelines/reply_pipeline.yaml @@ -0,0 +1,8 @@ +reply_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: ReplyDetector + config_key: reply_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/thanks_pipeline.yaml b/melusine/conf/pipelines/thanks_pipeline.yaml new file mode 100644 index 0000000..957bf3f --- /dev/null +++ b/melusine/conf/pipelines/thanks_pipeline.yaml @@ -0,0 +1,8 @@ +thanks_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: ThanksDetector + config_key: thanks_detector + module: melusine.detectors diff --git a/melusine/conf/pipelines/transfer_pipeline.yaml b/melusine/conf/pipelines/transfer_pipeline.yaml new file mode 100644 index 0000000..d2a2a3c --- /dev/null +++ b/melusine/conf/pipelines/transfer_pipeline.yaml @@ -0,0 +1,8 @@ +transfer_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: TransferDetector + config_key: transfer_detector + module: melusine.detectors diff --git 
a/melusine/conf/pipelines/vacation_reply_pipeline.yaml b/melusine/conf/pipelines/vacation_reply_pipeline.yaml new file mode 100644 index 0000000..f1bab7b --- /dev/null +++ b/melusine/conf/pipelines/vacation_reply_pipeline.yaml @@ -0,0 +1,8 @@ +vacation_reply_pipeline: + steps: + - class_name: MelusinePipeline + config_key: preprocessing_pipeline + module: melusine.pipeline + - class_name: VacationReplyDetector + config_key: vacation_reply_detector + module: melusine.detectors diff --git a/melusine/conf/processors/cleaner.yaml b/melusine/conf/processors/cleaner.yaml new file mode 100644 index 0000000..c4403b7 --- /dev/null +++ b/melusine/conf/processors/cleaner.yaml @@ -0,0 +1,31 @@ +body_cleaner: + input_columns: + - body + output_columns: + - tmp_clean_body + substitutions: + "’": "'" # Replace special quotes + "œ": "oe" # Specific french combination + "\\s*[\\r\\n]+\\s*": "\\n" # Standardize newline characters + " +": " " # Remove multiple spaces + "\\n*<\\n(\\w)": " <\\1" # Undesired newlines + "(\\w)\\n>": "\\1>" # Undesired newlines + "\n ?»": " »" # Undesired newlines + "« ?\n": "« " # Undesired newlines + "\\xa0": " " # Replace non-breaking spaces + "(?m)^\\s+": "" # Remove beginning of line newline characters + "(?m)\\s+$": "" # Remove ending of line newline characters + +header_cleaner: + input_columns: + - header + output_columns: + - det_clean_header + substitutions: + "’": "'" + "œ": "oe" + "\\s*[\\r\\n]+\\s*": "\\n" + " +": " " + "<\\n(\\w)": "<\\1" + "(\\w)\\n>": "\\1>" + "\\xa0": " " diff --git a/melusine/conf/processors/content_tagger.yaml b/melusine/conf/processors/content_tagger.yaml new file mode 100644 index 0000000..14e7845 --- /dev/null +++ b/melusine/conf/processors/content_tagger.yaml @@ -0,0 +1,9 @@ +content_tagger: + default_tag: BODY + tag_list: + - PJ + - FOOTER + - THANKS + - HELLO + - SIGNATURE + - GREETINGS diff --git a/melusine/conf/processors/normalizer.yaml b/melusine/conf/processors/normalizer.yaml new file mode 100644 index 
0000000..7c9a276 --- /dev/null +++ b/melusine/conf/processors/normalizer.yaml @@ -0,0 +1,30 @@ +email_normalizer: + form: NFKD + input_columns: + - messages + lowercase: true + output_columns: + - messages + +normalizer: + form: NFKD + input_columns: + - body + lowercase: false + output_columns: + - text + +header_normalizer: + form: NFKD + input_columns: + - header + lowercase: true + output_columns: + - clean_header + +demo_normalizer: + input_columns: + - det_clean_last_body + lowercase: false + output_columns: + - det_normalized_last_body diff --git a/melusine/conf/processors/segmenter.yaml b/melusine/conf/processors/segmenter.yaml new file mode 100644 index 0000000..6cd3021 --- /dev/null +++ b/melusine/conf/processors/segmenter.yaml @@ -0,0 +1,5 @@ +segmenter: + input_columns: + - tmp_clean_body + output_columns: + - messages diff --git a/melusine/conf/processors/text_extractor.yaml b/melusine/conf/processors/text_extractor.yaml new file mode 100644 index 0000000..16e1713 --- /dev/null +++ b/melusine/conf/processors/text_extractor.yaml @@ -0,0 +1,9 @@ +text_extractor: + include_tags: BODY + input_columns: + - messages + output_columns: + - det_clean_last_body + n_messages: 1 + stop_at: + - GREETINGS diff --git a/melusine/conf/processors/text_flagger.yaml b/melusine/conf/processors/text_flagger.yaml new file mode 100644 index 0000000..3ef7218 --- /dev/null +++ b/melusine/conf/processors/text_flagger.yaml @@ -0,0 +1,19 @@ +text_flagger: + input_columns: + - det_clean_last_body + output_columns: + - det_flagged_last_body + add_spaces: true + text_flags: + 'rendez[- ]vous': rendez_vous + 'ci[- ]joint': ci_joint + +header_flagger: + input_columns: + - det_clean_header + output_columns: + - det_flagged_header + add_spaces: true + text_flags: + 'rendez[- ]vous': rendez_vous + 'ci[- ]joint': ci_joint diff --git a/melusine/conf/processors/tokenizer.yaml b/melusine/conf/processors/tokenizer.yaml new file mode 100644 index 0000000..d6ba9b4 --- /dev/null +++ 
b/melusine/conf/processors/tokenizer.yaml @@ -0,0 +1,21 @@ +tokenizer: + input_columns: + - det_flagged_last_body + output_columns: + - ml_body_tokens + normalization_form: NFKD + stopwords: + - le + - les + tokenizer_regex: \w+(?:[\?\-\"_]\w+)* + +header_tokenizer: + input_columns: + - det_flagged_header + output_columns: + - ml_header_tokens + normalization_form: NFKD + stopwords: + - le + - les + tokenizer_regex: \w+(?:[\?\-\"_]\w+)* diff --git a/melusine/conf/processors/tokens_extractor.yaml b/melusine/conf/processors/tokens_extractor.yaml new file mode 100644 index 0000000..0d45bb8 --- /dev/null +++ b/melusine/conf/processors/tokens_extractor.yaml @@ -0,0 +1,6 @@ +tokens_extractor: + input_columns: + - ml_header_tokens + - ml_body_tokens + output_columns: + - ml_tokens diff --git a/melusine/conf/processors/transferred_email_processor.yaml b/melusine/conf/processors/transferred_email_processor.yaml new file mode 100644 index 0000000..159e72e --- /dev/null +++ b/melusine/conf/processors/transferred_email_processor.yaml @@ -0,0 +1,8 @@ +transferred_email_processor: + output_columns: + - messages + - det_original_from + tags_to_ignore: + - FOOTER + - SIGNATURE + messages_column: messages diff --git a/melusine/conf/regex/complex_regex.yaml b/melusine/conf/regex/complex_regex.yaml new file mode 100644 index 0000000..e73a715 --- /dev/null +++ b/melusine/conf/regex/complex_regex.yaml @@ -0,0 +1,17 @@ +complex_regexes: + MSG_ABSENCE: VAC_REP_HOLIDAYS or VAC_REP_OUT_OF_OFFICE + RDV: IS_RDV and not(RDV_EXCEPT) + THANKS: THANKS_MANDATORY_WORDS and not(THANKS_FORBIDDEN_WORDS) + VACATION_REPLY: > + (VAC_REP_ORDER or VAC_REP_ACKNOLEDGMENT or VAC_REP_AUTO or VAC_REP_NO_REPLY or VAC_REP_OUT_OF_OFFICE_ENG + or VAC_REP_OUT_OF_OFFICE or VAC_REP_ON_MOVE or VAC_REP_HOLIDAYS or VAC_REP_URGENCY) + and not(VAC_REP_FORBIDDEN_CO or VAC_REP_FORBIDDEN_HOME or VAC_REP_FORBIDDEN_LEAVE or VAC_REP_FORBIDDEN_SICKNESS + or VAC_REP_FORBIDDEN_EMAIL or VAC_REP_FORBIDDEN_ABSENCE or 
VAC_REP_FORBIDDEN_AMBIGUOUS + or VAC_REP_FORBIDDEN_EXTRA or VAC_REP_FORBIDDEN_RETURN or VAC_PREP_FORBIDDEN_FORGOT or VAC_PREP_FORBIDDEN_ACTION + or VAC_REP_FORBIDDEN_PROC or INTERROGATION_MARK) + DUMMY_CPLX: DUMMY_UNIT and not(IS_RDV) + TEST_REGEX: TEST_UNIT_REGEX_POS and not(TEST_UNIT_REGEX_NEG) + REPLACE_TEST: TEST_UNIT_REGEX or TEST_UNIT_REGEX_POS and not(TEST_UNIT_REGEX_NEG) + TRANSFER: IS_TRANSFER + REPLY: IS_REPLY + EXPEDITORS: NOREPLY or SPECIFIC_EXPEDITEUR diff --git a/melusine/conf/regex/regex.yaml b/melusine/conf/regex/regex.yaml new file mode 100644 index 0000000..ab846dc --- /dev/null +++ b/melusine/conf/regex/regex.yaml @@ -0,0 +1 @@ +regex: {} diff --git a/melusine/conf/shared.yaml b/melusine/conf/shared.yaml new file mode 100644 index 0000000..7ee3666 --- /dev/null +++ b/melusine/conf/shared.yaml @@ -0,0 +1 @@ +TEST_VAR: test # For demonstration purpose From e3b909909c42562932daf65b6f9351cfc7fceb39 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:25:03 +0100 Subject: [PATCH 02/37] :sparkles: New feature : variable dataset backend --- melusine/backend/__init__.py | 3 + melusine/backend/active_backend.py | 196 ++++++++++++++++ melusine/backend/base_backend.py | 146 ++++++++++++ melusine/backend/dict_backend.py | 169 ++++++++++++++ melusine/backend/pandas_backend.py | 360 +++++++++++++++++++++++++++++ 5 files changed, 874 insertions(+) create mode 100644 melusine/backend/__init__.py create mode 100644 melusine/backend/active_backend.py create mode 100644 melusine/backend/base_backend.py create mode 100644 melusine/backend/dict_backend.py create mode 100644 melusine/backend/pandas_backend.py diff --git a/melusine/backend/__init__.py b/melusine/backend/__init__.py new file mode 100644 index 0000000..88592e4 --- /dev/null +++ b/melusine/backend/__init__.py @@ -0,0 +1,3 @@ +from .active_backend import backend + +__all__ = ["backend"] diff --git a/melusine/backend/active_backend.py b/melusine/backend/active_backend.py new file mode 
100644 index 0000000..1844b00 --- /dev/null +++ b/melusine/backend/active_backend.py @@ -0,0 +1,196 @@ +""" +Melusine transformation can operate on different data structures such as dict or pandas.DataFrame. +Different transformation backends are used to process different data structures. +The ActiveBackend class stores an instance of the activated backend. + +Implemented classes: [ + ActiveBackend, +] +""" + +import logging +from typing import Callable, List, Optional, Union + +from melusine.backend.base_backend import Any, BaseTransformerBackend +from melusine.backend.dict_backend import DictBackend + +logger = logging.getLogger(__name__) + + +class ActiveBackend(BaseTransformerBackend): + """ + Class storing the active backend used by Melusine. + """ + + PANDAS_BACKEND: str = "pandas" + DICT_BACKEND: str = "dict" + + def __init__(self) -> None: + """Init""" + super().__init__() + self._backend: Optional[BaseTransformerBackend] = None + + @property + def backend(self) -> BaseTransformerBackend: + """Backend attribute getter""" + if self._backend is None: + raise AttributeError("'_backend' attribute is None") + + else: + return self._backend + + def reset(self, new_backend: Union[BaseTransformerBackend, str] = PANDAS_BACKEND) -> None: + """ + Method to switch from current backend to specified backend. 
+ + Parameters + ---------- + new_backend: Union[BaseTransformerBackend, str] + New backend to be used + """ + + if isinstance(new_backend, BaseTransformerBackend): + self._backend = new_backend + + elif new_backend == self.PANDAS_BACKEND: + # Importing in local scope to prevent hard dependencies + from melusine.backend.pandas_backend import PandasBackend + + self._backend = PandasBackend() + + elif new_backend == self.DICT_BACKEND: + self._backend = DictBackend() + + else: + raise ValueError(f"Backend {new_backend} is not supported") + + logger.info(f"Using backend '{new_backend}' for Data transformations") + + def apply_transform( + self, + data: Any, + func: Callable, + output_columns: Optional[List[str]] = None, + input_columns: Optional[List[str]] = None, + **kwargs: Any, + ) -> Any: + """ + Method to apply a transform on a Dataset using current backend. + + Parameters + ---------- + data: Dataset + Data to be transformed + func: Callable + Transform function to apply to the input data + output_columns: Sequence[str] + List of output columns + input_columns: Sequence[str] + List of input columns + kwargs + + Returns + ------- + _: Dataset + Transformed data + """ + return self.backend.apply_transform( + data=data, + func=func, + output_columns=output_columns, + input_columns=input_columns, + **kwargs, + ) + + def copy(self, data: Any, fields: List[str] = None) -> Any: + """ + Method to make a copy of the input dataset. + + Parameters + ---------- + data: Dataset + MelusineDataset object + fields: List[str] + List of fields to include in the copy (by default copy all fields) + + Returns + ------- + _: Dataset + Copy of original object + """ + return self.backend.copy(data, fields=fields) + + def get_fields(self, data: Any) -> List[str]: + """ + Method to get the list of fields available in the input dataset. 
+ + Parameters + ---------- + data: Dataset + MelusineDataset object + + Returns + ------- + _: List[str] + List of dataset fields + """ + return self.backend.get_fields(data=data) + + def add_fields(self, left: Any, right: Any, fields: List[str] = None) -> Any: + """ + Method to add fields from the right object to the left object + + Parameters + ---------- + left: Dataset + MelusineDataset object + right: Dataset + Melusine Dataset object + fields: List[str] + List of fields to be added + + Returns + ------- + _: Dataset + Left object with added fields + """ + return self.backend.add_fields(left=left, right=right, fields=fields) + + def check_debug_flag(self, data: Any) -> bool: + """ + Method to check if debug_mode is activated. + + Parameters + ---------- + data: Dataset + MelusineDataset object + + Returns + ------- + _: bool + True if debug mode is activated + """ + return self.backend.check_debug_flag(data=data) + + def setup_debug_dict(self, data: Any, dict_name: str) -> Any: + """ + Method to set up the debug dict field in the dataset. + + Parameters + ---------- + data: Dataset + MelusineDataset object + dict_name: str + Name of the debug dict field to be added + + Returns + ------- + _: Dataset + MelusineDataset object + """ + return self.backend.setup_debug_dict(data=data, dict_name=dict_name) + + +# Instantiate the default backend +backend = ActiveBackend() +backend.reset() diff --git a/melusine/backend/base_backend.py b/melusine/backend/base_backend.py new file mode 100644 index 0000000..42ac450 --- /dev/null +++ b/melusine/backend/base_backend.py @@ -0,0 +1,146 @@ +""" +Melusine transformation can operate on different data structures such as dict or pandas.DataFrame. +Different transformation backends are used to process different data structures. +The BaseTransformerBackend class defines the interface for transformation backend classes.
+ +Implemented classes: [ + BaseTransformerBackend, +] +""" +from abc import ABC, abstractmethod +from typing import Any, Callable, List, Optional + + +class BaseTransformerBackend(ABC): + """ + Abstract base class defining how to implement a Melusine Backend. + Each backend applies transform operations on a specific type of data. + Ex: Pandas DataFrames, Dict, Spark objects, etc + """ + + DEBUG_FLAG = "debug" + + @abstractmethod + def apply_transform( + self, + data: Any, + func: Callable, + output_columns: Optional[List[str]] = None, + input_columns: Optional[List[str]] = None, + **kwargs: Any, + ) -> Any: + """ + Method to apply a transform on a Dataset using the current backend. + + Parameters + ---------- + data: Any + Data to be transformed + func: Callable + Transform function to apply to the input data + output_columns: List[str] + List of output columns + input_columns: List[str] + List of input columns + kwargs + + Returns + ------- + _: Any + Transformed data + """ + + @abstractmethod + def add_fields(self, left: Any, right: Any, fields: List[str] = None) -> Any: + """ + Method to add fields from the right object to the left object. + + Parameters + ---------- + left: Dataset + MelusineDataset object + right: Dataset + Melusine Dataset object + fields: List[str] + List of fields to be added + + Returns + ------- + _: Dataset + Left object with added fields + """ + + @abstractmethod + def copy(self, data: Any, fields: List[str] = None) -> Any: + """ + Method to make a copy of the dataset. + + Parameters + ---------- + data: Dataset + MelusineDataset object + fields: List[str] + List of fields to include in the copy (by default copy all fields) + + Returns + ------- + _: Dataset + Copy of original object + """ + + @abstractmethod + def get_fields(self, data: Any) -> List[str]: + """ + Method to get the list of fields available in the input dataset.
def apply_transform(
    self,
    data: Dict[str, Any],
    func: Callable,
    output_columns: Optional[List[str]] = None,
    input_columns: Optional[List[str]] = None,
    **kwargs: Any,
) -> Dict[str, Any]:
    """
    Apply a transform function on a dict using the Dict backend.

    When exactly one input column is given, `func` receives the value of that field.
    In every other case `func` receives the whole dict.

    Parameters
    ----------
    data: Dict[str, Any]
        Data to be transformed
    func: Callable
        Transform function to apply to the input data
    output_columns: List[str]
        List of output columns.
        - None / empty: the value returned by `func` replaces the whole dict
        - one name: the value returned by `func` is stored in that field
        - several names: the values returned by `func` are zipped with the names
    input_columns: List[str]
        List of input columns
    kwargs
        Extra keyword arguments forwarded to `func`

    Returns
    -------
    _: Dict[str, Any]
        Transformed data

    Raises
    ------
    ValueError
        If a single input column is given without any output column.
    """
    single_input = input_columns is not None and len(input_columns) == 1

    # A field-level function cannot replace the entire dict
    if single_input and not output_columns:
        raise ValueError("DictBackend does not support single input + None output situation.")

    # Field-level transform receives one value, dict-level transform receives everything
    func_input = data[input_columns[0]] if single_input else data
    result = func(func_input, **kwargs)

    # No output column: the function result is the new dict
    if not output_columns:
        return result

    if len(output_columns) == 1:
        # Create a single new field
        data[output_columns[0]] = result
    else:
        # Create multiple new fields
        data.update(dict(zip(output_columns, result)))

    return data
def setup_debug_dict(self, data: Dict[str, Any], dict_name: str) -> Dict[str, Any]:
    """
    Method to add an empty debug dict to the dataset.
    The input dict is modified in place (any existing value stored under `dict_name` is overwritten).

    Parameters
    ----------
    data: Dict[str, Any]
        MelusineDataset object
    dict_name: str
        Name of the debug dict field to be added

    Returns
    -------
    _: Dict[str, Any]
        MelusineDataset object
    """
    data[dict_name] = {}

    return data
def apply_transform(
    self,
    data: pd.DataFrame,
    func: Callable,
    output_columns: Optional[List[str]] = None,
    input_columns: Optional[List[str]] = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """
    Apply a transform on a DataFrame using the Pandas backend.
    Dispatches to the multiprocessing implementation when more than one worker is configured,
    and to the regular implementation otherwise.

    Parameters
    ----------
    data: pd.DataFrame
        Data to be transformed
    func: Callable
        Transform function to apply to the input data
    output_columns: List[str]
        List of output columns
    input_columns: List[str]
        List of input columns
    kwargs
        Extra keyword arguments forwarded to the selected implementation

    Returns
    -------
    _: pd.DataFrame
        Transformed data
    """
    # Multiprocessing only makes sense with several workers
    if self.workers > 1:
        return self.apply_transform_multiprocessing(
            data, func, output_columns, input_columns=input_columns, **kwargs
        )

    return self.apply_transform_regular(data, func, output_columns, input_columns=input_columns, **kwargs)
@staticmethod
def setup_apply_parameters(
    output_columns: Optional[List[str]] = None,
) -> Tuple[Union[None, str], Union[None, str, List[str]]]:
    """
    Derive the parameters of the pandas apply call from the requested output columns.

    Parameters
    ----------
    output_columns: List[str]
        List of output columns

    Returns
    -------
    expand: Union[None, str]
        "expand" when several output columns are requested, None otherwise
    new_cols: Union[None, str, List[str]]
        None when no output column is requested, the column name when a single
        output column is requested, a new list of column names otherwise
    """
    n_outputs = len(output_columns) if output_columns else 0

    # No output column
    if n_outputs == 0:
        return None, None

    # Single output column
    if n_outputs == 1:
        return None, output_columns[0]

    # Multiple output columns
    return "expand", list(output_columns)
def add_fields(
    self, left: pd.DataFrame, right: pd.DataFrame, fields: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Method to add fields from the right object to the left object.
    The left DataFrame is modified in place.

    Parameters
    ----------
    left: pd.DataFrame
        MelusineDataset object
    right: pd.DataFrame
        Melusine Dataset object
    fields: Optional[List[str]]
        List of fields to be added (by default, add all the columns of the right DataFrame)

    Returns
    -------
    _: pd.DataFrame
        Left object with added fields
    """
    # The default value (None) is not a valid column selector: fall back to every column
    # of the right DataFrame, which is also what DictBackend.add_fields does.
    if fields is None:
        fields = right.columns.to_list()

    left[fields] = right[fields]

    return left
+ + Parameters + ---------- + data: pd.DataFrame + MelusineDataset object + dict_name: str + Name of the debug dict field to be added + + Returns + ------- + _: pd.DataFrame + MelusineDataset object + """ + data[dict_name] = [{} for _ in range(len(data))] + + return data From 0fa5917903cab39a2511c27b6097cf69bdbda1b3 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:25:48 +0100 Subject: [PATCH 03/37] :recycle: Cleanup connectors --- melusine/connectors/__init__.py | 0 melusine/connectors/exchange.py | 548 ++++++++++++++++++++++++++++++++ 2 files changed, 548 insertions(+) create mode 100644 melusine/connectors/__init__.py create mode 100644 melusine/connectors/exchange.py diff --git a/melusine/connectors/__init__.py b/melusine/connectors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/melusine/connectors/exchange.py b/melusine/connectors/exchange.py new file mode 100644 index 0000000..3018cc3 --- /dev/null +++ b/melusine/connectors/exchange.py @@ -0,0 +1,548 @@ +import logging +import re +from typing import Any, Dict, List, Optional, Union + +import pandas as pd +from exchangelib import ( # noqa + Account, + Configuration, + Credentials, + FaultTolerance, + FileAttachment, + Folder, + HTMLBody, + Message, +) +from exchangelib.errors import ErrorFolderNotFound # noqa + +logger = logging.getLogger(__name__) + + +class ExchangeConnector: + """ + Connector to Outlook Exchange Mailboxs. + This class contains methods suited for automated emails routing. + """ + + def __init__( + self, + mailbox_address: str, + credentials: Credentials, + config: Configuration, + routing_folder_path: str = None, + correction_folder_path: str = None, + done_folder_path: str = None, + target_column: str = "target", + account_args: Dict[str, Any] = None, + sender_address: str = None, + ): + """ + Parameters + ---------- + mailbox_address: str + Email address of the mailbox. 
By default, the login address is used + credentials: Credentials + Exchangelib credentials to connect to an Exchange mailbox + config: Configuration + Exchangelib configuration object + routing_folder_path: str + Path of the base routing folder + correction_folder_path: str + Path of the base correction folder + done_folder_path: str + Path of the Done folder + target_column: str + Name of the DataFrame column containing target folder names + account_args: dict + Dict containing arguments to instantiate an exchangelib "Account" object. + sender_address: str + Email address used to send emails. + """ + + self.sender_address = sender_address + self.mailbox_address = mailbox_address + self.folder_list = None + self.target_column = target_column + # Default Account parameters + if not account_args: + account_args = {"autodiscover": True} + + # Connect to mailbox + self.credentials = credentials + self.exchangelib_config = config + # Mailbox account (Routing, Corrections, etc) + self.mailbox_account = Account( + self.mailbox_address, + credentials=self.credentials, + config=self.exchangelib_config, + **account_args, + ) + # Sender accounts (send emails) + if sender_address: + self.sender_account = Account( + self.sender_address, + credentials=self.credentials, + config=self.exchangelib_config, + **account_args, + ) + logger.info(f"Address {self.sender_address} is set up to send emails.") + else: + self.sender_account = None + logger.info("Sender address not specified, email sending is disabled.") + + # Setup correction folder and done folder + self.routing_folder_path = routing_folder_path + self.correction_folder_path = correction_folder_path + self.done_folder_path = done_folder_path + + logger.info(f"Connected to mailbox {self.mailbox_address}.") + + def _get_mailbox_path(self, path: Optional[str]) -> Folder: + """ + Utils function to get a mailbox Folder from a path string. 
+ Ex: + - input string : ROUTING + - output Folder : Folder at root/Haut de la banque d'informations/Boîte de réception/ROUTING + + Parameters + ---------- + path : str + String describing the desired path to a mailbox folder + + Returns + ------- + mailbox_path: Folder + Mailbox Folder corresponding to the input path + """ + # Default to inbox + if not path: + return self.mailbox_account.inbox + + # Start mailbox path from root folder + if re.match("/?root/", path, flags=re.I): + path = re.split("/?root/", path, flags=re.I)[1] + mailbox_path = self.mailbox_account.root + + # Start mailbox path from inbox folder + else: + mailbox_path = self.mailbox_account.inbox + + # Build mailbox path + folders = path.split("/") + for folder in folders: + if folder == "..": + mailbox_path = mailbox_path.parent + else: + mailbox_path = mailbox_path / folder + + return mailbox_path + + @staticmethod + def _get_folder_path(folder: Folder) -> Union[str, None]: + """ + Utils function to get the full mailbox path of a folder. + - input Folder : Folder("Routing") + - output string : root/Haut de la banque d'informations/Boîte de réception/ROUTING + + Parameters + ---------- + folder : Folder + Mailbox folder + + Returns + ------- + path: str + Full mailbox path of the input Folder + """ + if not isinstance(folder, Folder): + return None + + path = folder.name + while folder.name != "root": + folder = folder.parent + path = folder.name + "/" + path + + return path + + @property + def routing_folder_path(self) -> Union[str, None]: + """ + Get the path to the Routing folder. + + Returns + ------- + path: str + Path to the Routing folder + """ + path = self._get_folder_path(self.routing_folder) + return path + + @routing_folder_path.setter + def routing_folder_path(self, routing_folder_path: str) -> None: + """ + Setter for the routing folder. 
+ """ + self.routing_folder = self._get_mailbox_path(routing_folder_path) + folder_path = self._get_folder_path(self.routing_folder) + logger.info(f"Routing folder path set to '{folder_path}'") + + @property + def done_folder_path(self) -> Union[str, None]: + """ + Get the path to the Done folder. + + Returns + ------- + path: str + Path to the Done folder + """ + path = self._get_folder_path(self.done_folder) + return path + + @done_folder_path.setter + def done_folder_path(self, done_folder_path: str) -> None: + """ + Setter for the done folder. + """ + if not done_folder_path: + self.done_folder = None + logger.info("Done folder path not set") + else: + self.done_folder = self._get_mailbox_path(done_folder_path) + folder_path = self._get_folder_path(self.done_folder) + logger.info(f"Done folder path set to '{folder_path}'") + + @property + def correction_folder_path(self) -> Union[str, None]: + """ + Get the path to the Correction folder. + + Returns + ------- + path: str + Path to the Correction folder + """ + path = self._get_folder_path(self.correction_folder) + return path + + @correction_folder_path.setter + def correction_folder_path(self, correction_folder_path: str) -> None: + """ + Setter for the correction folder. + """ + if not correction_folder_path: + self.correction_folder = None + logger.info("Correction folder path not set") + else: + self.correction_folder = self._get_mailbox_path(correction_folder_path) + folder_path = self._get_folder_path(self.correction_folder) + logger.info(f"Correction folder path set to '{folder_path}'") + + def create_folders(self, folder_list: List[str], base_folder_path: str = None) -> None: + """Create folders in the mailbox. 
+ + Parameters + ---------- + folder_list : list + Create folders in the mailbox + base_folder_path : str + New folders will be created inside at path base_folder_path (Defaults to inbox) + """ + self.folder_list = folder_list + + # Setup base folder + base_folder = self._get_mailbox_path(base_folder_path) + + # Check existing folders + existing_folders = [f.name for f in base_folder.children] + + # Create new folders + base_folder_name = base_folder_path or "Inbox" + for folder_name in folder_list: + if folder_name not in existing_folders: + f = Folder(parent=base_folder, name=folder_name) + f.save() + logger.info(f"Created subfolder {folder_name} in folder {base_folder_name}") + + def get_emails( + self, + max_emails: int = 100, + base_folder_path: str = None, + ascending: bool = True, + ) -> pd.DataFrame: + """ + Load emails in the inbox. + + Parameters + ---------- + max_emails: int + Maximum number of emails to load + base_folder_path: str + Path to folder to fetch + ascending: bool + Whether emails should be returned in ascending reception date order + + Returns + ------- + df_new_emails: pandas.DataFrame + DataFrame containing nex emails + """ + logger.info(f"Reading new emails for mailbox '{self.mailbox_address}'") + base_folder = self._get_mailbox_path(base_folder_path) + if ascending: + order = "datetime_received" + else: + order = "-datetime_received" + + all_new_data = ( + base_folder.all() + .only( + "message_id", + "datetime_sent", + "sender", + "to_recipients", + "subject", + "text_body", + "attachments", + ) + .order_by(order)[:max_emails] + ) + + new_emails = [self._extract_email_attributes(x) for x in all_new_data if isinstance(x, Message)] + df_new_emails = pd.DataFrame(new_emails) + + logger.info(f"Read '{len(new_emails)}' new emails") + return df_new_emails + + @staticmethod + def _extract_email_attributes(email_item: Message) -> dict: + """ + Load email attributes of interest such as: + - `message_id` field + - `body` field + - `header` field 
+ - `date` field + - `from` field + - `to` field + - `attachment` field + + Parameters + ---------- + email_item: exchangelib.Message + Exchange Message object + + Returns + ------- + email_dict: Dict with email attributes of interest + """ + if not email_item.to_recipients: + to_list = list() + else: + to_list = [i.email_address for i in email_item.to_recipients] + + if not email_item.attachments: + attachments_list = None + else: + attachments_list = [i.name for i in email_item.attachments] + + # Modification to deal with draft in file tree + if email_item.datetime_sent is not None and email_item.sender is not None: + email_dict = { + "message_id": email_item.message_id, + "body": email_item.text_body or "", + "header": email_item.subject or "", + "date": email_item.datetime_sent.isoformat(), + "from": email_item.sender.email_address or None, + "to": to_list, + "attachment": attachments_list, + } + else: + # There is a draft in the emails + email_dict = { + "message_id": email_item.message_id, + "body": email_item.body or "", + "header": email_item.subject or "", + "date": None, + "from": None, + "to": None, + "attachment": attachments_list, + } + return email_dict + + def route_emails( + self, + classified_emails: pd.DataFrame, + raise_missing_folder_error: bool = False, + id_column: str = "message_id", + ) -> None: + """ + Function to route emails to mailbox folders. 
def get_corrections(
    self,
    max_emails: int = 100,
    ignore_list: Optional[List[str]] = None,
    correction_column_name: str = "correction",
) -> pd.DataFrame:
    """
    When mailbox users find misclassified emails, they should move them to correction folders.
    This method collects the emails placed in the correction folders.

    Parameters
    ----------
    max_emails: int
        Maximum number of emails to fetch at once.
        The limit is applied to each correction sub-folder separately.
    ignore_list: Optional[List[str]]
        List of folders that should be ignored when fetching emails
    correction_column_name: str
        Name of the column containing the correction folder in the returned DataFrame

    Returns
    -------
    df_corrected_emails: pandas.DataFrame
        DataFrame containing the misclassified emails ids and associated correction folder

    Raises
    ------
    AttributeError
        If the correction folder is not set.
    """
    if ignore_list is None:
        ignore_list = []

    if self.correction_folder is None:
        raise AttributeError(
            "You need to set the class attribute `correction_folder_path` to use `get_corrections`."
        )

    logger.info(f"Reading corrected emails from folder {self.correction_folder}")

    # Get correction folders
    categories = [e.name for e in self.correction_folder.children if e.name not in ignore_list]

    # Load corrected emails
    all_corrected_emails = list()
    for folder_name in categories:
        folder = self.correction_folder / folder_name
        messages = (
            folder.all()
            .only(
                "message_id",
                "datetime_sent",
                "sender",
                "to_recipients",
                "subject",
                "text_body",
                "attachments",
            )
            .order_by("datetime_received")[:max_emails]
        )
        # Items which are not Message instances are skipped
        emails = [self._extract_email_attributes(m) for m in messages if isinstance(m, Message)]

        # Add correction folder to email attributes
        for item in emails:
            item.update({correction_column_name: folder_name})

        all_corrected_emails.extend(emails)
        logger.info(f"Found {len(emails)} corrected emails in folder {folder}")

    logger.info(f"Found {len(all_corrected_emails)} corrected emails in total")
    df_corrected_emails = pd.DataFrame(all_corrected_emails)

    return df_corrected_emails
+ """ + if (self.correction_folder is None) or (self.done_folder is None): + raise AttributeError( + "You need to set the class attribute `done_folder_path` " + "and the class attribute `correction_folder_path` to use `move_to_done`." + ) + # Collect corrected emails + items = self.correction_folder.children.filter(message_id__in=emails_id).only("id", "changekey") + n_items = items.count() + + # Move to done folder + self.mailbox_account.bulk_move(ids=items, to_folder=self.done_folder, chunk_size=5) + logger.info(f"Moved {n_items} corrected emails to the folder {self.done_folder_path}") + + def list_subfolders(self, base_folder_path: str = None) -> List[str]: + """ + List the sub-folders of the specified folder. + + Parameters + ---------- + base_folder_path: str + Path to folder to be inspected + """ + base_folder = self._get_mailbox_path(base_folder_path) + return [f.name for f in base_folder.children] + + def send_email(self, to: Union[str, List[str]], header: str, body: str, attachments: dict) -> None: + """ + This method sends an email from the login address (attribute login_address). + + Parameters + ---------- + to: str or list + Address or list of addresses of email recipients + header: str + Email header + body: str + Email body + attachments: dict + Dict containing attachment names as key and attachment file contents as values. + Currently, the code is tested for DataFrame attachments only. + """ + if self.sender_account is None: + raise AttributeError( + "To send emails, you need to specify a `sender_address` when initializing " + "the ExchangeConnector class." 
def load_email_data(type: str = "raw") -> pd.DataFrame:
    """
    Function to load a file containing toy email data.
    Possible types are:
    - raw : minimal DataFrame with email data
    - preprocessed : DataFrame with preprocessed email data
    - full : Full DataFrame with all email features

    Parameters
    ----------
    type: str
        Name of the dataset flavour to load ("raw", "preprocessed" or "full")

    Return
    ------
    pandas.DataFrame
        DataFrame with toy email data (missing values are replaced by empty strings)

    Raises
    ------
    ValueError
        If the requested type is not one of the supported types.
    """
    # Dataset flavour -> JSON file stored next to this module
    dataset_files = (
        ("raw", "emails.json"),
        ("preprocessed", "emails_preprocessed.json"),
        ("full", "emails_full.json"),
    )
    file_name = next((name for key, name in dataset_files if type == key), None)

    if file_name is None:
        raise ValueError(f"Unknown data type {type}. Choose between 'raw', 'preprocessed' and 'full'")

    # Path to data directory
    data_directory = op.dirname(op.abspath(__file__))
    email_data_path = op.join(data_directory, file_name)

    return pd.read_json(email_data_path, orient="records").fillna("")
vous informe que la nouvelle immatriculation est enfin + faite. Je vous remercie bien pour votre patience. + Je vous prie de trouver donc la carte grise ainsi que la + nouvelle immatriculation. Je vous demanderai de faire les changements + nécessaires concernant l’assurance. + Je vous remercie encore pour tout. + Cordialement, + Monsieur Dupont (See attached file: pj.pdf)",Tr : Immatriculation voiture,vendredi 25 mai 2018 06 h 21 CEST,conseiller1@societeimaginaire.fr,demandes@societeimaginaire.fr,"[""pj.pdf""]",M,32,vehicule +" + + + Bonjours, + + Suite a notre conversation téléphonique de Mardi , pourriez vous me dire la + somme que je vous dois afin d'être en régularisation . + + Merci bonne journée + + Le mar. 22 mai 2018 à 10:20, a écrit : + Bonjour. + + Merci de bien vouloir prendre connaissance du document ci-joint : + 1 - Relevé d'identité postal (contrats) + + Cordialement. + + La Mututelle Imaginaire + + La visualisation des fichiers PDF nécessite Adobe Reader. + ",Re: Envoi d'un document de la Société Imaginaire,vendredi 25 mai 2018 06 h 45 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,[],M,66,compte +" + + + + + Bonjour, + + + Je fais suite à  votre mail. J'ai envoyé mon bulletin de salaire + par courrier mais il semblerait que vous ne l'ayez pas réceptionné. + Vous trouverez ci-joint mon bulletin de salaire. + + + Bien cordialement, + + + Monsieur Dupont. + + Le 16/05/2018 à 11:27, conseiller@Societeimaginaire.fr a écrit : + + Cher(e) client, + + Sauf erreur de notre part, nous n'avons pas reçu votre justificatif + d’emploi. + Nous vous rappelons qu'il nous est indispensable pour valider votre + adhésion auprès de notre Societe. + + Il peut s’agir de votre dernier bulletin de paye ou d’une attestation de + votre employeur par exemple. + + Votre conseiller Societe Imaginaire + |------------------------+---------------------------------------| + | |Garanti sans virus. 
www.avast.com | + |------------------------+---------------------------------------| + + + ",Re: Votre adhésion à la Société Imaginaire,vendredi 25 mai 2018 10 h 15 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""fichedepaie.png""]",M,50,adhesion +" + + + Bonjour, + Voici ci joint mon bulletin de salaire comme demandé. + Cordialement + Monsieur Dupont + + Le ven. 25 mai 2018 à 11:23, a écrit : + + Cher(e) client, + + Sauf erreur de notre part, nous n'avons pas reçu votre justificatif d’emploi. + Nous vous rappelons qu'il nous est indispensable pour valider votre + adhésion auprès de notre Societe Imaginaire. + + Il peut s’agir de votre dernier bulletin de paye ou d’une attestation de + votre employeur. + + Votre conseiller Societe Imaginaire  (See attached file: pj.jpg)",Bulletin de salaire,vendredi 25 mai 2018 17 h 30 CEST,Monsieur Dupont ,conseiller@societeimaginaire.fr,"[""pj.pdf""]",M,15,adhesion +" Madame, Monsieur, + + Je vous avais contactés car j'avais pour + projet d'agrandir ma maison. J'avais reçu un devis pour lequel je n'avais + pas donné suite, les travaux n'étant pas encore réalisés. + + Le projet a maintenant été porté à son terme et je voudrais donc revoir + votre offre si possible. + + Je désire garder le même type de contrat. + Je suis à votre disposition pour tout renseignement complémentaires. + + Sincères salutations + Monsieur Dupont + ",Modification et extension de ma maison,jeudi 31 mai 2018 10 h 28 CEST,Monsieur Dupont ,conseiller@societeimaginaire.fr,[],F,22,habitation +" + + + + + + + ----- Transféré par Conseiller le 31/05/2018 12:24 ----- + + De : Dupont + A : conseiller@Societeimaginaire.fr + Date : 30/05/2018 15:56 + Objet : Assurance d'un nouveau logement + + + + Bonjour, + + J'emménage dans un nouveau studio le Vendredi 2 Juin 2019 + mais je n'arrive pas à souscrire d'assurance via la plateforme en ligne. 
+ + C'est pourquoi je vous envoie l'état des lieux de sortie du deuxième studio + afin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau + logement. + + + Cordialement, + + + Monsieur Dupont. + + + Tél : 07.00.00.00.00 + + + Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf) + ",Tr : Assurance d'un nouveau logement,jeudi 31 mai 2018 12 h 24 CEST,Conseiller ,conseiller1@societeimaginaire.fr,"[""pj.pdf""]",F,28,resiliation +" + + + + + Bonjour, + + + + Je me permets de venir vers vous car depuis le début de l’année, nous avons + des difficultés pour assurer nos véhicules. + + Ces derniers jours, vous nous avez fait parvenir le détail des + dénominations et des niveau d’assurance de chaque véhicule. + + Merci d’effectuer ces changements + + + + Dans l’attente de votre retour + + + + Cordialement + + + + Monsieur Dupont + +  (See attached file: image001.png)(See attached file: Assurances véhicules2018.pdf)",Assurance véhicules,jeudi 31 mai 2018 14 h 02 CEST,Monsieur Dupont ,demandes4@societeimaginaire.fr,"[""image001.png""]",M,39,vehicule +" + + + Bonjour, + + Voici la copie du virement effectuer à ce jour. + Serait-il possible d’obtenir une attestation d’assurance? + + Cordialement, + + Monsieur Dupont + 06 00 00 00 00 + (See attached file: pj.pdf) + + > Le 23 mai 2018 à 10:17, conseiller@Societeimaginaire.fr a écrit : + > + > Bonjour, + > + > Nous faisons suite à votre dernier courriel. + > + > A ce jour, le montant à devoir, permettant de solder votre compte + cotisation, est de 000.00euros. + > + > Nous restons à votre disposition pour tous renseignements + complémentaires. + > + > Meilleures salutations, + > + > Conseiller. 
+ > ",Re: Virement,jeudi 31 mai 2018 17 h 10 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""pj.pdf""]",M,38,autres +" + + + + + + + + BONJOUR + + + + CI-JOINT PRET VEHICULE + + CORDIALEMENT + + GARAGE + + + + + + + > Message du 31/05/18 08:51 + > De : monsieurdupont@extensionb.com + > A : ""GARAGE"" + > Copie à : + > Objet : Attached Image + > + >(See attached file: pj.pdf)",Prêt véhicule,jeudi 31 mai 2018 08 h 54 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""pj.pdf""]",M,30,vehicule +" + + + ----- Transféré par Conseiller le 31/05/2018 11:59 ----- + + De : Dupont + A : conseiller@Societeimaginaire.fr + Date : 30/05/2018 10:12 + Objet : Re: Demande + + + + Bonjour Monsieur, + Pouvez-vous m'appeler lundi prochain en fin d'après-midi ? + Cordialement, + Monsieur Dupont + + Le mer. 31 mai 2018 à 09:58, a écrit : + Bonjour Mr Dupont, + + Je fais suite à mon message téléphonique de ce jour. + Comme précisé, je vous adresse ce mail avec mes coordonnées pour que + vous puissiez en retour me faire part de vos disponibilités. + Pour rappel, je souhaite vous rencontrer pour faire un bilan + général de vos contrats. + + Dans l'attente de vous lire, je vous souhaite une bonne journée. + + Bien cordialement. + + Conseiller. + conseiller@Societeimaginaire.fr + Conseiller Societe Imaginaire. + ",Bilan général contrats,jeudi 31 mai 2018 12 h 00 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],F,63,bilan +" + + + Bonjour, + + Suite à notre entretien téléphonique de ce jour, + je joins le numéro de téléphone de mon fils + + Monsieur Dupont +  tel : 06 00 00 00 00. 
+ + Monsieur Dupont + + Cordialement + ",Numéro de téléphone,jeudi 31 mai 2018 12 h 44 CEST,monsieurdupont@extensionf.net,demandes@societeimaginaire.fr,[],M,23,modification +" + + + + De : Dupont + A : conseiller@Societeimaginaire.fr + Date : 03/06/2018 16:41 + Objet : Re: Vos documents demandés + + + + Bonjour, + + Serait-il possible de rebasculer mon véhicule dès + aujourd'hui en assurance parfaite? + + Voici des photos du jour qui attestent de son parfait état. + + D'avance merci de votre confirmation + + Monsieur Dupont + Envoyé de mon iPhone + (See attached file: image1.jpeg) + + + (See attached file: image2.jpeg) + + + (See attached file: image3.jpeg) + + + > Le 21 janv. 2018 à 20:38, Monsieur Dupont a écrit : + > + > Bonjour Madame, + > + > Je vous confirme que je souhaite basculer mon contrat actuel sous la + nouvelle forme dès à présent. + > + > D'avance merci de votre confirmation. + > + > Bien cordialement, + > Monsieur Dupont + > + > Envoyé de mon iPad + > + >> Le 30 nov. 2017 à 10:06, conseiller@Societeimaginaire.fr a écrit : + >> + >> Bonjour, + >> + >> Veuillez trouver ci-joint les documents que vous nous avez demandés : + >> - Devis1 + >> - Devis2 + >> + >> La visualisation des fichiers PDF nécessite Adobe Reader. + >> + >> Bien à vous. + >> + >> La SocSociete Imaginaire. 
+ >> + >> ",Tr : Re: Vos documents demandés,lundi 4 juin 2018 09 h 56 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,"[""image1.jpeg"",""image2.jpeg"",""image3.jpeg""]",F,28,vehicule +" + + + + + + + -- + Monsieur Dupont (See attached file: Relevé d'informations.pdf)",Demande,lundi 4 juin 2018 14 h 09 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Relevé d'informations.pdf""]",M,86,vehicule +" + + + ----- Transféré par Conseiller le 04/06/2018 09:20 ----- + + De :        Association Dupont + A :        representant@Societeimaginaire.fr + Date :        01/06/2018 16:03 + Objet :        Re: Correspondance de La Societe Imaginaire + + + + + Bonjour, merci de votre retour rapide. + + Concernent l'attestation du 22 septembre, serait-il possible de faire + apparaître l'adresse complète de l'événement. + + En effet, s'agissant d'un domaine privé, les propriétaires nous ont fait + cette demande. + + Vous remerciant par avance. + + Cordialement. + + Monsieur Dupont, + Association LOI 1901. + Tél. perso : 06.00.00.00.00 + + ",Tr : attestation,lundi 4 juin 2018 09 h 20 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],M,36,habitation +" + + + + + + + + + Bonjour Madame, + + Je vous remercie pour l'attestation demandée par téléphone. + je vous rappelle que je suis la propriétaire de ce véhicule et que cette + attestation est à destination de mon travail. + + Cordialement + + Madame Dupont + + + + PS: il y a quelques temps j'ai sollicité vos services pour une attestation + et je n'ai eu aucune difficulté pour l'obtenir + + + + > Message du 28/05/18 15:22 + > De : conseiller@Societeimaginaire.fr + > A : madamedupont@extensionb.com + > Copie à : + > Objet : Envoi d'un document de la Societe Imaginaire + > + > Bonjour. + + Merci de bien vouloir prendre connaissance du document ci-joint : + 1 - Attestation d'assurance + + Cordialement. 
+ + La Societe Imaginaire + + La visualisation des fichiers PDF nécessite Adobe Reader.(See attached + file: pj.pdf)",demande attestation - Envoi d'un document de la Mutuelle Imaginaire,lundi 4 juin 2018 10 h 22 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,[],F,88,resiliation +" + + + + Bonjour madame, + Suite à notre entretien téléphonique de ce jour, je vous joins un Scan de + mon certificat de cession de mon véhicule nautique assuré chez vous. + Je vous remercie de la clarté de vos explications et reste à votre + disposition. + Bien à vous + Monsieur Dupont(See attached file: Numériser.pdf) + ",certificat de cession de véhicule,lundi 4 juin 2018 15 h 39 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Numériser.pdf""]",M,57,resiliation +" + + + + Bonjour, + Vous trouverez ci-joint le certificat de cession attestant de la vente du + véhicule pour résilier l'assurance. + + Cordialement + Monsieur Dupont(See attached file: pj.jpg)",certificat de cession ,lundi 4 juin 2018 15 h 49 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""pj.jpg""]",M,82,vehicule +" + + + + Vous trouverez en pièce jointe le relevé de situation ainsi que le bulletin + de paie demandé et un rib. + Mon Adresse complète est là suivante : 00 rue imaginaire 33000 Villeimaginaire +  (See attached file: Screenshot.png)(See attached + file: Screenshot2.png)(See attached file: + Screenshot3.png)",Assurance auto,lundi 4 juin 2018 18 h 04 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Screenshot.png"",""Screenshot2.png"",""Screenshot3.png""]",F,60,adhesion +" + + + + + + + Le lundi 4 juin 2018 à 13:59:14 UTC+2, conseiller@Societeimaginaire.fr + a écrit : + + + Chère client, + + Nous avons bien reçu votre email qui a retenu toute notre attention. + + Le document que vous nous avez envoyé n'est pas exploitable. Pourriez-vous + nous le renvoyer dans une meilleure résolution ? + + Bien Cordialement. 
+ + Au nom de l'équipe Societe Imaginaire + + logo Societe Imaginaire + |------------------------+---------------------------------------| + | |Garanti sans virus. www.avast.com | + |------------------------+---------------------------------------| + + + + (See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)",Re: Suppression assurance logement,lundi 4 juin 2018 20 h 45 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""ACTE VENTE 1.pdf"",""ACTE VENTE 2.pdf""]",M,21,succession +" + + + Bonjour, + + Permettez-moi de vous signaler un changement d'adresse me concernant. + + Voici ma nouvelle adresse : + + 00 rue du Nomderue + 75000 Paris + + Merci. + + Bien à vous, + + Monsieur Dupont + ",changement d'adresse,lundi 4 juin 2018 22 h 28 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,[],M,58,habitation +" + + + + + ----- Transféré par Conseiller le 04/06/2018 10:29 ----- + + De : monsieurdupont@extensionf.net + A : Societe Imaginaire + Date : 02/06/2018 11:07 + Objet : Fwd: Changement de vehicule + + + + Bonjour, + + Pourriez vous faire suite au mail suivant en date du 22 Mai + 2017. + + + De: monsieurdupont@extensionf.net + À: ""Societe Imaginaire"" + Envoyé: Mardi 22 Mai 2017 10:15:25 + Objet: Changement de vehicule + + Bonjour, + + Merci de bien vouloir transférer l'assurance du vehicuel sur le + Scooter, + dont les références sont sur la facture fourni en pièce-jointe. + + Me faire parvenir l'attestation. + + Merci. + + MR Dupont, + le 22 mai 2017 + + Cordialement + (See attached file: Facture.jpg)",Tr : Fwd: Changement de Scooter !,lundi 4 juin 2018 10 h 29 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],M,64,vehicule +" + + + + ----- Transféré par Conseiller le 04/06/2018 10:38 ----- + + De : monsieurdupont@extensiona.com + A : conseiller@Societeimaginaire.fr + Date : 03/06/2018 16:52 + Objet : Réclamations + + + + Numéro Tél. : 0600000000 + E-mail : monsieurdupont@extensiona.com + M. 
DUPONT + Adresse : 94000 + Objet de la demande : Réclamations + + Bonjour, j'aurais besoin de l'attestation + pour ma declaration de revenus 2018. + Merci d'avance + ",Tr : Réclamations ,lundi 4 juin 2018 10 h 38 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],F,20,habitation +" + + + + Bonjour , + + Veuillez recevoir le certificat de cession de mon véhicule afin que vous + puissiez effectuer la résiliation de mon contrat. + Je reviendrai vers vous afin d’assurer mon nouveau véhicule bientôt. + + Bien à vous , + + Mr DUPONT + + + + (Embedded image moved to file: pic.jpg) + + + Envoyé de mon iPad",Résiliation contrat voiture ,lundi 4 juin 2018 11 h 19 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""pic.jpg""]",F,51,resiliation +" + + + + + + + -- + Monsieur Dupont + 02 chemin imaginaire + 84000 + monsieurdupont@extensiona.com + 06.00.00.00.00(See attached file: Relevé d'informations.pdf)",Demande,lundi 4 juin 2018 10 h 58 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Relevé d'informations.pdf""]",M,77,vehicule +" + + + Bonjour, je vous remercie de ce retour. Pouvez vous répondre aux + différentes questions soulevées dans mon mail afin que je prenne ma + décision. Bien cordialement. Mr Dupont. + + Obtenez Outlook pour iOS + + From: conseiller@Societeimaginaire.fr + Sent: Monday, June 4, 2018 1:56:43 PM + To: monsieurdupont@hotmail.com + Subject: : Votre devis véhicule + + Bonjour, + Veuillez trouver ci-joint le devis que vous nous avez demandé. + La visualisation des fichiers PDF nécessite Adobe Reader. + Cordialement. 
+ La Societe Imaginaire + ",Tr : Re: Interrogations,lundi 4 juin 2018 15 h 37 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],F,40,vehicule +" + + + + + + De : monsieurdupond + A : ""conseiller@Societeimaginaire.fr"" + Date : 04/06/2018 13:47 + Objet : RE: Correspondance de La Societe Imaginaire + + + + Bonjour + Ci-joint le rib du compte comme demandé + Bien à vous + + Monsieur DUPONT + (Embedded image moved to file: pic.jpg) + +33(0) 6 00 00 00 00 + + De : conseiller@Societeimaginaire.fr [mailto:conseiller@Societeimaginaire.fr] + Envoyé : lundi 4 juin 2018 12:23 + À : Monsieur Dupont + Objet : Correspondance de La Societe Imaginaire + + Bonjour. + + Veuillez prendre connaissance des documents ci-joints : + 1 - Courrier + 2 - Envoi Devis habitation + + Meilleurs sentiments. + + La Societe Imaginaire + + La visualisation des fichiers PDF nécessite Adobe Reader.(See attached + file: RIB.pdf)",RIB,mardi 5 juin 2018 09 h 04 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,"[""RIB.pdf""]",F,86,regularisation +" + + + + + + Bonjour, + + Veuillez trouver ci-joint le RI de Mme Dupont, + + Vous souhaitant bonne réception, + + + Bien cordialement. + + + (Embedded image moved to file: pic30109.jpg) + [IMAGE] + + Le 2018-06-04 20:37, Mr DUPONT a écrit : + Bonjour Monsieur, + + Pourriez-vous, s'il vous plait, faire parvenir mon relevé d'informations à + la Societe Imaginaire, par retour de mail, en gardant la référence citée en objet qui + vous plait? + + Je vous remercie, + + Monsieur Dupont + + -------- Message d'origine -------- + De : conseiller@Societeimaginaire.fr + Date : 04/06/2018 14:30 (GMT+01:00) + À : monsieurdupont@polytechnique.edu + Objet : Confirmation de votre assurance véhicule  + + + Cher(e) client, + + Nous vous confirmons l'enregistrement de l'assurance de votre véhicule en + en date du 01/05/2017 suite à la demande de résiliation que nous avons + effectuée auprès de la Societe Concurrente. 
+ + Toutefois, ces derniers ne nous ont pas envoyé votre Relevé d'Information + donc il va falloir que vous leur demandiez pour ensuite nous le transmettre + par mail à l'adresse conseiller@Societeimaginaire.fr + En attendant ce document, nous vous + assurons quand même, mais de manière provisoire. + + De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez. + Pour cela, le plus simple et + de nous contacter au 09.00.00.00.00 ou de prévoir un recontact via notre + site Societeimaginaire.fr + + + Bien Cordialement. + + Au nom de l'équipe Societe Imaginaire + + logo Societe Imaginaire + + (See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)",Re: Confirmation de votre assurance véhicule,mardi 5 juin 2018 10 h 56 CEST,Conseiller ,demandes@societeimaginaire.fr,"[""image-a7c10.png"",""Releve_Information.pdf""]",F,52,resiliation +" + + + Bonjour + Suite à notre entretien téléphonique, veuillez trouver ici mon relevé + d'identité bancaire. + + Au nom de Monsieur Dupont + + Bien cordialement + + -- + Monsieur Dupont + 32 avenue Imaginaire + Tél: +33 (0)600 00 00 00 + ",RIB,mardi 5 juin 2018 11 h 12 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""rib.pdf""]",M,72,compte +" + + + + + Bonjour, + + Nous souhaitons déclarés deux sinistres concernant nos deux véhicules. + + 1) Vehicule1 : + + Le véhicule était stationnement sur le parking et il + présente une trace sur l'aile arrière droite et sur le + pare-choc. + + 2) Vehicule2 : + + Le conducteur s'est garé sur un parking d'entreprise. + Il a cogné avec le pneu avant droit. 
+ + + En vous souhaitant bonne réception de ces éléments, + + Bien cordialement, + + Monsieur Dupont + + (Embedded image moved to file: pic.jpg) + + (Embedded image moved to file: pic.jpg)",sinistres,mardi 5 juin 2018 17 h 37 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""pic.jpg""]",M,39,sinistres +" + + + + Nous restons à votre disposition + + Cordialement + + Societe Imaginaire + + ----- Transféré par Conseiller le 05/06/2018 10:05 ----- + + De : monsieurdupont + A : conseiller@Societeimaginaire.fr + Date : 03/06/2018 10:26 + Objet : Modification de contrat + + + + Bonjour Madame, + + Notre fils, conducteur principal du véhicule étant + en stage puis à l'étranger pour une bonne partie de l'an + prochain, son véhicule est désormais à la maison et il ne sera amené à + l'utiliser que rarement. + Pour cette raison, nous souhaiterions modifier son contrat d'assurance + dès que possible. + + Vous remerciant par avance de votre concours, nous restons à votre + disposition pour toute information complémentaire. + + Cordialement + + Monsieur Dupont + ",contrat vehicule,mardi 5 juin 2018 10 h 06 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],M,63,vehicule +" + + + + + De : Monsieur Dupont + A : conseiller@Societeimaginaire.fr + Cc : Monsieur Dupont , Madame Dupont + + Date : 04/06/2018 18:08 + Objet : résiliation couverture véhicule suite cession + + + + Bonjour, + + Le véhicule a été cédé le 2 avril 2018. 
+ + Merci, ci joint le document de cession scanné + + Cordialement, + + + Monsieur Dupont - Orange + monsieurdupont@extensionj.fr + + responsable : Monsieur Dupont + 06 00 00 00 00 + monsieurdupont@extensionj.fr + + + + + (See attached file: cession.pdf)",Tr : résiliation couverture véhicule suite cession,mardi 5 juin 2018 12 h 33 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,"[""cession.pdf""]",M,79,vehicule +" + + + De : Dupont + A : conseiller@Societeimaginaire.fr + Date : 05/06/2018 13:34 + Objet : Réponse au dossier de prêt + + + + Madame, Monsieur, + + Vous nous avez adressé un dossier de prêt concernant votre client. + + Nous vous informons de notre accord. + + Nous restons à votre entière disposition. + + Bien cordialement. + + Conseiller + + Societe Imaginaire + + Tél : 05 00 00 00 00 + Fax : 05 00 00 00 00 + E-mail: conseiller@societeimaginaire.fr + + +",dossier de prêt ,mardi 5 juin 2018 14 h 11 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],F,18,vehicule +" + + + + + + Madame, Monsieur, + + + + Je vous prie de trouver ci-joint une déclaration de sinistre, + relative au cas de Monsieur Dupont, survenu le lundi 6 Mai. + + + + Avec nos cordiales salutations. + + + + (Embedded image moved to file: pic.jpg) + +  (See attached file: doc.pdf)",déclarations de sinistre corporel et matériel,mardi 5 juin 2018 16 h 06 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""doc.pdf""]",M,45,habitation +" + + + + Madame, Monsieur, + + Je vous prie de trouver ci-joints les justificatifs demandés pour la + souscription de mon assurance auto. 
+ + Bien cordialement, + + Monsieur Dupont + (See attached file: Attestationemployeur.pdf) +(See attached file: Relevé_d'information.pdf)",Demande d'assurance auto,jeudi 7 juin 2018 12 h 38 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Attestationemployeur.pdf"",""Relevé_d'information.pdf""]",F,48,resiliation +" + + + + + Envoyé de mon iPhone + + Début du message transféré : + + Expéditeur: monsieurduponte@extensionh.fr + Date: 7 juin 2018 à 16:34:25 UTC+2 + Destinataire: conseiller@Societeimaginaire.fr + Objet: Demande + + + + Bonjour, suite à notre conversation téléphonique voici la preuve + de règlement afin que vous puissiez étudier ma demande de réinscription. + + Cordialement, + Monsieur Dupont + [IMAGE] + + + Envoyé de mon iPhone(See attached file: IMG.PNG)",preuve réglement ,jeudi 7 juin 2018 18 h 22 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""IMG.PNG""]",F,63,compte +" + + + Bonjour + + Je m'aperçois ce jour que j'ai été prélèvée plusieurs fois: + + 1 fois sur mon compte bancaire BANQUE1 + 1 fois sur mon compte BANQUE2 + + Je paye donc 2 fois l'assurance véhicule et habitation + + Pourriez vous me rembourser la somme nécessaire sur mon compte BANQUE1 + le plus rapidement possible. + + En vous remerciant par avance de votre réponse par retour de mail. + Bien cordialement + Monsieur Dupont",prélèvements bancaires,jeudi 7 juin 2018 15 h 16 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,[],F,19,modification +" + + + + Bonjour, + Voici, ci-joins les documents demandés précédemment. 
+ + Je vous remercie de la rapidité de vos service, + Bien à vous, + Monsieur Dupont(See attached file: Liste.docx)(See attached file: + PV.pdf)(See attached file: statuts.pdf)(See attached file: + RIB.jpeg)",documents ,jeudi 7 juin 2018 10 h 45 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,"[""Liste.docx"",""PV.pdf"",""statuts.pdf"",""RIB.jpeg""]",F,39,habitation +" + + + Centre Imaginaire + + 00 Rue de Nomderue + + 23000 VILLE IMAGINAIRE + + + + + + Madame, Monsieur, + + + Merci de bien vouloir me faire parvenir une attestation d'assurance + concernant la mise à disposition de la salle pour 100 personnes. + + objet de la manifestation : réunion + + cordialement. + + + Monsieur Dupont + + ",demande d'attestation d'asurance,jeudi 7 juin 2018 11 h 40 CEST,Monsieur Dupont ,demandes@societeimaginaire.fr,[],F,31,habitation +" + + + (Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page + Facebook + + (Embedded image moved to file: pic.gif) + + + + ----- Transféré par Conseiller/Societe Imaginaire le 07/06/2018 16:32 ----- + + De : monsieurdupont + A : conseiller@Societeimaginaire.fr + Date : 07/06/2018 16:16 + Objet : Re : Message de votre conseillère personnelle + + + + Bonjour, + + Merci de vos messages. La voiture est conduite par Monsieur Dupont. + Nos deux voitures sont assurées à 100%. + + Cordialement, + + Monsieur Dupont + + Le 07/06/18, ""conseiller@Societeimaginaire.fr"" a + écrit : + + + Chère client, + + Suite à notre bilan du 01 mai dernier, je souhaitais savoir ce que vous + avez décidé. + + + Je devais vous établir un devis pour votre voiture donc si + vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi + que le type de couverture que vous recherchez, je vous enverrai le devis rapidement. + + + J'attend de vos nouvelles. + + Bien Cordialement. 
+ + Conseiller + Délégation Conseil + conseillerh@Societeimaginaire.fr + + ",Tr : Message de votre conseillère personnelle,jeudi 7 juin 2018 16 h 32 CEST,conseiller@societeimaginaire.fr,demandes@societeimaginaire.fr,[],F,18,vehicule diff --git a/melusine/data/emails.json b/melusine/data/emails.json new file mode 100644 index 0000000..cc60d68 --- /dev/null +++ b/melusine/data/emails.json @@ -0,0 +1,540 @@ +[ + { + "age": 35, + "attachment": "[]", + "attachments": [], + "body": "\n\nJe vous transferts cette demande urgente !\n\nKim Smith\nConseiller MAIF\n\n----- Transféré par Conseiller le 24/05/2018 11:49 -----\n\nDe : Dupont \nA : conseiller@maif.fr\nCc : somebody@hotmail.fr\nDate : 22/02/2022 22:22\nObjet : Demande urgente\n\nBonjour\nJ'ai besoin d'une attestation scolaire pour mon enfant.\nPouvez-vous me l'envoyer rapidement s'il vous plait ?\nJe vous remercie par avance.\n\nClaude Dupont\n3 Rue de la victoire\n79000 Niort\ndupont@societaire.com\n\nEnvoyé de mon iPhone", + "date": "2022-02-22 22:22:22", + "from": "conseiller1@maif.fr", + "header": "Tr : Demande urgente", + "label": "habitation", + "sexe": "F", + "to": "mailbox@maif.fr" + }, + { + "age": 32, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 25/05/2018 08:20 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t24/05/2018 19:37\r\n Objet :\tImmatriculation voiture\r\n\r\n\r\n\r\n Bonsoir madame,\r\n\r\n Je vous informe que la nouvelle immatriculation est enfin\r\n faite. Je vous remercie bien pour votre patience.\r\n Je vous prie de trouver donc la carte grise ainsi que la\r\n nouvelle immatriculation. 
Je vous demanderai de faire les changements\r\n n\u00e9cessaires concernant l\u2019assurance.\r\n Je vous remercie encore pour tout.\r\n Cordialement,\r\n Monsieur Dupont (See attached file: pj.pdf)", + "date": "2022-04-28 00:56:52", + "from": "conseiller1@societeimaginaire.fr", + "header": "Tr : Immatriculation voiture", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 66, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjours,\r\n\r\n Suite a notre conversation t\u00e9l\u00e9phonique de Mardi , pourriez vous me dire la\r\n somme que je vous dois afin d'\u00eatre en r\u00e9gularisation .\r\n\r\n Merci bonne journ\u00e9e\r\n\r\n Le mar. 22 mai 2018 \u00e0 10:20, a \u00e9crit\u00a0:\r\n Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Relev\u00e9 d'identit\u00e9 postal (contrats)\r\n\r\n Cordialement.\r\n\r\n La Mututelle Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n ", + "date": "2021-12-12 01:50:29", + "from": "Monsieur Dupont ", + "header": "Re: Envoi d'un document de la Soci\u00e9t\u00e9 Imaginaire", + "label": "compte", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 50, + "attachment": "[\"fichedepaie.png\"]", + "attachments": [ + "fichedepaie.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n Je fais suite \u00e0\u00a0 votre mail. 
J'ai envoy\u00e9 mon bulletin de salaire\r\n par courrier mais il semblerait que vous ne l'ayez pas r\u00e9ceptionn\u00e9.\r\n Vous trouverez ci-joint mon bulletin de salaire.\r\n\r\n\r\n Bien cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n Le 16/05/2018 \u00e0 11:27, conseiller@Societeimaginaire.fr a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif\r\n d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur par exemple.\r\n\r\n Votre conseiller Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n ", + "date": "2020-08-05 01:42:25", + "from": "Monsieur Dupont ", + "header": "Re: Votre adh\u00e9sion \u00e0 la Soci\u00e9t\u00e9 Imaginaire", + "label": "adhesion", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 15, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n Voici ci joint mon bulletin de salaire comme demand\u00e9.\r\n Cordialement\r\n Monsieur Dupont\r\n\r\n Le ven. 
25 mai 2018 \u00e0 11:23, a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe Imaginaire.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur.\r\n\r\n Votre conseiller Societe Imaginaire\u00a0\u00a0(See attached file: pj.jpg)", + "date": "2019-09-18 05:58:43", + "from": "Monsieur Dupont ", + "header": "Bulletin de salaire", + "label": "adhesion", + "sexe": "M", + "to": "conseiller@societeimaginaire.fr" + }, + { + "age": 22, + "attachment": "[]", + "attachments": [], + "body": " Madame, Monsieur,\r\n\r\n Je vous avais contact\u00e9s car j'avais pour\r\n projet d'agrandir ma maison. J'avais re\u00e7u un devis pour lequel je n'avais\r\n pas donn\u00e9 suite, les travaux n'\u00e9tant pas encore r\u00e9alis\u00e9s.\r\n\r\n Le projet a maintenant \u00e9t\u00e9 port\u00e9 \u00e0 son terme et je voudrais donc revoir\r\n votre offre si possible.\r\n\r\n Je d\u00e9sire garder le m\u00eame type de contrat.\r\n Je suis \u00e0 votre disposition pour tout renseignement compl\u00e9mentaires.\r\n\r\n Sinc\u00e8res salutations\r\n Monsieur Dupont\r\n ", + "date": "2021-07-22 16:10:06", + "from": "Monsieur Dupont ", + "header": "Modification et extension de ma maison", + "label": "habitation", + "sexe": "F", + "to": "conseiller@societeimaginaire.fr" + }, + { + "age": 28, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 12:24 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 15:56\r\n Objet :\tAssurance d'un nouveau logement\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n J'emm\u00e9nage dans un nouveau studio le Vendredi 2 Juin 2019\r\n mais je n'arrive pas \u00e0 souscrire 
d'assurance via la plateforme en ligne.\r\n\r\n C'est\u00a0pourquoi je vous envoie\u00a0l'\u00e9tat des\u00a0lieux de sortie du deuxi\u00e8me studio\r\n afin de\u00a0pouvoir l'enlever du contrat et ainsi pouvoir assurer le\u00a0nouveau\r\n logement.\r\n\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n\r\n T\u00e9l : 07.00.00.00.00\r\n\r\n\r\n Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf)\r\n ", + "date": "2020-04-15 07:44:04", + "from": "Conseiller ", + "header": "Tr : Assurance d'un nouveau logement", + "label": "resiliation", + "sexe": "F", + "to": "conseiller1@societeimaginaire.fr" + }, + { + "age": 39, + "attachment": "[\"image001.png\"]", + "attachments": [ + "image001.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n\r\n Je me permets de venir vers vous car depuis le d\u00e9but de l\u2019ann\u00e9e, nous avons\r\n des difficult\u00e9s pour assurer nos v\u00e9hicules.\r\n\r\n Ces derniers jours, vous nous avez fait parvenir le d\u00e9tail des\r\n d\u00e9nominations et des niveau d\u2019assurance de chaque v\u00e9hicule.\r\n\r\n Merci d\u2019effectuer ces changements\r\n\r\n\r\n\r\n Dans l\u2019attente de votre retour\r\n\r\n\r\n\r\n Cordialement\r\n\r\n\r\n\r\n Monsieur Dupont\r\n\r\n \u00a0(See attached file: image001.png)(See attached file: Assurances v\u00e9hicules2018.pdf)", + "date": "2021-08-13 16:01:02", + "from": "Monsieur Dupont ", + "header": "Assurance v\u00e9hicules", + "label": "vehicule", + "sexe": "M", + "to": "demandes4@societeimaginaire.fr" + }, + { + "age": 38, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Voici la copie du virement effectuer \u00e0 ce jour.\r\n Serait-il possible d\u2019obtenir une attestation d\u2019assurance?\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n 06 00 00 00 00\r\n (See attached file: pj.pdf)\r\n\r\n > Le 23 mai 2018 \u00e0 10:17, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >\r\n > Bonjour,\r\n >\r\n > 
Nous faisons suite \u00e0 votre dernier courriel.\r\n >\r\n > A ce jour, le montant \u00e0 devoir, permettant de solder votre compte\r\n cotisation, est de 000.00euros.\r\n >\r\n > Nous restons \u00e0 votre disposition pour tous renseignements\r\n compl\u00e9mentaires.\r\n >\r\n > Meilleures salutations,\r\n >\r\n > Conseiller.\r\n > ", + "date": "2021-06-18 04:20:07", + "from": "Monsieur Dupont ", + "header": "Re: Virement", + "label": "autres", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 30, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n BONJOUR\r\n\r\n\r\n\r\n CI-JOINT PRET VEHICULE\r\n\r\n CORDIALEMENT\r\n\r\n GARAGE\r\n\r\n\r\n\r\n\r\n\r\n\r\n > Message du 31/05/18 08:51\r\n > De : monsieurdupont@extensionb.com\r\n > A : \"GARAGE\" \r\n > Copie \u00e0 :\r\n > Objet : Attached Image\r\n >\r\n >(See attached file: pj.pdf)", + "date": "2022-08-30 03:14:42", + "from": "Monsieur Dupont ", + "header": "Pr\u00eat v\u00e9hicule", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 11:59 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 10:12\r\n Objet :\tRe: Demande\r\n\r\n\r\n\r\n Bonjour Monsieur,\r\n Pouvez-vous m'appeler lundi prochain en fin d'apr\u00e8s-midi ?\r\n Cordialement,\r\n Monsieur Dupont\r\n\r\n Le mer. 
31 mai 2018 \u00e0 09:58, a \u00e9crit\u00a0:\r\n Bonjour Mr Dupont,\r\n\r\n Je fais suite \u00e0 mon message t\u00e9l\u00e9phonique de ce jour.\r\n Comme pr\u00e9cis\u00e9, je vous adresse ce mail avec mes coordonn\u00e9es pour que\r\n vous puissiez en retour me faire part de vos disponibilit\u00e9s.\r\n Pour rappel, je souhaite vous rencontrer pour faire un bilan\r\n g\u00e9n\u00e9ral de vos contrats.\r\n\r\n Dans l'attente de vous lire, je vous souhaite une bonne journ\u00e9e.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller.\r\n conseiller@Societeimaginaire.fr\r\n Conseiller Societe Imaginaire.\r\n ", + "date": "2019-06-12 03:40:19", + "from": "conseiller@societeimaginaire.fr", + "header": "Bilan g\u00e9n\u00e9ral contrats", + "label": "bilan", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 23, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour,\r\n je joins le num\u00e9ro de t\u00e9l\u00e9phone de mon fils\r\n\r\n Monsieur Dupont\r\n \u00a0tel : 06 00 00 00 00.\r\n\r\n Monsieur Dupont\r\n\r\n Cordialement\r\n ", + "date": "2021-10-04 18:17:05", + "from": "monsieurdupont@extensionf.net", + "header": "Num\u00e9ro de t\u00e9l\u00e9phone", + "label": "modification", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 28, + "attachment": "[\"image1.jpeg\",\"image2.jpeg\",\"image3.jpeg\"]", + "attachments": [ + "image1.jpeg", + "image2.jpeg", + "image3.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:41\r\n Objet :\tRe: Vos documents demand\u00e9s\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Serait-il possible de rebasculer mon v\u00e9hicule d\u00e8s\r\n aujourd'hui en assurance parfaite?\r\n\r\n Voici des photos du jour qui attestent de son parfait \u00e9tat.\r\n\r\n D'avance merci de votre confirmation\r\n\r\n Monsieur Dupont\r\n Envoy\u00e9 de mon iPhone\r\n (See 
attached file: image1.jpeg)\r\n\r\n\r\n (See attached file: image2.jpeg)\r\n\r\n\r\n (See attached file: image3.jpeg)\r\n\r\n\r\n > Le 21 janv. 2018 \u00e0 20:38, Monsieur Dupont a \u00e9crit :\r\n >\r\n > Bonjour Madame,\r\n >\r\n > Je vous confirme que je souhaite basculer mon contrat actuel sous la\r\n nouvelle forme d\u00e8s \u00e0 pr\u00e9sent.\r\n >\r\n > D'avance merci de votre confirmation.\r\n >\r\n > Bien cordialement,\r\n > Monsieur Dupont\r\n >\r\n > Envoy\u00e9 de mon iPad\r\n >\r\n >> Le 30 nov. 2017 \u00e0 10:06, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >>\r\n >> Bonjour,\r\n >>\r\n >> Veuillez trouver ci-joint les documents que vous nous avez demand\u00e9s :\r\n >> - Devis1\r\n >> - Devis2\r\n >>\r\n >> La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n >>\r\n >> Bien \u00e0 vous.\r\n >>\r\n >> La SocSociete Imaginaire.\r\n >> \r\n >> ", + "date": "2022-06-14 08:35:02", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Vos documents demand\u00e9s", + "label": "vehicule", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 86, + "attachment": "[\"Relev\u00e9 d'informations.pdf\"]", + "attachments": [ + "Relev\u00e9 d'informations.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n --\r\n Monsieur Dupont (See attached file: Relev\u00e9 d'informations.pdf)", + "date": "2022-09-29 08:59:04", + "from": "Monsieur Dupont ", + "header": "Demande", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 36, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 09:20 -----\r\n\r\n De : \u00a0 \u00a0 \u00a0 \u00a0Association Dupont \r\n A : \u00a0 \u00a0 \u00a0 \u00a0representant@Societeimaginaire.fr\r\n Date : \u00a0 \u00a0 \u00a0 \u00a001/06/2018 16:03\r\n Objet : \u00a0 \u00a0 \u00a0 \u00a0Re: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n\r\n Bonjour, merci de votre retour 
rapide.\r\n\r\n Concernent l'attestation du 22 septembre, serait-il possible de faire\r\n appara\u00eetre l'adresse compl\u00e8te de l'\u00e9v\u00e9nement.\r\n\r\n En effet, s'agissant d'un domaine priv\u00e9, les propri\u00e9taires nous ont fait\r\n cette demande.\r\n\r\n Vous remerciant par avance.\r\n\r\n Cordialement.\r\n\r\n Monsieur Dupont,\r\n Association LOI 1901.\r\n T\u00e9l. perso : 06.00.00.00.00\r\n\r\n ", + "date": "2020-06-04 11:29:57", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : attestation", + "label": "habitation", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 88, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Je vous remercie pour l'attestation demand\u00e9e par t\u00e9l\u00e9phone.\r\n je vous rappelle que je suis la propri\u00e9taire de ce v\u00e9hicule et que cette\r\n attestation est \u00e0 destination de mon travail.\r\n\r\n Cordialement\r\n\r\n Madame Dupont\r\n\r\n\r\n\r\n PS: il y a quelques temps j'ai sollicit\u00e9 vos services pour une attestation\r\n et je n'ai eu aucune difficult\u00e9 pour l'obtenir\r\n\r\n\r\n\r\n > Message du 28/05/18 15:22\r\n > De : conseiller@Societeimaginaire.fr\r\n > A : madamedupont@extensionb.com\r\n > Copie \u00e0 :\r\n > Objet : Envoi d'un document de la Societe Imaginaire\r\n >\r\n > Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Attestation d'assurance\r\n\r\n Cordialement.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: pj.pdf)", + "date": "2020-01-10 19:37:35", + "from": "Monsieur Dupont ", + "header": "demande attestation - Envoi d'un document de la Mutuelle Imaginaire", + "label": "resiliation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 57, + "attachment": "[\"Nume\u0301riser.pdf\"]", + "attachments": [ + "Nume\u0301riser.pdf" + ], + 
"body": "\r\n\r\n\r\n\r\n Bonjour madame,\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour, je vous joins un Scan de\r\n mon certificat de cession de mon v\u00e9hicule nautique assur\u00e9 chez vous.\r\n Je vous remercie de la clart\u00e9 de vos explications et reste \u00e0 votre\r\n disposition.\r\n Bien \u00e0 vous\r\n Monsieur Dupont(See attached file: Nume\u0301riser.pdf)\r\n ", + "date": "2020-01-19 11:24:23", + "from": "Monsieur Dupont ", + "header": "certificat de cession de v\u00e9hicule", + "label": "resiliation", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 82, + "attachment": "[\"pj.jpg\"]", + "attachments": [ + "pj.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Vous trouverez ci-joint le certificat de cession attestant de la vente du\r\n v\u00e9hicule pour r\u00e9silier l'assurance.\r\n\r\n Cordialement\r\n Monsieur Dupont(See attached file: pj.jpg)", + "date": "2022-04-15 12:56:46", + "from": "Monsieur Dupont ", + "header": "certificat de cession ", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 60, + "attachment": "[\"Screenshot.png\",\"Screenshot2.png\",\"Screenshot3.png\"]", + "attachments": [ + "Screenshot.png", + "Screenshot2.png", + "Screenshot3.png" + ], + "body": "\r\n\r\n\r\n\r\n Vous trouverez en pi\u00e8ce jointe le relev\u00e9 de situation ainsi que le bulletin\r\n de paie demand\u00e9 et un rib.\r\n Mon Adresse compl\u00e8te est l\u00e0 suivante : 00 rue imaginaire 33000 Villeimaginaire\r\n \u00a0(See attached file: Screenshot.png)(See attached\r\n file: Screenshot2.png)(See attached file:\r\n Screenshot3.png)", + "date": "2019-02-08 13:05:09", + "from": "Monsieur Dupont ", + "header": "Assurance auto", + "label": "adhesion", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 21, + "attachment": "[\"ACTE VENTE 1.pdf\",\"ACTE VENTE 2.pdf\"]", + "attachments": [ + "ACTE VENTE 1.pdf", + "ACTE VENTE 2.pdf" + ], + "body": 
"\r\n\r\n\r\n\r\n\r\n\r\n\r\n Le lundi 4 juin 2018 \u00e0 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\r\n a \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Nous avons bien re\u00e7u votre email qui a retenu toute notre attention.\r\n\r\n Le document que vous nous avez envoy\u00e9 n'est pas exploitable. Pourriez-vous\r\n nous le renvoyer dans une meilleure r\u00e9solution ?\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n\r\n (See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)", + "date": "2020-12-04 05:01:34", + "from": "Monsieur Dupont ", + "header": "Re: Suppression assurance logement", + "label": "succession", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 58, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Permettez-moi de vous signaler un changement d'adresse me concernant.\r\n\r\n Voici ma nouvelle adresse :\r\n\r\n 00 rue du Nomderue\r\n 75000 Paris\r\n\r\n Merci.\r\n\r\n Bien \u00e0 vous,\r\n\r\n Monsieur Dupont\r\n ", + "date": "2022-06-01 14:14:07", + "from": "Monsieur Dupont ", + "header": "changement d'adresse", + "label": "habitation", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 64, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:29 -----\r\n\r\n De :\tmonsieurdupont@extensionf.net\r\n A :\tSociete Imaginaire \r\n Date :\t02/06/2018 11:07\r\n Objet :\tFwd: Changement de vehicule\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Pourriez vous faire suite au mail suivant en date du 22 Mai\r\n 2017.\r\n\r\n\r\n De: monsieurdupont@extensionf.net\r\n \u00c0: \"Societe Imaginaire\" \r\n Envoy\u00e9: Mardi 22 Mai 2017 
10:15:25\r\n Objet: Changement de vehicule\r\n\r\n Bonjour,\r\n\r\n Merci de bien vouloir transf\u00e9rer l'assurance du vehicuel sur le\r\n Scooter,\r\n dont les r\u00e9f\u00e9rences sont sur la facture fourni en pi\u00e8ce-jointe.\r\n\r\n Me faire parvenir l'attestation.\r\n\r\n Merci.\r\n\r\n MR Dupont,\r\n le 22 mai 2017\r\n\r\n Cordialement\r\n (See attached file: Facture.jpg)", + "date": "2019-04-21 20:43:58", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Fwd: Changement de Scooter !", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 20, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:38 -----\r\n\r\n De :\tmonsieurdupont@extensiona.com\r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:52\r\n Objet :\tR\u00e9clamations\r\n\r\n\r\n\r\n Num\u00e9ro T\u00e9l. : 0600000000\r\n E-mail : monsieurdupont@extensiona.com\r\n M. DUPONT\r\n Adresse : 94000\r\n Objet de la demande : R\u00e9clamations\r\n\r\n Bonjour, j'aurais besoin de l'attestation\r\n pour ma declaration de revenus 2018.\r\n Merci d'avance\r\n ", + "date": "2022-02-26 14:07:52", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : R\u00e9clamations ", + "label": "habitation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 51, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour ,\r\n\r\n Veuillez recevoir le certificat de cession de mon v\u00e9hicule afin que vous\r\n puissiez effectuer la r\u00e9siliation de mon contrat.\r\n Je reviendrai vers vous afin d\u2019assurer mon nouveau v\u00e9hicule bient\u00f4t.\r\n\r\n Bien \u00e0 vous ,\r\n\r\n Mr DUPONT\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n\r\n Envoy\u00e9 de mon iPad", + "date": "2021-09-06 07:28:42", + "from": "Monsieur Dupont ", + "header": "R\u00e9siliation contrat voiture ", + 
"label": "resiliation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 77, + "attachment": "[\"Relev\u00e9 d'informations.pdf\"]", + "attachments": [ + "Relev\u00e9 d'informations.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n --\r\n Monsieur Dupont\r\n 02 chemin imaginaire\r\n 84000\r\n monsieurdupont@extensiona.com\r\n 06.00.00.00.00(See attached file: Relev\u00e9 d'informations.pdf)", + "date": "2020-06-25 15:26:02", + "from": "Monsieur Dupont ", + "header": "Demande", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 40, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour, je vous remercie de ce retour. Pouvez vous r\u00e9pondre aux\r\n diff\u00e9rentes questions soulev\u00e9es dans mon mail afin que je prenne ma\r\n d\u00e9cision. Bien cordialement. Mr Dupont.\r\n\r\n Obtenez Outlook pour iOS\r\n\r\n From: conseiller@Societeimaginaire.fr \r\n Sent: Monday, June 4, 2018 1:56:43 PM\r\n To: monsieurdupont@hotmail.com\r\n Subject: : Votre devis v\u00e9hicule\r\n\r\n Bonjour,\r\n Veuillez trouver ci-joint le devis que vous nous avez demand\u00e9.\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n Cordialement.\r\n La Societe Imaginaire\r\n ", + "date": "2022-10-02 04:04:31", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Interrogations", + "label": "vehicule", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 86, + "attachment": "[\"RIB.pdf\"]", + "attachments": [ + "RIB.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n De :\tmonsieurdupond \r\n A :\t\"conseiller@Societeimaginaire.fr\" \r\n Date :\t04/06/2018 13:47\r\n Objet :\tRE: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n Bonjour\r\n Ci-joint le rib du compte comme demand\u00e9\r\n Bien \u00e0 vous\r\n\r\n Monsieur DUPONT\r\n (Embedded image moved to file: pic.jpg)\r\n +33(0) 6 00 00 00 00\r\n\r\n De : conseiller@Societeimaginaire.fr 
[mailto:conseiller@Societeimaginaire.fr]\r\n Envoy\u00e9 : lundi 4 juin 2018 12:23\r\n \u00c0 : Monsieur Dupont \r\n Objet : Correspondance de La Societe Imaginaire\r\n\r\n Bonjour.\r\n\r\n Veuillez prendre connaissance des documents ci-joints :\r\n 1 - Courrier\r\n 2 - Envoi Devis habitation\r\n\r\n Meilleurs sentiments.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: RIB.pdf)", + "date": "2019-02-05 15:53:44", + "from": "conseiller@societeimaginaire.fr", + "header": "RIB", + "label": "regularisation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 52, + "attachment": "[\"image-a7c10.png\",\"Releve_Information.pdf\"]", + "attachments": [ + "image-a7c10.png", + "Releve_Information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Veuillez trouver ci-joint le RI de Mme Dupont,\r\n\r\n Vous souhaitant bonne r\u00e9ception,\r\n\r\n\r\n Bien cordialement.\r\n\r\n\r\n (Embedded image moved to file: pic30109.jpg)\r\n [IMAGE]\r\n\r\n Le 2018-06-04 20:37, Mr DUPONT a \u00e9crit\u00a0:\r\n Bonjour Monsieur,\r\n\r\n Pourriez-vous, s'il vous plait, faire parvenir mon relev\u00e9 d'informations \u00e0\r\n la Societe Imaginaire, par retour de mail, en gardant la r\u00e9f\u00e9rence cit\u00e9e en objet qui\r\n vous plait?\r\n\r\n Je vous remercie,\r\n\r\n Monsieur Dupont\r\n\r\n -------- Message d'origine --------\r\n De : conseiller@Societeimaginaire.fr\r\n Date : 04/06/2018 14:30 (GMT+01:00)\r\n \u00c0 : monsieurdupont@polytechnique.edu\r\n Objet : Confirmation de votre assurance v\u00e9hicule\u00a0\r\n\r\n\r\n Cher(e) client,\r\n\r\n Nous vous confirmons l'enregistrement de l'assurance de votre v\u00e9hicule en\r\n en date du 01/05/2017 suite \u00e0 la demande de r\u00e9siliation que nous avons\r\n effectu\u00e9e aupr\u00e8s de la Societe Concurrente.\r\n\r\n Toutefois, ces derniers ne nous ont pas envoy\u00e9 votre Relev\u00e9 d'Information\r\n donc il va 
falloir que vous leur demandiez pour ensuite nous le transmettre\r\n par mail \u00e0 l'adresse conseiller@Societeimaginaire.fr\r\n En attendant ce document, nous vous\r\n assurons quand m\u00eame, mais de mani\u00e8re provisoire.\r\n\r\n De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez.\r\n Pour cela, le plus simple et\r\n de nous contacter au 09.00.00.00.00 ou de pr\u00e9voir un recontact via notre\r\n site Societeimaginaire.fr\r\n\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe Imaginaire\r\n\r\n (See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)", + "date": "2020-09-29 13:14:36", + "from": "Conseiller ", + "header": "Re: Confirmation de votre assurance v\u00e9hicule", + "label": "resiliation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 72, + "attachment": "[\"rib.pdf\"]", + "attachments": [ + "rib.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique, veuillez trouver ici mon relev\u00e9\r\n d'identit\u00e9 bancaire.\r\n\r\n Au nom de Monsieur Dupont\r\n\r\n Bien cordialement\r\n\r\n --\r\n Monsieur Dupont\r\n 32 avenue Imaginaire\r\n T\u00e9l: +33 (0)600 00 00 00\r\n ", + "date": "2022-06-09 21:54:35", + "from": "Monsieur Dupont ", + "header": "RIB", + "label": "compte", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 39, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Nous souhaitons d\u00e9clar\u00e9s deux sinistres concernant nos deux v\u00e9hicules.\r\n\r\n 1) Vehicule1 :\r\n\r\n Le v\u00e9hicule \u00e9tait stationnement sur le parking et il\r\n pr\u00e9sente une trace sur l'aile arri\u00e8re droite et sur le\r\n pare-choc.\r\n\r\n 2) Vehicule2 :\r\n\r\n Le conducteur s'est gar\u00e9 sur un parking 
d'entreprise.\r\n Il a cogn\u00e9 avec le pneu avant droit.\r\n\r\n\r\n En vous souhaitant bonne r\u00e9ception de ces \u00e9l\u00e9ments,\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n (Embedded image moved to file: pic.jpg)", + "date": "2019-12-20 08:02:52", + "from": "Monsieur Dupont ", + "header": "sinistres", + "label": "sinistres", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n Nous restons \u00e0 votre disposition\r\n\r\n Cordialement\r\n\r\n Societe Imaginaire\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 05/06/2018 10:05 -----\r\n\r\n De :\tmonsieurdupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 10:26\r\n Objet :\tModification de contrat\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Notre fils, conducteur principal du v\u00e9hicule \u00e9tant\r\n en stage puis \u00e0 l'\u00e9tranger pour une bonne partie de l'an\r\n prochain, son v\u00e9hicule est d\u00e9sormais \u00e0 la maison et il ne sera amen\u00e9 \u00e0\r\n l'utiliser que rarement.\r\n Pour cette raison, nous souhaiterions modifier son contrat d'assurance\r\n d\u00e8s que possible.\r\n\r\n Vous remerciant par avance de votre concours, nous restons \u00e0 votre\r\n disposition pour toute information compl\u00e9mentaire.\r\n\r\n Cordialement\r\n\r\n Monsieur Dupont\r\n ", + "date": "2020-02-01 01:37:56", + "from": "conseiller@societeimaginaire.fr", + "header": "contrat vehicule", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 79, + "attachment": "[\"cession.pdf\"]", + "attachments": [ + "cession.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n De :\tMonsieur Dupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Cc\u00a0:\tMonsieur Dupont , Madame Dupont\r\n \r\n Date :\t04/06/2018 18:08\r\n Objet :\tr\u00e9siliation couverture v\u00e9hicule suite cession\r\n\r\n\r\n\r\n 
Bonjour,\r\n\r\n Le v\u00e9hicule a \u00e9t\u00e9 c\u00e9d\u00e9 le 2 avril 2018.\r\n\r\n Merci, ci joint le document de cession scann\u00e9\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont - Orange\r\n monsieurdupont@extensionj.fr\r\n\r\n responsable : Monsieur Dupont\r\n 06 00 00 00 00\r\n monsieurdupont@extensionj.fr\r\n\r\n\r\n\r\n\r\n (See attached file: cession.pdf)", + "date": "2019-05-30 15:47:18", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : r\u00e9siliation couverture v\u00e9hicule suite cession", + "label": "vehicule", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 18, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t05/06/2018 13:34\r\n Objet :\tR\u00e9ponse au dossier de pr\u00eat\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Vous nous avez adress\u00e9 un dossier de pr\u00eat concernant votre client.\r\n\r\n Nous vous informons de notre accord.\r\n\r\n Nous restons \u00e0 votre enti\u00e8re disposition.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller\r\n\r\n Societe Imaginaire\r\n\r\n T\u00e9l : 05 00 00 00 00\r\n Fax : 05 00 00 00 00\r\n E-mail: conseiller@societeimaginaire.fr\r\n\r\n\r\n", + "date": "2019-06-05 21:18:07", + "from": "conseiller@societeimaginaire.fr", + "header": "dossier de pr\u00eat ", + "label": "vehicule", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 45, + "attachment": "[\"doc.pdf\"]", + "attachments": [ + "doc.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n\r\n Je vous prie de trouver ci-joint une d\u00e9claration de sinistre,\r\n relative au cas de Monsieur Dupont, survenu le lundi 6 Mai.\r\n\r\n\r\n\r\n Avec nos cordiales salutations.\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n \u00a0(See attached file: doc.pdf)", + "date": "2020-09-29 17:53:01", + "from": "Monsieur Dupont ", + "header": "d\u00e9clarations de sinistre corporel et 
mat\u00e9riel", + "label": "habitation", + "sexe": "M", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 48, + "attachment": "[\"Attestationemployeur.pdf\",\"Relev\u00e9_d'information.pdf\"]", + "attachments": [ + "Attestationemployeur.pdf", + "Relev\u00e9_d'information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Je vous prie de trouver ci-joints les justificatifs demand\u00e9s pour la\r\n souscription de mon assurance auto.\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n (See attached file: Attestationemployeur.pdf)\r\n(See attached file: Relev\u00e9_d'information.pdf)", + "date": "2022-01-22 22:27:05", + "from": "Monsieur Dupont ", + "header": "Demande d'assurance auto", + "label": "resiliation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 63, + "attachment": "[\"IMG.PNG\"]", + "attachments": [ + "IMG.PNG" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Envoy\u00e9 de mon iPhone\r\n\r\n D\u00e9but du message transf\u00e9r\u00e9\u00a0:\r\n\r\n Exp\u00e9diteur: monsieurduponte@extensionh.fr\r\n Date: 7 juin 2018 \u00e0 16:34:25 UTC+2\r\n Destinataire: conseiller@Societeimaginaire.fr\r\n Objet: Demande\r\n\r\n\r\n\r\n Bonjour, suite \u00e0 notre conversation t\u00e9l\u00e9phonique voici la preuve\r\n de r\u00e8glement afin que vous puissiez \u00e9tudier ma demande de r\u00e9inscription.\r\n\r\n Cordialement,\r\n Monsieur Dupont\r\n [IMAGE]\r\n\r\n\r\n Envoy\u00e9 de mon iPhone(See attached file: IMG.PNG)", + "date": "2021-10-16 21:12:16", + "from": "Monsieur Dupont ", + "header": "preuve r\u00e9glement ", + "label": "compte", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 19, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour\r\n\r\n Je m'aper\u00e7ois ce jour que j'ai \u00e9t\u00e9 pr\u00e9l\u00e8v\u00e9e plusieurs fois:\r\n\r\n 1 fois sur mon compte bancaire BANQUE1\r\n 1 fois sur mon compte BANQUE2\r\n\r\n Je paye donc 2 fois l'assurance v\u00e9hicule et 
habitation\r\n\r\n Pourriez vous me rembourser la somme n\u00e9cessaire sur mon compte BANQUE1\r\n le plus rapidement possible.\r\n\r\n En vous remerciant par avance de votre r\u00e9ponse par retour de mail.\r\n Bien cordialement\r\n Monsieur Dupont", + "date": "2022-05-02 06:52:20", + "from": "Monsieur Dupont ", + "header": "pr\u00e9l\u00e8vements bancaires", + "label": "modification", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 39, + "attachment": "[\"Liste.docx\",\"PV.pdf\",\"statuts.pdf\",\"RIB.jpeg\"]", + "attachments": [ + "Liste.docx", + "PV.pdf", + "statuts.pdf", + "RIB.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Voici, ci-joins les documents demand\u00e9s pr\u00e9c\u00e9demment.\r\n\r\n Je vous remercie de la rapidit\u00e9 de vos service,\r\n Bien \u00e0 vous,\r\n Monsieur Dupont(See attached file: Liste.docx)(See attached file:\r\n PV.pdf)(See attached file: statuts.pdf)(See attached file:\r\n RIB.jpeg)", + "date": "2021-01-13 05:59:12", + "from": "Monsieur Dupont ", + "header": "documents ", + "label": "habitation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 31, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Centre Imaginaire\r\n\r\n 00 Rue de Nomderue\r\n\r\n 23000 VILLE IMAGINAIRE\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n Merci de bien vouloir me faire parvenir une attestation d'assurance\r\n concernant la mise \u00e0 disposition de la salle pour 100 personnes.\r\n\r\n objet de la manifestation : r\u00e9union\r\n\r\n cordialement.\r\n\r\n\r\n Monsieur Dupont\r\n\r\n ", + "date": "2021-01-04 02:42:06", + "from": "Monsieur Dupont ", + "header": "demande d'attestation d'asurance", + "label": "habitation", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + }, + { + "age": 18, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n (Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page\r\n Facebook\r\n\r\n (Embedded image moved 
to file: pic.gif)\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller/Societe Imaginaire le 07/06/2018 16:32 -----\r\n\r\n De :\tmonsieurdupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t07/06/2018 16:16\r\n Objet :\tRe : Message de votre conseill\u00e8re personnelle\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Merci de vos messages. La voiture est conduite par Monsieur Dupont.\r\n Nos deux voitures sont assur\u00e9es \u00e0 100%.\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n Le 07/06/18, \"conseiller@Societeimaginaire.fr\" a\r\n \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Suite \u00e0 notre bilan du 01 mai dernier, je souhaitais savoir ce que vous\r\n avez d\u00e9cid\u00e9.\r\n\r\n\r\n Je devais vous \u00e9tablir un devis pour votre voiture donc si\r\n vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi\r\n que le type de couverture que vous recherchez, je vous enverrai le devis rapidement.\r\n\r\n\r\n J'attend de vos nouvelles.\r\n\r\n Bien Cordialement.\r\n\r\n Conseiller\r\n D\u00e9l\u00e9gation Conseil\r\n conseillerh@Societeimaginaire.fr\r\n\r\n ", + "date": "2022-04-27 07:20:10", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Message de votre conseill\u00e8re personnelle", + "label": "vehicule", + "sexe": "F", + "to": "demandes@societeimaginaire.fr" + } +] diff --git a/melusine/data/emails_full.json b/melusine/data/emails_full.json new file mode 100644 index 0000000..65c3859 --- /dev/null +++ b/melusine/data/emails_full.json @@ -0,0 +1,5624 @@ +[ + { + "age": 35, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 24/05/2018 11:49 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Cc\u00a0:\tSociete@www.Societe.fr\r\n Date :\t24/05/2018 11:36\r\n Objet :\tDevis habitation\r\n\r\n\r\n\r\n Bonjour\r\n Je suis client chez vous\r\n Pouvez vous m \u00e9tablir un devis pour mon fils qui souhaite\r\n louer 
l\u2019appartement suivant :\r\n 25 rue du rueimaginaire 77000\r\n Merci\r\n Envoy\u00e9 de mon iPhone", + "body_tokens": [ + "je", + "suis", + "client", + "chez", + "vous", + "pouvez", + "vous", + "m", + "etablir", + "un", + "devis", + "pour", + "mon", + "fils", + "qui", + "souhaite", + "louer", + "l", + "appartement", + "suivant" + ], + "clean_header": "tr : devis habitation", + "date": "2019-03-27 17:34:12", + "flagged_header": "tr : devis habitation", + "flagged_text": "Je suis client chez vous\nPouvez vous m etablir un devis pour mon fils qui souhaite\nlouer l'appartement suivant :", + "from": "conseiller1@societeimaginaire.fr", + "header": "Tr : Devis habitation", + "header_tokens": [ + "tr", + "devis", + "habitation" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Je suis client chez vous" + ], + [ + "BODY", + "Pouvez vous m etablir un devis pour mon fils qui souhaite" + ], + [ + "BODY", + "louer l'appartement suivant :" + ], + [ + "SIGNATURE", + "25 rue du rueimaginaire 77000" + ], + [ + "THANKS", + "Merci" + ], + [ + "FOOTER", + "Envoye de mon iPhone" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je suis client chez vous\nPouvez vous m etablir un devis pour mon fils qui souhaite\nlouer l'appartement suivant :", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "devis", + "habitation", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "suis", + "client", + "chez", + "vous", + "pouvez", + "vous", + "m", + "etablir", + "un", + "devis", + "pour", + "mon", + "fils", + "qui", + "souhaite", + "louer", + "l", + "appartement", + "suivant" + ] + }, + { + "age": 32, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 25/05/2018 08:20 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date 
:\t24/05/2018 19:37\r\n Objet :\tImmatriculation voiture\r\n\r\n\r\n\r\n Bonsoir madame,\r\n\r\n Je vous informe que la nouvelle immatriculation est enfin\r\n faite. Je vous remercie bien pour votre patience.\r\n Je vous prie de trouver donc la carte grise ainsi que la\r\n nouvelle immatriculation. Je vous demanderai de faire les changements\r\n n\u00e9cessaires concernant l\u2019assurance.\r\n Je vous remercie encore pour tout.\r\n Cordialement,\r\n Monsieur Dupont (See attached file: pj.pdf)", + "body_tokens": [ + "je", + "vous", + "informe", + "que", + "la", + "nouvelle", + "immatriculation", + "est", + "enfin", + "faite", + "je", + "vous", + "prie", + "de", + "trouver", + "donc", + "la", + "carte", + "grise", + "ainsi", + "que", + "la", + "nouvelle", + "immatriculation", + "je", + "vous", + "demanderai", + "de", + "faire", + "changements", + "necessaires", + "concernant", + "l", + "assurance" + ], + "clean_header": "tr : immatriculation voiture", + "date": "2022-04-28 00:56:52", + "flagged_header": "tr : immatriculation voiture", + "flagged_text": "Je vous informe que la nouvelle immatriculation est enfin\nfaite\nJe vous prie de trouver donc la carte grise ainsi que la\nnouvelle immatriculation\nJe vous demanderai de faire les changements\nnecessaires concernant l'assurance", + "from": "conseiller1@societeimaginaire.fr", + "header": "Tr : Immatriculation voiture", + "header_tokens": [ + "tr", + "immatriculation", + "voiture" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonsoir madame," + ], + [ + "BODY", + "Je vous informe que la nouvelle immatriculation est enfin" + ], + [ + "BODY", + "faite" + ], + [ + "THANKS", + "Je vous remercie bien pour votre patience" + ], + [ + "BODY", + "Je vous prie de trouver donc la carte grise ainsi que la" + ], + [ + "BODY", + "nouvelle immatriculation" + ], + [ + "BODY", + "Je vous demanderai de faire les changements" + ], + [ + "BODY", + 
"necessaires concernant l'assurance" + ], + [ + "THANKS", + "Je vous remercie encore pour tout" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "BODY", + "Monsieur Dupont (See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je vous informe que la nouvelle immatriculation est enfin\nfaite\nJe vous prie de trouver donc la carte grise ainsi que la\nnouvelle immatriculation\nJe vous demanderai de faire les changements\nnecessaires concernant l'assurance", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "immatriculation", + "voiture", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "informe", + "que", + "la", + "nouvelle", + "immatriculation", + "est", + "enfin", + "faite", + "je", + "vous", + "prie", + "de", + "trouver", + "donc", + "la", + "carte", + "grise", + "ainsi", + "que", + "la", + "nouvelle", + "immatriculation", + "je", + "vous", + "demanderai", + "de", + "faire", + "changements", + "necessaires", + "concernant", + "l", + "assurance" + ] + }, + { + "age": 66, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjours,\r\n\r\n Suite a notre conversation t\u00e9l\u00e9phonique de Mardi , pourriez vous me dire la\r\n somme que je vous dois afin d'\u00eatre en r\u00e9gularisation .\r\n\r\n Merci bonne journ\u00e9e\r\n\r\n Le mar. 
22 mai 2018 \u00e0 10:20, a \u00e9crit\u00a0:\r\n Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Relev\u00e9 d'identit\u00e9 postal (contrats)\r\n\r\n Cordialement.\r\n\r\n La Mututelle Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "conversation", + "telephonique", + "de", + "mardi", + "pourriez", + "vous", + "me", + "dire", + "la", + "somme", + "que", + "je", + "vous", + "dois", + "afin", + "d", + "etre", + "en", + "regularisation" + ], + "clean_header": "re: envoi d'un document de la societe imaginaire", + "date": "2021-12-12 01:50:29", + "flagged_header": "re: envoi d'un document de la societe imaginaire", + "flagged_text": "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la\nsomme que je vous dois afin d'etre en regularisation", + "from": "Monsieur Dupont ", + "header": "Re: Envoi d'un document de la Soci\u00e9t\u00e9 Imaginaire", + "header_tokens": [ + "re", + "envoi", + "d", + "un", + "document", + "de", + "la", + "societe", + "imaginaire" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjours," + ], + [ + "BODY", + "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la" + ], + [ + "BODY", + "somme que je vous dois afin d'etre en regularisation" + ], + [ + "THANKS", + "Merci bonne journee" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Merci de bien vouloir prendre connaissance du document ci-joint :" + ], + [ + "SIGNATURE", + "1 - Releve d'identite postal (contrats)" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE_NAME", + "La Mututelle Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ] + ], + "text_from": null + } + ], + 
"sexe": "M", + "text": "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la\nsomme que je vous dois afin d'etre en regularisation", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "envoi", + "d", + "un", + "document", + "de", + "la", + "societe", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "conversation", + "telephonique", + "de", + "mardi", + "pourriez", + "vous", + "me", + "dire", + "la", + "somme", + "que", + "je", + "vous", + "dois", + "afin", + "d", + "etre", + "en", + "regularisation" + ] + }, + { + "age": 50, + "attachment": "[\"fichedepaie.png\"]", + "attachments": [ + "fichedepaie.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n Je fais suite \u00e0\u00a0 votre mail. J'ai envoy\u00e9 mon bulletin de salaire\r\n par courrier mais il semblerait que vous ne l'ayez pas r\u00e9ceptionn\u00e9.\r\n Vous trouverez ci-joint mon bulletin de salaire.\r\n\r\n\r\n Bien cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n Le 16/05/2018 \u00e0 11:27, conseiller@Societeimaginaire.fr a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif\r\n d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur par exemple.\r\n\r\n Votre conseiller Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. 
www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n ", + "body_tokens": [ + "je", + "fais", + "suite", + "a", + "votre", + "mail", + "j", + "ai", + "envoye", + "mon", + "bulletin", + "de", + "salaire", + "par", + "courrier", + "mais", + "il", + "semblerait", + "que", + "vous", + "ne", + "l", + "ayez", + "pas", + "receptionne", + "vous", + "trouverez", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire" + ], + "clean_header": "re: votre adhesion a la societe imaginaire", + "date": "2020-08-05 01:42:25", + "flagged_header": "re: votre adhesion a la societe imaginaire", + "flagged_text": "Je fais suite a votre mail\nJ'ai envoye mon bulletin de salaire\npar courrier mais il semblerait que vous ne l'ayez pas receptionne\nVous trouverez ci_joint mon bulletin de salaire", + "from": "Monsieur Dupont ", + "header": "Re: Votre adh\u00e9sion \u00e0 la Soci\u00e9t\u00e9 Imaginaire", + "header_tokens": [ + "re", + "votre", + "adhesion", + "a", + "la", + "societe", + "imaginaire" + ], + "label": "adhesion", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Je fais suite a votre mail" + ], + [ + "BODY", + "J'ai envoye mon bulletin de salaire" + ], + [ + "BODY", + "par courrier mais il semblerait que vous ne l'ayez pas receptionne" + ], + [ + "BODY", + "Vous trouverez ci-joint mon bulletin de salaire" + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Sauf erreur de notre part, nous n'avons pas recu votre justificatif" + ], + [ + "BODY", + "d'emploi" + ], + [ + "BODY", + "Nous vous rappelons qu'il nous est indispensable pour valider votre" + ], + [ + "BODY", + "adhesion aupres de notre Societe" + ], + [ + "BODY", + "Il peut s'agir de votre dernier 
bulletin de paye ou d'une attestation de" + ], + [ + "BODY", + "votre employeur par exemple" + ], + [ + "BODY", + "Votre conseiller Societe Imaginaire" + ], + [ + "FOOTER", + "| |Garanti sans virus" + ], + [ + "FOOTER", + "www.avast.com |" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je fais suite a votre mail\nJ'ai envoye mon bulletin de salaire\npar courrier mais il semblerait que vous ne l'ayez pas receptionne\nVous trouverez ci-joint mon bulletin de salaire", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "votre", + "adhesion", + "a", + "la", + "societe", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "fais", + "suite", + "a", + "votre", + "mail", + "j", + "ai", + "envoye", + "mon", + "bulletin", + "de", + "salaire", + "par", + "courrier", + "mais", + "il", + "semblerait", + "que", + "vous", + "ne", + "l", + "ayez", + "pas", + "receptionne", + "vous", + "trouverez", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire" + ] + }, + { + "age": 15, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n Voici ci joint mon bulletin de salaire comme demand\u00e9.\r\n Cordialement\r\n Monsieur Dupont\r\n\r\n Le ven. 
25 mai 2018 \u00e0 11:23, a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe Imaginaire.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur.\r\n\r\n Votre conseiller Societe Imaginaire\u00a0\u00a0(See attached file: pj.jpg)", + "body_tokens": [ + "voici", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire", + "comme", + "demande" + ], + "clean_header": "bulletin de salaire", + "date": "2019-09-18 05:58:43", + "flagged_header": "bulletin de salaire", + "flagged_text": "Voici ci_joint mon bulletin de salaire comme demande", + "from": "Monsieur Dupont ", + "header": "Bulletin de salaire", + "header_tokens": [ + "bulletin", + "de", + "salaire" + ], + "label": "adhesion", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici ci joint mon bulletin de salaire comme demande" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Sauf erreur de notre part, nous n'avons pas recu votre justificatif d'emploi" + ], + [ + "BODY", + "Nous vous rappelons qu'il nous est indispensable pour valider votre" + ], + [ + "BODY", + "adhesion aupres de notre Societe Imaginaire" + ], + [ + "BODY", + "Il peut s'agir de votre dernier bulletin de paye ou d'une attestation de" + ], + [ + "BODY", + "votre employeur" + ], + [ + "BODY", + "Votre conseiller Societe Imaginaire (See attached file: pj.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Voici ci joint mon bulletin de salaire comme demande", + "to": 
"conseiller@societeimaginaire.fr", + "tokens": [ + "bulletin", + "de", + "salaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "voici", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire", + "comme", + "demande" + ] + }, + { + "age": 22, + "attachment": "[]", + "attachments": [], + "body": " Madame, Monsieur,\r\n\r\n Je vous avais contact\u00e9s car j'avais pour\r\n projet d'agrandir ma maison. J'avais re\u00e7u un devis pour lequel je n'avais\r\n pas donn\u00e9 suite, les travaux n'\u00e9tant pas encore r\u00e9alis\u00e9s.\r\n\r\n Le projet a maintenant \u00e9t\u00e9 port\u00e9 \u00e0 son terme et je voudrais donc revoir\r\n votre offre si possible.\r\n\r\n Je d\u00e9sire garder le m\u00eame type de contrat.\r\n Je suis \u00e0 votre disposition pour tout renseignement compl\u00e9mentaires.\r\n\r\n Sinc\u00e8res salutations\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "je", + "vous", + "avais", + "contactes", + "car", + "j", + "avais", + "pour", + "projet", + "d", + "agrandir", + "ma", + "maison", + "j", + "avais", + "recu", + "un", + "devis", + "pour", + "lequel", + "je", + "n", + "avais", + "pas", + "donne", + "suite", + "travaux", + "n", + "etant", + "pas", + "encore", + "realises", + "projet", + "a", + "maintenant", + "ete", + "porte", + "a", + "son", + "terme", + "et", + "je", + "voudrais", + "donc", + "revoir", + "votre", + "offre", + "si", + "possible", + "je", + "desire", + "garder", + "meme", + "type", + "de", + "contrat", + "je", + "suis", + "a", + "votre", + "disposition", + "pour", + "tout", + "renseignement", + "complementaires" + ], + "clean_header": "modification et extension de ma maison", + "date": "2021-07-22 16:10:06", + "flagged_header": "modification et extension de ma maison", + "flagged_text": "Je vous avais contactes car j'avais pour\nprojet d'agrandir ma maison\nJ'avais recu un devis pour lequel je n'avais\npas donne suite, les travaux n'etant pas encore realises\nLe projet a maintenant ete porte a son terme et je 
voudrais donc revoir\nvotre offre si possible\nJe desire garder le meme type de contrat\nJe suis a votre disposition pour tout renseignement complementaires", + "from": "Monsieur Dupont ", + "header": "Modification et extension de ma maison", + "header_tokens": [ + "modification", + "et", + "extension", + "de", + "ma", + "maison" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous avais contactes car j'avais pour" + ], + [ + "BODY", + "projet d'agrandir ma maison" + ], + [ + "BODY", + "J'avais recu un devis pour lequel je n'avais" + ], + [ + "BODY", + "pas donne suite, les travaux n'etant pas encore realises" + ], + [ + "BODY", + "Le projet a maintenant ete porte a son terme et je voudrais donc revoir" + ], + [ + "BODY", + "votre offre si possible" + ], + [ + "BODY", + "Je desire garder le meme type de contrat" + ], + [ + "BODY", + "Je suis a votre disposition pour tout renseignement complementaires" + ], + [ + "GREETINGS", + "Sinceres salutations" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous avais contactes car j'avais pour\nprojet d'agrandir ma maison\nJ'avais recu un devis pour lequel je n'avais\npas donne suite, les travaux n'etant pas encore realises\nLe projet a maintenant ete porte a son terme et je voudrais donc revoir\nvotre offre si possible\nJe desire garder le meme type de contrat\nJe suis a votre disposition pour tout renseignement complementaires", + "to": "conseiller@societeimaginaire.fr", + "tokens": [ + "modification", + "et", + "extension", + "de", + "ma", + "maison", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "avais", + "contactes", + "car", + "j", + "avais", + "pour", + "projet", + "d", + "agrandir", + "ma", + "maison", + "j", + "avais", + "recu", + "un", + "devis", + "pour", + "lequel", + "je", + "n", + "avais", + 
"pas", + "donne", + "suite", + "travaux", + "n", + "etant", + "pas", + "encore", + "realises", + "projet", + "a", + "maintenant", + "ete", + "porte", + "a", + "son", + "terme", + "et", + "je", + "voudrais", + "donc", + "revoir", + "votre", + "offre", + "si", + "possible", + "je", + "desire", + "garder", + "meme", + "type", + "de", + "contrat", + "je", + "suis", + "a", + "votre", + "disposition", + "pour", + "tout", + "renseignement", + "complementaires" + ] + }, + { + "age": 28, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 12:24 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 15:56\r\n Objet :\tAssurance d'un nouveau logement\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n J'emm\u00e9nage dans un nouveau studio le Vendredi 2 Juin 2019\r\n mais je n'arrive pas \u00e0 souscrire d'assurance via la plateforme en ligne.\r\n\r\n C'est\u00a0pourquoi je vous envoie\u00a0l'\u00e9tat des\u00a0lieux de sortie du deuxi\u00e8me studio\r\n afin de\u00a0pouvoir l'enlever du contrat et ainsi pouvoir assurer le\u00a0nouveau\r\n logement.\r\n\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n\r\n T\u00e9l : 07.00.00.00.00\r\n\r\n\r\n Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf)\r\n ", + "body_tokens": [ + "j", + "emmenage", + "dans", + "un", + "nouveau", + "studio", + "vendredi", + "2", + "juin", + "2019", + "mais", + "je", + "n", + "arrive", + "pas", + "a", + "souscrire", + "d", + "assurance", + "via", + "la", + "plateforme", + "en", + "ligne", + "c", + "est", + "pourquoi", + "je", + "vous", + "envoie", + "l", + "etat", + "des", + "lieux", + "de", + "sortie", + "du", + "deuxieme", + "studio", + "afin", + "de", + "pouvoir", + "l", + "enlever", + "du", + "contrat", + "et", + "ainsi", + "pouvoir", + "assurer", + "nouveau", + "logement" + ], + "clean_header": "tr : assurance d'un nouveau logement", + "date": 
"2020-04-15 07:44:04", + "flagged_header": "tr : assurance d'un nouveau logement", + "flagged_text": "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019\nmais je n'arrive pas a souscrire d'assurance via la plateforme en ligne\nC'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio\nafin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau\nlogement", + "from": "Conseiller ", + "header": "Tr : Assurance d'un nouveau logement", + "header_tokens": [ + "tr", + "assurance", + "d", + "un", + "nouveau", + "logement" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019" + ], + [ + "BODY", + "mais je n'arrive pas a souscrire d'assurance via la plateforme en ligne" + ], + [ + "BODY", + "C'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio" + ], + [ + "BODY", + "afin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau" + ], + [ + "BODY", + "logement" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "Tel : 07" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019\nmais je n'arrive pas a souscrire d'assurance via la plateforme en ligne\nC'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio\nafin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau\nlogement", + "to": "conseiller1@societeimaginaire.fr", + "tokens": [ + "tr", + "assurance", + "d", + "un", + "nouveau", + "logement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "j", + "emmenage", + "dans", + "un", + 
"nouveau", + "studio", + "vendredi", + "2", + "juin", + "2019", + "mais", + "je", + "n", + "arrive", + "pas", + "a", + "souscrire", + "d", + "assurance", + "via", + "la", + "plateforme", + "en", + "ligne", + "c", + "est", + "pourquoi", + "je", + "vous", + "envoie", + "l", + "etat", + "des", + "lieux", + "de", + "sortie", + "du", + "deuxieme", + "studio", + "afin", + "de", + "pouvoir", + "l", + "enlever", + "du", + "contrat", + "et", + "ainsi", + "pouvoir", + "assurer", + "nouveau", + "logement" + ] + }, + { + "age": 39, + "attachment": "[\"image001.png\"]", + "attachments": [ + "image001.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n\r\n Je me permets de venir vers vous car depuis le d\u00e9but de l\u2019ann\u00e9e, nous avons\r\n des difficult\u00e9s pour assurer nos v\u00e9hicules.\r\n\r\n Ces derniers jours, vous nous avez fait parvenir le d\u00e9tail des\r\n d\u00e9nominations et des niveau d\u2019assurance de chaque v\u00e9hicule.\r\n\r\n Merci d\u2019effectuer ces changements\r\n\r\n\r\n\r\n Dans l\u2019attente de votre retour\r\n\r\n\r\n\r\n Cordialement\r\n\r\n\r\n\r\n Monsieur Dupont\r\n\r\n \u00a0(See attached file: image001.png)(See attached file: Assurances v\u00e9hicules2018.pdf)", + "body_tokens": [ + "je", + "me", + "permets", + "de", + "venir", + "vers", + "vous", + "car", + "depuis", + "debut", + "de", + "l", + "annee", + "nous", + "avons", + "des", + "difficultes", + "pour", + "assurer", + "nos", + "vehicules", + "ces", + "derniers", + "jours", + "vous", + "nous", + "avez", + "fait", + "parvenir", + "detail", + "des", + "denominations", + "et", + "des", + "niveau", + "d", + "assurance", + "de", + "chaque", + "vehicule", + "merci", + "d", + "effectuer", + "ces", + "changements", + "dans", + "l", + "attente", + "de", + "votre", + "retour" + ], + "clean_header": "assurance vehicules", + "date": "2021-08-13 16:01:02", + "flagged_header": "assurance vehicules", + "flagged_text": "Je me permets de venir vers vous car depuis le debut de 
l'annee, nous avons\ndes difficultes pour assurer nos vehicules\nCes derniers jours, vous nous avez fait parvenir le detail des\ndenominations et des niveau d'assurance de chaque vehicule\nMerci d'effectuer ces changements\nDans l'attente de votre retour", + "from": "Monsieur Dupont ", + "header": "Assurance v\u00e9hicules", + "header_tokens": [ + "assurance", + "vehicules" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Je me permets de venir vers vous car depuis le debut de l'annee, nous avons" + ], + [ + "BODY", + "des difficultes pour assurer nos vehicules" + ], + [ + "BODY", + "Ces derniers jours, vous nous avez fait parvenir le detail des" + ], + [ + "BODY", + "denominations et des niveau d'assurance de chaque vehicule" + ], + [ + "BODY", + "Merci d'effectuer ces changements" + ], + [ + "BODY", + "Dans l'attente de votre retour" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(See attached file: image001.png)(See attached file: Assurances vehicules2018.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je me permets de venir vers vous car depuis le debut de l'annee, nous avons\ndes difficultes pour assurer nos vehicules\nCes derniers jours, vous nous avez fait parvenir le detail des\ndenominations et des niveau d'assurance de chaque vehicule\nMerci d'effectuer ces changements\nDans l'attente de votre retour", + "to": "demandes4@societeimaginaire.fr", + "tokens": [ + "assurance", + "vehicules", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "me", + "permets", + "de", + "venir", + "vers", + "vous", + "car", + "depuis", + "debut", + "de", + "l", + "annee", + "nous", + "avons", + "des", + "difficultes", + "pour", + "assurer", + "nos", + "vehicules", + "ces", + "derniers", + "jours", + "vous", + "nous", + "avez", + "fait", + "parvenir", + "detail", + "des", + 
"denominations", + "et", + "des", + "niveau", + "d", + "assurance", + "de", + "chaque", + "vehicule", + "merci", + "d", + "effectuer", + "ces", + "changements", + "dans", + "l", + "attente", + "de", + "votre", + "retour" + ] + }, + { + "age": 38, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Voici la copie du virement effectuer \u00e0 ce jour.\r\n Serait-il possible d\u2019obtenir une attestation d\u2019assurance?\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n 06 00 00 00 00\r\n (See attached file: pj.pdf)\r\n\r\n > Le 23 mai 2018 \u00e0 10:17, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >\r\n > Bonjour,\r\n >\r\n > Nous faisons suite \u00e0 votre dernier courriel.\r\n >\r\n > A ce jour, le montant \u00e0 devoir, permettant de solder votre compte\r\n cotisation, est de 000.00euros.\r\n >\r\n > Nous restons \u00e0 votre disposition pour tous renseignements\r\n compl\u00e9mentaires.\r\n >\r\n > Meilleures salutations,\r\n >\r\n > Conseiller.\r\n > ", + "body_tokens": [ + "voici", + "la", + "copie", + "du", + "virement", + "effectuer", + "a", + "ce", + "jour", + "serait-il", + "possible", + "d", + "obtenir", + "une", + "attestation", + "d", + "assurance" + ], + "clean_header": "re: virement", + "date": "2021-06-18 04:20:07", + "flagged_header": "re: virement", + "flagged_text": "Voici la copie du virement effectuer a ce jour\nSerait-il possible d'obtenir une attestation d'assurance?", + "from": "Monsieur Dupont ", + "header": "Re: Virement", + "header_tokens": [ + "re", + "virement" + ], + "label": "autres", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici la copie du virement effectuer a ce jour" + ], + [ + "BODY", + "Serait-il possible d'obtenir une attestation d'assurance?" 
+ ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "06 00 00 00 00" + ], + [ + "PJ", + "(See attached file: pj.pdf)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "> Nous faisons suite a votre dernier courriel" + ], + [ + "BODY", + "> A ce jour, le montant a devoir, permettant de solder votre compte" + ], + [ + "BODY", + "cotisation, est de 000" + ], + [ + "BODY", + "00euros" + ], + [ + "BODY", + "> Nous restons a votre disposition pour tous renseignements" + ], + [ + "BODY", + "complementaires" + ], + [ + "GREETINGS", + "> Meilleures salutations," + ], + [ + "SIGNATURE", + "> Conseiller" + ], + [ + "BODY", + "> Message du 31/05/18 08:51\r\n > De : monsieurdupont@extensionb.com\r\n > A : \"GARAGE\" \r\n > Copie \u00e0 :\r\n > Objet : Attached Image\r\n >\r\n >(See attached file: pj.pdf)", + "body_tokens": [], + "clean_header": "pret vehicule", + "date": "2022-08-30 03:14:42", + "flagged_header": "pret vehicule", + "flagged_text": "", + "from": "Monsieur Dupont ", + "header": "Pr\u00eat v\u00e9hicule", + "header_tokens": [ + "pret", + "vehicule" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "BONJOUR" + ], + [ + "SIGNATURE_NAME", + "CI-JOINT PRET VEHICULE" + ], + [ + "GREETINGS", + "CORDIALEMENT" + ], + [ + "BODY", + "GARAGE> Message du 31/05/18 08:51" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Copie" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "PJ", + "(See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "pret", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]" + ] + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 11:59 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 10:12\r\n Objet :\tRe: Demande\r\n\r\n\r\n\r\n Bonjour Monsieur,\r\n Pouvez-vous m'appeler lundi prochain en fin d'apr\u00e8s-midi ?\r\n Cordialement,\r\n Monsieur Dupont\r\n\r\n Le mer. 31 mai 2018 \u00e0 09:58, a \u00e9crit\u00a0:\r\n Bonjour Mr Dupont,\r\n\r\n Je fais suite \u00e0 mon message t\u00e9l\u00e9phonique de ce jour.\r\n Comme pr\u00e9cis\u00e9, je vous adresse ce mail avec mes coordonn\u00e9es pour que\r\n vous puissiez en retour me faire part de vos disponibilit\u00e9s.\r\n Pour rappel, je souhaite vous rencontrer pour faire un bilan\r\n g\u00e9n\u00e9ral de vos contrats.\r\n\r\n Dans l'attente de vous lire, je vous souhaite une bonne journ\u00e9e.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller.\r\n conseiller@Societeimaginaire.fr\r\n Conseiller Societe Imaginaire.\r\n ", + "body_tokens": [ + "pouvez-vous", + "m", + "appeler", + "lundi", + "prochain", + "en", + "fin", + "d", + "apres-midi" + ], + "clean_header": "bilan general contrats", + "date": "2019-06-12 03:40:19", + "flagged_header": "bilan general contrats", + "flagged_text": "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?", + "from": "conseiller@societeimaginaire.fr", + "header": "Bilan g\u00e9n\u00e9ral contrats", + "header_tokens": [ + "bilan", + "general", + "contrats" + ], + "label": "bilan", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Monsieur," + ], + [ + "BODY", + "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?" 
+ ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Mr Dupont," + ], + [ + "BODY", + "Je fais suite a mon message telephonique de ce jour" + ], + [ + "BODY", + "Comme precise, je vous adresse ce mail avec mes coordonnees pour que" + ], + [ + "BODY", + "vous puissiez en retour me faire part de vos disponibilites" + ], + [ + "BODY", + "Pour rappel, je souhaite vous rencontrer pour faire un bilan" + ], + [ + "BODY", + "general de vos contrats" + ], + [ + "BODY", + "Dans l'attente de vous lire, je vous souhaite une bonne journee" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE", + "conseiller@Societeimaginaire.fr" + ], + [ + "SIGNATURE", + "Conseiller Societe Imaginaire" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "bilan", + "general", + "contrats", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "pouvez-vous", + "m", + "appeler", + "lundi", + "prochain", + "en", + "fin", + "d", + "apres-midi" + ] + }, + { + "age": 23, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour,\r\n je joins le num\u00e9ro de t\u00e9l\u00e9phone de mon fils\r\n\r\n Monsieur Dupont\r\n \u00a0tel : 06 00 00 00 00.\r\n\r\n Monsieur Dupont\r\n\r\n Cordialement\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "joins", + "numero", + "de", + "telephone", + "de", + "mon", + "fils" + ], + "clean_header": "numero de telephone", + "date": "2021-10-04 18:17:05", + "flagged_header": "numero de telephone", + "flagged_text": "Suite a notre entretien telephonique de ce jour,\nje 
joins le numero de telephone de mon fils", + "from": "monsieurdupont@extensionf.net", + "header": "Num\u00e9ro de t\u00e9l\u00e9phone", + "header_tokens": [ + "numero", + "de", + "telephone" + ], + "label": "modification", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Suite a notre entretien telephonique de ce jour," + ], + [ + "BODY", + "je joins le numero de telephone de mon fils" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "tel : 06 00 00 00 00" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "GREETINGS", + "Cordialement" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique de ce jour,\nje joins le numero de telephone de mon fils", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "numero", + "de", + "telephone", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "joins", + "numero", + "de", + "telephone", + "de", + "mon", + "fils" + ] + }, + { + "age": 28, + "attachment": "[\"image1.jpeg\",\"image2.jpeg\",\"image3.jpeg\"]", + "attachments": [ + "image1.jpeg", + "image2.jpeg", + "image3.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:41\r\n Objet :\tRe: Vos documents demand\u00e9s\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Serait-il possible de rebasculer mon v\u00e9hicule d\u00e8s\r\n aujourd'hui en assurance parfaite?\r\n\r\n Voici des photos du jour qui attestent de son parfait \u00e9tat.\r\n\r\n D'avance merci de votre confirmation\r\n\r\n Monsieur Dupont\r\n Envoy\u00e9 de mon iPhone\r\n (See attached file: image1.jpeg)\r\n\r\n\r\n (See attached file: image2.jpeg)\r\n\r\n\r\n (See attached file: image3.jpeg)\r\n\r\n\r\n > Le 21 janv. 
2018 \u00e0 20:38, Monsieur Dupont a \u00e9crit :\r\n >\r\n > Bonjour Madame,\r\n >\r\n > Je vous confirme que je souhaite basculer mon contrat actuel sous la\r\n nouvelle forme d\u00e8s \u00e0 pr\u00e9sent.\r\n >\r\n > D'avance merci de votre confirmation.\r\n >\r\n > Bien cordialement,\r\n > Monsieur Dupont\r\n >\r\n > Envoy\u00e9 de mon iPad\r\n >\r\n >> Le 30 nov. 2017 \u00e0 10:06, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >>\r\n >> Bonjour,\r\n >>\r\n >> Veuillez trouver ci-joint les documents que vous nous avez demand\u00e9s :\r\n >> - Devis1\r\n >> - Devis2\r\n >>\r\n >> La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n >>\r\n >> Bien \u00e0 vous.\r\n >>\r\n >> La SocSociete Imaginaire.\r\n >> \r\n >> ", + "body_tokens": [ + "serait-il", + "possible", + "de", + "rebasculer", + "mon", + "vehicule", + "des", + "aujourd", + "hui", + "en", + "assurance", + "parfaite", + "voici", + "des", + "photos", + "du", + "jour", + "qui", + "attestent", + "de", + "son", + "parfait", + "etat", + "d", + "avance", + "merci", + "de", + "votre", + "confirmation" + ], + "clean_header": "tr : re: vos documents demandes", + "date": "2022-06-14 08:35:02", + "flagged_header": "tr : re: vos documents demandes", + "flagged_text": "Serait-il possible de rebasculer mon vehicule des\naujourd'hui en assurance parfaite?\nVoici des photos du jour qui attestent de son parfait etat\nD'avance merci de votre confirmation", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Vos documents demand\u00e9s", + "header_tokens": [ + "tr", + "re", + "vos", + "documents", + "demandes" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Serait-il possible de rebasculer mon vehicule des" + ], + [ + "BODY", + "aujourd'hui en assurance parfaite?" 
+ ], + [ + "BODY", + "Voici des photos du jour qui attestent de son parfait etat" + ], + [ + "BODY", + "D'avance merci de votre confirmation" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "FOOTER", + "Envoye de mon iPhone" + ], + [ + "PJ", + "(See attached file: image1.jpeg)" + ], + [ + "PJ", + "(See attached file: image2.jpeg)" + ], + [ + "PJ", + "(See attached file: image3.jpeg)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "> Je vous confirme que je souhaite basculer mon contrat actuel sous la" + ], + [ + "BODY", + "nouvelle forme des a present" + ], + [ + "BODY", + "> D'avance merci de votre confirmation" + ], + [ + "GREETINGS", + "> Bien cordialement," + ], + [ + "HELLO", + "> Monsieur Dupont>" + ], + [ + "FOOTER", + "> Envoye de mon iPad" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + ">> Veuillez trouver ci-joint les documents que vous nous avez demandes :" + ], + [ + "BODY", + ">> - Devis1>> - Devis2>>" + ], + [ + "FOOTER", + ">> La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "GREETINGS", + ">> Bien a vous" + ], + [ + "SIGNATURE_NAME", + ">> La SocSociete Imaginaire" + ], + [ + "BODY", + ">> " + ], + [ + "BODY", + ">> ", + "header": "Demande", + "header_tokens": [ + "demande" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Monsieur Dupont (See attached file: Releve d'informations.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Monsieur Dupont (See attached file: Releve d'informations.pdf)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "monsieur", + "dupont", + "see", + "attached", + "file", + "releve", + "d", + "informations", 
+ "pdf" + ] + }, + { + "age": 36, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 09:20 -----\r\n\r\n De : \u00a0 \u00a0 \u00a0 \u00a0Association Dupont \r\n A : \u00a0 \u00a0 \u00a0 \u00a0representant@Societeimaginaire.fr\r\n Date : \u00a0 \u00a0 \u00a0 \u00a001/06/2018 16:03\r\n Objet : \u00a0 \u00a0 \u00a0 \u00a0Re: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n\r\n Bonjour, merci de votre retour rapide.\r\n\r\n Concernent l'attestation du 22 septembre, serait-il possible de faire\r\n appara\u00eetre l'adresse compl\u00e8te de l'\u00e9v\u00e9nement.\r\n\r\n En effet, s'agissant d'un domaine priv\u00e9, les propri\u00e9taires nous ont fait\r\n cette demande.\r\n\r\n Vous remerciant par avance.\r\n\r\n Cordialement.\r\n\r\n Monsieur Dupont,\r\n Association LOI 1901.\r\n T\u00e9l. perso : 06.00.00.00.00\r\n\r\n ", + "body_tokens": [ + "bonjour", + "merci", + "de", + "votre", + "retour", + "rapide", + "concernent", + "l", + "attestation", + "du", + "22", + "septembre", + "serait-il", + "possible", + "de", + "faire", + "apparaitre", + "l", + "adresse", + "complete", + "de", + "l", + "evenement", + "en", + "effet", + "s", + "agissant", + "d", + "un", + "domaine", + "prive", + "proprietaires", + "nous", + "ont", + "fait", + "cette", + "demande" + ], + "clean_header": "tr : attestation", + "date": "2020-06-04 11:29:57", + "flagged_header": "tr : attestation", + "flagged_text": "Bonjour, merci de votre retour rapide\nConcernent l'attestation du 22 septembre, serait-il possible de faire\napparaitre l'adresse complete de l'evenement\nEn effet, s'agissant d'un domaine prive, les proprietaires nous ont fait\ncette demande", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : attestation", + "header_tokens": [ + "tr", + "attestation" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, merci de 
votre retour rapide" + ], + [ + "BODY", + "Concernent l'attestation du 22 septembre, serait-il possible de faire" + ], + [ + "BODY", + "apparaitre l'adresse complete de l'evenement" + ], + [ + "BODY", + "En effet, s'agissant d'un domaine prive, les proprietaires nous ont fait" + ], + [ + "BODY", + "cette demande" + ], + [ + "THANKS", + "Vous remerciant par avance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont," + ], + [ + "SIGNATURE", + "Association LOI 1901" + ], + [ + "SIGNATURE", + "Tel" + ], + [ + "BODY", + "perso : 06" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Bonjour, merci de votre retour rapide\nConcernent l'attestation du 22 septembre, serait-il possible de faire\napparaitre l'adresse complete de l'evenement\nEn effet, s'agissant d'un domaine prive, les proprietaires nous ont fait\ncette demande", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "attestation", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "bonjour", + "merci", + "de", + "votre", + "retour", + "rapide", + "concernent", + "l", + "attestation", + "du", + "22", + "septembre", + "serait-il", + "possible", + "de", + "faire", + "apparaitre", + "l", + "adresse", + "complete", + "de", + "l", + "evenement", + "en", + "effet", + "s", + "agissant", + "d", + "un", + "domaine", + "prive", + "proprietaires", + "nous", + "ont", + "fait", + "cette", + "demande" + ] + }, + { + "age": 88, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Je vous remercie pour l'attestation demand\u00e9e par t\u00e9l\u00e9phone.\r\n je vous rappelle que je suis la propri\u00e9taire de ce v\u00e9hicule et que cette\r\n attestation est \u00e0 destination de mon travail.\r\n\r\n Cordialement\r\n\r\n Madame Dupont\r\n\r\n\r\n\r\n PS: il y a quelques temps j'ai sollicit\u00e9 vos 
services pour une attestation\r\n et je n'ai eu aucune difficult\u00e9 pour l'obtenir\r\n\r\n\r\n\r\n > Message du 28/05/18 15:22\r\n > De : conseiller@Societeimaginaire.fr\r\n > A : madamedupont@extensionb.com\r\n > Copie \u00e0 :\r\n > Objet : Envoi d'un document de la Societe Imaginaire\r\n >\r\n > Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Attestation d'assurance\r\n\r\n Cordialement.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: pj.pdf)", + "body_tokens": [ + "je", + "vous", + "remercie", + "pour", + "l", + "attestation", + "demandee", + "par", + "telephone", + "je", + "vous", + "rappelle", + "que", + "je", + "suis", + "la", + "proprietaire", + "de", + "ce", + "vehicule", + "et", + "que", + "cette", + "attestation", + "est", + "a", + "destination", + "de", + "mon", + "travail" + ], + "clean_header": "demande attestation - envoi d'un document de la mutuelle imaginaire", + "date": "2020-01-10 19:37:35", + "flagged_header": "demande attestation - envoi d'un document de la mutuelle imaginaire", + "flagged_text": "Je vous remercie pour l'attestation demandee par telephone\nje vous rappelle que je suis la proprietaire de ce vehicule et que cette\nattestation est a destination de mon travail", + "from": "Monsieur Dupont ", + "header": "demande attestation - Envoi d'un document de la Mutuelle Imaginaire", + "header_tokens": [ + "demande", + "attestation", + "envoi", + "d", + "un", + "document", + "de", + "la", + "mutuelle", + "imaginaire" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "Je vous remercie pour l'attestation demandee par telephone" + ], + [ + "BODY", + "je vous rappelle que je suis la proprietaire de ce vehicule et que cette" + ], + [ + "BODY", + "attestation est a destination de mon travail" + ], + [ + 
"GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Madame Dupont" + ], + [ + "BODY", + "PS: il y a quelques temps j'ai sollicite vos services pour une attestation" + ], + [ + "BODY", + "et je n'ai eu aucune difficulte pour l'obtenir> Message du 28/05/18 15:22" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Merci de bien vouloir prendre connaissance du document ci-joint :" + ], + [ + "SIGNATURE", + "1 - Attestation d'assurance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "BODY", + "(See attached" + ], + [ + "BODY", + "file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous remercie pour l'attestation demandee par telephone\nje vous rappelle que je suis la proprietaire de ce vehicule et que cette\nattestation est a destination de mon travail", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "attestation", + "envoi", + "d", + "un", + "document", + "de", + "la", + "mutuelle", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "remercie", + "pour", + "l", + "attestation", + "demandee", + "par", + "telephone", + "je", + "vous", + "rappelle", + "que", + "je", + "suis", + "la", + "proprietaire", + "de", + "ce", + "vehicule", + "et", + "que", + "cette", + "attestation", + "est", + "a", + "destination", + "de", + "mon", + "travail" + ] + }, + { + "age": 57, + "attachment": "[\"Nume\u0301riser.pdf\"]", + "attachments": [ + "Nume\u0301riser.pdf" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour madame,\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour, je vous joins un Scan de\r\n mon certificat de cession de mon v\u00e9hicule nautique 
assur\u00e9 chez vous.\r\n Je vous remercie de la clart\u00e9 de vos explications et reste \u00e0 votre\r\n disposition.\r\n Bien \u00e0 vous\r\n Monsieur Dupont(See attached file: Nume\u0301riser.pdf)\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "vous", + "joins", + "un", + "scan", + "de", + "mon", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "nautique", + "assure", + "chez", + "vous", + "je", + "vous", + "remercie", + "de", + "la", + "clarte", + "de", + "vos", + "explications", + "et", + "reste", + "a", + "votre", + "disposition" + ], + "clean_header": "certificat de cession de vehicule", + "date": "2020-01-19 11:24:23", + "flagged_header": "certificat de cession de vehicule", + "flagged_text": "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de\nmon certificat de cession de mon vehicule nautique assure chez vous\nJe vous remercie de la clarte de vos explications et reste a votre\ndisposition", + "from": "Monsieur Dupont ", + "header": "certificat de cession de v\u00e9hicule", + "header_tokens": [ + "certificat", + "de", + "cession", + "de", + "vehicule" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour madame," + ], + [ + "BODY", + "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de" + ], + [ + "BODY", + "mon certificat de cession de mon vehicule nautique assure chez vous" + ], + [ + "BODY", + "Je vous remercie de la clarte de vos explications et reste a votre" + ], + [ + "BODY", + "disposition" + ], + [ + "GREETINGS", + "Bien a vous" + ], + [ + "BODY", + "Monsieur Dupont(See attached file: Numeriser.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de\nmon certificat de cession de mon vehicule nautique assure chez vous\nJe vous 
remercie de la clarte de vos explications et reste a votre\ndisposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "certificat", + "de", + "cession", + "de", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "vous", + "joins", + "un", + "scan", + "de", + "mon", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "nautique", + "assure", + "chez", + "vous", + "je", + "vous", + "remercie", + "de", + "la", + "clarte", + "de", + "vos", + "explications", + "et", + "reste", + "a", + "votre", + "disposition" + ] + }, + { + "age": 82, + "attachment": "[\"pj.jpg\"]", + "attachments": [ + "pj.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Vous trouverez ci-joint le certificat de cession attestant de la vente du\r\n v\u00e9hicule pour r\u00e9silier l'assurance.\r\n\r\n Cordialement\r\n Monsieur Dupont(See attached file: pj.jpg)", + "body_tokens": [ + "vous", + "trouverez", + "ci_joint", + "certificat", + "de", + "cession", + "attestant", + "de", + "la", + "vente", + "du", + "vehicule", + "pour", + "resilier", + "l", + "assurance" + ], + "clean_header": "certificat de cession ", + "date": "2022-04-15 12:56:46", + "flagged_header": "certificat de cession", + "flagged_text": "Vous trouverez ci_joint le certificat de cession attestant de la vente du\nvehicule pour resilier l'assurance", + "from": "Monsieur Dupont ", + "header": "certificat de cession ", + "header_tokens": [ + "certificat", + "de", + "cession" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Vous trouverez ci-joint le certificat de cession attestant de la vente du" + ], + [ + "BODY", + "vehicule pour resilier l'assurance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "BODY", + "Monsieur Dupont(See attached file: pj.jpg)" + ] + ], + 
"text_from": null + } + ], + "sexe": "M", + "text": "Vous trouverez ci-joint le certificat de cession attestant de la vente du\nvehicule pour resilier l'assurance", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "certificat", + "de", + "cession", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + "trouverez", + "ci_joint", + "certificat", + "de", + "cession", + "attestant", + "de", + "la", + "vente", + "du", + "vehicule", + "pour", + "resilier", + "l", + "assurance" + ] + }, + { + "age": 60, + "attachment": "[\"Screenshot.png\",\"Screenshot2.png\",\"Screenshot3.png\"]", + "attachments": [ + "Screenshot.png", + "Screenshot2.png", + "Screenshot3.png" + ], + "body": "\r\n\r\n\r\n\r\n Vous trouverez en pi\u00e8ce jointe le relev\u00e9 de situation ainsi que le bulletin\r\n de paie demand\u00e9 et un rib.\r\n Mon Adresse compl\u00e8te est l\u00e0 suivante : 00 rue imaginaire 33000 Villeimaginaire\r\n \u00a0(See attached file: Screenshot.png)(See attached\r\n file: Screenshot2.png)(See attached file:\r\n Screenshot3.png)", + "body_tokens": [ + "vous", + "trouverez", + "en", + "piece", + "jointe", + "releve", + "de", + "situation", + "ainsi", + "que", + "bulletin", + "de", + "paie", + "demande", + "et", + "un", + "rib", + "mon", + "adresse", + "complete", + "est", + "la", + "suivante", + "00", + "rue", + "imaginaire", + "33000", + "villeimaginaire", + "file", + "screenshot2", + "png", + "see", + "attached", + "file", + "screenshot3", + "png" + ], + "clean_header": "assurance auto", + "date": "2019-02-08 13:05:09", + "flagged_header": "assurance auto", + "flagged_text": "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin\nde paie demande et un rib\nMon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire\nfile: Screenshot2.png)(See attached file:\nScreenshot3.png)", + "from": "Monsieur Dupont ", + "header": "Assurance auto", + "header_tokens": [ + "assurance", + "auto" + ], + "label": "adhesion", + 
"messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin" + ], + [ + "BODY", + "de paie demande et un rib" + ], + [ + "BODY", + "Mon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire" + ], + [ + "PJ", + "(See attached file: Screenshot.png)(See attached" + ], + [ + "BODY", + "file: Screenshot2.png)(See attached file:" + ], + [ + "BODY", + "Screenshot3.png)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin\nde paie demande et un rib\nMon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire\nfile: Screenshot2.png)(See attached file:\nScreenshot3.png)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "assurance", + "auto", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + "trouverez", + "en", + "piece", + "jointe", + "releve", + "de", + "situation", + "ainsi", + "que", + "bulletin", + "de", + "paie", + "demande", + "et", + "un", + "rib", + "mon", + "adresse", + "complete", + "est", + "la", + "suivante", + "00", + "rue", + "imaginaire", + "33000", + "villeimaginaire", + "file", + "screenshot2", + "png", + "see", + "attached", + "file", + "screenshot3", + "png" + ] + }, + { + "age": 21, + "attachment": "[\"ACTE VENTE 1.pdf\",\"ACTE VENTE 2.pdf\"]", + "attachments": [ + "ACTE VENTE 1.pdf", + "ACTE VENTE 2.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n Le lundi 4 juin 2018 \u00e0 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\r\n a \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Nous avons bien re\u00e7u votre email qui a retenu toute notre attention.\r\n\r\n Le document que vous nous avez envoy\u00e9 n'est pas exploitable. 
Pourriez-vous\r\n nous le renvoyer dans une meilleure r\u00e9solution ?\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n\r\n (See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)", + "body_tokens": [ + "lundi", + "4", + "juin", + "2018", + "a", + "13", + "59", + "14", + "utc", + "2", + "conseiller", + "societeimaginaire", + "fr", + "conseiller", + "societeimaginaire", + "fr", + "a", + "ecrit", + "nous", + "avons", + "bien", + "recu", + "votre", + "email", + "qui", + "a", + "retenu", + "toute", + "notre", + "attention", + "document", + "que", + "vous", + "nous", + "avez", + "envoye", + "n", + "est", + "pas", + "exploitable", + "pourriez-vous", + "nous", + "renvoyer", + "dans", + "une", + "meilleure", + "resolution" + ], + "clean_header": "re: suppression assurance logement", + "date": "2020-12-04 05:01:34", + "flagged_header": "re: suppression assurance logement", + "flagged_text": "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\n a ecrit :\nNous avons bien recu votre email qui a retenu toute notre attention\nLe document que vous nous avez envoye n'est pas exploitable\nPourriez-vous\nnous le renvoyer dans une meilleure resolution ?", + "from": "Monsieur Dupont ", + "header": "Re: Suppression assurance logement", + "header_tokens": [ + "re", + "suppression", + "assurance", + "logement" + ], + "label": "succession", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr" + ], + [ + "BODY", + " a ecrit :" + ], + [ + "HELLO", + "Chere client," + ], + [ + "BODY", + "Nous avons bien recu votre email qui a retenu toute notre attention" + ], + [ + "BODY", + 
"Le document que vous nous avez envoye n'est pas exploitable" + ], + [ + "BODY", + "Pourriez-vous" + ], + [ + "BODY", + "nous le renvoyer dans une meilleure resolution ?" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "BODY", + "Au nom de l'equipe Societe Imaginaire" + ], + [ + "SIGNATURE", + "logo Societe Imaginaire" + ], + [ + "FOOTER", + "| |Garanti sans virus" + ], + [ + "FOOTER", + "www.avast.com |" + ], + [ + "PJ", + "(See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\n a ecrit :\nNous avons bien recu votre email qui a retenu toute notre attention\nLe document que vous nous avez envoye n'est pas exploitable\nPourriez-vous\nnous le renvoyer dans une meilleure resolution ?", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "suppression", + "assurance", + "logement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "lundi", + "4", + "juin", + "2018", + "a", + "13", + "59", + "14", + "utc", + "2", + "conseiller", + "societeimaginaire", + "fr", + "conseiller", + "societeimaginaire", + "fr", + "a", + "ecrit", + "nous", + "avons", + "bien", + "recu", + "votre", + "email", + "qui", + "a", + "retenu", + "toute", + "notre", + "attention", + "document", + "que", + "vous", + "nous", + "avez", + "envoye", + "n", + "est", + "pas", + "exploitable", + "pourriez-vous", + "nous", + "renvoyer", + "dans", + "une", + "meilleure", + "resolution" + ] + }, + { + "age": 58, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Permettez-moi de vous signaler un changement d'adresse me concernant.\r\n\r\n Voici ma nouvelle adresse :\r\n\r\n 00 rue du Nomderue\r\n 75000 Paris\r\n\r\n Merci.\r\n\r\n Bien \u00e0 vous,\r\n\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "permettez-moi", + "de", + "vous", + "signaler", + "un", + "changement", + "d", + "adresse", + "me", 
+ "concernant", + "voici", + "ma", + "nouvelle", + "adresse" + ], + "clean_header": "changement d'adresse", + "date": "2022-06-01 14:14:07", + "flagged_header": "changement d'adresse", + "flagged_text": "Permettez-moi de vous signaler un changement d'adresse me concernant\nVoici ma nouvelle adresse :", + "from": "Monsieur Dupont ", + "header": "changement d'adresse", + "header_tokens": [ + "changement", + "d", + "adresse" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Permettez-moi de vous signaler un changement d'adresse me concernant" + ], + [ + "BODY", + "Voici ma nouvelle adresse :" + ], + [ + "SIGNATURE", + "00 rue du Nomderue" + ], + [ + "SIGNATURE", + "75000 Paris" + ], + [ + "THANKS", + "Merci" + ], + [ + "GREETINGS", + "Bien a vous," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Permettez-moi de vous signaler un changement d'adresse me concernant\nVoici ma nouvelle adresse :", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "changement", + "d", + "adresse", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "permettez-moi", + "de", + "vous", + "signaler", + "un", + "changement", + "d", + "adresse", + "me", + "concernant", + "voici", + "ma", + "nouvelle", + "adresse" + ] + }, + { + "age": 64, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:29 -----\r\n\r\n De :\tmonsieurdupont@extensionf.net\r\n A :\tSociete Imaginaire \r\n Date :\t02/06/2018 11:07\r\n Objet :\tFwd: Changement de vehicule\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Pourriez vous faire suite au mail suivant en date du 22 Mai\r\n 2017.\r\n\r\n\r\n De: monsieurdupont@extensionf.net\r\n \u00c0: \"Societe Imaginaire\" \r\n Envoy\u00e9: Mardi 22 Mai 2017 10:15:25\r\n Objet: Changement de vehicule\r\n\r\n Bonjour,\r\n\r\n Merci de 
bien vouloir transf\u00e9rer l'assurance du vehicuel sur le\r\n Scooter,\r\n dont les r\u00e9f\u00e9rences sont sur la facture fourni en pi\u00e8ce-jointe.\r\n\r\n Me faire parvenir l'attestation.\r\n\r\n Merci.\r\n\r\n MR Dupont,\r\n le 22 mai 2017\r\n\r\n Cordialement\r\n (See attached file: Facture.jpg)", + "body_tokens": [ + "pourriez", + "vous", + "faire", + "suite", + "au", + "mail", + "suivant", + "en", + "date", + "du", + "22", + "mai", + "2017" + ], + "clean_header": "tr : fwd: changement de scooter !", + "date": "2019-04-21 20:43:58", + "flagged_header": "tr : fwd: changement de scooter !", + "flagged_text": "Pourriez vous faire suite au mail suivant en date du 22 Mai\n2017", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Fwd: Changement de Scooter !", + "header_tokens": [ + "tr", + "fwd", + "changement", + "de", + "scooter" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Pourriez vous faire suite au mail suivant en date du 22 Mai" + ], + [ + "BODY", + "2017" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Merci de bien vouloir transferer l'assurance du vehicuel sur le" + ], + [ + "BODY", + "Scooter," + ], + [ + "BODY", + "dont les references sont sur la facture fourni en piece-jointe" + ], + [ + "BODY", + "Me faire parvenir l'attestation" + ], + [ + "THANKS", + "Merci" + ], + [ + "BODY", + "MR Dupont," + ], + [ + "BODY", + "le 22 mai 2017" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "PJ", + "(See attached file: Facture.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Pourriez vous faire suite au mail suivant en date du 22 Mai\n2017", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "fwd", + "changement", + "de", + "scooter", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]", + "pourriez", + "vous", + "faire", + "suite", + "au", + "mail", + "suivant", + "en", + "date", + "du", + "22", + "mai", + "2017" + ] + }, + { + "age": 20, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:38 -----\r\n\r\n De :\tmonsieurdupont@extensiona.com\r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:52\r\n Objet :\tR\u00e9clamations\r\n\r\n\r\n\r\n Num\u00e9ro T\u00e9l. : 0600000000\r\n E-mail : monsieurdupont@extensiona.com\r\n M. DUPONT\r\n Adresse : 94000\r\n Objet de la demande : R\u00e9clamations\r\n\r\n Bonjour, j'aurais besoin de l'attestation\r\n pour ma declaration de revenus 2018.\r\n Merci d'avance\r\n ", + "body_tokens": [ + "m", + "dupont", + "adresse", + "94000", + "objet", + "de", + "la", + "deman" + ], + "clean_header": "tr : reclamations ", + "date": "2022-02-26 14:07:52", + "flagged_header": "tr : reclamations", + "flagged_text": "M. DUPONT\nAdresse : 94000\nObjet de la deman", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : R\u00e9clamations ", + "header_tokens": [ + "tr", + "reclamations" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "SIGNATURE", + "Numero Tel" + ], + [ + "SIGNATURE", + ": 0600000000" + ], + [ + "SIGNATURE", + "E-mail : monsieurdupont@extensiona.com" + ], + [ + "BODY", + "M. DUPONT" + ], + [ + "BODY", + "Adresse : 94000" + ], + [ + "BODY", + "Objet de la deman" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, j'aurais besoin de l'attestation" + ], + [ + "BODY", + "pour ma declaration de revenus 2018" + ], + [ + "THANKS", + "Merci d'avance" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "M. 
DUPONT\nAdresse : 94000\nObjet de la deman", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "reclamations", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "m", + "dupont", + "adresse", + "94000", + "objet", + "de", + "la", + "deman" + ] + }, + { + "age": 51, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour ,\r\n\r\n Veuillez recevoir le certificat de cession de mon v\u00e9hicule afin que vous\r\n puissiez effectuer la r\u00e9siliation de mon contrat.\r\n Je reviendrai vers vous afin d\u2019assurer mon nouveau v\u00e9hicule bient\u00f4t.\r\n\r\n Bien \u00e0 vous ,\r\n\r\n Mr DUPONT\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n\r\n Envoy\u00e9 de mon iPad", + "body_tokens": [ + "veuillez", + "recevoir", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "afin", + "que", + "vous", + "puissiez", + "effectuer", + "la", + "resiliation", + "de", + "mon", + "contrat", + "je", + "reviendrai", + "vers", + "vous", + "afin", + "d", + "assurer", + "mon", + "nouveau", + "vehicule", + "bientot" + ], + "clean_header": "resiliation contrat voiture ", + "date": "2021-09-06 07:28:42", + "flagged_header": "resiliation contrat voiture", + "flagged_text": "Veuillez recevoir le certificat de cession de mon vehicule afin que vous\npuissiez effectuer la resiliation de mon contrat\nJe reviendrai vers vous afin d'assurer mon nouveau vehicule bientot", + "from": "Monsieur Dupont ", + "header": "R\u00e9siliation contrat voiture ", + "header_tokens": [ + "resiliation", + "contrat", + "voiture" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour ," + ], + [ + "BODY", + "Veuillez recevoir le certificat de cession de mon vehicule afin que vous" + ], + [ + "BODY", + "puissiez effectuer la resiliation de mon contrat" + ], + [ + "BODY", + "Je reviendrai vers vous afin d'assurer mon nouveau 
vehicule bientot" + ], + [ + "GREETINGS", + "Bien a vous ," + ], + [ + "SIGNATURE_NAME", + "Mr DUPONT" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "FOOTER", + "Envoye de mon iPad" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Veuillez recevoir le certificat de cession de mon vehicule afin que vous\npuissiez effectuer la resiliation de mon contrat\nJe reviendrai vers vous afin d'assurer mon nouveau vehicule bientot", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "resiliation", + "contrat", + "voiture", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "veuillez", + "recevoir", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "afin", + "que", + "vous", + "puissiez", + "effectuer", + "la", + "resiliation", + "de", + "mon", + "contrat", + "je", + "reviendrai", + "vers", + "vous", + "afin", + "d", + "assurer", + "mon", + "nouveau", + "vehicule", + "bientot" + ] + }, + { + "age": 77, + "attachment": "[\"Relev\u00e9 d'informations.pdf\"]", + "attachments": [ + "Relev\u00e9 d'informations.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n --\r\n Monsieur Dupont\r\n 02 chemin imaginaire\r\n 84000\r\n monsieurdupont@extensiona.com\r\n 06.00.00.00.00(See attached file: Relev\u00e9 d'informations.pdf)", + "body_tokens": [ + "06", + "00", + "00", + "00", + "00", + "see", + "attached", + "file", + "releve", + "d", + "informations", + "pdf" + ], + "clean_header": "demande", + "date": "2020-06-25 15:26:02", + "flagged_header": "demande", + "flagged_text": "06\n00\n00\n00\n00(See attached file: Releve d'informations.pdf)", + "from": "Monsieur Dupont ", + "header": "Demande", + "header_tokens": [ + "demande" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "02 chemin imaginaire" + ], + [ + "SIGNATURE", + "84000" + ], + [ + "SIGNATURE", + "monsieurdupont@extensiona.com" + ], 
+ [ + "BODY", + "06" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00(See attached file: Releve d'informations.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "06\n00\n00\n00\n00(See attached file: Releve d'informations.pdf)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "06", + "00", + "00", + "00", + "00", + "see", + "attached", + "file", + "releve", + "d", + "informations", + "pdf" + ] + }, + { + "age": 40, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour, je vous remercie de ce retour. Pouvez vous r\u00e9pondre aux\r\n diff\u00e9rentes questions soulev\u00e9es dans mon mail afin que je prenne ma\r\n d\u00e9cision. Bien cordialement. Mr Dupont.\r\n\r\n Obtenez Outlook pour iOS\r\n\r\n From: conseiller@Societeimaginaire.fr \r\n Sent: Monday, June 4, 2018 1:56:43 PM\r\n To: monsieurdupont@hotmail.com\r\n Subject: : Votre devis v\u00e9hicule\r\n\r\n Bonjour,\r\n Veuillez trouver ci-joint le devis que vous nous avez demand\u00e9.\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n Cordialement.\r\n La Societe Imaginaire\r\n ", + "body_tokens": [ + "bonjour", + "je", + "vous", + "remercie", + "de", + "ce", + "retour", + "pouvez", + "vous", + "repondre", + "aux", + "differentes", + "questions", + "soulevees", + "dans", + "mon", + "mail", + "afin", + "que", + "je", + "prenne", + "ma", + "decision" + ], + "clean_header": "tr : re: interrogations", + "date": "2022-10-02 04:04:31", + "flagged_header": "tr : re: interrogations", + "flagged_text": "Bonjour, je vous remercie de ce retour\nPouvez vous repondre aux\ndifferentes questions soulevees dans mon mail afin que je prenne ma\ndecision", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Interrogations", + "header_tokens": [ + "tr", + "re", + "interrogations" + ], + "label": "vehicule", + "messages": [ + { + 
"clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, je vous remercie de ce retour" + ], + [ + "BODY", + "Pouvez vous repondre aux" + ], + [ + "BODY", + "differentes questions soulevees dans mon mail afin que je prenne ma" + ], + [ + "BODY", + "decision" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE_NAME", + "Mr Dupont" + ], + [ + "FOOTER", + "Obtenez Outlook pour iOS" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Subject: : Votre devis vehicule" + ], + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Veuillez trouver ci-joint le devis que vous nous avez demande" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Bonjour, je vous remercie de ce retour\nPouvez vous repondre aux\ndifferentes questions soulevees dans mon mail afin que je prenne ma\ndecision", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "re", + "interrogations", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "bonjour", + "je", + "vous", + "remercie", + "de", + "ce", + "retour", + "pouvez", + "vous", + "repondre", + "aux", + "differentes", + "questions", + "soulevees", + "dans", + "mon", + "mail", + "afin", + "que", + "je", + "prenne", + "ma", + "decision" + ] + }, + { + "age": 86, + "attachment": "[\"RIB.pdf\"]", + "attachments": [ + "RIB.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n De :\tmonsieurdupond \r\n A :\t\"conseiller@Societeimaginaire.fr\" \r\n Date :\t04/06/2018 13:47\r\n Objet :\tRE: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n Bonjour\r\n Ci-joint le rib du compte comme demand\u00e9\r\n Bien \u00e0 vous\r\n\r\n Monsieur DUPONT\r\n (Embedded image moved to file: pic.jpg)\r\n +33(0) 6 00 00 00 00\r\n\r\n De : 
conseiller@Societeimaginaire.fr [mailto:conseiller@Societeimaginaire.fr]\r\n Envoy\u00e9 : lundi 4 juin 2018 12:23\r\n \u00c0 : Monsieur Dupont \r\n Objet : Correspondance de La Societe Imaginaire\r\n\r\n Bonjour.\r\n\r\n Veuillez prendre connaissance des documents ci-joints :\r\n 1 - Courrier\r\n 2 - Envoi Devis habitation\r\n\r\n Meilleurs sentiments.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: RIB.pdf)", + "body_tokens": [ + "ci_joint", + "rib", + "du", + "compte", + "comme", + "demande" + ], + "clean_header": "rib", + "date": "2019-02-05 15:53:44", + "flagged_header": "rib", + "flagged_text": "ci_joint le rib du compte comme demande", + "from": "conseiller@societeimaginaire.fr", + "header": "RIB", + "header_tokens": [ + "rib" + ], + "label": "regularisation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Ci-joint le rib du compte comme demande" + ], + [ + "GREETINGS", + "Bien a vous" + ], + [ + "HELLO", + "Monsieur DUPONT" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "SIGNATURE", + "+33(0) 6 00 00 00 00" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Veuillez prendre connaissance des documents ci-joints :" + ], + [ + "SIGNATURE", + "1 - Courrier" + ], + [ + "SIGNATURE", + "2 - Envoi Devis habitation" + ], + [ + "BODY", + "Meilleurs sentiments" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "BODY", + "(See attached" + ], + [ + "BODY", + "file: RIB.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Ci-joint le rib du compte comme demande", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "rib", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]", + "ci_joint", + "rib", + "du", + "compte", + "comme", + "demande" + ] + }, + { + "age": 52, + "attachment": "[\"image-a7c10.png\",\"Releve_Information.pdf\"]", + "attachments": [ + "image-a7c10.png", + "Releve_Information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Veuillez trouver ci-joint le RI de Mme Dupont,\r\n\r\n Vous souhaitant bonne r\u00e9ception,\r\n\r\n\r\n Bien cordialement.\r\n\r\n\r\n (Embedded image moved to file: pic30109.jpg)\r\n [IMAGE]\r\n\r\n Le 2018-06-04 20:37, Mr DUPONT a \u00e9crit\u00a0:\r\n Bonjour Monsieur,\r\n\r\n Pourriez-vous, s'il vous plait, faire parvenir mon relev\u00e9 d'informations \u00e0\r\n la Societe Imaginaire, par retour de mail, en gardant la r\u00e9f\u00e9rence cit\u00e9e en objet qui\r\n vous plait?\r\n\r\n Je vous remercie,\r\n\r\n Monsieur Dupont\r\n\r\n -------- Message d'origine --------\r\n De : conseiller@Societeimaginaire.fr\r\n Date : 04/06/2018 14:30 (GMT+01:00)\r\n \u00c0 : monsieurdupont@polytechnique.edu\r\n Objet : Confirmation de votre assurance v\u00e9hicule\u00a0\r\n\r\n\r\n Cher(e) client,\r\n\r\n Nous vous confirmons l'enregistrement de l'assurance de votre v\u00e9hicule en\r\n en date du 01/05/2017 suite \u00e0 la demande de r\u00e9siliation que nous avons\r\n effectu\u00e9e aupr\u00e8s de la Societe Concurrente.\r\n\r\n Toutefois, ces derniers ne nous ont pas envoy\u00e9 votre Relev\u00e9 d'Information\r\n donc il va falloir que vous leur demandiez pour ensuite nous le transmettre\r\n par mail \u00e0 l'adresse conseiller@Societeimaginaire.fr\r\n En attendant ce document, nous vous\r\n assurons quand m\u00eame, mais de mani\u00e8re provisoire.\r\n\r\n De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez.\r\n Pour cela, le plus simple et\r\n de nous contacter au 09.00.00.00.00 ou de pr\u00e9voir un recontact via notre\r\n site Societeimaginaire.fr\r\n\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe 
Imaginaire\r\n\r\n (See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)", + "body_tokens": [ + "veuillez", + "trouver", + "ci_joint", + "ri", + "de", + "mme", + "dupont", + "vous", + "souhaitant", + "bonne", + "reception" + ], + "clean_header": "re: confirmation de votre assurance vehicule", + "date": "2020-09-29 13:14:36", + "flagged_header": "re: confirmation de votre assurance vehicule", + "flagged_text": "Veuillez trouver ci_joint le RI de Mme Dupont,\nVous souhaitant bonne reception,", + "from": "Conseiller ", + "header": "Re: Confirmation de votre assurance v\u00e9hicule", + "header_tokens": [ + "re", + "confirmation", + "de", + "votre", + "assurance", + "vehicule" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Veuillez trouver ci-joint le RI de Mme Dupont," + ], + [ + "BODY", + "Vous souhaitant bonne reception," + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "PJ", + "(Embedded image moved to file: pic30109.jpg)" + ], + [ + "BODY", + "[IMAGE]" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Monsieur," + ], + [ + "BODY", + "Pourriez-vous, s'il vous plait, faire parvenir mon releve d'informations a" + ], + [ + "BODY", + "la Societe Imaginaire, par retour de mail, en gardant la reference citee en objet qui" + ], + [ + "BODY", + "vous plait?" 
+ ], + [ + "THANKS", + "Je vous remercie," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Nous vous confirmons l'enregistrement de l'assurance de votre vehicule en" + ], + [ + "BODY", + "en date du 01/05/2017 suite a la demande de resiliation que nous avons" + ], + [ + "BODY", + "effectuee aupres de la Societe Concurrente" + ], + [ + "BODY", + "Toutefois, ces derniers ne nous ont pas envoye votre Releve d'Information" + ], + [ + "BODY", + "donc il va falloir que vous leur demandiez pour ensuite nous le transmettre" + ], + [ + "BODY", + "par mail a l'adresse conseiller@Societeimaginaire.fr" + ], + [ + "BODY", + "En attendant ce document, nous vous" + ], + [ + "BODY", + "assurons quand meme, mais de maniere provisoire" + ], + [ + "BODY", + "De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez" + ], + [ + "BODY", + "Pour cela, le plus simple et" + ], + [ + "BODY", + "de nous contacter au 09" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "SIGNATURE", + "00 ou de prevoir un recontact via notre" + ], + [ + "BODY", + "site Societeimaginaire.fr" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "BODY", + "Au nom de l'equipe Societe Imaginaire" + ], + [ + "SIGNATURE", + "logo Societe Imaginaire" + ], + [ + "PJ", + "(See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Veuillez trouver ci-joint le RI de Mme Dupont,\nVous souhaitant bonne reception,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "confirmation", + "de", + "votre", + "assurance", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "veuillez", + "trouver", + "ci_joint", + 
"ri", + "de", + "mme", + "dupont", + "vous", + "souhaitant", + "bonne", + "reception" + ] + }, + { + "age": 72, + "attachment": "[\"rib.pdf\"]", + "attachments": [ + "rib.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique, veuillez trouver ici mon relev\u00e9\r\n d'identit\u00e9 bancaire.\r\n\r\n Au nom de Monsieur Dupont\r\n\r\n Bien cordialement\r\n\r\n --\r\n Monsieur Dupont\r\n 32 avenue Imaginaire\r\n T\u00e9l: +33 (0)600 00 00 00\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "veuillez", + "trouver", + "ici", + "mon", + "releve", + "d", + "identite", + "bancaire" + ], + "clean_header": "rib", + "date": "2022-06-09 21:54:35", + "flagged_header": "rib", + "flagged_text": "Suite a notre entretien telephonique, veuillez trouver ici mon releve\nd'identite bancaire", + "from": "Monsieur Dupont ", + "header": "RIB", + "header_tokens": [ + "rib" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Suite a notre entretien telephonique, veuillez trouver ici mon releve" + ], + [ + "BODY", + "d'identite bancaire" + ], + [ + "HELLO", + "Au nom de Monsieur Dupont" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "32 avenue Imaginaire" + ], + [ + "SIGNATURE", + "Tel: +33 (0)600 00 00 00" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique, veuillez trouver ici mon releve\nd'identite bancaire", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "rib", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "veuillez", + "trouver", + "ici", + "mon", + "releve", + "d", + "identite", + "bancaire" + ] + }, + { + "age": 39, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": 
"\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Nous souhaitons d\u00e9clar\u00e9s deux sinistres concernant nos deux v\u00e9hicules.\r\n\r\n 1) Vehicule1 :\r\n\r\n Le v\u00e9hicule \u00e9tait stationnement sur le parking et il\r\n pr\u00e9sente une trace sur l'aile arri\u00e8re droite et sur le\r\n pare-choc.\r\n\r\n 2) Vehicule2 :\r\n\r\n Le conducteur s'est gar\u00e9 sur un parking d'entreprise.\r\n Il a cogn\u00e9 avec le pneu avant droit.\r\n\r\n\r\n En vous souhaitant bonne r\u00e9ception de ces \u00e9l\u00e9ments,\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n (Embedded image moved to file: pic.jpg)", + "body_tokens": [ + "nous", + "souhaitons", + "declares", + "deux", + "sinistres", + "concernant", + "nos", + "deux", + "vehicules", + "1", + "vehicule1", + "vehicule", + "etait", + "stationnement", + "sur", + "parking", + "et", + "il", + "presente", + "une", + "trace", + "sur", + "l", + "aile", + "arriere", + "droite", + "et", + "sur", + "pare-choc", + "2", + "vehicule2", + "il", + "a", + "cogne", + "avec", + "pneu", + "avant", + "droit", + "en", + "vous", + "souhaitant", + "bonne", + "reception", + "de", + "ces", + "elements" + ], + "clean_header": "sinistres", + "date": "2019-12-20 08:02:52", + "flagged_header": "sinistres", + "flagged_text": "Nous souhaitons declares deux sinistres concernant nos deux vehicules\n1) Vehicule1 :\nLe vehicule etait stationnement sur le parking et il\npresente une trace sur l'aile arriere droite et sur le\npare-choc\n2) Vehicule2 :\nIl a cogne avec le pneu avant droit\nEn vous souhaitant bonne reception de ces elements,", + "from": "Monsieur Dupont ", + "header": "sinistres", + "header_tokens": [ + "sinistres" + ], + "label": "sinistres", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Nous souhaitons declares deux sinistres concernant nos deux vehicules" + ], + [ + "BODY", + "1) 
Vehicule1 :" + ], + [ + "BODY", + "Le vehicule etait stationnement sur le parking et il" + ], + [ + "BODY", + "presente une trace sur l'aile arriere droite et sur le" + ], + [ + "BODY", + "pare-choc" + ], + [ + "BODY", + "2) Vehicule2 :" + ], + [ + "SIGNATURE", + "Le conducteur s'est gare sur un parking d'entreprise" + ], + [ + "BODY", + "Il a cogne avec le pneu avant droit" + ], + [ + "BODY", + "En vous souhaitant bonne reception de ces elements," + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Nous souhaitons declares deux sinistres concernant nos deux vehicules\n1) Vehicule1 :\nLe vehicule etait stationnement sur le parking et il\npresente une trace sur l'aile arriere droite et sur le\npare-choc\n2) Vehicule2 :\nIl a cogne avec le pneu avant droit\nEn vous souhaitant bonne reception de ces elements,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "sinistres", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "nous", + "souhaitons", + "declares", + "deux", + "sinistres", + "concernant", + "nos", + "deux", + "vehicules", + "1", + "vehicule1", + "vehicule", + "etait", + "stationnement", + "sur", + "parking", + "et", + "il", + "presente", + "une", + "trace", + "sur", + "l", + "aile", + "arriere", + "droite", + "et", + "sur", + "pare-choc", + "2", + "vehicule2", + "il", + "a", + "cogne", + "avec", + "pneu", + "avant", + "droit", + "en", + "vous", + "souhaitant", + "bonne", + "reception", + "de", + "ces", + "elements" + ] + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n Nous restons \u00e0 votre disposition\r\n\r\n Cordialement\r\n\r\n Societe Imaginaire\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 05/06/2018 10:05 -----\r\n\r\n De :\tmonsieurdupont \r\n A 
:\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 10:26\r\n Objet :\tModification de contrat\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Notre fils, conducteur principal du v\u00e9hicule \u00e9tant\r\n en stage puis \u00e0 l'\u00e9tranger pour une bonne partie de l'an\r\n prochain, son v\u00e9hicule est d\u00e9sormais \u00e0 la maison et il ne sera amen\u00e9 \u00e0\r\n l'utiliser que rarement.\r\n Pour cette raison, nous souhaiterions modifier son contrat d'assurance\r\n d\u00e8s que possible.\r\n\r\n Vous remerciant par avance de votre concours, nous restons \u00e0 votre\r\n disposition pour toute information compl\u00e9mentaire.\r\n\r\n Cordialement\r\n\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "nous", + "restons", + "a", + "votre", + "disposition" + ], + "clean_header": "contrat vehicule", + "date": "2020-02-01 01:37:56", + "flagged_header": "contrat vehicule", + "flagged_text": "Nous restons a votre disposition", + "from": "conseiller@societeimaginaire.fr", + "header": "contrat vehicule", + "header_tokens": [ + "contrat", + "vehicule" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Nous restons a votre disposition" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "Societe Imaginaire" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "Notre fils, conducteur principal du vehicule etant" + ], + [ + "BODY", + "en stage puis a l'etranger pour une bonne partie de l'an" + ], + [ + "BODY", + "prochain, son vehicule est desormais a la maison et il ne sera amene a" + ], + [ + "BODY", + "l'utiliser que rarement" + ], + [ + "BODY", + "Pour cette raison, nous souhaiterions modifier son contrat d'assurance" + ], + [ + "BODY", + "des que possible" + ], + [ + "BODY", + "Vous remerciant par avance de votre concours, nous restons a votre" + ], + [ + "BODY", 
+ "disposition pour toute information complementaire" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Nous restons a votre disposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "contrat", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "nous", + "restons", + "a", + "votre", + "disposition" + ] + }, + { + "age": 79, + "attachment": "[\"cession.pdf\"]", + "attachments": [ + "cession.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n De :\tMonsieur Dupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Cc\u00a0:\tMonsieur Dupont , Madame Dupont\r\n \r\n Date :\t04/06/2018 18:08\r\n Objet :\tr\u00e9siliation couverture v\u00e9hicule suite cession\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Le v\u00e9hicule a \u00e9t\u00e9 c\u00e9d\u00e9 le 2 avril 2018.\r\n\r\n Merci, ci joint le document de cession scann\u00e9\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont - Orange\r\n monsieurdupont@extensionj.fr\r\n\r\n responsable : Monsieur Dupont\r\n 06 00 00 00 00\r\n monsieurdupont@extensionj.fr\r\n\r\n\r\n\r\n\r\n (See attached file: cession.pdf)", + "body_tokens": [], + "clean_header": "tr : resiliation couverture vehicule suite cession", + "date": "2019-05-30 15:47:18", + "flagged_header": "tr : resiliation couverture vehicule suite cession", + "flagged_text": "", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : r\u00e9siliation couverture v\u00e9hicule suite cession", + "header_tokens": [ + "tr", + "resiliation", + "couverture", + "vehicule", + "suite", + "cession" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "\r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t05/06/2018 13:34\r\n Objet :\tR\u00e9ponse au dossier de pr\u00eat\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Vous nous avez adress\u00e9 un dossier de pr\u00eat concernant votre client.\r\n\r\n Nous 
vous informons de notre accord.\r\n\r\n Nous restons \u00e0 votre enti\u00e8re disposition.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller\r\n\r\n Societe Imaginaire\r\n\r\n T\u00e9l : 05 00 00 00 00\r\n Fax : 05 00 00 00 00\r\n E-mail: conseiller@societeimaginaire.fr\r\n\r\n\r\n", + "body_tokens": [ + "vous", + "nous", + "avez", + "adresse", + "un", + "dossier", + "de", + "pret", + "concernant", + "votre", + "client", + "nous", + "vous", + "informons", + "de", + "notre", + "accord", + "nous", + "restons", + "a", + "votre", + "entiere", + "disposition" + ], + "clean_header": "dossier de pret ", + "date": "2019-06-05 21:18:07", + "flagged_header": "dossier de pret", + "flagged_text": "Vous nous avez adresse un dossier de pret concernant votre client\nNous vous informons de notre accord\nNous restons a votre entiere disposition", + "from": "conseiller@societeimaginaire.fr", + "header": "dossier de pr\u00eat ", + "header_tokens": [ + "dossier", + "de", + "pret" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Vous nous avez adresse un dossier de pret concernant votre client" + ], + [ + "BODY", + "Nous vous informons de notre accord" + ], + [ + "BODY", + "Nous restons a votre entiere disposition" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE", + "Societe Imaginaire" + ], + [ + "SIGNATURE", + "Tel : 05 00 00 00 00" + ], + [ + "SIGNATURE", + "Fax : 05 00 00 00 00" + ], + [ + "SIGNATURE", + "E-mail: conseiller@societeimaginaire.fr" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Vous nous avez adresse un dossier de pret concernant votre client\nNous vous informons de notre accord\nNous restons a votre entiere disposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "dossier", + "de", + "pret", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + 
"nous", + "avez", + "adresse", + "un", + "dossier", + "de", + "pret", + "concernant", + "votre", + "client", + "nous", + "vous", + "informons", + "de", + "notre", + "accord", + "nous", + "restons", + "a", + "votre", + "entiere", + "disposition" + ] + }, + { + "age": 45, + "attachment": "[\"doc.pdf\"]", + "attachments": [ + "doc.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n\r\n Je vous prie de trouver ci-joint une d\u00e9claration de sinistre,\r\n relative au cas de Monsieur Dupont, survenu le lundi 6 Mai.\r\n\r\n\r\n\r\n Avec nos cordiales salutations.\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n \u00a0(See attached file: doc.pdf)", + "body_tokens": [ + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "une", + "declaration", + "de", + "sinistre", + "relative", + "au", + "cas", + "de", + "monsieur", + "dupont", + "survenu", + "lundi", + "6", + "mai", + "avec", + "nos", + "cordiales", + "salutations" + ], + "clean_header": "declarations de sinistre corporel et materiel", + "date": "2020-09-29 17:53:01", + "flagged_header": "declarations de sinistre corporel et materiel", + "flagged_text": "Je vous prie de trouver ci_joint une declaration de sinistre,\nrelative au cas de Monsieur Dupont, survenu le lundi 6 Mai\nAvec nos cordiales salutations", + "from": "Monsieur Dupont ", + "header": "d\u00e9clarations de sinistre corporel et mat\u00e9riel", + "header_tokens": [ + "declarations", + "de", + "sinistre", + "corporel", + "et", + "materiel" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous prie de trouver ci-joint une declaration de sinistre," + ], + [ + "BODY", + "relative au cas de Monsieur Dupont, survenu le lundi 6 Mai" + ], + [ + "BODY", + "Avec nos cordiales salutations" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "PJ", + "(See attached file: 
doc.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je vous prie de trouver ci-joint une declaration de sinistre,\nrelative au cas de Monsieur Dupont, survenu le lundi 6 Mai\nAvec nos cordiales salutations", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "declarations", + "de", + "sinistre", + "corporel", + "et", + "materiel", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "une", + "declaration", + "de", + "sinistre", + "relative", + "au", + "cas", + "de", + "monsieur", + "dupont", + "survenu", + "lundi", + "6", + "mai", + "avec", + "nos", + "cordiales", + "salutations" + ] + }, + { + "age": 48, + "attachment": "[\"Attestationemployeur.pdf\",\"Relev\u00e9_d'information.pdf\"]", + "attachments": [ + "Attestationemployeur.pdf", + "Relev\u00e9_d'information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Je vous prie de trouver ci-joints les justificatifs demand\u00e9s pour la\r\n souscription de mon assurance auto.\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n (See attached file: Attestationemployeur.pdf)\r\n(See attached file: Relev\u00e9_d'information.pdf)", + "body_tokens": [ + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "s", + "justificatifs", + "demandes", + "pour", + "la", + "souscription", + "de", + "mon", + "assurance", + "auto" + ], + "clean_header": "demande d'assurance auto", + "date": "2022-01-22 22:27:05", + "flagged_header": "demande d'assurance auto", + "flagged_text": "Je vous prie de trouver ci_joint s les justificatifs demandes pour la\nsouscription de mon assurance auto", + "from": "Monsieur Dupont ", + "header": "Demande d'assurance auto", + "header_tokens": [ + "demande", + "d", + "assurance", + "auto" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous prie de trouver 
ci-joints les justificatifs demandes pour la" + ], + [ + "BODY", + "souscription de mon assurance auto" + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(See attached file: Attestationemployeur.pdf)" + ], + [ + "PJ", + "(See attached file: Releve_d'information.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous prie de trouver ci-joints les justificatifs demandes pour la\nsouscription de mon assurance auto", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "d", + "assurance", + "auto", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "s", + "justificatifs", + "demandes", + "pour", + "la", + "souscription", + "de", + "mon", + "assurance", + "auto" + ] + }, + { + "age": 63, + "attachment": "[\"IMG.PNG\"]", + "attachments": [ + "IMG.PNG" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Envoy\u00e9 de mon iPhone\r\n\r\n D\u00e9but du message transf\u00e9r\u00e9\u00a0:\r\n\r\n Exp\u00e9diteur: monsieurduponte@extensionh.fr\r\n Date: 7 juin 2018 \u00e0 16:34:25 UTC+2\r\n Destinataire: conseiller@Societeimaginaire.fr\r\n Objet: Demande\r\n\r\n\r\n\r\n Bonjour, suite \u00e0 notre conversation t\u00e9l\u00e9phonique voici la preuve\r\n de r\u00e8glement afin que vous puissiez \u00e9tudier ma demande de r\u00e9inscription.\r\n\r\n Cordialement,\r\n Monsieur Dupont\r\n [IMAGE]\r\n\r\n\r\n Envoy\u00e9 de mon iPhone(See attached file: IMG.PNG)", + "body_tokens": [], + "clean_header": "preuve reglement ", + "date": "2021-10-16 21:12:16", + "flagged_header": "preuve reglement", + "flagged_text": "", + "from": "Monsieur Dupont ", + "header": "preuve r\u00e9glement ", + "header_tokens": [ + "preuve", + "reglement" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "FOOTER", + "Envoye de mon iPhone" + ] + ], + "text_from": null + }, + { + "clean_header": 
null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, suite a notre conversation telephonique voici la preuve" + ], + [ + "BODY", + "de reglement afin que vous puissiez etudier ma demande de reinscription" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "[IMAGE]" + ], + [ + "FOOTER", + "Envoye de mon iPhone(See attached file: IMG.PNG)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "preuve", + "reglement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]" + ] + }, + { + "age": 19, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour\r\n\r\n Je m'aper\u00e7ois ce jour que j'ai \u00e9t\u00e9 pr\u00e9l\u00e8v\u00e9e plusieurs fois:\r\n\r\n 1 fois sur mon compte bancaire BANQUE1\r\n 1 fois sur mon compte BANQUE2\r\n\r\n Je paye donc 2 fois l'assurance v\u00e9hicule et habitation\r\n\r\n Pourriez vous me rembourser la somme n\u00e9cessaire sur mon compte BANQUE1\r\n le plus rapidement possible.\r\n\r\n En vous remerciant par avance de votre r\u00e9ponse par retour de mail.\r\n Bien cordialement\r\n Monsieur Dupont", + "body_tokens": [ + "je", + "m", + "apercois", + "ce", + "jour", + "que", + "j", + "ai", + "ete", + "prelevee", + "plusieurs", + "fois", + "je", + "paye", + "donc", + "2", + "fois", + "l", + "assurance", + "vehicule", + "et", + "habitation", + "pourriez", + "vous", + "me", + "rembourser", + "la", + "somme", + "necessaire", + "sur", + "mon", + "compte", + "banque1", + "plus", + "rapidement", + "possible", + "en", + "vous", + "remerciant", + "par", + "avance", + "de", + "votre", + "reponse", + "par", + "retour", + "de", + "mail" + ], + "clean_header": "prelevements bancaires", + "date": "2022-05-02 06:52:20", + "flagged_header": "prelevements bancaires", + "flagged_text": "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:\nJe paye donc 2 fois l'assurance vehicule et 
habitation\nPourriez vous me rembourser la somme necessaire sur mon compte BANQUE1\nle plus rapidement possible\nEn vous remerciant par avance de votre reponse par retour de mail", + "from": "Monsieur Dupont ", + "header": "pr\u00e9l\u00e8vements bancaires", + "header_tokens": [ + "prelevements", + "bancaires" + ], + "label": "modification", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:" + ], + [ + "SIGNATURE", + "1 fois sur mon compte bancaire BANQUE1" + ], + [ + "SIGNATURE", + "1 fois sur mon compte BANQUE2" + ], + [ + "BODY", + "Je paye donc 2 fois l'assurance vehicule et habitation" + ], + [ + "BODY", + "Pourriez vous me rembourser la somme necessaire sur mon compte BANQUE1" + ], + [ + "BODY", + "le plus rapidement possible" + ], + [ + "BODY", + "En vous remerciant par avance de votre reponse par retour de mail" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:\nJe paye donc 2 fois l'assurance vehicule et habitation\nPourriez vous me rembourser la somme necessaire sur mon compte BANQUE1\nle plus rapidement possible\nEn vous remerciant par avance de votre reponse par retour de mail", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "prelevements", + "bancaires", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "m", + "apercois", + "ce", + "jour", + "que", + "j", + "ai", + "ete", + "prelevee", + "plusieurs", + "fois", + "je", + "paye", + "donc", + "2", + "fois", + "l", + "assurance", + "vehicule", + "et", + "habitation", + "pourriez", + "vous", + "me", + "rembourser", + "la", + "somme", + "necessaire", + "sur", + "mon", + "compte", + "banque1", + "plus", + "rapidement", + "possible", + "en", + "vous", + "remerciant", + "par", + 
"avance", + "de", + "votre", + "reponse", + "par", + "retour", + "de", + "mail" + ] + }, + { + "age": 39, + "attachment": "[\"Liste.docx\",\"PV.pdf\",\"statuts.pdf\",\"RIB.jpeg\"]", + "attachments": [ + "Liste.docx", + "PV.pdf", + "statuts.pdf", + "RIB.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Voici, ci-joins les documents demand\u00e9s pr\u00e9c\u00e9demment.\r\n\r\n Je vous remercie de la rapidit\u00e9 de vos service,\r\n Bien \u00e0 vous,\r\n Monsieur Dupont(See attached file: Liste.docx)(See attached file:\r\n PV.pdf)(See attached file: statuts.pdf)(See attached file:\r\n RIB.jpeg)", + "body_tokens": [ + "voici", + "ci-joins", + "documents", + "demandes", + "precedemment", + "je", + "vous", + "remercie", + "de", + "la", + "rapidite", + "de", + "vos", + "service" + ], + "clean_header": "documents ", + "date": "2021-01-13 05:59:12", + "flagged_header": "documents", + "flagged_text": "Voici, ci-joins les documents demandes precedemment\nJe vous remercie de la rapidite de vos service,", + "from": "Monsieur Dupont ", + "header": "documents ", + "header_tokens": [ + "documents" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici, ci-joins les documents demandes precedemment" + ], + [ + "BODY", + "Je vous remercie de la rapidite de vos service," + ], + [ + "GREETINGS", + "Bien a vous," + ], + [ + "BODY", + "Monsieur Dupont(See attached file: Liste.docx)(See attached file:" + ], + [ + "BODY", + "PV.pdf)(See attached file: statuts.pdf)(See attached file:" + ], + [ + "BODY", + "RIB.jpeg)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Voici, ci-joins les documents demandes precedemment\nJe vous remercie de la rapidite de vos service,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "documents", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "voici", + "ci-joins", + "documents", + "demandes", + 
"precedemment", + "je", + "vous", + "remercie", + "de", + "la", + "rapidite", + "de", + "vos", + "service" + ] + }, + { + "age": 31, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Centre Imaginaire\r\n\r\n 00 Rue de Nomderue\r\n\r\n 23000 VILLE IMAGINAIRE\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n Merci de bien vouloir me faire parvenir une attestation d'assurance\r\n concernant la mise \u00e0 disposition de la salle pour 100 personnes.\r\n\r\n objet de la manifestation : r\u00e9union\r\n\r\n cordialement.\r\n\r\n\r\n Monsieur Dupont\r\n\r\n ", + "body_tokens": [ + "merci", + "de", + "bien", + "vouloir", + "me", + "faire", + "parvenir", + "une", + "attestation", + "d", + "assurance", + "concernant", + "la", + "mise", + "a", + "disposition", + "de", + "la", + "salle", + "pour", + "100", + "personnes", + "objet", + "de", + "la", + "manifestation", + "reunion" + ], + "clean_header": "demande d'attestation d'asurance", + "date": "2021-01-04 02:42:06", + "flagged_header": "demande d'attestation d'asurance", + "flagged_text": "Merci de bien vouloir me faire parvenir une attestation d'assurance\nconcernant la mise a disposition de la salle pour 100 personnes\nobjet de la manifestation : reunion", + "from": "Monsieur Dupont ", + "header": "demande d'attestation d'asurance", + "header_tokens": [ + "demande", + "d", + "attestation", + "d", + "asurance" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "SIGNATURE_NAME", + "Centre Imaginaire" + ], + [ + "SIGNATURE", + "00 Rue de Nomderue" + ], + [ + "SIGNATURE", + "23000 VILLE IMAGINAIRE" + ], + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Merci de bien vouloir me faire parvenir une attestation d'assurance" + ], + [ + "BODY", + "concernant la mise a disposition de la salle pour 100 personnes" + ], + [ + "BODY", + "objet de la manifestation : reunion" + ], + [ + "GREETINGS", + "cordialement" + ], + [ + "HELLO", + 
"Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Merci de bien vouloir me faire parvenir une attestation d'assurance\nconcernant la mise a disposition de la salle pour 100 personnes\nobjet de la manifestation : reunion", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "d", + "attestation", + "d", + "asurance", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "merci", + "de", + "bien", + "vouloir", + "me", + "faire", + "parvenir", + "une", + "attestation", + "d", + "assurance", + "concernant", + "la", + "mise", + "a", + "disposition", + "de", + "la", + "salle", + "pour", + "100", + "personnes", + "objet", + "de", + "la", + "manifestation", + "reunion" + ] + }, + { + "age": 18, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n (Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page\r\n Facebook\r\n\r\n (Embedded image moved to file: pic.gif)\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller/Societe Imaginaire le 07/06/2018 16:32 -----\r\n\r\n De :\tmonsieurdupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t07/06/2018 16:16\r\n Objet :\tRe : Message de votre conseill\u00e8re personnelle\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Merci de vos messages. 
La voiture est conduite par Monsieur Dupont.\r\n Nos deux voitures sont assur\u00e9es \u00e0 100%.\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n Le 07/06/18, \"conseiller@Societeimaginaire.fr\" a\r\n \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Suite \u00e0 notre bilan du 01 mai dernier, je souhaitais savoir ce que vous\r\n avez d\u00e9cid\u00e9.\r\n\r\n\r\n Je devais vous \u00e9tablir un devis pour votre voiture donc si\r\n vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi\r\n que le type de couverture que vous recherchez, je vous enverrai le devis rapidement.\r\n\r\n\r\n J'attend de vos nouvelles.\r\n\r\n Bien Cordialement.\r\n\r\n Conseiller\r\n D\u00e9l\u00e9gation Conseil\r\n conseillerh@Societeimaginaire.fr\r\n\r\n ", + "body_tokens": [ + "facebook" + ], + "clean_header": "tr : message de votre conseillere personnelle", + "date": "2022-04-27 07:20:10", + "flagged_header": "tr : message de votre conseillere personnelle", + "flagged_text": "Facebook", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Message de votre conseill\u00e8re personnelle", + "header_tokens": [ + "tr", + "message", + "de", + "votre", + "conseillere", + "personnelle" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "PJ", + "(Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page" + ], + [ + "BODY", + "Facebook" + ], + [ + "PJ", + "(Embedded image moved to file: pic.gif)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Merci de vos messages" + ], + [ + "BODY", + "La voiture est conduite par Monsieur Dupont" + ], + [ + "BODY", + "Nos deux voitures sont assurees a 100%" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "Le 07/06/18, \"conseiller@Societeimaginaire.fr\" a" + ], + [ + 
"BODY", + "ecrit :" + ], + [ + "HELLO", + "Chere client," + ], + [ + "BODY", + "Suite a notre bilan du 01 mai dernier, je souhaitais savoir ce que vous" + ], + [ + "BODY", + "avez decide" + ], + [ + "BODY", + "Je devais vous etablir un devis pour votre voiture donc si" + ], + [ + "BODY", + "vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi" + ], + [ + "BODY", + "que le type de couverture que vous recherchez, je vous enverrai le devis rapidement" + ], + [ + "BODY", + "J'attend de vos nouvelles" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE_NAME", + "Delegation Conseil" + ], + [ + "SIGNATURE", + "conseillerh@Societeimaginaire.fr" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Facebook", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "message", + "de", + "votre", + "conseillere", + "personnelle", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "facebook" + ] + } +] diff --git a/melusine/data/emails_preprocessed.json b/melusine/data/emails_preprocessed.json new file mode 100644 index 0000000..65c3859 --- /dev/null +++ b/melusine/data/emails_preprocessed.json @@ -0,0 +1,5624 @@ +[ + { + "age": 35, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 24/05/2018 11:49 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Cc\u00a0:\tSociete@www.Societe.fr\r\n Date :\t24/05/2018 11:36\r\n Objet :\tDevis habitation\r\n\r\n\r\n\r\n Bonjour\r\n Je suis client chez vous\r\n Pouvez vous m \u00e9tablir un devis pour mon fils qui souhaite\r\n louer l\u2019appartement suivant :\r\n 25 rue du rueimaginaire 77000\r\n Merci\r\n Envoy\u00e9 de mon iPhone", + "body_tokens": [ + "je", + "suis", + "client", + "chez", + "vous", + "pouvez", + "vous", + "m", + "etablir", + "un", + "devis", + "pour", + "mon", + "fils", + "qui", + "souhaite", + "louer", + "l", + 
"appartement", + "suivant" + ], + "clean_header": "tr : devis habitation", + "date": "2019-03-27 17:34:12", + "flagged_header": "tr : devis habitation", + "flagged_text": "Je suis client chez vous\nPouvez vous m etablir un devis pour mon fils qui souhaite\nlouer l'appartement suivant :", + "from": "conseiller1@societeimaginaire.fr", + "header": "Tr : Devis habitation", + "header_tokens": [ + "tr", + "devis", + "habitation" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Je suis client chez vous" + ], + [ + "BODY", + "Pouvez vous m etablir un devis pour mon fils qui souhaite" + ], + [ + "BODY", + "louer l'appartement suivant :" + ], + [ + "SIGNATURE", + "25 rue du rueimaginaire 77000" + ], + [ + "THANKS", + "Merci" + ], + [ + "FOOTER", + "Envoye de mon iPhone" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je suis client chez vous\nPouvez vous m etablir un devis pour mon fils qui souhaite\nlouer l'appartement suivant :", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "devis", + "habitation", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "suis", + "client", + "chez", + "vous", + "pouvez", + "vous", + "m", + "etablir", + "un", + "devis", + "pour", + "mon", + "fils", + "qui", + "souhaite", + "louer", + "l", + "appartement", + "suivant" + ] + }, + { + "age": 32, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 25/05/2018 08:20 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t24/05/2018 19:37\r\n Objet :\tImmatriculation voiture\r\n\r\n\r\n\r\n Bonsoir madame,\r\n\r\n Je vous informe que la nouvelle immatriculation est enfin\r\n faite. Je vous remercie bien pour votre patience.\r\n Je vous prie de trouver donc la carte grise ainsi que la\r\n nouvelle immatriculation. 
Je vous demanderai de faire les changements\r\n n\u00e9cessaires concernant l\u2019assurance.\r\n Je vous remercie encore pour tout.\r\n Cordialement,\r\n Monsieur Dupont (See attached file: pj.pdf)", + "body_tokens": [ + "je", + "vous", + "informe", + "que", + "la", + "nouvelle", + "immatriculation", + "est", + "enfin", + "faite", + "je", + "vous", + "prie", + "de", + "trouver", + "donc", + "la", + "carte", + "grise", + "ainsi", + "que", + "la", + "nouvelle", + "immatriculation", + "je", + "vous", + "demanderai", + "de", + "faire", + "changements", + "necessaires", + "concernant", + "l", + "assurance" + ], + "clean_header": "tr : immatriculation voiture", + "date": "2022-04-28 00:56:52", + "flagged_header": "tr : immatriculation voiture", + "flagged_text": "Je vous informe que la nouvelle immatriculation est enfin\nfaite\nJe vous prie de trouver donc la carte grise ainsi que la\nnouvelle immatriculation\nJe vous demanderai de faire les changements\nnecessaires concernant l'assurance", + "from": "conseiller1@societeimaginaire.fr", + "header": "Tr : Immatriculation voiture", + "header_tokens": [ + "tr", + "immatriculation", + "voiture" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonsoir madame," + ], + [ + "BODY", + "Je vous informe que la nouvelle immatriculation est enfin" + ], + [ + "BODY", + "faite" + ], + [ + "THANKS", + "Je vous remercie bien pour votre patience" + ], + [ + "BODY", + "Je vous prie de trouver donc la carte grise ainsi que la" + ], + [ + "BODY", + "nouvelle immatriculation" + ], + [ + "BODY", + "Je vous demanderai de faire les changements" + ], + [ + "BODY", + "necessaires concernant l'assurance" + ], + [ + "THANKS", + "Je vous remercie encore pour tout" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "BODY", + "Monsieur Dupont (See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je vous informe que la nouvelle 
immatriculation est enfin\nfaite\nJe vous prie de trouver donc la carte grise ainsi que la\nnouvelle immatriculation\nJe vous demanderai de faire les changements\nnecessaires concernant l'assurance", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "immatriculation", + "voiture", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "informe", + "que", + "la", + "nouvelle", + "immatriculation", + "est", + "enfin", + "faite", + "je", + "vous", + "prie", + "de", + "trouver", + "donc", + "la", + "carte", + "grise", + "ainsi", + "que", + "la", + "nouvelle", + "immatriculation", + "je", + "vous", + "demanderai", + "de", + "faire", + "changements", + "necessaires", + "concernant", + "l", + "assurance" + ] + }, + { + "age": 66, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjours,\r\n\r\n Suite a notre conversation t\u00e9l\u00e9phonique de Mardi , pourriez vous me dire la\r\n somme que je vous dois afin d'\u00eatre en r\u00e9gularisation .\r\n\r\n Merci bonne journ\u00e9e\r\n\r\n Le mar. 
22 mai 2018 \u00e0 10:20, a \u00e9crit\u00a0:\r\n Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Relev\u00e9 d'identit\u00e9 postal (contrats)\r\n\r\n Cordialement.\r\n\r\n La Mututelle Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "conversation", + "telephonique", + "de", + "mardi", + "pourriez", + "vous", + "me", + "dire", + "la", + "somme", + "que", + "je", + "vous", + "dois", + "afin", + "d", + "etre", + "en", + "regularisation" + ], + "clean_header": "re: envoi d'un document de la societe imaginaire", + "date": "2021-12-12 01:50:29", + "flagged_header": "re: envoi d'un document de la societe imaginaire", + "flagged_text": "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la\nsomme que je vous dois afin d'etre en regularisation", + "from": "Monsieur Dupont ", + "header": "Re: Envoi d'un document de la Soci\u00e9t\u00e9 Imaginaire", + "header_tokens": [ + "re", + "envoi", + "d", + "un", + "document", + "de", + "la", + "societe", + "imaginaire" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjours," + ], + [ + "BODY", + "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la" + ], + [ + "BODY", + "somme que je vous dois afin d'etre en regularisation" + ], + [ + "THANKS", + "Merci bonne journee" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Merci de bien vouloir prendre connaissance du document ci-joint :" + ], + [ + "SIGNATURE", + "1 - Releve d'identite postal (contrats)" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE_NAME", + "La Mututelle Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ] + ], + "text_from": null + } + ], + 
"sexe": "M", + "text": "Suite a notre conversation telephonique de Mardi , pourriez vous me dire la\nsomme que je vous dois afin d'etre en regularisation", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "envoi", + "d", + "un", + "document", + "de", + "la", + "societe", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "conversation", + "telephonique", + "de", + "mardi", + "pourriez", + "vous", + "me", + "dire", + "la", + "somme", + "que", + "je", + "vous", + "dois", + "afin", + "d", + "etre", + "en", + "regularisation" + ] + }, + { + "age": 50, + "attachment": "[\"fichedepaie.png\"]", + "attachments": [ + "fichedepaie.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n Je fais suite \u00e0\u00a0 votre mail. J'ai envoy\u00e9 mon bulletin de salaire\r\n par courrier mais il semblerait que vous ne l'ayez pas r\u00e9ceptionn\u00e9.\r\n Vous trouverez ci-joint mon bulletin de salaire.\r\n\r\n\r\n Bien cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n Le 16/05/2018 \u00e0 11:27, conseiller@Societeimaginaire.fr a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif\r\n d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur par exemple.\r\n\r\n Votre conseiller Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. 
www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n ", + "body_tokens": [ + "je", + "fais", + "suite", + "a", + "votre", + "mail", + "j", + "ai", + "envoye", + "mon", + "bulletin", + "de", + "salaire", + "par", + "courrier", + "mais", + "il", + "semblerait", + "que", + "vous", + "ne", + "l", + "ayez", + "pas", + "receptionne", + "vous", + "trouverez", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire" + ], + "clean_header": "re: votre adhesion a la societe imaginaire", + "date": "2020-08-05 01:42:25", + "flagged_header": "re: votre adhesion a la societe imaginaire", + "flagged_text": "Je fais suite a votre mail\nJ'ai envoye mon bulletin de salaire\npar courrier mais il semblerait que vous ne l'ayez pas receptionne\nVous trouverez ci_joint mon bulletin de salaire", + "from": "Monsieur Dupont ", + "header": "Re: Votre adh\u00e9sion \u00e0 la Soci\u00e9t\u00e9 Imaginaire", + "header_tokens": [ + "re", + "votre", + "adhesion", + "a", + "la", + "societe", + "imaginaire" + ], + "label": "adhesion", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Je fais suite a votre mail" + ], + [ + "BODY", + "J'ai envoye mon bulletin de salaire" + ], + [ + "BODY", + "par courrier mais il semblerait que vous ne l'ayez pas receptionne" + ], + [ + "BODY", + "Vous trouverez ci-joint mon bulletin de salaire" + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Sauf erreur de notre part, nous n'avons pas recu votre justificatif" + ], + [ + "BODY", + "d'emploi" + ], + [ + "BODY", + "Nous vous rappelons qu'il nous est indispensable pour valider votre" + ], + [ + "BODY", + "adhesion aupres de notre Societe" + ], + [ + "BODY", + "Il peut s'agir de votre dernier 
bulletin de paye ou d'une attestation de" + ], + [ + "BODY", + "votre employeur par exemple" + ], + [ + "BODY", + "Votre conseiller Societe Imaginaire" + ], + [ + "FOOTER", + "| |Garanti sans virus" + ], + [ + "FOOTER", + "www.avast.com |" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je fais suite a votre mail\nJ'ai envoye mon bulletin de salaire\npar courrier mais il semblerait que vous ne l'ayez pas receptionne\nVous trouverez ci-joint mon bulletin de salaire", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "votre", + "adhesion", + "a", + "la", + "societe", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "fais", + "suite", + "a", + "votre", + "mail", + "j", + "ai", + "envoye", + "mon", + "bulletin", + "de", + "salaire", + "par", + "courrier", + "mais", + "il", + "semblerait", + "que", + "vous", + "ne", + "l", + "ayez", + "pas", + "receptionne", + "vous", + "trouverez", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire" + ] + }, + { + "age": 15, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n Voici ci joint mon bulletin de salaire comme demand\u00e9.\r\n Cordialement\r\n Monsieur Dupont\r\n\r\n Le ven. 
25 mai 2018 \u00e0 11:23, a \u00e9crit\u00a0:\r\n\r\n Cher(e) client,\r\n\r\n Sauf erreur de notre part, nous n'avons pas re\u00e7u votre justificatif d\u2019emploi.\r\n Nous vous rappelons qu'il nous est indispensable pour valider votre\r\n adh\u00e9sion aupr\u00e8s de notre Societe Imaginaire.\r\n\r\n Il peut s\u2019agir de votre dernier bulletin de paye ou d\u2019une attestation de\r\n votre employeur.\r\n\r\n Votre conseiller Societe Imaginaire\u00a0\u00a0(See attached file: pj.jpg)", + "body_tokens": [ + "voici", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire", + "comme", + "demande" + ], + "clean_header": "bulletin de salaire", + "date": "2019-09-18 05:58:43", + "flagged_header": "bulletin de salaire", + "flagged_text": "Voici ci_joint mon bulletin de salaire comme demande", + "from": "Monsieur Dupont ", + "header": "Bulletin de salaire", + "header_tokens": [ + "bulletin", + "de", + "salaire" + ], + "label": "adhesion", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici ci joint mon bulletin de salaire comme demande" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Sauf erreur de notre part, nous n'avons pas recu votre justificatif d'emploi" + ], + [ + "BODY", + "Nous vous rappelons qu'il nous est indispensable pour valider votre" + ], + [ + "BODY", + "adhesion aupres de notre Societe Imaginaire" + ], + [ + "BODY", + "Il peut s'agir de votre dernier bulletin de paye ou d'une attestation de" + ], + [ + "BODY", + "votre employeur" + ], + [ + "BODY", + "Votre conseiller Societe Imaginaire (See attached file: pj.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Voici ci joint mon bulletin de salaire comme demande", + "to": 
"conseiller@societeimaginaire.fr", + "tokens": [ + "bulletin", + "de", + "salaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "voici", + "ci_joint", + "mon", + "bulletin", + "de", + "salaire", + "comme", + "demande" + ] + }, + { + "age": 22, + "attachment": "[]", + "attachments": [], + "body": " Madame, Monsieur,\r\n\r\n Je vous avais contact\u00e9s car j'avais pour\r\n projet d'agrandir ma maison. J'avais re\u00e7u un devis pour lequel je n'avais\r\n pas donn\u00e9 suite, les travaux n'\u00e9tant pas encore r\u00e9alis\u00e9s.\r\n\r\n Le projet a maintenant \u00e9t\u00e9 port\u00e9 \u00e0 son terme et je voudrais donc revoir\r\n votre offre si possible.\r\n\r\n Je d\u00e9sire garder le m\u00eame type de contrat.\r\n Je suis \u00e0 votre disposition pour tout renseignement compl\u00e9mentaires.\r\n\r\n Sinc\u00e8res salutations\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "je", + "vous", + "avais", + "contactes", + "car", + "j", + "avais", + "pour", + "projet", + "d", + "agrandir", + "ma", + "maison", + "j", + "avais", + "recu", + "un", + "devis", + "pour", + "lequel", + "je", + "n", + "avais", + "pas", + "donne", + "suite", + "travaux", + "n", + "etant", + "pas", + "encore", + "realises", + "projet", + "a", + "maintenant", + "ete", + "porte", + "a", + "son", + "terme", + "et", + "je", + "voudrais", + "donc", + "revoir", + "votre", + "offre", + "si", + "possible", + "je", + "desire", + "garder", + "meme", + "type", + "de", + "contrat", + "je", + "suis", + "a", + "votre", + "disposition", + "pour", + "tout", + "renseignement", + "complementaires" + ], + "clean_header": "modification et extension de ma maison", + "date": "2021-07-22 16:10:06", + "flagged_header": "modification et extension de ma maison", + "flagged_text": "Je vous avais contactes car j'avais pour\nprojet d'agrandir ma maison\nJ'avais recu un devis pour lequel je n'avais\npas donne suite, les travaux n'etant pas encore realises\nLe projet a maintenant ete porte a son terme et je 
voudrais donc revoir\nvotre offre si possible\nJe desire garder le meme type de contrat\nJe suis a votre disposition pour tout renseignement complementaires", + "from": "Monsieur Dupont ", + "header": "Modification et extension de ma maison", + "header_tokens": [ + "modification", + "et", + "extension", + "de", + "ma", + "maison" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous avais contactes car j'avais pour" + ], + [ + "BODY", + "projet d'agrandir ma maison" + ], + [ + "BODY", + "J'avais recu un devis pour lequel je n'avais" + ], + [ + "BODY", + "pas donne suite, les travaux n'etant pas encore realises" + ], + [ + "BODY", + "Le projet a maintenant ete porte a son terme et je voudrais donc revoir" + ], + [ + "BODY", + "votre offre si possible" + ], + [ + "BODY", + "Je desire garder le meme type de contrat" + ], + [ + "BODY", + "Je suis a votre disposition pour tout renseignement complementaires" + ], + [ + "GREETINGS", + "Sinceres salutations" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous avais contactes car j'avais pour\nprojet d'agrandir ma maison\nJ'avais recu un devis pour lequel je n'avais\npas donne suite, les travaux n'etant pas encore realises\nLe projet a maintenant ete porte a son terme et je voudrais donc revoir\nvotre offre si possible\nJe desire garder le meme type de contrat\nJe suis a votre disposition pour tout renseignement complementaires", + "to": "conseiller@societeimaginaire.fr", + "tokens": [ + "modification", + "et", + "extension", + "de", + "ma", + "maison", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "avais", + "contactes", + "car", + "j", + "avais", + "pour", + "projet", + "d", + "agrandir", + "ma", + "maison", + "j", + "avais", + "recu", + "un", + "devis", + "pour", + "lequel", + "je", + "n", + "avais", + 
"pas", + "donne", + "suite", + "travaux", + "n", + "etant", + "pas", + "encore", + "realises", + "projet", + "a", + "maintenant", + "ete", + "porte", + "a", + "son", + "terme", + "et", + "je", + "voudrais", + "donc", + "revoir", + "votre", + "offre", + "si", + "possible", + "je", + "desire", + "garder", + "meme", + "type", + "de", + "contrat", + "je", + "suis", + "a", + "votre", + "disposition", + "pour", + "tout", + "renseignement", + "complementaires" + ] + }, + { + "age": 28, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 12:24 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 15:56\r\n Objet :\tAssurance d'un nouveau logement\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n J'emm\u00e9nage dans un nouveau studio le Vendredi 2 Juin 2019\r\n mais je n'arrive pas \u00e0 souscrire d'assurance via la plateforme en ligne.\r\n\r\n C'est\u00a0pourquoi je vous envoie\u00a0l'\u00e9tat des\u00a0lieux de sortie du deuxi\u00e8me studio\r\n afin de\u00a0pouvoir l'enlever du contrat et ainsi pouvoir assurer le\u00a0nouveau\r\n logement.\r\n\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont.\r\n\r\n\r\n T\u00e9l : 07.00.00.00.00\r\n\r\n\r\n Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf)\r\n ", + "body_tokens": [ + "j", + "emmenage", + "dans", + "un", + "nouveau", + "studio", + "vendredi", + "2", + "juin", + "2019", + "mais", + "je", + "n", + "arrive", + "pas", + "a", + "souscrire", + "d", + "assurance", + "via", + "la", + "plateforme", + "en", + "ligne", + "c", + "est", + "pourquoi", + "je", + "vous", + "envoie", + "l", + "etat", + "des", + "lieux", + "de", + "sortie", + "du", + "deuxieme", + "studio", + "afin", + "de", + "pouvoir", + "l", + "enlever", + "du", + "contrat", + "et", + "ainsi", + "pouvoir", + "assurer", + "nouveau", + "logement" + ], + "clean_header": "tr : assurance d'un nouveau logement", + "date": 
"2020-04-15 07:44:04", + "flagged_header": "tr : assurance d'un nouveau logement", + "flagged_text": "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019\nmais je n'arrive pas a souscrire d'assurance via la plateforme en ligne\nC'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio\nafin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau\nlogement", + "from": "Conseiller ", + "header": "Tr : Assurance d'un nouveau logement", + "header_tokens": [ + "tr", + "assurance", + "d", + "un", + "nouveau", + "logement" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019" + ], + [ + "BODY", + "mais je n'arrive pas a souscrire d'assurance via la plateforme en ligne" + ], + [ + "BODY", + "C'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio" + ], + [ + "BODY", + "afin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau" + ], + [ + "BODY", + "logement" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "Tel : 07" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "Mail : monsieurdupont@extensiona.com (See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "J'emmenage dans un nouveau studio le Vendredi 2 Juin 2019\nmais je n'arrive pas a souscrire d'assurance via la plateforme en ligne\nC'est pourquoi je vous envoie l'etat des lieux de sortie du deuxieme studio\nafin de pouvoir l'enlever du contrat et ainsi pouvoir assurer le nouveau\nlogement", + "to": "conseiller1@societeimaginaire.fr", + "tokens": [ + "tr", + "assurance", + "d", + "un", + "nouveau", + "logement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "j", + "emmenage", + "dans", + "un", + 
"nouveau", + "studio", + "vendredi", + "2", + "juin", + "2019", + "mais", + "je", + "n", + "arrive", + "pas", + "a", + "souscrire", + "d", + "assurance", + "via", + "la", + "plateforme", + "en", + "ligne", + "c", + "est", + "pourquoi", + "je", + "vous", + "envoie", + "l", + "etat", + "des", + "lieux", + "de", + "sortie", + "du", + "deuxieme", + "studio", + "afin", + "de", + "pouvoir", + "l", + "enlever", + "du", + "contrat", + "et", + "ainsi", + "pouvoir", + "assurer", + "nouveau", + "logement" + ] + }, + { + "age": 39, + "attachment": "[\"image001.png\"]", + "attachments": [ + "image001.png" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n\r\n\r\n Je me permets de venir vers vous car depuis le d\u00e9but de l\u2019ann\u00e9e, nous avons\r\n des difficult\u00e9s pour assurer nos v\u00e9hicules.\r\n\r\n Ces derniers jours, vous nous avez fait parvenir le d\u00e9tail des\r\n d\u00e9nominations et des niveau d\u2019assurance de chaque v\u00e9hicule.\r\n\r\n Merci d\u2019effectuer ces changements\r\n\r\n\r\n\r\n Dans l\u2019attente de votre retour\r\n\r\n\r\n\r\n Cordialement\r\n\r\n\r\n\r\n Monsieur Dupont\r\n\r\n \u00a0(See attached file: image001.png)(See attached file: Assurances v\u00e9hicules2018.pdf)", + "body_tokens": [ + "je", + "me", + "permets", + "de", + "venir", + "vers", + "vous", + "car", + "depuis", + "debut", + "de", + "l", + "annee", + "nous", + "avons", + "des", + "difficultes", + "pour", + "assurer", + "nos", + "vehicules", + "ces", + "derniers", + "jours", + "vous", + "nous", + "avez", + "fait", + "parvenir", + "detail", + "des", + "denominations", + "et", + "des", + "niveau", + "d", + "assurance", + "de", + "chaque", + "vehicule", + "merci", + "d", + "effectuer", + "ces", + "changements", + "dans", + "l", + "attente", + "de", + "votre", + "retour" + ], + "clean_header": "assurance vehicules", + "date": "2021-08-13 16:01:02", + "flagged_header": "assurance vehicules", + "flagged_text": "Je me permets de venir vers vous car depuis le debut de 
l'annee, nous avons\ndes difficultes pour assurer nos vehicules\nCes derniers jours, vous nous avez fait parvenir le detail des\ndenominations et des niveau d'assurance de chaque vehicule\nMerci d'effectuer ces changements\nDans l'attente de votre retour", + "from": "Monsieur Dupont ", + "header": "Assurance v\u00e9hicules", + "header_tokens": [ + "assurance", + "vehicules" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Je me permets de venir vers vous car depuis le debut de l'annee, nous avons" + ], + [ + "BODY", + "des difficultes pour assurer nos vehicules" + ], + [ + "BODY", + "Ces derniers jours, vous nous avez fait parvenir le detail des" + ], + [ + "BODY", + "denominations et des niveau d'assurance de chaque vehicule" + ], + [ + "BODY", + "Merci d'effectuer ces changements" + ], + [ + "BODY", + "Dans l'attente de votre retour" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(See attached file: image001.png)(See attached file: Assurances vehicules2018.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je me permets de venir vers vous car depuis le debut de l'annee, nous avons\ndes difficultes pour assurer nos vehicules\nCes derniers jours, vous nous avez fait parvenir le detail des\ndenominations et des niveau d'assurance de chaque vehicule\nMerci d'effectuer ces changements\nDans l'attente de votre retour", + "to": "demandes4@societeimaginaire.fr", + "tokens": [ + "assurance", + "vehicules", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "me", + "permets", + "de", + "venir", + "vers", + "vous", + "car", + "depuis", + "debut", + "de", + "l", + "annee", + "nous", + "avons", + "des", + "difficultes", + "pour", + "assurer", + "nos", + "vehicules", + "ces", + "derniers", + "jours", + "vous", + "nous", + "avez", + "fait", + "parvenir", + "detail", + "des", + 
"denominations", + "et", + "des", + "niveau", + "d", + "assurance", + "de", + "chaque", + "vehicule", + "merci", + "d", + "effectuer", + "ces", + "changements", + "dans", + "l", + "attente", + "de", + "votre", + "retour" + ] + }, + { + "age": 38, + "attachment": "[\"pj.pdf\"]", + "attachments": [ + "pj.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Voici la copie du virement effectuer \u00e0 ce jour.\r\n Serait-il possible d\u2019obtenir une attestation d\u2019assurance?\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n 06 00 00 00 00\r\n (See attached file: pj.pdf)\r\n\r\n > Le 23 mai 2018 \u00e0 10:17, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >\r\n > Bonjour,\r\n >\r\n > Nous faisons suite \u00e0 votre dernier courriel.\r\n >\r\n > A ce jour, le montant \u00e0 devoir, permettant de solder votre compte\r\n cotisation, est de 000.00euros.\r\n >\r\n > Nous restons \u00e0 votre disposition pour tous renseignements\r\n compl\u00e9mentaires.\r\n >\r\n > Meilleures salutations,\r\n >\r\n > Conseiller.\r\n > ", + "body_tokens": [ + "voici", + "la", + "copie", + "du", + "virement", + "effectuer", + "a", + "ce", + "jour", + "serait-il", + "possible", + "d", + "obtenir", + "une", + "attestation", + "d", + "assurance" + ], + "clean_header": "re: virement", + "date": "2021-06-18 04:20:07", + "flagged_header": "re: virement", + "flagged_text": "Voici la copie du virement effectuer a ce jour\nSerait-il possible d'obtenir une attestation d'assurance?", + "from": "Monsieur Dupont ", + "header": "Re: Virement", + "header_tokens": [ + "re", + "virement" + ], + "label": "autres", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici la copie du virement effectuer a ce jour" + ], + [ + "BODY", + "Serait-il possible d'obtenir une attestation d'assurance?" 
+ ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "06 00 00 00 00" + ], + [ + "PJ", + "(See attached file: pj.pdf)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "> Nous faisons suite a votre dernier courriel" + ], + [ + "BODY", + "> A ce jour, le montant a devoir, permettant de solder votre compte" + ], + [ + "BODY", + "cotisation, est de 000" + ], + [ + "BODY", + "00euros" + ], + [ + "BODY", + "> Nous restons a votre disposition pour tous renseignements" + ], + [ + "BODY", + "complementaires" + ], + [ + "GREETINGS", + "> Meilleures salutations," + ], + [ + "SIGNATURE", + "> Conseiller" + ], + [ + "BODY", + "> Message du 31/05/18 08:51\r\n > De : monsieurdupont@extensionb.com\r\n > A : \"GARAGE\" \r\n > Copie \u00e0 :\r\n > Objet : Attached Image\r\n >\r\n >(See attached file: pj.pdf)", + "body_tokens": [], + "clean_header": "pret vehicule", + "date": "2022-08-30 03:14:42", + "flagged_header": "pret vehicule", + "flagged_text": "", + "from": "Monsieur Dupont ", + "header": "Pr\u00eat v\u00e9hicule", + "header_tokens": [ + "pret", + "vehicule" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "BONJOUR" + ], + [ + "SIGNATURE_NAME", + "CI-JOINT PRET VEHICULE" + ], + [ + "GREETINGS", + "CORDIALEMENT" + ], + [ + "BODY", + "GARAGE> Message du 31/05/18 08:51" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Copie" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "PJ", + "(See attached file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "pret", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]" + ] + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 31/05/2018 11:59 -----\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t30/05/2018 10:12\r\n Objet :\tRe: Demande\r\n\r\n\r\n\r\n Bonjour Monsieur,\r\n Pouvez-vous m'appeler lundi prochain en fin d'apr\u00e8s-midi ?\r\n Cordialement,\r\n Monsieur Dupont\r\n\r\n Le mer. 31 mai 2018 \u00e0 09:58, a \u00e9crit\u00a0:\r\n Bonjour Mr Dupont,\r\n\r\n Je fais suite \u00e0 mon message t\u00e9l\u00e9phonique de ce jour.\r\n Comme pr\u00e9cis\u00e9, je vous adresse ce mail avec mes coordonn\u00e9es pour que\r\n vous puissiez en retour me faire part de vos disponibilit\u00e9s.\r\n Pour rappel, je souhaite vous rencontrer pour faire un bilan\r\n g\u00e9n\u00e9ral de vos contrats.\r\n\r\n Dans l'attente de vous lire, je vous souhaite une bonne journ\u00e9e.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller.\r\n conseiller@Societeimaginaire.fr\r\n Conseiller Societe Imaginaire.\r\n ", + "body_tokens": [ + "pouvez-vous", + "m", + "appeler", + "lundi", + "prochain", + "en", + "fin", + "d", + "apres-midi" + ], + "clean_header": "bilan general contrats", + "date": "2019-06-12 03:40:19", + "flagged_header": "bilan general contrats", + "flagged_text": "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?", + "from": "conseiller@societeimaginaire.fr", + "header": "Bilan g\u00e9n\u00e9ral contrats", + "header_tokens": [ + "bilan", + "general", + "contrats" + ], + "label": "bilan", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Monsieur," + ], + [ + "BODY", + "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?" 
+ ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Mr Dupont," + ], + [ + "BODY", + "Je fais suite a mon message telephonique de ce jour" + ], + [ + "BODY", + "Comme precise, je vous adresse ce mail avec mes coordonnees pour que" + ], + [ + "BODY", + "vous puissiez en retour me faire part de vos disponibilites" + ], + [ + "BODY", + "Pour rappel, je souhaite vous rencontrer pour faire un bilan" + ], + [ + "BODY", + "general de vos contrats" + ], + [ + "BODY", + "Dans l'attente de vous lire, je vous souhaite une bonne journee" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE", + "conseiller@Societeimaginaire.fr" + ], + [ + "SIGNATURE", + "Conseiller Societe Imaginaire" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Pouvez-vous m'appeler lundi prochain en fin d'apres-midi ?", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "bilan", + "general", + "contrats", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "pouvez-vous", + "m", + "appeler", + "lundi", + "prochain", + "en", + "fin", + "d", + "apres-midi" + ] + }, + { + "age": 23, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour,\r\n je joins le num\u00e9ro de t\u00e9l\u00e9phone de mon fils\r\n\r\n Monsieur Dupont\r\n \u00a0tel : 06 00 00 00 00.\r\n\r\n Monsieur Dupont\r\n\r\n Cordialement\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "joins", + "numero", + "de", + "telephone", + "de", + "mon", + "fils" + ], + "clean_header": "numero de telephone", + "date": "2021-10-04 18:17:05", + "flagged_header": "numero de telephone", + "flagged_text": "Suite a notre entretien telephonique de ce jour,\nje 
joins le numero de telephone de mon fils", + "from": "monsieurdupont@extensionf.net", + "header": "Num\u00e9ro de t\u00e9l\u00e9phone", + "header_tokens": [ + "numero", + "de", + "telephone" + ], + "label": "modification", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Suite a notre entretien telephonique de ce jour," + ], + [ + "BODY", + "je joins le numero de telephone de mon fils" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "tel : 06 00 00 00 00" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "GREETINGS", + "Cordialement" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique de ce jour,\nje joins le numero de telephone de mon fils", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "numero", + "de", + "telephone", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "joins", + "numero", + "de", + "telephone", + "de", + "mon", + "fils" + ] + }, + { + "age": 28, + "attachment": "[\"image1.jpeg\",\"image2.jpeg\",\"image3.jpeg\"]", + "attachments": [ + "image1.jpeg", + "image2.jpeg", + "image3.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n De :\tDupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:41\r\n Objet :\tRe: Vos documents demand\u00e9s\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Serait-il possible de rebasculer mon v\u00e9hicule d\u00e8s\r\n aujourd'hui en assurance parfaite?\r\n\r\n Voici des photos du jour qui attestent de son parfait \u00e9tat.\r\n\r\n D'avance merci de votre confirmation\r\n\r\n Monsieur Dupont\r\n Envoy\u00e9 de mon iPhone\r\n (See attached file: image1.jpeg)\r\n\r\n\r\n (See attached file: image2.jpeg)\r\n\r\n\r\n (See attached file: image3.jpeg)\r\n\r\n\r\n > Le 21 janv. 
2018 \u00e0 20:38, Monsieur Dupont a \u00e9crit :\r\n >\r\n > Bonjour Madame,\r\n >\r\n > Je vous confirme que je souhaite basculer mon contrat actuel sous la\r\n nouvelle forme d\u00e8s \u00e0 pr\u00e9sent.\r\n >\r\n > D'avance merci de votre confirmation.\r\n >\r\n > Bien cordialement,\r\n > Monsieur Dupont\r\n >\r\n > Envoy\u00e9 de mon iPad\r\n >\r\n >> Le 30 nov. 2017 \u00e0 10:06, conseiller@Societeimaginaire.fr a \u00e9crit :\r\n >>\r\n >> Bonjour,\r\n >>\r\n >> Veuillez trouver ci-joint les documents que vous nous avez demand\u00e9s :\r\n >> - Devis1\r\n >> - Devis2\r\n >>\r\n >> La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n >>\r\n >> Bien \u00e0 vous.\r\n >>\r\n >> La SocSociete Imaginaire.\r\n >> \r\n >> ", + "body_tokens": [ + "serait-il", + "possible", + "de", + "rebasculer", + "mon", + "vehicule", + "des", + "aujourd", + "hui", + "en", + "assurance", + "parfaite", + "voici", + "des", + "photos", + "du", + "jour", + "qui", + "attestent", + "de", + "son", + "parfait", + "etat", + "d", + "avance", + "merci", + "de", + "votre", + "confirmation" + ], + "clean_header": "tr : re: vos documents demandes", + "date": "2022-06-14 08:35:02", + "flagged_header": "tr : re: vos documents demandes", + "flagged_text": "Serait-il possible de rebasculer mon vehicule des\naujourd'hui en assurance parfaite?\nVoici des photos du jour qui attestent de son parfait etat\nD'avance merci de votre confirmation", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Vos documents demand\u00e9s", + "header_tokens": [ + "tr", + "re", + "vos", + "documents", + "demandes" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Serait-il possible de rebasculer mon vehicule des" + ], + [ + "BODY", + "aujourd'hui en assurance parfaite?" 
+ ], + [ + "BODY", + "Voici des photos du jour qui attestent de son parfait etat" + ], + [ + "BODY", + "D'avance merci de votre confirmation" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "FOOTER", + "Envoye de mon iPhone" + ], + [ + "PJ", + "(See attached file: image1.jpeg)" + ], + [ + "PJ", + "(See attached file: image2.jpeg)" + ], + [ + "PJ", + "(See attached file: image3.jpeg)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "> Je vous confirme que je souhaite basculer mon contrat actuel sous la" + ], + [ + "BODY", + "nouvelle forme des a present" + ], + [ + "BODY", + "> D'avance merci de votre confirmation" + ], + [ + "GREETINGS", + "> Bien cordialement," + ], + [ + "HELLO", + "> Monsieur Dupont>" + ], + [ + "FOOTER", + "> Envoye de mon iPad" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + ">> Veuillez trouver ci-joint les documents que vous nous avez demandes :" + ], + [ + "BODY", + ">> - Devis1>> - Devis2>>" + ], + [ + "FOOTER", + ">> La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "GREETINGS", + ">> Bien a vous" + ], + [ + "SIGNATURE_NAME", + ">> La SocSociete Imaginaire" + ], + [ + "BODY", + ">> " + ], + [ + "BODY", + ">> ", + "header": "Demande", + "header_tokens": [ + "demande" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Monsieur Dupont (See attached file: Releve d'informations.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Monsieur Dupont (See attached file: Releve d'informations.pdf)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "monsieur", + "dupont", + "see", + "attached", + "file", + "releve", + "d", + "informations", 
+ "pdf" + ] + }, + { + "age": 36, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 09:20 -----\r\n\r\n De : \u00a0 \u00a0 \u00a0 \u00a0Association Dupont \r\n A : \u00a0 \u00a0 \u00a0 \u00a0representant@Societeimaginaire.fr\r\n Date : \u00a0 \u00a0 \u00a0 \u00a001/06/2018 16:03\r\n Objet : \u00a0 \u00a0 \u00a0 \u00a0Re: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n\r\n Bonjour, merci de votre retour rapide.\r\n\r\n Concernent l'attestation du 22 septembre, serait-il possible de faire\r\n appara\u00eetre l'adresse compl\u00e8te de l'\u00e9v\u00e9nement.\r\n\r\n En effet, s'agissant d'un domaine priv\u00e9, les propri\u00e9taires nous ont fait\r\n cette demande.\r\n\r\n Vous remerciant par avance.\r\n\r\n Cordialement.\r\n\r\n Monsieur Dupont,\r\n Association LOI 1901.\r\n T\u00e9l. perso : 06.00.00.00.00\r\n\r\n ", + "body_tokens": [ + "bonjour", + "merci", + "de", + "votre", + "retour", + "rapide", + "concernent", + "l", + "attestation", + "du", + "22", + "septembre", + "serait-il", + "possible", + "de", + "faire", + "apparaitre", + "l", + "adresse", + "complete", + "de", + "l", + "evenement", + "en", + "effet", + "s", + "agissant", + "d", + "un", + "domaine", + "prive", + "proprietaires", + "nous", + "ont", + "fait", + "cette", + "demande" + ], + "clean_header": "tr : attestation", + "date": "2020-06-04 11:29:57", + "flagged_header": "tr : attestation", + "flagged_text": "Bonjour, merci de votre retour rapide\nConcernent l'attestation du 22 septembre, serait-il possible de faire\napparaitre l'adresse complete de l'evenement\nEn effet, s'agissant d'un domaine prive, les proprietaires nous ont fait\ncette demande", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : attestation", + "header_tokens": [ + "tr", + "attestation" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, merci de 
votre retour rapide" + ], + [ + "BODY", + "Concernent l'attestation du 22 septembre, serait-il possible de faire" + ], + [ + "BODY", + "apparaitre l'adresse complete de l'evenement" + ], + [ + "BODY", + "En effet, s'agissant d'un domaine prive, les proprietaires nous ont fait" + ], + [ + "BODY", + "cette demande" + ], + [ + "THANKS", + "Vous remerciant par avance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont," + ], + [ + "SIGNATURE", + "Association LOI 1901" + ], + [ + "SIGNATURE", + "Tel" + ], + [ + "BODY", + "perso : 06" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Bonjour, merci de votre retour rapide\nConcernent l'attestation du 22 septembre, serait-il possible de faire\napparaitre l'adresse complete de l'evenement\nEn effet, s'agissant d'un domaine prive, les proprietaires nous ont fait\ncette demande", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "attestation", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "bonjour", + "merci", + "de", + "votre", + "retour", + "rapide", + "concernent", + "l", + "attestation", + "du", + "22", + "septembre", + "serait-il", + "possible", + "de", + "faire", + "apparaitre", + "l", + "adresse", + "complete", + "de", + "l", + "evenement", + "en", + "effet", + "s", + "agissant", + "d", + "un", + "domaine", + "prive", + "proprietaires", + "nous", + "ont", + "fait", + "cette", + "demande" + ] + }, + { + "age": 88, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Je vous remercie pour l'attestation demand\u00e9e par t\u00e9l\u00e9phone.\r\n je vous rappelle que je suis la propri\u00e9taire de ce v\u00e9hicule et que cette\r\n attestation est \u00e0 destination de mon travail.\r\n\r\n Cordialement\r\n\r\n Madame Dupont\r\n\r\n\r\n\r\n PS: il y a quelques temps j'ai sollicit\u00e9 vos 
services pour une attestation\r\n et je n'ai eu aucune difficult\u00e9 pour l'obtenir\r\n\r\n\r\n\r\n > Message du 28/05/18 15:22\r\n > De : conseiller@Societeimaginaire.fr\r\n > A : madamedupont@extensionb.com\r\n > Copie \u00e0 :\r\n > Objet : Envoi d'un document de la Societe Imaginaire\r\n >\r\n > Bonjour.\r\n\r\n Merci de bien vouloir prendre connaissance du document ci-joint :\r\n 1 - Attestation d'assurance\r\n\r\n Cordialement.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: pj.pdf)", + "body_tokens": [ + "je", + "vous", + "remercie", + "pour", + "l", + "attestation", + "demandee", + "par", + "telephone", + "je", + "vous", + "rappelle", + "que", + "je", + "suis", + "la", + "proprietaire", + "de", + "ce", + "vehicule", + "et", + "que", + "cette", + "attestation", + "est", + "a", + "destination", + "de", + "mon", + "travail" + ], + "clean_header": "demande attestation - envoi d'un document de la mutuelle imaginaire", + "date": "2020-01-10 19:37:35", + "flagged_header": "demande attestation - envoi d'un document de la mutuelle imaginaire", + "flagged_text": "Je vous remercie pour l'attestation demandee par telephone\nje vous rappelle que je suis la proprietaire de ce vehicule et que cette\nattestation est a destination de mon travail", + "from": "Monsieur Dupont ", + "header": "demande attestation - Envoi d'un document de la Mutuelle Imaginaire", + "header_tokens": [ + "demande", + "attestation", + "envoi", + "d", + "un", + "document", + "de", + "la", + "mutuelle", + "imaginaire" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "Je vous remercie pour l'attestation demandee par telephone" + ], + [ + "BODY", + "je vous rappelle que je suis la proprietaire de ce vehicule et que cette" + ], + [ + "BODY", + "attestation est a destination de mon travail" + ], + [ + 
"GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Madame Dupont" + ], + [ + "BODY", + "PS: il y a quelques temps j'ai sollicite vos services pour une attestation" + ], + [ + "BODY", + "et je n'ai eu aucune difficulte pour l'obtenir> Message du 28/05/18 15:22" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Merci de bien vouloir prendre connaissance du document ci-joint :" + ], + [ + "SIGNATURE", + "1 - Attestation d'assurance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "BODY", + "(See attached" + ], + [ + "BODY", + "file: pj.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous remercie pour l'attestation demandee par telephone\nje vous rappelle que je suis la proprietaire de ce vehicule et que cette\nattestation est a destination de mon travail", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "attestation", + "envoi", + "d", + "un", + "document", + "de", + "la", + "mutuelle", + "imaginaire", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "remercie", + "pour", + "l", + "attestation", + "demandee", + "par", + "telephone", + "je", + "vous", + "rappelle", + "que", + "je", + "suis", + "la", + "proprietaire", + "de", + "ce", + "vehicule", + "et", + "que", + "cette", + "attestation", + "est", + "a", + "destination", + "de", + "mon", + "travail" + ] + }, + { + "age": 57, + "attachment": "[\"Nume\u0301riser.pdf\"]", + "attachments": [ + "Nume\u0301riser.pdf" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour madame,\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique de ce jour, je vous joins un Scan de\r\n mon certificat de cession de mon v\u00e9hicule nautique 
assur\u00e9 chez vous.\r\n Je vous remercie de la clart\u00e9 de vos explications et reste \u00e0 votre\r\n disposition.\r\n Bien \u00e0 vous\r\n Monsieur Dupont(See attached file: Nume\u0301riser.pdf)\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "vous", + "joins", + "un", + "scan", + "de", + "mon", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "nautique", + "assure", + "chez", + "vous", + "je", + "vous", + "remercie", + "de", + "la", + "clarte", + "de", + "vos", + "explications", + "et", + "reste", + "a", + "votre", + "disposition" + ], + "clean_header": "certificat de cession de vehicule", + "date": "2020-01-19 11:24:23", + "flagged_header": "certificat de cession de vehicule", + "flagged_text": "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de\nmon certificat de cession de mon vehicule nautique assure chez vous\nJe vous remercie de la clarte de vos explications et reste a votre\ndisposition", + "from": "Monsieur Dupont ", + "header": "certificat de cession de v\u00e9hicule", + "header_tokens": [ + "certificat", + "de", + "cession", + "de", + "vehicule" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour madame," + ], + [ + "BODY", + "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de" + ], + [ + "BODY", + "mon certificat de cession de mon vehicule nautique assure chez vous" + ], + [ + "BODY", + "Je vous remercie de la clarte de vos explications et reste a votre" + ], + [ + "BODY", + "disposition" + ], + [ + "GREETINGS", + "Bien a vous" + ], + [ + "BODY", + "Monsieur Dupont(See attached file: Numeriser.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique de ce jour, je vous joins un Scan de\nmon certificat de cession de mon vehicule nautique assure chez vous\nJe vous 
remercie de la clarte de vos explications et reste a votre\ndisposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "certificat", + "de", + "cession", + "de", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "de", + "ce", + "jour", + "je", + "vous", + "joins", + "un", + "scan", + "de", + "mon", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "nautique", + "assure", + "chez", + "vous", + "je", + "vous", + "remercie", + "de", + "la", + "clarte", + "de", + "vos", + "explications", + "et", + "reste", + "a", + "votre", + "disposition" + ] + }, + { + "age": 82, + "attachment": "[\"pj.jpg\"]", + "attachments": [ + "pj.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Vous trouverez ci-joint le certificat de cession attestant de la vente du\r\n v\u00e9hicule pour r\u00e9silier l'assurance.\r\n\r\n Cordialement\r\n Monsieur Dupont(See attached file: pj.jpg)", + "body_tokens": [ + "vous", + "trouverez", + "ci_joint", + "certificat", + "de", + "cession", + "attestant", + "de", + "la", + "vente", + "du", + "vehicule", + "pour", + "resilier", + "l", + "assurance" + ], + "clean_header": "certificat de cession ", + "date": "2022-04-15 12:56:46", + "flagged_header": "certificat de cession", + "flagged_text": "Vous trouverez ci_joint le certificat de cession attestant de la vente du\nvehicule pour resilier l'assurance", + "from": "Monsieur Dupont ", + "header": "certificat de cession ", + "header_tokens": [ + "certificat", + "de", + "cession" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Vous trouverez ci-joint le certificat de cession attestant de la vente du" + ], + [ + "BODY", + "vehicule pour resilier l'assurance" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "BODY", + "Monsieur Dupont(See attached file: pj.jpg)" + ] + ], + 
"text_from": null + } + ], + "sexe": "M", + "text": "Vous trouverez ci-joint le certificat de cession attestant de la vente du\nvehicule pour resilier l'assurance", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "certificat", + "de", + "cession", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + "trouverez", + "ci_joint", + "certificat", + "de", + "cession", + "attestant", + "de", + "la", + "vente", + "du", + "vehicule", + "pour", + "resilier", + "l", + "assurance" + ] + }, + { + "age": 60, + "attachment": "[\"Screenshot.png\",\"Screenshot2.png\",\"Screenshot3.png\"]", + "attachments": [ + "Screenshot.png", + "Screenshot2.png", + "Screenshot3.png" + ], + "body": "\r\n\r\n\r\n\r\n Vous trouverez en pi\u00e8ce jointe le relev\u00e9 de situation ainsi que le bulletin\r\n de paie demand\u00e9 et un rib.\r\n Mon Adresse compl\u00e8te est l\u00e0 suivante : 00 rue imaginaire 33000 Villeimaginaire\r\n \u00a0(See attached file: Screenshot.png)(See attached\r\n file: Screenshot2.png)(See attached file:\r\n Screenshot3.png)", + "body_tokens": [ + "vous", + "trouverez", + "en", + "piece", + "jointe", + "releve", + "de", + "situation", + "ainsi", + "que", + "bulletin", + "de", + "paie", + "demande", + "et", + "un", + "rib", + "mon", + "adresse", + "complete", + "est", + "la", + "suivante", + "00", + "rue", + "imaginaire", + "33000", + "villeimaginaire", + "file", + "screenshot2", + "png", + "see", + "attached", + "file", + "screenshot3", + "png" + ], + "clean_header": "assurance auto", + "date": "2019-02-08 13:05:09", + "flagged_header": "assurance auto", + "flagged_text": "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin\nde paie demande et un rib\nMon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire\nfile: Screenshot2.png)(See attached file:\nScreenshot3.png)", + "from": "Monsieur Dupont ", + "header": "Assurance auto", + "header_tokens": [ + "assurance", + "auto" + ], + "label": "adhesion", + 
"messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin" + ], + [ + "BODY", + "de paie demande et un rib" + ], + [ + "BODY", + "Mon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire" + ], + [ + "PJ", + "(See attached file: Screenshot.png)(See attached" + ], + [ + "BODY", + "file: Screenshot2.png)(See attached file:" + ], + [ + "BODY", + "Screenshot3.png)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Vous trouverez en piece jointe le releve de situation ainsi que le bulletin\nde paie demande et un rib\nMon Adresse complete est la suivante : 00 rue imaginaire 33000 Villeimaginaire\nfile: Screenshot2.png)(See attached file:\nScreenshot3.png)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "assurance", + "auto", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + "trouverez", + "en", + "piece", + "jointe", + "releve", + "de", + "situation", + "ainsi", + "que", + "bulletin", + "de", + "paie", + "demande", + "et", + "un", + "rib", + "mon", + "adresse", + "complete", + "est", + "la", + "suivante", + "00", + "rue", + "imaginaire", + "33000", + "villeimaginaire", + "file", + "screenshot2", + "png", + "see", + "attached", + "file", + "screenshot3", + "png" + ] + }, + { + "age": 21, + "attachment": "[\"ACTE VENTE 1.pdf\",\"ACTE VENTE 2.pdf\"]", + "attachments": [ + "ACTE VENTE 1.pdf", + "ACTE VENTE 2.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n Le lundi 4 juin 2018 \u00e0 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\r\n a \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Nous avons bien re\u00e7u votre email qui a retenu toute notre attention.\r\n\r\n Le document que vous nous avez envoy\u00e9 n'est pas exploitable. 
Pourriez-vous\r\n nous le renvoyer dans une meilleure r\u00e9solution ?\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe Imaginaire\r\n |------------------------+---------------------------------------|\r\n | |Garanti sans virus. www.avast.com |\r\n |------------------------+---------------------------------------|\r\n\r\n\r\n\r\n (See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)", + "body_tokens": [ + "lundi", + "4", + "juin", + "2018", + "a", + "13", + "59", + "14", + "utc", + "2", + "conseiller", + "societeimaginaire", + "fr", + "conseiller", + "societeimaginaire", + "fr", + "a", + "ecrit", + "nous", + "avons", + "bien", + "recu", + "votre", + "email", + "qui", + "a", + "retenu", + "toute", + "notre", + "attention", + "document", + "que", + "vous", + "nous", + "avez", + "envoye", + "n", + "est", + "pas", + "exploitable", + "pourriez-vous", + "nous", + "renvoyer", + "dans", + "une", + "meilleure", + "resolution" + ], + "clean_header": "re: suppression assurance logement", + "date": "2020-12-04 05:01:34", + "flagged_header": "re: suppression assurance logement", + "flagged_text": "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\n a ecrit :\nNous avons bien recu votre email qui a retenu toute notre attention\nLe document que vous nous avez envoye n'est pas exploitable\nPourriez-vous\nnous le renvoyer dans une meilleure resolution ?", + "from": "Monsieur Dupont ", + "header": "Re: Suppression assurance logement", + "header_tokens": [ + "re", + "suppression", + "assurance", + "logement" + ], + "label": "succession", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr" + ], + [ + "BODY", + " a ecrit :" + ], + [ + "HELLO", + "Chere client," + ], + [ + "BODY", + "Nous avons bien recu votre email qui a retenu toute notre attention" + ], + [ + "BODY", + 
"Le document que vous nous avez envoye n'est pas exploitable" + ], + [ + "BODY", + "Pourriez-vous" + ], + [ + "BODY", + "nous le renvoyer dans une meilleure resolution ?" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "BODY", + "Au nom de l'equipe Societe Imaginaire" + ], + [ + "SIGNATURE", + "logo Societe Imaginaire" + ], + [ + "FOOTER", + "| |Garanti sans virus" + ], + [ + "FOOTER", + "www.avast.com |" + ], + [ + "PJ", + "(See attached file: ACTE VENTE 1.pdf)(See attached file: ACTE VENTE 2.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Le lundi 4 juin 2018 a 13:59:14 UTC+2, conseiller@Societeimaginaire.fr\n a ecrit :\nNous avons bien recu votre email qui a retenu toute notre attention\nLe document que vous nous avez envoye n'est pas exploitable\nPourriez-vous\nnous le renvoyer dans une meilleure resolution ?", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "suppression", + "assurance", + "logement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "lundi", + "4", + "juin", + "2018", + "a", + "13", + "59", + "14", + "utc", + "2", + "conseiller", + "societeimaginaire", + "fr", + "conseiller", + "societeimaginaire", + "fr", + "a", + "ecrit", + "nous", + "avons", + "bien", + "recu", + "votre", + "email", + "qui", + "a", + "retenu", + "toute", + "notre", + "attention", + "document", + "que", + "vous", + "nous", + "avez", + "envoye", + "n", + "est", + "pas", + "exploitable", + "pourriez-vous", + "nous", + "renvoyer", + "dans", + "une", + "meilleure", + "resolution" + ] + }, + { + "age": 58, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour,\r\n\r\n Permettez-moi de vous signaler un changement d'adresse me concernant.\r\n\r\n Voici ma nouvelle adresse :\r\n\r\n 00 rue du Nomderue\r\n 75000 Paris\r\n\r\n Merci.\r\n\r\n Bien \u00e0 vous,\r\n\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "permettez-moi", + "de", + "vous", + "signaler", + "un", + "changement", + "d", + "adresse", + "me", 
+ "concernant", + "voici", + "ma", + "nouvelle", + "adresse" + ], + "clean_header": "changement d'adresse", + "date": "2022-06-01 14:14:07", + "flagged_header": "changement d'adresse", + "flagged_text": "Permettez-moi de vous signaler un changement d'adresse me concernant\nVoici ma nouvelle adresse :", + "from": "Monsieur Dupont ", + "header": "changement d'adresse", + "header_tokens": [ + "changement", + "d", + "adresse" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Permettez-moi de vous signaler un changement d'adresse me concernant" + ], + [ + "BODY", + "Voici ma nouvelle adresse :" + ], + [ + "SIGNATURE", + "00 rue du Nomderue" + ], + [ + "SIGNATURE", + "75000 Paris" + ], + [ + "THANKS", + "Merci" + ], + [ + "GREETINGS", + "Bien a vous," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Permettez-moi de vous signaler un changement d'adresse me concernant\nVoici ma nouvelle adresse :", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "changement", + "d", + "adresse", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "permettez-moi", + "de", + "vous", + "signaler", + "un", + "changement", + "d", + "adresse", + "me", + "concernant", + "voici", + "ma", + "nouvelle", + "adresse" + ] + }, + { + "age": 64, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:29 -----\r\n\r\n De :\tmonsieurdupont@extensionf.net\r\n A :\tSociete Imaginaire \r\n Date :\t02/06/2018 11:07\r\n Objet :\tFwd: Changement de vehicule\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Pourriez vous faire suite au mail suivant en date du 22 Mai\r\n 2017.\r\n\r\n\r\n De: monsieurdupont@extensionf.net\r\n \u00c0: \"Societe Imaginaire\" \r\n Envoy\u00e9: Mardi 22 Mai 2017 10:15:25\r\n Objet: Changement de vehicule\r\n\r\n Bonjour,\r\n\r\n Merci de 
bien vouloir transf\u00e9rer l'assurance du vehicuel sur le\r\n Scooter,\r\n dont les r\u00e9f\u00e9rences sont sur la facture fourni en pi\u00e8ce-jointe.\r\n\r\n Me faire parvenir l'attestation.\r\n\r\n Merci.\r\n\r\n MR Dupont,\r\n le 22 mai 2017\r\n\r\n Cordialement\r\n (See attached file: Facture.jpg)", + "body_tokens": [ + "pourriez", + "vous", + "faire", + "suite", + "au", + "mail", + "suivant", + "en", + "date", + "du", + "22", + "mai", + "2017" + ], + "clean_header": "tr : fwd: changement de scooter !", + "date": "2019-04-21 20:43:58", + "flagged_header": "tr : fwd: changement de scooter !", + "flagged_text": "Pourriez vous faire suite au mail suivant en date du 22 Mai\n2017", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Fwd: Changement de Scooter !", + "header_tokens": [ + "tr", + "fwd", + "changement", + "de", + "scooter" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Pourriez vous faire suite au mail suivant en date du 22 Mai" + ], + [ + "BODY", + "2017" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Merci de bien vouloir transferer l'assurance du vehicuel sur le" + ], + [ + "BODY", + "Scooter," + ], + [ + "BODY", + "dont les references sont sur la facture fourni en piece-jointe" + ], + [ + "BODY", + "Me faire parvenir l'attestation" + ], + [ + "THANKS", + "Merci" + ], + [ + "BODY", + "MR Dupont," + ], + [ + "BODY", + "le 22 mai 2017" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "PJ", + "(See attached file: Facture.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Pourriez vous faire suite au mail suivant en date du 22 Mai\n2017", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "fwd", + "changement", + "de", + "scooter", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]", + "pourriez", + "vous", + "faire", + "suite", + "au", + "mail", + "suivant", + "en", + "date", + "du", + "22", + "mai", + "2017" + ] + }, + { + "age": 20, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 04/06/2018 10:38 -----\r\n\r\n De :\tmonsieurdupont@extensiona.com\r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 16:52\r\n Objet :\tR\u00e9clamations\r\n\r\n\r\n\r\n Num\u00e9ro T\u00e9l. : 0600000000\r\n E-mail : monsieurdupont@extensiona.com\r\n M. DUPONT\r\n Adresse : 94000\r\n Objet de la demande : R\u00e9clamations\r\n\r\n Bonjour, j'aurais besoin de l'attestation\r\n pour ma declaration de revenus 2018.\r\n Merci d'avance\r\n ", + "body_tokens": [ + "m", + "dupont", + "adresse", + "94000", + "objet", + "de", + "la", + "deman" + ], + "clean_header": "tr : reclamations ", + "date": "2022-02-26 14:07:52", + "flagged_header": "tr : reclamations", + "flagged_text": "M. DUPONT\nAdresse : 94000\nObjet de la deman", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : R\u00e9clamations ", + "header_tokens": [ + "tr", + "reclamations" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "SIGNATURE", + "Numero Tel" + ], + [ + "SIGNATURE", + ": 0600000000" + ], + [ + "SIGNATURE", + "E-mail : monsieurdupont@extensiona.com" + ], + [ + "BODY", + "M. DUPONT" + ], + [ + "BODY", + "Adresse : 94000" + ], + [ + "BODY", + "Objet de la deman" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, j'aurais besoin de l'attestation" + ], + [ + "BODY", + "pour ma declaration de revenus 2018" + ], + [ + "THANKS", + "Merci d'avance" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "M. 
DUPONT\nAdresse : 94000\nObjet de la deman", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "reclamations", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "m", + "dupont", + "adresse", + "94000", + "objet", + "de", + "la", + "deman" + ] + }, + { + "age": 51, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour ,\r\n\r\n Veuillez recevoir le certificat de cession de mon v\u00e9hicule afin que vous\r\n puissiez effectuer la r\u00e9siliation de mon contrat.\r\n Je reviendrai vers vous afin d\u2019assurer mon nouveau v\u00e9hicule bient\u00f4t.\r\n\r\n Bien \u00e0 vous ,\r\n\r\n Mr DUPONT\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n\r\n Envoy\u00e9 de mon iPad", + "body_tokens": [ + "veuillez", + "recevoir", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "afin", + "que", + "vous", + "puissiez", + "effectuer", + "la", + "resiliation", + "de", + "mon", + "contrat", + "je", + "reviendrai", + "vers", + "vous", + "afin", + "d", + "assurer", + "mon", + "nouveau", + "vehicule", + "bientot" + ], + "clean_header": "resiliation contrat voiture ", + "date": "2021-09-06 07:28:42", + "flagged_header": "resiliation contrat voiture", + "flagged_text": "Veuillez recevoir le certificat de cession de mon vehicule afin que vous\npuissiez effectuer la resiliation de mon contrat\nJe reviendrai vers vous afin d'assurer mon nouveau vehicule bientot", + "from": "Monsieur Dupont ", + "header": "R\u00e9siliation contrat voiture ", + "header_tokens": [ + "resiliation", + "contrat", + "voiture" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour ," + ], + [ + "BODY", + "Veuillez recevoir le certificat de cession de mon vehicule afin que vous" + ], + [ + "BODY", + "puissiez effectuer la resiliation de mon contrat" + ], + [ + "BODY", + "Je reviendrai vers vous afin d'assurer mon nouveau 
vehicule bientot" + ], + [ + "GREETINGS", + "Bien a vous ," + ], + [ + "SIGNATURE_NAME", + "Mr DUPONT" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "FOOTER", + "Envoye de mon iPad" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Veuillez recevoir le certificat de cession de mon vehicule afin que vous\npuissiez effectuer la resiliation de mon contrat\nJe reviendrai vers vous afin d'assurer mon nouveau vehicule bientot", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "resiliation", + "contrat", + "voiture", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "veuillez", + "recevoir", + "certificat", + "de", + "cession", + "de", + "mon", + "vehicule", + "afin", + "que", + "vous", + "puissiez", + "effectuer", + "la", + "resiliation", + "de", + "mon", + "contrat", + "je", + "reviendrai", + "vers", + "vous", + "afin", + "d", + "assurer", + "mon", + "nouveau", + "vehicule", + "bientot" + ] + }, + { + "age": 77, + "attachment": "[\"Relev\u00e9 d'informations.pdf\"]", + "attachments": [ + "Relev\u00e9 d'informations.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n --\r\n Monsieur Dupont\r\n 02 chemin imaginaire\r\n 84000\r\n monsieurdupont@extensiona.com\r\n 06.00.00.00.00(See attached file: Relev\u00e9 d'informations.pdf)", + "body_tokens": [ + "06", + "00", + "00", + "00", + "00", + "see", + "attached", + "file", + "releve", + "d", + "informations", + "pdf" + ], + "clean_header": "demande", + "date": "2020-06-25 15:26:02", + "flagged_header": "demande", + "flagged_text": "06\n00\n00\n00\n00(See attached file: Releve d'informations.pdf)", + "from": "Monsieur Dupont ", + "header": "Demande", + "header_tokens": [ + "demande" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "02 chemin imaginaire" + ], + [ + "SIGNATURE", + "84000" + ], + [ + "SIGNATURE", + "monsieurdupont@extensiona.com" + ], 
+ [ + "BODY", + "06" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00(See attached file: Releve d'informations.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "06\n00\n00\n00\n00(See attached file: Releve d'informations.pdf)", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "06", + "00", + "00", + "00", + "00", + "see", + "attached", + "file", + "releve", + "d", + "informations", + "pdf" + ] + }, + { + "age": 40, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour, je vous remercie de ce retour. Pouvez vous r\u00e9pondre aux\r\n diff\u00e9rentes questions soulev\u00e9es dans mon mail afin que je prenne ma\r\n d\u00e9cision. Bien cordialement. Mr Dupont.\r\n\r\n Obtenez Outlook pour iOS\r\n\r\n From: conseiller@Societeimaginaire.fr \r\n Sent: Monday, June 4, 2018 1:56:43 PM\r\n To: monsieurdupont@hotmail.com\r\n Subject: : Votre devis v\u00e9hicule\r\n\r\n Bonjour,\r\n Veuillez trouver ci-joint le devis que vous nous avez demand\u00e9.\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.\r\n Cordialement.\r\n La Societe Imaginaire\r\n ", + "body_tokens": [ + "bonjour", + "je", + "vous", + "remercie", + "de", + "ce", + "retour", + "pouvez", + "vous", + "repondre", + "aux", + "differentes", + "questions", + "soulevees", + "dans", + "mon", + "mail", + "afin", + "que", + "je", + "prenne", + "ma", + "decision" + ], + "clean_header": "tr : re: interrogations", + "date": "2022-10-02 04:04:31", + "flagged_header": "tr : re: interrogations", + "flagged_text": "Bonjour, je vous remercie de ce retour\nPouvez vous repondre aux\ndifferentes questions soulevees dans mon mail afin que je prenne ma\ndecision", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Re: Interrogations", + "header_tokens": [ + "tr", + "re", + "interrogations" + ], + "label": "vehicule", + "messages": [ + { + 
"clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, je vous remercie de ce retour" + ], + [ + "BODY", + "Pouvez vous repondre aux" + ], + [ + "BODY", + "differentes questions soulevees dans mon mail afin que je prenne ma" + ], + [ + "BODY", + "decision" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE_NAME", + "Mr Dupont" + ], + [ + "FOOTER", + "Obtenez Outlook pour iOS" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Subject: : Votre devis vehicule" + ], + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Veuillez trouver ci-joint le devis que vous nous avez demande" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Bonjour, je vous remercie de ce retour\nPouvez vous repondre aux\ndifferentes questions soulevees dans mon mail afin que je prenne ma\ndecision", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "re", + "interrogations", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "bonjour", + "je", + "vous", + "remercie", + "de", + "ce", + "retour", + "pouvez", + "vous", + "repondre", + "aux", + "differentes", + "questions", + "soulevees", + "dans", + "mon", + "mail", + "afin", + "que", + "je", + "prenne", + "ma", + "decision" + ] + }, + { + "age": 86, + "attachment": "[\"RIB.pdf\"]", + "attachments": [ + "RIB.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n De :\tmonsieurdupond \r\n A :\t\"conseiller@Societeimaginaire.fr\" \r\n Date :\t04/06/2018 13:47\r\n Objet :\tRE: Correspondance de La Societe Imaginaire\r\n\r\n\r\n\r\n Bonjour\r\n Ci-joint le rib du compte comme demand\u00e9\r\n Bien \u00e0 vous\r\n\r\n Monsieur DUPONT\r\n (Embedded image moved to file: pic.jpg)\r\n +33(0) 6 00 00 00 00\r\n\r\n De : 
conseiller@Societeimaginaire.fr [mailto:conseiller@Societeimaginaire.fr]\r\n Envoy\u00e9 : lundi 4 juin 2018 12:23\r\n \u00c0 : Monsieur Dupont \r\n Objet : Correspondance de La Societe Imaginaire\r\n\r\n Bonjour.\r\n\r\n Veuillez prendre connaissance des documents ci-joints :\r\n 1 - Courrier\r\n 2 - Envoi Devis habitation\r\n\r\n Meilleurs sentiments.\r\n\r\n La Societe Imaginaire\r\n\r\n La visualisation des fichiers PDF n\u00e9cessite Adobe Reader.(See attached\r\n file: RIB.pdf)", + "body_tokens": [ + "ci_joint", + "rib", + "du", + "compte", + "comme", + "demande" + ], + "clean_header": "rib", + "date": "2019-02-05 15:53:44", + "flagged_header": "rib", + "flagged_text": "ci_joint le rib du compte comme demande", + "from": "conseiller@societeimaginaire.fr", + "header": "RIB", + "header_tokens": [ + "rib" + ], + "label": "regularisation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Ci-joint le rib du compte comme demande" + ], + [ + "GREETINGS", + "Bien a vous" + ], + [ + "HELLO", + "Monsieur DUPONT" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "SIGNATURE", + "+33(0) 6 00 00 00 00" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Veuillez prendre connaissance des documents ci-joints :" + ], + [ + "SIGNATURE", + "1 - Courrier" + ], + [ + "SIGNATURE", + "2 - Envoi Devis habitation" + ], + [ + "BODY", + "Meilleurs sentiments" + ], + [ + "SIGNATURE", + "La Societe Imaginaire" + ], + [ + "FOOTER", + "La visualisation des fichiers PDF necessite Adobe Reader" + ], + [ + "BODY", + "(See attached" + ], + [ + "BODY", + "file: RIB.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Ci-joint le rib du compte comme demande", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "rib", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + 
"[PAD]", + "ci_joint", + "rib", + "du", + "compte", + "comme", + "demande" + ] + }, + { + "age": 52, + "attachment": "[\"image-a7c10.png\",\"Releve_Information.pdf\"]", + "attachments": [ + "image-a7c10.png", + "Releve_Information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Veuillez trouver ci-joint le RI de Mme Dupont,\r\n\r\n Vous souhaitant bonne r\u00e9ception,\r\n\r\n\r\n Bien cordialement.\r\n\r\n\r\n (Embedded image moved to file: pic30109.jpg)\r\n [IMAGE]\r\n\r\n Le 2018-06-04 20:37, Mr DUPONT a \u00e9crit\u00a0:\r\n Bonjour Monsieur,\r\n\r\n Pourriez-vous, s'il vous plait, faire parvenir mon relev\u00e9 d'informations \u00e0\r\n la Societe Imaginaire, par retour de mail, en gardant la r\u00e9f\u00e9rence cit\u00e9e en objet qui\r\n vous plait?\r\n\r\n Je vous remercie,\r\n\r\n Monsieur Dupont\r\n\r\n -------- Message d'origine --------\r\n De : conseiller@Societeimaginaire.fr\r\n Date : 04/06/2018 14:30 (GMT+01:00)\r\n \u00c0 : monsieurdupont@polytechnique.edu\r\n Objet : Confirmation de votre assurance v\u00e9hicule\u00a0\r\n\r\n\r\n Cher(e) client,\r\n\r\n Nous vous confirmons l'enregistrement de l'assurance de votre v\u00e9hicule en\r\n en date du 01/05/2017 suite \u00e0 la demande de r\u00e9siliation que nous avons\r\n effectu\u00e9e aupr\u00e8s de la Societe Concurrente.\r\n\r\n Toutefois, ces derniers ne nous ont pas envoy\u00e9 votre Relev\u00e9 d'Information\r\n donc il va falloir que vous leur demandiez pour ensuite nous le transmettre\r\n par mail \u00e0 l'adresse conseiller@Societeimaginaire.fr\r\n En attendant ce document, nous vous\r\n assurons quand m\u00eame, mais de mani\u00e8re provisoire.\r\n\r\n De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez.\r\n Pour cela, le plus simple et\r\n de nous contacter au 09.00.00.00.00 ou de pr\u00e9voir un recontact via notre\r\n site Societeimaginaire.fr\r\n\r\n\r\n Bien Cordialement.\r\n\r\n Au nom de l'\u00e9quipe Societe Imaginaire\r\n\r\n logo Societe 
Imaginaire\r\n\r\n (See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)", + "body_tokens": [ + "veuillez", + "trouver", + "ci_joint", + "ri", + "de", + "mme", + "dupont", + "vous", + "souhaitant", + "bonne", + "reception" + ], + "clean_header": "re: confirmation de votre assurance vehicule", + "date": "2020-09-29 13:14:36", + "flagged_header": "re: confirmation de votre assurance vehicule", + "flagged_text": "Veuillez trouver ci_joint le RI de Mme Dupont,\nVous souhaitant bonne reception,", + "from": "Conseiller ", + "header": "Re: Confirmation de votre assurance v\u00e9hicule", + "header_tokens": [ + "re", + "confirmation", + "de", + "votre", + "assurance", + "vehicule" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Veuillez trouver ci-joint le RI de Mme Dupont," + ], + [ + "BODY", + "Vous souhaitant bonne reception," + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "PJ", + "(Embedded image moved to file: pic30109.jpg)" + ], + [ + "BODY", + "[IMAGE]" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Monsieur," + ], + [ + "BODY", + "Pourriez-vous, s'il vous plait, faire parvenir mon releve d'informations a" + ], + [ + "BODY", + "la Societe Imaginaire, par retour de mail, en gardant la reference citee en objet qui" + ], + [ + "BODY", + "vous plait?" 
+ ], + [ + "THANKS", + "Je vous remercie," + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Cher(e) client," + ], + [ + "BODY", + "Nous vous confirmons l'enregistrement de l'assurance de votre vehicule en" + ], + [ + "BODY", + "en date du 01/05/2017 suite a la demande de resiliation que nous avons" + ], + [ + "BODY", + "effectuee aupres de la Societe Concurrente" + ], + [ + "BODY", + "Toutefois, ces derniers ne nous ont pas envoye votre Releve d'Information" + ], + [ + "BODY", + "donc il va falloir que vous leur demandiez pour ensuite nous le transmettre" + ], + [ + "BODY", + "par mail a l'adresse conseiller@Societeimaginaire.fr" + ], + [ + "BODY", + "En attendant ce document, nous vous" + ], + [ + "BODY", + "assurons quand meme, mais de maniere provisoire" + ], + [ + "BODY", + "De plus, il faudra que l'on voit ensemble quel mode de paiement vous souhaitez" + ], + [ + "BODY", + "Pour cela, le plus simple et" + ], + [ + "BODY", + "de nous contacter au 09" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "BODY", + "00" + ], + [ + "SIGNATURE", + "00 ou de prevoir un recontact via notre" + ], + [ + "BODY", + "site Societeimaginaire.fr" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "BODY", + "Au nom de l'equipe Societe Imaginaire" + ], + [ + "SIGNATURE", + "logo Societe Imaginaire" + ], + [ + "PJ", + "(See attached file: image-a7c10.png)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)(See attached file: Releve_Information.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Veuillez trouver ci-joint le RI de Mme Dupont,\nVous souhaitant bonne reception,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "re", + "confirmation", + "de", + "votre", + "assurance", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "veuillez", + "trouver", + "ci_joint", + 
"ri", + "de", + "mme", + "dupont", + "vous", + "souhaitant", + "bonne", + "reception" + ] + }, + { + "age": 72, + "attachment": "[\"rib.pdf\"]", + "attachments": [ + "rib.pdf" + ], + "body": "\r\n\r\n\r\n Bonjour\r\n Suite \u00e0 notre entretien t\u00e9l\u00e9phonique, veuillez trouver ici mon relev\u00e9\r\n d'identit\u00e9 bancaire.\r\n\r\n Au nom de Monsieur Dupont\r\n\r\n Bien cordialement\r\n\r\n --\r\n Monsieur Dupont\r\n 32 avenue Imaginaire\r\n T\u00e9l: +33 (0)600 00 00 00\r\n ", + "body_tokens": [ + "suite", + "a", + "notre", + "entretien", + "telephonique", + "veuillez", + "trouver", + "ici", + "mon", + "releve", + "d", + "identite", + "bancaire" + ], + "clean_header": "rib", + "date": "2022-06-09 21:54:35", + "flagged_header": "rib", + "flagged_text": "Suite a notre entretien telephonique, veuillez trouver ici mon releve\nd'identite bancaire", + "from": "Monsieur Dupont ", + "header": "RIB", + "header_tokens": [ + "rib" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Suite a notre entretien telephonique, veuillez trouver ici mon releve" + ], + [ + "BODY", + "d'identite bancaire" + ], + [ + "HELLO", + "Au nom de Monsieur Dupont" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "SIGNATURE", + "32 avenue Imaginaire" + ], + [ + "SIGNATURE", + "Tel: +33 (0)600 00 00 00" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Suite a notre entretien telephonique, veuillez trouver ici mon releve\nd'identite bancaire", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "rib", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "suite", + "a", + "notre", + "entretien", + "telephonique", + "veuillez", + "trouver", + "ici", + "mon", + "releve", + "d", + "identite", + "bancaire" + ] + }, + { + "age": 39, + "attachment": "[\"pic.jpg\"]", + "attachments": [ + "pic.jpg" + ], + "body": 
"\r\n\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Nous souhaitons d\u00e9clar\u00e9s deux sinistres concernant nos deux v\u00e9hicules.\r\n\r\n 1) Vehicule1 :\r\n\r\n Le v\u00e9hicule \u00e9tait stationnement sur le parking et il\r\n pr\u00e9sente une trace sur l'aile arri\u00e8re droite et sur le\r\n pare-choc.\r\n\r\n 2) Vehicule2 :\r\n\r\n Le conducteur s'est gar\u00e9 sur un parking d'entreprise.\r\n Il a cogn\u00e9 avec le pneu avant droit.\r\n\r\n\r\n En vous souhaitant bonne r\u00e9ception de ces \u00e9l\u00e9ments,\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n (Embedded image moved to file: pic.jpg)", + "body_tokens": [ + "nous", + "souhaitons", + "declares", + "deux", + "sinistres", + "concernant", + "nos", + "deux", + "vehicules", + "1", + "vehicule1", + "vehicule", + "etait", + "stationnement", + "sur", + "parking", + "et", + "il", + "presente", + "une", + "trace", + "sur", + "l", + "aile", + "arriere", + "droite", + "et", + "sur", + "pare-choc", + "2", + "vehicule2", + "il", + "a", + "cogne", + "avec", + "pneu", + "avant", + "droit", + "en", + "vous", + "souhaitant", + "bonne", + "reception", + "de", + "ces", + "elements" + ], + "clean_header": "sinistres", + "date": "2019-12-20 08:02:52", + "flagged_header": "sinistres", + "flagged_text": "Nous souhaitons declares deux sinistres concernant nos deux vehicules\n1) Vehicule1 :\nLe vehicule etait stationnement sur le parking et il\npresente une trace sur l'aile arriere droite et sur le\npare-choc\n2) Vehicule2 :\nIl a cogne avec le pneu avant droit\nEn vous souhaitant bonne reception de ces elements,", + "from": "Monsieur Dupont ", + "header": "sinistres", + "header_tokens": [ + "sinistres" + ], + "label": "sinistres", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Nous souhaitons declares deux sinistres concernant nos deux vehicules" + ], + [ + "BODY", + "1) 
Vehicule1 :" + ], + [ + "BODY", + "Le vehicule etait stationnement sur le parking et il" + ], + [ + "BODY", + "presente une trace sur l'aile arriere droite et sur le" + ], + [ + "BODY", + "pare-choc" + ], + [ + "BODY", + "2) Vehicule2 :" + ], + [ + "SIGNATURE", + "Le conducteur s'est gare sur un parking d'entreprise" + ], + [ + "BODY", + "Il a cogne avec le pneu avant droit" + ], + [ + "BODY", + "En vous souhaitant bonne reception de ces elements," + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Nous souhaitons declares deux sinistres concernant nos deux vehicules\n1) Vehicule1 :\nLe vehicule etait stationnement sur le parking et il\npresente une trace sur l'aile arriere droite et sur le\npare-choc\n2) Vehicule2 :\nIl a cogne avec le pneu avant droit\nEn vous souhaitant bonne reception de ces elements,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "sinistres", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "nous", + "souhaitons", + "declares", + "deux", + "sinistres", + "concernant", + "nos", + "deux", + "vehicules", + "1", + "vehicule1", + "vehicule", + "etait", + "stationnement", + "sur", + "parking", + "et", + "il", + "presente", + "une", + "trace", + "sur", + "l", + "aile", + "arriere", + "droite", + "et", + "sur", + "pare-choc", + "2", + "vehicule2", + "il", + "a", + "cogne", + "avec", + "pneu", + "avant", + "droit", + "en", + "vous", + "souhaitant", + "bonne", + "reception", + "de", + "ces", + "elements" + ] + }, + { + "age": 63, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n\r\n Nous restons \u00e0 votre disposition\r\n\r\n Cordialement\r\n\r\n Societe Imaginaire\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller le 05/06/2018 10:05 -----\r\n\r\n De :\tmonsieurdupont \r\n A 
:\tconseiller@Societeimaginaire.fr\r\n Date :\t03/06/2018 10:26\r\n Objet :\tModification de contrat\r\n\r\n\r\n\r\n Bonjour Madame,\r\n\r\n Notre fils, conducteur principal du v\u00e9hicule \u00e9tant\r\n en stage puis \u00e0 l'\u00e9tranger pour une bonne partie de l'an\r\n prochain, son v\u00e9hicule est d\u00e9sormais \u00e0 la maison et il ne sera amen\u00e9 \u00e0\r\n l'utiliser que rarement.\r\n Pour cette raison, nous souhaiterions modifier son contrat d'assurance\r\n d\u00e8s que possible.\r\n\r\n Vous remerciant par avance de votre concours, nous restons \u00e0 votre\r\n disposition pour toute information compl\u00e9mentaire.\r\n\r\n Cordialement\r\n\r\n Monsieur Dupont\r\n ", + "body_tokens": [ + "nous", + "restons", + "a", + "votre", + "disposition" + ], + "clean_header": "contrat vehicule", + "date": "2020-02-01 01:37:56", + "flagged_header": "contrat vehicule", + "flagged_text": "Nous restons a votre disposition", + "from": "conseiller@societeimaginaire.fr", + "header": "contrat vehicule", + "header_tokens": [ + "contrat", + "vehicule" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Nous restons a votre disposition" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "SIGNATURE", + "Societe Imaginaire" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour Madame," + ], + [ + "BODY", + "Notre fils, conducteur principal du vehicule etant" + ], + [ + "BODY", + "en stage puis a l'etranger pour une bonne partie de l'an" + ], + [ + "BODY", + "prochain, son vehicule est desormais a la maison et il ne sera amene a" + ], + [ + "BODY", + "l'utiliser que rarement" + ], + [ + "BODY", + "Pour cette raison, nous souhaiterions modifier son contrat d'assurance" + ], + [ + "BODY", + "des que possible" + ], + [ + "BODY", + "Vous remerciant par avance de votre concours, nous restons a votre" + ], + [ + "BODY", 
+ "disposition pour toute information complementaire" + ], + [ + "GREETINGS", + "Cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Nous restons a votre disposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "contrat", + "vehicule", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "nous", + "restons", + "a", + "votre", + "disposition" + ] + }, + { + "age": 79, + "attachment": "[\"cession.pdf\"]", + "attachments": [ + "cession.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n De :\tMonsieur Dupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Cc\u00a0:\tMonsieur Dupont , Madame Dupont\r\n \r\n Date :\t04/06/2018 18:08\r\n Objet :\tr\u00e9siliation couverture v\u00e9hicule suite cession\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Le v\u00e9hicule a \u00e9t\u00e9 c\u00e9d\u00e9 le 2 avril 2018.\r\n\r\n Merci, ci joint le document de cession scann\u00e9\r\n\r\n Cordialement,\r\n\r\n\r\n Monsieur Dupont - Orange\r\n monsieurdupont@extensionj.fr\r\n\r\n responsable : Monsieur Dupont\r\n 06 00 00 00 00\r\n monsieurdupont@extensionj.fr\r\n\r\n\r\n\r\n\r\n (See attached file: cession.pdf)", + "body_tokens": [], + "clean_header": "tr : resiliation couverture vehicule suite cession", + "date": "2019-05-30 15:47:18", + "flagged_header": "tr : resiliation couverture vehicule suite cession", + "flagged_text": "", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : r\u00e9siliation couverture v\u00e9hicule suite cession", + "header_tokens": [ + "tr", + "resiliation", + "couverture", + "vehicule", + "suite", + "cession" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "\r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t05/06/2018 13:34\r\n Objet :\tR\u00e9ponse au dossier de pr\u00eat\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Vous nous avez adress\u00e9 un dossier de pr\u00eat concernant votre client.\r\n\r\n Nous 
vous informons de notre accord.\r\n\r\n Nous restons \u00e0 votre enti\u00e8re disposition.\r\n\r\n Bien cordialement.\r\n\r\n Conseiller\r\n\r\n Societe Imaginaire\r\n\r\n T\u00e9l : 05 00 00 00 00\r\n Fax : 05 00 00 00 00\r\n E-mail: conseiller@societeimaginaire.fr\r\n\r\n\r\n", + "body_tokens": [ + "vous", + "nous", + "avez", + "adresse", + "un", + "dossier", + "de", + "pret", + "concernant", + "votre", + "client", + "nous", + "vous", + "informons", + "de", + "notre", + "accord", + "nous", + "restons", + "a", + "votre", + "entiere", + "disposition" + ], + "clean_header": "dossier de pret ", + "date": "2019-06-05 21:18:07", + "flagged_header": "dossier de pret", + "flagged_text": "Vous nous avez adresse un dossier de pret concernant votre client\nNous vous informons de notre accord\nNous restons a votre entiere disposition", + "from": "conseiller@societeimaginaire.fr", + "header": "dossier de pr\u00eat ", + "header_tokens": [ + "dossier", + "de", + "pret" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Vous nous avez adresse un dossier de pret concernant votre client" + ], + [ + "BODY", + "Nous vous informons de notre accord" + ], + [ + "BODY", + "Nous restons a votre entiere disposition" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE", + "Societe Imaginaire" + ], + [ + "SIGNATURE", + "Tel : 05 00 00 00 00" + ], + [ + "SIGNATURE", + "Fax : 05 00 00 00 00" + ], + [ + "SIGNATURE", + "E-mail: conseiller@societeimaginaire.fr" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Vous nous avez adresse un dossier de pret concernant votre client\nNous vous informons de notre accord\nNous restons a votre entiere disposition", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "dossier", + "de", + "pret", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "vous", + 
"nous", + "avez", + "adresse", + "un", + "dossier", + "de", + "pret", + "concernant", + "votre", + "client", + "nous", + "vous", + "informons", + "de", + "notre", + "accord", + "nous", + "restons", + "a", + "votre", + "entiere", + "disposition" + ] + }, + { + "age": 45, + "attachment": "[\"doc.pdf\"]", + "attachments": [ + "doc.pdf" + ], + "body": "\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n\r\n Je vous prie de trouver ci-joint une d\u00e9claration de sinistre,\r\n relative au cas de Monsieur Dupont, survenu le lundi 6 Mai.\r\n\r\n\r\n\r\n Avec nos cordiales salutations.\r\n\r\n\r\n\r\n (Embedded image moved to file: pic.jpg)\r\n\r\n \u00a0(See attached file: doc.pdf)", + "body_tokens": [ + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "une", + "declaration", + "de", + "sinistre", + "relative", + "au", + "cas", + "de", + "monsieur", + "dupont", + "survenu", + "lundi", + "6", + "mai", + "avec", + "nos", + "cordiales", + "salutations" + ], + "clean_header": "declarations de sinistre corporel et materiel", + "date": "2020-09-29 17:53:01", + "flagged_header": "declarations de sinistre corporel et materiel", + "flagged_text": "Je vous prie de trouver ci_joint une declaration de sinistre,\nrelative au cas de Monsieur Dupont, survenu le lundi 6 Mai\nAvec nos cordiales salutations", + "from": "Monsieur Dupont ", + "header": "d\u00e9clarations de sinistre corporel et mat\u00e9riel", + "header_tokens": [ + "declarations", + "de", + "sinistre", + "corporel", + "et", + "materiel" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous prie de trouver ci-joint une declaration de sinistre," + ], + [ + "BODY", + "relative au cas de Monsieur Dupont, survenu le lundi 6 Mai" + ], + [ + "BODY", + "Avec nos cordiales salutations" + ], + [ + "PJ", + "(Embedded image moved to file: pic.jpg)" + ], + [ + "PJ", + "(See attached file: 
doc.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "M", + "text": "Je vous prie de trouver ci-joint une declaration de sinistre,\nrelative au cas de Monsieur Dupont, survenu le lundi 6 Mai\nAvec nos cordiales salutations", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "declarations", + "de", + "sinistre", + "corporel", + "et", + "materiel", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "une", + "declaration", + "de", + "sinistre", + "relative", + "au", + "cas", + "de", + "monsieur", + "dupont", + "survenu", + "lundi", + "6", + "mai", + "avec", + "nos", + "cordiales", + "salutations" + ] + }, + { + "age": 48, + "attachment": "[\"Attestationemployeur.pdf\",\"Relev\u00e9_d'information.pdf\"]", + "attachments": [ + "Attestationemployeur.pdf", + "Relev\u00e9_d'information.pdf" + ], + "body": "\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n Je vous prie de trouver ci-joints les justificatifs demand\u00e9s pour la\r\n souscription de mon assurance auto.\r\n\r\n Bien cordialement,\r\n\r\n Monsieur Dupont\r\n (See attached file: Attestationemployeur.pdf)\r\n(See attached file: Relev\u00e9_d'information.pdf)", + "body_tokens": [ + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "s", + "justificatifs", + "demandes", + "pour", + "la", + "souscription", + "de", + "mon", + "assurance", + "auto" + ], + "clean_header": "demande d'assurance auto", + "date": "2022-01-22 22:27:05", + "flagged_header": "demande d'assurance auto", + "flagged_text": "Je vous prie de trouver ci_joint s les justificatifs demandes pour la\nsouscription de mon assurance auto", + "from": "Monsieur Dupont ", + "header": "Demande d'assurance auto", + "header_tokens": [ + "demande", + "d", + "assurance", + "auto" + ], + "label": "resiliation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Je vous prie de trouver 
ci-joints les justificatifs demandes pour la" + ], + [ + "BODY", + "souscription de mon assurance auto" + ], + [ + "GREETINGS", + "Bien cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "PJ", + "(See attached file: Attestationemployeur.pdf)" + ], + [ + "PJ", + "(See attached file: Releve_d'information.pdf)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je vous prie de trouver ci-joints les justificatifs demandes pour la\nsouscription de mon assurance auto", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "d", + "assurance", + "auto", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "vous", + "prie", + "de", + "trouver", + "ci_joint", + "s", + "justificatifs", + "demandes", + "pour", + "la", + "souscription", + "de", + "mon", + "assurance", + "auto" + ] + }, + { + "age": 63, + "attachment": "[\"IMG.PNG\"]", + "attachments": [ + "IMG.PNG" + ], + "body": "\r\n\r\n\r\n\r\n\r\n Envoy\u00e9 de mon iPhone\r\n\r\n D\u00e9but du message transf\u00e9r\u00e9\u00a0:\r\n\r\n Exp\u00e9diteur: monsieurduponte@extensionh.fr\r\n Date: 7 juin 2018 \u00e0 16:34:25 UTC+2\r\n Destinataire: conseiller@Societeimaginaire.fr\r\n Objet: Demande\r\n\r\n\r\n\r\n Bonjour, suite \u00e0 notre conversation t\u00e9l\u00e9phonique voici la preuve\r\n de r\u00e8glement afin que vous puissiez \u00e9tudier ma demande de r\u00e9inscription.\r\n\r\n Cordialement,\r\n Monsieur Dupont\r\n [IMAGE]\r\n\r\n\r\n Envoy\u00e9 de mon iPhone(See attached file: IMG.PNG)", + "body_tokens": [], + "clean_header": "preuve reglement ", + "date": "2021-10-16 21:12:16", + "flagged_header": "preuve reglement", + "flagged_text": "", + "from": "Monsieur Dupont ", + "header": "preuve r\u00e9glement ", + "header_tokens": [ + "preuve", + "reglement" + ], + "label": "compte", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "FOOTER", + "Envoye de mon iPhone" + ] + ], + "text_from": null + }, + { + "clean_header": 
null, + "date": null, + "header": "", + "tags": [ + [ + "BODY", + "Bonjour, suite a notre conversation telephonique voici la preuve" + ], + [ + "BODY", + "de reglement afin que vous puissiez etudier ma demande de reinscription" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "[IMAGE]" + ], + [ + "FOOTER", + "Envoye de mon iPhone(See attached file: IMG.PNG)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "preuve", + "reglement", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]" + ] + }, + { + "age": 19, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Bonjour\r\n\r\n Je m'aper\u00e7ois ce jour que j'ai \u00e9t\u00e9 pr\u00e9l\u00e8v\u00e9e plusieurs fois:\r\n\r\n 1 fois sur mon compte bancaire BANQUE1\r\n 1 fois sur mon compte BANQUE2\r\n\r\n Je paye donc 2 fois l'assurance v\u00e9hicule et habitation\r\n\r\n Pourriez vous me rembourser la somme n\u00e9cessaire sur mon compte BANQUE1\r\n le plus rapidement possible.\r\n\r\n En vous remerciant par avance de votre r\u00e9ponse par retour de mail.\r\n Bien cordialement\r\n Monsieur Dupont", + "body_tokens": [ + "je", + "m", + "apercois", + "ce", + "jour", + "que", + "j", + "ai", + "ete", + "prelevee", + "plusieurs", + "fois", + "je", + "paye", + "donc", + "2", + "fois", + "l", + "assurance", + "vehicule", + "et", + "habitation", + "pourriez", + "vous", + "me", + "rembourser", + "la", + "somme", + "necessaire", + "sur", + "mon", + "compte", + "banque1", + "plus", + "rapidement", + "possible", + "en", + "vous", + "remerciant", + "par", + "avance", + "de", + "votre", + "reponse", + "par", + "retour", + "de", + "mail" + ], + "clean_header": "prelevements bancaires", + "date": "2022-05-02 06:52:20", + "flagged_header": "prelevements bancaires", + "flagged_text": "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:\nJe paye donc 2 fois l'assurance vehicule et 
habitation\nPourriez vous me rembourser la somme necessaire sur mon compte BANQUE1\nle plus rapidement possible\nEn vous remerciant par avance de votre reponse par retour de mail", + "from": "Monsieur Dupont ", + "header": "pr\u00e9l\u00e8vements bancaires", + "header_tokens": [ + "prelevements", + "bancaires" + ], + "label": "modification", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour" + ], + [ + "BODY", + "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:" + ], + [ + "SIGNATURE", + "1 fois sur mon compte bancaire BANQUE1" + ], + [ + "SIGNATURE", + "1 fois sur mon compte BANQUE2" + ], + [ + "BODY", + "Je paye donc 2 fois l'assurance vehicule et habitation" + ], + [ + "BODY", + "Pourriez vous me rembourser la somme necessaire sur mon compte BANQUE1" + ], + [ + "BODY", + "le plus rapidement possible" + ], + [ + "BODY", + "En vous remerciant par avance de votre reponse par retour de mail" + ], + [ + "GREETINGS", + "Bien cordialement" + ], + [ + "HELLO", + "Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Je m'apercois ce jour que j'ai ete prelevee plusieurs fois:\nJe paye donc 2 fois l'assurance vehicule et habitation\nPourriez vous me rembourser la somme necessaire sur mon compte BANQUE1\nle plus rapidement possible\nEn vous remerciant par avance de votre reponse par retour de mail", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "prelevements", + "bancaires", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "je", + "m", + "apercois", + "ce", + "jour", + "que", + "j", + "ai", + "ete", + "prelevee", + "plusieurs", + "fois", + "je", + "paye", + "donc", + "2", + "fois", + "l", + "assurance", + "vehicule", + "et", + "habitation", + "pourriez", + "vous", + "me", + "rembourser", + "la", + "somme", + "necessaire", + "sur", + "mon", + "compte", + "banque1", + "plus", + "rapidement", + "possible", + "en", + "vous", + "remerciant", + "par", + 
"avance", + "de", + "votre", + "reponse", + "par", + "retour", + "de", + "mail" + ] + }, + { + "age": 39, + "attachment": "[\"Liste.docx\",\"PV.pdf\",\"statuts.pdf\",\"RIB.jpeg\"]", + "attachments": [ + "Liste.docx", + "PV.pdf", + "statuts.pdf", + "RIB.jpeg" + ], + "body": "\r\n\r\n\r\n\r\n Bonjour,\r\n Voici, ci-joins les documents demand\u00e9s pr\u00e9c\u00e9demment.\r\n\r\n Je vous remercie de la rapidit\u00e9 de vos service,\r\n Bien \u00e0 vous,\r\n Monsieur Dupont(See attached file: Liste.docx)(See attached file:\r\n PV.pdf)(See attached file: statuts.pdf)(See attached file:\r\n RIB.jpeg)", + "body_tokens": [ + "voici", + "ci-joins", + "documents", + "demandes", + "precedemment", + "je", + "vous", + "remercie", + "de", + "la", + "rapidite", + "de", + "vos", + "service" + ], + "clean_header": "documents ", + "date": "2021-01-13 05:59:12", + "flagged_header": "documents", + "flagged_text": "Voici, ci-joins les documents demandes precedemment\nJe vous remercie de la rapidite de vos service,", + "from": "Monsieur Dupont ", + "header": "documents ", + "header_tokens": [ + "documents" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Voici, ci-joins les documents demandes precedemment" + ], + [ + "BODY", + "Je vous remercie de la rapidite de vos service," + ], + [ + "GREETINGS", + "Bien a vous," + ], + [ + "BODY", + "Monsieur Dupont(See attached file: Liste.docx)(See attached file:" + ], + [ + "BODY", + "PV.pdf)(See attached file: statuts.pdf)(See attached file:" + ], + [ + "BODY", + "RIB.jpeg)" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Voici, ci-joins les documents demandes precedemment\nJe vous remercie de la rapidite de vos service,", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "documents", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "voici", + "ci-joins", + "documents", + "demandes", + 
"precedemment", + "je", + "vous", + "remercie", + "de", + "la", + "rapidite", + "de", + "vos", + "service" + ] + }, + { + "age": 31, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n Centre Imaginaire\r\n\r\n 00 Rue de Nomderue\r\n\r\n 23000 VILLE IMAGINAIRE\r\n\r\n\r\n\r\n\r\n\r\n Madame, Monsieur,\r\n\r\n\r\n Merci de bien vouloir me faire parvenir une attestation d'assurance\r\n concernant la mise \u00e0 disposition de la salle pour 100 personnes.\r\n\r\n objet de la manifestation : r\u00e9union\r\n\r\n cordialement.\r\n\r\n\r\n Monsieur Dupont\r\n\r\n ", + "body_tokens": [ + "merci", + "de", + "bien", + "vouloir", + "me", + "faire", + "parvenir", + "une", + "attestation", + "d", + "assurance", + "concernant", + "la", + "mise", + "a", + "disposition", + "de", + "la", + "salle", + "pour", + "100", + "personnes", + "objet", + "de", + "la", + "manifestation", + "reunion" + ], + "clean_header": "demande d'attestation d'asurance", + "date": "2021-01-04 02:42:06", + "flagged_header": "demande d'attestation d'asurance", + "flagged_text": "Merci de bien vouloir me faire parvenir une attestation d'assurance\nconcernant la mise a disposition de la salle pour 100 personnes\nobjet de la manifestation : reunion", + "from": "Monsieur Dupont ", + "header": "demande d'attestation d'asurance", + "header_tokens": [ + "demande", + "d", + "attestation", + "d", + "asurance" + ], + "label": "habitation", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "SIGNATURE_NAME", + "Centre Imaginaire" + ], + [ + "SIGNATURE", + "00 Rue de Nomderue" + ], + [ + "SIGNATURE", + "23000 VILLE IMAGINAIRE" + ], + [ + "HELLO", + "Madame, Monsieur," + ], + [ + "BODY", + "Merci de bien vouloir me faire parvenir une attestation d'assurance" + ], + [ + "BODY", + "concernant la mise a disposition de la salle pour 100 personnes" + ], + [ + "BODY", + "objet de la manifestation : reunion" + ], + [ + "GREETINGS", + "cordialement" + ], + [ + "HELLO", + 
"Monsieur Dupont" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Merci de bien vouloir me faire parvenir une attestation d'assurance\nconcernant la mise a disposition de la salle pour 100 personnes\nobjet de la manifestation : reunion", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "demande", + "d", + "attestation", + "d", + "asurance", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "merci", + "de", + "bien", + "vouloir", + "me", + "faire", + "parvenir", + "une", + "attestation", + "d", + "assurance", + "concernant", + "la", + "mise", + "a", + "disposition", + "de", + "la", + "salle", + "pour", + "100", + "personnes", + "objet", + "de", + "la", + "manifestation", + "reunion" + ] + }, + { + "age": 18, + "attachment": "[]", + "attachments": [], + "body": "\r\n\r\n\r\n (Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page\r\n Facebook\r\n\r\n (Embedded image moved to file: pic.gif)\r\n\r\n\r\n\r\n ----- Transf\u00e9r\u00e9 par Conseiller/Societe Imaginaire le 07/06/2018 16:32 -----\r\n\r\n De :\tmonsieurdupont \r\n A :\tconseiller@Societeimaginaire.fr\r\n Date :\t07/06/2018 16:16\r\n Objet :\tRe : Message de votre conseill\u00e8re personnelle\r\n\r\n\r\n\r\n Bonjour,\r\n\r\n Merci de vos messages. 
La voiture est conduite par Monsieur Dupont.\r\n Nos deux voitures sont assur\u00e9es \u00e0 100%.\r\n\r\n Cordialement,\r\n\r\n Monsieur Dupont\r\n\r\n Le 07/06/18, \"conseiller@Societeimaginaire.fr\" a\r\n \u00e9crit :\r\n\r\n\r\n Ch\u00e8re client,\r\n\r\n Suite \u00e0 notre bilan du 01 mai dernier, je souhaitais savoir ce que vous\r\n avez d\u00e9cid\u00e9.\r\n\r\n\r\n Je devais vous \u00e9tablir un devis pour votre voiture donc si\r\n vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi\r\n que le type de couverture que vous recherchez, je vous enverrai le devis rapidement.\r\n\r\n\r\n J'attend de vos nouvelles.\r\n\r\n Bien Cordialement.\r\n\r\n Conseiller\r\n D\u00e9l\u00e9gation Conseil\r\n conseillerh@Societeimaginaire.fr\r\n\r\n ", + "body_tokens": [ + "facebook" + ], + "clean_header": "tr : message de votre conseillere personnelle", + "date": "2022-04-27 07:20:10", + "flagged_header": "tr : message de votre conseillere personnelle", + "flagged_text": "Facebook", + "from": "conseiller@societeimaginaire.fr", + "header": "Tr : Message de votre conseill\u00e8re personnelle", + "header_tokens": [ + "tr", + "message", + "de", + "votre", + "conseillere", + "personnelle" + ], + "label": "vehicule", + "messages": [ + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "PJ", + "(Embedded image moved to file: pic.jpg) Rejoignez-nous sur notre page" + ], + [ + "BODY", + "Facebook" + ], + [ + "PJ", + "(Embedded image moved to file: pic.gif)" + ] + ], + "text_from": null + }, + { + "clean_header": null, + "date": null, + "header": "", + "tags": [ + [ + "HELLO", + "Bonjour," + ], + [ + "BODY", + "Merci de vos messages" + ], + [ + "BODY", + "La voiture est conduite par Monsieur Dupont" + ], + [ + "BODY", + "Nos deux voitures sont assurees a 100%" + ], + [ + "GREETINGS", + "Cordialement," + ], + [ + "HELLO", + "Monsieur Dupont" + ], + [ + "BODY", + "Le 07/06/18, \"conseiller@Societeimaginaire.fr\" a" + ], + [ + 
"BODY", + "ecrit :" + ], + [ + "HELLO", + "Chere client," + ], + [ + "BODY", + "Suite a notre bilan du 01 mai dernier, je souhaitais savoir ce que vous" + ], + [ + "BODY", + "avez decide" + ], + [ + "BODY", + "Je devais vous etablir un devis pour votre voiture donc si" + ], + [ + "BODY", + "vous pouviez m'indiquer le conducteur principal, la date de permis de la voiture ainsi" + ], + [ + "BODY", + "que le type de couverture que vous recherchez, je vous enverrai le devis rapidement" + ], + [ + "BODY", + "J'attend de vos nouvelles" + ], + [ + "GREETINGS", + "Bien Cordialement" + ], + [ + "SIGNATURE", + "Conseiller" + ], + [ + "SIGNATURE_NAME", + "Delegation Conseil" + ], + [ + "SIGNATURE", + "conseillerh@Societeimaginaire.fr" + ] + ], + "text_from": null + } + ], + "sexe": "F", + "text": "Facebook", + "to": "demandes@societeimaginaire.fr", + "tokens": [ + "tr", + "message", + "de", + "votre", + "conseillere", + "personnelle", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "[PAD]", + "facebook" + ] + } +] From a0672d417c82472865c3191e5896beca41d78f2f Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:28:36 +0100 Subject: [PATCH 05/37] :poop: IO functionalities --- melusine/io/__init__.py | 7 +++ melusine/io/_classes.py | 95 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 melusine/io/__init__.py create mode 100644 melusine/io/_classes.py diff --git a/melusine/io/__init__.py b/melusine/io/__init__.py new file mode 100644 index 0000000..8c9e89a --- /dev/null +++ b/melusine/io/__init__.py @@ -0,0 +1,7 @@ +""" +The melusine.io module includes classes for input/output data. +""" + +from melusine.io._classes import IoMixin + +__all__ = ["IoMixin"] diff --git a/melusine/io/_classes.py b/melusine/io/_classes.py new file mode 100644 index 0000000..2daec83 --- /dev/null +++ b/melusine/io/_classes.py @@ -0,0 +1,95 @@ +""" +Contain IO classes implementation. 
+ +Contained classes: [IoMixin] +""" +from __future__ import annotations + +import logging +from typing import Any, Dict, List, Optional, Type, TypeVar + +from melusine import config + +logger = logging.getLogger(__name__) + +T = TypeVar("T", bound="IoMixin") + + +class InitError(Exception): + """ + Error raised when object instantiation fails. + """ + + +class IoMixin: + """ + Defines generic load methods. + """ + + def __init__(self, **kwargs: Any): + """Initialize attribute.""" + self.json_exclude_list: List[str] = ["_func", "json_exclude_list"] + + @classmethod + def from_config( + cls: Type[T], + config_key: Optional[str] = None, + config_dict: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> T: + """ + Instantiate a class from a config key or a config dict. + + Parameters + ---------- + config_key: str + Configuration key. + config_dict: Dict[str, Any] + Dictionary of config. + kwargs: Any + + Returns + ------- + _: T + Instantiated object. + """ + # Load from Melusine config + if config_dict is None: + if config_key is None: + raise ValueError("You should specify one and only one of 'config_key' and 'config_value'") + else: + config_dict = config[config_key] + else: + if config_key is not None: + raise ValueError("You should specify one and only one of 'config_key' and 'config_value'") + + # Update with keyword arguments + config_dict.update(**kwargs) + + return cls.from_dict(**config_dict) + + @classmethod + def from_dict(cls: Type[T], **params_dict: Dict[str, Any]) -> T: + """ + Method to instantiate a class based on a dict object. + + Parameters + ---------- + params_dict: Dict[str, Any] + Parameters dict. + + Returns + ------- + _: T + Instantiated object. 
+ """ + # Exclude parameters starting with an underscore + init_params = {key: value for key, value in params_dict.items() if not key.startswith("_")} + + try: + instance = cls(**init_params) + return instance + except Exception as error: + raise InitError(f"Failed to instantiate {cls.__name__} with attributes {init_params}.").with_traceback( + error.__traceback__ + ) From 0585c20c8a56e93c5947f90ac1ecafcf743df731 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:31:47 +0100 Subject: [PATCH 06/37] :sparkles: New feature: MelusineRegex --- melusine/regex/__init__.py | 10 ++ melusine/regex/emergency_regex.py | 69 +++++++++++++ melusine/regex/reply_regex.py | 64 +++++++++++++ melusine/regex/thanks_regex.py | 84 ++++++++++++++++ melusine/regex/transfer_regex.py | 65 +++++++++++++ melusine/regex/vacation_reply_regex.py | 128 +++++++++++++++++++++++++ 6 files changed, 420 insertions(+) create mode 100644 melusine/regex/__init__.py create mode 100644 melusine/regex/emergency_regex.py create mode 100644 melusine/regex/reply_regex.py create mode 100644 melusine/regex/thanks_regex.py create mode 100644 melusine/regex/transfer_regex.py create mode 100644 melusine/regex/vacation_reply_regex.py diff --git a/melusine/regex/__init__.py b/melusine/regex/__init__.py new file mode 100644 index 0000000..b987cd3 --- /dev/null +++ b/melusine/regex/__init__.py @@ -0,0 +1,10 @@ +""" +The melusine.regex module includes tools for handling regexes. 
+""" +from melusine.regex.emergency_regex import EmergencyRegex +from melusine.regex.reply_regex import ReplyRegex +from melusine.regex.thanks_regex import ThanksRegex +from melusine.regex.transfer_regex import TransferRegex +from melusine.regex.vacation_reply_regex import VacationReplyRegex + +__all__ = ["EmergencyRegex", "ReplyRegex", "ThanksRegex", "TransferRegex", "VacationReplyRegex"] diff --git a/melusine/regex/emergency_regex.py b/melusine/regex/emergency_regex.py new file mode 100644 index 0000000..6d1ba7d --- /dev/null +++ b/melusine/regex/emergency_regex.py @@ -0,0 +1,69 @@ +from typing import Dict, List, Optional, Union + +from melusine.base import MelusineRegex + + +class EmergencyRegex(MelusineRegex): + """ + Detect emergency patterns such as "urgent" or "emergency". + """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return r"urgent|emergency" + + @property + def neutral(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return r"emergency exit" + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return dict( + blacklist=r"Mrs. TooInsistent|Mr. Annoying", + not_my_business=r"GalaxyFarFarAway", + ) + + @property + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "We have an emergency", + "This message is urgent", + ] + + @property + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "Mr. 
Annoying is calling for an emergency", + "Mrs. TooInsistent called 8 times for an urgent matter", + "There is an emergency in GalaxyFarFarAway", + ] diff --git a/melusine/regex/reply_regex.py b/melusine/regex/reply_regex.py new file mode 100644 index 0000000..e50194f --- /dev/null +++ b/melusine/regex/reply_regex.py @@ -0,0 +1,64 @@ +from typing import Dict, List, Optional, Union + +from melusine.base import MelusineRegex + + +class ReplyRegex(MelusineRegex): + """ + Detect reply patterns in headers such as "re:". + """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return r"^(re:|re :)" + + @property + def neutral(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "re: notre discussion", + "re : bonjour", + "Re : compte rendu", + "RE: rdv du 01/01/2001", + ] + + @property + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. 
+ """ + return ["renard agile", "tr: re: message du jour", "fwd:re: important notice"] diff --git a/melusine/regex/thanks_regex.py b/melusine/regex/thanks_regex.py new file mode 100644 index 0000000..6e77651 --- /dev/null +++ b/melusine/regex/thanks_regex.py @@ -0,0 +1,84 @@ +from typing import Dict, List, Optional, Union + +from melusine.base import MelusineRegex + + +class ThanksRegex(MelusineRegex): + """ + Detect thanks patterns such as "merci". + """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return r"\bmerci+s?\b|\bremercie?" + + @property + def neutral(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + forbidden_thanks_words = [ + r"oui", + r"non", + r"atten[dt]", + r"inform[ée]", + r"proposition", + r"ci\b", + r"join[st]?\b", + ] + + return dict(QUESTION=r"\?", FORBIDDEN_WORDS=r"\b(" + "|".join(forbidden_thanks_words) + ")") + + @property + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "merci", + "Je vous remercie pour votre rapidité", + "un grand MERCI à la MAIF", + "je tiens à remercier l'équipe", + ] + + @property + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. 
+ """ + return [ + # réponse à une question ouverte + "oui, merci à vous", + "non, merci quand même", + "merci, j'attends votre réponse", + "j'aimerais être tenu informée merci", + "Merci, faites moi une proposition", + "Merci, ci-joint le formulaire", + "ci-attaché ledocument, merci", + "Madame Mercier", + ] diff --git a/melusine/regex/transfer_regex.py b/melusine/regex/transfer_regex.py new file mode 100644 index 0000000..1ab5026 --- /dev/null +++ b/melusine/regex/transfer_regex.py @@ -0,0 +1,65 @@ +from typing import Dict, List, Optional, Union + +from melusine.base import MelusineRegex + + +class TransferRegex(MelusineRegex): + """ + Detect transfer patterns in headers such as "tr:". + """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return r"^(tr:|fwd :|tr :|fwd:)" + + @property + def neutral(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "tr: notre discussion", + "tr : bonjour", + "Tr : compte rendu", + "fwd: rdv du 01/01/2001", + "Fwd: Votre déclaration", + ] + + @property + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. 
+ """ + return ["transfert d'argent", "re: tr: message du jour", "re:Fwd important notice"] diff --git a/melusine/regex/vacation_reply_regex.py b/melusine/regex/vacation_reply_regex.py new file mode 100644 index 0000000..b82a6a2 --- /dev/null +++ b/melusine/regex/vacation_reply_regex.py @@ -0,0 +1,128 @@ +from typing import Dict, List, Optional, Union + +from melusine.base import MelusineRegex + + +class VacationReplyRegex(MelusineRegex): + """ + Detect vacation reply patterns such as "Je suis absent du bureau". + """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return dict( + VAC_REP_URGENCY=r"en\s+?cas\s+?d'?\s?urgenc.{1,100}(?:contact|app?eler?)|pour toute urgence.{1,100}contact", + VAC_REP_HOLIDAYS=( + r"^.{,30}(?:je suis |etant )?(:?actuellement)?(?:(? Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return dict( + VAC_REP_FORBIDDEN_PROC=r"pas.{,10}traite|pas.{,10}identifie|pas.{,10}aboutir?", + VAC_PREP_FORBIDDEN_ACTION=( + r"dans.{,7}attente|merci par avance|(?:serai[st]?|aurai[st]?) souhait(able)?|relanc[ée]r?" + ), + VAC_PREP_FORBIDDEN_FORGOT=( + r"je.{,10}perm(?:ets?|is?)|effectivement|\boublie?|malheureusement|suite [aà]|" + r"(?:monsieur|madame).{,15}absent?" 
+ ), + VAC_REP_FORBIDDEN_RETURN=( + r"(?:retour|reponse) tardi(?:f|ve)|vien[ts] de (?:prendre (?:note|conn?aiss?ance)|recevoir)" + ), + VAC_REP_FORBIDDEN_EXTRA=( + r"\bai\b.{,15}app?ell?er?|\bai\b.{,7}\blu[es]?\b|\barr?eter?.{,5}date|(?:aucune|pas).{,7}nouvelle|" + r"chaque ann[ée]e|voisin" + ), + VAC_REP_FORBIDDEN_AMBIGUOUS=r"en\s+?cas\s+?de\s?besoin.{1,100}(?:contact|app?eler?)|going on leave", + VAC_REP_FORBIDDEN_ABSENCE=( + r"dur[ée]e? ind[ée]termin[ée]e?|" + r"expertise judiciaire|doi.{,35}au plus tard|^.{,50}adresse[rz] vos|en mon absence" + ), + VAC_REP_FORBIDDEN_EMAIL=( + r"pas prise? en compte|chang[ée]r?.{,5}(?:adresse|mail)|" + r"mail.{,15}(?:(in)?acti|pas lu)|acc[eè]s limit[ée]" + ), + VAC_REP_FORBIDDEN_SICKNESS=r"arr?[eê]t de travail|\bmaternit[ée]\b|maladie", + VAC_REP_FORBIDDEN_LEAVE=( + r"\bposte\b.{,50}\bvacant\b|d[ée]finitivement|succe[ée]d[ée]|" + r"quitt[ée]|remplacante?|plus parti.{,25}cabinet|retraite" + ), + VAC_REP_FORBIDDEN_HOME=( + r"^.{,15}(?:devant ).{,5}(?:absente|en cong[ée]s?)|" + r"^.{,15}absente? de (?:mon domicile?|chez|.{,5}maison)" + ), + VAC_REP_FORBIDDEN_CO=r"entreprise|document", + ) + + @property + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + return [ + "en cas d'urgence, vous pouvez m'appeler sur mon mobile au 01 02 03 04 05", + "pour toute urgence, vous pouvez me contacter au 01 02 03 04 05", + "étant actuellement absent, je vous répondrais lors de mon retour le 01/01/2001", + "Je suis en congé, je vous répondrais à mon retour", + "Je suis absent du flag_date au flag_date", + "je prendrais connaissance de votre message dès mon retour", + "I am currently out of office", + "ceci est une réponse automatique", + ] + + @property + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. 
+ """ + return [ + "Je souhaite une réponse même si je suis actuellement en congé", + "en attendant, je suis en congé, dans l'attente de votre réponse", + "veuillez m'excuser, j'ai oublié de répondre avant de partir en vacances", + "excusez mon retour tardif, j'étais en déplacement professionnel", + "je suis absent pour une durée indéterminée, veuillez contacter Jane", + "je suis actuellement en congé avec un accès limité à ma messagerie", + "je suis en congé maladie", + "je suis en congé maternité", + "Je suis absent car je suis parti en retraite", + ] From ce4bea27b6e00dea89ca5550baeffa5824080cfd Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:33:38 +0100 Subject: [PATCH 07/37] :sparkles: New feature: Utils (Show melusine version) --- melusine/utils/__init__.py | 6 +++ melusine/utils/show_versions.py | 85 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 melusine/utils/__init__.py create mode 100644 melusine/utils/show_versions.py diff --git a/melusine/utils/__init__.py b/melusine/utils/__init__.py new file mode 100644 index 0000000..612fb05 --- /dev/null +++ b/melusine/utils/__init__.py @@ -0,0 +1,6 @@ +""" +The melusine.utils module includes utils functionalitites. +""" +from melusine.utils.show_versions import show_versions + +__all__ = ["show_versions"] diff --git a/melusine/utils/show_versions.py b/melusine/utils/show_versions.py new file mode 100644 index 0000000..2317365 --- /dev/null +++ b/melusine/utils/show_versions.py @@ -0,0 +1,85 @@ +from __future__ import annotations + +import sys + +from melusine import __version__ as melusine_version + + +def show_versions() -> None: + """ + Print out version of melusine and dependencies to stdout. 
+ + Examples + -------- + >>> melusine.show_versions() # doctest: +SKIP + --------Version info--------- + melusine: 3.0.0 + Platform: Linux-5.15.90.1-microsoft-standard-WSL2-x86_64-with-glibc2.35 + Python: 3.11.3 (main, Apr 15 2023, 14:44:51) [GCC 11.3.0] + \b + ----Optional dependencies---- + numpy: 1.24.2 + pandas: 2.0.0 + pytorch: + """ + # note: we import 'platform' here as a micro-optimisation for initial import + import platform + + # optional dependencies + deps = _get_dependency_info() + + # determine key length for alignment + keylen = max(len(x) for x in [*deps.keys(), "melusine", "Platform", "Python"]) + 1 + + print("--------Version info---------") + print(f"{'melusine:':{keylen}s} {melusine_version}") + print(f"{'Platform:':{keylen}s} {platform.platform()}") + print(f"{'Python:':{keylen}s} {sys.version}") + + print("\n----Optional dependencies----") + for name, v in deps.items(): + print(f"{name:{keylen}s} {v}") + + +def _get_dependency_info() -> dict[str, str]: + """ + Collect information about optional dependencies. + + Returns: + _: Dict of optional dependencies and associated versions. + """ + # See the list of dependencies in pyproject.toml/setup.cfg + opt_deps = [ + "tensorflow", + "torch", + "torchvision", + "torchlib", + "transformers", + ] + return {f"{name}:": _get_dependency_version(name) for name in opt_deps} + + +def _get_dependency_version(dep_name: str) -> str: + """ + Get the version of a dependency. + + Args: + dep_name: Name of the dependency. 
+ + Returns: + _: Dependency version or "" + """ + # import here to optimize the root melusine import + import importlib + + try: + module = importlib.import_module(dep_name) + except ImportError: + return "" + + if hasattr(module, "__version__"): + module_version = module.__version__ + else: + module_version = "<__version__ unavailable>" # pragma: no cover + + return module_version From b4493d7d32704ad41f4af2ce8cb1f7cfd3b0b95a Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:34:26 +0100 Subject: [PATCH 08/37] :sparkles: New feature: MelusinePipeline testing --- melusine/testing/__init__.py | 5 ++ melusine/testing/pipeline_testing.py | 121 +++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 melusine/testing/__init__.py create mode 100644 melusine/testing/pipeline_testing.py diff --git a/melusine/testing/__init__.py b/melusine/testing/__init__.py new file mode 100644 index 0000000..f193126 --- /dev/null +++ b/melusine/testing/__init__.py @@ -0,0 +1,5 @@ +from .pipeline_testing import assert_pipeline_results + +__all__ = [ + "assert_pipeline_results", +] diff --git a/melusine/testing/pipeline_testing.py b/melusine/testing/pipeline_testing.py new file mode 100644 index 0000000..bed25f7 --- /dev/null +++ b/melusine/testing/pipeline_testing.py @@ -0,0 +1,121 @@ +""" +Module that contains utility functions for tests (in /tests). +""" +from typing import Any, Dict + +from melusine.base import MelusineTransformer +from melusine.pipeline import MelusinePipeline + +expected_suffix: str = "_expected" + + +def assert_pipeline_results(email: Dict[str, Any], pipeline_name: str) -> None: + """ + Assert that the pipeline execution result correspond to the testcase expectation. + + Parameters + ---------- + email: Dict[str, Any] + Email content. + pipeline_name: str + Name of a Melusine pipeline. 
+ """ + # Instantiate Pipeline + pipeline: MelusinePipeline = MelusinePipeline.from_config(pipeline_name) + + # Useful fields + test_name: str = email["test_name"] + + # Loop on pipeline transformers + for transformer_name, transformer in pipeline.steps: + email = assert_transformation(email, transformer, transformer_name, test_name) + + # Check that the pipeline returns a dict + assert isinstance(email, dict) + + # Look for untested fields + untested_fields = [x for x in email if x.endswith(expected_suffix)] + assert not untested_fields, f"Field(s) {untested_fields} have not been tested for" f"Test-case '{test_name}'" + + +def assert_transformation( + email: Dict[str, Any], transformer: MelusineTransformer, transformer_name: str, test_name: str +) -> Dict[str, Any]: + """ + + Parameters + ---------- + email: Dict[str, Any] + Email data dict + test_name: str + Name of the current test + transformer: MelusineTransformer + Data transformer instance + transformer_name: str + Name of the current transformer + + Returns + ------- + email: Dict[str, Any] + Transformed email data dict + """ + # Apply transformer on email + email = transformer.transform(email) + + # Do we have an expected value for this transformer? + expected_key = f"{transformer_name}{expected_suffix}" + if expected_key in email: + expectation_dict: Dict[str, Any] = email.pop(expected_key) + + # Loop on columns with an expected value + for col, expected_value in expectation_dict.items(): + # Specific case for the message column + if col.startswith("messages"): + assert_message_attribute(col, email, expected_value, test_name, transformer_name) + + # Regular case + else: + assert email[col] == expected_value, ( + f"Failure for test {test_name} at step {transformer_name}. " + f"Value expected for column {col} : {expected_value}. 
" + f"Value obtained for column {col} : {email[col]}" + ) + return email + + +def assert_message_attribute( + col: str, email: Dict[str, Any], expected_value: Any, test_name: str, transformer_name: str +) -> None: + """ + + Parameters + ---------- + col: str + Column name + email: Dict[str, Any] + Email data dict + expected_value: Any + Expected message attribute value + test_name: str + Name of the current test + transformer_name: str + Name of the current transformer + + Returns + ------- + + """ + # Check the number of messages in the conversation + assert len(email["messages"]) == len(expected_value) + + # Loop on messages + for message, expected_attr in zip(email["messages"], expected_value): + _, attribute_name = col.rsplit(".") + attribute_value = getattr(message, attribute_name) + + # Test attribute values + assert attribute_value == expected_attr, ( + f"Failure for test {test_name} at step {transformer_name}. " + f"Value expected for attribute {attribute_name} : {expected_attr}. " + f"Value obtained for attribute {attribute_name} : {attribute_value}" + ) From 7752bec5909327955af4ecf7f01c088fbc225ccd Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:35:14 +0100 Subject: [PATCH 09/37] :sparkles: New feature: New melusine base classes --- melusine/base.py | 574 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 574 insertions(+) create mode 100644 melusine/base.py diff --git a/melusine/base.py b/melusine/base.py new file mode 100644 index 0000000..ae220a3 --- /dev/null +++ b/melusine/base.py @@ -0,0 +1,574 @@ +""" +Base classes of the Melusine framework. 
+ +Implemented classes: [ + MelusineTransformer, + MelusineDetector, + MelusineModel, + BaseLabelProcessor, + MissingModelInputFieldError, + MissingFieldError, + MelusineFeatureEncoder +] +""" +from __future__ import annotations + +import copy +import inspect +import logging +import re +from abc import ABC, abstractmethod +from typing import Any, Callable, Dict, Iterable, List, Optional, TypeVar, Union + +import pandas as pd +from sklearn.base import BaseEstimator, TransformerMixin + +from melusine.backend import backend +from melusine.io import IoMixin + +logger = logging.getLogger(__name__) + +# Dataset types supported by Melusine : pandas DataFrame and dicts +MelusineDataset = Union[Dict[str, Any], pd.DataFrame] + +# Corresponding items are: +# - Dataset : Pandas DataFrame => Item : Pandas Series +# - Dataset Dict => Item Dict +MelusineItem = Union[Dict[str, Any], pd.Series] +Transformer = TypeVar("Transformer", bound="MelusineTransformer") + + +class TransformError(Exception): + """ + Exception raised when an error occurs during the transform operation. + """ + + +class MelusineTransformer(BaseEstimator, TransformerMixin, IoMixin): + """ + Define a MelusineTransformer object. + + Is an abstract class. + + It can be a Processor or a Detector. + """ + + def __init__( + self, + input_columns: Union[str, Iterable[str]], + output_columns: Union[str, Iterable[str]], + func: Optional[Callable] = None, + ) -> None: + """ + Attribute initialization. 
+ + Parameters + ---------- + input_columns: Union[str, Iterable[str]] + List of input columns + output_columns: Union[str, Iterable[str]] + List of output columns + func: Callable + Transform function to be applied + """ + IoMixin.__init__(self) + + self.input_columns: List[str] = self.parse_column_list(input_columns) + self.output_columns: List[str] = self.parse_column_list(output_columns) + self.func = func + + @staticmethod + def parse_column_list(columns: Union[str, Iterable[str]]) -> List[str]: + """ + Transform a string into a list with a single element. + + Parameters + ---------- + columns: Union[str, Iterable[str]] + String or list of strings with column name(s). + + Returns + ------- + _: List[str] + A list of column names. + """ + # Change string into list of strings if necessary + # "body" => ["body] + if isinstance(columns, str): + columns = [columns] + return list(columns) + + def transform(self, data: MelusineDataset) -> MelusineDataset: + """ + Transform input data. + + Parameters + ---------- + data: MelusineDataset + Input data. + + Returns + ------- + _: MelusineDataset + Transformed data (output). + """ + if self.func is None: + raise AttributeError(f"Attribute func of MelusineTransformer {type(self).__name__} should not be None") + try: + return backend.apply_transform( + data=data, input_columns=self.input_columns, output_columns=self.output_columns, func=self.func + ) + + except Exception as exception: + func_name = self.func.__name__ + class_name = type(self).__name__ + input_columns = self.input_columns + raise TransformError( + f"Error in class: '{class_name}' " + f"with method '{func_name}' " + f"input_columns: {input_columns}\n" + f"{str(exception)}" + ).with_traceback(exception.__traceback__) from exception + + +class BaseMelusineDetector(MelusineTransformer, ABC): + """ + Used to define detectors. + + Template Method str based on the MelusineTransformer class. 
+ """ + + def __init__( + self, + name: str, + input_columns: List[str], + output_columns: List[str], + ): + """ + Attributes initialization. + + Parameters + ---------- + name: str + Name of the detector. + input_columns: + Detector input columns. + output_columns: + Detector output columns. + """ + # self.name needs to be set before the super class init + # Name is used to build the output_columns + self.name = name + + super().__init__( + input_columns=input_columns, + output_columns=output_columns, + ) + + @property + def debug_dict_col(self) -> str: + """ + Standard name for the column containing the debug info. + + Typically, a detector may return the following outputs: + - output_result_col: bool + > Ex: thanks_result: True + - output_value_col: Any + > Ex: thanks_output: "Remerciement plat" + - output_score_col: float + > Ex: thanks_score: 0.95 + - (debug) debug_dict_col: Dict[str, Any] + > Ex: debug_thanks: {"thanks_text": "Merci"} + """ + return f"debug_{self.name}" + + @property + @abstractmethod + def transform_methods(self) -> List[Callable]: + """ + Specify the sequence of methods to be called by the transform method. + + Returns + ------- + _: List[Callable] + List of methods to be called by the transform method. + """ + + def transform(self, df: MelusineDataset) -> MelusineDataset: + """ + Re-definition of super().transform() => specific detector's implementation + + Transform input data. + + Parameters + ---------- + df: MelusineDataset + Input data. + + Returns + ------- + _: MelusineDataset + Transformed data (output). + """ + # Debug mode ON? 
+ debug_mode: bool = backend.check_debug_flag(df) + + # Validate fields of the input data + self.validate_input_fields(df) + + # Work on a copy of the DataFrame and limit fields to effective input columns + # data_ = backend.copy(data, fields=self.input_columns) + + # Work on a copy of the DataFrame and keep all columns + # (too complex to handle model input columns) + data_ = backend.copy(df) + + # Get list of new columns created by the detector + return_cols = copy.deepcopy(self.output_columns) + + # Create debug data dict + if debug_mode: + data_ = backend.setup_debug_dict(data_, dict_name=self.debug_dict_col) + return_cols.append(self.debug_dict_col) + + for method in self.transform_methods: + first_arg_name: str = list(inspect.signature(method).parameters)[0] + + if first_arg_name == "row": + # Run row-wise method + data_ = backend.apply_transform( + data=data_, input_columns=None, output_columns=None, func=method, debug_mode=debug_mode + ) + else: + data_ = method(data_, debug_mode=debug_mode) + + # Add new fields to the original MelusineDataset + data = backend.add_fields(left=df, right=data_, fields=return_cols) + + return data + + def validate_input_fields(self, data: MelusineDataset) -> None: + """ + Make sure that all the required input fields are present. + + Parameters + ---------- + data: MelusineDataset + Input data. + """ + input_fields: List[str] = backend.get_fields(data) + missing_fields: List[str] = [x for x in self.input_columns if x not in input_fields] + if missing_fields: + raise MissingFieldError(f"Fields {missing_fields} are missing from the input data") + + +class MelusineDetector(BaseMelusineDetector, ABC): + """ + Defines an interface for detectors. + All detectors used in a MelusinePipeline should inherit from the MelusineDetector class and + implement the abstract methods. + This ensures homogeneous coding style throughout the application. 
+ Alternatively, melusine user's can define their own Interface (inheriting from the BaseMelusineDetector) + to suit their needs. + """ + + @property + def transform_methods(self) -> List[Callable]: + """ + Specify the sequence of methods to be called by the transform method. + + Returns + ------- + _: List[Callable] + List of methods to be called by the transform method. + """ + return [self.pre_detect, self.detect, self.post_detect] + + @abstractmethod + def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """What needs to be done before detection.""" + + @abstractmethod + def detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """Run detection.""" + + @abstractmethod + def post_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """What needs to be done after detection (e.g., mapping columns).""" + + +class MissingFieldError(Exception): + """ + Exception raised when a missing field is encountered by a MelusineTransformer + """ + + +class MelusineRegex(ABC): + """ + Class to standardise text pattern detection using regex. + """ + + REGEX_FLAGS: re.RegexFlag = re.IGNORECASE | re.MULTILINE + + # Match fields + MATCH_RESULT: str = "match_result" + NEUTRAL_MATCH_FIELD: str = "neutral_match_data" + POSITIVE_MATCH_FIELD: str = "positive_match_data" + NEGATIVE_MATCH_FIELD: str = "negative_match_data" + + # Match data + MATCH_START: str = "start" + MATCH_STOP: str = "stop" + MATCH_TEXT: str = "match_text" + + def __init__(self, substitution_pattern: str = " ", default_match_group: str = "DEFAULT"): + if not isinstance(substitution_pattern, str) or (len(substitution_pattern) > 1): + raise ValueError( + f"Parameter substitution_pattern should be a string of length 1, not {substitution_pattern}" + ) + self.substitution_pattern = substitution_pattern + self.default_match_group = default_match_group + + @property + def regex_name(self) -> str: + """ + Name of the Melusine regex object. 
+ Defaults to the class name. + """ + return getattr(self, "_regex_name", type(self).__name__) + + @property + @abstractmethod + def positive(self) -> Union[Dict[str, str], str]: + """ + Define regex patterns required to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + + @property + def neutral(self) -> Optional[Union[Dict[str, str], str]]: + """ + Define regex patterns to be ignored when running detection. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + def negative(self) -> Optional[Union[Dict[str, str], str]]: + """ + Define regex patterns prohibited to activate the MelusineRegex. + + Returns: + _: Regex pattern or dict of regex patterns. + """ + return None + + @property + @abstractmethod + def match_list(self) -> List[str]: + """ + List of texts that should activate the MelusineRegex. + + Returns: + _: List of texts. + """ + + @property + @abstractmethod + def no_match_list(self) -> List[str]: + """ + List of texts that should NOT activate the MelusineRegex. + + Returns: + _: List of texts. + """ + + def _get_match( + self, text: str, base_regex: Union[str, Dict[str, str]], regex_group: Optional[str] = None + ) -> Dict[str, List[Dict[str, Any]]]: + """ + Run specified regex on the input text and return a dict with matching group as key. + + Args: + text: Text to apply regex on. + base_regex: Regex to apply on text. + regex_group: Name of the group the regex belongs to. + + Returns: + Dict of regex matches for each regex group. 
+ """ + match_data_dict = {} + + if regex_group is None: + regex_group = self.default_match_group + + if isinstance(base_regex, dict): + for group, regex in base_regex.items(): + group_match_data = self._get_match(text, regex, group) + match_data_dict.update(group_match_data) + else: + for match in re.finditer(base_regex, text, flags=self.REGEX_FLAGS): + if not match_data_dict.get(regex_group): + match_data_dict[regex_group] = [] + + # Get match position + start, stop = match.span() + + match_data_dict[regex_group].append( + { + self.MATCH_START: start, + self.MATCH_STOP: stop, + self.MATCH_TEXT: text[start:stop], + } + ) + + return match_data_dict + + def ignore_text( + self, + text: str, + match_data_dict: Dict[str, List[Dict[str, Any]]], + ) -> str: + """ + Replace neutral regex match text with substitution text to ignore it. + + Args: + text: Input text. + match_data_dict: Regex match results. + + Returns: + _: Text with substituions. + """ + for _, match_list in match_data_dict.items(): + for match_data in match_list: + start = match_data[self.MATCH_START] + stop = match_data[self.MATCH_STOP] + + # Mask text to ignore + text = text[:start] + self.substitution_pattern * (stop - start) + text[stop:] + + return text + + def get_match_result(self, text: str) -> bool: + """ + Apply MelusineRegex patterns (neutral, negative and positive) on the input text. + Return a boolean output of the match result. + + Args: + text: input text. + + Returns: + _: True if the MelusineRegex matches the input text. + """ + result = self(text) + return result[self.MATCH_RESULT] + + def __call__(self, text: str) -> Dict[str, Any]: + """ + Apply MelusineRegex patterns (neutral, negative and positive) on the input text. + Return a detailed output of the match results as a dict. + + Args: + text: input text. + + Returns: + _: Regex match results. 
+ """ + match_dict = { + self.MATCH_RESULT: False, + self.NEUTRAL_MATCH_FIELD: {}, + self.NEGATIVE_MATCH_FIELD: {}, + self.POSITIVE_MATCH_FIELD: {}, + } + + negative_match = False + + if self.neutral: + neutral_match_data = self._get_match(text=text, base_regex=self.neutral) + match_dict[self.NEUTRAL_MATCH_FIELD] = neutral_match_data + + text = self.ignore_text(text, neutral_match_data) + + if self.negative: + negative_match_data = self._get_match(text=text, base_regex=self.negative) + negative_match = bool(negative_match_data) + match_dict[self.NEGATIVE_MATCH_FIELD] = negative_match_data + + positive_match_data = self._get_match(text=text, base_regex=self.positive) + positive_match = bool(positive_match_data) + match_dict[self.POSITIVE_MATCH_FIELD] = positive_match_data + + match_dict[self.MATCH_RESULT] = positive_match and not negative_match + + return match_dict + + def describe(self, text: str, position: bool = False) -> None: + """ + User-friendly description of the regex match results. + + Args: + text: Input text. + position: If True, print regex match start and stop positions. + """ + + def _describe_match_field(match_field_data: Dict[str, List[Dict[str, Any]]]) -> None: + """ + Format and print result description text. + + Args: + match_field_data: Regex match result for a given field. 
+ """ + for group, match_list in match_field_data.items(): + for match_dict in match_list: + print(f"{indent}({group}) {match_dict[self.MATCH_TEXT]}") + if position: + print(f"{indent}start: {match_dict[self.MATCH_START]}") + print(f"{indent}stop: {match_dict[self.MATCH_STOP]}") + + indent = " " * 4 + match_data = self(text) + + if match_data[self.MATCH_RESULT]: + print("The MelusineRegex match result is : POSITIVE") + else: + print("The MelusineRegex match result is : NEGATIVE") + + if not any( + [ + match_data[self.NEUTRAL_MATCH_FIELD], + match_data[self.NEGATIVE_MATCH_FIELD], + match_data[self.POSITIVE_MATCH_FIELD], + ] + ): + print("The input text did not match anything.") + + if match_data[self.NEUTRAL_MATCH_FIELD]: + print("The following text was ignored:") + _describe_match_field(match_data[self.NEUTRAL_MATCH_FIELD]) + + if match_data[self.NEGATIVE_MATCH_FIELD]: + print("The following text matched negatively:") + _describe_match_field(match_data[self.NEGATIVE_MATCH_FIELD]) + + if match_data[self.POSITIVE_MATCH_FIELD]: + print("The following text matched positively:") + _describe_match_field(match_data[self.POSITIVE_MATCH_FIELD]) + + def test(self) -> None: + """ + Test the MelusineRegex on the match_list and no_match_list. 
+ """ + for text in self.match_list: + match = self(text) + assert match[self.MATCH_RESULT] is True, f"Expected match for text\n{text}\nObtained: {match}" + + for text in self.no_match_list: + match = self(text) + assert match[self.MATCH_RESULT] is False, f"Expected no match for text:\n{text}\nObtained: {match}" + + def __repr__(self) -> str: + return f"{type(self).__name__}(positive:{self.positive},neutral:{self.neutral},negative:{self.negative})" From 62cbd2e6f6292ad61cba71abec3b110cdc3cb5fa Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:35:56 +0100 Subject: [PATCH 10/37] :sparkles: New feature: Builtin MelusineDetectors --- melusine/detectors.py | 683 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 683 insertions(+) create mode 100644 melusine/detectors.py diff --git a/melusine/detectors.py b/melusine/detectors.py new file mode 100644 index 0000000..fec6b31 --- /dev/null +++ b/melusine/detectors.py @@ -0,0 +1,683 @@ +""" +Classes of detectors. + +Implemented classes: [ThanksDetector, VacationReplyDetector, ExpeditorDetector, +ReplyDetector, TransferDetector, RecipientsDetector] + +""" +from typing import Any, Dict, List, Tuple + +from melusine.base import MelusineDetector, MelusineItem, MelusineRegex +from melusine.message import Message +from melusine.regex import EmergencyRegex, ReplyRegex, ThanksRegex, TransferRegex, VacationReplyRegex + + +class ThanksDetector(MelusineDetector): + """ + Class to detect emails containing only thanks text. + + Ex: + Merci à vous, + Cordialement + """ + + # Class constants + BODY_PART: str = "BODY" + THANKS_PART: str = "THANKS" + GREETINGS_PART: str = "GREETINGS" + + # Intermediate columns + THANKS_TEXT_COL: str = "thanks_text" + THANKS_PARTS_COL: str = "thanks_parts" + HAS_BODY: str = "has_body" + THANKS_MATCH_COL: str = "thanks_match" + + def __init__( + self, + messages_column: str = "messages", + name: str = "thanks", + ) -> None: + """ + Attributes initialization. 
+ + Parameters + ---------- + messages_column: str + Name of the column containing the messages. + + name: str + Name of the detector. + """ + + # Input columns + self.messages_column = messages_column + input_columns: List[str] = [self.messages_column] + + # Output columns + self.result_column = f"{name}_result" + output_columns: List[str] = [self.result_column] + + # Detection regex + self.thanks_regex: MelusineRegex = ThanksRegex() + + super().__init__( + name=name, + input_columns=input_columns, + output_columns=output_columns, + ) + self.complex_regex_key: str + + def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """ + Extract text to analyse. + + Parameters + ---------- + row: MelusineItem + Content of an email. + debug_mode: bool + Debug mode activation flag. + + Returns + ------- + row: MelusineItem + Updated row. + """ + # Check if a BODY part is present in the last message + has_body: bool = row[self.messages_column][0].has_tags( + target_tags={self.BODY_PART}, stop_at={self.GREETINGS_PART} + ) + + # Extract the THANKS part in the last message + thanks_parts: List[Tuple[str, str]] = row[self.messages_column][0].extract_parts(target_tags={self.THANKS_PART}) + + # Compute THANKS text + if not thanks_parts: + thanks_text: str = "" + else: + thanks_text = "\n".join(x[1] for x in thanks_parts) + + # Save debug data + if debug_mode: + debug_dict = { + self.THANKS_PARTS_COL: thanks_parts, + self.THANKS_TEXT_COL: thanks_text, + self.HAS_BODY: has_body, + } + row[self.debug_dict_col].update(debug_dict) + + # Create new columns + row[self.THANKS_TEXT_COL] = thanks_text + row[self.HAS_BODY] = has_body + + return row + + def detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """ + Use regex to detect thanks. + + Parameters + ---------- + row: MelusineItem + Content of an email. + debug_mode: bool + Debug mode activation flag. + + Returns + ------- + row: MelusineItem + Updated row. 
class VacationReplyDetector(MelusineDetector):
    """
    Detector for automatic responses such as vacation or out-of-office replies.

    The body of the first message of the email is extracted, then matched
    against a VacationReplyRegex.
    """

    # Class constants
    CONST_TEXT_COL_NAME: str = "vacation_reply_text"
    CONST_DEBUG_TEXT_KEY: str = "text"
    CONST_DEBUG_PARTS_KEY: str = "parts"

    def __init__(
        self,
        name: str,
        messages_column: str = "messages",
    ) -> None:
        """
        Attributes initialization.

        Parameters
        ----------
        name: str
            Detector's name (also used to build the result column name).
        messages_column: str
            Name of the column containing the messages.
        """
        self.messages_column = messages_column

        # Regex in charge of the actual detection
        self.vacation_reply_regex: MelusineRegex = VacationReplyRegex()

        # The only output is the "<name>_result" column
        self.result_column = f"{name}_result"

        super().__init__(
            name=name,
            input_columns=[messages_column],
            output_columns=[self.result_column],
        )

    def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Build the text to analyse from the body parts of the first message.

        Parameters
        ----------
        row: MelusineItem
            Content of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Updated row.
        """
        # First message of the conversation
        first_message: Message = row[self.messages_column][0]
        body_parts = first_message.extract_last_body()

        # Join the texts of the extracted parts (empty string when nothing was extracted)
        row[self.CONST_TEXT_COL_NAME] = "\n".join(part_text for _, part_text in body_parts) if body_parts else ""

        if not debug_mode:
            return row

        # Prepare and save debug data
        debug_dict: Dict[str, Any] = {self.CONST_DEBUG_TEXT_KEY: row[self.CONST_TEXT_COL_NAME]}
        if self.messages_column:
            debug_dict[self.CONST_DEBUG_PARTS_KEY] = body_parts
        row[self.debug_dict_col].update(debug_dict)

        return row

    def detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Apply the vacation reply regex on the prepared text and store the match result.

        Parameters
        ----------
        row: MelusineItem
            Content of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Updated row.
        """
        regex = self.vacation_reply_regex
        detection_data = regex(row[self.CONST_TEXT_COL_NAME])

        # Save debug data
        if debug_mode:
            row[self.debug_dict_col].update({regex.regex_name: detection_data})

        row[self.result_column] = detection_data[regex.MATCH_RESULT]

        return row

    def post_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Apply final eligibility rule (nothing to do for this detector).

        Parameters
        ----------
        row: MelusineItem
            Content of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Unchanged row.
        """
        return row
class TransferDetector(MelusineDetector):
    """
    The TransferDetector detects if an email is a transfer.

    The result is truthy when the lowercased header matches the TransferRegex
    (documented as headers starting with "tr:" or "fwd:") or when the meta of
    the first message is not empty.
    """

    # Temporary column holding the analysed text.
    # NOTE(review): this is the same column name as ReplyDetector.CONST_ANALYSED_TEXT_COL
    # ("reply_text"); both detectors write the lowercased header there. Kept as-is
    # because renaming would change the columns produced by the detector.
    CONST_ANALYSED_TEXT_COL: str = "reply_text"

    # Debug columns
    CONST_DEBUG_MESSAGE_META: str = "messages[0].meta"

    def __init__(
        self,
        name: str,
        header_column: str = "clean_header",
        messages_column: str = "messages",
    ) -> None:
        """
        Attributes initialization.

        Parameters
        ----------
        name: str
            Name given to the detector.
        header_column: str
            Name of the column used for the email header.
        messages_column: str
            Name of the column used for the messages.
        """
        # Set instance attributes
        self.header_column = header_column
        self.messages_column = messages_column
        self.transfer_regex: MelusineRegex = TransferRegex()

        # Input columns
        input_columns: List[str] = [self.header_column, self.messages_column]

        # Output columns
        self.result_column = f"{name}_result"
        output_columns: List[str] = [self.result_column]

        super().__init__(
            name=name,
            input_columns=input_columns,
            output_columns=output_columns,
        )

    def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Retrieve the text to analyse (lowercased header).

        Log debug information if debug_mode is True.

        Parameters
        ----------
        row: MelusineItem
            Data of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Updated row.
        """
        row[self.CONST_ANALYSED_TEXT_COL] = row[self.header_column].lower()

        # Store debug infos
        if debug_mode:
            debug_dict = {
                self.CONST_ANALYSED_TEXT_COL: row[self.CONST_ANALYSED_TEXT_COL],
            }
            row[self.debug_dict_col].update(debug_dict)

        return row

    def detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Flag the email as a transfer if the header matches the transfer regex
        or if the first message carries metadata (e.g. From:, To:, Subject:).

        Parameters
        ----------
        row: MelusineItem
            Data of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Updated row.
        """
        debug_info: Dict[str, Any] = {}

        text: str = row[self.CONST_ANALYSED_TEXT_COL]
        meta: str = row[self.messages_column][0].meta

        detection_data = self.transfer_regex(text)
        detection_result = detection_data[MelusineRegex.MATCH_RESULT]

        # Save debug data
        if debug_mode:
            debug_info[self.transfer_regex.regex_name] = detection_data
            debug_info[self.CONST_DEBUG_MESSAGE_META] = meta
            row[self.debug_dict_col].update(debug_info)

        # Truthiness test: an empty string and a missing (None) meta both mean "no meta",
        # consistently with the way Message.__repr__ / __str__ treat the meta attribute.
        row[self.result_column] = detection_result or bool(meta)

        return row

    def post_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem:
        """
        Apply final eligibility rules.

        Parameters
        ----------
        row: MelusineItem
            Content of an email.
        debug_mode: bool
            Debug mode activation flag.

        Returns
        -------
        row: MelusineItem
            Updated row.
        """
        # No implementation needed
        return row
+ """ + self.header_column = header_column + self.text_column = text_column + + # Detection regex + self.regex: MelusineRegex = EmergencyRegex() + + # Input columns + input_columns: List[str] = [header_column, text_column] + + # Output columns + self.result_column = f"{name}_result" + output_columns: List[str] = [self.result_column] + + super().__init__( + name=name, + input_columns=input_columns, + output_columns=output_columns, + ) + + def pre_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """ + Extract/prepare the text to analyse. + + Parameters + ---------- + row: MelusineItem + Content of an email. + debug_mode: bool + Debug mode activation flag. + + Returns + ------- + row: MelusineItem + Updated row. + """ + # Last message body + message_text: str = row[self.text_column] + header: str = row[self.header_column] + + row[self.CONST_TEXT_COL_NAME] = "\n".join([header, message_text]) + + # Prepare and save debug data + if debug_mode: + debug_dict: Dict[str, Any] = { + self.CONST_DEBUG_TEXT_KEY: row[self.CONST_TEXT_COL_NAME], + } + row[self.debug_dict_col].update(debug_dict) + + return row + + def detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """ + Apply regex on the effective text. + + Parameters + ---------- + row: MelusineItem + Content of an email. + debug_mode: bool + Debug mode activation flag. + + Returns + ------- + row: MelusineItem + Updated row. + """ + debug_info: Dict[str, Any] = {} + + text: str = row[self.CONST_TEXT_COL_NAME] + + detection_data = self.regex(text) + detection_result = detection_data[self.regex.MATCH_RESULT] + + # Save debug data + if debug_mode: + debug_info[self.regex.regex_name] = detection_data + row[self.debug_dict_col].update(debug_info) + + row[self.result_column] = detection_result + + return row + + def post_detect(self, row: MelusineItem, debug_mode: bool = False) -> MelusineItem: + """ + Apply final eligibility rule. 
class Message:
    """
    Class acting as a data container for email data (text, meta and features)
    """

    DEFAULT_STR_LINE_LENGTH = 120
    DEFAULT_STR_TAG_NAME_LENGTH = 22

    def __init__(
        self,
        text: str,
        header: str = "",
        meta: str = "",
        date: Optional[datetime] = None,
        text_from: str = "",
        text_to: Optional[str] = None,
        tags: Optional[List[Tuple[str, str]]] = None,
    ):
        """
        Attributes initialization.

        Parameters
        ----------
        text: str
            Message text content.
        header: str
            Message text header.
        meta: str
            Message raw metadata.
        date: datetime
            Message date.
        text_from: str
            Email sender.
        text_to: str
            Email receiver.
        tags: List[Tuple[str, str]]
            Tagged text parts as (tag name, text) tuples.
            (should be passed as init argument for debug purposes only)
        """
        self.text = text
        self.header = header
        self.meta = meta
        self.date = date
        self.text_from = text_from
        self.text_to = text_to

        self.tags = tags
        self.clean_header: str = ""
        self.clean_text: str = ""

    @property
    def str_tag_name_length(self) -> int:
        """
        When printing a message, number of characters for the TAG field.

        Read from the "message" section of the config, with a class-level default.
        """
        if "message" not in config:
            return self.DEFAULT_STR_TAG_NAME_LENGTH
        else:
            return config["message"].get("str_tag_name_length", self.DEFAULT_STR_TAG_NAME_LENGTH)

    @property
    def str_line_length(self) -> int:
        """
        When printing a message, total number of characters in each line (text + separation + tag).

        Read from the "message" section of the config, with a class-level default.
        """
        if "message" not in config:
            return self.DEFAULT_STR_LINE_LENGTH
        else:
            return config["message"].get("str_line_length", self.DEFAULT_STR_LINE_LENGTH)

    @staticmethod
    def _reusable(tag_names: Iterable[str]) -> Iterable[str]:
        """
        Return `tag_names` in a form that supports repeated membership tests.

        Containers (list, tuple, set, str, ...) are returned untouched so that their
        membership semantics are preserved. One-shot iterables (generators, iterators)
        would be consumed by the first `in` test, so they are materialised into a tuple.
        """
        return tag_names if hasattr(tag_names, "__contains__") else tuple(tag_names)

    def extract_parts(
        self, target_tags: Optional[Iterable[str]] = None, stop_at: Optional[Iterable[str]] = None
    ) -> List[Tuple[str, str]]:
        """
        Function to extract target tags from the message.

        Parameters
        ----------
        target_tags:
            Tags to be extracted (all the tags of the message when None).
        stop_at:
            Tags for which extraction should stop.
            Parts located at or after the first occurrence of one of these tags are ignored.

        Returns
        -------
        _: List[Tuple[str, str]]
            List of extracted (tag name, text) parts. Empty list if the message has no tags.
        """
        if not self.tags:
            return []

        # List of tags in the message
        tag_name_list: List[str] = [x[0] for x in self.tags]

        if target_tags is None:
            target_tags = tag_name_list
        else:
            # Membership is tested once per part: make sure the iterable survives it
            target_tags = self._reusable(target_tags)

        # When stop tags are specified, work on a restricted message
        # (Ex: All tags until GREETINGS)
        if stop_at:
            upper_bound: int = len(tag_name_list)
            for tag_name in stop_at:
                if tag_name in tag_name_list:
                    upper_bound = min(upper_bound, tag_name_list.index(tag_name))
            # Restrict message
            effective_tags = self.tags[:upper_bound]
        else:
            effective_tags = self.tags

        return [x for x in effective_tags if x[0] in target_tags]

    def extract_last_body(
        self, target_tags: Iterable[str] = ("BODY",), stop_at: Iterable[str] = ("GREETINGS",)
    ) -> List[Tuple[str, str]]:
        """
        Extract the BODY parts of the message, stopping at the first GREETINGS part
        (with the default arguments).

        Parameters
        ----------
        target_tags: Iterable[str]
            Tags to be extracted.
        stop_at: Iterable[str]
            Tags for which extraction should stop.

        Returns
        -------
        _: List[Tuple[str, str]]
            List of extracted (tag name, text) parts.
        """
        return self.extract_parts(target_tags=target_tags, stop_at=stop_at)

    def has_tags(
        self,
        target_tags: Iterable[str] = ("BODY",),
        stop_at: Optional[Iterable[str]] = None,
    ) -> bool:
        """
        Function to check if input tags are present in the message.

        Parameters
        ----------
        target_tags:
            Tags of interest.
        stop_at:
            Tags for which the search should stop.

        Returns
        -------
        _: bool
            True if a target tag is found before any stop tag is reached.
        """
        if self.tags is None:
            return False

        # Both collections are tested once per part: protect against one-shot iterables
        target_tags = self._reusable(target_tags)
        stop_tags: Iterable[str] = self._reusable(stop_at) if stop_at else ()

        for tag, _ in self.tags:
            # Check if tag in tags of interest
            if tag in target_tags:
                return True

            # Stop when specified tag is reached
            if tag in stop_tags:
                break

        return False

    def format_tags(self) -> str:
        """
        Create a pretty formatted representation of text and their associated tags.

        Returns
        -------
        _: str
            Pretty formatted representation of the tags and texts
            (the raw text when the message has not been tagged).
        """
        if self.tags is None:
            return self.text

        # Column widths are loop invariants: compute them once
        tag_name_length = self.str_tag_name_length
        tag_text_length = self.str_line_length - tag_name_length

        lines = [
            tag_text.ljust(tag_text_length, ".") + tag_name.rjust(tag_name_length, ".") + "\n"
            for tag_name, tag_text in self.tags
        ]

        return "".join(lines).strip()

    def __repr__(self) -> str:
        """
        Repr representation.

        Returns
        -------
        _: str
            Compact one-line representation of the Message (meta and text).
        """
        if self.meta:
            meta = re.sub(r"\n+", r"\n", self.meta).strip("\n ")
        else:
            meta = "NA"
        text: str = re.sub(r"\n+", r"\n", self.text)
        return f"Message(meta={repr(meta)}, text={repr(text)})"

    def __str__(self) -> str:
        """
        String representation.

        Returns
        -------
        _: str
            Readable multi-line representation of the Message (meta, then tagged text).
        """
        title_len = 22
        fill_len = (self.str_line_length - title_len) // 2

        sections = [
            f"{'=' * fill_len}{'Message':^{title_len}}{'=' * fill_len}\n",
            f"{'-' * fill_len}{'Meta':^{title_len}}{'-' * fill_len}\n",
            f"{self.meta or 'N/A'}\n",
            f"{'-' * fill_len}{'Text':^{title_len}}{'-' * fill_len}\n",
            self.format_tags() + "\n",
            f"{'=' * fill_len}{'=' * title_len}{'=' * fill_len}\n\n",
        ]

        return "".join(sections)
class MelusinePipeline(Pipeline):
    """
    This class defines and executes data transformation.

    The MelusinePipeline is built on top of sklearn Pipelines.
    """

    # Keys used in the pipeline / step configuration dicts
    OBJ_NAME: str = "name"
    OBJ_KEY: str = "config_key"
    OBJ_PARAMS: str = "parameters"
    STEPS_KEY: str = "steps"
    OBJ_CLASS: str = "class_name"
    OBJ_MODULE: str = "module"

    def __init__(
        self,
        steps: List[Tuple[str, MelusineTransformer]],
        memory: Optional[bool] = None,
        verbose: bool = False,
    ) -> None:
        """
        Initialize attributes.

        Parameters
        ----------
        steps: List[Tuple[str, MelusineTransformer]]
            List of the pipeline steps.
        memory: bool
            Forwarded untouched to the sklearn Pipeline `memory` argument (transformer caching).
            NOTE(review): sklearn documents this argument as a path or a joblib.Memory object;
            confirm the intended type.
        verbose: bool
            Verbose mode.
        """
        Pipeline.__init__(self, steps=steps, memory=memory, verbose=verbose)

        self.memory = memory
        self.verbose = verbose

    @property
    def input_columns(self) -> List[str]:
        """
        Input fields of the Pipeline.

        Returns
        -------
        _: List[str]
            Union of the input fields of every step (order not guaranteed).
        """
        column_set: Set[str] = set()
        for _, step in self.steps:
            # UNION between sets
            column_set |= set(step.input_columns)

        return list(column_set)

    @property
    def output_columns(self) -> List[str]:
        """
        Output fields of the Pipeline.

        Returns
        -------
        _: List[str]
            Union of the output fields of every step (order not guaranteed).
        """
        column_set: Set[str] = set()
        for _, step in self.steps:
            column_set |= set(step.output_columns)

        return list(column_set)

    @classmethod
    def get_obj_class(cls, obj_params: Dict[str, Any]) -> Any:
        """
        Get the class object of an instance.

        The class name and module keys are popped from `obj_params` (the dict is modified in place).

        Parameters
        ----------
        obj_params: Dict[str, Any]
            Step configuration containing the class name and module keys.

        Returns
        -------
        _: Any
            Class object.
        """
        obj_class_name = obj_params.pop(cls.OBJ_CLASS)
        obj_module = obj_params.pop(cls.OBJ_MODULE)

        obj_class = MelusinePipeline.import_class(obj_class_name, obj_module)

        return obj_class

    @staticmethod
    def import_class(obj_class_name: str, obj_module: str) -> Any:
        """
        Method to import a class dynamically.

        Parameters
        ----------
        obj_class_name: str
            Name of the object to be imported.
        obj_module: str
            Name of the module containing the object to be imported.

        Returns
        -------
        _: Any
            Class object.

        Raises
        ------
        AttributeError
            If the module does not expose an attribute named `obj_class_name`.
        """
        # Import object class from name and module
        module = importlib.import_module(obj_module)
        if not hasattr(module, obj_class_name):
            raise AttributeError(f"Object `{obj_class_name}` cannot be loaded from module `{module}`.")
        obj_class = getattr(module, obj_class_name)
        return obj_class

    @classmethod
    def flatten_pipeline_config(cls, conf: Dict[str, Any]) -> Dict[str, Any]:
        """
        Flatten nested Melusine Pipelines.

        This makes it easier for the rest of the processing.
        The steps of a nested pipeline replace the nested pipeline step itself.
        The input dict is updated in place and returned.

        Parameters
        ----------
        conf: Dict[str, Any]
            Base pipeline conf possibly containing nested pipelines.

        Returns
        -------
        _: Dict[str, Any]
            Flattened conf.
        """
        new_conf: List[Any] = list()
        for step in conf[cls.STEPS_KEY]:
            if step.get(cls.OBJ_CLASS, "") == cls.__name__:
                # Use the class constant (not a literal) to stay consistent with the other methods
                subpipeline_conf = cls.flatten_pipeline_config(step[cls.OBJ_PARAMS])
                new_conf.extend(subpipeline_conf[cls.STEPS_KEY])
            else:
                new_conf.append(step)
        conf[cls.STEPS_KEY] = new_conf

        return conf

    @classmethod
    def from_config(
        cls, config_key: Optional[str] = None, config_dict: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> MelusinePipeline:
        """
        Instantiate a MelusinePipeline from a config key or from a config dict.

        Parameters
        ----------
        config_key: str
            Key of the pipeline configuration.
        config_dict: dict
            Dict containing the pipeline configuration.
        kwargs: Any
            Extra init arguments forwarded to the pipeline constructor.

        Returns
        -------
        _: MelusinePipeline
            Pipeline instance.

        Raises
        ------
        ValueError
            If both or none of `config_key` and `config_dict` are specified.
        TypeError
            If a step class does not inherit from IoMixin.
        """
        init_params = dict()

        # Get config dict
        if config_key and not config_dict:
            raw_config_dict = config[config_key]
            config_dict = cls.parse_pipeline_config(raw_config_dict)

        elif config_dict and not config_key:
            config_dict = cls.parse_pipeline_config(config_dict)
        else:
            raise ValueError("You should specify one and only one of 'config_key' and 'config_dict'")

        # Prepare step list
        steps = list()

        # Load steps meta data
        steps_meta = config_dict.pop(cls.STEPS_KEY)

        # Instantiate transformers
        for obj_meta in steps_meta:
            # Step name
            step_name: str = obj_meta.pop(cls.OBJ_NAME, None)

            # Step class
            obj_class = cls.get_obj_class(obj_meta)

            # Step arguments
            obj_params = obj_meta[cls.OBJ_PARAMS]

            if issubclass(obj_class, IoMixin):
                obj = obj_class.from_config(config_dict=obj_params)
            else:
                raise TypeError(f"Object {obj_class} does not inherit from the IoMixin class")  # pragma: no cover

            # Add step to pipeline
            steps.append((step_name, obj))

        # Init params
        init_params.update(config_dict)
        init_params.update(kwargs)

        # Instantiate MelusinePipeline object
        return cls(steps=steps, **init_params)

    @classmethod
    def validate_step_config(cls, step: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate a pipeline step configuration.

        A step is defined either by (class name, module, config key)
        or by (class name, module, name, parameters).

        Parameters
        ----------
        step: Dict with a pipeline step configuration

        Returns
        -------
        _: Validated pipeline step configuration (restricted to the expected keys).

        Raises
        ------
        PipelineConfigurationError
            If a mandatory key is missing (or empty) or if the parameters are not a dict.
        """
        if not step.get(cls.OBJ_CLASS) or not step.get(cls.OBJ_MODULE):
            raise PipelineConfigurationError(
                f"Pipeline step conf should have a {cls.OBJ_MODULE} key and a {cls.OBJ_CLASS} key."
            )

        if step.get(cls.OBJ_KEY):
            return {
                cls.OBJ_CLASS: step[cls.OBJ_CLASS],
                cls.OBJ_MODULE: step[cls.OBJ_MODULE],
                cls.OBJ_KEY: step[cls.OBJ_KEY],
            }

        if not step.get(cls.OBJ_NAME) or not step.get(cls.OBJ_PARAMS):
            # The keys actually checked are the name and the parameters
            raise PipelineConfigurationError(
                f"Pipeline step conf should have a {cls.OBJ_NAME} key and a {cls.OBJ_PARAMS} key "
                f"(unless a {cls.OBJ_KEY} is specified)."
            )

        if not isinstance(step[cls.OBJ_PARAMS], dict):
            raise PipelineConfigurationError(
                f"The key {cls.OBJ_PARAMS} should be dictionary not {type(step[cls.OBJ_PARAMS])}"
            )

        return {
            cls.OBJ_CLASS: step[cls.OBJ_CLASS],
            cls.OBJ_MODULE: step[cls.OBJ_MODULE],
            cls.OBJ_NAME: step[cls.OBJ_NAME],
            cls.OBJ_PARAMS: step[cls.OBJ_PARAMS],
        }

    @classmethod
    def validate_pipeline_config(cls, pipeline_conf: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate a pipeline configuration.

        Only the steps key is kept in the validated configuration.

        Parameters
        ----------
        pipeline_conf: Dict with a pipeline configuration

        Returns
        -------
        _: Validated pipeline configuration.

        Raises
        ------
        PipelineConfigurationError
            If the steps key is missing, empty or not a list, or if a step is invalid.
        """
        validated_pipeline_conf: Dict[str, Any] = {cls.STEPS_KEY: []}
        steps = pipeline_conf.get(cls.STEPS_KEY)

        if not steps or not isinstance(steps, list):
            raise PipelineConfigurationError(
                f"Pipeline conf should have a {cls.STEPS_KEY} key containing a list of steps."
            )

        for step in steps:
            validated_pipeline_conf[cls.STEPS_KEY].append(cls.validate_step_config(step))

        return validated_pipeline_conf

    @classmethod
    def parse_pipeline_config(cls, config_dict: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse config dict to replace config key by the associated configurations.

        The input dict is deep-copied first and is therefore left untouched.

        Parameters
        ----------
        config_dict: Dict[str, Any]
            Initial config.

        Returns
        -------
        _: Dict[str, Any]
            Parsed (validated, resolved and flattened) config.
        """
        config_dict = copy.deepcopy(config_dict)

        # Validate raw pipeline conf
        config_dict = cls.validate_pipeline_config(config_dict)

        steps = []
        for step in config_dict[cls.STEPS_KEY]:
            # Step defined from the config
            config_key = step.get(cls.OBJ_KEY)
            if config_key:
                # Use config key as step name
                step[cls.OBJ_NAME] = config_key
                _ = step.pop(cls.OBJ_KEY)
                # Update step parameters
                step[cls.OBJ_PARAMS] = config[config_key]

            # Nested pipeline
            if step[cls.OBJ_CLASS] == cls.__name__:
                raw_nested_pipeline_config = step[cls.OBJ_PARAMS]
                step[cls.OBJ_PARAMS] = cls.parse_pipeline_config(raw_nested_pipeline_config)

            # Add parsed step config to step list
            steps.append(step)

        config_dict[cls.STEPS_KEY] = steps

        return MelusinePipeline.flatten_pipeline_config(config_dict)

    @classmethod
    def get_config_from_key(cls, config_key: str) -> Dict[str, Any]:
        """
        Load the pipeline configuration stored under a config key and parse it.

        Parameters
        ----------
        config_key: Pipeline configuration key

        Returns
        -------
        _: Dict[str, Any]
            Parsed config.
        """
        return cls.parse_pipeline_config(config_dict=config[config_key])

    def validate_input_fields(self, data: Any) -> None:
        """
        Validate input fields prior of executing the pipeline.
        Use the input_columns and output_columns attributes of each step.

        Parameters
        ----------
        data: Any
            Input data.

        Raises
        ------
        ValueError
            If a step requires a field which is neither in the input data
            nor created by a previous step.
        """
        active_fields: Set[str] = set(backend.get_fields(data))

        for step_name, step in self.steps:
            difference = set(step.input_columns).difference(active_fields)
            if difference:
                raise ValueError(
                    f"Error at step '{step_name}'.\n"
                    f"Fields {difference} should be either:\n"
                    "- Present in input fields or\n"
                    "- Created by a previous pipeline step"
                )

            # Fields created by this step are available to the next ones
            active_fields |= set(step.output_columns)

    def transform(self, X: Iterable[Any]) -> Iterable[Any]:  # NOSONAR
        """
        Transform input dataset.

        The input fields are validated before running the pipeline steps.

        Parameters
        ----------
        X: Dataset
            Input Dataset.

        Returns
        -------
        _: Dataset
            Output Dataset.
        """
        self.validate_input_fields(X)
        return super().transform(X)
class Normalizer(MelusineTransformer):
    """
    Normalizer transforms raw text into standard text by:
    - Lowercasing
    - Applying unicode normalization standards such as NFD or NFKD
    - Harmonizing newlines and spaces
    """

    def __init__(
        self,
        form: str = "NFKD",
        lowercase: bool = True,
        fix_newlines: bool = True,
        input_columns: str = "text",
        output_columns: str = "text",
    ):
        """
        Parameters
        ----------
        form: str
            Unicode normalization form
        lowercase: bool
            If True, lowercase the text
        fix_newlines: bool
            If True, harmonize newlines and collapse repeated spaces / newlines
        input_columns: str
            Input columns for the transform operation
        output_columns: str
            Outputs columns for the transform operation
        """

        super().__init__(
            input_columns=input_columns,
            output_columns=output_columns,
        )

        # Message lists and raw texts are normalized by different functions
        if "messages" in self.input_columns:
            self.func = self.normalize_message
        else:
            self.func = self.normalize_text

        # Unicode normalization form
        self.form = form

        # Lower casing
        self.lowercase = lowercase

        # Fix newlines
        self.fix_newlines = fix_newlines

    def normalize_message(self, message_list: List[Message]) -> List[Message]:
        """
        Normalize the text and the header of each message.

        The normalized values are stored in the `clean_text` and `clean_header`
        attributes of the messages (updated in place).

        Parameters
        ----------
        message_list: List[Message]
            Input message list

        Returns
        -------
        _: List[Message]
            Normalized message list
        """

        for message in message_list:
            message.clean_text = self.normalize_text(message.text)
            message.clean_header = self.normalize_text(message.header)

        return message_list

    def normalize_text(self, text: str) -> str:
        """
        Apply the normalization transformations to the text.

        Parameters
        ----------
        text: str
            Input text to be normalized (non-string values are treated as an empty text)

        Returns
        -------
        text: str
            Normalized text
        """
        if not isinstance(text, str):
            text = ""

        # Uncommon characters
        text = text.replace("’", "'")
        text = text.replace("œ", "oe")

        # Replace non-breaking spaces
        # Done before the ASCII conversion, otherwise non-compatibility forms (NFC / NFD)
        # silently drop the character and glue words together,
        # and before the space collapsing so that no double space is left behind.
        text = text.replace("\xa0", " ")

        # Unicode normalization
        if self.form:
            text = unicodedata.normalize(self.form, text).encode("ASCII", "ignore").decode("utf-8")

        # Lowercasing
        if self.lowercase:
            text = text.lower()

        # Fix newlines
        if self.fix_newlines:
            # Replace platform newlines by standard newline
            text = "\n".join(text.splitlines())

            # Replace multiple spaces by single space
            text = re.sub(r" +", " ", text)

            # Replace multiple newline/spaces patterns by single newline
            text = re.sub(r" *\n+ *", "\n", text)

            # Undesired newlines following quotes
            # Ex : "<\nabc@domain.fr\n>" => "<abc@domain.fr>"
            text = re.sub(r"<\n(\w)", r"<\1", text)
            text = re.sub(r"(\w)\n>", r"\1>", text)

        return text
+ """ + + def __init__( + self, + tokenizer_regex: str = r"\w+(?:[\?\-\"_]\w+)*", + stopwords: List[str] = None, + lowercase: bool = True, + normalization_form: Optional[str] = None, + input_columns: str = "text", + output_columns: str = "tokens", + ): + """ + Parameters + ---------- + tokenizer_regex: str + Regex used to split the text into tokens + """ + super().__init__( + input_columns=input_columns, + output_columns=output_columns, + func=self.tokenize, + ) + # Lowercasing + self.lowercase = lowercase + + # Normalization + self.normalization_form = normalization_form + + # Tokenizer regex + self.tokenizer_regex = tokenizer_regex + + # Stopwords + if not stopwords: + self.stopwords = set() + else: + self.stopwords = set(stopwords) + + def _text_to_tokens(self, text: str) -> Sequence[str]: + """ + Method to split a text into a list of tokens. + + Parameters + ---------- + text: str + Text to be split + + Returns + ------- + tokens: Sequence[str] + List of tokens + """ + tokens = re.findall(self.tokenizer_regex, text, re.M + re.DOTALL) + + return tokens + + def _remove_stopwords(self, tokens: Sequence[str]) -> Sequence[str]: + """ + Method to remove stopwords from tokens. + + Parameters + ---------- + tokens: Sequence[str] + List of tokens + + Returns + ------- + tokens: Sequence[str] + List of tokens without stopwords + """ + return [token for token in tokens if token not in self.stopwords] + + def tokenize(self, text: str) -> Sequence[str]: + """ + Method to apply the full tokenization pipeline on a text. 
+ + Parameters + ---------- + text: str + Input text to be tokenized + + Returns + ------- + tokens: Sequence[str] + List of tokens + """ + # Lowercase + if self.lowercase: + text = text.lower() + + if self.normalization_form: + text = unicodedata.normalize(self.normalization_form, text).encode("ASCII", "ignore").decode("utf-8") + + # Text splitting + tokens = self._text_to_tokens(text) + + # Stopwords removal + tokens = self._remove_stopwords(tokens) + + return tokens + + +class BaseSegmenter(MelusineTransformer): + """ + Class to split a conversation into a list of Messages. + This is an abstract class defining the Segmenter interface. + Melusine users should implement a subclass or use an existing subclass to segment their emails. + """ + + def __init__( + self, + strip_characters: str = "\n >-", + input_columns: str = "body", + output_columns: str = "messages", + regex_flags: re.RegexFlag = re.MULTILINE | re.IGNORECASE, + ): + """ + Parameters + ---------- + strip_characters: str + Characters to be stripped of text segments + input_columns: str + Input columns for the transform operation + output_columns: str + Outputs columns for the transform operation + regex_flags: re.RegexFlag + Regex flags for segmentation + """ + super().__init__( + input_columns=input_columns, + output_columns=output_columns, + func=self.segment_text, + ) + + self.strip_characters = strip_characters + + # Compile segmentation regex + regex_list = self.create_segmentation_regex_list() + self._compiled_segmentation_regex = self.compile_regex_from_list(regex_list, flags=regex_flags) + + @staticmethod + @abstractmethod + def create_segmentation_regex_list() -> Iterable[str]: + """ + Method to create a compiled regex that can be used to segment an email. 
@staticmethod
def compile_regex_from_list(regex_list: Iterable[str], flags: Union[int, re.RegexFlag] = re.M) -> Pattern:
    """
    Build a single compiled regex out of several individual regexes.

    Each regex is isolated in its own non-capturing group, the groups are combined as
    alternatives, and the whole alternation is wrapped in one capturing group.

    Parameters
    ----------
    regex_list: Iterable[str]
        Individual regexes to combine
    flags: int | RegexFlag
        Flags used to compile the combined regex

    Returns
    -------
    _: Pattern[AnyStr]
        Compiled meta-regex
    """
    alternatives = "|".join(f"(?:{single_regex})" for single_regex in regex_list)

    # The outer capture group makes `split` keep the matched separators
    return re.compile(f"({alternatives})", flags=flags)
@staticmethod
def create_segmentation_regex_list() -> Iterable[str]:
    """
    Build the regex patterns (as strings) used to detect transitions between messages.

    A transition is made of one or more "meta" lines, each introduced either by:
    - a keyword followed by a colon (ex: "De : jean@gmail.com"),
    - or a standalone expression (ex: "Transféré par", "Le 2021-01-02 11:20 ... a écrit").
    Object / Subject lines may follow a meta line but never start a transition on their own.

    Returns
    -------
    _: Iterable[str]
        Tuple of segmentation regexes (a single generic pattern)
    """
    # Meta patterns of the form "META_KEYWORD : META_CONTENT"
    # Ex: "De : jean@gmail.com"
    meta_keywords_list_with_semicolon = [
        r"Date",
        r"De",
        r"Exp[ée]diteur",
        r"[ÀA]",
        r"Destinataire",
        r"Envoy[ée](?: le| par)?",
        r"R[ée]pondre [àa]",
        r"Pour",
        r"From",
        r"To",
        r"Sent",
        r"Cc",
    ]
    piped_keywords_with_semicolon = "(?:" + "|".join(meta_keywords_list_with_semicolon) + ")"
    # The keyword must appear within the first 5 characters of a line
    starter_pattern_with_semicolon = rf"^.{{,5}}(?:{piped_keywords_with_semicolon} ?\n? ?: *\n?)"

    # Meta patterns of the form "META_KEYWORD" (no ":" required)
    # Ex: "Transféré par jean@gmail.com"
    # Ex: "------ Message transmis ------"
    regex_weekdays = (
        r"(?:[Ll]undi|[Ll]un\.|[Mm]ardi|[Mm]ar\.|[Mm]ercredi|[Mm]er\.|[Jj]eudi|[Jj]eu\.|"  # noqa
        r"[Vv]endredi|[Vv]en\.|[Ss]amedi|[Ss]am\.|[Dd]imanche|[Dd]im\.)"  # noqa
    )
    regex_months = (
        r"(?:[Jj]anvier|[Ff][ée]vrier|[Mm]ars|[Aa]vril|[Mm]ai|[Jj]uin|[Jj]uillet|"  # noqa
        r"[Aa]o[ûu]t|[Ss]eptembre|[Oo]ctobre|[Nn]ovembre|[Dd][eé]cembre|"  # noqa
        r"(?:janv?|f[ée]vr?|mar|avr|juil?|sept?|oct|nov|d[ée]c)\.)"
    )

    meta_keywords_list_without_semicolon = [
        # Le 2021-01-02 11:20 jane@gmail.fr a écrit :
        # Le 02 juillet 1991 à 11:20 jane@gmail.fr a écrit :
        # Le mardi 31 août 2021 à 11:09, a écrit :
        # The pattern must end with the word "écrit"/"ecrit": a character class such as
        # "[éecrit]" would accept any single one of these letters (ex: "a travaillé").
        (
            rf"\bLe (?:"
            rf"\d{{2}}/\d{{2}}/\d{{4}}|\d{{4}}-\d{{2}}-\d{{2}}|{regex_weekdays}|"  # noqa
            rf"\d{{1,2}} {regex_months})(?:.|\n){{,30}}\d{{2}}:\d{{2}}(?:.|\n){{,50}}(?:\<.{{,30}}\>.{{,5}})?\ba [ée]crit"  # noqa
        ),
        r"Transf[ée]r[ée] par",
        r"D[ée]but du message transf[ée]r[ée] :",
        r"D[ée]but du message r[ée]exp[ée]di[ée] :",
        r"Message transmis",
        r"(?:Message|[Mm]ail) transf[ée]r[ée]",
        r"(?:Courriel|Message|Mail) original",
        r"(?:Message|Mail|Courriel) d'origine",
        r"Original [Mm]essage",
        r"Forwarded message",
        r"Forwarded by",
    ]
    piped_keywords_without_semicolon = "(?:" + "|".join(meta_keywords_list_without_semicolon) + ")"  # noqa
    # Optional trailing dashes (ex: "Message transmis ------")
    starter_pattern_without_semicolon = f"{piped_keywords_without_semicolon}(?:[\n ]*--+)?"

    # Combine pattern with and without semicolon
    starter_pattern = rf"(?:{starter_pattern_with_semicolon}|{starter_pattern_without_semicolon})"  # noqa

    # Match everything until the end of the line.
    # Match End of line "\n" and "space" characters
    end_pattern = r".*[\n ]*"

    # Object / Subject pattern (These patterns are not sufficient to trigger segmentation)
    object_line_pattern = "(?:^.{,5}(?:Objet|Subject|Sujet) ?\n? ?: *\n?)" + end_pattern

    # Starters are separated from the meta-values by
    # a semicolon and optional spaces/line breaks
    full_generic_meta_pattern = rf"(?:{starter_pattern}{end_pattern}(?:{object_line_pattern})*)+"

    return (full_generic_meta_pattern,)
+ """ + + def __init__( + self, + input_columns: str = "messages", + output_columns: str = "last_message", + include_tags: List[str] = None, + exclude_tags: List[str] = None, + sep: str = "\n", + n_messages: Union[int, None] = 1, + stop_at: Iterable[str] = ("GREETINGS",), + ): + """ + Parameters + ---------- + input_columns: str + Input columns for the transform operation + output_columns: str + Outputs columns for the transform operation + include_tags: List[str] + Message tags to be included in the text extraction + exclude_tags: List[str] + Message tags to be excluded from the text extraction + sep: str + Separation symbol to join text parts + n_messages: Union[int, None] + Number of messages to take into account (starting with the latest) + stop_at: List[str] + When stop_at tags are encountered, stop extracting text of the message + """ + super().__init__( + input_columns=input_columns, + output_columns=output_columns, + ) + + if include_tags and exclude_tags: + raise ValueError(f"{type(self).__name__} :" "You should specify only of include_tags/exclude_tags") + + self.include_tags = include_tags + self.exclude_tags = exclude_tags + self.sep = sep + self.n_messages = n_messages + self.stop_at = stop_at + + def extract(self, message_list: List[Message]) -> str: + """ + Method to extract text parts from a list of messages. 
def extract(self, row: MelusineDataset) -> List[str]:
    """
    Concatenate the tokens of every input column into a single list.

    The tokens of two consecutive columns are separated by `pad_size` occurrences of
    `sep_token`. No padding is added before the first column or after the last one.

    Parameters
    ----------
    row: MelusineDataset
        Emails input data (indexed by column name)

    Returns
    -------
    _: List[str]
        List of extracted tokens
    """
    pad_pattern = [self.sep_token] * self.pad_size

    tokens: List[str] = []
    for position, col in enumerate(self.input_columns):
        # Insert the padding between columns only: appending it after every column and
        # slicing it off afterwards (`tokens[:-pad_size]`) returns nothing when pad_size is 0.
        if position > 0:
            tokens.extend(pad_pattern)
        tokens.extend(row[col])

    return tokens
def compile_tag_regex(self, tag: str) -> re.Pattern:
    """
    Validate and compile the regex associated with the input tag.

    The tag is looked up as an attribute of the instance. Its value may be a regex string,
    an iterable of regex strings (joined with "|") or an already compiled pattern.

    Parameters
    ----------
    tag: str
        Tag of interest

    Returns
    -------
    _: re.Pattern
        Compiled regex

    Raises
    ------
    ValueError
        If the tag is unknown, if its regex is malformed or if its value has an unsupported type
    """
    # Resolve the attribute once (tag attributes may be computed properties)
    missing = object()
    regex = getattr(self, tag, missing)
    if regex is missing:
        raise ValueError(f"Unknown tag {tag}")

    # If a list is provided, pipe it into a string
    if (not isinstance(regex, str)) and isinstance(regex, Iterable):
        regex = "|".join(regex)

    # Already compiled: nothing to do
    if isinstance(regex, re.Pattern):
        return regex

    if not isinstance(regex, str):
        raise ValueError(
            f"Tag {tag} does not return any of the supported types : "
            "str "
            "List[str] "
            "re.Pattern "
            f"Got {type(regex)} instead."
        )

    # Compile regex from string, keeping the original regex error as the cause
    try:
        return re.compile(regex, flags=self.default_regex_flag)
    except re.error as error:
        raise ValueError(f"Invalid regex for tag {tag}:\n{regex}") from error
@staticmethod
def clean_up_after_split(parts: List[Union[str, None]]) -> List[str]:
    """
    Tidy up the chunks produced by the sentence split.

    Empty chunks are dropped. A chunk made only of spaces, line breaks and ".", "?", "!"
    is glued back to the end of the previous kept chunk (if any). Any other chunk is kept
    with its leading / trailing line breaks removed.

    Parameters
    ----------
    parts: List[Union[str, None]]
        Raw chunks returned by the split

    Returns
    -------
    clean_parts: List[str]
        Cleaned chunks
    """
    punctuation_only = r"^[ .?!\n]+$"
    cleaned: List[str] = []

    for chunk in parts:
        # Skip None and empty strings
        if not chunk:
            continue

        if cleaned and re.search(punctuation_only, chunk):
            # Give the punctuation back to the sentence it was split from
            cleaned[-1] = cleaned[-1] + chunk
        else:
            cleaned.append(chunk.strip("\n"))

    return cleaned
def find_matching_regex_patterns(self, part: str, regex: TagPattern) -> List[str]:
    """
    List the regex patterns that match the beginning of a text part.

    Parameters
    ----------
    part: str
        Text part to test
    regex: TagPattern
        A regex string, an iterable of regex strings or a compiled pattern

    Returns
    -------
    _: List[str]
        Pattern strings that matched (for a compiled pattern, its `pattern` attribute)
    """
    # Tag defined with a string regex
    if isinstance(regex, str):
        if re.match(regex, part, flags=self.default_regex_flag):
            return [regex]
        return []

    # Tag defined with a list of string regexes: keep each one that matches
    if isinstance(regex, Iterable):
        return [
            candidate
            for candidate in regex
            if re.match(candidate, part, flags=self.default_regex_flag)
        ]

    # Tag defined with a compiled pattern (compiled flags apply, not the default ones)
    return [regex.pattern] if regex.match(part) else []
@Tag
def GREETINGS(self) -> Union[str, List[str], re.Pattern]:
    """
    Regex patterns for the sentences closing an email.
    Beware: this tag usually marks the end of a message.
    Ex: "Cordialement"
    """
    time_alternatives = "|".join(self.ENGLISH_TIMES)

    french_closings = [
        r"^.{0,30}cordialement.{0,30}$",
        r"^.{0,5}sinc[èe]rement.{0,30}$",
        r"^.{0,10}cdl?t.{0,16}$",
        r"^.{0,10}bien aimablement.{0,16}$",
        r"^.{0,10}courtoisement.{0,16}$",
        r"^.{0,10}bien [àa] (?:toi|vous).{0,16}$",
        r"^.{0,10}sentiments? (?:d[ée]vou[ée]s?|mutualistes?).{0,16}$",
        r"^.{0,10}(Veuillez.{,3})?(accepte[zr] .{,8}|receve[zr] .{,8})?(meilleure?s?|sinc[eè]res?|cordiale?s?)? ?(salutations?|sentiments?).{0,16}$",
        r"^.{0,45}(?:(?:l')?expression|assurance) de (?:nos|mes) sentiments.{0,16}$",
        r"^.{0,50}(?:salutations?|sentiments?) distingu.{0,30}$",
        r"^.{0,10}Respectueusement.{0,16}$",
        r"^.{0,20}(?:souhait.{,10})?(?:continuation|r[ée]ception).{0,16}$",
        r"^.{0,20}dans l'attente de (?:votre (?:retour|r[ée]ponse)|vous lire).{0,16}$",
        r"^.{0,30}(?:une )bonne r[ée]ception.{0,16}$",
        r"^.{0,3}Bonne r[ée]ception.{0,3}$",
        r"^.{0,3}votre bien d[ée]vou[ée]e?.{0,3}$",
        r"^.{0,3}amicalement votre.{0,3}$",
        r"^.{,3}je vous prie de croire.{,50}(expression|assurance)?.{,50}(consideration|salutations|sentiments).{,30}$",
    ]

    english_closings = [
        r"^.{0,3}regards.{0,3}$",
        r"^.{0,3}(best|warm|kind|my) *(regards|wishes)?.{0,3}$",
        r"^.{0,3}(yours)? *(truly|sincere?ly|respectfull?y|faithfully).{0,3}$",
        r"^.{0,3}yours.{0,3}$",
        r"^.{0,3}cheers.{0,3}$",
        r"^.{0,3}(talk|write|see you|speak to you) soon.{0,3}$",
        r"^.{0,3}take care.{0,3}$",
        r"^.{0,3}catch you later.{0,3}$",
        # Ex: "Have a great week-end" (time words come from ENGLISH_TIMES)
        rf"^.{{0,3}}have an? (blessed|excellent|good|fantastic|great) ({time_alternatives}).{{0,3}}$",
        r"i am looking forward to hearing from you.{0,3}$",
        r"^.{0,3}looking forward to your reply.{0,3}$",
        r"^.{0,3}hoping to hear from you( soon)?.{0,3}$",
    ]

    # French patterns first, English patterns last (order is preserved)
    return french_closings + english_closings
@Tag
def FOOTER(self) -> Union[str, List[str], re.Pattern]:
    """
    Tag associated with email footer sentences
    (confidentiality disclaimers, "sent from my phone" mentions, antivirus banners, etc).
    Ex: "Envoyé de mon iPhone"
    """
    # Disclaimer sentences may start anywhere in the first 40 characters of a part
    prefix = r"^.{0,40}"
    suffix = r".*"

    text_list = [
        # French
        r"[aà] [l]['’ ]attention de.{0,80}$",
        r"Les informations contenues dans ce courrier [ée]lectronique et toutes les pi[eè]ces qui y sont jointes",
        r"Le pr[ée]sent document est couvert par le secret professionnel",
        r"Ce message et toutes les pi[eè]ces jointes sont confidentiels",
        r"Ce message et les fichiers [ée]ventuels",
        r"Ce message est confidentiel",
        # NB: the comma below matters, without it the next two patterns are silently
        # concatenated into a single regex that matches neither sentence.
        r"Ce message contient des informations confidentielles",
        r"Si vous avez recu ce message ([ée]lectronique )?par erreur",
        r"Toute modification, [ée]dition, utilisation",
        r"Tout usage, communication ou reproduction",
        r"Il ne peut etre lu, ni copi[ée], ni communiqu[ée], ni utilis[ée]",
        r"L'[ée]metteur d[ée]cline toute responsabilit[ée]",
        r"Sauf mention contraire, le present message",
        r"Afin de faciliter nos echanges et optimiser le traitement",
        r"Afin de contribuer au respect de l'environnement",
        r"N'imprimez ce message que si cela est indispensable",
        r"Pensez a l'environnement avant d'imprimer ce message",
        r"Droit a la d[ée]connexion",
        r"Ceci est un mail automatique",
        r"Les formats de fichiers acceptés sont : PDF, DOC, DOCX, JPEG, JPG, TIFF, TXT, ODT, XLS, XLSX",
        r"Tout autre format de fichiers ne sera pas transmis au dossier",
        # English
        r"This message and any attachments are confidential",
        r"This e-mail and any files transmitted",
        r"If you have received this (?:message|email) in error",
        r"Any unauthorized modification",
        r"The sender shall not be liable",
    ]

    disclaimer_regex_list = [f"{prefix}{x}{suffix}" for x in text_list]

    miscellaneous_footer_regex = [
        r"(?:courrier electronique|virus|antivirus){2,}",
        r"^.{0,10}Partag[ée] [aà].{0,5} partir de Word pour \b\w+\b$",
        r"^.{0,10}Provenance : Courrier pour Windows",
        r"^.{0,10}garanti sans virus.{0,30}",
        r"^.{0,10}www.avg.com",
        r"^.{0,10}www.avast.com",
        r"^.{0,10}T[ée]l[ée]charg.{0,10}$",
        # Up to 2 free characters around the word (same shape as "Courrier" / "Pour Windows" below)
        r"^.{0,2}Obtenir.{0,2}$",
        r"(?:Obtenez|T[ée]l[ée]charge[zr])? ?Outlook pour .*",
        r"^.{0,10}La visualisation des fichiers PDF n[ée]cessite.*",
        r"^.{0,10}Si vous recevez ce message par erreur",
        r"^.{0,10}Retrouvez-nous sur www\.maif-\w+\.fr",
        (
            r"^.{0,10}afin de contribuer au respect de l'environnement, merci de n'imprimer ce courriel qu'en c"
            r"as de n[ée]cessit[ée]"
        ),
        (
            r"^.{0,10}(?:Envoy[ée]|Numeris[ée]|Partag[ée]) de(?:puis)?\s*(?:mon)?\s*(?:mobile|smartphone|appareil|"
            r"\biP.|Galaxy|Yahoo|T[ée]l[ée]phone|(?:l'application))"
        ),
        r"^.{0,25}pour Android.{0,5}$",
        r"^.{0,5}Envoy[ée] avec.{0,10}$",
        r"^.{0,5}Envoy[ée] [àa] partir de.{0,35}$",
        r"^.{0,2}Courrier.{0,2}$",
        r"^.{0,2}Pour Windows.{0,2}$",
        r"^.{0,10}Scann[ée] avec.{,30}$",
        r"^.{,3}sans virus.{,3}$",
        # English
        r"^.{0,5}Sent with .*",
        r"^.{0,5}Sent from my .*",
    ]

    return disclaimer_regex_list + miscellaneous_footer_regex
+ Ex: "Tel : 0600000000" + """ + + # Jobs lines + jobs = [ + r"associations?", + r"(?:\w+ )?analyste?", + r"conducteur", + r"[ée]quipe", + r"soci[ée]t[ée]", + r"secr[ée]taires?", + r"secr[ée]tariats?", + r"directions?", + r"services?", + r"assistante?", + r"gestionnaire", + r"technicienn?e?", + r"conseill[eè]re?", + r"maitres?", + r"avocats?", + r"s\.a\.s\.", + r"squad", + r"charg[ée]e? d['e]\s*\w+(?:\s*\w+){,2}", + # English + r"Lead", + r"Chief", + r"VP", + r"C.O", + ] + job_regex = r"\b(" + r"|".join(jobs) + r")\b" + line_with_known_job = rf"(?:^ *.{{,5}}{job_regex}( +{self.word_block(6)})?(?:\n+|$))" + + # Street address regex + street_word_list = [ + r"all[ée]e", + r"avenue", + r"boulevard", + r"chemin", + r"cours", + r"[ée]splanade", + r"h?ameau", + r"impasse", + r"lotissement", + r"passage", + r"place", + r"square", + r"quai", + r"r[ée]sidence", + r"rue", + r"sentier", + ] + street_word_pattern = "(" + "|".join(street_word_list) + ")" + + # A number (house number) or range, free words (up to 2), an equivalent of street (rue, allée, etc) + # and more free words (up to 5), free chars at the end (up to 2) + street_address_regex = ( + r"^ *\d+(?:-\d+)?(?:bis|ter)?,? +(\w+\b *){,2}\b" + street_word_pattern + r"\b *(\w+\b[ -]*){,5}.{,2}$" + ) + + # Email address + email_address_regex = r"(?:[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})" + + return [ + # Phone / Fax + r"(?:^.{,3}(?:T[ée]l(?:[ée]phone)?\.?|mobile|phone|num[ée]ro|ligne).{,20}(?: *(?:\n+|$)))", + ( + r"^(.{,10}:? ?\(?((?:\+|00)\(?33\)?(?: ?\(0\))?|0)\s*[1-" + r"9]([\s.-]*\d{2}){4}.{,10}){,3}" + rf"({email_address_regex}.{{,10}})?" + "( *(\n+|$))" + ), + r"^.{,3}(T[ée]l[ée]?(phone|copie)?|Fax|mobile|phone|num[ée]ro|ligne).{,20}$", + r"^.{,3}Appel non surtax[ée].{,3}$", + # Street / Address / Post code + street_address_regex, + r"^.{,3}Adresse.{,3}$", + r"(?: *(?:BP|Boite Postale) *\d{,6} .{,30}(?: *(?:\n|$)))", + r"(?: *\b\d{5}\b(?: *(?:\n|$))?(?: *(?:\S+(?: +\S+){,5})? 
def detect_name_signature(self, tags: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """
    Detect lines made of a first name / surname and tag them as "SIGNATURE_NAME".
    Ex: Mr Joe Dupond

    Only parts carrying the default tag are candidates. A candidate containing one of
    the forbidden words (ex: "Attention Danger") keeps its tag.

    Parameters
    ----------
    tags: List[Tuple[str, str]]
        Original tags

    Returns
    -------
    _: List[Tuple[str, str]]
        Post processed tags
    """
    # A capitalized word: an uppercase letter followed by up to 10 letters, "-" or "'".
    # The letter range is spelled "A-Za-z" on purpose: "A-z" would also accept [ \ ] ^ _ `
    capitalized_words: str = r"[A-Z][-'A-Za-zÀ-ÿ]{,10}"
    particles: str = r"le|d[ei]|d'?|v[oa]n|del"

    # 2 to 5 capitalized words (optionally hyphenated / preceded by a particle) filling the line.
    # Compiled once per call rather than once per tag.
    line_with_name = re.compile(
        rf"(?:^[ >]*{capitalized_words}(?:-{capitalized_words})?(?:(?: +(?:{particles}"
        rf"))?\.? {{,2}}{capitalized_words}(?:-{capitalized_words})?){{1,4}}\.? *(?:\n+|$))"
    )

    # Forbidden words (lowercase)
    forbidden_words: Set[str] = {"urgent", "attention"}

    new_tags: List[Tuple[str, str]] = []
    for tag, text in tags:
        if (
            tag == self.default_tag
            and line_with_name.match(text)
            and forbidden_words.isdisjoint(text.lower().split())
        ):
            tag = "SIGNATURE_NAME"

        new_tags.append((tag, text))

    return new_tags
def process_transfered_mail(self, message_list: List["Message"]) -> Tuple[List["Message"], Optional[str]]:
    """
    Run all transformations related to transferred emails.

    Args:
        message_list: Messages of the conversation

    Returns:
        message_list: Messages of the conversation, without the leading
            transfer message when it only holds ignorable tags
        clean_address_from: Sender address extracted from the meta data of the
            first remaining message, None when no address could be extracted
    """
    # Nothing to filter or extract from an empty conversation
    if not message_list:
        return message_list, None

    # Filter out transfer message (contains only irrelevant tags)
    message_list = self.filter_message_list(message_list)

    # Extract email address data from the transition pattern of the first message.
    # A falsy result (no address found) is normalised to None.
    clean_address_from: Optional[str] = self.extract_email_address(message_list[0]) or None

    return message_list, clean_address_from


def extract_email_address(self, message: "Message") -> Optional[str]:
    """
    Extract sender email address from message meta (transition pattern).

    The first non-empty capture of ``self.meta_email_address_regex`` found in
    ``message.meta`` is the only candidate: it is returned when it contains
    an "@", otherwise no address is returned.

    Args:
        message: Message with text and metadata

    Returns:
        extracted_address_from: Extracted sender address if available
    """
    if not message.meta:
        return None

    for match in re.finditer(self.meta_email_address_regex, message.meta):
        # Work on match objects rather than re.findall output: findall returns
        # plain strings when the pattern has fewer than two groups, which would
        # be iterated character by character. Fall back to the whole match when
        # the pattern defines no group.
        candidates = match.groups() or (match.group(0),)
        for candidate in candidates:
            # Skip groups that did not take part in the match
            if candidate:
                # Sanity check on address
                return candidate if "@" in candidate else None

    return None


def filter_message_list(self, message_list: List["Message"]) -> List["Message"]:
    """
    Drop the first message when it only contains ignorable parts.

    The first message is removed when every tag returned by its
    ``extract_parts()`` starts with one of ``self.tags_to_ignore`` and at least
    one other message remains. Otherwise the list is returned unchanged.

    Args:
        message_list: Messages of the conversation

    Returns:
        message_list: Filtered list of messages
    """
    if not message_list:
        return message_list

    parts = message_list[0].extract_parts()
    # str.startswith accepts a tuple: True when the tag starts with any ignorable prefix
    contains_only_tags_to_ignore = all(tag.startswith(self.tags_to_ignore) for tag, _ in parts)

    if contains_only_tags_to_ignore and len(message_list) > 1:
        return message_list[1:]

    return message_list
@staticmethod
def default_flag_text(
    text: str,
    flag_dict: Dict[str, str],
    add_spaces: bool = True,
    remove_multiple_spaces: bool = True,
) -> str:
    """
    Replace remarkable expressions of a text by flags.
    Ex: 0123456789 => flag_phone_

    Parameters
    ----------
    text: str
        Text to be flagged
    flag_dict: Dict[str, str]
        Flagging dict with regex as key and replace_text as value.
        A value may itself be a dict, in which case it is applied recursively.
    add_spaces: bool
        If true, add spaces around flags
    remove_multiple_spaces: bool
        If True, remove multiple spaces after flagging

    Returns
    -------
    text: str
        Flagged text, stripped of leading and trailing whitespace
    """
    for pattern, replacement in flag_dict.items():
        if isinstance(replacement, dict):
            # Nested flag dict: delegate the whole sub-dict to a recursive call
            text = DeterministicTextFlagger.default_flag_text(
                text=text,
                flag_dict=replacement,
                add_spaces=add_spaces,
                remove_multiple_spaces=remove_multiple_spaces,
            )
            continue

        # Surrounding spaces prevent the flag from merging with neighbouring words
        substitute = " " + replacement + " " if add_spaces else replacement
        # Patterns are applied case-insensitively
        text = re.sub(pattern, substitute, text, flags=re.I)

    if remove_multiple_spaces:
        text = re.sub(" +", " ", text)
    return text.strip()


def flag_text(self, text: str) -> str:
    """
    Flag a text using the flags and options configured on the instance.

    Parameters
    ----------
    text: str
        Text to be flagged

    Returns
    -------
    _: str
        Flagged text
    """
    # Join collocations
    return self.default_flag_text(
        text,
        self.text_flags,
        add_spaces=self.add_spaces,
        remove_multiple_spaces=self.remove_multiple_spaces,
    )
+ + Parameters + ---------- + text: str + Text to be flagged + + Returns + ------- + _: str + Flagged text + """ + # Join collocations + text = DeterministicTextFlagger.default_flag_text( + text, + self.substitutions, + add_spaces=False, + remove_multiple_spaces=True, + ) + + return text + + +class DateProcessor(MelusineTransformer): + + """ + Parse string date to iso format string date + """ + + ISO_FORMAT = "%Y-%m-%d" + LANGUAGE = ["en_US", "fr_FR", "es_ES", "it_IT", "nl_NL", "de_DE", "tr_TR"] + CALENDAR_ABBR = { + "lu,": "lun", + "ma,": "mar", + "me,": "mer", + "je,": "jeu", + "jeudii": "jeudi", + "ve,": "ven", + "sa,": "sam", + r"^di,": "dim", + r"\sjune\s": " jun ", + r"\sjuly\s": " jul ", + } + FRENCH_CALENDAR_ABBR = { + "fev ": "févr ", + "jun": "juin ", + "jul ": "juil ", + "aout": "août", + "sep ": "sept ", + "dec ": "déc ", + "iuillet": "juillet", + "juihet": "juillet", + "sptembre": "septembre", + "seplentre": "septembre", + "septembm": "septembre", + "septernbre": "septembre", + "juillat": "juillet", + "decembre": "décembre", + "fevrier": "février", + } + PATTERN_DATE = { + r"(\d{4}-\d{2}-\d{2})": "YYYY-MM-DD", + r"(\d{4}/\d{2}/\d{2})": "YYYY/MM/DD", + r"(\d{2}-\d{2}-\d{4})": "DD-MM-YYYY", + r"(\d{2}/\d{2}/\d{4})": "DD/MM/YYYY", + r"([A-zÀ-ÿ]{4,10}, \d{1,2} [A-zÀ-ÿ]{3,4}, \d{4})": "dddd, DD MMM, YYYY", + r"([A-zÀ-ÿ]{4,10}, \d{1,2} [A-zÀ-ÿ]{3,4} \d{4})": "dddd, DD MMM YYYY", + r"([A-zÀ-ÿ]{4,10} \d{1,2} [A-zÀ-ÿ]{3,4} \d{4})": "dddd DD MMM YYYY", + r"([A-zÀ-ÿ]{4,10}, \d{1,2} [A-zÀ-ÿ]{,10}, \d{4})": "dddd, DD MMMM, YYYY", + r"([A-zÀ-ÿ]{4,10}, \d{1,2} [A-zÀ-ÿ]{,10} \d{4})": "dddd, DD MMMM YYYY", + r"([A-zÀ-ÿ]{4,10} \d{1,2} [A-zÀ-ÿ]{,10} \d{4})": "dddd DD MMMM YYYY", + r"(\d{1,2} [A-zÀ-ÿ]{3,4} \d{4})": "DD MMM YYYY", + r"(\d{1,2} [A-zÀ-ÿ]{3,4}, \d{4})": "DD MMM, YYYY", + r"(\d{1,2} [A-zÀ-ÿ]{,10} \d{4})": "DD MMMM YYYY", + r"(\d{1,2} [A-zÀ-ÿ]{,10}, \d{4})": "DD MMMM, YYYY", + r"([A-zÀ-ÿ]{3}, \d{1,2} [A-zÀ-ÿ]{3}, \d{4})": "ddd, DD MMM, YYYY", + r"([A-zÀ-ÿ]{3}, 
\d{1,2} [A-zÀ-ÿ]{3} \d{4})": "ddd, DD MMM YYYY", + r"([A-zÀ-ÿ]{3}\. \d{1,2} [A-zÀ-ÿ]{,10}\.? \d{4})": "ddd DD MMM YYYY", + r"([A-zÀ-ÿ]{2}, \d{1,2} [A-zÀ-ÿ]{3}, \d{4})": "ddd, DD MMM, YYYY", + r"([A-zÀ-ÿ]{2}, \d{1,2} [A-zÀ-ÿ]{3} \d{4})": "ddd, DD MMM YYYY", + r"(\d{1,2} [A-zÀ-ÿ]{3} \d{4})": "DD MMM YYYY", + } + + def __init__( + self, + input_columns: str = "date", + output_columns: str = "date", + ) -> None: + """ + Parameters + ---------- + input_columns: str + Input columns for the transform operation + output_columns: str + Outputs columns for the transform operation + """ + super().__init__( + input_columns=input_columns, + output_columns=output_columns, + func=self.parse_date_to_iso, + ) + + @classmethod + def parse_date_to_iso(cls, date_: str) -> Optional[str]: + """ + This function use the package arrow to convert a date from string format with any + type of format (i.e. vendredi 8 juillet 2020 -> 2020-07-08) + This package is prefered to datetime because datetime use locale settings and + arrow do not use locale setting + Visit https://arrow.readthedocs.io/en/latest/ for more information + + Parameters + ---------- + date_: str + date read from mail with any format + + Returns + ------- + date_: str + date_ as string with iso format (YYYY-MM-DD) + """ + # Initialization + matched_group: Optional[str] = None + date_ = date_ or "" + date_ = date_.lower() + + for pattern, format_ in cls.PATTERN_DATE.items(): + pattern_compiled = re.compile(pattern) + matched_date_format = pattern_compiled.search(date_) + if matched_date_format is not None: + matched_group = matched_date_format.group() + + # Replace single digit by 0+digit (i.e. 
@staticmethod
def process_single_digit(matched_group: str, pattern: str) -> str:
    """
    Left-pad single digit numbers of a matched date with a zero.
    i.e.: 9 -> 09

    Nothing is changed unless the regex that produced the match contains a
    ``\\d{1,2}`` token, i.e. unless a one digit number may be present.

    Parameters
    ----------
    matched_group: str
        Date text matched by ``pattern``
    pattern: str
        Regex pattern that produced ``matched_group``

    Returns
    -------
    matched_group: str
        Date text with single digits zero-padded
    """
    if r"\d{1,2}" not in pattern:
        return matched_group

    # Case when digit is in the middle of the string
    inner = re.search(r"\s\d\s", matched_group)
    if inner is not None:
        digit = inner.group().strip()
        matched_group = matched_group.replace(f" {digit} ", f" 0{digit} ")

    # Case when digit is the first char of the string
    leading = re.search(r"^\d\s", matched_group)
    if leading is not None:
        digit = leading.group().strip()
        # The match spans exactly two characters (digit + whitespace): rebuild the prefix
        matched_group = f"0{digit} " + matched_group[2:]

    return matched_group
abbrevations so we use arrow package + return arrow.get(matched_group, format_).datetime.strftime(cls.ISO_FORMAT) From 734665d138c8b60f385c87c1752f9bc2c562ade4 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:39:25 +0100 Subject: [PATCH 14/37] :white_check_mark: Refactored melusine tests --- tests/backend/__init__.py | 0 tests/backend/test_backends.py | 151 ++++ tests/base/__init__.py | 0 tests/base/test_melusine_detectors.py | 32 + tests/base/test_melusine_regex.py | 153 ++++ tests/base/test_melusine_transformers.py | 36 + tests/base/test_message.py | 187 +++++ tests/conf/__init__.py | 0 tests/conf/test_config.py | 94 +++ tests/conftest.py | 95 +++ tests/data/__init__.py | 0 tests/data/test_data.py | 19 + tests/detectors/__init__.py | 0 tests/detectors/test_reply_detector.py | 239 ++++++ tests/detectors/test_thanks_detector.py | 142 ++++ tests/detectors/test_transfer_detector.py | 344 ++++++++ .../detectors/test_vacation_reply_detector.py | 210 +++++ tests/docs/__init__.py | 0 tests/docs/test_configurations.py | 7 + tests/docs/test_detectors.py | 23 + tests/docs/test_getting_started.py | 10 + tests/fixtures/__init__.py | 0 tests/fixtures/backend.py | 284 +++++++ tests/fixtures/basic_emails.py | 75 ++ tests/fixtures/docs.py | 16 + tests/fixtures/pipelines.py | 41 + tests/fixtures/processors.py | 29 + tests/functional/__init__.py | 0 tests/functional/test_emails_fixtures.py | 493 +++++++++++ tests/functional/test_emails_generic.py | 15 + tests/huggingface/__init__.py | 0 .../huggingface/test_basic_classification.py | 63 ++ tests/io/__init__.py | 0 tests/io/test_io_mixin.py | 50 ++ tests/pipeline/__init__.py | 0 tests/pipeline/test_pipeline.py | 157 ++++ tests/pipeline/test_pipeline_basic.py | 314 +++++++ tests/pipeline/test_pipeline_testing.py | 52 ++ tests/pipeline/test_pipeline_with_ml.py | 0 tests/processors/__init__.py | 0 tests/processors/test_content_tagger.py | 764 ++++++++++++++++++ tests/processors/test_processors.py | 569 +++++++++++++ 
tests/regex/__init__.py | 0 tests/regex/test_builtin_regex.py | 26 + tests/utils/__init__.py | 0 tests/utils/test_utils.py | 6 + 46 files changed, 4696 insertions(+) create mode 100644 tests/backend/__init__.py create mode 100644 tests/backend/test_backends.py create mode 100644 tests/base/__init__.py create mode 100644 tests/base/test_melusine_detectors.py create mode 100644 tests/base/test_melusine_regex.py create mode 100644 tests/base/test_melusine_transformers.py create mode 100644 tests/base/test_message.py create mode 100644 tests/conf/__init__.py create mode 100644 tests/conf/test_config.py create mode 100644 tests/conftest.py create mode 100644 tests/data/__init__.py create mode 100644 tests/data/test_data.py create mode 100644 tests/detectors/__init__.py create mode 100644 tests/detectors/test_reply_detector.py create mode 100644 tests/detectors/test_thanks_detector.py create mode 100644 tests/detectors/test_transfer_detector.py create mode 100644 tests/detectors/test_vacation_reply_detector.py create mode 100644 tests/docs/__init__.py create mode 100644 tests/docs/test_configurations.py create mode 100644 tests/docs/test_detectors.py create mode 100644 tests/docs/test_getting_started.py create mode 100644 tests/fixtures/__init__.py create mode 100644 tests/fixtures/backend.py create mode 100644 tests/fixtures/basic_emails.py create mode 100644 tests/fixtures/docs.py create mode 100644 tests/fixtures/pipelines.py create mode 100644 tests/fixtures/processors.py create mode 100644 tests/functional/__init__.py create mode 100644 tests/functional/test_emails_fixtures.py create mode 100644 tests/functional/test_emails_generic.py create mode 100644 tests/huggingface/__init__.py create mode 100644 tests/huggingface/test_basic_classification.py create mode 100644 tests/io/__init__.py create mode 100644 tests/io/test_io_mixin.py create mode 100644 tests/pipeline/__init__.py create mode 100644 tests/pipeline/test_pipeline.py create mode 100644 
def test_reset_backend():
    """Check that the active backend can be switched to a DictBackend and reset afterwards."""
    dict_data = {"input_col": "àçöbïù"}
    df_data = pd.DataFrame([dict_data])
    processor = Normalizer(input_columns="input_col", output_columns="output_col")

    try:
        backend.reset(DictBackend())
        dict_out = processor.transform(dict_data)
        assert isinstance(dict_out, dict)
    finally:
        # The backend is global state: always reset it, even when the assertion
        # above fails, so that later tests do not inherit the dict backend.
        backend.reset()

    df_out = processor.transform(df_data)
    assert isinstance(df_out, pd.DataFrame)


def test_unknown_backend():
    """Check that resetting the backend with an unknown value raises a ValueError."""
    with pytest.raises(ValueError):
        backend.reset("unknown")


def test_backend_error():
    """Check that accessing an unset backend raises an AttributeError."""
    backend._backend = None
    try:
        with pytest.raises(AttributeError):
            _ = backend.backend
    finally:
        # Re-create a usable backend whatever the outcome of the check above
        backend.reset()
dict_data = {"debug": True} + assert dict_backend.check_debug_flag(dict_data) is True + + +def test_check_debug_flag_pandas(): + df = pd.DataFrame([{"input_col": "àçöbïù"}]) + pandas_backend = PandasBackend() + + assert pandas_backend.check_debug_flag(df) is False + + df.debug = False + assert pandas_backend.check_debug_flag(df) is False + + df.debug = True + assert pandas_backend.check_debug_flag(df) is True + + +def test_add_fields_dict(): + dict_data1 = {"col1": 1, "col8": 8} + dict_data2 = {"col1": 10, "col2": 2} + dict_backend = DictBackend() + + data = dict_backend.add_fields(dict_data1, dict_data2) + assert data == {"col1": 10, "col2": 2, "col8": 8} + + +def test_dict_backend(backend_base_data, backend_testcase): + """Test""" + expected_data = backend_testcase["expected_data"] + func = backend_testcase["func"] + input_columns = backend_testcase["input_columns"] + output_columns = backend_testcase["output_columns"] + kwargs = backend_testcase.get("kwargs", dict()) + + dict_backend = DictBackend() + n_values = len(list(backend_base_data.values())[0]) + + for i in range(n_values): + data_dict = {key: backend_base_data[key][i] for key in backend_base_data} + expected_data_dict = {key: expected_data[key][i] for key in expected_data} + + data_dict_transform = dict_backend.apply_transform( + data=data_dict, func=func, input_columns=input_columns, output_columns=output_columns, **kwargs + ) + assert data_dict_transform == expected_data_dict + + +def test_dict_backend_impossible_situation(): + dict_backend = DictBackend() + with pytest.raises(ValueError): + _ = dict_backend.apply_transform( + data={"a": 0}, func=lambda x: x + 1, input_columns=["int_col"], output_columns=None + ) + + +@pytest.mark.parametrize("progress_bar", [False, True]) +def test_pandas_backend(backend_base_data, backend_testcase, progress_bar): + """Test""" + expected_data = backend_testcase["expected_data"] + func = backend_testcase["func"] + input_columns = backend_testcase["input_columns"] + 
output_columns = backend_testcase["output_columns"] + kwargs = backend_testcase.get("kwargs", dict()) + + pandas_backend = PandasBackend(progress_bar=progress_bar, workers=1) + + df_base = pd.DataFrame(backend_base_data) + + df_expected = pd.DataFrame(expected_data) + df_transform = pandas_backend.apply_transform( + data=df_base, func=func, input_columns=input_columns, output_columns=output_columns, **kwargs + ) + pd.testing.assert_frame_equal(df_transform, df_expected) + + +def test_pandas_backend_multiprocess(backend_base_data, backend_testcase): + """Test""" + expected_data = backend_testcase["expected_data"] + func = backend_testcase["func"] + input_columns = backend_testcase["input_columns"] + output_columns = backend_testcase["output_columns"] + kwargs = backend_testcase.get("kwargs", dict()) + + pandas_backend = PandasBackend(progress_bar=False, workers=2) + + # Test on a small dataset (does not trigger multiprocessing) + df_base = pd.DataFrame(backend_base_data) + df_expected = pd.DataFrame(expected_data) + + df_transform = pandas_backend.apply_transform( + data=df_base, func=func, input_columns=input_columns, output_columns=output_columns, **kwargs + ) + pd.testing.assert_frame_equal(df_transform, df_expected) + + # Augment dataset size to trigger multiprocessing (preserve column type) + df_long = pd.DataFrame({col: value for col in df_base for value in [np.repeat(df_base[col].values, 3, axis=0)]}) + df_expected_long = pd.DataFrame( + {col: value for col in df_expected for value in [np.repeat(df_expected[col].values, 3, axis=0)]} + ) + + df_transform = pandas_backend.apply_transform( + data=df_long, func=func, input_columns=input_columns, output_columns=output_columns, **kwargs + ) + pd.testing.assert_frame_equal(df_transform, df_expected_long) diff --git a/tests/base/__init__.py b/tests/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/base/test_melusine_detectors.py b/tests/base/test_melusine_detectors.py new file mode 
class MyDetector(BaseMelusineDetector):
    """Minimal detector chaining a row-wise and a DataFrame-wise transform method."""

    @property
    def transform_methods(self) -> List[Callable]:
        """Transform methods of the detector, in execution order."""
        return [self.row_method, self.df_method]

    def row_method(self, row, debug_mode=False):
        """Write the first input column suffixed with "_row" to the first output column."""
        source_column = self.input_columns[0]
        target_column = self.output_columns[0]
        row[target_column] = row[source_column] + "_row"
        return row

    def df_method(self, df, debug_mode=False):
        """Write the upper-cased first input column suffixed with "_df" to the second output column."""
        source_column = self.input_columns[0]
        target_column = self.output_columns[1]
        df[target_column] = df[source_column].str.upper() + "_df"
        return df


def test_detector_transform_dataframe_wise():
    """Check that both transform methods are applied by ``transform``."""
    records = [{"input_col": "test0"}, {"input_col": "test1"}]
    detector = MyDetector(
        name="test_detector",
        input_columns=["input_col"],
        output_columns=["row_output", "df_output"],
    )

    df = detector.transform(pd.DataFrame(records))

    row_output = df["row_output"]
    df_output = df["df_output"]
    assert row_output.iloc[0] == "test0_row"
    assert row_output.iloc[1] == "test1_row"
    assert df_output.iloc[0] == "TEST0_df"
    assert df_output.iloc[1] == "TEST1_df"
+ """ + + @property + def positive(self) -> Union[str, Dict[str, str]]: + return r"virus" + + @property + def neutral(self) -> Optional[Union[str, Dict[str, str]]]: + return dict( + NEUTRAL_MEDICAL_VIRUS="corona virus", + NEUTRAL_INSECT="ladybug", + ) + + @property + def negative(self) -> Optional[Union[str, Dict[str, str]]]: + return dict( + NEGATIVE_BUG="bug", + ) + + @property + def match_list(self) -> List[str]: + return [ + "This email contains a virus", + "There is a virus in the ladybug software", + "The corona virus is not a computer virus", + ] + + @property + def no_match_list(self) -> List[str]: + return [ + "This process just had a bug", + "This is a bug not a virus", + "There are ladybugs on the windows", + ] + + +def test_erroneous_substitution_pattern(): + with pytest.raises(ValueError): + regex = VirusRegex(substitution_pattern="12345") + + +def test_method_test(): + regex = VirusRegex() + regex.test() + assert True + + +def test_match_method(): + regex = VirusRegex() + match_data = regex("The computer virus in the ladybug software caused a bug in the corona virus dashboard") + + assert match_data[MelusineRegex.MATCH_RESULT] is False + assert match_data[MelusineRegex.POSITIVE_MATCH_FIELD] == { + "DEFAULT": [{"match_text": "virus", "start": 13, "stop": 18}] + } + assert match_data[MelusineRegex.NEUTRAL_MATCH_FIELD] == { + "NEUTRAL_INSECT": [{"match_text": "ladybug", "start": 26, "stop": 33}], + "NEUTRAL_MEDICAL_VIRUS": [{"match_text": "corona virus", "start": 63, "stop": 75}], + } + assert match_data[MelusineRegex.NEGATIVE_MATCH_FIELD] == { + "NEGATIVE_BUG": [{"match_text": "bug", "start": 52, "stop": 55}] + } + + +def test_direct_match_method(): + regex = VirusRegex() + + bool_match_result = regex.get_match_result("The computer virus") + + assert bool_match_result is True + + bool_match_result = regex.get_match_result( + "The computer virus in the ladybug software caused a bug in the corona virus dashboard" + ) + + assert bool_match_result is False 
+ + +def test_describe_method(capfd): + """ + Test describe method. + """ + regex = VirusRegex() + + # Negative match on bug (group NEGATIVE_BUG) and ignore ladybug and corona virus + regex.describe("The computer virus in the ladybug software caused a bug in the corona virus dashboard") + out, err = capfd.readouterr() + assert "NEGATIVE_BUG" in out + assert "start" not in out + + # Same but include match positions + regex.describe( + "The computer virus in the ladybug software caused a bug in the corona virus dashboard", + position=True, + ) + out, err = capfd.readouterr() + assert "match result is : NEGATIVE" in out + assert "NEGATIVE_BUG" in out + assert "start" in out + + regex.describe("This is a dangerous virus") + out, err = capfd.readouterr() + assert "match result is : POSITIVE" in out + assert "start" not in out + + regex.describe("Nada") + out, err = capfd.readouterr() + assert "The input text did not match anything" in out + + +def test_repr(): + """ + Test __repr__ method + """ + regex = VirusRegex() + assert "VirusRegex" in repr(regex) + + +def test_default_neutral_and_negative(): + """ + Test a regex class using default neutral and negative properties. + """ + + class SomeRegex(MelusineRegex): + """ + Test class. 
+ """ + + @property + def positive(self): + return r"test" + + @property + def match_list(self): + return ["test"] + + @property + def no_match_list(self): + return ["bip bip"] + + regex = SomeRegex() + assert regex.neutral is None + assert regex.negative is None diff --git a/tests/base/test_melusine_transformers.py b/tests/base/test_melusine_transformers.py new file mode 100644 index 0000000..b80b57b --- /dev/null +++ b/tests/base/test_melusine_transformers.py @@ -0,0 +1,36 @@ +import pandas as pd +import pytest + +from melusine.base import MelusineTransformer, TransformError + + +class FakeClass(MelusineTransformer): + def __init__(self): + super().__init__(input_columns="input_col", output_columns="output_col", func=self.my_method) + + def my_method(self, df, debug_mode=False): + raise ValueError + + +def test_transform_error(): + df = pd.DataFrame([{"input_col": "test"}]) + instance = FakeClass() + + with pytest.raises(TransformError, match="FakeClass.*my_method.*input_col"): + _ = instance.transform(df) + + +def test_from_config_from_key_error(): + """Unit test""" + + with pytest.raises(ValueError): + _ = MelusineTransformer.from_config(config_key="key", config_dict={"foo": "bar"}) + + +def test_missing_func(): + """Unit test""" + df = pd.DataFrame([{"a": [1, 2, 3]}]) + transformer = MelusineTransformer(input_columns=["a"], output_columns=["b"]) + + with pytest.raises(AttributeError): + transformer.transform(df) diff --git a/tests/base/test_message.py b/tests/base/test_message.py new file mode 100644 index 0000000..4cea0c2 --- /dev/null +++ b/tests/base/test_message.py @@ -0,0 +1,187 @@ +import re + +from melusine import config +from melusine.message import Message + + +def test_message_repr(): + + message = Message(text="Hello") + + assert re.search(r"meta='NA'", repr(message)) + assert re.search(r"text='Hello'", repr(message)) + + message = Message(text="Hello", meta="someone@domain.fr") + + assert re.search(r"meta='someone@domain.fr'", repr(message)) + 
assert re.search(r"text='Hello'", repr(message)) + + +def test_message_has_tags(): + + message = Message(text="Hello") + message.tags = [ + ("HELLO", "Bonjour"), + ("BODY", "Pouvez-vous"), + ("GREETINGS", "Cordialement"), + ] + + assert not message.has_tags(target_tags=["FOOTER"]) + assert message.has_tags(target_tags=["BODY"]) + assert message.has_tags(target_tags=["FOOTER", "HELLO"]) + + +def test_message_has_tags_stop_at(): + + message = Message(text="Hello") + message.tags = [ + ("HELLO", "Bonjour"), + ("GREETINGS", "Cordialement"), + ("BODY", "Blah Blah Blah"), + ] + + assert not message.has_tags(target_tags=["BODY"], stop_at=["GREETINGS"]) + + +def test_message_has_tags_no_tags(): + + message = Message(text="Hello") + + assert not message.has_tags(target_tags=["BODY"]) + + +def test_message_extract_parts(): + + message = Message(text="Hello") + message.tags = [ + ("HELLO", "Bonjour"), + ("BODY", "Pouvez-vous"), + ("GREETINGS", "Cordialement"), + ] + + assert message.extract_parts(target_tags={"BODY"}) == [("BODY", "Pouvez-vous")] + assert message.extract_parts(target_tags=["GREETINGS", "HELLO"]) == [ + ("HELLO", "Bonjour"), + ("GREETINGS", "Cordialement"), + ] + + +def test_message_extract_parts_stop(): + + message = Message(text="Hello") + message.tags = [ + ("HELLO", "Bonjour"), + ("FOOTER", "Envoyé depuis mon Iphone"), + ("GREETINGS", "Cordialement"), + ("BODY", "Blah Blah Blah"), + ] + + extracted = message.extract_parts(target_tags=["BODY"], stop_at=["FOOTER", "GREETINGS"]) + + assert extracted == [] + + +def test_message_extract_parts_no_tags(): + message = Message(text="Hello") + + assert not message.extract_parts(target_tags={"BODY"}) + + +def test_message_extract_last_body(): + + message = Message(text="Hello") + message.tags = [ + ("HELLO", "Bonjour"), + ("BODY", "Pouvez-vous"), + ("GREETINGS", "Cordialement"), + ] + + assert message.extract_last_body() == [("BODY", "Pouvez-vous")] + + +def test_str(): + # Arrange + message = 
Message(meta="Test\nmeta", text="Hello") + message.tags = [ + ("TAG", "ABC"), + ("TAAG", "ABCD"), + ("TAAAG", "ABCDE"), + ] + + expected_list = [ + r"=+ +Message +=+", + r"-+ +Meta +-+", + r"Test", + r"meta", + r"-+ +Text +-+", + r"ABC\.+TAG", + r"ABCD\.+TAAG", + r"ABCDE\.+TAAAG", + r"=+", + ] + + # Act + result = str(message).strip() + + # Assert + assert len(result.splitlines()) == len(expected_list) + for text_line, regex in zip(result.splitlines(), expected_list): + assert re.match(regex, text_line) + + +def test_str_no_meta(): + # Arrange + message = Message(text="Hello") + message.tags = [ + ("TAG", "ABC"), + ("TAAG", "ABCD"), + ("TAAAG", "ABCDE"), + ] + + expected_list = [ + r"=+ +Message +=+", + r"-+ +Meta +-+", + r"N/A", + r"-+ +Text +-+", + r"ABC\.+TAG", + r"ABCD\.+TAAG", + r"ABCDE\.+TAAAG", + r"=+", + ] + + # Act + result = str(message).strip() + + # Assert + assert len(result.splitlines()) == len(expected_list) + for text_line, regex in zip(result.splitlines(), expected_list): + assert re.match(regex, text_line) + + +def test_str_no_tags(): + # Arrange + message = Message(text="Hello") + + expected_list = [ + r"=+ +Message +=+", + r"-+ +Meta +-+", + r"N/A", + r"-+ +Text +-+", + r"Hello", + r"=+", + ] + + # Act + result = str(message).strip() + + # Assert + assert len(result.splitlines()) == len(expected_list) + for text_line, regex in zip(result.splitlines(), expected_list): + assert re.match(regex, text_line) + + +def test_str_no_conf(reset_melusine_config): + config.reset({"Test": "Test"}) + message = Message(text="test", tags=[("TEST TAG", "TEST TEXT")]) + print(message) + assert True diff --git a/tests/conf/__init__.py b/tests/conf/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conf/test_config.py b/tests/conf/test_config.py new file mode 100644 index 0000000..69fe595 --- /dev/null +++ b/tests/conf/test_config.py @@ -0,0 +1,94 @@ +import logging +import os +import re + +import pytest + +from melusine import config +from 
melusine._config import MelusineConfig, MelusineConfigError + + +def test_load_default_conf(caplog): + with caplog.at_level(logging.WARNING): + + test_conf = MelusineConfig() + test_conf.reset() + + assert test_conf + assert MelusineConfig.LOG_MESSAGE_DEFAULT_CONFIG in caplog.text + + +def test_load_conf_from_env_variable(caplog): + + try: + with caplog.at_level(logging.INFO): + test_conf = MelusineConfig() + os.environ[MelusineConfig.ENV_MELUSINE_CONFIG_DIR] = test_conf.DEFAULT_CONFIG_PATH + test_conf.reset() + + expected_config_path_log = MelusineConfig.LOG_MESSAGE_CONFIG_PATH.format( + config_path=test_conf.DEFAULT_CONFIG_PATH + ) + + assert test_conf + assert MelusineConfig.LOG_MESSAGE_CONFIG_FROM_ENV_VARIABLE in caplog.text + assert expected_config_path_log in caplog.text + assert MelusineConfig.LOG_MESSAGE_DEFAULT_CONFIG not in caplog.text + + finally: + del os.environ[MelusineConfig.ENV_MELUSINE_CONFIG_DIR] + + +def test_load_conf_from_config_path(caplog): + + with caplog.at_level(logging.INFO): + test_conf = MelusineConfig() + test_conf.reset(config_path=test_conf.DEFAULT_CONFIG_PATH) + + expected_config_path_log = MelusineConfig.LOG_MESSAGE_CONFIG_PATH.format( + config_path=test_conf.DEFAULT_CONFIG_PATH + ) + + assert test_conf + assert expected_config_path_log in caplog.text + assert MelusineConfig.LOG_MESSAGE_CONFIG_FROM_ENV_VARIABLE not in caplog.text + assert MelusineConfig.LOG_MESSAGE_DEFAULT_CONFIG not in caplog.text + + +def test_load_conf_from_config_dict(caplog): + + with caplog.at_level(logging.INFO): + test_conf = MelusineConfig() + test_conf.reset(config_dict={"my_key": "hello"}) + + assert test_conf["my_key"] == "hello" + + +def test_config_modif_error(): + + test_conf = MelusineConfig() + test_conf.reset(config_dict={"my_key": "hello"}) + + with pytest.raises(MelusineConfigError, match=re.escape(MelusineConfigError.CONST_CONFIG_ERROR_MESSAGE)): + test_conf["new_key"] = "hey" + + with pytest.raises(MelusineConfigError): + test_conf.pop() + + 
with pytest.raises(MelusineConfigError): + test_conf.popitem() + + +def test_shared_variable(): + + # Shared variable TEST_VAR specified in conf/shared.yaml + # Conf test_shared_variable specified in global.yaml + assert config["global"]["test_shared_variable"] == "test" + + +def test_export_config(tmp_path): + + file_list = config.export_default_config(path=str(tmp_path)) + assert file_list + for file in file_list: + assert file.endswith(".yaml") diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..5dd461e --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,95 @@ +""" +Setup tests and import fixtures +""" +import numpy as np +import pytest + +from melusine import config + +# Declare fixtures +pytest_plugins = [ + "tests.fixtures.backend", + "tests.fixtures.basic_emails", + "tests.fixtures.docs", + "tests.fixtures.pipelines", + "tests.fixtures.processors", +] + + +# =============== Generic fixtures =============== +# Print statements inside fixtures are only visible when running +# pytest -s + + +@pytest.fixture(scope="session") +def df_emails(): + from melusine.data import load_email_data + from melusine.processors import RegexTokenizer + + # Load data + df_emails = load_email_data(type="full") + + # Tokenize text + tokenizer = RegexTokenizer(input_columns="body") + df_emails = tokenizer.transform(df_emails) + + # Add mock meta features + df_emails["test_meta__A"] = np.random.randint(0, 2, size=len(df_emails)) + df_emails["test_meta__B"] = np.random.randint(0, 2, size=len(df_emails)) + + return df_emails + + +# =============== Fixtures with "function" scope =============== +@pytest.fixture(scope="function") +def reset_melusine_config(): + """ + When a test modifies the melusine configuration, this fixture can be used to reset the config. 
+ """ + # Code executed before the test starts + pass + + # Run the test + yield + + # Code executed after the test ends + config.reset() + + +@pytest.fixture(scope="function") +def use_test_config(conf_normalizer, conf_tokenizer, conf_phraser): + """ + Add test configurations. + """ + # Code executed before the test starts + test_conf_dict = config.dict() + + test_conf_dict["test_tokenizer"] = conf_tokenizer + test_conf_dict["test_normalizer"] = conf_normalizer + test_conf_dict["test_phraser"] = conf_phraser + + config.reset(config_dict=test_conf_dict) + + # Run the test + yield + + # Code executed after the test ends + config.reset() + + +@pytest.fixture(scope="function") +def use_dict_backend(): + """ + When a test modifies the melusine backend, this fixture can be used to reset the backend. + """ + from melusine.backend.active_backend import backend + + # =========== Code executed before the test starts =========== + # Use a dict backend to test a pipeline message by message + backend.reset("dict") + + # =========== Run the test =========== + yield + + # =========== Code executed after the test ends =========== + backend.reset() diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/test_data.py b/tests/data/test_data.py new file mode 100644 index 0000000..3ac019f --- /dev/null +++ b/tests/data/test_data.py @@ -0,0 +1,19 @@ +import pytest + +from melusine.data import load_email_data + + +@pytest.mark.parametrize( + "type, expected_column", + [("raw", "body"), ("preprocessed", "tokens"), ("full", "tokens")], +) +def test_load_data(type, expected_column): + + df = load_email_data(type=type) + assert expected_column in df + + +def test_load_data_error(): + + with pytest.raises(ValueError): + _ = load_email_data(type="unsupported_type") diff --git a/tests/detectors/__init__.py b/tests/detectors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/detectors/test_reply_detector.py b/tests/detectors/test_reply_detector.py new file mode 100644 index 0000000..b939fef --- /dev/null +++ b/tests/detectors/test_reply_detector.py @@ -0,0 +1,239 @@ +""" +Unit tests of the ReplyDetector. +""" +import pandas as pd +import pytest +from pandas import DataFrame + +from melusine.detectors import ReplyDetector +from melusine.pipeline import MelusinePipeline + + +def test_instantiation(): + """Instanciation base test.""" + + # Instantiate manually a detector + detector = ReplyDetector( + name="reply", + header_column="clean_header", + ) + assert isinstance(detector, ReplyDetector) + + +@pytest.mark.parametrize( + "row, good_result", + [ + ( + {"reply_text": "Devis habitation"}, + False, + ), + ( + {"reply_text": "tr: Devis habitation"}, + False, + ), + ( + {"reply_text": "re: Envoi d'un document de la Société Imaginaire"}, + True, + ), + ( + {"reply_text": "re : Virement"}, + True, + ), + ( + {"reply_text": ""}, + False, + ), + ], +) +def test_deterministic_detect(row, good_result): + """Method base test.""" + + # Instanciate manually a detector + detector = ReplyDetector( + name="reply", + header_column="clean_header", + ) + # Test method + row = detector.detect(row) + res = row[detector.result_column] + assert res == good_result + + +@pytest.mark.parametrize( + "df_emails, expected_result", + [ + ( + DataFrame( + { + "clean_header": ["Re: Suivi de dossier"], + } + ), + True, + ), + ( + DataFrame( + { + "clean_header": ["Suivi de dossier"], + } + ), + False, + ), + ( + DataFrame( + { + "clean_header": ["Tr: Suivi de dossier"], + } + ), + False, + ), + ( + DataFrame( + { + "clean_header": [""], + } + ), + False, + ), + ], +) +def test_transform(df_emails, expected_result): + """Unit test of the transform() method.""" + + # Copy for later load/save test + df_copy = df_emails.copy() + + # Instantiate manually a detector + detector = ReplyDetector( + name="reply", + header_column="clean_header", + ) + + # Get result 
column name + res_col = detector.result_column + + # Apply the detector on data + df_emails = detector.transform(df_emails) + + # Verify result + result = df_emails[res_col][0] + assert result == expected_result + + +@pytest.mark.parametrize( + "df_emails, expected_result, expected_debug_info", + [ + ( + DataFrame( + { + "clean_header": ["Re: Suivi de dossier"], + } + ), + True, + { + "ReplyRegex": { + "match_result": True, + "negative_match_data": {}, + "neutral_match_data": {}, + "positive_match_data": {"DEFAULT": [{"match_text": "re:", "start": 0, "stop": 3}]}, + }, + "reply_text": "re: suivi de dossier", + }, + ), + ], +) +def test_transform_debug_mode(df_emails, expected_result, expected_debug_info): + """Unit test of the debug mode.""" + + # Copy for later load/save test + df_copy = df_emails.copy() + + # Instanciate manually a detector + detector = ReplyDetector( + name="reply", + header_column="clean_header", + ) + + # Get column names + res_col = detector.result_column + debug_dict_col = detector.debug_dict_col + + # Transform data + df_emails.debug = True + df_emails = detector.transform(df_emails) + + # Collect results + result = df_emails[res_col].iloc[0] + debug_result = df_emails[debug_dict_col].iloc[0] + + # Test result + assert result == expected_result + assert debug_result == expected_debug_info + + +@pytest.mark.parametrize( + "df, expected_result", + [ + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["Re: Suivi de dossier"], + "body": ["Bonjour,\nle traitement de ma demande est deplorable.\nje suis tres en colere.\n"], + } + ), + True, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["re: Envoi d'un document de la Société Imaginaire"], + "body": ["Bonjour,\nLe traitement de ma demande est déplorable.\nJe suis très en colère.\n"], + } + ), + True, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["te: Virement"], + "body": [ + "Bonjour,\nJe vous confirme l'annulation du rdv du 
01/01/2022 " + + "à 16h.\nBien cordialement,\nJohn Smith." + ], + } + ), + False, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": [""], + "body": [ + "Bonjour,\nJe vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h.\nBien cordialement,\nJohn Smith." + ], + } + ), + False, + ), + ], +) +def test_pipeline_from_config(df, expected_result): + """ + Instanciate from a config and test the pipeline. + """ + # Pipeline config key + pipeline_key = "reply_pipeline" + + # Create pipeline from config + pipeline = MelusinePipeline.from_config(config_key=pipeline_key) + + # Apply pipeline on data + df_transformed = pipeline.transform(df) + result = df_transformed["reply_result"][0] + + # Check + assert result == expected_result diff --git a/tests/detectors/test_thanks_detector.py b/tests/detectors/test_thanks_detector.py new file mode 100644 index 0000000..b84510a --- /dev/null +++ b/tests/detectors/test_thanks_detector.py @@ -0,0 +1,142 @@ +""" +Unit test of the ThanksDetector. 
+ +""" +from tempfile import TemporaryDirectory + +import pandas as pd +import pytest + +from melusine.base import MissingFieldError +from melusine.detectors import ThanksDetector +from melusine.message import Message + + +@pytest.fixture +def thanks_detector_df(): + m0 = Message("") + m0.tags = [ + ("HELLO", "Bonjour"), + ("THANKS", "Merci beaucoup"), + ] + m0_messages = [m0] + m0_expected = True + m0_debug_expected = { + "match_result": True, + "negative_match_data": {}, + "neutral_match_data": {}, + "positive_match_data": {"DEFAULT": [{"match_text": "Merci", "start": 0, "stop": 5}]}, + } + + m1 = Message("") + m1.tags = [ + ("HELLO", "Bonjour"), + ("THANKS", "Merci, j'attends une reponse"), + ] + m1_messages = [m1] + m1_expected = False + m1_debug_expected = { + "match_result": False, + "negative_match_data": {"FORBIDDEN_WORDS": [{"match_text": "attend", "start": 9, "stop": 15}]}, + "neutral_match_data": {}, + "positive_match_data": {"DEFAULT": [{"match_text": "Merci", "start": 0, "stop": 5}]}, + } + + df = pd.DataFrame( + { + "messages": [m0_messages, m1_messages], + "detection_expectation": [m0_expected, m1_expected], + "debug_expectation": [m0_debug_expected, m1_debug_expected], + } + ) + + return df + + +def test_thanks_detector(thanks_detector_df): + """Unit test of the debug mode.""" + df = thanks_detector_df + df_copy = df.copy() + detector = ThanksDetector( + name="thanks", + ) + result_col = detector.result_column + debug_dict_col = detector.debug_dict_col + + # Transform data + df.debug = True + df = detector.transform(df) + + # Test result + assert result_col in df.columns + assert debug_dict_col in df.columns + + for i, row in df.iterrows(): + assert row[result_col] == row["detection_expectation"] + assert row[debug_dict_col][detector.thanks_regex.regex_name] == row["debug_expectation"] + + +def test_thanks_detector_missing_field(thanks_detector_df): + """Unit test of the debug mode.""" + df = thanks_detector_df.copy() + + detector = ThanksDetector( 
+ name="thanks", + ) + df = df.drop(detector.input_columns, axis=1) + + # Transform data + with pytest.raises(MissingFieldError, match=str(detector.input_columns)): + _ = detector.transform(df) + + +@pytest.mark.parametrize( + "tags, has_body, thanks_text, thanks_parts", + [ + ( + [ + ("HELLO", "Bonjour madame"), + ("BODY", "Voici le dossier"), + ("THANKS", "Merci a vous"), + ], + True, + "Merci a vous", + [("THANKS", "Merci a vous")], + ), + ( + [ + ("HELLO", "Bonjour madame"), + ("THANKS", "Merci"), + ("THANKS", "Merci a vous"), + ], + False, + "Merci\nMerci a vous", + [("THANKS", "Merci"), ("THANKS", "Merci a vous")], + ), + ], +) +@pytest.mark.usefixtures("use_dict_backend") +def test_thanks_detector_debug(tags, has_body, thanks_text, thanks_parts): + """Unit test of the debug mode.""" + + data = { + "messages": [Message(text="", tags=tags)], + "debug": True, + } + + detector = ThanksDetector( + name="thanks", + ) + + # Transform data + data = detector.transform(data) + + # Test result + assert "debug_thanks" in data + assert "has_body" in data["debug_thanks"] + assert "thanks_text" in data["debug_thanks"] + assert "thanks_parts" in data["debug_thanks"] + + assert data["debug_thanks"]["has_body"] == has_body + assert data["debug_thanks"]["thanks_text"] == thanks_text + assert data["debug_thanks"]["thanks_parts"] == thanks_parts diff --git a/tests/detectors/test_transfer_detector.py b/tests/detectors/test_transfer_detector.py new file mode 100644 index 0000000..ca4e659 --- /dev/null +++ b/tests/detectors/test_transfer_detector.py @@ -0,0 +1,344 @@ +""" +Unit tests of the TransferDetector. 
+""" + + +import pandas as pd +import pytest +from pandas import DataFrame + +from melusine.detectors import TransferDetector +from melusine.message import Message +from melusine.pipeline import MelusinePipeline + + +def test_instanciation(): + """Instanciation base test.""" + + detector = TransferDetector(name="transfer", header_column="det_clean_header", messages_column="messages") + assert isinstance(detector, TransferDetector) + + +@pytest.mark.parametrize( + "row, good_result", + [ + ( + { + "reply_text": "tr: Devis habitation", + "messages": [ + Message( + meta="", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ], + }, + True, + ), + ( + { + "reply_text": "re: Envoi d'un document de la Société Imaginaire", + "messages": [ + Message( + meta="this is meta", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ], + }, + True, + ), + ( + { + "reply_text": "re: Virement", + "messages": [ + Message( + meta="", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ], + }, + False, + ), + ( + { + "reply_text": "", + "messages": [ + Message( + meta="", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. 
Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ], + }, + False, + ), + ], +) +def test_deterministic_detect(row, good_result): + """Method base test.""" + + # Instanciate manually a detector + detector = TransferDetector( + name="transfer", + header_column="det_clean_header", + messages_column="messages", + ) + row = detector.detect(row) + res = row[detector.result_column] + assert res == good_result + + +@pytest.mark.parametrize( + "df_emails, expected_result", + [ + ( + DataFrame( + { + "det_clean_header": "tr: Rdv", + "messages": [ + [ + Message( + meta="", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ] + ], + } + ), + True, + ), + ], +) +def test_transform(df_emails, expected_result): + """Unit test of the transform() method.""" + + # Copy for later load/save test + df_copy = df_emails.copy() + + # Instanciate manually a detector + detector = TransferDetector( + name="transfer", + header_column="det_clean_header", + messages_column="messages", + ) + + # Get result column name + res_col = detector.result_column + + # Apply the detector on data + df_emails = detector.transform(df_emails) + + # Verify result + result = df_emails[res_col][0] + assert result == expected_result + + +@pytest.mark.parametrize( + "df_emails, expected_result, expected_debug_info", + [ + ( + DataFrame( + { + "det_clean_header": ["Tr: Suivi de dossier"], + "messages": [ + [ + Message( + meta="", + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. 
Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 " + "à 16h. ", + ), + ("GREETINGS", "Cordialement, John Smith."), + ], + ) + ] + ], + } + ), + True, + { + "reply_text": "tr: suivi de dossier", + "messages[0].meta": "", + "TransferRegex": { + "match_result": True, + "negative_match_data": {}, + "neutral_match_data": {}, + "positive_match_data": {"DEFAULT": [{"match_text": "tr:", "start": 0, "stop": 3}]}, + }, + }, + ), + ], +) +def test_transform_debug_mode(df_emails, expected_result, expected_debug_info): + """Unit test of the debug mode.""" + + # Copy for later load/save test + df_copy = df_emails.copy() + + # Instanciate manually a detector + detector = TransferDetector( + name="transfer", + header_column="det_clean_header", + messages_column="messages", + ) + + # Get column names + res_col = detector.result_column + debug_dict_col = detector.debug_dict_col + + # Transform data + df_emails.debug = True + df_emails = detector.transform(df_emails) + + # Collect results + result = df_emails[res_col].iloc[0] + debug_result = df_emails[debug_dict_col].iloc[0] + + # Test result + assert result == expected_result + assert debug_result == expected_debug_info + + +@pytest.mark.parametrize( + "df, expected_result", + [ + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["tr :Suivi de dossier"], + "body": [ + "", + ], + } + ), + True, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["fwd: Envoi d'un document de la Société Imaginaire"], + "body": [ + "Bonjour,\n\n\n\n\n\nUn taux d’humidité de 30% a été relevé le 19/04/2022.\n\n\n\nNous reprendrons contact avec l’assurée" + + " en Aout 2022.\n\n\n\n\n\n\nBien cordialement,\n\n\n\n\n\nNuméro Auxiliaire : 116113 T / 116133 J\n\n\n\n\n\n\n\n\nABOU" + + " ELELA Donia\n\n\nSté LVP\n-\n\nL\na\nV\nalorisation du\nP\natrimoine\n\n\n2, rue de la Paix\n\n\n94300 VINCENNES" + + "\n\n\n\n\n\n\n\nTél : 
0143740992\n\n\nPort : 0767396737\n\n\nhttp://lvpfrance.fr\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + + "\n\n\nDe :\nAccueil - Lucile RODRIGUES \n\n\n\nEnvoyé :\njeudi 13 janvier 2022 15:26\n\n\nÀ " + + ":\nCommercial \n\n\nObjet :\nTR: Evt : M211110545P survenu le 15/10/2021 - Intervention entreprise" + + " partenaire\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\n\n\ngestionsinistre@maif.fr\n[\nmailto:gestionsinistre@maif.fr\n]\n\n\n\n" + + "Envoyé :\njeudi 13 janvier 2022 15:13\n\n\nÀ :\nAccueil - Lucile RODRIGUES\n\n\nObjet :\nEvt : M211110545P survenu le 15/10/2021" + + " - Intervention entreprise partenaire\n\n\n\n\n\nMerci de bien vouloir prendre connaissance du document ci-joint.\n\n\n\nSentiments" + + "mutualistes.\n\nLa MAIF", + ], + } + ), + True, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["Virement"], + "body": [ + "Bonjour,\n\n\n\n\n\nUn taux d’humidité de 30% a été relevé le 19/04/2022.\n\n\n\nNous reprendrons contact avec l’assurée" + + " en Aout 2022.\n\n\n\n\n\n\nBien cordialement,\n\n\n\n\n\nNuméro Auxiliaire : 116113 T / 116133 J\n\n\n\n\n\n\n\n\nABOU" + + " ELELA Donia\n\n\nSté LVP\n-\n\nL\na\nV\nalorisation du\nP\natrimoine\n\n\n2, rue de la Paix\n\n\n94300 VINCENNES" + + "\n\n\n\n\n\n\n\nTél : 0143740992\n\n\nPort : 0767396737\n\n\nhttp://lvpfrance.fr\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + + "\n\n\nDe :\nAccueil - Lucile RODRIGUES \n\n\n\nEnvoyé :\njeudi 13 janvier 2022 15:26\n\n\nÀ " + + ":\nCommercial \n\n\nObjet :\nTR: Evt : M211110545P survenu le 15/10/2021 - Intervention entreprise" + + " partenaire\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\n\n\ngestionsinistre@maif.fr\n[\nmailto:gestionsinistre@maif.fr\n]\n\n\n\n" + + "Envoyé :\njeudi 13 janvier 2022 15:13\n\n\nÀ :\nAccueil - Lucile RODRIGUES\n\n\nObjet :\nEvt : M211110545P survenu le 15/10/2021" + + " - Intervention entreprise partenaire\n\n\n\n\n\nMerci de bien vouloir prendre connaissance du document ci-joint.\n\n\n\nSentiments" + + "mutualistes.\n\nLa MAIF", + ], + } + ), + 
False, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": ["tr: virement"], + "body": [ + "Bonjour,\n\n\n\n\n\nUn taux d’humidité de 30% a été relevé le 01/01/2001.\n\n\n\nNous reprendrons contact avec l’assurée" + + " en Aout 2022.\n\n\n\n\n\n\nBien cordialement,\n\n\n\n\n\nNuméro : 000000\n\n\n\n\n\n\n\n\nJohn" + + " Smith\n\n\nL\na\nValorisation du\nPatrimoine\n\n\n1, rue de la Paix\n\n\n79000 Niort" + + "\n\n\n\n\n\n\n\nTél : 0123456789\n\n\nPort : 0123456789\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + + "\n\n\nDe :\nAccueil - John Smith \n\n\n\nEnvoyé :\njeudi 01 janvier 2001 01:01\n\n\nÀ " + + ":\nCommercial \n\n\nObjet :\nTR: Accident survenu le 01/01/2021 - Intervention" + + " partenaire\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\n\n\ntest@test.fr\n[\nmailto:test@test.fr\n]\n\n\n\n" + + "Envoyé :\njeudi 01 janvier 2001 01:01\n\n\nÀ :\nAccueil\n\n\nObjet :\nAccident survenu le 01/01/2001" + + " - Intervention partenaire\n\n\n\n\n\nMerci de bien vouloir prendre connaissance du document ci-joint.\n\n\n\n" + + "Cordialement\n", + ], + } + ), + True, + ), + ], +) +def test_pipeline_from_config(df, expected_result): + """ + Instanciate from a config and test the pipeline. 
+ """ + # Pipeline config key + pipeline_key = "transfer_pipeline" + + # Create pipeline from config + pipeline = MelusinePipeline.from_config(config_key=pipeline_key) + + # Apply pipeline on data + df_transformed = pipeline.transform(df) + result = df_transformed["transfer_result"][0] + + # Check + assert result == expected_result diff --git a/tests/detectors/test_vacation_reply_detector.py b/tests/detectors/test_vacation_reply_detector.py new file mode 100644 index 0000000..d733779 --- /dev/null +++ b/tests/detectors/test_vacation_reply_detector.py @@ -0,0 +1,210 @@ +""" +Unit tests of the VacationReplyDetector +""" +import pandas as pd +import pytest +from pandas import DataFrame + +from melusine.detectors import VacationReplyDetector +from melusine.message import Message +from melusine.pipeline import MelusinePipeline + + +def test_instanciation(): + """Instanciation base test.""" + detector = VacationReplyDetector( + name="vacation_reply", + messages_column="messages", + ) + assert isinstance(detector, VacationReplyDetector) + + +@pytest.mark.parametrize( + "df, good_result", + [ + ( + DataFrame( + { + "messages": [ + [ + Message( + text="Bonjour, je vous confirme l'annulation du rdv du 01/01/2022 " + + "à 16h. 
Bien cordialement, John Smith.", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "je vous confirme l'annulation du rdv du 01/01/2022 à 16h.", + ), + ("GREETINGS", "Bien cordialement, John Smith."), + ], + ) + ] + ] + } + ), + False, + ), + ( + DataFrame( + { + "messages": [ + [ + Message( + text="Bonjour, \nActuellement en conge je prendrai connaissance" + + " de votre message ulterieurement.\nCordialement,", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", + ), + ("GREETINGS", "Cordialement, "), + ], + ) + ] + ] + } + ), + True, + ), + ], +) +def test_transform(df, good_result): + """Unit test of the transform() method.""" + df_copy = df.copy() + + message_column = "messages" + + detector = VacationReplyDetector( + name="vacation_reply", + messages_column=message_column, + ) + output_col = detector.result_column + + df = detector.transform(df) + result = df[output_col][0] + assert result == good_result + + +@pytest.mark.parametrize( + "df, good_detection_result, good_debug_info", + [ + ( + DataFrame( + { + "messages": [ + [ + Message( + text="Bonjour, \nActuellement en conge je prendrai connaissance" + + " de votre message ulterieurement.\nCordialement,", + tags=[ + ("HELLO", "Bonjour,"), + ( + "BODY", + "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", + ), + ("GREETINGS", "Cordialement, "), + ], + ) + ] + ] + } + ), + True, + { + "parts": [ + ( + "BODY", + "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", + ) + ], + "text": "Actuellement en conge je prendrai connaissance de votre message ulterieurement.", + "VacationReplyRegex": { + "match_result": True, + "negative_match_data": {}, + "neutral_match_data": {}, + "positive_match_data": { + "VAC_REP_HOLIDAYS": [{"match_text": "Actuellement " "en " "conge", "start": 0, "stop": 21}], + "VAC_REP_OUT_OF_OFFICE": [ + {"match_text": "je " "prendrai " "connaissance", 
"start": 22, "stop": 46} + ], + }, + }, + }, + ), + ], +) +def test_transform_debug_mode(df, good_detection_result, good_debug_info): + """Unit test of the debug mode.""" + df_copy = df.copy() + + messages_column = "messages" + + detector = VacationReplyDetector( + name="vacation_reply", + messages_column=messages_column, + ) + output_col = detector.result_column + debug_dict_col = detector.debug_dict_col + + # Transform data + df.debug = True + df = detector.transform(df) + + # Collect results + result = df[output_col].iloc[0] + debug_result = df[debug_dict_col].iloc[0] + + # Test result + assert result == good_detection_result + assert debug_result == good_debug_info + + +@pytest.mark.parametrize( + "df, expected_result", + [ + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": [""], + "body": [ + "Bonjour, \nActuellement en congé je prendrai connaissance" + + " de votre message ultérieurement.\nCordialement," + ], + } + ), + True, + ), + ( + pd.DataFrame( + { + "from": ["test@gmail.com"], + "header": [""], + "body": [ + "Bonjour,\nje vous confirme l'annulation du rdv du 01/01/2001 " + + "à 16h.\nBien cordialement,\nJohn Smith." + ], + } + ), + False, + ), + ], +) +def test_pipeline_from_config(df, expected_result): + """ + Instanciate from a config and test the pipeline. 
+ """ + # Pipeline config key + pipeline_key = "vacation_reply_pipeline" + + # Create pipeline from config + pipeline = MelusinePipeline.from_config(config_key=pipeline_key) + + # Apply pipeline on data + df_transformed = pipeline.transform(df) + result = df_transformed["vacation_reply_result"][0] + + # Check + assert result == expected_result diff --git a/tests/docs/__init__.py b/tests/docs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/docs/test_configurations.py b/tests/docs/test_configurations.py new file mode 100644 index 0000000..379ac08 --- /dev/null +++ b/tests/docs/test_configurations.py @@ -0,0 +1,7 @@ +def test_tutorial001(add_docs_to_pythonpath): + from docs_src.Configurations.tutorial001 import from_config, from_config_dict, modify_conf_with_dict, print_config + + _ = from_config() + _ = from_config_dict() + _ = print_config() + _ = modify_conf_with_dict() diff --git a/tests/docs/test_detectors.py b/tests/docs/test_detectors.py new file mode 100644 index 0000000..ec1124c --- /dev/null +++ b/tests/docs/test_detectors.py @@ -0,0 +1,23 @@ +def test_tutorial002(add_docs_to_pythonpath): + from docs_src.MelusineDetectors.tutorial001 import run as run001 + + _ = run001() + + +def test_tutorial002(add_docs_to_pythonpath): + from docs_src.MelusineDetectors.tutorial002 import run as run002 + + _ = run002() + + +def test_tutorial003(add_docs_to_pythonpath): + + from docs_src.MelusineDetectors.tutorial003 import run as run003 + + _ = run003() + + +def test_tutorial004(add_docs_to_pythonpath): + from docs_src.MelusineDetectors.tutorial004 import run as run004 + + _ = run004() diff --git a/tests/docs/test_getting_started.py b/tests/docs/test_getting_started.py new file mode 100644 index 0000000..b9df1bb --- /dev/null +++ b/tests/docs/test_getting_started.py @@ -0,0 +1,10 @@ +def test_tutorial001(add_docs_to_pythonpath): + from docs_src.GettingStarted.tutorial001 import run as run001 + + _ = run001() + + +def 
test_tutorial002(add_docs_to_pythonpath): + from docs_src.GettingStarted.tutorial001 import run as run002 + + _ = run002() diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/backend.py b/tests/fixtures/backend.py new file mode 100644 index 0000000..0fbcf67 --- /dev/null +++ b/tests/fixtures/backend.py @@ -0,0 +1,284 @@ +import pytest + + +@pytest.fixture +def backend_base_data(): + return { + "str_col": ["foo", "bar"], + "int_col": [1, 2], + } + + +def single_input_single_output(value, test_keyword_arg=False): + return_value = value.upper() + if test_keyword_arg: + return_value += "_kwarg" + return return_value + + +def single_input_multi_output(value, test_keyword_arg=False): + return_value = value.upper() + if test_keyword_arg: + return_value += "_kwarg" + return return_value, value.capitalize() + + +def multi_input_single_output(value1, value2, test_keyword_arg=False): + return_value = value1 + if test_keyword_arg: + return_value += "_kwarg" + return value2 * return_value + + +def multi_input_multi_output(value1, value2, test_keyword_arg=False): + return_value = value1.upper() + if test_keyword_arg: + return_value += "_kwarg" + return return_value, value2 * 2 + + +def row_input_single_output(row, test_keyword_arg=False): + return_value = row["str_col"] + if test_keyword_arg: + return_value += "_kwarg" + return f'{return_value}_{row["int_col"]}' + + +def row_input_multi_output(row, test_keyword_arg=False): + return_value = row["str_col"].upper() + if test_keyword_arg: + return_value += "_kwarg" + return return_value, row["int_col"] * 2 + + +def row_input_row_output(row, test_keyword_arg=False): + return_value = row["str_col"].upper() + if test_keyword_arg: + return_value += "_kwarg" + row["new_str_col"] = return_value + return row + + +testcase_single_input_single_output = dict( + test_name="testcase_single_input_single_output", + func=single_input_single_output, + 
input_columns=["str_col"], + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO", "BAR"], + }, +) + +testcase_single_input_single_output_kwarg = dict( + test_name="testcase_single_input_single_output_kwarg", + func=single_input_single_output, + input_columns=["str_col"], + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO_kwarg", "BAR_kwarg"], + }, + kwargs=dict(test_keyword_arg=True), +) + + +testcase_single_input_multi_output = dict( + test_name="testcase_single_input_multi_output", + func=single_input_multi_output, + input_columns=["str_col"], + output_columns=["new_str_col1", "new_str_col2"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col1": ["FOO", "BAR"], + "new_str_col2": ["Foo", "Bar"], + }, +) + +testcase_single_input_multi_output_kwarg = dict( + test_name="testcase_single_input_multi_output_kwarg", + func=single_input_multi_output, + input_columns=["str_col"], + output_columns=["new_str_col1", "new_str_col2"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col1": ["FOO_kwarg", "BAR_kwarg"], + "new_str_col2": ["Foo", "Bar"], + }, + kwargs=dict(test_keyword_arg=True), +) + +testcase_row_input_single_output = dict( + test_name="testcase_row_input_single_output", + func=row_input_single_output, + input_columns=["str_col", "int_col"], + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["foo_1", "bar_2"], + }, +) + +testcase_row_input_single_output_kwarg = dict( + test_name="testcase_row_input_single_output_kwarg", + func=row_input_single_output, + input_columns=["str_col", "int_col"], + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["foo_kwarg_1", "bar_kwarg_2"], + }, + kwargs=dict(test_keyword_arg=True), +) + 
+testcase_no_input_single_output = dict( + test_name="testcase_no_input_single_output", + func=row_input_single_output, + input_columns=None, + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["foo_1", "bar_2"], + }, +) + +testcase_no_input_single_output_kwarg = dict( + test_name="testcase_no_input_single_output_kwarg", + func=row_input_single_output, + input_columns=None, + output_columns=["new_str_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["foo_kwarg_1", "bar_kwarg_2"], + }, + kwargs=dict(test_keyword_arg=True), +) + +testcase_multi_input_no_output = dict( + test_name="testcase_multi_input_no_output", + func=row_input_row_output, + input_columns=["str_col", "int_col"], + output_columns=None, + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO", "BAR"], + }, +) + +testcase_multi_input_no_output_kwarg = dict( + test_name="testcase_multi_input_no_output_kwarg", + func=row_input_row_output, + input_columns=["str_col", "int_col"], + output_columns=None, + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO_kwarg", "BAR_kwarg"], + }, + kwargs=dict(test_keyword_arg=True), +) + +testcase_row_input_row_output = dict( + test_name="testcase_row_input_row_output", + func=row_input_row_output, + input_columns=None, + output_columns=None, + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO", "BAR"], + }, +) + +testcase_row_input_row_output_kwarg = dict( + test_name="testcase_row_input_row_output_kwarg", + func=row_input_row_output, + input_columns=None, + output_columns=None, + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO_kwarg", "BAR_kwarg"], + }, + kwargs=dict(test_keyword_arg=True), +) + +testcase_no_input_multi_output = dict( + test_name="testcase_no_input_multi_output", + 
func=row_input_multi_output, + input_columns=None, + output_columns=["new_str_col", "new_int_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO", "BAR"], + "new_int_col": [2, 4], + }, +) + +testcase_no_input_multi_output_kwarg = dict( + test_name="testcase_no_input_multi_output_kwarg", + func=row_input_multi_output, + input_columns=None, + output_columns=["new_str_col", "new_int_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO_kwarg", "BAR_kwarg"], + "new_int_col": [2, 4], + }, + kwargs=dict(test_keyword_arg=True), +) + +testcase_row_input_multi_output = dict( + test_name="testcase_row_input_multi_output", + func=row_input_multi_output, + input_columns=["str_col", "int_col"], + output_columns=["new_str_col", "new_int_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO", "BAR"], + "new_int_col": [2, 4], + }, +) + +testcase_row_input_multi_output_kwarg = dict( + test_name="testcase_row_input_multi_output_kwarg", + func=row_input_multi_output, + input_columns=["str_col", "int_col"], + output_columns=["new_str_col", "new_int_col"], + expected_data={ + "str_col": ["foo", "bar"], + "int_col": [1, 2], + "new_str_col": ["FOO_kwarg", "BAR_kwarg"], + "new_int_col": [2, 4], + }, + kwargs=dict(test_keyword_arg=True), +) + + +testcase_list = [value for key, value in locals().items() if key.startswith("testcase")] + + +def get_fixture_name(fixture_value): + return fixture_value.get("test_name", "missing_test_name") + + +@pytest.fixture( + params=testcase_list, + ids=get_fixture_name, +) +def backend_testcase(request): + """Fixture to test all backend configurations""" + testcase = request.param + return testcase diff --git a/tests/fixtures/basic_emails.py b/tests/fixtures/basic_emails.py new file mode 100644 index 0000000..5282d31 --- /dev/null +++ b/tests/fixtures/basic_emails.py @@ -0,0 +1,75 @@ +import pandas as pd +import pytest + + 
+@pytest.fixture +def text_basic_hello_world(): + return {"text": "Hello world", "header": "Hello"} + + +@pytest.fixture +def text_basic_with_accent(): + return { + "text": "Bonjour\nPouvez-vous résilier mon contrat?\nJean Dupont", + "header": "Demande de résiliation", + } + + +@pytest.fixture +def email_basic_hello_world(): + return { + "body": "Hello world", + "header": "Hello", + "chanel": "mail", + "from": "lulu@gmail.com", + "to": ["gedgestionsinistre@maif.fr"], + "bal": "", + "date": "", + "ged_chanel": "", + "pli_id": "", + "flux_id": "", + "entity": "", + "soc_type": "", + "soc_num": "", + "nb_documents": 0, + } + + +@pytest.fixture +def email_basic_with_accent(): + return { + "body": "Bonjour\nPouvez-vous résilier mon contrat?\nJean Dupont", + "header": "Demande de résiliation", + "chanel": "mail", + "from": "lulu@gmail.com", + "to": ["gedgestionsinistre@maif.fr"], + "bal": "", + "date": "", + "ged_chanel": "", + "pli_id": "", + "flux_id": "", + "entity": "", + "soc_type": "", + "soc_num": "", + "nb_documents": 0, + } + + +@pytest.fixture +def dataframe_basic(text_basic_hello_world, text_basic_with_accent): + return pd.DataFrame( + [ + text_basic_hello_world, + text_basic_with_accent, + ] + ) + + +@pytest.fixture +def email_dataframe_basic(email_basic_hello_world, email_basic_with_accent): + return pd.DataFrame( + [ + email_basic_hello_world, + email_basic_with_accent, + ] + ) diff --git a/tests/fixtures/docs.py b/tests/fixtures/docs.py new file mode 100644 index 0000000..4a2b0f9 --- /dev/null +++ b/tests/fixtures/docs.py @@ -0,0 +1,16 @@ +import sys +from pathlib import Path +from typing import Generator + +import pytest + +# Package source root +docs_folder = Path(__file__).parents[2] / "docs" + + +@pytest.fixture +def add_docs_to_pythonpath() -> Generator[None, None, None]: + """Testing""" + # Add docs to python path + sys.path.insert(0, str(docs_folder)) + yield None diff --git a/tests/fixtures/pipelines.py b/tests/fixtures/pipelines.py new file mode 
100644 index 0000000..352f5d4 --- /dev/null +++ b/tests/fixtures/pipelines.py @@ -0,0 +1,41 @@ +import pytest + +from melusine.pipeline import MelusinePipeline + + +@pytest.fixture +def conf_pipeline_basic(): + return { + "test_pipeline": { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processor", + "config_key": "test_normalizer", + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processor", + "config_key": "test_tokenizer", + }, + ] + } + } + + +@pytest.fixture +def pipeline_default(): + # Load json config + conf = MelusinePipeline.get_config_from_key(config_key="my_pipeline") + + # Prevent model loading + # (Set all model_name parameters to None) + for step in conf["steps"]: + step["name"] = step.pop("config_key", None) + params = step["parameters"] + + if "model_name" in params: + params["model_name"] = None + + # Create pipeline from a json config file (using config key "my_pipeline") + return MelusinePipeline.from_config(config_dict=conf, verbose=True) diff --git a/tests/fixtures/processors.py b/tests/fixtures/processors.py new file mode 100644 index 0000000..28b1c1f --- /dev/null +++ b/tests/fixtures/processors.py @@ -0,0 +1,29 @@ +import pytest + + +@pytest.fixture +def conf_normalizer(): + return { + "form": "NFKD", + "input_columns": ["text"], + "lowercase": True, + "output_columns": ["text"], + } + + +@pytest.fixture +def conf_tokenizer(): + return { + "stopwords": ["le", "les"], + "tokenizer_regex": '\\w+(?:[\\?\\-\\"_]\\w+)*', + } + + +@pytest.fixture +def conf_phraser(): + return { + "input_columns": ["tokens"], + "output_columns": ["tokens"], + "threshold": 10, + "min_count": 10, + } diff --git a/tests/functional/__init__.py b/tests/functional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/functional/test_emails_fixtures.py b/tests/functional/test_emails_fixtures.py new file mode 100644 index 0000000..500b464 --- /dev/null +++ b/tests/functional/test_emails_fixtures.py @@ -0,0 +1,493 
@@ +""" +================================== Test-Case fixtures ================================== +# Overview +Test cases are defined in this file and tested using `test_emails_generic`. +The fixture `testcase` will successively take the value of every test case defined +below. + +# Adding a new test case +To add a new test case, create below a variable named "testcase_${NAME}". +The variable should be a dict with the following fields: +- Input fields (ex: "body", "header", "from", "to", etc): + Used to create the input email passed to the pipeline +-- Example -- +testcase_hello_world = {"body": "Hello World"} + +- Expected output fields (ex: normalizer_expected, segmenter_expected, etc): + Used to verify the data resulting from a transformation + The `segmenter_expected` field will test the results obtained after the pipeline step + named `segmenter`. + The content of an expected output field is a dictionary with a key for each DataFrame + column to be tested. +-- Example -- +testcase_hello_world = { + "body": "Hello World", + "tokenizer_expected": { + "tokens": ['hello', 'world'] # <== Test the content of the tokens column + } +} + +# Test Message class attributes: +The `messages` column contains a list of Message class instances. +The value of the class instance attributes can be tested +with the syntax `messages.attribute`.
+-- Example -- +testcase_hello_world = { + "body": "Hello World\nMessage transféré\nBonjour Monde", + "segmenter_expected: { + "messages.text": ['Hello World', 'Bonjour Monde']" # <== Test the text attribute + "messages.meta": [None, 'Message transfere']" # <== Test the meta attribute + } +} +======================================================================================== +""" +import pytest + +testcase_initial_cleaning_1 = dict( + test_name="Initial leaning line breaks", + body="BonJour wORLD\r\n L'orem \r\n\t\r\nIp-sum\r Lo_rem \nip.sum.", + body_cleaner_expected={ + "tmp_clean_body": "BonJour wORLD\nL'orem\nIp-sum\nLo_rem\nip.sum.", + }, + content_tagger_expected={ + "messages.tags": [ + [ + ("HELLO", "BonJour wORLD"), + ("BODY", "L'orem"), + ("BODY", "Ip-sum"), + ("BODY", "Lo_rem"), + ("BODY", "ip.sum."), + ], + ], + }, + tokenizer_expected={ + "ml_body_tokens": [ + "l", + "orem", + "ip-sum", + "lo_rem", + "ip", + "sum", + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_initial_cleaning_2 = dict( + test_name="Initial leaning special characters", + body="Hello\xa0World\n’œ’ <\nyoo\n>", + body_cleaner_expected={ + "tmp_clean_body": "Hello World\n'oe' ", + }, +) + + +# Watch-out : Multi-line strings are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation1 = dict( + test_name="Segmentation Date/Envoyé/À/Objet/Le", + body=( + "De : test@free.fr \nEnvoyé : lundi 30 août 2021 21:26 \nÀ : _Délégation 00.0 - " + "Ville \nObjet : Re: Soc : 0000000P - Votre lettre \nBonjour, \n" + "Vous trouverez ci-joint l'attestation \nMerci de me confirmer la bonne réception de ce " + "message. \nVous en remerciant par avance. \nCordialement, \nJean Dupont \nLe 2021-08-18 10:30, " + "ville@malf.fr a écrit : \nBonjour, \nVeuillez trouver ci-jointe la lettre \nLa visualisation des " + "fichiers PDF nécessite Adobe Reader. \nSentiments mutualistes. 
\nLa MAIF \n" + ), + segmenter_expected={ + "messages.text": [ + ( + "Bonjour,\nVous trouverez ci-joint l'attestation\nMerci de me confirmer la bonne réception " + "de ce message.\nVous en remerciant par avance.\nCordialement,\nJean Dupont" + ), + ( + "Bonjour,\nVeuillez trouver ci-jointe la lettre\nLa visualisation des fichiers PDF nécessite " + "Adobe Reader.\nSentiments mutualistes.\nLa MAIF" + ), + ], + "messages.meta": [ + ( + "De : test@free.fr \nEnvoyé : lundi 30 août 2021 21:26\nÀ : _Délégation 00.0 - " + "Ville \nObjet : Re: Soc : 0000000P - Votre lettre" + ), + "Le 2021-08-18 10:30, ville@malf.fr a écrit :", + ], + }, + content_tagger_expected={ + "messages.tags": [ + [ + ("HELLO", "Bonjour,"), + ("BODY", "Vous trouverez ci-joint l'attestation"), + ("BODY", "Merci de me confirmer la bonne réception de ce message."), + ("THANKS", "Vous en remerciant par avance."), + ("GREETINGS", "Cordialement,"), + ("SIGNATURE_NAME", "Jean Dupont"), + ], + [ + ("HELLO", "Bonjour,"), + ("BODY", "Veuillez trouver ci-jointe la lettre"), + ("FOOTER", "La visualisation des fichiers PDF nécessite Adobe Reader."), + ("GREETINGS", "Sentiments mutualistes."), + ("SIGNATURE_NAME", "La MAIF"), + ], + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation2 = dict( + test_name="Segmentation Direct Transfer", + body=( + "De : Jean Dupond \nEnvoyé : mardi 31 août 2021 21:45 \n" + "À : _Délégation Conseil 000 - La Ville \n" + "Objet : Demande d'attestation Identifiant : 0000000N \nBonjour \n" + "Pouvez-vous me transmettre deux attestations au nom de mes enfants \n- Jane Dupond \n- Joe Dupond \n" + "Merci par avance \n-- \nCordialement \nMr Jean Dupond" + ), + segmenter_expected={ + "messages.text": [ + ( + "Bonjour\nPouvez-vous me transmettre deux attestations au nom de mes enfants\n" + "- Jane Dupond\n- Joe Dupond\nMerci par avance\n--\nCordialement\nMr Jean Dupond" + ) + ], + "messages.meta": [ + ( + "De : Jean Dupond 
\nEnvoyé : mardi 31 août 2021 21:45\n" + "À : _Délégation Conseil 000 - La Ville \n" + "Objet : Demande d'attestation Identifiant : 0000000N" + ), + ], + }, + content_tagger_expected={ + "messages.tags": [ + [ + ("HELLO", "Bonjour"), + ( + "BODY", + "Pouvez-vous me transmettre deux attestations au nom de mes enfants", + ), + ("BODY", "- Jane Dupond"), + ("BODY", "- Joe Dupond"), + ("THANKS", "Merci par avance"), + ("GREETINGS", "Cordialement"), + ("SIGNATURE_NAME", "Mr Jean Dupond"), + ] + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation3 = dict( + test_name="Segmentation Début du message transféré", + body=( + "Envoyé de mon iPhone \nDébut du message transféré : \nDe: Jane Dupond \n" + "Date: 11 août 2021 à 17:04:01 UTC+2 \nÀ: Joe DUPOND \n" + "Objet: Rép. : X - ETAT DES LIEUX \nBonjour Mme X, \n" + "Suite à l'état des lieux de ce matin, je suis passé à l'agence transmettre les objets \n" + "Bien cordialement \nJane Dupond \nEnvoyé de mon iPhone \nLe 11 août 2021à 15:35, " + "Joe DUPOND a écrit : \nBonjour, \n" + "Veuillez trouver ci-joint votre état des lieux sortant. \n" + "Vous en souhaitant bonne réceptionn, \nBien cordialement, \nJoe DUPOND \n " + ), + segmenter_expected={ + "messages.text": [ + "Envoyé de mon iPhone", + ( + "Bonjour Mme X,\nSuite à l'état des lieux de ce matin, je suis passé à l'agence transmettre " + "les objets\nBien cordialement\nJane Dupond\nEnvoyé de mon iPhone" + ), + ( + "Bonjour,\nVeuillez trouver ci-joint votre état des lieux sortant.\n" + "Vous en souhaitant bonne réceptionn,\nBien cordialement,\nJoe DUPOND" + ), + ], + "messages.meta": [ + "", + ( + "Début du message transféré :\nDe: Jane Dupond \n" + "Date: 11 août 2021 à 17:04:01 UTC+2\nÀ: Joe DUPOND \n" + "Objet: Rép. 
: X - ETAT DES LIEUX" + ), + "Le 11 août 2021à 15:35, Joe DUPOND a écrit :", + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation4 = dict( + test_name="Segmentation Original Message", + body=( + "De : marie@protonmail.com \nEnvoyé : mardi 31 août 202114:04 \n" + "À : _Délégation 00.1- Ville \n" + "Objet : Re : Soc : 0000000M - Votre attestation Assurance Habitation Responsabilité civile locative \n" + "Bonjour, \nJe vous renvoie mon RIB concernant le contrat \nd'assurance Habitation Responsabilité civile " + "locative. \nBien à vous \nMarie \nN° sociétaire : 0000000M \nSent with Proto_nMail Secure Email. \n" + "--- Original Message --- \nLe mardi 31 août 2021 à 11:09, a écrit : \n" + "Bonjour, \nVeuillez trouver ci-joint l'attestation « Responsabilité civile locative » \n" + "que vous nous avez demandée. \nLa visualisation des fichiers PDF nécessite Adobe Reader. \n" + "Sentiments mutualistes. \nLa MAIF \n" + ), + segmenter_expected={ + "messages.text": [ + ( + "Bonjour,\nJe vous renvoie mon RIB concernant le contrat\nd'assurance Habitation " + "Responsabilité civile locative.\nBien à vous\nMarie\nN° sociétaire : 0000000M\n" + "Sent with Proto_nMail Secure Email." + ), + ( + "Bonjour,\nVeuillez trouver ci-joint l'attestation « Responsabilité civile locative »\n" + "que vous nous avez demandée.\nLa visualisation des fichiers PDF nécessite Adobe Reader.\n" + "Sentiments mutualistes.\nLa MAIF" + ), + ], + "messages.meta": [ + ( + "De : marie@protonmail.com\nEnvoyé : mardi 31 août 202114:04\n" + "À : _Délégation 00.1- Ville \nObjet : Re : Soc : 0000000M - Votre attestation " + "Assurance Habitation Responsabilité civile locative" + ), + "Original Message ---\nLe mardi 31 août 2021 à 11:09, a écrit :", + ], + }, +) + + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation5 = dict( + test_name="Segmentation Le lun. 
xxx a écrit", + body=( + "Bonjour, \nVeuillez trouver en PJ mon RI \n" + "Le lun. 30 août 2021à 09:40, DUPOND Marie a écrit : \nBonjour" + ), + segmenter_expected={ + "messages.text": ["Bonjour,\nVeuillez trouver en PJ mon RI", "Bonjour"], + "messages.meta": [ + "", + "Le lun. 30 août 2021à 09:40, DUPOND Marie a écrit :", + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_segmentation6 = dict( + test_name="Segmentation Direct Transfer", + body="Bonjour et merci\nCordialement", + content_tagger_expected={ + "messages.tags": [ + [ + ("THANKS", "Bonjour et merci"), + ("GREETINGS", "Cordialement"), + ] + ], + }, +) + +# Watch-out : Multi-line string are NOT Tuples ("abc" "def") != ("abc", "def") +testcase_transfer_1 = dict( + test_name="Direct transfer", + body="De:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50\nBonjour\nbla bla", + transferred_email_processor_expected={ + "messages.text": ["Bonjour\nbla bla"], + "messages.meta": ["De:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50"], + "det_original_from": "sender.before.transfer@test.fr", + }, +) + +testcase_transfer_2 = dict( + test_name="FOOTER + Transfer", + body="Envoyé de mon iphone\nDe:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50\nBonjour\nbla bla", + transferred_email_processor_expected={ + "messages.text": ["Bonjour\nbla bla"], + "messages.meta": ["De:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50"], + "det_original_from": "sender.before.transfer@test.fr", + }, +) + +testcase_transfer_3 = { + "test_name": "FOOTER + Transfer (no email address in meta)", + "from": "email_sender@test.fr", + "body": "Envoyé de mon iphone\nDe:\nJohn Doe\nDate:\n3 mars 2023 à 16:42:50\nBonjour\nbla bla", + "transferred_email_processor_expected": { + "messages.text": ["Bonjour\nbla bla"], + "messages.meta": ["De:\nJohn Doe\nDate:\n3 mars 2023 à 16:42:50"], + "det_original_from": None, + }, +} + 
+testcase_transfer_4 = { + "test_name": "BODY + Transfer", + "from": "email_sender@test.fr", + "body": "Ceci est un BODY\nDe:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50\nBonjour\nbla bla", + "transferred_email_processor_expected": { + "messages.text": ["Ceci est un BODY", "Bonjour\nbla bla"], + "messages.meta": ["", "De:\nsender.before.transfer@test.fr\nDate:\n3 mars 2023 à 16:42:50"], + "det_original_from": None, + }, +} + +testcase_false_thanks = dict( + test_name="Thanks with question mark", + body="Bonjour\nQu'en est-il svp ? Merci\nJoe Dupont", + thanks_detector_expected={ + "thanks_result": False, + }, +) + +testcase_basic_thanks = dict( + test_name="Basic Thanks", + body="Bonjour\nMerci pour cette réponse\nSincèrement\nBLA Bla BLA", + thanks_detector_expected={ + "thanks_result": True, + }, +) + + +testcase_false_vacation_reply = dict( + test_name="Simple vacation reply (False)", + body="Bonjour\nQu'en est-il svp ? Merci\nJoe Dupont", + vacation_reply_detector_expected={ + "vacation_reply_result": False, + }, +) + +testcase_true_vacation_reply = dict( + test_name="Simple vacation reply (True)", + body="Bonjour, \nActuellement en congé je prendrai connaissance" + + " de votre message ultérieurement.\nCordialement,", + vacation_reply_detector_expected={ + "vacation_reply_result": True, + }, +) + + +testcase_real_message = dict( + test_name="real_email_1", + body=( + "De :\n_Délégation - Ville \n\n\nEnvoyé :\nlundi 27 septembre 2021 22:19\n\n\n" + "À :\nBAL REZO \n\n\nObjet :\nTR : Soc : 1111111A - Votre" + " attestation assurance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\nJean Dupont " + "\n\n\nEnvoyé :\nlundi 27 septembre 2021 22:23:43 (UTC+01:00) Brussels, Copenhagen, Madrid, Paris" + "\n\n\nÀ :\n_Délégation - Ville \n\n\nSujet :\nRE: Soc : 1111111A - " + "Votre attestation assurance\n\n\n\n\n\n\n\n\n\n\n\n\n\nBonjour,\n\n\n\n\nJe vous remercie" + "pour votre attestation.\n\n\n\n\nLe notaire 
chante\n\n\n\n\n\n\n\n\n\n\nCordialement,\n\n\n\n\n" + "Jean Dupont\n\n\n\n\n\n\n\n\n\n\n\nDe :\nma-ville@maif.fr \n\n\n" + "Envoyé :\nvendredi 24 septembre 2021 17:42\n\n\nÀ :\njdupont@gmail.com\n\n\nObjet :\n" + "Soc : 1111111A - Votre attestation assurance\n\n\n\n\n\n\n\n\n\nBonjour,\n\n" + "Veuillez trouver ci-joint l'attestation.\n\nLa visualisation des fichiers PDF nécessite " + "Adobe Reader.\n\nSentiments mutualistes.\n\nLa MAIF" + ), + messages=[ + "", + ( + "Bonjour,\n\n\n\n\nJe vous remercie pour votre attestation.\n\n\n\n\n" + "Le notaire chante\n\n\n\n\n\n\n\n\n\n\nCordialement,\n\n\n\n\nJean Dupont" + ), + ( + "Bonjour,\n\nVeuillez trouver ci-joint l'attestation.\n\nLa visualisation des fichiers PDF " + "nécessite Adobe Reader.\n\nSentiments mutualistes.\n\nLa MAIF" + ), + ], +) + + +testcase_true_reply = dict( + test_name="Replydetecteur (True)", + header="Re: Suivi de dossier", + reply_detector_expected={ + "reply_result": True, + }, +) + +testcase_true_reply1 = dict( + test_name="Replydetecteur (True)", + header="re: Suivi de dossier", + reply_detector_expected={ + "reply_result": True, + }, +) +testcase_false_reply = dict( + test_name="Replydetecteur (false)", + header="tr: Suivi de dossier", + reply_detector_expected={ + "reply_result": False, + }, +) +testcase_false_reply1 = dict( + test_name="Replydetecteur (false)", + header="", + reply_detector_expected={ + "reply_result": False, + }, +) +testcase_true_transfer = dict( + test_name="Transferdetecteur (True)", + header="Tr: Suivi de dossier", + body="Bonjour,\n\n\n\n\n\nUn taux d’humidité de 30% a été relevé le 19/04/2022.\n\n\n\nNous reprendrons contact avec l’assurée" + + " en Aout 2022.\n\n\n\n\n\n\nBien cordialement,\n\n\n\n\n\nNuméro Auxiliaire : 116113 T / 116133 J\n\n\n\n\n\n\n\n\nABOU" + + " ELELA Donia\n\n\nSté LVP\n-\n\nL\na\nV\nalorisation du\nP\natrimoine\n\n\n2, rue de la Paix\n\n\n94300 VINCENNES" + + "\n\n\n\n\n\n\n\nTél : 0143740992\n\n\nPort : 
0767396737\n\n\nhttp://lvpfrance.fr\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" + + "\n\n\nDe :\nAccueil - Lucile RODRIGUES \n\n\n\nEnvoyé :\njeudi 13 janvier 2022 15:26\n\n\nÀ " + + ":\nCommercial \n\n\nObjet :\nTR: Evt : M211110545P survenu le 15/10/2021 - Intervention entreprise" + + " partenaire\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\n\n\ngestionsinistre@maif.fr\n[\nmailto:gestionsinistre@maif.fr\n]\n\n\n\n" + + "Envoyé :\njeudi 13 janvier 2022 15:13\n\n\nÀ :\nAccueil - Lucile RODRIGUES\n\n\nObjet :\nEvt : M211110545P survenu le 15/10/2021" + + " - Intervention entreprise partenaire\n\n\n\n\n\nMerci de bien vouloir prendre connaissance du document ci-joint.\n\n\n\nSentiments" + + "mutualistes.\n\nLa MAIF", + transfer_detector_expected={ + "transfer_result": True, + }, +) +testcase_true_transfer1 = dict( + test_name="Transferdetecteur (True)", + header="Suivi de dossier", + body="De :\nAccueil - Lucile RODRIGUES \n\n\n\nEnvoyé :\njeudi 13 janvier 2022 15:26\n\n\nÀ " + + ":\nCommercial \n\n\nObjet :\nTR: Evt : M211110545P survenu le 15/10/2021 - Intervention entreprise" + + " partenaire\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDe :\n\n\ngestionsinistre@maif.fr\n[\nmailto:gestionsinistre@maif.fr\n]\n\n\n\n" + + "Envoyé :\njeudi 13 janvier 2022 15:13\n\n\nÀ :\nAccueil - Lucile RODRIGUES\n\n\nObjet :\nEvt : M211110545P survenu le 15/10/2021" + + " - Intervention entreprise partenaire\n\n\n\n\n\nMerci de bien vouloir prendre connaissance du document ci-joint.\n\n\n\nSentiments" + + "mutualistes.\n\nLa MAIF", + transfer_detector_expected={ + "transfer_result": True, + }, +) +testcase_false_transfer = dict( + test_name="Transferdetecteur (False)", + header="test", + body="Bonjour, ceci est un message de test", + transfer_detector_expected={ + "transfer_result": False, + }, +) +testcase_false_transfer1 = dict( + test_name="Transferdetecteur (False)", + header="", + body="", + transfer_detector_expected={ + "transfer_result": False, + }, +) + +testcase_list = [value for key, value in 
locals().items() if key.startswith("testcase")] + + +def get_fixture_name(fixture_value): + return fixture_value.get("test_name", "missing_test_name") + + +@pytest.fixture( + params=testcase_list, + ids=get_fixture_name, +) +def testcase(request, default_pipeline="my_pipeline"): + testcase = request.param + + # Set default testcase parameters + testcase["pipeline"] = testcase.get("pipeline", default_pipeline) + + # Set default email fields + testcase["body"] = testcase.get("body", "") + testcase["header"] = testcase.get("header", "") + testcase["from"] = testcase.get("from", "") + testcase["to"] = testcase.get("to", "") + testcase["attachments"] = testcase.get("attachments", list()) + + return testcase diff --git a/tests/functional/test_emails_generic.py b/tests/functional/test_emails_generic.py new file mode 100644 index 0000000..5414516 --- /dev/null +++ b/tests/functional/test_emails_generic.py @@ -0,0 +1,15 @@ +import pytest + +from melusine.testing import assert_pipeline_results + +from .test_emails_fixtures import testcase + + +# The `testcase` fixture sequentially takes the value of each of the +# test cases defined in tests/functional/test_emails_fixtures.py +@pytest.mark.usefixtures("use_dict_backend") +def test_pipeline_steps(testcase): + + # Run pipeline tests + pipeline_name = testcase.pop("pipeline") + assert_pipeline_results(testcase, pipeline_name) diff --git a/tests/huggingface/__init__.py b/tests/huggingface/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/huggingface/test_basic_classification.py b/tests/huggingface/test_basic_classification.py new file mode 100644 index 0000000..c5500b4 --- /dev/null +++ b/tests/huggingface/test_basic_classification.py @@ -0,0 +1,63 @@ +from unittest.mock import patch + +import pandas as pd +import pytest + +transformers = pytest.importorskip("transformers") +from typing import List + +from transformers.pipelines.zero_shot_classification import ZeroShotClassificationPipeline + +
+class MockZeroShotClassificationPipeline(ZeroShotClassificationPipeline): + def __init__(self, task, model, tokenizer): + pass + + def __call__(self, sequences, candidate_labels, hypothesis_template): + if isinstance(sequences, List): + # Standard return with 2 elements + return [ + { + "sequence": sequences[0], + "labels": ["négatif", "positif"], + "scores": [0.5, 0.5], + }, + { + "sequence": sequences[1], + "labels": ["négatif", "positif"], + "scores": [0.5, 0.5], + }, + ] + + if "gentillesse" in sequences: + return { + "sequence": sequences, + "labels": ["positif", "négatif"], + "scores": [0.9756866097450256, 0.024313366040587425], + } + elif "pas satisfait" in sequences: + return { + "sequence": sequences, + "labels": ["négatif", "positif"], + "scores": [0.7485730648040771, 0.25142696499824524], + } + else: + return { + "sequence": sequences, + "labels": ["négatif", "positif"], + "scores": [0.5, 0.5], + } + + +def test_tutorial001(add_docs_to_pythonpath): + from docs_src.BasicClassification.tutorial001 import run, transformers_standalone + + with patch( + "docs_src.BasicClassification.tutorial001.pipeline", + new=MockZeroShotClassificationPipeline, + ): + result = transformers_standalone() + assert isinstance(result, List) + + df = run() + assert isinstance(df, pd.DataFrame) diff --git a/tests/io/__init__.py b/tests/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/io/test_io_mixin.py b/tests/io/test_io_mixin.py new file mode 100644 index 0000000..e0eb1bf --- /dev/null +++ b/tests/io/test_io_mixin.py @@ -0,0 +1,50 @@ +import pytest + +from melusine import config +from melusine.io import IoMixin +from melusine.io._classes import InitError + + +class FakeClass(IoMixin): + def __init__(self, test_arg): + super().__init__() + self.test_arg = test_arg + + +def test_from_config_dict(): + config_dict = {"test_arg": "hey"} + instance = FakeClass.from_config(config_dict=config_dict) + + assert instance.test_arg == "hey" + + 
+@pytest.mark.usefixtures("use_dict_backend", "reset_melusine_config") +def test_from_config_key(): + my_dict = {"test_arg": "hey"} + test_conf_dict = config.dict() + test_conf_dict["testclass_conf"] = my_dict + config.reset(config_dict=test_conf_dict) + + instance = FakeClass.from_config(config_key="testclass_conf") + + assert instance.test_arg == "hey" + + +def test_from_config_dict_error(): + unknown_arg = "unknown_arg" + config_dict = {"test_arg": "hey", unknown_arg: 42} + + with pytest.raises(InitError, match=f"{FakeClass.__name__}.*{unknown_arg}"): + _ = FakeClass.from_config(config_dict=config_dict) + + +def test_from_config_dict_and_config_key_error(): + config_dict = {"test_arg": "hey"} + + with pytest.raises(ValueError): + _ = FakeClass.from_config(config_dict=config_dict, config_key="blabla") + + +def test_from_config_dict_and_config_key_none_error(): + with pytest.raises(ValueError): + _ = FakeClass.from_config() diff --git a/tests/pipeline/__init__.py b/tests/pipeline/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py new file mode 100644 index 0000000..92a5226 --- /dev/null +++ b/tests/pipeline/test_pipeline.py @@ -0,0 +1,157 @@ +""" +Unit test for pipeline.py +""" +import pandas as pd +import pytest + +from melusine.base import MelusineTransformer +from melusine.pipeline import MelusinePipeline + +# Dummy variables +dum0 = 42 +dum1 = "dum" +dum2 = "dumdum" +dum3 = "duuum" + + +# Define dummy processors +class DummyProcessor(MelusineTransformer): + def __init__(self, input_columns="a", output_columns=("b",), dummy_attr=dum1): + super().__init__(input_columns, output_columns, func=self.add_dummy_col) + self.dummy_attr = dummy_attr + + def add_dummy_col(self, col_a_data): + return self.dummy_attr + + +def test_pipeline_with_processors(): + d1 = DummyProcessor() + d2 = DummyProcessor(output_columns=("c",), dummy_attr=dum2) + + # Create pipeline + pipe = 
MelusinePipeline(steps=[("d1", d1), ("d2", d2)], verbose=True) + + # Create data + df = pd.DataFrame({"a": [dum0, dum0]}) + + # Fit the pipeline and transform the data + df_transformed = pipe.fit_transform(df) + + # Most basic test, check that the pipeline returns a pandas DataFrame + assert isinstance(df_transformed, pd.DataFrame) + assert "a" in df_transformed.columns + assert "b" in df_transformed.columns + assert "c" in df_transformed.columns + + assert df_transformed["a"].iloc[0] == dum0 + assert df_transformed["b"].iloc[0] == dum1 + assert df_transformed["c"].iloc[0] == dum2 + + +def test_pipeline_with_arbitrary_transformer(): + class ArbitraryTransformer: + def __init__(self, dummy_attr=dum1): + self.dummy_attr = dummy_attr + + def add_dummy_col(self, col_a_data): + return self.dummy_attr + + def fit(self, x, y=None): + return self + + def transform(self, x): + x["b"] = x["a"].apply(self.add_dummy_col) + + return x + + d1 = ArbitraryTransformer() + + # Create pipeline + pipe = MelusinePipeline(steps=[("d1", d1)], verbose=True) + + # Create data + df = pd.DataFrame({"a": [dum0, dum0]}) + + # Fit the pipeline and transform the data + df_transformed = pipe.fit_transform(df) + + # Most basic test, check that the pipeline returns a pandas DataFrame + assert isinstance(df_transformed, pd.DataFrame) + assert "a" in df_transformed.columns + assert "b" in df_transformed.columns + + +def test_meta_pipeline(): + d1 = DummyProcessor() + d2 = DummyProcessor(output_columns=("c",), dummy_attr=dum2) + d3 = DummyProcessor(output_columns=("d",), dummy_attr=dum3) + + # Create pipeline + pipe1 = MelusinePipeline(steps=[("d1", d1), ("d2", d2)], verbose=True) + pipe2 = MelusinePipeline(steps=[("d3", d3)]) + meta_pipe = MelusinePipeline(steps=[("pipe1", pipe1), ("pipe2", pipe2)]) + + # Create data + df = pd.DataFrame({"a": [dum0, dum0]}) + + # Fit the pipeline and transform the data + df_transformed = meta_pipe.fit_transform(df) + + # Most basic test, check that the pipeline 
returns a pandas DataFrame + assert isinstance(df_transformed, pd.DataFrame) + assert "a" in df_transformed.columns + assert "b" in df_transformed.columns + assert "c" in df_transformed.columns + assert "d" in df_transformed.columns + + assert df_transformed["a"].iloc[0] == dum0 + assert df_transformed["b"].iloc[0] == dum1 + assert df_transformed["c"].iloc[0] == dum2 + assert df_transformed["d"].iloc[0] == dum3 + + +def test_pipeline_from_config(): + _ = MelusinePipeline.from_config(config_key="my_pipeline") + + +def test_pipeline_from_config_error(): + with pytest.raises(ValueError, match=r"'config_key' and 'config_dict'"): + _ = MelusinePipeline.from_config(config_key="x", config_dict={"a": 5}) + + +def test_pipeline_get_config_from_key(): + conf = MelusinePipeline.get_config_from_key(config_key="my_pipeline") + assert conf + + +def test_pipeline_from_config_missing_class(): + config_dict = { + "steps": [ + { + "class_name": "UnknownClass", + "module": "melusine.processors", + "name": "test", + "parameters": { + "mel": "usine", + }, + }, + ] + } + with pytest.raises(AttributeError, match=r"UnknownClass.*melusine.processors"): + _ = MelusinePipeline.from_config(config_dict=config_dict) + + +def test_pipeline_input_output_columns(): + d1 = DummyProcessor(input_columns=("a",), output_columns=("b",)) + d2 = DummyProcessor(input_columns=("b",), output_columns=("c",), dummy_attr=dum2) + + # Create pipeline + pipe = MelusinePipeline(steps=[("d1", d1), ("d2", d2)], verbose=True) + + assert len(pipe.input_columns) == 2 + assert "a" in pipe.input_columns + assert "b" in pipe.input_columns + + assert len(pipe.output_columns) == 2 + assert "b" in pipe.output_columns + assert "c" in pipe.output_columns diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py new file mode 100644 index 0000000..63646f2 --- /dev/null +++ b/tests/pipeline/test_pipeline_basic.py @@ -0,0 +1,314 @@ +""" +Example script to fit a minimal preprocessing pipeline +""" 
+import pandas as pd +import pytest + +from melusine import config +from melusine.pipeline import MelusinePipeline, PipelineConfigurationError +from melusine.processors import Normalizer, RegexTokenizer + + +def test_pipeline_basic(dataframe_basic): + """ + Train a pipeline by explicitly instantiating all the transformers. + """ + # Input data + df = dataframe_basic.copy() + + # Instantiate processors + normalizer = Normalizer(lowercase=True, form="NFKD") + tokenizer = RegexTokenizer() + + # Create pipeline + pipe = MelusinePipeline(steps=[("normalizer", normalizer), ("tokenizer", tokenizer)], verbose=True) + + # Fit the pipeline and transform the data + df_transformed = pipe.fit_transform(df) + + # Most basic test, check that the pipeline returns a pandas DataFrame + assert isinstance(df_transformed, pd.DataFrame) + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_config(dataframe_basic): + """ + Train a pipeline using transformers defined in a pipeline config file. 
+ """ + # Input data + df = dataframe_basic.copy() + + # Set config keys + normalizer_key = "test_normalizer" + tokenizer_key = "test_tokenizer" + pipeline_key = "test_pipeline" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": normalizer_key, + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processors", + "config_key": tokenizer_key, + }, + ] + } + + test_conf_dict = config.dict() + test_conf_dict[pipeline_key] = conf_pipeline_basic + config.reset(config_dict=test_conf_dict) + + # Create pipeline from a json config file (using config key "my_pipeline") + pipe = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) + + # Fit the pipeline and transform the data + df_transformed = pipe.fit_transform(df) + + # Make basic tests + assert isinstance(df_transformed, pd.DataFrame) + assert normalizer_key in pipe.named_steps + assert tokenizer_key in pipe.named_steps + + +@pytest.mark.usefixtures("use_test_config") +def test_pipeline_from_dict(dataframe_basic): + """ + Train a pipeline using transformers defined in a pipeline config file. 
+ """ + # Input data + df = dataframe_basic.copy() + + # Set config keys + normalizer_name = "normalizer" + tokenizer_key = "test_tokenizer" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "name": normalizer_name, + "class_name": "Normalizer", + "module": "melusine.processors", + "parameters": { + "form": "NFKD", + "input_columns": ["text"], + "lowercase": True, + "output_columns": ["text"], + }, + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processors", + "config_key": tokenizer_key, + }, + ] + } + + # Create pipeline from a json config file (using config key "my_pipeline") + pipe = MelusinePipeline.from_config(config_dict=conf_pipeline_basic, verbose=True) + + # Fit the pipeline and transform the data + df_transformed = pipe.fit_transform(df) + + # Make basic tests + assert isinstance(df_transformed, pd.DataFrame) + assert normalizer_name in pipe.named_steps + assert tokenizer_key in pipe.named_steps + + +@pytest.mark.usefixtures("use_test_config") +def test_missing_config_key(): + """ + Train a pipeline using transformers defined in a pipeline config file. + """ + # Set config keys + normalizer_name = "normalizer" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "name": normalizer_name, + "class_name": "Normalizer", + "module": "melusine.processors", + "parameters": { + "form": "NFKD", + "input_columns": ["text"], + "lowercase": True, + "output_columns": ["text"], + }, + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processors", + }, + ] + } + + # Create pipeline from a json config file (using config key "my_pipeline") + with pytest.raises(PipelineConfigurationError): + _ = MelusinePipeline.from_config(config_dict=conf_pipeline_basic, verbose=True) + + +@pytest.mark.usefixtures("use_test_config") +def test_invalid_config_key(): + """ + Train a pipeline using transformers defined in a pipeline config file. 
+ """ + incorrect_config_key = "INCORRECT_CONFIG_KEY" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "config_key": incorrect_config_key, + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processors", + "name": "test_name", + "parameters": {"test_key": "test_value"}, + }, + ] + } + + # Create pipeline from a json config file (using config key "my_pipeline") + with pytest.raises(KeyError, match=incorrect_config_key): + _ = MelusinePipeline.from_config(config_dict=conf_pipeline_basic, verbose=True) + + +@pytest.mark.usefixtures("use_test_config") +@pytest.mark.parametrize( + "pipeline_conf", + [ + pytest.param( + { + "NOT_STEPS": [ + { + "class_name": "Normalizer", + "module": "test_module", + "config_key": "test_key", + }, + ] + }, + id="Missing steps key", + ), + pytest.param( + { + "steps": [ + { + "class_name": "Normalizer", + "config_key": "test_key", + }, + ] + }, + id="Missing module key", + ), + pytest.param( + { + "steps": [ + { + "class_name": "Normalizer", + "config_key": "test_key", + "name": "test_name", + }, + ] + }, + id="Missing parameters key", + ), + pytest.param( + { + "steps": [ + { + "class_name": "Normalizer", + "module": "test_module", + "name": "test_name", + "parameters": "THIS SHOULD BE A DICT", + }, + ] + }, + id="Erroneous parameters type", + ), + pytest.param( + { + "steps": [ + { + "class_name": "Normalizer", + "module": "melusine.processors", + "parameters": { + "form": "NFKD", + "input_columns": ["text"], + "lowercase": True, + "output_columns": ["text"], + }, + }, + { + "class_name": "RegexTokenizer", + "module": "melusine.processors", + "config_key": "test_tokenizer", + }, + ] + }, + id="Missing name key", + ), + ], +) +def test_pipeline_config_error(pipeline_conf): + """ + Train a pipeline using transformers defined in a pipeline config file. 
+ """ + # Creating a pipeline from an invalid config dict should raise an error + with pytest.raises(PipelineConfigurationError): + _ = MelusinePipeline.from_config(config_dict=pipeline_conf) + + +def test_missing_input_field(dataframe_basic): + """ + Try to transform with an ill-configured pipeline. + (The tokenizer step expects an input field "my_missing_field" which is not present) + """ + # Input data + df = dataframe_basic.copy() + + # Set config keys + normalizer_name = "normalizer" + tokenizer_name = "tokenizer" + missing_field_name = "my_missing_field" + + # Pipeline configuration + conf_pipeline_basic = { + "steps": [ + { + "name": normalizer_name, + "class_name": "Normalizer", + "module": "melusine.processors", + "parameters": { + "input_columns": ["text"], + "output_columns": ["text"], + }, + }, + { + "name": tokenizer_name, + "class_name": "RegexTokenizer", + "module": "melusine.processors", + "parameters": { + "input_columns": [missing_field_name], + "output_columns": ["tokens"], + }, + }, + ] + } + + # Create pipeline from a config dict + pipe = MelusinePipeline.from_config(config_dict=conf_pipeline_basic, verbose=True) + + # Transforming the data should fail because the input field is missing + with pytest.raises(ValueError, match=rf"(?s){tokenizer_name}.*{missing_field_name}"): + pipe.transform(df) diff --git a/tests/pipeline/test_pipeline_testing.py b/tests/pipeline/test_pipeline_testing.py new file mode 100644 index 0000000..07d1e0b --- /dev/null +++ b/tests/pipeline/test_pipeline_testing.py @@ -0,0 +1,52 @@ +import pytest + +from melusine.testing.pipeline_testing import assert_pipeline_results + + +@pytest.mark.usefixtures("use_dict_backend") +def test_pipeline_testing(): + testcase = { + "test_name": "Simple test", + "body": "Hello\r\nGood bye", + "from": "test@gmail.com", + "header": "Test header", + "body_cleaner_expected": {"tmp_clean_body": "Hello\nGood bye"}, + } + + assert_pipeline_results(testcase, "my_pipeline") + + 
+@pytest.mark.usefixtures("use_dict_backend") +def test_pipeline_testing_error(): + test_name = "Expected result error" + pipeline_step_name = "body_cleaner" + field_name = "tmp_clean_body" + expected_value = "NotTheRightText" + + testcase = { + "test_name": test_name, + "body": "Hello\r\nGood bye", + "from": "test@gmail.com", + "header": "Test header", + f"{pipeline_step_name}_expected": {field_name: expected_value}, + } + + with pytest.raises(AssertionError, match=f"{test_name}.*{pipeline_step_name}.*{field_name}.*{expected_value}"): + assert_pipeline_results(testcase, "my_pipeline") + + +@pytest.mark.usefixtures("use_dict_backend") +def test_pipeline_testing_untested_field(): + test_name = "Untested field test" + pipeline_step_name = "non_existent_step" + + testcase = { + "test_name": test_name, + "body": "Hello\r\nGood bye", + "from": "test@gmail.com", + "header": "Test header", + f"{pipeline_step_name}_expected": {"field", "expected_value"}, + } + + with pytest.raises(AssertionError, match=f"{pipeline_step_name}.*{test_name}"): + assert_pipeline_results(testcase, "my_pipeline") diff --git a/tests/pipeline/test_pipeline_with_ml.py b/tests/pipeline/test_pipeline_with_ml.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/processors/__init__.py b/tests/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/processors/test_content_tagger.py b/tests/processors/test_content_tagger.py new file mode 100644 index 0000000..efc9047 --- /dev/null +++ b/tests/processors/test_content_tagger.py @@ -0,0 +1,764 @@ +import re + +import pytest + +from melusine.message import Message +from melusine.processors import BaseContentTagger, ContentTagger, Tag + + +def test_content_tagger(): + # Text segments (= individual messages in an email conversation) + text_segments = [ + "Envoye de mon iphone", + ("Bonjour Mme X,\nSuite a blh blah blah\n" "Bien cordialement\nJane Dupond\n" "(See attached file: flex.jpg)"), + ( + "Bonjour,\nVeuillez 
trouver ci-joint blah\n" + "Merci d'avance,\nCordialement,\n" + "Toute modification, edition, utilisation ou diffusion non autorisee est interdite" + ), + ] + + # Expected tags + expected_tags = [ + [ + ("FOOTER", "Envoye de mon iphone"), + ], + [ + ("HELLO", "Bonjour Mme X,"), + ("BODY", "Suite a blh blah blah"), + ("GREETINGS", "Bien cordialement"), + ("SIGNATURE_NAME", "Jane Dupond"), + ("PJ", "(See attached file: flex.jpg)"), + ], + [ + ("HELLO", "Bonjour,"), + ("BODY", "Veuillez trouver ci-joint blah"), + ("THANKS", "Merci d'avance,"), + ("GREETINGS", "Cordialement,"), + ( + "FOOTER", + "Toute modification, edition, utilisation ou diffusion non autorisee est interdite", + ), + ], + ] + + # Mock the output of a Segmenter (List of Message object) + messages = [Message(text=segment) for segment in text_segments] + + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_messages = tagger.tag_email(messages) + + # Test output tags + output_tags = [x.tags for x in output_messages] + assert output_tags == expected_tags + + +def test_tag_null_message(): + messages = None + + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_messages = tagger.tag_email(messages) + + assert output_messages is None + + +@pytest.mark.parametrize( + "text, expected_parts", + [ + ( + "Bonjour, merci pour votre message!\nComment-allez vous?! 
Je suis satisfait!!!\n" + "Bien cordialement\n\n\n\nJane Dupond\n", + [ + "Bonjour,", + "merci pour votre message!", + "Comment-allez vous?!", + "Je suis satisfait!!!", + "Bien cordialement", + "Jane Dupond", + ], + ), + ], +) +def test_content_tagger_split_text(text, expected_parts): + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_parts = tagger.split_text(text) + + assert output_parts == expected_parts + + +@pytest.mark.parametrize( + "text, expected_tags", + [ + ( + "Bonjour Mme X,\nSuite a blh blah blah.\n" + "Bien cordialement\nJane Dupond\n" + "(See attached file: flex.jpg)", + [ + ("HELLO", "Bonjour Mme X,"), + ("BODY", "Suite a blh blah blah."), + ("GREETINGS", "Bien cordialement"), + ("SIGNATURE_NAME", "Jane Dupond"), + ("PJ", "(See attached file: flex.jpg)"), + ], + ), + ( + "Bonjour, je confirme le rdv. Cordialement, John Smith", + [ + ("HELLO", "Bonjour,"), + ("BODY", "je confirme le rdv."), + ("GREETINGS", "Cordialement, John Smith"), + ], + ), + ( + ( + "Bonjour,\nSuite a notre intervention du 16.02.22 , un taux d'humidité de 50% a été relevé.\n" + "Cordialement.\n177, rue de la fée - 75000 Paris.\n" + "Horaires : du lundi au jeudi de 08h00 à 16h30 et le vendredi de 08h00 à 16h00.\n" + "Tel : 01.45.53.11.33" + ), + [ + ("HELLO", "Bonjour,"), + ("BODY", "Suite a notre intervention du 16.02.22 , un taux d'humidité de 50% a été relevé."), + ("GREETINGS", "Cordialement."), + ("SIGNATURE", "177, rue de la fée - 75000 Paris."), + ("BODY", "Horaires : du lundi au jeudi de 08h00 à 16h30 et le vendredi de 08h00 à 16h00."), + ("SIGNATURE", "Tel : 01.45.53.11.33"), + ], + ), + ( + ( + "bonjour\n" + "15 jours après les premières réparations, un défaut a été détecté. 
" + "Bien à vous\n" + "Britney Spears" + ), + [ + ("HELLO", "bonjour"), + ("BODY", "15 jours après les premières réparations, un défaut a été détecté."), + ("GREETINGS", "Bien à vous"), + ("SIGNATURE_NAME", "Britney Spears"), + ], + ), + ( + ( + "Bonjour monsieur Smith\n" + "merci. Bien à vous\n" + "Britney Spears\n" + "22 hollywood boulevard\n" + "79000 Niort\n" + ), + [ + ("HELLO", "Bonjour monsieur Smith"), + ("THANKS", "merci."), + ("GREETINGS", "Bien à vous"), + ("SIGNATURE_NAME", "Britney Spears"), + ("SIGNATURE", "22 hollywood boulevard"), + ("SIGNATURE", "79000 Niort"), + ], + ), + ( + ( + "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris. " + "Merci d'avance. \nAcceptez notre salutation," + ), + [ + ("BODY", "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris."), + ("THANKS", "Merci d'avance."), + ("GREETINGS", "Acceptez notre salutation,"), + ], + ), + ( + ( + "Bonjour\n" + "Je vous relance concernant ma télévision avec le devis en PJ.\n" + "Désolé pour la qualité.\n" + "Je l'ai envoyé à partir de mon ordi.\n" + "Excellente journée à vous,\n" + "Bon we\n" + "Votre bien dévoué\n" + "amicalement votre\n" + "Cordiales salutations.\n" + "Françoise-Bénédicte Dupond\n" + "Envoyé à partir de \nCourrier \npour Windows" + ), + [ + ("HELLO", "Bonjour"), + ("BODY", "Je vous relance concernant ma télévision avec le devis en PJ."), + ("BODY", "Désolé pour la qualité."), + ("BODY", "Je l'ai envoyé à partir de mon ordi."), + ("HELLO", "Excellente journée à vous,"), + ("HELLO", "Bon we"), + ("GREETINGS", "Votre bien dévoué"), + ("GREETINGS", "amicalement votre"), + ("GREETINGS", "Cordiales salutations."), + ("SIGNATURE_NAME", "Françoise-Bénédicte Dupond"), + ("FOOTER", "Envoyé à partir de"), + ("FOOTER", "Courrier"), + ("FOOTER", "pour Windows"), + ], + ), + ( + "C'est bien note, merci beaucoup.\nSentiments dévoués.\nTélécharger \nOutlook pour Android", + [ + ("THANKS", "C'est bien note, 
merci beaucoup."), + ("GREETINGS", "Sentiments dévoués."), + ("FOOTER", "Télécharger"), + ("FOOTER", "Outlook pour Android"), + ], + ), + ( + "Impeccable, je vous remercie beaucoup pour votre rapidité.\nObtenir\nOutlook pour Android", + [ + ("THANKS", "Impeccable, je vous remercie beaucoup pour votre rapidité."), + ("FOOTER", "Obtenir"), + ("FOOTER", "Outlook pour Android"), + ], + ), + ( + ( + "Cher Monsieur,\nJe vous confirme la bonne réception de votre précédent email.\n" + "Je vous en remercie.\nBien cordialement,\nJohn Smith" + ), + [ + ("HELLO", "Cher Monsieur,"), + ("BODY", "Je vous confirme la bonne réception de votre précédent email."), + ("THANKS", "Je vous en remercie."), + ("GREETINGS", "Bien cordialement,"), + ("SIGNATURE_NAME", "John Smith"), + ], + ), + ( + ( + "chère madame,\n" + "URGENT URGENT\n" + "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris. " + "Merci d'avance. \nRecevez nos salutations,\nVous en souhaitant bonne réception" + ), + [ + ("HELLO", "chère madame,"), + ("BODY", "URGENT URGENT"), + ("BODY", "Merci de me faire suivre les docs à ma nouvelle adresse qui est 0 rue du parc, 75000 Paris."), + ("THANKS", "Merci d'avance."), + ("GREETINGS", "Recevez nos salutations,"), + ("GREETINGS", "Vous en souhaitant bonne réception"), + ], + ), + pytest.param( + "Un témoignage sous X\nEnvoyé depuis mon téléphone Orange", + [ + ("BODY", "Un témoignage sous X"), + ("FOOTER", "Envoyé depuis mon téléphone Orange"), + ], + id="Edge case where a line ends with an isolated character", + ), + pytest.param( + " ??\n !??!", + [ + ("BODY", "??!??!"), + ], + id="Edge case where the two first lines are missing word characters", + ), + ( + "Bonjour Mme X,\nSuite a blh blah blah.\n" + "Bien cordialement\nJane Dupond\n" + "(See attached file: flex.jpg)", + [ + ("HELLO", "Bonjour Mme X,"), + ("BODY", "Suite a blh blah blah."), + ("GREETINGS", "Bien cordialement"), + ("SIGNATURE_NAME", "Jane Dupond"), + ("PJ", "(See attached 
file: flex.jpg)"), + ], + ), + ( + "\nChère Madame\n\nC'est bien noté, merci\nBien reçu\nJ.Smith\n\n", + [ + ("HELLO", "Chère Madame"), + ("THANKS", "C'est bien noté, merci"), + ("BODY", "Bien reçu"), + ("SIGNATURE_NAME", "J.Smith"), + ], + ), + ( + "\nBonjour Monsieur, ceci n'est pas un hello\nBonne fin de journee\nsalutations", + [ + ("BODY", "Bonjour Monsieur, ceci n'est pas un hello"), + ("HELLO", "Bonne fin de journee"), + ("GREETINGS", "salutations"), + ], + ), + ( + "\nBonjour Monsieur Stanislas von den hoeggenboord\n\nbien à toi\nJ. Smith\nChargé de clientèle", + [ + ("HELLO", "Bonjour Monsieur Stanislas von den hoeggenboord"), + ("GREETINGS", "bien à toi"), + ("SIGNATURE_NAME", "J. Smith"), + ("SIGNATURE", "Chargé de clientèle"), + ], + ), + ( + ( + "\n1 rdv à 18h\n\n2 ème message laissé à la locataire\n3je m'en vais au bois\n" + "4 allée des iris\n 5bis rue Patrick Sebastien\n6-8 cours mirabeau\n 7 ter place du dahu\n" + "8 de la rue très longue qui ne doit pas être taggée signature" + ), + [ + ("BODY", "1 rdv à 18h"), + ("BODY", "2 ème message laissé à la locataire"), + ("BODY", "3je m'en vais au bois"), + ("SIGNATURE", "4 allée des iris"), + ("SIGNATURE", "5bis rue Patrick Sebastien"), + ("SIGNATURE", "6-8 cours mirabeau"), + ("SIGNATURE", "7 ter place du dahu"), + ("BODY", "8 de la rue très longue qui ne doit pas être taggée signature"), + ], + ), + ( + ( + "à L'attention de M Bob,\n" + "Bonjour,\n" + "Je vous informe que je vais accepter la proposition de L , à savoir le paiement d'une indemnité forfaitaire de résiliation du CCMI de 4000 € TTC pour clore cette affaire.\n" + "Cordialement.\n" + "Bob Smith" + ), + [ + ("FOOTER", "à L'attention de M Bob,"), + ("HELLO", "Bonjour,"), + ( + "BODY", + "Je vous informe que je vais accepter la proposition de L , à savoir le paiement d'une indemnité forfaitaire de résiliation du CCMI de 4000 € TTC pour clore cette affaire.", + ), + ("GREETINGS", "Cordialement."), + ("SIGNATURE_NAME", "Bob Smith"), + ], + ), + ( 
+ ( + "Monsieur Bob Smith\n" + "Adresse mail : BobSmith90@gmail.com\n" + "Lucy Ange\n\n" + "Bonjour Monsieur,\n" + "Suite à notre entretien téléphonique de ce matin, et au message que vous m'avez envoyé sur ma messagerie, je voudrais effectuer la réparation du véhicule Renault Twingo dans un garage partenaire de la Maif situé, si c'est possible.\n" + "Dans l'attente de votre réponse et en vous remerciant par avance,\n\n\n" + "Monsieur Bob Smith\n\n\n" + "Envoyé à partir de\n" + "Courrier\npour Windows\n\n\n\n" + "Sans virus.\nwww.avast.com" + ), + [ + ("HELLO", "Monsieur Bob Smith"), + ("SIGNATURE", "Adresse mail : BobSmith90@gmail.com"), + ("SIGNATURE_NAME", "Lucy Ange"), + ("HELLO", "Bonjour Monsieur,"), + ( + "BODY", + "Suite à notre entretien téléphonique de ce matin, et au message que vous m'avez envoyé sur ma messagerie, je voudrais effectuer la réparation du véhicule Renault Twingo dans un garage partenaire de la Maif situé, si c'est possible.", + ), + ("BODY", "Dans l'attente de votre réponse et en vous remerciant par avance,"), + ("HELLO", "Monsieur Bob Smith"), + ("FOOTER", "Envoyé à partir de"), + ("FOOTER", "Courrier"), + ("FOOTER", "pour Windows"), + ("FOOTER", "Sans virus."), + ("FOOTER", "www.avast.com"), + ], + ), + ( + ( + "Bob Smith\n\n\n" + "A l’attention de Madame Lucy Ange,\n\n\n\n\n\n" + "Bonjour Madame Ange,\n\n\n\n\n\n\n\n\n" + "J’espère que vous allez bien.\n\n\n\n\n\n" + "Pour faire suite à mon mail du 21 février 2023, je me permets de revenir vers vous pour avoir votre avis sur le devis que j’ai demandé auprès d’un enquêteur.\n\n\n\n" + "Voici son retour :\n\n\n\n\n\n" + "Qu’en pensez-vous svp ?\n\n\n\n\n\n" + "Je reste à votre disposition pour tout complément d’information et vous remercie de l’intérêt que vous porterez à ma demande,\n\n\n\n\n\n" + "Bien Cordialement,\n\n\n\n\n\n" + "Bob Smith\n\n\n" + "Tél. 
06.83.22.95.94" + ), + [ + ("SIGNATURE_NAME", "Bob Smith"), + ("FOOTER", "A l’attention de Madame Lucy Ange,"), + ("HELLO", "Bonjour Madame Ange,"), + ("BODY", "J’espère que vous allez bien."), + ( + "BODY", + "Pour faire suite à mon mail du 21 février 2023, je me permets de revenir vers vous pour avoir votre avis sur le devis que j’ai demandé auprès d’un enquêteur.", + ), + ("BODY", "Voici son retour :"), + ("BODY", "Qu’en pensez-vous svp ?"), + ( + "BODY", + "Je reste à votre disposition pour tout complément d’information et vous remercie de l’intérêt que vous porterez à ma demande,", + ), + ("GREETINGS", "Bien Cordialement,"), + ("SIGNATURE_NAME", "Bob Smith"), + ("SIGNATURE", "Tél."), + ("SIGNATURE", "06.83.22.95.94"), + ], + ), + pytest.param( + ( + "cordialement\nContact e-mail\n\n\nContact téléphone\n\n01 23 45 67 89 / abcabc@hotmail.fr\n" + "Torroella de Montgri, le 5 avril 2023\nLes formats de fichiers acceptés sont : PDF, DOC, DOCX, JPEG, " + "JPG, TIFF, TXT, ODT, XLS, XLSX\nTout autre format de fichiers ne sera pas transmis au dossier" + ), + [ + ("GREETINGS", "cordialement"), + ("SIGNATURE", "Contact e-mail"), + ("SIGNATURE", "Contact téléphone"), + ("SIGNATURE", "01 23 45 67 89 / abcabc@hotmail.fr"), + ("SIGNATURE", "Torroella de Montgri, le 5 avril 2023"), + ( + "FOOTER", + "Les formats de fichiers acceptés sont : PDF, DOC, DOCX, JPEG, JPG, TIFF, TXT, ODT, XLS, XLSX", + ), + ("FOOTER", "Tout autre format de fichiers ne sera pas transmis au dossier"), + ], + id="diverse_signature_patterns", + ), + pytest.param( + ( + "bonjour\nmon body\nJ. Smith\n\n01 23 45 67 89\nSecrétaire en charge des avions\n" + "Business Analyst – Tribu Sinistres – Squad Flux Entrants\n" + "Société nationale des chemins de fer\nConseiller MAIF\nGestionnaire sinistre - C99G\n" + "Service des lettres anonymes\nTechnicienne de gestion - EQUIPE ABC\n" + ), + [ + ("HELLO", "bonjour"), + ("BODY", "mon body"), + ("SIGNATURE_NAME", "J. 
Smith"), + ("SIGNATURE", "01 23 45 67 89"), + ("SIGNATURE", "Secrétaire en charge des avions"), + ("SIGNATURE", "Business Analyst – Tribu Sinistres – Squad Flux Entrants"), + ("SIGNATURE", "Société nationale des chemins de fer"), + ("SIGNATURE", "Conseiller MAIF"), + ("SIGNATURE", "Gestionnaire sinistre - C99G"), + ("SIGNATURE", "Service des lettres anonymes"), + ("SIGNATURE", "Technicienne de gestion - EQUIPE ABC"), + ], + id="signature_jobs", + ), + pytest.param( + ( + "bonjour\nmon body\nCordialement\n\n" + "analyste -------------------------------------- test test test test test test test\n" + ), + [ + ("HELLO", "bonjour"), + ("BODY", "mon body"), + ("GREETINGS", "Cordialement"), + ("BODY", "analyste -------------------------------------- test test test test test test test"), + ], + id="check_catastrophic_backtracking", + ), + ], +) +def test_tag_text_generic(text, expected_tags): + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_tags = tagger.tag_text(text) + # Test output tags + assert output_tags == expected_tags + + +@pytest.mark.parametrize( + "text, expected_tags", + [ + pytest.param( + ( + "Merci\n" + "Je vous remercie\n" + "Merci d'avance\n" + "Je vous remercie par avance\n" + "Vous en remerciant par avance.\n" + ), + [ + ("THANKS", "Merci"), + ("THANKS", "Je vous remercie"), + ("THANKS", "Merci d'avance"), + ("THANKS", "Je vous remercie par avance"), + ("THANKS", "Vous en remerciant par avance."), + ], + id="french thanks patterns", + ), + ], +) +def test_tag_text_french(text, expected_tags): + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_tags = tagger.tag_text(text) + # Test output tags + assert output_tags == expected_tags + + +@pytest.mark.parametrize( + "text, expected_tags", + [ + pytest.param( + ( + "Thank you so much\n" + "thanks\n" + "thx Joanna\n" + "thanks but you forgot bla\n" + "Thx however I still need the document\n" + ), + [ + ("THANKS", "Thank you so much"), + ("THANKS", "thanks"), + 
("THANKS", "thx Joanna"), + ("BODY", "thanks but you forgot bla"), + ("BODY", "Thx however I still need the document"), + ], + id="english thanks patterns", + ), + pytest.param( + ( + "Best\n" + "warm Wishes\n" + "regards\n" + "best regards\n" + "cheers\n" + "yours\n" + "yours truly\n" + "Sincerely\n" + "see you soon\n" + "Speak to you soon\n" + "talk soon\n" + "Take care\n" + "Catch you later\n" + "Have a fantastic day\n" + "Looking forward to your reply\n" + "I am looking forward to hearing from you\n" + "Hoping to hear from you\n" + ), + [ + ("GREETINGS", "Best"), + ("GREETINGS", "warm Wishes"), + ("GREETINGS", "regards"), + ("GREETINGS", "best regards"), + ("GREETINGS", "cheers"), + ("GREETINGS", "yours"), + ("GREETINGS", "yours truly"), + ("GREETINGS", "Sincerely"), + ("GREETINGS", "see you soon"), + ("GREETINGS", "Speak to you soon"), + ("GREETINGS", "talk soon"), + ("GREETINGS", "Take care"), + ("GREETINGS", "Catch you later"), + ("GREETINGS", "Have a fantastic day"), + ("GREETINGS", "Looking forward to your reply"), + ("GREETINGS", "I am looking forward to hearing from you"), + ("GREETINGS", "Hoping to hear from you"), + ], + id="english greetings", + ), + pytest.param( + ( + "Hello John\n" + "hi\n" + "Hi there\n" + "good to hear from you\n" + "it is good to hear from you\n" + "I hope you are having a great week\n" + "how are you doing\n" + "how are you positioned about the matter\n" + "i hope you are doing well\n" + "Good Morning Joanna\n" + "good Afternoon\n" + "Dear Jacky\n" + "Sir\n" + "Dear Madam\n" + "Dear Mr\n" + "Dear Ms.\n" + "Dear miss\n" + "Dear mrs.\n" + "Dear sir or madam\n" + "To whom it may concern\n" + ), + [ + ("HELLO", "Hello John"), + ("HELLO", "hi"), + ("HELLO", "Hi there"), + ("HELLO", "good to hear from you"), + ("HELLO", "it is good to hear from you"), + ("HELLO", "I hope you are having a great week"), + ("HELLO", "how are you doing"), + ("BODY", "how are you positioned about the matter"), + ("HELLO", "i hope you are doing well"), + 
("HELLO", "Good Morning Joanna"), + ("HELLO", "good Afternoon"), + ("HELLO", "Dear Jacky"), + ("HELLO", "Sir"), + ("HELLO", "Dear Madam"), + ("HELLO", "Dear Mr"), + ("HELLO", "Dear Ms."), + ("HELLO", "Dear miss"), + ("HELLO", "Dear mrs."), + ("HELLO", "Dear sir or madam"), + ("HELLO", "To whom it may concern"), + ], + id="english hello", + ), + pytest.param( + ( + "VP of Data Science\n" + "Chief of staff\n" + "CTO at TestMelusine\n" + "CEOABC test\n" + "Lead business developer\n" + ), + [ + ("SIGNATURE", "VP of Data Science"), + ("SIGNATURE", "Chief of staff"), + ("SIGNATURE", "CTO at TestMelusine"), + ("BODY", "CEOABC test"), + ("SIGNATURE", "Lead business developer"), + ], + id="english job signature patterns", + ), + ], +) +def test_tag_text_english(text, expected_tags): + # Instantiate and apply the Tagger + tagger = ContentTagger() + output_tags = tagger.tag_text(text) + # Test output tags + assert output_tags == expected_tags + + +def test_tag_list(): + # Limit tags to "HELLO" and the default tag ("BODY") + tag_list = ["HELLO"] + + # Text segment (= individual message in an email conversation) + text = "bonjour\nblah blah blah\nmerci\ncordialement" + + # Expected tags + expected_tags = [ + ("HELLO", "bonjour"), + ("BODY", "blah blah blah"), + ("BODY", "merci"), + ("BODY", "cordialement"), + ] + + # Instantiate and apply the Tagger + tagger = ContentTagger(tag_list=tag_list) + output_tags = tagger.tag_text(text) + + # Test output tags + assert expected_tags == output_tags + + +def test_undefined_tag(): + unknown_tag = "UNKNOWN_TAG" + + # Setup an unknown tag + tag_list = [unknown_tag] + + # Instantiate Tagger + with pytest.raises(ValueError, match=rf".*{unknown_tag}.*"): + _ = ContentTagger(tag_list=tag_list) + + +def test_unsupported_type(): + class MyClass(ContentTagger): + """Test class""" + + @Tag + def TEST_TAG(self): + """Test method""" + return 3.3 + + with pytest.raises(ValueError, match="supported types"): + _ = MyClass() + + +def 
test_compiled_pattern(): + class MyClass(ContentTagger): + """Test class""" + + @Tag + def TEST_TAG(self): + """Test method""" + return re.compile(r"cool_pattern") + + tagger = MyClass() + subtext, tag, match = tagger("cool_pattern is what I am looking for")[0] + + # Check tag result + assert tag == "TEST_TAG" + + +def test_str_pattern(): + class MyClass(ContentTagger): + """Test class""" + + @Tag + def TEST_TAG(self): + """Test method""" + return r"cool_pattern" + + tagger = MyClass() + subtext, tag, match = tagger("cool_pattern is what I am looking for")[0] + + # Check tag result + assert tag == "TEST_TAG" + + +def test_malformed_regex(): + from melusine.processors import Tag + + malformed_regex = r"[*." + + # Create a tagger containing an ill defined Tag (malformed regex) + class CustomTagger(ContentTagger): + """Test class""" + + @Tag + def HELLO(self): + """Test method""" + return malformed_regex + + # Instantiate Tagger + with pytest.raises(ValueError, match=rf"Invalid regex"): + _ = CustomTagger() + + +def test_direct_tagging(): + tagger = ContentTagger() + match = tagger["HELLO"].match("Bonjour") + + assert bool(match) + + +def test_call_method(): + tagger = ContentTagger() + + match_list = tagger("Bonjour a tous") + subtext, tag, regex = match_list[0] + + assert tag == "HELLO" + + +@pytest.mark.parametrize( + "text, n_words, word_character_only, expected_match", + [ + pytest.param("Hello you", 4, False, True, id="4 words match"), + pytest.param("Hello how are you today", 4, False, False, id="4 words no match"), + pytest.param("Hello! you?", 4, False, True, id="4 words match with special characters"), + pytest.param( + "Hello! 
you?", 4, True, False, id="4 words match with special characters (word character only True)" + ), + ], +) +def test_word_blocks(text, n_words, word_character_only, expected_match): + regex = BaseContentTagger.word_block(n_words, word_character_only=word_character_only) + + search_regex = r"^" + regex + r"$" + match = bool(re.search(search_regex, text)) + assert match == expected_match diff --git a/tests/processors/test_processors.py b/tests/processors/test_processors.py new file mode 100644 index 0000000..ba4257a --- /dev/null +++ b/tests/processors/test_processors.py @@ -0,0 +1,569 @@ +""" +Unit test for processors.py +""" +import pytest + +from melusine.processors import ( + DateProcessor, + DeterministicTextFlagger, + Message, + Normalizer, + RegexTokenizer, + Segmenter, + TextExtractor, + TokensExtractor, + TransferredEmailProcessor, +) + + +@pytest.mark.parametrize( + "input_text, lowercase, output_text", + [ + ("Héllö WORLD", True, "hello world"), + ("Hèllo WÖRLD", False, "Hello WORLD"), + ("", False, ""), + ], +) +def test_normalizer(input_text, lowercase, output_text): + """Unit test""" + + normalizer = Normalizer(lowercase=lowercase) + text = normalizer.normalize_text(input_text) + assert text == output_text + + assert normalizer.normalize_text(1.25) == "" + + +def test_normalizer_messages(): + """Unit test""" + + message_1 = Message(meta="", text="Héllö WORLD") + message_2 = Message(meta="abcd", text="Héllö heLLo") + normalizer = Normalizer(lowercase=True, input_columns="messages") + message_list = normalizer.normalize_message([message_1, message_2]) + + assert message_list[0].clean_text == "hello world" + assert message_list[1].clean_text == "hello hello" + + +@pytest.mark.parametrize( + "input_text, output_tokens, lowercase, normalization_form", + [ + ("le petit Chat", ["petit", "chat"], True, None), + ("le Géant", ["géant"], True, None), + ("le Géant", ["Geant"], False, "NFKD"), + ("Comme un grand", ["Comme", "grand"], False, None), + ("le un et je", 
[], False, None), + ], +) +def test_tokenizer(input_text, output_tokens, lowercase, normalization_form): + """Unit test""" + + tokenizer = RegexTokenizer( + tokenizer_regex=r"\w+(?:[\?\-\"_]\w+)*", + stopwords=["le", "un", "et", "je"], + lowercase=lowercase, + normalization_form=normalization_form, + ) + + tokens = tokenizer.tokenize(input_text) + assert tokens == output_tokens + + +@pytest.mark.parametrize( + "input_text, expected_messages", + [ + ("Hello World", [Message(meta="", text="Hello World")]), + ( + "Merci\nDe : jean@gmail.com\nObjet: Votre attestation\nVoici l'attestation", + [ + Message(meta="", text="Merci"), + Message( + meta="De : jean@gmail.com\nObjet: Votre attestation", + text="Voici l'attestation", + ), + ], + ), + ( + "Merci\nObjet: Votre attestation\nVoici l'attestation", + [ + Message(meta="", text="Merci\nObjet: Votre attestation\nVoici l'attestation"), + ], + ), + ( + "Merci\nDe : jean@gmail.com\nSujet : ABCD\nObjet: Votre attestation\nVoici l'attestation", + [ + Message(meta="", text="Merci"), + Message( + meta="De : jean@gmail.com\nSujet : ABCD\nObjet: Votre attestation", + text="Voici l'attestation", + ), + ], + ), + ( + "Je vous ai Envoyé :\n- le devis\nla facture\nSalutations", + [ + Message(meta="", text="Je vous ai Envoyé :\n- le devis\nla facture\nSalutations"), + ], + ), + ( + "Message 1\nLe 2 févr. 2022 à 09:10,\ntest@maif.fr\na écrit :\n\n\nMessage 2", + [ + Message(meta="", text="Message 1"), + Message(meta="Le 2 févr. 2022 à 09:10,\ntest@maif.fr\na écrit :", text="Message 2"), + ], + ), + ( + "Message 1\nmail transféré\n------------\nLe 2 févr. 2022 à 09:10,\ntest@maif.fr\na écrit :\n\n\nMessage 2", + [ + Message(meta="", text="Message 1"), + Message( + meta="mail transféré\n------------\nLe 2 févr. 
2022 à 09:10,\ntest@maif.fr\na écrit :", + text="Message 2", + ), + ], + ), + ], +) +def test_segmenter(input_text, expected_messages): + """Unit test""" + + segmenter = Segmenter() + result = segmenter.segment_text(input_text) + for i, message in enumerate(result): + assert message.meta == expected_messages[i].meta + assert message.text == expected_messages[i].text + + +@pytest.mark.parametrize( + "input_message_list, expected_text", + [ + ( + [ + Message(meta="", text="Hello world"), + Message( + meta="", + text="Hello world", + ), + ], + "Hello world", + ), + ( + [ + Message(meta="", text="Merci"), + Message( + meta="De : jean@gmail.com\nObjet: Votre attestation", + text="Voici l'attestation", + ), + ], + "Merci", + ), + ( + [ + Message(meta="", text="Merci", tags=[("THANKS", "Merci")]), + ], + "Merci", + ), + ], +) +def test_text_extractor(input_message_list, expected_text): + """Unit test""" + + extractor = TextExtractor(output_columns="text", n_messages=1) + result = extractor.extract(input_message_list) + assert result == expected_text + + +def test_text_extractor_error(): + """Unit test""" + with pytest.raises(ValueError): + _ = TextExtractor(output_columns="text", n_messages=1, include_tags=["A"], exclude_tags=["B"]) + + +def test_text_extractor_multiple_messages(): + """Unit test""" + message_list = [ + Message(meta="", text="", tags=[("BODY", "A"), ("GREETINGS", "G"), ("BODY", "A")]), + Message(meta="", text="", tags=[("BODY", "B"), ("BODY", "B"), ("BODY", "B")]), + Message(meta="", text="", tags=[("GREETINGS", "G"), ("BODY", "C"), ("BODY", "C")]), + ] + expected_output = "A\nB\nB\nB" + + extractor = TextExtractor( + output_columns="text", + n_messages=None, + stop_at=["GREETINGS"], + include_tags=["BODY"], + ) + result = extractor.extract(message_list) + assert result == expected_output + + +def test_text_extractor_with_tags(): + """Unit test""" + input_message_list = [ + Message(meta="", text="Bonjour\nblahblah\nMerci"), + Message(meta="", 
text="Bonjour2\nMerci2"), + ] + input_message_list[0].tags = [("HELLO", "Bonjour"), ("CUSTOM_TAG", "blahblah"), ("THANKS", "Merci")] + input_message_list[1].tags = [("HELLO", "Bonjour2"), ("THANKS", "Merci2")] + + extractor = TextExtractor( + output_columns="text", + exclude_tags=["HELLO", "CUSTOM_TAG"], + ) + result = extractor.extract(input_message_list) + assert result == "Merci" + + +def test_token_extractor(): + """Unit test""" + separator = "PAD" + pad_size = 2 + + token_extractor = TokensExtractor( + input_columns=("body_tok", "header_tok"), + output_columns=("toks,"), + pad_size=pad_size, + sep_token=separator, + ) + + data = { + "body_tok": ["my", "body"], + "header_tok": ["my", "header"], + } + + extracted = token_extractor.extract(data) + + assert extracted == ["my", "body", separator, separator, "my", "header"] + + +@pytest.mark.parametrize( + "input_text, output_text", + [ + ("appelle moi au 0606060606", "appelle moi au flag_phone"), + ("Tel:0606060606", "Tel: flag_phone"), + ("ecris moi a l'adresse test@domain.com", "ecris moi a l'adresse flag_email"), + ("nada nothing rien", "nada nothing rien"), + ("", ""), + ], +) +def test_text_flagger_default(input_text, output_text): + """Unit test""" + text_flags = { + "numeric_flags": {r"\d{10}": "flag_phone"}, + r"\w+@\w+\.\w{2,4}": "flag_email", + } + text_flagger = DeterministicTextFlagger(text_flags=text_flags) + text = text_flagger.flag_text(input_text) + + assert text == output_text + + +@pytest.mark.parametrize( + "input_text, output_text, add_spaces, remove_multiple_spaces", + [ + ("Tel:0606060606", "Tel:flag_phone", False, True), + ("Tel: 0606060606", "Tel: flag_phone", True, True), + ("Tel: 0606060606", "Tel: flag_phone", True, False), + ("", "", False, False), + ], +) +def test_text_flagger_args(input_text, output_text, add_spaces, remove_multiple_spaces): + """Unit test""" + text_flags = { + "numeric_flags": {r"\d{10}": "flag_phone"}, + } + text_flagger = DeterministicTextFlagger( + 
text_flags=text_flags, + add_spaces=add_spaces, + remove_multiple_spaces=remove_multiple_spaces, + ) + text = text_flagger.flag_text(input_text) + + assert text == output_text + + +def test_date_processor_instanciation(): + """Test instantiate processor""" + _ = DateProcessor(input_columns="date", output_columns="date") + + +@pytest.mark.parametrize( + "date_str, expected_iso_format", + [ + ("jeudi 29 avril 2021 21:07", "2021-04-29"), + ("jeudi 9 avril 2021 21:07", "2021-04-09"), + ("mar. 11 mai 2021 à 14:29", "2021-05-11"), + ("4 mai 2021 à 18:14:26 UTC+2", "2021-05-04"), + ("Lundi 04 janvier 2021 11:35", "2021-01-04"), + ("mié, 16 dic 2020 a las 16:00", "2020-12-16"), + ("mié, 16 dic, 2020 a las 16:00", "2020-12-16"), + ("gio 9 set 2021 alle ore 16:16", "2021-09-09"), + ("woensdag 16 december 2020 10:01", "2020-12-16"), + ("lundi 19 juihet 2021, 15:25:42 utc+2", "2021-07-19"), + ("16 june 2021 14:38", "2021-06-16"), + ("5 july 2021 at 16:49:41 cest", "2021-07-05"), + ("sunday, 13 jun 2021", "2021-06-13"), + ("sunday, 13 jun, 2021", "2021-06-13"), + ("26 eki 2020 pzt 14:27", "2020-10-26"), + ("vendredi 6 février 2019 09 h 02", "2019-02-06"), + ], +) +def test_date_processor(date_str: str, expected_iso_format: str) -> None: + """Simple base tests""" + date_processor = DateProcessor() + date_iso_format: str = date_processor.parse_date_to_iso(date_str) + + assert date_iso_format == expected_iso_format + + +@pytest.mark.parametrize( + "message_list, tags_to_ignore, expected_len_message_list, expected_original_from", + [ + # Direct transfer + pytest.param( + [ + Message( + meta="De: test.test@test.fr \n" + "Envoyé: mercredi 4 mai 2022 11:11\n" + "A: avocats@test.fr; BOB Morane \n" + "Objet: dossier Test ,\n", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ) + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test@test.fr", + id="direct_transfer", + ), + # Direct transfer with Footer + pytest.param( + [ + Message( + meta="", + text="Envoyé depuis mon Iphone", + 
tags=[("FOOTER", "Envoyé depuis mon Iphone")], + ), + Message( + meta="De: test.test@test.fr \n" + "Envoyé: mercredi 4 mai 2022 11:11\n" + "A: avocats@test.fr; BOB Morane \n" + "Objet: dossier Test ,\n", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test@test.fr", + id="direct_transfer_with_footer", + ), + # Direct transfer with Signature (pattern De:\ntest@gmail\nA:\n) + pytest.param( + [ + Message( + meta="", + text="Jane Doe\n4 rue des oliviers 75001 Ville", + tags=[ + ("SIGNATURE", "4 rue des oliviers 75001 Ville"), + ], + ), + Message( + meta="De :\ntest.test42@test.fr\nEnvoyé :\nvendredi 03 mars 2023 14:28\nÀ :" + "\nana@test.fr\nObjet :\nTEST", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test42@test.fr", + id="direct_transfer_with_signature", + ), + # Direct transfer with multiple Signature patterns + pytest.param( + [ + Message( + meta="", + text="Jane Doe\n4 rue des oliviers 75001 Ville", + tags=[ + ("SIGNATURE_NAME", "Jane Doe"), + ("SIGNATURE", "4 rue des oliviers 75001 Ville"), + ], + ), + Message( + meta="De :\ntest.test42@test.fr\nEnvoyé :\nvendredi 03 mars 2023 14:28\nÀ :" + "\nana@test.fr\nObjet :\nTEST", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test42@test.fr", + id="direct_transfer_with_multiple_signatures", + ), + # Other transition pattern (pattern De:\nTest \nA:\n) + pytest.param( + [ + Message( + meta=( + "De:\nANNA <42test.test@test.fr>\nDate:\n3 mars 2023 à 16:42:50 UTC+1\n" + "À:\nbob@test.fr\nObjet:\nTR : 1223456" + ), + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "42test.test@test.fr", + id="transition_pattern_de_date_a", + ), + # Other transition pattern (pattern De: "test@gmail"\nA:) + pytest.param( + [ + Message( + meta=( + """De: "test_test@test.fr"\nDate:\n3 mars 2023 à 16:42:50 UTC+1\n""" 
+ "À:\nbob@test.fr\nObjet:\nTR : 1223456" + ), + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test_test@test.fr", + id="transition_pattern_de_a", + ), + # Other transition pattern (pattern Le 1 mars..., Abc a écrit) + pytest.param( + [ + Message( + meta=("Le 2 mars 2023 à 18:18, Bob a écrit :"), + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test.test@test.fr", + id="transition_pattern_le_date_text_a_ecrit", + ), + # Other transition pattern (pattern Le 01/01/2001 12:12, abc@gmail.com a écrit) + pytest.param( + [ + Message( + meta=("Le 01/01/2001 11:14, test.test.test@test.fr a écrit :"), + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test.test@test.fr", + id="transition_pattern_le_date_numbers_a_ecrit", + ), + # Other transition pattern (pattern Le dim., 12:12, abc@gmail.com a écrit) + pytest.param( + [ + Message( + meta=("Le 01/01/2001 11:14, test.test.test@test.fr a écrit :"), + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + "test.test.test@test.fr", + id="transition_pattern_le_weekday_a_ecrit", + ), + # Test argument tags_to_ignore (removed Signature from the list) + pytest.param( + [ + Message( + meta="", + text="Jane Doe\n4 rue des oliviers 75001 Ville", + tags=[("SIGNATURE", "Jane Doe\n4 rue des oliviers 75001 Ville")], + ), + Message( + meta="De: test.test@test.fr \n" + "Envoyé: mercredi 4 mai 2022 11:11\n" + "A: avocats@test.fr; BOB Morane \n" + "Objet: dossier Test ,\n", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER',)", + 2, + None, + id="tags_to_ignore", + ), + # Not a transfer + pytest.param( + [ + Message( + meta="", + text="J'entends le loup, le renard et la belette", + tags=[("BODY", "J'entends le loup, le renard et la belette")], + ), + Message( + meta="De: test.test@test.fr \n" + "Envoyé: 
mercredi 4 mai 2022 11:11\n" + "A: avocats@test.fr; BOB Morane \n" + "Objet: dossier Test ,\n", + text="bla bla bla", + tags=[("BODY", "bla bla bla")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 2, + None, + id="not_a_transfer", + ), + # Empty mail + pytest.param( + [ + Message( + meta="", + text="", + tags=[("BODY", "")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + None, + id="empty_email", + ), + # No email address in the meta + pytest.param( + [ + Message( + meta="", + text="Envoyé de mon iPhone", + tags=[("FOOTER", "bla 1")], + ), + Message( + meta="Nothing useful", + text="bla 2", + tags=[("BODY", "bla 2")], + ), + ], + "('FOOTER', 'SIGNATURE')", + 1, + None, + id="missing_email_address", + ), + ], +) +def test_transferred_email_processor(message_list, tags_to_ignore, expected_len_message_list, expected_original_from): + """Unit test""" + processor = TransferredEmailProcessor(tags_to_ignore=tags_to_ignore, messages_column="message_list") + processed_message_list, clean_from = processor.process_transfered_mail(message_list=message_list) + + assert clean_from == expected_original_from + assert len(processed_message_list) == expected_len_message_list diff --git a/tests/regex/__init__.py b/tests/regex/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/regex/test_builtin_regex.py b/tests/regex/test_builtin_regex.py new file mode 100644 index 0000000..f664c54 --- /dev/null +++ b/tests/regex/test_builtin_regex.py @@ -0,0 +1,26 @@ +from melusine.regex import EmergencyRegex, ReplyRegex, ThanksRegex, TransferRegex, VacationReplyRegex + + +def test_reply_regex(): + regex = ReplyRegex() + regex.test() + + +def test_thanks_regex(): + regex = ThanksRegex() + regex.test() + + +def test_transfer_regex(): + regex = TransferRegex() + regex.test() + + +def test_vacation_reply_regex(): + regex = VacationReplyRegex() + regex.test() + + +def test_emergency_regex(): + regex = EmergencyRegex() + regex.test() diff --git a/tests/utils/__init__.py 
b/tests/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py new file mode 100644 index 0000000..51ab711 --- /dev/null +++ b/tests/utils/test_utils.py @@ -0,0 +1,6 @@ +from melusine.utils import show_versions + + +def test_show_versions(): + show_versions() + assert True From eb21aca1733df5d6eb623b323ef02cb74ce1934f Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:40:18 +0100 Subject: [PATCH 15/37] :white_check_mark: Setup tox --- tox.ini | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tox.ini diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..315c2b5 --- /dev/null +++ b/tox.ini @@ -0,0 +1,46 @@ +[tox] +requires = + tox>=4 +env_list = clean, core38, core310, transformers, report + +[gh-actions] +python = + 3.8: clean, core38, transformers + 3.10: core310 + +[testenv] +commands = pytest --cov --cov-append --cov-report xml +deps = + pytest + pytest-cov +depends = + {core38,transformers}: clean + report: core38,transformers + +[testenv:core38] +deps={[testenv]deps} +commands={[testenv]commands} + +[testenv:core310] +deps={[testenv]deps} +commands=pytest tests + +[testenv:clean] +deps = coverage[toml] +skip_install = true +commands = coverage erase + +[testenv:transformers] +description = run unit tests with the transformers dependency +deps={[testenv]deps} +commands = pytest tests/huggingface --cov --cov-append --cov-report xml +extras = transformers + +[testenv:report] +deps = coverage[toml] +skip_install = true +commands = + -coverage combine + -coverage report + -coverage html + -coverage xml \ No newline at end of file From 5c5ce8690ba05216b5356add0ec2fecbd68b958b Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:41:38 +0100 Subject: [PATCH 16/37] :wrench: Setup pyproject.toml --- pyproject.toml | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 
insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6623956 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,61 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "melusine" +authors = [ + {name = "TODO", email = "todo@maif.fr"}, + {name = "Hugo Perrier", email = "hugorperrier@gmail.com"}, +] +description = "Melusine is" +requires-python = ">=3.8" +keywords = ["nlp", "emails"] +license = {text = "todo"} +classifiers = [ + "Framework :: XYXY", + "Programming Language :: Python :: 3", +] +dependencies = [ + "arrow", + "pandas>2", + "scikit-learn>=1", + "tqdm>=4.34", + "omegaconf>=2.0", + # flashtext >= 2.7 +] +dynamic = ["version", "readme"] + +[project.optional-dependencies] # Optional +dev = [] +test = ["pytest", "coverage"] +transformers = ["transformers>4"] + +[tool.setuptools.packages.find] +where = ["."] # list of folders that contain the packages + +[tool.setuptools] +# If there are data files included in your packages that need to be +# installed, specify them here. 
+package-data = {"conf" = ["*.json", "*.yaml", "*.yml",], "data" = ["*.csv"]} + +[tool.setuptools.dynamic] +version = {attr = "melusine.VERSION"} +readme = {file = ["README.md"]} + +[tool.black] +line-length = 120 + +[tool.coverage.run] +omit = [ + # omit init files in docs_src + "docs/__init__.py", + "docs/docs_src/*/__init__.py", + # omit test files + "tests/*", + # omit anything in a .local directory anywhere + "*/.local/*", + # omit single files + "melusine/connectors/__init__.py", + "melusine/connectors/exchange.py", +] \ No newline at end of file From 673db63c5b324d7dd5955d2627e5f3c6cdbc9e26 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:42:02 +0100 Subject: [PATCH 17/37] :recycle: Remove setup.py --- setup.py | 119 ------------------------------------------------------- 1 file changed, 119 deletions(-) delete mode 100755 setup.py diff --git a/setup.py b/setup.py deleted file mode 100755 index 94800d7..0000000 --- a/setup.py +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""The setup script.""" -import glob -from setuptools import setup - -with open("README.md") as readme_file: - readme = readme_file.read() - -requirements = [ - "tensorflow>=2.8.0", - "pandas>=1.3.0", - "scikit-learn>=1.0", - "gensim>=4.1.2", - "tqdm>=4.34", - "unidecode>=1.0", - "flashtext>=2.7", - "h5py>=3.0", - "joblib>=1.0", - "PyYAML>=4.2", -] - -# Optional dependencies -exchange_requirements = ["exchangelib>=4.2.0"] -transformers_requirements = ["transformers==3.4.0"] -viz_requirements = ["plotly", "streamlit>=0.57.3"] -lemmatizer_requirements = ["spacy>=3.0.0,<=3.0.4", "spacy-lefff==0.4.0"] -stemmer_requirements = ["nltk>=3.6.7"] -emoji_requirements = ["emoji>=1.6.3"] -tf_probability = ["tensorflow-probability==0.14.0"] - -# Test dependencies -setup_requirements = ["pytest-runner"] -test_requirements = transformers_requirements + ["pytest"] - - -# Ex: Install all dependencies with ``pip install melusine[all]` -extras_require = 
{ - "exchange": exchange_requirements, - "transformers": transformers_requirements, - "viz": viz_requirements, - "lemmatizer": lemmatizer_requirements, - "stemmer": stemmer_requirements, - "emoji": emoji_requirements, - "tf_probability": tf_probability, -} -all_requirements = list(set([y for x in extras_require.values() for y in x])) -extras_require["all"] = all_requirements - -# Conf files -conf_json_files = glob.glob("melusine/config/**/*.json", recursive=True) - - -setup( - author="Sacha Samama, Tom Stringer, Antoine Simoulin, Benoit Lebreton, Tiphaine Fabre, Hugo Perrier", - author_email="tiphaine.fabre@maif.fr", - classifiers=[ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Natural Language :: English", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - ], - description=( - """Melusine is a high-level package for french emails preprocessing, """ - """classification and feature extraction, written in Python.""" - ), - entry_points={}, - install_requires=requirements, - license="Apache Software License 2.0", - long_description=readme, - long_description_content_type="text/markdown", - include_package_data=True, - keywords="melusine", - name="melusine", - package_dir={ - "melusine": "melusine", - "melusine.config": "melusine/config", - "melusine.utils": "melusine/utils", - "melusine.nlp_tools": "melusine/nlp_tools", - "melusine.prepare_email": "melusine/prepare_email", - "melusine.summarizer": "melusine/summarizer", - "melusine.models": "melusine/models", - "melusine.data": "melusine/data", - "melusine.connectors": "melusine/connectors", - }, - packages=[ - "melusine", - "melusine.config", - "melusine.utils", - "melusine.nlp_tools", - "melusine.prepare_email", - "melusine.summarizer", - "melusine.models", - "melusine.data", - "melusine.connectors", - ], - data_files=[ - ("config", conf_json_files), 
- ( - "data", - [ - "melusine/data/emails.csv", - "melusine/data/emails_preprocessed.pkl", - "melusine/data/emails_full.pkl", - ], - ), - ], - setup_requires=setup_requirements, - test_suite="tests", - tests_require=test_requirements, - extras_require=extras_require, - url="https://github.com/MAIF/melusine", - version='2.3.6', - zip_safe=False, -) From af16b2ec06e146701756f96dd4f1ecc34d68065c Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 15:43:09 +0100 Subject: [PATCH 18/37] :construction_worker: Update CI --- .github/workflows/main.yml | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e69de29..f886f6d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -0,0 +1,49 @@ +name: Build & Test +on: push + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: psf/black@stable + with: + src: "./melusine" + + test: + name: Test + needs: lint + runs-on: ubuntu-latest + strategy: + fail-fast: false + max-parallel: 2 + matrix: + python-version: ["3.8", "3.10"] + steps: + - name: Checkout the repository + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox + + coverage: + name: Create coverage badge + needs: test + runs-on: ubuntu-latest + steps: + - name: Coverage Badge + uses: codecov/codecov-action@v3 + + deploy: + name: Deploy python package + needs: test + runs-on: ubuntu-latest + steps: + - run: echo "Deploying .." 
From 15567dee06123727a8d4a4c3afa76598f2b22176 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:01:11 +0100 Subject: [PATCH 19/37] :construction_worker: Setup pre-commit hooks CI --- .pre-commit-config.yaml | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7abecec --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,66 @@ +default_language_version: + python: python3 +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: check-ast + - id: check-byte-order-marker + - id: check-case-conflict + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-json + - id: check-yaml + exclude: ^chart/ + - id: debug-statements + - id: end-of-file-fixer + exclude: ^(docs/|gdocs/) + - id: pretty-format-json + args: ['--autofix'] + - id: trailing-whitespace + args: ['--markdown-linebreak-ext=md'] + exclude: ^(docs/|gdocs/) + - id: check-added-large-files + args: ['--maxkb=500'] + - id: no-commit-to-branch + args: ['--branch', 'master', '--branch', 'develop'] +- repo: https://github.com/psf/black + rev: 21.12b0 + hooks: + - id: black + args: [--line-length=120] + additional_dependencies: ['click==8.0.4'] +- repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.931' + hooks: + - id: mypy + args: [--ignore-missing-imports, --disallow-untyped-defs, --show-error-codes, --no-site-packages] + files: ^melusine +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + exclude: '^tests/|^docs' + args: ['--ignore=E501,D2,D3,D4,D104,D100,D106,D107,W503,D105,E203'] + additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ] +- repo: https://github.com/pre-commit/mirrors-isort + rev: v5.4.2 + hooks: + - id: isort + args: ["--profile", "black", "-l", "120"] +- repo: 
https://github.com/asottile/pyupgrade + rev: v2.7.2 + hooks: + - id: pyupgrade + args: [--py37-plus] +- repo: https://github.com/asottile/blacken-docs + rev: v1.8.0 + hooks: + - id: blacken-docs + additional_dependencies: [black==21.12b0] +- repo: https://github.com/compilerla/conventional-pre-commit + rev: v2.1.1 + hooks: + - id: conventional-pre-commit + stages: [commit-msg] + args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test] \ No newline at end of file From 475414d6b201dd7df391a9cc767d34718e140bf3 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:18:32 +0100 Subject: [PATCH 20/37] :sparkles: New feature: Builtin MelusineProcessors --- melusine/processors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/melusine/processors.py b/melusine/processors.py index ad46afe..82c0d17 100644 --- a/melusine/processors.py +++ b/melusine/processors.py @@ -1258,7 +1258,7 @@ def GREETINGS(self) -> Union[str, List[str], re.Pattern]: "^.{0,3}(talk|write|see you|speak to you) soon.{0,3}$", "^.{0,3}take care.{0,3}$", "^.{0,3}catch you later.{0,3}$", - fr"^.{{0,3}}have an? (blessed|excellent|good|fantastic|great) ({english_times_pattern}).{{0,3}}$", + rf"^.{{0,3}}have an? (blessed|excellent|good|fantastic|great) ({english_times_pattern}).{{0,3}}$", r"i am looking forward to hearing from you.{0,3}$", "^.{0,3}looking forward to your reply.{0,3}$", "^.{0,3}hoping to hear from you( soon)?.{0,3}$", @@ -1306,7 +1306,7 @@ def HELLO(self) -> Union[str, List[str], re.Pattern]: r"mesdames", r"messieurs", # English - fr"good {english_times_pattern}", + rf"good {english_times_pattern}", r"hi( there)?", r"hello", r"greetings", @@ -1316,7 +1316,7 @@ def HELLO(self) -> Union[str, List[str], re.Pattern]: r"how are you (doing|today)", r"(it is|it's)? ?(good|great) to hear from you", r"i hope (you are|you're)( doing)? well", - fr"i hope (you are|you're) having an? ?(great|wonderful|fantastic)? 
({english_times_pattern})", + rf"i hope (you are|you're) having an? ?(great|wonderful|fantastic)? ({english_times_pattern})", r"i hope this email finds you well", r"to whom it may concern", ] From a794e49fa333419f2bef6934bf19d1de29c08e93 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:19:19 +0100 Subject: [PATCH 21/37] :wrench: Update gitignore --- .gitignore | 109 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index f3f9bb2..d11065a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,26 +1,109 @@ # Mac OS variable .DS_Store +# Python cache +__pycache__/ +*.pyc -# Doc build -_build/ -build/ +# Notebooks checkpoints +.ipynb_checkpoints/ +notebooks/ +# Byte-compiled / optimized / DLL files +*.py[cod] +*$py.class -# Package -*.egg-info/ +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec -# Python cache -__pycache__/ -*.pyc +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ .pytest_cache/ +# IPython +profile_default/ +ipython_config.py -# Notebooks checkpoints -.ipynb_checkpoints/ +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# mlflow +mlruns/ + +# tensorboard +runs/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site +/docs/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# VSCode +.vscode -# Tutorial repository -tutorial/data/ -tutorial/custom_conf/ \ No newline at end of file +# Pytest-env +pytest.ini \ No newline at end of file From c7e581094130a04622d373af6ed2e089edcbc388 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:26:02 +0100 Subject: [PATCH 22/37] :construction_worker: Setup pre-commit hooks --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7abecec..e3c6410 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,4 +63,4 @@ repos: hooks: - id: conventional-pre-commit stages: [commit-msg] - args: [] # optional: list of Conventional Commits types to allow e.g. [feat, fix, ci, chore, test] \ No newline at end of file + args: [] # optional: list of Conventional Commits types to allow e.g. 
[feat, fix, ci, chore, test] From 2e795daf7807523aaf70adab158c60f9d730f0a8 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:27:57 +0100 Subject: [PATCH 23/37] :poop: Fake emails data --- melusine/data/emails.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/melusine/data/emails.json b/melusine/data/emails.json index cc60d68..5f9236e 100644 --- a/melusine/data/emails.json +++ b/melusine/data/emails.json @@ -3,7 +3,7 @@ "age": 35, "attachment": "[]", "attachments": [], - "body": "\n\nJe vous transferts cette demande urgente !\n\nKim Smith\nConseiller MAIF\n\n----- Transféré par Conseiller le 24/05/2018 11:49 -----\n\nDe : Dupont \nA : conseiller@maif.fr\nCc : somebody@hotmail.fr\nDate : 22/02/2022 22:22\nObjet : Demande urgente\n\nBonjour\nJ'ai besoin d'une attestation scolaire pour mon enfant.\nPouvez-vous me l'envoyer rapidement s'il vous plait ?\nJe vous remercie par avance.\n\nClaude Dupont\n3 Rue de la victoire\n79000 Niort\ndupont@societaire.com\n\nEnvoyé de mon iPhone", + "body": "\n\nJe vous transferts cette demande urgente !\n\nKim Smith\nConseiller MAIF\n\n----- Transf\u00e9r\u00e9 par Conseiller le 24/05/2018 11:49 -----\n\nDe : Dupont \nA : conseiller@maif.fr\nCc : somebody@hotmail.fr\nDate : 22/02/2022 22:22\nObjet : Demande urgente\n\nBonjour\nJ'ai besoin d'une attestation scolaire pour mon enfant.\nPouvez-vous me l'envoyer rapidement s'il vous plait ?\nJe vous remercie par avance.\n\nClaude Dupont\n3 Rue de la victoire\n79000 Niort\ndupont@societaire.com\n\nEnvoy\u00e9 de mon iPhone", "date": "2022-02-22 22:22:22", "from": "conseiller1@maif.fr", "header": "Tr : Demande urgente", From a33b59f0bde3ca47d3714fc3728773483f36c819 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:28:55 +0100 Subject: [PATCH 24/37] :see_no_evil: Update gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d11065a..8516c00 100644 --- a/.gitignore +++ 
b/.gitignore @@ -106,4 +106,4 @@ dmypy.json .vscode # Pytest-env -pytest.ini \ No newline at end of file +pytest.ini From 32373fb0514b223d9c6b531b93eb1b326ac94195 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:33:02 +0100 Subject: [PATCH 25/37] :wrench: Update repo management files --- AUTHORS.rst | 2 +- CONTRIBUTING.md | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8dcb905..cce7f0a 100755 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -19,4 +19,4 @@ Contributors * Hugo Perrier * Victor Bigand -To be continued ... \ No newline at end of file +To be continued ... diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e09dc1f..8e4bf84 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,9 +62,9 @@ mkvirtualenv melusine ``` Go in the melusine directory ``` -cd melusine +cd melusine ``` -Install you local package +Install you local package ``` python setup.py develop ``` @@ -83,7 +83,7 @@ To contribute to Melusine, you will need to create a personal branch. ``` git checkout -b feature/my-contribution-branch ``` -We recommand to use a convention of naming branch. +We recommand to use a convention of naming branch. - **feature/your_feature_name** if you are creating a feature - **hotfix/your_bug_fix** if you are fixing a bug @@ -95,7 +95,7 @@ Before committing your modifications, we have some recommendations : ``` pytest ``` -- Try to build Melusine +- Try to build Melusine ``` python setup.py bdist_wheel ``` @@ -125,7 +125,7 @@ git commit -m ‘detailed description of your change’ git push origin feature/my-contribution-branch ``` -Your branch is now available on your remote forked repository, with your changes. +Your branch is now available on your remote forked repository, with your changes. Next step is now to create a Pull Request so the Melusine Team can add your changes to the official repository. 
@@ -138,7 +138,7 @@ To create one, on the top of your forked repository, you will find a button "Com pull request -As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request. +As you can see, you can select on the right side which branch of your forked repository you want to associate to the pull request. On the left side, you will find the official Melusine repository. Due to increase of external contributions, we advise you to create a pull request on develop so we can test before integrating it to the final release. From 6e5178092ef9650b524930fab89d013c41c8f991 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:33:29 +0100 Subject: [PATCH 26/37] :wrench: Setup pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6623956..1869ce3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,4 +58,4 @@ omit = [ # omit single files "melusine/connectors/__init__.py", "melusine/connectors/exchange.py", -] \ No newline at end of file +] From 97fead39c322ab571dd532fa296fcaf0df695cc4 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:33:57 +0100 Subject: [PATCH 27/37] :white_check_mark: Setup tox --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 315c2b5..0051cb7 100644 --- a/tox.ini +++ b/tox.ini @@ -43,4 +43,4 @@ commands = -coverage combine -coverage report -coverage html - -coverage xml \ No newline at end of file + -coverage xml From 6c95bb354e2edb3ff4ab027e6ac1934bc58a4096 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:38:29 +0100 Subject: [PATCH 28/37] :memo: Setup mkdocs --- mkdocs.yml | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 mkdocs.yml diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8121d3a --- /dev/null +++ 
b/mkdocs.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://squidfunk.github.io/mkdocs-material/schema.json +site_name: Melusine +site_description: A framework for automated email qualification +site_url: https://todo.com + +repo_name: MAIF/melusine +repo_url: https://github.com/MAIF/melusine + +theme: + name: 'material' + features: + - content.tabs.link + - content.code.annotate + - content.code.copy + - content.code.select + - announce.dismiss + - navigation.tabs + +nav: + - Intro: + - index.md + - Project history: + - history/history.md + - Tutorials: + - tutorials/00_GettingStarted.md +# - tutorials/01_MelusinePipeline.md +# - tutorials/02_MelusineTransformers.md +# - tutorials/03_MelusineRegex.md +# - tutorials/04_UsingModels.md + - tutorials/05a_MelusineDetectors.md + - tutorials/05b_MelusineDetectorsAdvanced.md + - tutorials/06_Configurations.md + - tutorials/07_BasicClassification.md + - Advanced Tutorials: + - advanced/ContentTagger.md + - advanced/CustomDetector.md + - advanced/ExchangeConnector.md + - advanced/PreTrainedModelsHF.md + - Code Philosophy: + - philosophy/philosophy.md + - Contributions: + - contribute/how_to_contribute.md + - contribute/maif.md + +markdown_extensions: + mdx_include: + base_path: docs + tables: + admonition: +# pymdownx.details: +# pymdownx.extra: + pymdownx.tabbed: + alternate_style: true + pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + pymdownx.inlinehilite: + pymdownx.snippets: + dedent_subsections: true + pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format '' + attr_list: + md_in_html: From 7775853926ce9f83fdfdac840233b995bae0ef32 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:38:55 +0100 Subject: [PATCH 29/37] :memo: Setup mkdocs --- docs/index.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 
docs/index.md diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..fda1d13 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,69 @@ +# Welcome to melusine + +![Melusine logo](_static/melusine.png){ align=center } + + +## Overview + + +Melusine is a high-level library for emails processing that can be used to : + +- Categorize emails using AI, regex patterns or both +- Prioritize urgent emails +- Extract information +- And much more ! + +## Why melusine ? + +The added value of melusine mainly resides in the following aspects: + +- **Off-the-shelf features** : melusine comes with a number of features that can be used straightaway + - Segmenting messages in an email conversation + - Tagging message parts (Email body, signatures, footers, etc) + - Transferred email handling +- **Execution framework** : users can focus on the email qualification code and save time on the boilerplate code + - debug mode + - pipeline execution + - code parallelization + - etc +- **Integrations** : the modular nature of melusine makes it easy to integrate with a variety of AI frameworks + (HuggingFace, Pytorch, Tensorflow, etc) +- **Production ready** : melusine builds-up on the feedback from several years of running automatic email processing +in production at MAIF. + + +## The melusine package + + melusine/ + docs/ # Documentation (using mkdocs-material). + exemples/ # Tutorials and exemples + src/ # Sources of the melusine package. + backend/ # Define execution backends (JSON, Pandas, Polars, etc) + conf/ # Configuration loading and default conf + data/ # Dummy data for examples and prototyping + io/ # Save/Load operations + models/ # AI/ML related features + regex/ # Regex related code + testing/ # Pipeline testing code + tests/ # Extensive testing of the code and the tutorials. + + +## Getting started + +Get started with melusine following our (tested!) 
tutorials: + +* [Getting Started](tutorials/00_GettingStarted.md){target=_blank} + +* [MelusinePipeline](tutorials/01_MelusinePipeline.md){target=_blank} + +* [MelusineTransformers](tutorials/02_MelusineTransformers.md){target=_blank} + +* [MelusineRegex](tutorials/03_MelusineRegex.md){target=_blank} + +* [ML models](tutorials/04_UsingModels.md){target=_blank} + +* [MelusineDetector](tutorials/05a_MelusineDetectors.md){target=_blank} + +* [Configurations](tutorials/06_Configurations.md){target=_blank} + +* [Basic Classification](tutorials/07_BasicClassification.md){target=_blank} From d639c5fc0f4284710e50fcafcf72646bf83ab5cc Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:40:28 +0100 Subject: [PATCH 30/37] :memo: Documentation static files --- docs/_static/melusine.png | Bin 0 -> 119835 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/_static/melusine.png diff --git a/docs/_static/melusine.png b/docs/_static/melusine.png new file mode 100644 index 0000000000000000000000000000000000000000..f02347eb6f821984b59b00873f1d849d2266e32e GIT binary patch literal 119835 zcmY)V1ym(XtTqbc?mGC!9R?WO-Q9g~hr!+5-QC^Y2X}WL+#NO!m-n3S|NeXTTD^94 zI-OLis*}!>>Tm@)aYQ&=I1msJL`ew|B@htM;qN022I`x#dL&fy{Rir(BrXI}GmU@x zeFFPOLemihgiiXu1GJborTm*g(?UhVNkdkK%gD}#!NAzg(1gLw=Fc}b2ndfG*LTy# z#K{2QW@ByZ$mPaM@;@A0-|hd%j3j{nVR5qJCDD*o00`STm;hKAelh$c;e!JJ06Y%H zrd&!QV*ls%?=xN!b0?=iT#SsauC5HOEDUxIW{ganoSck5nHiax>AyMX9o=o64BY5# z9ZCOJ$p24{h>4?-gT)^w3p-oDe{u~B?VO!>Nl5-v^nZW<+fOG8)BnFFTgU%n*0+I- z|6O5ZV))7Ue`SB)%JUzUOWwi4~*Lvh)wX zNg^c2-W>L8*K^CXKp##Mz$H-V?3;jp41G;eXaA z#{JhkD1Z!#6ZlguBnC>Hvix;_xFhTX7`OzJihb~%Lnq@D9(5?lK|d&=B0@38$Dp7G zQnG}T?N23mNw)k|YZgFbvQ=#)Cp~P%LNPR8Ae)9R{fibagYYs+8-oH*GtKF+GM=0O z&TRknejoeMXev8y*ZY}K@RY=AwNgUm!xIPw2`@4~KX2VC)F}$2(&1356Yvj1$;G5n z@OzL9&E*IV7Y9*Hp1?3o*!=V2&A_gBZ6BfZA|M$6pq55Up#L^Or$t~-C~l%`?B5vH zs3$#&upc1nCs5EYv#0@MyL>?7hxo5M+L|xj1BSQ_eI$*y6@A}gxf~u&=Zh8nQm?NM 
zC+!}?;E%dL`ShASlUUQ)MlF1}aXSsF%vGy;qSm=+JpdzCP2Crhnrx24OrQoyHu&YV!2p`TCP&AWO}MpOahr) z1BjtgWkX-$6!T%6=&pI^D1;D%I>_OPED}uk9^1#xEuVO6pqaknANnR=Q;JN=bEir zuDZZXqw*6asZ9Ls{JQrtb;qX>v-gvv$Q<~V>#L#V0ZOZq=Xc3aq~>tFr{)&RYu&o# z!cwn9;9gQ9bRcRymC-DeK`bA$L^zy5 zYT*}B%yo^LY909Y4hQnX7O3S<;G11}B89A+=kb>7<#y-wHdXW<=D23EH?78)x0F`+ z-p~-%)&fa7>W0nJR$)>>+}5k}`DLkzw9TzN|5|HfPwOMCZ+NM4jRSe5Mom}$wP|{R zm|)XyHOV)a_VkaJm0UK1bqwP1ULLtxKJO}FPw}L592}fH z5Saty;&hR?e8GenV|cRvW`yFG*d41~iyxM|8K$XVNM|{l%>f6zHKV#N1MMFmlyjIc z2L&I|gWPJ^{35LbQ+d3}L`rcCJ}dL%p}Kx^`9i7of>?BwOZ7U1v2idwc1yRjSX=Bg zyeq8=3^6WklQywpU?(?i@&iguc=7q@)f4|`U9YHSznpH5S3A94!{E_$wv~P?kzH-~ z9o!4|qMegpqp@Jr`Jj(3r7`uvvC7d-1h;3Omv;S)IIT)Wkgmt8=5o8EnbmBbV97Ln zIZ@Rvh~4?2;F5*gje>gu4D+`+>DK-l@PpU^#*yD%u(lZlM%pB%QdYs|g>}BLb2Hwd zP8yw9AwJ5pl>2IIPIl;}oI3=Hs6mxo3v`V0laZjYHYKz{1C{5?_+{7RilsCuZ`1Y-&+bLOG3 zDgVN+6T|_gSW%{iRWG0Qk}B}B|F~u{z;qV}4T6Ad@mQkaCkl!_WfI}Js%iW?j)Q}R z?gp#)4r@4M0-4NMxgz$3zK_ahg-OBL5__!+r?JZU&BbLp-Ar?%#fDTUP9ZKF25s5W_wa=nwt}ulbJn+? z{CmlN*=hg2XL$?5q>9>m2S(zBv(_hH+`G>wKRLC3=$6YrU;&~g*>kpa{Deiu2V zU-y?n=7*hd&ksw+?wbH#v71t_wT5Z*5LS3{?K%NL?RLvz*$t%@- z?kI~1q zah!YGwO1bcB*6dO>yfiDUpMXfn!(_Edt*Q*SFOVYhOjj z9J2cYc{uzQtB9DZNf&BIS*FT1ii(q{%xA|-(vg>9qGCtQw zYOpuny1d1WgnQRSSwd;qg@m#9PU@iS2c~9$SGz=Iiw((C5Fj%wz+YSSYR2GXr zd+>KOES_|4r$PAFZEWX_G!^>{$b4#)7oV}jK~Q3cC*GgwRV57L{lE;af~J7-_%mlq zZZt%Dv^Y||+))GiQAo#!933jx$FU~F!$RLiVEyMvJNrhQ6SHLlBxk!W;FuQ%Bgb^b@o)6HkN}0tNdJYms3m|BiQ1gdL z5BgFEUGkn`&H-)<5-{-|t+p%Enw9h$O}EjYKCYYYW+d>MvZ$qm#DyDkr5?lVKi}nr zJ}p2WZL&;io-7@r19NjE57K-)xHJ=W+aZ6g?V*)=?8si}Uy$dceC*H!mAS=STy?zJ zih~_zx$22<`MZe|;yt_JQ*S%Z@Jg6>@WU|0&pXU`-6UcPIzYMa%d)%U{jyvqo7b^6 zUagdMFCE{7i4VSnjjK<^lO5qpBNiH{R zn7J1er>v~AY6VH8lljH_lp1*^ED~?Pah%{f9p}9JC7Lw1aot5w8pR>6-C!I}d6!ls zlfKgy`t{V?CLk-2!Fl|!<6SHlZmaD;K>(=FUWM>_AES)#H<1#hQL8wdodyUjG1j)fj$;GT0a;UzIN#N{qee`Xihj5vtI5} z-!KkOq_2x?@uSf25?p#ab?}`4Z7)F6{0Cm8*+{|cuvj)~!Rz_lzfD#W`n+^9M|dzd 
zLKsal#OoTn>GF1IUzC<6CSj%S8L&=8$pGlsOko<7uDX8W^1NKX)UZpN>v=dT;If8)utpC~cz_gv($M`bIO^@;_FV9wSJql{|_4#)Fn6L*#j31sh zd`Zqz^Pu2_bT9QxB%h+C&kM&)wO#Zq@RKE(kjr%l8HChZwjxp2_msuBwYUli_WnD7 z>`78)_On#ciND%xW^{)CYwt7M;vj3o_X+b1Q#F1lp&+?9)0zcoQD4Ai4Jv`M;s-xv zk)U@t`X0JG)Fi1{uBR;qXGApIVHg7?-I7!Rg+;4bjyGFCMda`9J*czWm!C%mxgs(7 zOn0*^^*axQaeKI@m!{jZ*TbV_iiKXJ7#k?|!iZr^Yb)+$v5BHn;PiQthZ2f21rE6r zpmE;*Ci{e(A3?#{Kg-Avy!P}3am#YpcRVJ_v(D|h;_;YB7<(QP7_YL6x@Nj!s9Q#6 z@Eymk%)APHq*EE9yPiJZ;umIkSR8JPQ%7lL8spiRoJ$@ue(RZiF~{KF^Tqq?p5QA zt{2`!tgpL`QSz!uzdd7;#{~+tT8+#gknX^O>Qv<58HQBeXY9UI*As47y0k|~oJoVd zL;l(?WMO@B`tp4GDsCGSekYEp^e7YwIrN7GhB5IHHBqr4 zL%Lv~@o!hXJCxT$sd~8S9QJ!q61LM>obgZ%&>o+`*kVqu1_uK42X*R8b*NSA5Tg-1 z0be!1qV4>uAJiaY*DHBG2XO;P&ZN5?qyy(JFUjg+9A=0p>$K;-^Jlb)ZcWf0*@9ZB ze?Z%P-Y#1|w`508t5A)@z$L|OYFeJI<@xV_&HZkKobSX2*Z4|=r$nrW_kV@3B&1=I zyFBGB6Pxor4@C8zPQfvw@j2+|@lMwVW#W@bMq5}7u1KSkYJj|x`;KXlQuV>xfqIK- ziV<0qIqtwUJ_G*ulfr0?04e_YubR$x?hAe_hlhJDDSuu1Be53PL`;i+t+e2ybV=^Z zRhrCj+w;`o5K&qZnB;VZi}Cl1zi)U3VV&W>)T{N#xhupyGf}WBIY7YN41%7}<1_O4hT9IoToA4osctn0LxH=%J zoVK=uCgukwkh-GKtmhbx40*!!?(fvhniUx4EP-#9{4Ygb{*(>bn8?6Zx4A{bt0y9< zuwf+LJD9io8UBkbeB;G|vM6CIr^hbnZ$M(SWOuIDOxH_jT@loB7P3bnL_B}%T zHq}}^UCr}?Vfr?KRq(4pqUQ7+%(28fCG0b;YAsezhkpVSDr-)*=JUAqnVQ5y`ZB_Cl;exI;h&Q}5dLWfxl2{?C^;yYtze?r=DE$yyIj)Sd}N@pLK& zJhnaf4dK*Z%1C|KgQ7{ZOYQ#_>Tj^IJ{Dl)_CEMVKVzHk3Cq*w77zA3?=%>89&ligFR&H z3toa`_zQdMeMem>kr6PoKK?=4&U@$(j3EdCXmP(k)}|z8V^}A67)(^l7lO>~8Xurd z$;r8bZ-#)h2KEzK$8NbkNr%4Z3GA5quurYG*cyGOQ1>^TugHhXLPufIr8MUf@eT_V z%~ned>^Z)36h6G$19M`&Fq}1-oy+Ua7r$MfuI-LW53ElD@dwX)D z@p!*y=>1QpjKob<<4jBY{gM0qMazb*%Qok+#+jaIx6K$Sh{d0BmP@@7CCJX*5+4fV zQPa@R>W*)$0FkV!Ur^B2Fq&Kh&A}3MU5){DP4I=BD-M9|8VG6+=zDYk^fx=a;!{(* z1`JPSddMmGV2}02<(wCxB{7^XBBXxMpNLKRqfhI zeBpo4mOK95_*rc8_2QhK@3AxY{zaoqq&^RlGlk7M2uF=JyA{=$$F00{)c|qmsu^kM z+OBR)!|ndan`_xZDiNN!KD>Lw8sGz@2)tOU(?iztEt?E{#OIoGu`N`s*?1PW9 z>A2JFa(JlDA>>E!gMwYY3^y(mLfN`^y_ta8Psc?S<}m@2XyI@S2<*JpSYZ&?wn^qK zR3hIn%k|#7kUP8i8;HD4j;(*e?X#K*5>{&-Pr~+aPfW2T-1)WrR%Jd{fVaqdP}oR* 
z?z0zO!a{Duq>&%|XT4~Q&1(?md*v24S~0erka(UKB3I?G5zO01wlccU4MWbuud0)Z zuOCn=SLtna#Nh)5dJi&AxGg!YlLXQvN9VyT@t{bN$eymJNwMU1o@T8i+q(~e@qw3$ zGTxJ4-K;5*)H0GEjMc8T0@T$`U|chUNYQaD#wR%Iy$}Gh-pSKU! z&b{}*_KaRj>szBt1yV_ zyz3}xyAK(@*h*(@#RA$?beYh&#}`aLzEOvmR~i;lQc_hT)8Kc! z3LgG5JPezzi*Wn<0Z%93VQ2=~dEJ6GKn9DqR>|Nh$!M<*Z~KA+9-cK(6Mbzwh1zJn zn8wm69vOz^_A3Y$y~=)nm|}u121SAt6%K<6z_u&DtYQGg_nM|GE8`-2GWc8jW4fS! zP1|s-g{z(pRoL^r@rbDN?5y%~sdc9T&SadcA5QRzN_Ub;K2B`s6s>*o8G?Rt?`qnlj}MT^yPnrkTU7{cj3Zmj74C?f`S5i!Bo zx@_1v_G=T$M&dg?XfH*?y9@TdJ1upQD3v<^pFm!wG1Dr8Br6wV6lWi$1j?fyu-=l`n*zv|*w>K2A z!4<6?WbH66e%QdfR64L!N@{FhZW4(_);D-s;C5|N4NY{9Zk>xs!rv^>!0=t8$e<4KoYv_1Uo~IzFbe z5TP*AjYC%xk@Lr`ROG62lk8`q_Ergaty{Ni6bbXcEHX1Q@2NN%Y7=^H z+UH=wAh9uwqN?Gl49)iCat7GY_7PIe_Xop|P%p$0B~dI|l_!v279Z!7%C2Gv?pCE( z74@W--W^)ekT!|B$e%Iz^0ORtezqT5Ig`Sprw1ZD{d-6s8h7O$c23+s>1!_1kF64_{>oW_%A#TM z1la&?p@!hlD3oFj4Z)Zt4NmP#Rz>C$=(?0@*2B<9=@xNzpb&A5_09WiNmT>&eczl5 z>xh-f=~sUk1YK+27j=QaLnq{B?exhZIIm^=YK%muQq11a?9H@m@L}IPA$g>WNdhfW zZy4l*0wce^01*-6Jf2CjQ2N(`SJS(_LDNJ&lfcz_u~3=%criB+&uA~uK~6Tr?YxX& z*4*>7yutSG6S^vx3Yyxla;ne-#$AZlQIGYZaptnzET`V8wg*?_FWgl6s*MBXn2ElG z_vlO>Ta5CAhuOYcv<~zcKAtonZ4#4Ktw;isTGhp7;dnCp{N7;iK$6XQuN7j83}d}C zEtyeMDl?t8%3@%+hxoz001s zW_b);XJ=1gVwPRJQG%fiKH}JuPOBe9Xe_ZTMz@%r3K*T=ZWQ&%1I9LGrET0#s=pPg z70+J+W5&_=@B9Ss<<5LXjg6mqN@;^@pemDa#G4&&mu7)be7jX^ab_?xzQ2_%g8pdc z#^?=H3wrjNyC+w7Th-~B}V zqPLPD+M1o7M-Ns)P@&ifK$#gz)iGq3;#YFj=y+gEh+1p*HaO)GGV$U&aJ1T~>5e${ zmVtIwz(bz<;z(GXZ#Z81Q$hP^x)25mQP%HyAQ!z9>fb7d*WXmWhWvJio=@%80? 
zc}c&=68RCm{w);uw)nA)y+R_>wEW5@bbDu`Z<{iOX;W}u*8s`31}1|v6%PZ0I&$UO zm<2<@h>k3!XgXG;2&UB7*tbt&~OVURFf;>SUEv0yqqOY)fsZG9SiVAhG4Gf-ebgWFn%}v49JO%y#@QI%xUdQ0tVYi}{+;vh z!jqafe`&m_R%y(6NXt9*uiD$Euowiqj({_R04lj`1q87EScgYZW6&x5>;Xw^OEKl| zw46Nw;eP9fpXFD*-3P;EM;H4NM9aMT1$GD?wiN%WydzgJFVgOZvBGrZY$WBs;PEJ9 zv(VuT*RL3Z@w7SR1eu2DVHtO7tN7+KxdIF!3uQV}$QN>dlQY7RLc(G{o>2g1VK-<7 z$ih8=K!mASfPA~%4)rQ69J8VOY`Z8*HsLNxurJOS5MzoszQ|kiOO&U{5q`}ri3!@|5GS{BDEA{0?u6k`pEGb!=AttZ8mMSn|#-RIoPo5J8F<-XU+ z{A}cARepx}IN3fOVu$(X? z>VC65->PhFHs9W~w6y)5R!{{|TcKWUI^m;n@bPu5@5>d55rpOUX>47k(>;z=`5_qc z?EieksAWEdY-!3X$G2okpLQLF{&jsk(@cuYy@`8<@HSG+@18aIAkacSp4zCW9}{uT zLNmn{r1)`nNny6HO4TIwU)^b{RbbU{J3sGjyuR?=wZs%XH+b zZ0EhR?*mct;)rZd1A<-F%S+bCf$0RQ#4UntD5<)R$)_=cR1QmWp4!AkQ!nRJ zt=oo1!!eK9AIQ?FKdLr2K^`?5U7}CVXgoEs9P7fsgV;)H>ZW4&Dk4SI?C2(gepy>; zRjAe~Z#aQAoh#{4JVqNZllYNY8EVh@Jj5epZLFDPL*gNkjh4VJLGu+4PZXbckiUMi&l03}CUumO+ zx9vfPrAIKdTg4}&=9-<}x7#N+%e`;BxO zhieuM5<;2*pAKfNF~$yZxa+n4sb$ttlwqEh4KCao75virn^=j@?Q;X{C69TG=AE7X zZvaKuG)<$nVSB^PA%`T!OrYiQsYgUzSejcnI@2P^SKSXUF1I>mnhvNj`r4qM+IPky z+VZ9oP<+6)b8$4vq`;^7VkRsSbP~g_$FsEsIi5q~Hz^?$kV825WKZLJ^!~gC`w!KX zRUfnmdlqZ0vfq-n@Cv@;TDm1`8PfWwAAgC3L$wAn-d;%3HkvdaA|_x!D7I1hL51)_ zDLwMHV)(bQq~3|7NI`Zu7wEMsqM~v6HuZcCRR?aIJd-VQAY21zWMJewiBXi=O`XqO zC+I1h!P#VgjWSg%;7G;xf%H$Z=XpR1X3}{0vZ>WO3x^`g+HF-NFRhdqPxrq7&!lU| zap$_xN?x%QQLzq&-3n%$E03Tqg*-dl1zxtD?x$*cCtlbDgy%oAkb55=9Mw2U2i_v= z{=i~QrJGTf$I=42!a*rTOaN@ zmiBCu9Q@(I|1v2H{~S;v>CiyZoM6;o&D7i$`+$_bDAOZLJoGQt(8Fx)mZ$8ckHI-r z+jt-VZM8gCA_Eg968SorR{dyK0s(yU9-D6Vs=Zx1OYkPBt|-1u!7a$zr_CZD_$VNf zk=gbYpucB}Y`#;I9XaL8?GuoT-SF2&cKv+CiobEimhB*t(aQj0SF!a*-ENqU zzgDV>-RAa_+)LE+sk-$d7*VsLm0I`P z=PMq)n?H*OUb+#N`{fD9!DBQ0(q*&x zq2>1Mi+pYklklwnBo4?&1XcJFGfawR zoB@N^KKZWGG#1A2%-6CT0*S5BDZIiO+bMTZc|hy++^3Rlld?DtAXOG+2`JAmOmb`w zpU-Q1br}_&*ts@<_>#~O-Uo-o*(f8V6z>XpJ;^Bjy}e&s2_#-q3>^<>+5jmUuML&W zAmava*-7kk(kyY=v|n>RXnRxR{gZ6|5c2av0ri@JVlF!aPa9s6&tUo1yWz9qq>?#? 
ze~5eHJVsASz*rHjEBh&eBL?YqR%^vN33Vj`|1A=T;I1oI{f+}*LlCAvG#ftM^2Z{0*-}HK{%Z>FuXrhbdPzH`Ku+FFW zP#Zt%f7974p?UUy%Xm)vl0k%ao=weUOCC?RHsDS#Pz0*M(>U#7vS%MPh>qXoW9>mv zlCHn5JPHt(WQShcRDA z72J^4_@7>n#i`Y;i?nqI!d>5SxSR_mm2DvZH^0S&8*P`F!J8CJgL!;`{a{P^bNFY{ zj_A#oo*gsQWv&Dfj08UL&RxhOu{XU9bxDF_98|ldrYy3usZPCIX$TPo0UI+T8Btv0 zu1cZ0g7;=N5GIlXEM8yUdqduRAi|M(KADS`%yh3U1MB-#dF+a#FZayn+fNUBPKK- zxZ{cZoe*!G>^`&@MJ*A>G2dQ;_z7QD>1FfI?_@|K4{I$&uc;X)2v*jMf&iW} zlKH38>Q5_QbF=WO9V43}56U#F3NkVHgG%e-IExkY>U~Fa&!Wa2n@dF#R|2=LhP_Y zkO-v2Yn8PxYL0`6r+7Y{25VZZYF}IO849lEr%^-q{3xK;L7UID@d{(O<$FH^fX8yS&iz7C1 zD>NuU_NEVgoj^x07hd$Bb*q$y0QZ8t>Ds%K#biB-a(T7jJvULKqj~SQUa7<+yaL<(ugb6|v_XMy+ki>m>GK5` zji6wJ2rZZ)l7~+>sqwd6`y+&FQZxZKhk;ts=~T#L(7)fGHAk_qqZ>d9I=1(AS{j@ku-N^#_iVR~A zbLJFwsf~m=PUZI_@_aejIg1tkhqwTYd@h4-y$-&=^L!`@9jaN%Z#pTXZ9>!b6FIc!yu5-;s8p;CH1V5 zWbQ#-&r1@>lM6iY$2FX57hF8RZg>oD&Z^TeFbN{y!8l~q`X}JV;9SvwPEn74;7?RF z&hnaNJK?Kw0FO7i&y|fx;Msr6CL)0lVWW*gWPu^wW_3A&>= z4V^a&I*;HCJ2QaHJ88LGxt}CcK#m!U5;<^`Z?G@s@}}$hS`zR^R65QT*=Qv!z^kc2 zd(~On4IHOs^XZ2eMrpMJ}(XH%LVa*WHuv9jZ@)S?}mhGD2yLH$ZbnQJ+zZEDL zgj77Lz#k-Mt9w1+_Kd2t2Rh zEs^P$!KB9dVokTR^@`ju$kAB8BIq^0id5YOm+vPSW78ejKH)!9)*!rr{bsSpvnwK>lf=VxwKVIqu;q~eeySUP;+1Iuk7mJO!6zgG@os3{C}cO z$B-vkS;hx5$ymB9%95`-R8H%(?bk+?4c70-InmszLeU?IsA zDlXfV%C%6?aF0^aIe?uu*k4XtY{X)wQAwz6Bb(2c!+!;U;76thmXg)^Vwg}*+<`1|ve9PEGykOa!&q_OcL zIXs__-vDNx=aX@vKEsE0$Zs~SW3^mn|9-*D#B;G2c*YtxcqshC?f0dBMP<3!ibfl~ zUaU%w(;gMy=wxxx;fmIkY5UA#jPgNS7>c7v;G!DxNx1OoCwaTUE1zW1Hbk(mTup42 z(T{mj-49nOB?HHRS##(iu zP5Rl$VwNh;hK-)ii?MttH zDhAS@T}sS=9Rz+jp6sj~{b6@$w@+q2xhe`W8ga3^l4pSw+_#i}>d_6m7 zW1r(OPTi7<1@t3JYZ`<+Td6cdK8-?Qqczhn%ESZEV<8pYS1gxVq)xc8up9r`4&y)C z%E9?H8e_h-4XF)Wc1J)Nj}?$jmcmh!Y@yART)!TOEinB@MRW++&kcoOo5;)tx2#~U zP<#@TfA;rqT<2Z;_+JSy?Jk7O~t_=LRHuc;z`nptRO7S`Tohjj`?fVMz%9FZE0h9EO8(xXX z+wJ#0hthv@P%P(?IW*`n3?oCTJK0;38k`@|i^-j;NT?eI4l5JR3NKH|;O+^~nH^Oz zTv(U|0sQV2n~4y}3~#Uj zSuG{}_;t*N)5v6B@dSoo{N^E%&}mnJSR_l&4 zp9uFye`b^vPMgM$M%%1=up&%AS7Dw{4upM3w6HqU+mwW1cKnG+-;|c+yf9{o;sOQR 
z&1tDdkT*j4)H*F+CIeu@zW7?f&UzNCvoZR!x4LbIYTEg(cE3J2a}Y^~GCB8=Yhp6# zV?8NV-cX#{pbnWh$e!&TJ^8`>E&6 zn&=T-zyl;<$(m-u3IV(?*Og5_*XudHGW7RL2op}vJ%j3ZG{%(OB-;>l56-f^efPzEKv41@!1cMmppSB@n4!DfC zpt2aaGMOVL1R)8&JNWFYtZg4=Xn)>&4nV^yRQQ@HQuFsi4xoXV0(^|H-}&QeREmas zUi(QibT7v6bwm=>eSz8*;q_LX?xqr;Ad$g7Mq(yQJEcVYQ&^4+7T)wzSflcK-+p6= zLY)W5xUFRS%Nf++{6nB?5|p51muJHzT{LBsMiIX-y*;k-+w``Kok*%7?HznL7~%Xa zAVZa`&**9`l-9gnRdiO8qEaI+#D9MdyKf^?#f#o=^WgEUD2^L~-X^bn+@F3po-$P= z4`B@g?p2lgrW!JHlgHyB(BY5s$llPoY22-jPV3-Z8AC81Q64K93f#aFh`_M09?oZg z2NwNE6?f)ck6A3aD1Z;d-|Y1cR*cAC7DLi*$|6&u4jey0F3TKMO=WZMxQWKW{qX z0YtIRWQj$YNpTGPiN_wyv}%Vp!aq#Ay$rp1swg7o)~PW@Q@{UKO&VCM`DcAN@IU9aJ9 zU57HTr0~Rl8`f;Qv_8%AX5AVSz?az?IWGMnjAyLQ+aFIU@z&)y<0z?(@>N3Su8G0P zFEq7hZ-ca{(KZj`9!Ge+8PMM>0kE4c6Y6fD9XNV7Z4w+#G;Sj^KghN+zU zyhdno!mlxct16NEV4WnS zYq0n}OBz!*QQI}I8pzOl2gm<)xQW{sh-sxRoqXW<8pjw?6xoa*1|Qk=9L}cU-b<>U zJER|T4RHF+_;TGBd6vTL+J`S5ffqjsF-k5lq-Yz1z3uU46B`ZNP8`U8kH>T~x{i&H zsN=ZR9E0&P%^@E|^j)STb#l?sk@$(40s{+%h*$R%)-P4R%A_?tH!L*dNXz{ibdxqL z_Z#kl2%hkBe<*OUol?Ek}?oS&idhfaP^cZ&);l2LRcx<+bsW^;?z7F zStwuEWvrM%VPH^Aujkp0;jvD^$mg>EetIQ<&zW?|Z>l){IooTcM$<~C#vquS$)VF^)tb0vd-ozQ=`*)<}{n~z1c8*lt1Z!$;KSJ z&Nw8gpQO=KntGzV$dN<-HW9sDA5q9`=jQE|4@7V#mv@H~qy-!c^5;vmnB_VQcl6{Y zK-!>(yqe)C9HSY&k0wO7e}Yy{e?ftlP1o<6j>%UvS)5iSt&Wu07QOc!lWP_%_kM8} ziLcPBHm%#11naJOIpv#~R27kefARmVi5}w8x{@9$YZh=}zoPsVk3#Xqz!V*ke{Nxw zf5mys33PV)Qw7o9gMP?IX-dOz!Vu!5O6aSbxA6-l18nvAd+S&o^^027Hq;~D=-=nw z<2|p;N+U!NX)#bWi4$l6-O`n@(pNW8Y`2Pa7D1wBHiIPo%nZc(pG0>65p_QvjT__m zuSBey+x)(m8l_5&m`nl|86gVlquHNTh;JxT5aY&8SFfD{me{;a8sIWISa(0mjoc5O z72MxFX*IP353=51TaT*ZMMhK4(}2rs_*`0P^!vUf4n=$nK_sM8S)hdO5@w}@2J(ir z7@+V}@(%-bpZ$*-*|9wRdY&Jw;ZwCYO%kyV>WNAJ1`Ji}nf_TP0|C*$`BRvQPe3NH zy7^I-F)pW3)wr5REPz*@*W(<5i*1}HrPlepVR(M;=GxQvwY^tdZ>lO>Fx0~o2z~F8 zTnB3^K;yCFjWnFjy0j3lyu8}v?E}JKmld4LT zk2aFBD9l(DW-GDsXy;#h%AB--x${0EU%IiK8YNKo6^hu8_2Mi8PPQc|$X!Z?K0eVW z{N?yO%p{geIL|o8RXK5jqY8{g_>=<_Sifjo)^S@@hxe1l*r+#D?Iyd-yUqI{}j@ykuCMCdQ6G`e#dm 
zif87FlVam=1$_63UJ0C|dQ(W_J@S0bvV!a+G;_a@=Z=GwR3^U7gnm3ngU||a^F9!679Hwb`GHgyj+tg<4G+9+< z8f|Huk`NND-ZSj%X#A@%KS_KF6I(dEIE0XB&!>w+w8T30F;6SM;6O<9w=#{y7a40B z03F)_cHAIn0Uu%47pS-bolKqj+MH1e;;B}Uv{vMl^d@=?f1g~!Ho2Ka)>9nv-*$@Q z^i8?B#>@wbWGHrAj)bLYd;nco6eO&6Mpp4OTJfJIhCJT$ti}uY?0W%SCAR*MJNJWC zWE*Kt0VGBQx5434m!Q8;atI^b5=m2{hZ(G!3am;M;g3i*a#}OIe6zvfEiF^jj2BL2 z53NNDR7+q)$`!(R$*mzh-0;fRPZj2|XZ$-WQrFFYoxR^m2y1Rxyn!;}VVn7DUqJ5n zfSvFTDfq29=_dd7vy95i`Vk`bcN9X8>+066HX%_-0{Jw)CCw^XJl1+`yPlyLxm_r~ zUu~L@MhGmWUUu?G9!e>(E(;Vjp%*(j83m4eU}zd(;lhG!$#{j#E^04W6P%A_E)hvo z=)5b+BUGa;f%Cwrb-oT+;o~x3ImGZ!TIktpFXX>ah?1&oB3?!aOma8k?!GZ9p)@NM zr~4n!l6lh^yc6ws{9(Lr>)dvlsf95Pr$+O_2`=03FF8(!Q3$E4wrd5&U%8K4PLhs{ zPf<%yzfb&rIO-7bI1oO+w2Iyf7$l0mUuy{j^1^KW#!hz3?r@;fm`m1XT=U<15kc%{ z%DUEZe!*lAeFBDFZy+}waV$Xw80ZfyWW~u6<&k2hk#}br6lyzEFGiT;K|`} z#@#H-#Ac%fwV>kE%;#Wik3~h|st1|~PlJZ{{gB%Rlke$Uyg9YqDQ_^ zm0U42UpIj)m^ItZIqy72u=~eE_QN6>;*fr5V@%*-xO$?b4iO?kA~h=zma`2^nk6}EhaYxW7+V)8t%p@#1YSDJ zfruiKB27E43gIn$002M$NklZ7jhf@`4=u=6Z)5Y7;~IQ zgUl8!S||eV6xV<= zWdyQL*17pd?|vMh{MI&Ei2v?; z?}?J?;#3toV-}FD0oB!!*&+$ZG(DiPOwrxtSMlHvgmn3j3kZ_M7J~)n_r1mNFIBEU}NL!MRXk z-keQ>B*#vfUDMYV4PG`T3XF?YG~asdFsO7Lo4!Q&}D)Cz+(o zG^typz{bNM_nu-qqwbPXv64)tt#pZCwrb6qweY^fCh7Xj^*%^pDKZpbp`zOf8yNFw z2&de5Q$Ks`kw>k&*sh#GVnyc7*IuWp_TYertjDRRIo0JJ`jNF4draq;=!v$_dHDcw z4Mdp*WKJ;3ya&?8=L!jbHhid6RT)8$q3;N^tq?X7+}435C?Ft3He^pd^^{$B@dfgX z=CpgM5>`Nxq2p4lL@t^y(`v6Tmqv z>{rI=rP9B}0EfnWZe_5#ob%&ej!GqsET5ic(D8 zmcdN4n=^3=n*^xCz(QCj)?Ye;Oi-rid_v`7lmII4Hz7xapRnIaW8=>`HgyX? 
znaP9mV+=^#FRJ_k32Z@Gao6OPGVXxBDt1g+;E_tcT7E#UK7IUbwsPglAvtGBGImeU zMZ{Cb5RNt*_uUlxXZb3tUS~h6E%y1Kflv6^ zBWI31`t-I@5<+|Sx#uF10%kDMXXmP(k1Fkaff&Oe+=i}7M$K<6puw#%?$Us;ieP5} zIgXXE2=V+z=Q_ao=bonfZSa6u&P&nu5QsITO=f-!# zTbq8WjL8 zFr3GstH?Q4jf~2o+S*%rv9Ipx-(RW%ovqVhhuOXN-)Efrz{;V5YlGHu0*+F(l z_v1_^&OE;mKBR~KA4mnA#7!ywXP18P12ONRO^E&ar<)m*KNKU4yIyCq=~oCzffUih zg!+c6@jehDq|ZU_(fc$1@qU4|9@>T{VF66}aejQsMHf3V!wE&GL@O|KoByAgo=jz8`=8?Ce2TJ`JsCglD1-zO6qQbmu53!iG`%A)fg zC8I)HxyN?+0fBKAz52%`G8{47Q$!%_BvI4@pLop273~Upo6Q1kqDHZK8duY1&+zdG z`x?P7o?Kgo(RYkbtdPVmZA|!P_4k(~c^fOHO~q1C*aIZ+BPm_GO=?MODu7>q{e^)Q z22oVJ+P~!epBxvvykEU!4Yslc(qi!tU5kphZ3Z!hd^q5L;IT*S`WtU>L)u7R?pr0p zuaJr^6;k@rqPD#tEIeHGgzZrTSy(s%QrMb&SlmD^NSUJ0&SH4!<(K&tO<}<${eURK z0vz(JU8}ZJ9+(EhYROMJX9Pk1AxTbzbXZA#`~5dxxM9l$YUuJyFY~jV-V)6DN{er6 zQDb99jI<}zhF|LWjr1R*6jHQSltoNkXVap54T}`TY;FL7T}Zho6X8PHxQPK00`^%b zch)u{JX8c!N43)Bte4p0Tppb@J^uHS`bI|0>V6&twUJv0_BSP+>eh~wcdDc#(~c3h zVc=5(=)dHhrTFIQy$ncRzba z)a!ls-*1gu9c&AAX4txQTbC3i3>IoAE9ym1cLz6Vs_Waj%EHtWuAahXXS7sWxHS3f z9=&XtE?@t|ef{;u5B+p`&<+`s-HMF59JJq5u**kZ(GZs7@z$2mVt;l^XA%RmtJa3 zW%RzBxDOmDI0df2Sf^PkWOt`H5~yLoVnv_$_~VZEvqDT2qH~MkiLx?Y(V%WUdt6~g z>O2;T80$Anq@n^FpRvWbL`Hz4im*w@!f@F3PD-rgPJYimMdZka?sPt(8mm-`Z-Ac^p-FfS6eudBoo$K}P-P`K!-$23=MeQ%Chd8?| zmI-CxObsEYBPHe$Up3YVo7Z9ARg2x0xX=#-F(wc&caNr zD*`wto^-M;64e6h4fU1W0_B>uV7{{mIU}PTQ0aiRy}f`k!-9pYaLaAC>86$|2`=N~8f^U6qQO?%x77YCr&(lWL+Xkj82E|F)>b$`vzw@kJ7D z$~P0OeVev%d!j6U+Y88_2@eJH{F8%h;@9I{(K(cBTk@ftAawxharWFfPSIU(@kKU7 zJ97{oKwgBU9N|9;P*{jc2|*nxlVlG)_>i-)5SCabDuji4uBb>5IS82WzWbNrIp=E# znV?^Gt5hUviBkDQDk32K1wSsbJvet%9swwWpQ&kg|GuP9Z|HZUTjoh@2kwf5lHi=F zuvl)1@k2V{Nj%EJ86D@L=mt_g{HYtRg!JpqlkgKaa1_)+jbCnoG^wR$3c!T>E~u*U4HlNWIyxXn)dL5h zh6R}LT@7Tdob6F-! zKaV~1u+c$do|6m2fpE>}r2V1ExeiFM3Hf~~k50LOf zU9fTjl14?~si&XzuZih#9(s&!PFb{MvFlnUMBnmPwHH#HlqK}#mPg*Qkfi&^pBQs! 
z@8_|)b;FSvv5}O1T32uZ+_-{HU?T}%eIQBWFr5b5qcUV zHHka4aY0yzc7bC8q)X~^M*{g{2Nwzs!I%LL4X|3ZYIz&7!Jxj31=#-;D^~QP;1}9A z2+9RBaIomvlnj$#&l#c;0Su8}eCFBCE!j;54;0z`VY3Rt+OWz!^T|)BR6eOrvKrf= z1z1CYR?$a}oVHky=p!ty^XAUAho2r~O(fNb>IAGo&S#Ls4EM)v5OF*GVX}vX|9FYi z%Swu~6cJbn{@a=rd2lYqd0u5LG^i?o$dE?eUo5rDueic3tZ?-bVOb<`v<)ma+JW!@ z-!rs}NBkdKpHLzQQ>04=5-O52KF&(n#s3V;YrncK41}TVkuqI&&RJ($qk0Wp@FLi4 z4fP{WkScA?y^pBS1fVUDQ0*^vB2*sb2pg9WChL7R^zy=5EAEk9D$3Q8aB$n`8kY%WkkLB3riZkj2_$lE% z&%gdRqcG1m)BleOXkzHN%I#Akdi{-DExB-pkYgh-seCYL!a-5=>D}8GQ5I7uBo_J? zUwX-~Z|Pxo-~!0xX9Bb1$2{0Guf1q(nl^WqApYA6LOobGKuiegOwbw6-_rl;cEa&? zl-Q*o90ynGA(>W+ekMl{+t1y62x^(Hdv@Sb*ShTnGztg^v00_tk0Uh$A z{7~hnaGWpuo+zh3B`OE@DFQuV;iHa($M`dBSM>e{fAEoIWMp`l&<>n;9(&BOjwI-- zoPhK|gs^;}Y~PVNw=PF@iKhok`Doqchr}&6a0FzNQ<^*b8$8aqx$)8AOAa|Ef{wg zmakl?l0EnIGaFnX73KiKaJntT%@cYd^%K4^4xtn|w}x#@8A%h?@ItZnd&%4)NR~cF zf)#EBmu#=ocp5$COPe`uW@h#3)l-$;FB(scw06wV7}@CBu#j(FdTzf}BA2yR19GJd zw{deb_D(BDE{n}#6Uxjpzili+Kh#vaRE@RGme*0pjz#@Fu~E6Gl?9oJhAFDX2Q~FY zgEZA{v7~p*gGHao53A=E35zss&`2iZ=KEre|Mr5gBWFS33=%<>MxyFKWM9h;>S@v$ zL_GrGjAMi-QM;J0a=@BGa?z=1Ei_tOu*K;*fgi|%4(TEM@zJ~Q*_Bsc)zedHA7+boujmNf|bARgkRpB7D@2>z+DCnr=@%H7LF)hITln>oIO$l#L4;n4J2C z^jIhnLgIQX?6%5OTbv^1Z0%H8l9@Vjl5+{NmSJ&tQFbR^CmtS(JQ$>Ue6VM~(4eEpE*k~?r(b*7sgiRpzEH~9Bm4&S(WA$>WuOnFV9u3CDEBvM+1wXt z7CTf3I0xgr6gC<4S}tkFu%Le=GjiYQ3ZOD2OJz%;X`IGbpzZ9oTW+;sN+&>Q2Uw#w zD{mxE(X-4+7>D#g+#pD7!(wR%0Tuj#Ho8aBbgYIkgWwnP+ZCUTN2qZ&8l1bL=nrta zJ@+5}PihMzB%K<7db5E5ao%;$-LBGcos1l;(&i7U|0tVH_%kyPa)H>R-WCp!ET)Ytt2vF8@8 zIiAT5`uWchqbja;@J6CB>dLj0D=t48$Po_T1-k^2R5>UN9qXN{_o{No%N`?!nN+}6xSfGnZM~$nKU<0#IohymY+a*}RSrXE{EJ`cILc>-n_Zlt|dzSn{A7m{P|54Vi zRj;nydH3DA3Tt^hEbYUhcZq~*&Jp#4o+LIk1JcYcE$YZQYk~9vZuI;_8F6P31~Rmu zUUdG&Hd^WpZkFsn>~Al?R{T&`EbCxa4@w=R2B71I5$)?G*$c~(_6=hS);X4)Ft+}> zgr_h90P8;JM{?`OG}%|JTD#^pnJC;;b?CFDVv}}}3Q6MiQ-lxWV5TTp&O#F*%)cKN zEU68}@gsLHC&UTJQMIWmfTyL!O1>C;>wX6f=}j!?L99)r~v>GEWD@RwYC zv0oDWl`P;K-|aXXKKL{1cIe?YeCQXFk}bYjLds@Fnps6sJD-(2Po;j 
z*Y$N4Df)M?>2hit%?Tlszib~}y*2Nr#TE4(W$JL(q$hjQ#p4jXlZ?geT_Y?!5MTE5C1f@8`gRoiTH! z8-XBfNQ-xbh!Oz<2LeZgdX9^Ib8esV0J*W2>?6nJ&UecVmYbmVn<6(o=6L@`Zn(ZZ zCdc~k2`fJ9KWR9D6ifd+LM_z*<>ijk|3PY`iE4}w<<6HwmyXU% z+)2>*JM3IiX?yz0(-9*1%deXp+sGT`&kiSO>|P^(?ql-~By1jrFcvaYhib8U1iu`s zr8e;IDA#|Wb209XW6{906t@7NngFPs{P|!11mws<@r-oM&X!%(FSO7>(Xfzf%>8FY zq77&-5EZHkb*ff(qtCCr^`_0%*$zT0AW-a4f*hZIa*$v7!ww#*4uExvq~v|~-0Lb5 zGv>^8>BP_v*;R&oVHq}W)zT#~Crfb~pzf(+%keBHO5q+^ftfaWiX#cyEhpeRLOpF7 zH?e0Wa24!~)~~NSg^TQAfBq4@ek-Rmw)7k2XYtOdj$4{mcHu}Ls*%mZFtL=O;K>V} zT+Vq<|#qWwsH_|CW+F>;i3l};uQ{27x=moAOp_(IqK*yIHeAgi-~Ls+d$ z8d#;j=&8d>&-)+6kyoUN+#{^z@Dz3 z&tZC=BUf7AoIuk14qNk;??#aG$loaVcJqiT`I!A9qsbB7dA43lE9CZiH5BIZ50e|9 z2KrVF^0_+tVm-MJkd9?H5_S`GOrR!t|FmCByvwoM8L4Dmmp?n1 zbz+OH*TMyY4_l~v6{av{BF+|MxvK^VZHJsW$lnM4mJ~KjJnSz5g$wUtEY`=Lc%t>_ z(Zl^%c-2P!zB7I_u6*%_O?0o=iRhhiE5(d?b7k43sIvvJJNjP&?SUW*)JoM#8NR@2 z40#bKFOsIPR=M#3D=eHP-Po_6^}FFlTOx{vo7TCc7xQ4~=D}tS@w2N(=!3n>m61_s z85>s|@*xg)Py*NieeChaT&EIMfSl4JJbe$l7po}PX~hg6RLtooo^0REo#kaB+_nH^ zWHAS^XH`qtmUOemC7*D1d#g;qz53c~9;8pN-kR+Z|Id|buuE|Wnjxq%d-g21PQ;yt zqya!BhW2)k9rS}B7eGISzT%ueAa2SKAbdyYrd``MRzYm|;71s4=qM(OQP?uZpYa*y z>`y|N0Okx2?br!+Ye;yi!p!BH;xv$ZU2d%Yo};$lkd<((o9l#h*Q;#nW!^AT8?#JN zr|U)8u2j~0J zbcx?UgiwYcL(+lrD<{WpWsLG>x6?w7)oPve%+hskkj4Te0FhG8{p7C2L$I^jGn)JM z`PFvcAyCCRrm8m3ZY8>t#M%vuE<4X3WiJ)$BIvgByZRa-(tdLo=J@?piv&dz87g1x*lLR?t1=^OjtXX$IyYZHr{j9RP)Lj5rUI0|^ zfcx%uOFszsg!KGlmLI-B6*b(ij_XYh{&cX1ARkIW+W0<+LH0lle;AkJrvqwLzt;C#gPL*I3UDFwF7ayC9?3- zTxuKY7azF#{xabd{!TquGrPs&tkuS0y)5XYiuz8MYEgREqLtuv{ zp0&hOMxcjR>PvlZCb#a9M;}dR&;ezuWLPgL*&ZYj<9hY$XO`c0-^|$VC%#iqd^%#5 zf`eecRELk#bFR|6C~i!syGC*`&0`20os`wL-mm_ViDb zLE-ogG6ab<)&B|d&y%(SRcC6coH-r(m zTsGED*~Z;jo|IX2b%(M&DhF7H`9QZ7RF#@Z1?|ESW&tVf?DA7qC|a%udQB9}ak}Z_ z6bZX*``_E6`|IioL(2t?MTrqUA6!wD-<)+d~ z^hduDGjpgfK;c2*B9$A==_Q8Ul6T4iOOhKR?$ecAokXd0?sAmQ?TR@IJiE=rmZZD6 zFysrH9g6w~oz;CkY^0SeRnj^iewbVKK{yE;wOjzq87sP?)G?X zUwr(jWn^SH`;>l#%7tQw1swX6w8J~d4>mo=8~BBJz<7A=b&W5bUndIQ@Ev4(=GkZ2 
zH>h{$|K}fl9H~^PQi|gLrLx?i_Ufu|K`vqK^5x6ZC2NrKqQ($r34s#j%UO-eRcx6A zzvj!@(zWMZ=w;xJQmF31pMPd=y&g3haru>3xGIp;oYw0|hBekH5j~ObxIlT0)RVcn z!*nJ$J$uM`2Y@Q$h(pg~l>D1=5f;@Zty-j zVjE;46d>4O!Bz*cyZAx=GnMZ5>a00>|50v*+;1^QU18JiC0AOmqTn7T5;JJ!s27>50x@H3gk#kE|dgePzk^oG&*%dBves7qjHXtkn}|` zy1-RS7f8S)KE*R+*DhBGJG*o@=SLfXAXv!J1%3a`w{5A)(LwDFvIi+^7wHIzy8V~q zBeY4=MveW}3N^*4lhvqsq#^vt!97IID1CNuS3S#SFM(@$NOxvW%Jbd#9IP`V9~ zz*u(~SHI%2%iRWR*whi9BZY;!GZKqVH{W@?&7U#Lh0V@9;|yKm-8fSc&;$M%4U}v;z zH0Q1R4dsRZQa+Z%7GVOlp9h~0t3|-=6Fp8j)sZ5Tj_={AcJ>Ly6HtHnb7%ZeYRW;` zC`WNgc6JrJ4q=y1HL1IFKGZ3dX|ra!Kuo~`TW98Md(;hKoRXqUu!1vO*HB^D8Gv;S zut>8Rxm1#?%YKO7n;81V17!K(8|=b^T@dF&Ula*KJA}Rg;r2SMhqGuCDTpjdZSZ$} zrcZ6%$S}z%x)x%&O}Xr2b1o!aA9(OVS6zW6PXFx)pp8(o zDZNigsoJF4;a$79F@-~qINVlf69uxQ-NV@45oO3OP4c5W3+B(ZN)o`L4FWtN!7}W< zf)we?Fa|;!K`FAv2BgUM(BCs=%z*gtr0^#W->4%7JBpMkWxp(0YMnbB66TG9MB+&C zyJ^$XFT3K3RIW=yU${+&_Eu77;84;?WP=nhx$IJVWWd92cs;D8l#{$T2H;o#j@*Q= z#@M^!-i*^x2luS@>vvPCSh*m@AlGrT;=Ct^njfsyKv9OK*yd+g5Vtv@ElofbQ9U97 z^WPkcS3rwWWSKZiItfu|7ahjx6=`8KVwR|E3m-xlmueY0j-*r=9jXNdsmM9y~a$|6O;bcRS&P z)V8_-;<2Znvbo>Sa8ps|>xv-;N#ep0D|dK=bZ=DM8pJv))G>tFm7h`UW>!gsEFLRp z@Y`g$RoD3BV@mC`19jsY5Om-ong8~)f7rwi@k&)JZ}aBOcWR2Ghks-qFHIEh*0ve(5V-km z$ud`=AZyGDc9i}d0wHWzb`;Pae~McGax5uH>uaR5hyiyDEo^OlwntfMPf$eV%9Z1N zkI6QuJ`Cssj8lMe^3Jiq@8UXOGy=dc^fQvEASw(Lgm0mKTv3LRiDDAS>Djw?+?asA zD+$f(KmLe3``mNsn9cj&vx8h%3~mI%N#qHVMqr1t+3Teagf%KQJETwh;z)=8l!qsq zR1o|yaYuB_X1B|xFQ)509Xz;zRtg3uYy6RNqZGcX;;+?nyCTQUE`BuBjsR68oM{bI z`e(?k9WrD{fjtTrRwt#p86$Qbu!v|pc1h`)AWUW?zA!U3U1H?{xWkQ0SZh|V;nI3) z+z9-C@u(xj2Z$%OSPBC{6lcnhlERwu-BcU$)hPS@r{x|7(`Ci>F6R1PPe1dF%zB*~ zUpPZL9&qCyI_v+~I}13gs_yS^hOi3+rDs688x*9(La|!`1yLUZJFo#2Y*g$(5T#8} z6p`){R1lF8M8&{*6p49%-*xZ0T*iqz1;g`y*XOhEIrp4>cJICRUI{Xssc^J9%cN9w z-F?TRf0r@cC+Z6)Z+BCK7ZoToV)^GvTh$X=TKFZ-*-7D~7k^Z;{v|S0534`KOq~s4N71HnV+MTbKqBd>q2oh3Rlg5qhG{{Ml?=k}FNSHNkdf>!> zkO_U}{eaYEmrE`VOQM!_;Jao9%uO~xEZiTCdp}H^__6xz@9GmJZ;*v2`Sa8gtm>EsK4ajPo5_( z>(L`jojxsG-}73t;_s6n!M3*a^W%r{M4)a2$cSxK{-#ZugbySX_K3*p?1U+=Ba!?l 
zN4jZ77Yz8helkGN1;ps>hg2au`>`L57q79BS-eR%Ej?uCj=m{?$9fPW=}>Qg(=#UD zSdhwxNwdPg-CySA02?QEm(!of6vno!Lo*4Nw3Rmts7wd!cYs`- zG4=8X7!UOI?na7UJIVr}t^+*&#DH+SPP3faXlow}K9|;L^Te3tBxZdPtq<{NQv`34 zlannip82?*HG5X*CyPt<8aE0}&N|b4RuYN$7?5YvbHL#xZR3N$7kSWD4vd8#WSs}0 zCAY_#h?FzmQ%=SwNEDAeFTe0&cwRCLH(YnUIoj^iyHBywhaV1dg#9x?@!KfxWxD1G z&oRrzk3!}Ls1oq>!0_vJy<2%arG`vJ%zN65>E?a5j z6Tt`}+^#%6QvQ>pbs2 zc;Pd7cH~N=yQ%jf+7 zpOllGn`HDj^bde)2S)CJ_eR?)zcy#0i=%54yWFWB*TEt z<+B5yE|#2}40419jAfmc`fjbRxkiq@kjLt>i*KxlRhfaxcZx!h%9Sf`fs{$dTqW`I znTr>Rk-xJ$9WNt|N1#JF_OnP~c>)o9b}&ft5g_jn{6FuAgGf2M8q&b^Opgn@hOu%= z3Q~+8B%WUaSK8CJt}iH!gOoP(@APR02_(2HqNt3Kl}D1gImz!wA&OtW{Z7bZ@9^8V z;`@k^j%`WA^|3O8I>h1`vrXcx^I7Mb$UFLU$>l2;$?w*iZwb|ERyALQ=u6Qbjv#YV zp6q@@Uo-i*lDFqiI=E8$K*BheX#wWc_{Z;mSlRGK@y)mE<$_6;b0l1_D}c>lIq9d3 z772}o0e!NSt5(|1n*1q0P`g${hyp{=)Fm@u-W;q2Z-&TSAlbB#oSYmQHdDPdkDL;a zHctG1^207Gz-ilnc8x59k^edTg|tgQIk+=mP8RW-7=Qo2c%?V(bbP=*)*m(H(Zk7` zK7OgU_d9bBOGIq^X*-_#6(j|z6P-sNJKm1idd0B)@-?f&l~-RSRhbSZBXH(<=NeMP z(K84R#OrybZEpmWN@)YLNMu|DgD@4z={sMNX+js$LaB4Os`YYd*+S8cQiJOH-lx; zT{L7Xr8`?`%0yPKmVr;lv0~PCU@O^X|JQpmV*PaBDp&rqd>C zZX<=46)sd=ph)Tx+w|*k;;_35_Q`UEP$5%m3Z5JX@{%5EZlP|$D$CcEEr1z?`@9Y}G}vMs=8GmaGF)3Xn={{wPl zYkPt6?xuQizgssUnN-1#6o)8|8#eG~nS8cmSIX%=0);|ZnZ?4YOsA7hHq&LaHTUU$mM z2BNZj{^9Bg2}!6PX%PYx7aSNPoCcZ7$G@kO_PSkGR1Pg)R%#ZD4axd@z5@~0sa+>* z`}tSfNcy|cZvipqwFH^RB!09feUk4-{8`P~A*675M$Z6KU z+mo+8UaP7-T&kF71i%p>(uSrruwM~aqk7C(%Ks>vx~@UYj-d2{LNM})>LnwcjTR5Bp3Inr*~thv`m)66lYuAnT|zVh`| zg%HE7Cr0wtB}IHc_1p^OAR(1EcNAV^ZoT9T~oEI&_FWHhE z9DSQ5Vx+Bnkp3idY5<#-@!IVTlN+9M&OY17;V;Wr!%f=Np)e29~yLO zE}tez>83l#gNhA54h7`)+Ci5TFV*W2ODG?1Sj0F5u985-UP3;ypZKqh18rEXYBkH9 z_0sz(qvD`{c$m}?zWREFEqd@%X%~QRlarI}KKjV+)B7PV^NiB~i}vr)lL7))-rnhb zm)SGYl&y_$fKG%zNvMbP;arj)mI4{P+kb7(MKP7BwVLm1Um_i+gRrKMq$O*E2L2Rz04KO7p2*8 z5zX^&=-BDxa9OwRCJ=%ImrLaNz@ZcK5ZJA7yob>;SoLb8^r;`l2-w*a6!1!8mk@}o zh>`n*e&I6!uHgNPCYAI;!(F}a4YQ}uF!xHTx186C=`Ma8_vr!cPhb2ZQ&*LeQ_RZC z$WbHB?;Vqucf=v&2FTyjz)A~9u~nNkn(&cySy&*Aayld4x!OS06Q729ZTN0O_~qwc 
z!s@ka!fNU3u>duZv>b7F75L%@-|ue;*Kl=GlaJ-HQRC8yl!5I z+9KW|d1usry!-LjO>%=3@vZ_t7VvhBPcu9BsTwnEd?9gfjkdG*;lYPQ_j0)$6WJ-x z?u^UE|0f|Ud~!lQ2Yf7hxj0$Z(aDST&BsH0Jk(!Y7t`jI=EHkBk#Rah0u?G8&oKoz zTxQ~`S4=i!#ik@}UY=)WSH5wo2KOMoW+KZ&&G>S~iqO4VH~ajxp+jw>LzxMbqn3E6 zPsv#{I<-)uM#%aReo8Nva{?q?Z_@qTd2_=RmtQUe0hJ3M)Bc#P$^*PMNMoe2?tjxY z+iPxtK#a7CDqsv#T_>s{&+5_T+RFP)r{l47B?%uNB1k#GjPRZ?1X;vXV= zA$-Gs_*p+=7<#{mv?{wJEYU-LaiH;)U$%BY{%wNSqKe0gSxbRBU97++K-uPu_O0*p z3nH@Lk&|w=NDzPhH-Z?MIFL!7diLq?`J%<<+Ya(5J5<#<;Yq~;i~j0SBE4RH{WT_1 zeuYd?m5MId*-0zC#`5nWM`9=%wI7fnJ6nJ;0kAlL!mq#SMjLBC3SqC8d!cx!RK02_ zF4E=h0FVV`@=Zt`NEJEQo=F-0rJQ9QORoN`CsBy|j3!eIBwISxe zn1A?|1UT{K1}HjgkY9v!J#Vj1sge;SuI*B8ve;w3Wo-WN)5bu(>5F*a9qocJjo-&_ zWW3FI1`tQyjf^CD*PVBoE+WtAXQ*f#Lbwv&H;KH@4NwLGY%-`ZwL}qo<+s__j7Nb%jNTy8>s`8@qEK4aK>`3-D^b`52r<4Wv@u87uByW# z4aNbw@~ogp3iyiR{KXuvVkAUALE=Lo1yVNTQ6?ZEJ3xFU&r!`=8m|&1`#XBwZj5-| z(T6?`vXd_5<2yE6Pd+`+NW44)DW+FOO8pxoLA6RLorEfsDI4Kys|{YV3Z4#*$Wz7tn{osg)xZWNh#yZ-$&zDqTSuMKpaXigP?@A)|w_ofe zvsom!ujI5BP2RLlZvC4?uhf)2`$HQn`W^84P){J8Ca~xKYd_XcfBY?-(<+h8jMY8T z`rang+1rVG??=B954)-;_V?-kfY~Kwa|lADZ^?r=E>-O7C28drNSD4um1o_CZ$s^x zwd}Bhup7GSpxiwMawP~bOCQ@wG5?1z+us!*U1qbh(T5j5eAx`G^Eh}Oc6Sl+Qb)BEVFo$q50 zbwm94c>aA(7S#B3vw6>`^};@`@_edtOwzTW@f-cv<7G{}nvr>__z{G~;3g3eTW;H$ zG+K*?w@#`NkW*fK=|z!LF0piTiX5rfhaY`tlMD$v)OdVs@GdPtb?~vo4wl`skE_hS zOPfw!{zrcd1byZ4lDqfU%z0c%)5dgV?V_kZEPFMCy^ zJmh!;7(ds%KfTV(BLs3*h;%tkMmJuSX~7j=EDc!tsvsi{P~L#}@AA4$Ne?*$<`C*} z2zyG=ZV#md0~PSRgq|a+91&fjNRAo)W<6sv{50QQ*!Ei$ zcu^JUqFh?)>U85+rtuvZe;cMB4jx)u*}2l_!d*c~E9Yga_s{ zE!D?9W!DbUC@I^LkUzd&wK68yl+(2N?)N`_H`hz8W&W$Xc*IFbg^jerL?Q1)``8gF zyX9UobydGfWAjg#)%WS1kf`i#1P~MCO89~3qNN9r+N^I9D60>G^wOby_Lh$7W(j?O z57R@6e20(?fV3lgE*3u|kvWOv2B-sMZS1bbOQn(~0wDl>lvO`k#69?j@s1u~?FRKj z71@+_aD+%ZAu;-wPS)^a(Iws_zPM~b8!vqP#}f=0@*SoPQIn!QNi#p&j&gDGV@;SU zKK4QT?r)~%HvYIVd@lR0zlo}}aqYSg{@5If94ao3;v-Z8KM^QfIVE)O+%=piSrc?J zF>LT)zkYVA#4-=;nzzQiX9yK(TKJ73yC$`1(+00gN-(^sG90h_sk-u?gI0v}KnCeR zfkTWcu*x2jB)U^&{zT6xCv$iAu14uJU^X662Wa1zFdCscwo<)J`Cr%FK#Pl&+}Fg2 
z@0uL6F-q=$>STy;<4->tPsnzMd9Yw5X~AX3%TBYc-2T9XNALi>Q?*?}$dR)70y1j! zXsKT;-X(T6W38NrH`JwYk-}k>44b!a)7JFa(k2g5EFmdBJkC4vNE;{qe`XVlb~8Z_ z@gOscw`FUDKUt6ph3ghg@-R$r9?fL|RCoMsCZKPYx8CGP(sP%rus?=={TcZ1*b z4T6X$#4nP%4MxAeT=7*{wq{lM`-h0MIPC#hy-wvSrh|x+V~jAs<7561@)9Vkr%CzX zQ=;~SAl|8GoF49yG4Tenwu6iaEDg-kq2!ORNLVIsy!*B`K|cj_hbxpTZ*lIu`|d!v z_WJ8&q3H}`sXQQEVkB8_k$OivnFmD9gpE0;;Iq426eheowy+Q>2a>~1-QoGpjFv!ry43I*x& zJ~Qw*kdoji-f;*fdxjm)61 zPVuNi;Vy13ZImX9_Y)4>S0)$pX3yuFnt!6FPwzJbHfmK!HeN5MlKA>(R^C1F9j9gh z%Uo=HKsI<-x%<98Vb!wb_OAR9mBJxXq6Y~Ut5M5%(6kjNYA6h_4FH~b8`^_B`XNn( z!Wesy`za&Hyp~S6uMK@IbUd-6@f7JNml44X9CmuK|9jyD7ld1HzS*qgU^IU4pg~qL zs6|jl9v?U$JpSlo;d)77br zF#>=;N0S)Te{dit%bf;zWE*B#7q}l^g)B`;002M$Nkl3XYVFc&~7y9;^bW#iBh zAo&Z`DSe{}0H{D$zjI9X`sQU`iKOKN!cUBee6!Oc481=BJE~A!UQXEIGMSkyc>$!u z1gF52q~zXD9_%3(v1qFOGR?xvFh!PY0FX&~B+^V1DS$6IMW+YsoHt4;n?A_AWg=gD z92T^+00|lo79cv07axbV=LCz6Fop$iJpSBs&xYo88%XakcVtxZA$>=Tt{UX|v$#T9 zGk$hDW%qb{`W2)Pne>3thnZCv5S;uUn)e@EVqu~UG9t#__r>y%+5*8ayj>sw=T^gb z?sFkO+Jx~qYssf!+TL-)Tw(wvH%6$C6_;7)Y@Viz(d>I&De8+sGKha&s^Ul*x zJ!OV0?iaaq@~o-hBbkkB-@1J`MjlpB-y`1h@+g9#`v;}uKlh8zY`mlAS>liqVSnj5 zV!}>Vdkp!hz6{?R73OP9oksmt4m&mKe-VY);OBz^2`aD);;dAGA5kIuD1aCXGBQu3 zEopqO!|kdYjNzB*7(Gval}jYm+{ei_%}ZsrNroy6fKUc%HPa?d4tRk0uTFRPwq&=* zNiQ#e2?bvSlX7p38Dl#|ToHL$(#yl&v8Ya;IU|&;P|kMGnbHN*OQ--nlPl4XbjX3G z$4h&dz*Cyd0mi1$6;tD6T5@k6BBkZw@r*?at0N^2JUGJ*2#ecc70VlW4wGVs%Gfh~ z6`wX~4xI!{@jePzg#i-Rar&$!(@8~^FVUQy4x%XZ(p3tjTukeDb(j0T# zv4$`qBS!Q^V{3b?KJ-I=19^bhFz#^cmDkt?iBs)fdld_1HCH#v6y)mFtIg{QX5sb` z0z@*EO*MV%{p;mq+&~S$G#ufK(@zg?z4NxnrZ?L-gEE2sF8oCF(_1ua9%{&EFDheD zFAydwen@d6uVTt>#E21v8VcA|{aDw2D*FTlTYL4)3<4Bb#29TLKOsJ$ z|D6@8C@}J~y@>0Zf&@wG0A~y%Wc0l{W~{EeQ&Kha)iaaULs=MLVxX!4K=!Ds!HR{U zF=O8ib*tAj(j{?J&wnkjOsqtKxZ7o|1^7k_B9Phlep8`e)g6Yq+4OOG^a zS#DK??KcXaj z_ucma0UQiMNI75b(<+8?vg=x0$P%O)50F~%u5nDe_U8KS^&zhr>9mb(`>t5IGK`)0 zQD`a(1sgQR3;A$L2KgQ-)v>c>iL2*z*A=?rvdgfh1`_0O(w?AikYgF3`*(GPjb}nF z`d=_Xg+v)14371J-YVq~1;)vanNl!|o@uG*IMoT{P6x3$8Lejs<;s<_^;hY(Raps` 
z?Yv+AM;G)HBW)YmDupmq_m?Uht>6enw|f@QX>>5gb47M6qFoijID6^&y9({}?h9S_ zDC7v=ph?|&VXdU&+-g3~hyh5e?DQ~f{rZSuVeZUX_B`!RT}>3@hs7=KY2(m#0JcKm zH{2kLG->_JrTr%UKV%jFNy$|4-~iI3KRj*j_XME42sVAE9d}y38|8Nd$D#nT${Hf| z{zIxU4&D}@iFXK?aEo@~{@mlmU(H%jCVdzmH<5eq7tF!|n+xMOpxvB!vU6&?G-e}xwH8`vFG8Sj&=62gK^KYRYX z@W#;B^*)4i&p%&=_i7_*A9i*aha>tHl(;CY+NJq2YV8ov` zx#RZRjkgLfmH%v%pjJ^%B$(l^@@|V*4;2qDW2!?}Uh$uB=Y4$(_33qc5eWmOR;*YN z9K#8+p~5JAdv$?FjH;~486h$1dY=joVq|N$N1hs02kXH33+9J=`t}WDhL0$!oy!*0 z^DxzElHQ-7dziz}5;7Dg*1O#f3`Bs_O5Hkj!$)GUmoFRHz5o7uMEZ@z3p<`z%)jWu z3(eyHhN#0*ymFYIKP}$VX3UVc?~NHEN7{`+k420LGXHev-FJmsZ@I-ht$3Se)gUN1 z172>fAZ{lkQdW>BIp-h|7wy7I7_MI^7_1A-(MVMC&oPaw|vGYY2 zg{cyhyQB9#;Trkn1up8+S)M;m4s9f4gqgvwMM6ai7X-`(5NXtPQu|oB>`RMZQ^o;~ zs8m^`*@Gn$^$z{r1Nho@Q6_Z zKKPN}2cbrC*p~Zg@rvbU;*>VD zPvqX}q4ApS2F@gL%0e3-R^m`wtm326eke$Xy&}@OX06u|+?=Jin=8zAM&moIeLc^8C1BZr5EQc-}v z`o9W37I$U+Xm92VY7sL(T^NoL`SP?*rM7y28*jqsf2!7+?LxMxytnZmod-f7#U5Y~N&(&<@zRY9K;jWR zj>jAN#_OT`CEZLx7jNI3N<7WIwF6|*{iq!(2=OQxkH?1_-d1>?2vW2Rqd%xH0N57= zdCXb%Y3O_Zedccv65b`@aMfyML??e-hXk(!>PD(4Sdd1+Z8>E+KT)6&qbkPerjl(HK#G5>!S|-oI5q>` zfEjg=MJk{_cnP^xv~tWEU2*p6yamfX4}ZxD$suJ+hib`{Y{wkm7V+ODa!=hs?i=Nw zu(j;R7L_P}JRqq$OXN>Xez|p%`1k4EGmqd8e=^}?^P`DC6{oY=bL9V2-A10f5|Roh zg|FAF3@4v>VyIrVnr$jdmOR`f9CbQC>>i}JLS@2=_#sCfd1UJ8XPsGS!Ka^wa`K`B z2{<=xyesQkQ1YUWX)`vR%n^_n0zRmT066hR0Uw2YfcU=t+Jm?s(r75_HqSrvY$z>h z;b#U83~i)q$)?Na4(O3FcEI5!!v_*7LhXuoerhB+(&t%wNuQ2d9ITiyNbNzNN1R)aOyQsJ zN9+Fif{qw{Hm+5H0fbVvFJ7{QBZx^TIWNRJNbe&t3-B*xq`V9syw!j7$wV`fjTGmU zsZ)xyYT1g-0K1)g)saW&+D+eJJ}y7iIh@YuZJw&Cp5gz3e5_fl`=wpY)fSoYm zSUF3+T=}oo32NE0Wv1rwxl|Tb2wBHF%Z?Q`lbg$hY)?J)R5RY(Z{QQ5X{|atkpwgO zG{aYvl}!Vx3Xbr7!TC&5en58S0&PT^zz^cZ*3b_92kD-TmGzPO(jzneqP@-kA1e!E z=dUfcsA78+3d;{N9R~Yh%Rm3vi9r3fO!I%8N_4qMdfVh>p-AJ#jpfAmaGP6sbvn=g z8W)Tl_A$c%LGHpfhUK4sVTHm%)PN_RFv=EVA|4oD@nej61n`VL5+{y6J{Aw_++UEd5+^dY+9upuI?MFQD?f{qxO05l*ve#3QpjwbJRS=ZPm$)&<~ z-E)t5c5bYRyQd*YI-JfIZ!tXlK)=vdqy~)P9^LBbFlNk{!txl;C}Tuiq)R+~c2Qv_vj+$hoJoE&pAW;~<+)hpu<>KmKbDKn>uCbu;l 
zd*TV9TbHgj?yFa?zLQHoPI&Y|#2ZH;lMqJ{pK%u7&OWc07bx-H-}gQ_`Ig#`kXh}j z)r}-heDWtwMNy909@joR+V2sw=0l%*8niv>Ge=fR;HId?HdcV}!qH#UHb;s|Yo*e> zNO2=a+-Rrid3uY6VOzzutK#?~#cqCEWb{Q7C}CWL zk}kdU(p1s+SuvX{%u$`X>YMR=q3bfYuRn(*TujK?Oz^(BH1-`%fDcpI*gcbPRzMlu`ELa&MFX@U zh~O7_Qw4`FkpxgP!;^QQ3~Sx5o( zT`9H28nxM#`a43QuudJ@g&dPWj#bsJ_>Y9QMf-T)^6me1V4N{tST9$9vnH%wz9QUw z>n%EHbP6?u43Q*{2S-wTLtrzwX3g4wbp6-z-oa_ILB0BBfB*qIpI5Xw{e_eG7bIYF z?u8eGuU4-NM`(OvW{`NK4Yi_Sy+&ro_!Ci&9)9pav!CkokaSjjy(0A0=@XqqB-1B; z@(G(~J1MHHUnx#&g$??~@tPej^o;9Hpg6W<`IVQ7$KCz%x19e3VYNT(8bs~|_DTRu=dXahPCz9aAQ3fygh#l{ooe`Qy6e5#o z2l<7GLLe>L$R}B%>17rVpN=)E*NCN^QC!cLyt$_=1TUMH&!791=fHQ`9ls#ok?{L! z*RCDTJo5|_AXuUk*tJ((9llzzBD}A@enh_LDkmqKZArc*udIMLn1ln63Ixa>U?;s` z`BFOt_%nNvzz^c=p;%J*Mv|zUgqJOoWUFAy7Fh{Opj-)Lkx&%!;$#9i?;5Kp0H+4E zlP_x6w>((!5zM<^!JZ2_Zo}tbX^~XnnbL=-`Ebo{n~-NtfxJBAG>yFa&g%{P24Nak z`60dcI8`bi5iaO_QFvE|(a#YA zMY5H#;ANjROZ;vDlC1Yg|4}j;Og6><_~0DH1u5>OD{5{84piwb{5($^NZX;nBSs2J z1yx}fr<@*N(xVm%$Zr?bF@9@r*$*5yZk&nywrkfeTzS=%;mx<+G9HQITgPyv6O@KN zD>%TQpBY-RvobO$ASRgaVO)q zIjEU8h)bnry)wsTLc0P#N(T|Z3>e@icl(pzkN1^NCQm!T&l8G9{}ZyCHu2LdpFj62 zWd}H&_PqKUGlmS04F1j6U$3-xtwb)z;p*|?pW^9b*wEKa+7>-L%9hm$jXa3UNz)Cu zlRp57H}&U;fCFh|2Q0b}2x(vl9IxsaYe&R>R|56p^n7?q#n>Y4Cv_HEua)Wy*~?A` zy1!lbeH7?`!dl?Bj2=EB^@SH-2=l*O8YWNt#Prxu5Ql7--L_Gh34xc5JRAXKWw;`t zgmV#zx+17Td2mBEO-OLk#}kcb`^um};guI(k`wS&#+UUq(#E0s!N+ra_Pq1JX&xDX zS6+H4Oqx77AX$F5F<2p^KpU`1ou&Y3 z^=Q@Ii~#7~76?7hDdcGNQ~s9U&m!RgEL@c&UAmtZpn?dL*UOxr<%2Zj{YpNNn^0!H z`kHG*;V5re)930pGmy}}eY;RY_8?JBK|LfhAYD`$l9ditfZ~CLzi^3zZL^i#dZMyX zp+u?hi}Xbyqf(XwgeaLWyAoIkf0d9;V_8#j8?dhb3c_?%AhjK$=|(ltJ)7pc^#2mw zW3j%d_*AKKY`t~a?769rNkHRs@k}QRA>f&U6C~2bE+9rfGK`+N6H@Ix!y8d0*sJA4UUEJuEDxMImm~N;^3EG2`iqz)DshsZ%je)=0m0%*atv-d`SWz3v8`SW{T9(DRl5TB z$k7@W`E=*sJq9sSY;^|5Uzg7OWA*CQC+qPXg`#Sptp^Vr8=5>C(^Op6AA*b7{C*Pm-6xpFG|p0b6B}%Rj5=k*&J`8GM-=6^zI8JlL|0chv{t3bQZ zuK?;Fz&UcK!(#96ybb?RyQ#IJftVQsZMsQ_ql}Dp<8>h(aNLUGH2QulIN1RVRUa%w zi#<}*H1NS_<&XArcGAjeT-pO<_0?Bj%1eEf7^&D*1it^}CxkzIP*Hiep6(Q-SJXcB 
z^!#lF8lZ?IcT=zJ*>mgXGiRs1{K_jPOu`9rs=UB(@+x?t9_Ee*I0r zh4s?GH*(a8;A^9gPmh=ON?gVn@%ZL&srmHMNpaNZQDMXSHD({VOXtqvmfLO>Rq4kV za;#skUKl%Ugy{^^-?ZV91q+S!fT~d6d+rt4si>W1In_>7e*vuP9Zx)wg~ukA^1oXF ziXsQ3JX_4OH*>6LyZ=WGV8}5gCFO6GhSSq}Ju9M)*@lD_r^OH6dDnP$ly+YY-s^9_ zCDd$E-)Pfw=gzfVCmkB!saXvI(#j8Ti9`knJF03rIgA_qX82l@={pH+FmX|ky+J(G zt9$k=Bxk$ql6`k{pURCvfMnW5&kN~3vKr?{Vbe(yBi7%3yCIYo=?kkCEEbLs(uw|m z=8=yl7K9!*UKg&v@kW#MORoWFLp-l@0?cZ&h7RKoYuA~?rc3QQ5`-t$u1%Y;bn#~~ z9n$pjOYa#LLEu4A78Lupr$$Z?IXg?GmqUWjwAhA!C2ao1%sHyNy4X zekj(LUg_^{lPc863_=-x@v`)T4OxoKpG<%93I}58z*#m=gA`q(w?a2aIyn@ZKRF) zjxp)uIlb}|k8g;_+&|;=Gs3h<)5BLf>9uIp(wqxlbKSLO5|Tb8t@iEO8IPS!irZMG z3_$Z1&BN_C+^TI)k?_#tkA-0)N7(Zoms}E3Qc{q!S7$+7wDgPc>yJN# zaqoF$!Y%&f3p1|&miL->FGEVJ)&`_9{E^ytwNzByC+nyZZ>;Ll0l z?>~@HbaXabwQZBGQE)E{<>qw5LO$^6r)}Z&x_DYFbWNmU!g&8ve(7>Dl3Cp7%iRGW zr704k8$M!0I9q1H0PJF(a>nW5^OeiPwD%{ZiigMs@E?u$#tKVxACuCKzvo;1KTF|C z#cR#~C?LGHeZz(gg{7yOv`B^lX6ckXRyxK1l~E(|> zh}t!3nuKY!YE?}~7815^zW9dp0JWuE)BA~!g8)sp9+#SW$EII@3zbt+!V6ORuPh!r z056+i*XjR{w2kg{ zYi*KMHJm9J8Tnboj(fjZxtwue}H=lOWD_5_dJkZdKo?2T}YAg2wz#TiWg@{M}QN>VH*FRFZ$G1XC@}$Uz<3_yV`?y zAWL}gBXvJg&)VtQRo9MsR$QSlY6J?~+qG-A9f@A4sw928etn^D)_qfG`#ezK`tTF8EO$ zZJc3i*mXN8jX0we$V#wHYu#3rm9aW_^oa1scae<&uA(}2I98IXN7?#Bz2RX4K6dR- znji9{uEfEV8>i>a7kAa>qOj>Qo~cdr5b+q1)~5Y0>C)9Uc=hCA2bKdijgfp!Nl6Jf zB!;yAqpUn#(!E=#Uah(zMM+SvkWrC$qFLwp4vzlXqd^_uCmwYDW=1PRkF%q+4)TR) z$!ANXFZe=uO8&giCo8K?StBn5OBXK*$scAyLuEQeX&k9Eiu=2`EAvU-$+o1z-f$2Zun=u8 z*y&`bbTGQ{kwOW*Yof4y&fGak&6+k#$d4rn<~M*0Z5ZeWaxg{|kT_<{*utT9FQR8f zitNx;{Cdr5BLh>m^JEV0G+A&uM&{X|vN5LUuk;}8I|L)oJog;qR@YC&BxB8*HLYCt_URo4Jojw)b;I`;_Fjy<$bsENw?qAD^qr!{cE?C zosSDhT;DA$P9Mq~Ba|j5$iws>-Xmqt4B_od96`1#_?t*1027LO zWH47O&aVE6lqZ6a*chP{OuN#byvq-;g+}SGU%y^{-}=ZC$PJdCKabD%WAVB`ig+1^ z+(o&v0@8sb47~OBTQ)8k=U!KyW2f?=M;;E-C5Z@+8h+uP0Ms8=!hxmb{901l@eoBw z^q)17>I{bRaT^~scCYjh*fX!}Y9Ck*P?)||cv0bGrO{B4&r^6-@8+p(H$^`N@fA^? 
zMMX-C1^xAtVIjr+Ro_ffyxWydJ*9W5{_`FBmZ>8oMH%){^n4s^QabBVC-7O_VO=`ztV#kWXPLK?eQ)B^k^0_4p*Pv?U3 zF9^8N`bDPbiWS{ideuHCz(33gA5RpMd3wMB5i(x%1m}Z{@eb^mM3MhH{VFXR|GFZe zgP6bR|LXr5g<%R;sDGL$V0x*o!X*m5)s`f8GjKG(DJ zbp_FGm+(?y^pIOMJIZ*AdalR$12hEb@3-%M)=tZ%mI4oYgM1DmID}9YKpe=X;P(6O zzuyFKAYFpwVnKF+;Zd_eANcgZ(4tAx@bSbC&DSc)bUgXwuuuy2$DG_TtQED5G=02T zCUHMVsIC`_#jP|o)aekxpHC)CFt0FcWNimKtgv5>l`E?sL^5T(KKJ5_;k~i%S^h)c z8XZ=xT@}s~uUYQLY|&MopC~{jpi%Q~A&x71W+#ZaXa)C{#u4}=*;%P8uGorWov8S> zfWX&7a_UG~diX#NcJGnC40Q0V^>`yOEYbjYYxI~4vhnD96{R>Qy z6V^7ZTiZfFIS?-JcNQUmA8f3$^S<-md%_#?Jb(@uixlZ`#q!HNcib6H)?$Vo-guH= z0?F^TD3rZ~PK(sTf%{@Qy#PA$`D3m=_v4>sdargiDwFzzT{W3-%0sFZa_BnU6DBES z38bM&q`^Gu9{phZ`0?Yd@boEt&ow`Qv^mv*{18|Ix?Fs*QJ+{tM!xq>s4XMiSf*lb zG6sJC*Zlx>6|5gDlImV&nRg@ew-C5lA2 z!!(f_Sx`2zkWeZbS+QH;boDr29soLdSnaJL-cZ3#BBWV zT}e4Ao-$`|yf*WE1deoe%leJN3Y~t>67@`MFxt4V%D7(F zcllnxvwVRaBQIZ)-zAAg=SjF;xj}1zIh$p@m~?a=gMm&hFQg(X8DWBG>mW$pf25P- zw>oJYsKEte7?S0yqeq65WUlR;3(gPEKKGp2^J9S`zE5ONGbgpDOq7Q-5X;B^8Fn?% zD?3Ue-%Ca6AP%~k2w2ptR?T))40v{pOhi9DHjx02MEVO$kB*tiV{2f_!W z1qmA9WSMQiipXN|?y8lqXdWn7d~pv1c?3v1VX~b;9({nY0K8)i6vKaMFP;fjl0KD~ zzPEX9v+}y)_UQ>(TItlOlO@UGLmmJ~5n~OIPgwwRiWEtUG;!G4ruk7Kvp*LeRh}#? 
z@#7HIL09ei|Jo>vI+6+bG-xJwyHU?TSLFes0ks3(VvE0LI$>`|WQ!n5~*08BjO5?%Hd& zfy8wB0$~?QNlDrM+>-;PL_gO~m6U_=0R`;wr=AS37ieq1*Dt@SzV@7ad_6lr8*o%p z20Zq-jbkEwB(f;r@$FCC52r@C2@8{jP#vmDPy0V|K0JTv=e9BAwCLY+B~REy0Q#S- zb;{e;d6jRB;+Vv}uKClr$~Q{{Vx=pfKWadBYw!#(4~JAGkgYl~5a_%o+PfDGx(c}R`b6Oz1sG7-CZ+0|@H4g-<*o)XZV&*cQ??Mz)?a( z7)geF*}Yr0P+U7%JifQ?ctXI?cWeygu)u1=7(#k&=uq){yPKf~lJhcr5Jifd>UowM zr0B(DoFLu~S?b=q?+(vB|GbSE&y#rYcCb7$W%{Iu)g3=`c_IbuO4&F4`Mc(%?|FrN zgkU1pfpkpJI1~8GOoeaqvuf&dlZe!{yvmFfuJF@OKNWsm&Y@AVhnfl43Fk++$t=qUI&Gdl2NV}e~ zu|y3H_hbktB49(j_%!(MfG*_FH{OV~2b<^i(y_&PH)+Rz=ih*^*>u-!)IhHITCr{+ zbi;K}G|r9;q7U-&k}V{Xml@|#TE3;cEt($m<{Z_D4pQ&Nc1LvPIcJ|^2njC;XLLL{ zOx0-vi7oWb&?!Vx>qzN8J~ikiGYEb5`R9eFpLsgSk#daT?t_XxbwrnF-62ad!1xwj zDFkxhUA;bRSh(Y^yNo=6BTx)cPg2>Eh14fa_}E0E$(Koh3IJqbM6Bh%GD)Nlc#BLT zWU)G0G2KL4qW0Y8*Jt{`ud(44M3{6W7C% z-eiZG(IoZnGQ0ZZ#^~{b{rl^rf4=R;X%o`L+W_`_IVrN>vvZGMJTgl=t9PWsA_8)} zT03#X+5sOsY*x-V?Q~g_iEOkWR~BTTE%%Zsvv^)2?Z~_Xbdsb3zLM`}4|mUxH)JQZ zHEJ5$K3)82(!>uxGUSsJ=gvNkIO&5N{J&R^H3aGqHeVl+<~6+0 z&Khs^Ri1mJ-=vCFl-jMwrD5{?IfhUfE2#0XX`U`GBtY7HqTlXxVLbU9VNJrJ_tjE; z;#qp&X|hI*77||j&DvNrgb9K4HivwNI?4RmbHe4_dzhXnah1=9D#HT3r`_}Iv}k$p zvnPTYhtr}irnHOsCxF3`E1;7Z0Cc?4HgisRK^Q3AN2Il+R3CoTz(O)~@zO8sgw#O> z1KMa&`bl*3EB|w)85FqY+Mbg78g0TCq(!>%h>o-q1->Od0W!pJ8KbT%ua3v`P;JZIt0OUO=pWt1=d0KgZStpx#Qf9pm~?Bs|FPj0~515$0x z`gKF&MvW~w7B|#3x?I{l)NWAEboB5*;_-8nXWDNuEY4{ZD?5D98?}PZz1LnZG1LYQDjob3n|)t zvXyGPEnguN|!b20ybeW62W-9QtQ+@@%LzRpC5dm87S>oe8`~b8O)%p4sPyk z5yly!P)dS?O=R|sG3%;F{*F3hY@u46TClB1C_?WIVL&tvC-of}_=kCTO&`_gVFvk; zlp38?*-wjCM)P2hCbXz0QYHop1AxIoz$}tHgiAu}mhhp}J6;!QphFv(m=j3?UL=As z*r~))Lraj<+i%L`+?nCnlTHkM`t}V|B~mXki)HO;Gk8cleh_6;Hp=4VVc?v7_SrH9 z+)s#cu8~B@9|t#WmO2b+vhxziFE>D$eFu)OK7bU_X-uhBHN5uv zP+K&}Cz1ZJ1foCQ9-KsMd6Y7l61q<#el1>eqBepE#zbi=h-cRAzKh43gmkl8={Hy}l^%U4HM`N}ce|7`d z+NtuQ-qC{m*Q{NeG-UW2w&16C63VQMtOJlJ*<7Pfc3N023-VJR8~;z+UQWLR9q|H?mVLo&r2UOTXeE^fQ*U+cF79m!fK5z)zb!+ z;<5$+`My|2vWfj#{u)=1QH$#77D+oNA^D8&O&@Qu*qPigevl2AI(>?%*dVC#zI7b(z?MZ`|ojuT;Y 
z93jU#`TdcH9}Xj<{6@u#(Wtr43Md!B>x*#U8)EeUJO^;62Yezwkep6|ZCkg6zr@bi zTN^ey#K-b;hM{?3bSTRHBPmccgD<(qw6oqz^O(8~LOKFN+Vxl#~=hj+_`-h%qttmb_pyAiH+y8d}IK6-wS9 zN4`Olk#s3j@q-Sw33%TF_sgHe`XpCtmMFz9$Z8_^$hHwF{>=gcl(FVi5)tWWdUmtL#ddvjZM~o=)%Z zeGZrYpqE?b%Vf{&zeXOj6J2%^&OuzpDAWN2uW*z+U#cmTkGxrYPd%kmSS`hB#zH{^ z+RgdFESwIUw7>fNi%>~Of9}HhoF2WCs_2{jR7YJ*C(%F2x2y6|b`dhFuB<_3jR!LN z{)d&+9EhQXR?2eY?C&0C1r<08$b-0`hpq@2IrWPgeXD3B{ z`<JOUP&ts3xagvb%m=69$JAmx ztwgST2f(H{Uk3o0!;2xtr0D!wJbytqI7ZWh^JbJRCG zuPFT~^slb0_4y9@icIpc^xj|{@-dHelV$+0_;6)V`y$O^c>Hn4OL6tVu>8xf%n@ia zo#Y~<)(^tZn|=uu%9b;=4{VCgoZvk}&i~7#zlNE)sWYd``0=5(81mf94ryv1kyYAtI40bA`yJs*G18H61k&bZ57Yd> zJ1Z`73~BEry|c|o5(?2&sZ~QK=>=xrGSRfsBpv(f7w$-tRHlFVmh>T4!q-C;1K=6r@GrgcvUoZ5ja0?6{08#y z@ldu>a(GCm0XANKYHZf5c_^djE^L(F<;h9<-cD{3ku(tb2RP|0kov*KAAYdLRQp%d z zMcJMHBOu`S^iu=F1wvZy$zseoGQNH4si(3CSpLf6a)otDXD6f>mysb6#d%QACMy(? zz|7V@cim$oRoFvYv|%#G*az%wmS&h2tSgi)Q6R3@mw5^dl?h8Ho5qj&4N`xbC7aOQ zF6m|lN)QkPye*KAK<}|mojR1A)8aP@H!A?~NkyLI({E&D&8NikBUv?|lTncZs~OE> zmo->?ofr)9$ce$sF06_qt~zTlzwqpHVaCjvq5neh~6#LZ%oI*d8B(FRbB$`x~@mM4- zIRE@07eO`&ee!x;JOSDUpl{;)=jmsjLC?!{Jbf|}kMB{#0g#>p(xzz-Gx+hiAVJbe z(dpd*i9Z&-L)9O&aee~jpiSTnXPS$umsaJmN=zksBFne(_^<3R(S?LzSNrk(QNTlsF2p0Rc$GL0%eAZFLH0{V-% zxRJwB%`G?GEb7v}Hs9$-0&52S1#-N<-vhR(lfO;|68R5TJH4%R7`*8YF1^?57{9Ss zv`j@%;W$U9y04`?kFQ!t{H>cDY0ece*_IpdjrAw;`^fqQuqFaXWRDm%GW5Fhj!;Vi zYP5l^WxC>rbfN?rp}1)6q5++x@gLO#y+t>qU}@#|&mM;u)gWHx>bfr%Z6JS^-|dxQ z`aYPwv$>+&%8Bjf8*d8ZC9pG2$f9TW%fl==H=QUc$gf29grrrq(h(-r+DZ!iEA;Fp z+1G5-rcLP3@x*Yu%!;vt!mk;UT7c)p0^<3_BLq%u{0Cv7w}->jS+aC--l?aBZ-uOU zpn7FU=a=;xY*F`E{&uTMVfq?O8)^Y#x00 zA@k4Y?N!CMjWxY?3}BqYaqQ`%8wp6+2cO+8^WHCCZnTb&cAw=h?maM!XZ; z@_>{8PH97jy%BJ8mR&)N?B-3~=~Ma~qtNK6v7!1$Qk)H&HVN0=c1z6WF7a9)-P)GY zf=W*H`fu8_Lx9W2w<8@8k?QRjsAcpU|L;lU`Biyp*u}_s?a5%vmaVBXXU>H9U}V8W zMmzJ24O*eXNdXHoQn|1`i=Tf3=QMtQOdJpKvSSNpsXr0 zQk*mN_yt~xq`s}^HP@K(Jo)LQA=|runL}Yz!te&8_*R~{q!PL{vgm>Ph9wIYhHu3a z28wF424LL$voAgmzla5caRR6;jNOAIxCc_~DYY}Hkfdr3J4Nt5mE*l=`O`)ImT*4& 
z>~@5bi#j!oe(Q~xi=^`IQw9`^Ggms@Cp3DI*a__aK)ppoCSPYvrZ5rFAh0LJ$^-8gcza%yJ^Bqt}^m2Y^S84!p3D+w6@&KIK2OfBljeSARZ=XoJPyoGn~(IcF9;RT^}+t!+V2UuHh@~{<5`B8@h z_CG+X8#BTkH$?2xQjDCGS3dmA39(m0`fOp;@DVc8SHVti{yeX*v^S^3bsN4law#@g zQQT(nLAZjQ`1j)3uv4eL0Anes#KGpLF@7P=>$2q}Jw~E$T%b63nLRBQ)0Ape!yNH* zH*MNv15Iskq{2SxH|@*vozLib7FXF!Cuifv9}00oswa5^C<9rIANq{b;=Eb2!y~=> znu7qIrPmkCYi|R+cf|j^^7u;MWcL)+SKs`swk)a*`?h;}-)pQMjE~c* zbZb^?ILSRM%Qc+t;73lLJ>5JfFiFt=X{8e%p0!(uK|XWdoN)D(R~b{jgZwa}mo|9F z5Yw;3`h*W|N0g2%IXw_SF3lx~v+B#`W(0utAP%+^3maoDu3p2vU8;!`5qlk)KT6U^QMuXS{`+jya2pw^`?HK zb+WG1pmwI1ww=J19z?OkQ7o&1|T)FRR2;g+!8YKwxGWy}X&^%G`#0iCRtjc6R8@ zbd1o)>LVHg@*=0jr=rR4Xsqai^e$>2BvwJ9YZZF?w-M zI&{9IYrt(3dFvbN2gXTOW2LCl*)9qU0DA#|^G{OmAHiU&ZpE069s5 zfyDre4)E9@gVf4N?go(%kZOh0dR>nz?F54o?@D>Cu!Vs|L7}uH(?F8wBSMyV=CxNt z7l}1Dk(BFMV$9!p_g!K5h~Z(8NFnGZ2uVF2Qn{xfKoC$c24S*W#jdUM<3IIP|DIKpG1N7^Vq-2 zC*}S+eFCyg8$Oa5K|GTCsXoy(vlEZ}sRt+CXZt^FNYTcN5@{&}dH0?7$k>Av`V|Kl zsf`2gA@??F*ub2>7DW50@Tm_hT}lhC+;erH5>jE|S!_iD#x z(LA;;UAEMQ0n(TBNoK%%%0KV(Ps3>^bqY|DfQ-f+@j+O6@5o_l zFF%B~Yg0a0sQ{du$S!WV#NZn@Y-9+>-X(w>Xcs9BTUtyUCKTLX$Y7MQREa~wr#hja z(gEbQ-AKdRjr+E;wTKk&$Wfyt!B#u0TffdGW5ibMpSGZapk3$%*F2C@DF-L>sZyfu z-KS6ZKpspuiBccZ&j~Q$H+;7tb_zGiJ0T=a5VS>p0b|J_B_+jfd4{?G(4qY?c0~~9 zNa@kur}|CYRMC(Nw z>{ywInGIIbBQD_a{Av#q_#wW3!@>-}`>Udp7y-45o>yHRrcR$0M$5r2x=sK-BO?-Q zTMZ#`653n7f}{u=NOO(hZ5p=~>}H<=<54&+mtZPAlt&6+jr+eF)teDGKS zAXbmXj#9Fn$oC9cA5Xx4rWPR>txsXyu(VtCCawdE%v$bF%Q(3sZD>2k5*`sHbGTY z9tq*Z#(?tA68t2eSwzOCkSEBPwR)!VoDq$m?H!lCbwQljs>@wUr!V!9M0^s)DPERd zHAYF;%%W-m(q_~B_zTa5$3<1?Brg|7ixR=wRJLpxdtbCjQ4=^~4&di>=B%0Fh?>=H zTtU@BpB5`S)Z;+0i=c!ZED3nL%z%_t29}|{6uwr#dd~)xEhF_4McFm5ry)ig>}tFY zG!Lhgh0+~!0g$|CGA1S7tRVxkn4r9l;ynC07UR9uxys+N@~h{|LPY?0m-fBKOYh3S z0Xz-9Wq0L^|ApsY5SD!QnRdw^hoK`zgtcGIHY*_2B$8jaNMS<^C_%Hx8`6?;e@hUh z(&ZTvt@riq8xAdBCRDDFY-A0DWL(OxYE{@Yk^B=WHw%?ZpR!Q>B0Wv)IPR#wB^N&b z;tQ)oB56jx$oiMG;@gQYI6*^Jrp?Yg;|!b7NSnIzR&+Gk{no8hCnRel<&ayX7)yr@ zxXJ~18~D5Yyn0?JR&(!dL!JjqZKidLBg5nw@_wSesa&I)ZL;L6FJP;(rPPBynmkG4 
zK`z6h8?*c@YocjWk0Kh=0OJSmAe6UFtF|(T|Fap*a21=0lRhaV#K^k9T6TlND7Ar| z8g-3KMm@|UV3M+!AE8j5G$jO=guw{>2fE%K&ohU}IN7YtX*R*E!>fHDG3FHUQ+~$D zeM+afZcoxFZ?iVhnnGgF41C)3npsOgcC3dj8#Xb-w(P4f!#mob?*woJotUlLvXxUp zIr+F{W6IjY=^hm@X3f-ySW!q?EM#$}YzO8Qu*mq9gNmX?^zN3*G*}=+& zp9!9zGsww_&ZkTS2bDFpSj9j0_vu~Z)9~*>nk)?M+qH|jP3msS?$GaFdFf?KrdsV< zrfARi|LF9@2`WAQL<vXjuMQq;CsuSsL7EQ4Ck@|_ z`12fJwFG0B8SpfzZ+-&hr!Q;Os~bMi=?aABq;m3QOyZ?|{mRv$a&n5<#{6XF4AU=V zGg(1aWWHbXjr1KWg;b@JSACQFI!8{1VV@#qxd%%skoi!jfv?`Yc0KXk{tAtnH4TqE z`9wIi(3v5mGG3jsrX5BY7;jx}%JZ$wzR$%&>Qfa`) zD*cxQIh9F9Ues?wH0>M@EVYfAD8`@ctn?-;)RCR%ej>`3tz zrHugs)`#36d@EO8{X~u_d6ioJrsE^7jXI%(vf8fWf1w2n7sULu{`7`?a*Kfaz@$Ddq0d5k{{uF>8hS*k-B^%VPAI5(?!Nw0`tA(ugZ2jq1 zlWv64fe_U?IkFu&dX%pX)ct;iGO9Z!(INvh@9Jd3TVn{r;lTEW%^5> zFbA90Z4e%NYJfQ*=L7GvbUyYewN&pAOac5^&(pi|oqx*&BJiI&i;QFeB#u{&PlOmTl8rC3aPa=2S*^OZfPlQ9_3J~+ zB0kNA4H_Df^K{de!SjxPj%1zJfp+-n^QC6l!g*Ef8d2ii_Tw*R4;KGx>7^5&W^V6k zORocJ9SGt8l#{#@Y3rPnfkhiJcO+8*PWKI(G&WK{@4TIIl6F?#P+v&mz_KsDG!HP| z&Rh}0H;9Yp{8tt&z~kGW0`eoDi#C21&Yc$|^=UlO#%j}`lNKaPHf1c(@UT!tY1^i4 z;l_;`ZB<X!rJnaRBP61lj3lNd_BU)mtE-Xnhz|d;2IDjee|%xy$XL* z20h=Vduv<&es^E@2*jvHQo9+`yObKT9gQYogF+Lun`9vdG3GUR$1A_sZjNWDSP)zy zlYEJTolU}bQcA{^lSIkGb5b;)AeU6fckCEWlHOrc=@6pgfaI2^&-cD?s*YJMQt@%T zY&>I;lVyz1@~D>Kl1nZz-YYx5CtrFtggZgrPK}2rS1>gcj2l4OOD~W3wEUiUfX7c;E{~V? 
zN<6*+DS8?il{@XXIsFgvt9nEg>3uD?Bp|If(q`jLQ5ULLs%+$?M9ZJuGLSAR41jws zh_5eW)gZ24y@y>OhfPd=l`oQc_J6(a_!2`7$96bYrWUJ&2X4R1tiWId1HL)&%89S_ zfSg0W^!)RMrH;e`d#0XWtvWrad)BzTAgR)9N;VZw@<-5KSU19w%MoSE8)DS^gn(~3 zc^xfEO~3o^4=0HFgu!s?;{Cuqp#R=oOQMN$;)?9rqYrX=QPPeae^(l(D$iRKdh2;H zZS+zvy70m+LT-g6+fYcJLkcaDlrWO!3uRhz%a7Oz&@r=Q@ld5)`EX>@=AnC+i$m?& zwNq=?u9Yg1>~;y?P>G@nXbu1)H8%SyeXEQ97Z}nhb%}>P1u^ z5yBucCphd}s-a^8E54GsxmSKWrAZzHz~6CYAts=9fno4Y@_l!s20w0&qyTIr&jI3x z{3T@6T)K%CO^D%@mtHcG zCg$lp-Lz??{qJe}mHNfkH*Ng*|HR8IpZGGyf0y0+_W&C3NC zeJ8VOA;ef{?iZhh`c9G)c-rXP$SZznxYY*@nqsY0e8 z4}kPM&9wP?o_>wC%TC`QE{$zb!f6sl!B8=-uEu`&L8yL2)$p_IqaNMrXp{0IAJ5m* zqO3Wpf!Bvk{-MlNKLQ5@wypFhX)b=t$!DP<^4ypC5z<^HW1o{ z#+1~31CUvJ@VzP%Ydg}8ubbZ!&*H|t=Ux1BFIPtS5tny$>eMxl0guX?1^o!=9q{*_ zPJUd;$vIHO_3~BMuf78mzU~;PVN9M(ExZu^x2M)Y>#*Q4|Tz0 z7`(2qjhTD6j-;|jTswe2;v4T5@`bT#3< zTvW=3HKf`Sq$7RRq<=R49E#Tbf9#zFpjE}z$LDfEu|+^d@*-W5A}Jtte8#h|u(7+I zg`#4ME%L-fu`s{@l?G`{6p*k81rsdl`~TKC>$o}GF7Ac1VBgti_H?b8H7jE3&T>kd z`DkKb3P@W+VQF5!VHi1fv{7XoF*287iW76;I^6?1&+ z-g3{ByPy2fu3rY`wI~Cd{4xc_VJA3K@!nKj;1ZroK>1S_{VZ8zQ_7YMB_1)-m^7r^ z?e%n|0$KpdV6~!Ut2Soo*%$8Q$`!|Nl`WA?^c_!?*B;*a#(gD|n+HV6ZWr>`S3&@9 z41LoyT-1_$S$&-{GQ?m587!J+Zjb`uJ&xzkZ^&l4g9#%gSPPe8L+x?_>`4NI^?1?W zkc-falN~HsvNZl#ZJ~6OP$D=dm{yINgaQ2@7t_)i+&OHJS0b>dqWBp?EGXu_|G@{X zT*phf@!o1PlV?kcK-r^^s7It6e1~Mnf&=M&TeSxw42iIj1n|qupCHNYd(K;p2z`%umFqgZ`$Ov(w(#4E#g<9S`{PZTRoE6 zF_Fkdd;vLz^67zV6PTz#E(afUP`F*B)2qcj*-auiAbF^&F!|JR$|UZ}isPMeSxM9!^dGVhPD(&^|=|@O|PZ;&J(PWyCl3ia9jK3_zbi zVY>O2TTHjIGf2fk7TsU(-&OT{2R)9kIcff;L9|${ zJ4@xCs^63Ke4;Q0ibLC#DCU}Y(Xw<(JYrPI=>TZz&DGc$)I`G6WnLEfp}T1Fz`q(9 z>}QGjqw=n|z%n^_0KRd0!bBC%f%1cih12AWk7kB9hYdAbhp?#E*J%=+q)ZwIkYsZD z{>@$g4nlx@h5+v1mtP8dwr>|^&zmbH+h-7LC?#fm#gL3l08FF7xMQx{|gFFyA?6Y=TL zzP)iPA0lS0&u4!TFfzM#RiRu9MXQr|rwl*-{;O?%*nBxKM$lpczStJuU2itebSP0hHUs@WrcYlD-1^B$8roOM+|Y(hYJ>cXXE67 ze*Mbobi;r=TK#j8(z;(RjZ;wa9f$!~t1AW&WVS#Z*c|O7ag#f*ze$_j?PObWm5zPb zV%*lQQ`nqx>O~e+a+y+EaEYaf>A2zsl{ayERNia#d$HU>@rSVvxq*8XuRHDeOIq2o zv0Z|GIKS$|0jxsAGNt!^^^*g1(;#P&r 
zZ@&W%2va7081}#YPD2KSjh2h}{u}OFL{gH$M9kCo+@ z6=647Lyz7sQm((%4nWy-&6U5`O50B}3xgkZtzp90s}g?rmn1+G0Atz^5XZ=T7>JQR zhLP(=iAb?2ern)ALykY`y-m|S>`q%79_|~hDr$$j^o&vae$rQ1N#j=1>b+^_g3dWdZrmm3xV~_XOhH7$nK^&?8CrGXj8Z~UlV!@7z9dXQ_ zSurefss}|gM5gkSw;fL84}xQ-wc)W>o{_M`FLlUJWllheMvQ7`9~cyPPyT39teY-v zH-M9gusc^8fr-QeFoE2~4_ZDaBOWyT%#sF>(Guc*=+Q?)1Cd%09bxi-Ons4N!SHua zmoz~RPd)u~xct(~>@MaxHqy-6#s# zpM3TiQ``Xc#O@z;LcCLI52-Km#$CJb6MhkgbmG3TNt5)hUAnAN`yeLpsGjzEhg>Kp ze^ypvDG14>S?5RyCa%3WwPnxgRvt9 z-d9E5UPYDxbpza0eQmpy!_tm&$@iN5ug+HF8JeCqd8CKI#wg@FOi2R%h1m?@DNVz<}@{X{R|r;v1fihsn$(Jx>$F%R8F}NX$GAER_93ZethEDR-d|CuVO@ zlCA0(B8G7TMkwxcwnVNp9+`A$GmJsse%GDhH0fJC=)i*#3XwgJ1fp|~h-(^uv|yrf z2P-MNDUU&7IBL+eNs*owDFTZwP^(5wbEyxDH}!vK)W}e`X07m#zz_x3-yGg4#N{1;ScGg)kVk=`9cus1)?m- zKzKP(3Sh?BOZB@{&pmpeIOUbJXAPuw-BEFE6Uh7sl2^}d+qG@d%r;iEJDU>vh&xj@ zF#NtkX5`=|Cq17VDz|q`taFPQF;Z8Dcv-wW?!8I6B_1)xTfLqJN*xwOae}44``$ZY z&z5^?@YqI1h$F^bw<+}yb>#t~69vYJV~##Lw9{$nx*KjV`-Gu<*>L5>mxV(QIW&xy zLgSHRMvHt?&hD2`?)~|~d8QuCS@}s5%{_oGqV^;H$0sFsaWIbTsGT|B<%zyyS|S^mkE~7`c|Zoi7CskoaMGG6oV$vGD&;7MJ6N#2$su#QD5* z!PjEAX&RP&`@KXRjxz3EHcOm*xgUnBvTTExoxEt=lJpMZ|65ccWN`<{T1>aD-HcRC z@6t!2nykB}PoFf5-#+&6BX(lka*HiZ^U4#V1YCFRwQ2N^;2MpI7u9Cp$;~U5str2D z+krO6ss?kbie!U*oaEGI%s+tmJd96VEa+T+GIP+fiAU1)-*IrVg`R1SU`me+y$?{5Sqs(@`uU8M8Rbe@>H*0 zJ=~+6at~Rk�LZ?tJtiv&T13nkv4~>EJ7o7dg$rG1tCnbK6C8|1^I3AI4^573}O8@h=SiB3y>lU7~e?W^GHrBtFq#cR-j>TykQnbNVh0~ z4Z(8VtsN}VIvWiDKala*apOX{N;`&czWX-(N4E92+l|8a-+yO6IkgmZIivaJ=bf>G zf(oe?3lpX974SaNYnsG5*4=W`&EaHGRK|=O&1c&T-l)x^^!0K=giE!e9!`xw)&oEG zJ*&TFu7&da_Umt9o2|CC)1Hs9tRms>V~BUk9t9tZQNQ@Rm$RX|j3Y%Xwdgi*>);K^qIezr(cj+8eK{ADY<-+0(H=CG-y) zJT`qpWSn~R=+TrC&POJ~3_a8Jl;J+H-`)4vW&u)h1BpUC`_$8HcnF8g9hz&7)dOTc z0b<`W{j2d~6OZ=Fjr?M!i9|e^3suC?{nFb*jl#nx@1uAy%1EaoCeb+J7k^DTMbQGW z)wL*6E}S%NwbVe_ndGE2IBn6Q#l~d^m!n%MFTYjH7;WKXSvpc-VzS5s`$~^w*k%h; z5a0`Rv=)T*$#$~j0<$|UdN^F{86x5UkVZ1Vv5MxQt;)w#StWnZ>^JEk*HuX*RYXSm zJ#c>l$$8$%@uKC2VF+?33bVga{s49Mhp=AOsO$b7j(BJvxs_`<{4)g2@m^8V-bKKN*P`gDHqP;<$tWOduZ=-z60ZQ 
zS|1~lc12x7aM~{oKzqW(1w+$=x=R#ukqt`qR~wc8Sv_CrDT{qZHYig+oMam+>h`w8 ze1wE-*P#9~0YRl_9IAZJEvWrsI}b%jyyP}=CgEs%jE*DtkD`xEfI4~ywU2)L&sy7IWu`kH5cVBkyg<*=c0VS*{ z!~+rr4KJJ)`-yAMIXQD$v_TZxZ7V~~LSV_@i>u^G?NfSs)+fH?xcnyX6(Xs^UXN_! zOVV@c{$3~Zi2U*L@|}Cnh7B8p7p3IkRw-eIB#X%BA5uDC)v-#qnzRT-n~Q3x^>JZWmfe_j$e@92<^k* zvv;>{Cfn+1eJlm`D8TsQlTX8dyY4fUfc}kk@KTJ>{I}0rBRJq2~8fG zV*mLBahEzWahO^duT|I2OW#Z(TMN*l1Q$CO*_l?rxFEN5TRtHZ%(@63p9_5GY7LkY(71v?k z1NN8ol6t18Mk6HVEeA~_KS*?8sqSJ?Z#Q6vdL-Np?x^}ic3SMPMyk0OA;rlee}Gh=XO0;O98-|(|Vp-=7r}5 zg}R~$EdO>z@fWDv`1wgvuPG?Z7AUcZQ3F8zpSbY0BOwbHm+o6)V@||`)f!xg zFMt>Tg)hvu*(hl-A$S1l<8^Y^3&nzlrjPHXPxL$81&$L3V!bAf*1OkY;Vh#~#85?>3#6x&mAm`ywe1t-@%)KTy66NQ^y( zUd~a3gR+2(&@uak$f?|)mD{46$ZE^%j%Jo@_rY*fai@wJBD&99wK^zTx-@?L0 z-E;C~l7_bnWJ&CFB3KTx_qHNTQ9-o4+%x+eV6$?^ z-Qb20E|OAhlyK9|4v27Ebm2vzU8jyFr&=6k^}GwqWgWnx!|LU~geRYRGThwzCTS$I zrL2$gI9_ejL-nfBrgiHz|99jO>0gTx379>1PLQ!&8Y5k8(!@OT`G>J(R!#D_<}h~I zIMuDayRBW5r%f>C&d@SC(Cp<-Y8RC;Oj!CbFZfv=Pc4IBTFHj&||(Oav?iFO(T0#n?0Kd+Tjs zpRV1?NWnNX^a^@vqkk`4^=DENZpu7889fp?PgS4+bS3nS@)6u8g_DLyO}a>pKXjKxWsXql$to*ML1hg!37zV1uD4?oX+k95w-&|iR z`exB%dTP&}#;r@dywriRQEmY3Hnekbl2FN)y3 z(sq^NhlO7+v{UjHLYV4rWEz3>CuYoVy}t^pE>CRIO}AcIA-Em3-yy6J=j*OwjLm7h zdV)UAh~I9ncH#8ndV~k>f54DD%Kdvw(EoT{d0@~yDNR2ic9MU2rSq3u?E_5>nj%suU7*C!$;i&30hdJ9IVoYG9rBeUO>4+Pt$-Cp4| znGKpE(F3?Fb1F0C%?m`sQ*IC?;PuXJS{{C^lv-m60Yp<=BvdaiC-nv2eiIHm;Gj?` zZg6;#L^Ocj*IjS-!?Yi5%fqJW>FP4<}xj!^V6tQY6)<(yqDs>UHW@ z5J+d$**4QV@3KqUR=ZR-iUEvDFz^9t8|!?9Q&S|6o_)*W4{H!iFTc^ABG<1&S;f-t zzYX;=cF!WphTg;lxD#eBpW5?OLyVASQDgzUEM_#-#pYI|pBypPR}d85PSN9jdQCxL zwm?ZojC4G!pOI2o@DX7DMT{Xa@g?0A>8`;Cn_ddDZ3;ClQ)?zf0!egUK9RjY;Po_;1&s!>fOwKq*g z9KmuzFrJi8LdcESyF~RCq#yE}K4W?~>zs4KtOfJzF0HVTqUQ}V#&GUWD}D=i-E+4r zU)*hNl3m%eYYR^|>L!PeB93Ud?A~c3kSgTZ3opAg)Nj(*xTC%8vWr`o-@fqcD^Ap>Xlc^MxmH$0P5@I`b3*!!8m_YnLXc6UNcnkK#F8KX z=5=Jj^F`0^*`Od@c)|JMmYe$+QuH{4%MB2o$&Ncl=B?(7L^NEoVwdSY5v15&T#S%^ zAQ|Qdq2#6vEMg#C?k?FuQx1ptdg4DfnoE1qM$NRbdFal&!^E-U5}I;~t5CE*qDnyZ 
zYSqkLB$1peMAk(CLbNaWh`i~|GR#aLA|RjJF<}Xpfy6Jm}NPG$A=y0zY89q$XF7bN8tN(*>H7fND9kcgE} zgYgH_)}UU8cn0c#eZ8+#DMpfDF(}DE`LcQ_HxrA8MKAgBZ@8WY$TM1JNS+zBGAu6N zJWg)g#M44W+e<@UHBLEfKE68ijj+#t`^Fkvb#O$S+jR39j&g9aZrygzaPpaFgqc$R zX7yH`Ha2+MQy#d3hv+WYNU3qAr{XFfCqfvsFgWdtGn~tqA_KfHDJOs(PoP{}tB?KI zg#Li|&@|L#pMAo^4?buNQC>a7fou~Y%`(-hR)b1o3x(P!f#5uBF5N}(aOh^uWt0tr zVJQZnkC?A8y4@t?c$W+Vy!Y<#u%FDbJ^Jt?;Rg9}{ldIcSi2BaG%`lp4^YzA@K`BJ z+24}hsIi$H!guj@P1|J`(++2(68zXHmIhz~Wc6fMeik`DLGtctfINBkMUXUj=iys^ zVXD-ceg4tMMkd;4zx_g^y7dF*b>K$*@u%h1?&MokK-w^hz)6Y$-OdLeXjV0-6Ya3E zz}ud3pi1tHv(E~{MvpXWA(Xc`0a2wnQUJb3vpo_FM3pO5Hi`u0pbdR<1j%=>1o|88 zA)TlajVa2$E6z4Z&Wn1%-P}p{ja(%l_5l2>v5QCrfQV0IZhUK=xkoI%^Ij4khh;=c zH9wGVZrhcxz>XDSj9n) zzat(3FIVhW>TjX6K>k>xhtgG2xgv zJU;n29V}e9P*&^bC;U9DhsjHuI5H*OVbYpXr%^+@uSD$@+^xThd|E>!yWMMLBy@@9 zCH=gHr4ID(F6!HgA~{`r=_RJhjW*aA$kCAu^>{}*HzA|Wm07R0O`GLghBL2i;^D2Y zbb0`!@g*Uz31_N{r9{WTM<0C@M!)}oA@JNPdiT6an@iiE^rJUcjCmyqVEu0*x)uaxkhn8 zf(*Z!_f>$y@Ds_bz4^+&?Dx?)o=$FE%0YbO;GP&TAcXC<5|dL|6ZgQP1&b~WLrm;` z_un6|ao17zfFRO>pTa@wPc^Sz)p+^r> zN5&l%eG~1s!pJW-X+Syz5NQEG+*q*pzMwxT7YryPw6QT%5FcwE#v*QFf2bR}Ur}uF z&ksL^zPH~V+dS;FL)fK4#ZaqO1~rBpxk_QTupg2Kj5dS5`I?C8IA4DV;_plK4szT{ z&!EQxYoc_}?fxi-(^iNiGEiYylqx1x%*MqWfT6qyYfwqd}$0 zmDjb~qeXas_=s@mK?f%Wqz|wnD@g$pW?}&DvT#x6Xt^QDxuR<Nn;3eZ}vg+rHf;zHygn?>Xk^qZ4gdRArzoyj|D~@czP!gAFNm?$*^#2x#n~e8mma z*-5u3w{UP-R(?}n!eNu~;im^m15Ug3ZE|{~P0*#@a5tmmVE6~`*;8J3t4AGjsO}i6 zf`!yP>lnl@YeO+h;m7HHw4SMYXPsHCcLRk>kH13yv5C47tU_Z{RRiT!!)hX}wW8wxKb9ofH{oh(O(S_g$tSeg6Xw3_mRUF?=$8MtDuC z5<#kPhs*eQnh@h!#oJz)?4<|97{jg`HMYiri1Y4wH6 z)49wTXe9|)oOl4r=H+u2eZON|mJCTPv9(9al zoH~1u0=bltGGU$KN^zh*_|QY)oL;?bqIw;0jSM7?(K$lam8w^@MIc(20u>^g$mFqN z#Y(F`8hje+t_9gWPDx(&!YYT?m&J-S`HkWQtbZVI-BHXfAVhZeg_UPERnPR3EubRD z!MO{Eb9V5NG~N8UbXnNC=^o)X#qpD7c5ya^Z`2bFQkc}@^oXut)Z@iT&1v+n07c@h z;?KnzPs{bpRKHG;-$*^v^!|rj1C0g9t@29awE3kx4`4HrQ7a?-`NQ&%-Z-{_(zkew z^&RrY!p&9JTy11g|H>GnPnYWs|3+~R!#uSAK?jBpCQS^hfBH2H8#c_$q7nY`%Pyl{ zWB~G}65@&rFX@RbGVm&>;x#Eb7jY$Kc9pKo(9j<(vv- 
z-~lL$KOPgZ*3ZBEY=(IkELdo&-ah{HlQ8sy_ro93;A0>XX45+LO$Zx(j%(JemHyAw zwh(hlU{U_=$E8X;b}I0+3*vgaz4i9n;iOYeG1);Ts*`s34iW->6Uk`$M>9iJX&d5u zalsm?yW65nxv+E1>LTU#3w_z`ADv3%HmYdc|XE=bYa zmo{OHLXJmm9^?C0jo-RbwN|5gbrg87)%CzyQ9ziqy5cJXr)T&0bEDevOFft=d0o#$ zrI$EK>h~9V0FgC!Cw>|x!0g4mH~i!6f9S!77$S5zOF|;Uc;ijI!^^L~YRWT)zx_@) z=e%=8Iz1~im-<4$=VKhX8)ldHekx-{m2J6PWsy=3XN^?|e@ZlBjMh&B*W`|v^l173 z+=#9QMX@hdn-{b9$-E|8PhSF#Dmz2M|&Q^UAXqdkZz{%*2+ov=m2Mz)(L zJwz3V3laoK*MV=lSF2&9RcsGN$!8uWFD{Eb?YTtvmyZm1JiIOAwvbAF5+xFPy3x5$2^Pi4-4?`Le%x^*Uz;7eJ(&oefs4WLz~9U5~SB+EM|$x3IqhD zg}L{_ph1ap$%bVaF;SJNF9iur8aG~-k&&@h%rt9-c$jeM>gNV>c`j3%&eZd~-k+EM zBDvG!?OaCGD_aD}$C01DuLtzmk6H^j3DR!>8yxQF-+KG4aP)tU4%MXyzC+vgVcs`i zhqvE(Cv=zSiC6r{@wq|1K2zk%EG?K#4|-8U%E<4LrTqVo_qz z=H3Z#1Zh8t_zwe;2cR7Oc(&}&f0Qb65PK;c2_eca#E5`FMuzjQ%$7XdDlhTi5e?Tf6CNF26!|iZ$(Y=#!9iNZ*mp zl&MoKX|$)rbk-K%T%@dQ7;06oVKxNMJmU;2%dt|p+-|SEjNF8pt!m<)1yM3KsE3!G zz<#R4#cUU(Z0>Z;R->36koPm@qjJIesf^?W=^K!8NdVd)J_J?Y3@cJ{|9hWetoY~e z7KN?q)-ye{C87KqqYShMa}2pbR3djARDQuH9RmHcb#Hi6@7|VFav5j`{IGog(vVle9<5}5QAjyCTz<=u zmw4m}as=sNa%hz_o_siUN_bJ+q7K#S)DC+}o!oA+IDvK{gs{1X=EMi9ytXuE({tYZ z1!2U5ak_(xld+XR#cI158JnFy$k6NVX>V*7Bo7NekWnkc+-Mp79nQ>g<6~r0HZbmQ z5jMj=j2n<6a*hD)xZSqfg^tZzB`7tJTpt_sjNS1mNZRoy^hn=t-~G~cH@QxhF4l0u zQ#xGgf*6k>89j`zv~IfZANca%(7Ivcu==mn>&*?zTk}3crqx(4UVHtuzy|Yeyie(kXdl0e))SLEjJ3BX;uCsWFST%CmP#M~rknyEe4tGbq=5 z_0?DD$ksvP!U$_&0gDIViwFan$AQxw`Y0id&Y3$W%+!f+{KSdIeodHkv4l61T{Vo= z5)Z_Q+!(u4()4r*8%>wrxdqZlPmgukFS@TR2>3~glLc(xAY4b5nWc->kEs(UnOs$I zS*Xvf9JC#M!NYr1-8FKGMChINh8#LmfB#Q0&3x*vD?dr1#46N<1XaYAwvQjveCuF!)+{=@=+d;89*C@JgP{4E(p`MH6S>B zv)+#PIbrGF*v3WoxzZmBcptfIo+aoHcU5yg|00}l%y9wTyFg}TWdUA1e_^OEb4Xk} zWEQ3bRcoUaCE6~?46e~Qa--YTMRCZ`)xs+I-F4@k_FQ`T<>~0~m0|@DP#KM(G{##) z-9ZCvs4@daASZViXJF)@K5zg4KmbWZK~&;m9z&w{d58g$Dx-BoUqR5}*ST}J{wIq>Jc>AUa04?30F9RT+2(EehemTDrlcT`THr2?`jyG~zJ ziBDRdUT(@%kTC2tf&Ap>ahRw)UiRejxc^LXq7IbS4R*O8(~<^WAIinz^v_zI(*KF| zH-6FY{PueB4kUk%`IfA|l$k{sXf9JllsS6B=R0E>zsXZ#N?q3Iq>_TXwSbQWHd{RW 
zMwta|f3NhIzVzCVFz~Vd>HBu?p6HKSwQ7aBjT`AMmwU#trrs^HTStbBphO}@4G4{y1bg)L@d~XT zF=9m7nNy|)gkPsknrz|&*;ST-#RC8ZLHfQVBSQRs`Sll_2){Q$dbklMPWmv65UaEU z`XxI^P?$+UIv_(%Po8FUU`O9Qeqp=pJYCl!IxvBP=1W~NO9Ow}jGgUsA}MXPGu-S# zJXwUdk0t1j*9pqPI-TaDOcDOA1Qj>OqEKc(88>VIv*h~V{m5PLa4~V9E|I&&(pcB^ zYqoKJ(XPySfNK(zmG8Ijz9#Q``e~=xNe`y4efI7Wo_+PDu!l(VoILXiY&dJ{g!=N} z7sH7?P6&q`dT4GDBfB5gaqc}qj45)e8lj+2A`zo1#jc0bBApE}e$BVveV2ZZWY}Q- zK(-IS>U$Iz#5@3tI*USdVL-^4`lj3Onp*%H|mp)NAd!m~sG4SN!JDpBTAS@e+QK!fniYQy%Vs=jndC=jo><#FR2C z6Ebm=#!nE*PAdDfsYyY>Tfp0y@ek)IRy2L2j9}f6Aw$xz`hWsnkQTrlAmD?Cu<`Kb&p-wdB1ic7=bwj3A~C)4B8nnn zP`g2W)2IR>WNifDvU79siD+8UcaOg@TwlIduKdI5?WftoET7ulHWuuZ2R`{^*lyR# zi8kLDbr)xK3?Wd2t&%M7W-^h_86UB>$NqDAS^4$Wo zH+%%ESFV=ji~YiFx7-?-hxf_`WSiElEnV-E++;%k(65%RHsEZ;;i4Y%!N~CX>#v)# z4Zh{jk08cXdeG!^x}K}_*rF*n1^H}&5{DR74?8W&JywGP`JuJ4D4%}uMHiW&RZfYn zu-6fzGva{Ievpc_1q&DG68rNoRdR%HNg#aLH{V(F)T~>_SfZT+6H=3Nr&)mbpS%q2 zXuP6$XHuBjJGqqCFt~<2Z2NpG3h;?IM#fT+>q9hx`;(H?F4@(a%>qmmXol${HCx=* zAn?8*u7sUCWT*$I<6HXU4`GQoJ%3xYH0vaH*d|n~v`g4oixx~0+%ePiERrl{KvA}h z)#oqW?W*5fw?d0q>{RP{dVj$&ecK}snv!!ic>diYr2Z623wWEN*dOG0|9$tFioyeA zA`0FI1o=@z$0j8^pwD2Y!lWCVuocAs2S@cvZRSz`2$K);xy_n2v$Plk*?@mMN$K9J zGA@;yV#3L$!E*Xkq7b7xQ+*n{Th#kG@jKF3D;bAZ5Z%af63(TX2{X6hcc-J zr$+9Vs^~dQPhUOwZ5;B3GA`{<*IEgBg=!i`9<1?GkpcUNh9Km7!!}a%pI^Pri%HZoqEr4z)#H#hR=K@xhN^8L>2RO&n^^uKF_t#K(!KB8 zS*Tcys2?(8fa@B~sa`O>pw!^!pMJI-K8nR5&A+Qn*cg#6YibdzS+$xNGYaQa?Ic{v zWg8^jYlYwbkVcgKje6jl7ry&Dl#*wkA7ti=T;zc7DJZ%YU@UPj*i4L0{Y6=+Uae}( zAhi5^9%Yp~8mzwj^s{iX7;R3}{JtwABRu%P111mMQVcxxw3#12ZA!RO%IojCG@&+8H)h|=>nH~C3gPKoP%F!0o6HvLfldeBTK_y; zoFEnuA*APFue}~>>a^{3qpcjG@9yWWx2L=A?=EpURipave@_k!FTF6>P6{BU`~qpR zA;K^;E{GhmEx&iEymNT*`RC0xrq_jhh(j6OR%DEG@{jx$Ar&u=EkOIzPpn1s{dQW5 z;M~p$paO8>L{;6}Zw|E$E`5mdkRflrDS6ddp=4kobTpiIt0%iHQaC56Jx|X|oxvQ(6t_`$VAr*{7ZgM;&#P-D~}Om!&St zdEP~{$kjXLpl!+LiGfcjkF})!pm`^Fvc@fEr7&{Bn_W@q_8C z=gB7-QuKZzE#goPM0XzP|5#|+qPd-5vdbs+t#Dg__NO1pRKigDVhr-oa_&@%ZN`XYSv*{62jc$efTzZBOobJEAHk5eC` 
z@!Iv$@ACQvXMgEA9TaZY6ezWlh8X?E8W{?>A-2-pP+E^3JyyT<#+&QL zrcJUz$l&nPVS!p*pvr=tMRTo?hIWz!Ep0uso7ve+k$biTr#TFq`idp1!%YYf-gSN#fbJ|LFD1o zevuZk>#n~(Ob~M9j?ddOH+hoA_A2K%jqOh2qCMrLlSQ&!ZW}Cy;`+4 z5;^u#8#ZhhuKw>e;Z5C1V#5_h_~aWOqg;)#hBnpHBVHC~%!&U@Ik{&=j>#qyVTD35; zkT1N2l1VWoLi=(bx=eDOvbR_0cd%X!+QtzjOkI%Ej+dnm0Fr)g+b)I_w-1v){WvtJ zkrD2e`mx_t_P2))lQoE{#_V&^dFPw`Nvm6YJosYBedkSmZw;;5?pbO#GTx?}lgs#k z^a}69ZFlz#m~;b}i@ru==-%;&!;c6L_PamS)tw_;uV~@9``){3ooLayV>nAj#c#X) z_VDa;&xJ|S@2a>?PdZnx+%cw3jrkZ@B?>W8es!cH$ItYBq#iE1(C7e^l|8f7xA*P4 zcIdF7;YYDL&-!YP?Y93N53ztH?Jwm6A)?hE{gs>$cdk^~xY=GFJXjOpaNF@iiXp7( zZI9RM#~+tjmGJ*oA_f&J?i9+4q+C#7BC@E%iFw@(H-!1$eWUi=KCy!>X!;u?J_u{o zvY(BVnqIc(H_Ytvfs9(U!kJ>TpCh?Rm@B;RvT6gqb5a~VacpScqE#3*;yojs9(}^G zp-sEJOq`^H)bJf}zyX$or-w@&ai1OZTv#}Ju8AbWH`Da*Yh7e zK+#WvSU#J~-x7xyX;}>nkYlt49obE9|#-S35m& z0%I_hG@wj=LPT55=np=SIiw2V#TQ=`GsO!sQu?37-4T<3(+(4ZcU%@g?oLr;E{SyW z7n`WvW%+`f-#U{AND=v|JMX?LJp1(1p=F~c_M5y*E6`>DcYu(qf#RqSGLDTz>RKi{ zh-Kj<%?213Scl*eDd zov1fne}gH#D6M4gY?^43|HPO?{0c=Q?+kx8{3^1!2CDV7&wX!%I&qORNUp30h#FZ~ z1z4`}hMra6-Qn*tagC;{adJL>N-K40q#^PEB@!`Ges$&=Js`-kdi0t$Oy2B*Zq{!) 
zzO3w|t$Os4hwW~M6WF%fY+GWGqsnLkymms={UGJrRaPHfe&v<$n$*x8bIdWea1$-VYaFdP(RfA@NFL#26&5)BAL{w69Kx zv$PR9>X@Sg%BrKl_+V^s!oOX2!6>bOlN<=0D;xWf3G?6e0R=2EFPRzWw&waOYijg}tQx0-df)mi`d76^EkhN<{|G zk*Bv+Ze8-_q=@FA>qS1jqi|Sf?m?-sdm9@^W}X_tLd$)UXo-V%RW^xjhMFY4J(B&;=;UwT>k{P_#Q zxJeU3O=-JvO9aJLb_O_$9vq3tCrz3#(MVjcy!uMmyYt?L3@HoYcpL%Z$0YFIkW+@s z)YUaNTp#9tIos4u{i5`K{ry)HPeF$#u!~NgOc2t-%?(8J2Jv2g)s-fHXF~isIp#K< z+~l=!!^9BPx}?RWsLB*=H`*9T?h?2b*|hy5G8p93#~$x*+^4YVV-e%O>hIk}Qe(sO z!3Xb!%dfmLbnetCo>r`Xy{|!{z0Nw@w5~u>i{@_;Qh!QK3($AyB*yT1-}~+{v5Y-s zBQ+U0qQkPyPIRtV>kEAv(JVXMFKtQ$jncOWCKDZzE6TlGK75gIp>69MP}vxsjJ9n zkUD3JEt|VCmfW=Xgg`JD$Zb769C=}Z;iOZ0hL?uD9w(pvmaE0p%gSlf)3ZX9pg$TH zS{P!xZFHdm_!T91TR@(6b70XOKi{_()71hgW|;EfB;5lI4`-iwmdUR14UR(0opJg@ zhbtY8nlTnY(xLpg^c@?gJ9HmhC8MUv%z55LS-(`;McV@OA$`2F)C(Tp^JF8bqdX#x z*evsH#6wY4s^L&0hre%-A*lnT!5HCTeku%uUiu>qdd1!KkD+nTl(S(Alq$sN#n&L> z^tgpwa79lI28Tu>u-dUPCQXe z6T2n`ECb&Mx({N0RwU-QsfquDunq9 z<{O!ja(WvSMVVc$ltpUXgz;uN$SD@|v2mVi!?aiHHeuGRSyJt`U!pvtM~^l#m|1pL z8&4JD9Q?wIp}tN5l$rSMQuaYo`-aWF`n7jAD=_qF3QB(qFt1QtQmMjDMp8fUutURV zQak8t2Ad?PQLHued3;LCp6qv4dBXV<8S6etkCb0y#WSYH(%)J|UxQMK7-<%D@;~Zq z$f}F<%*3y3xw31kh_eq(IP$}*&;gQ|pAgZF$sHo7^#47A>on9g4zv=bjfjb?s8BY-2|2l6ud)1sD$?NBX+EEOkuN2I(sC zPWXwCIS3_zaJa^QSYbOesprZ8e4JRAST?NhkWP{7IyL5%`kR{;D7A=DgFyYC=A?lN zR8}OkG)PS#Djy`dX@uN_Aw5n>+i$zQAzn_1kUO7$;rTFBCqxthgOmvuMSh;P4Q%~j z;btIzF!Ft`Yf zUwrbJ?QjWGl7Myw@$4X3ObqsN8ZSy9KgI=?I0pRhe_`hu)x;nb(}uH9GNF@o*NpOp z>#n^vp~=Wk+mPES<8Hg|7QWHWzkTOUc6#LA(Z>)jeE?bb`rbE&x{ZZcSEsm8i?UBQ zR@%&E=F`to4cTqq?q+er$1fBcp4gm?z1OI?vnl>?`>|P++n0bIO1qNsz(_->UH7x7 zW1)MO<_5A3S0`82Q2G~SB(39* z(+M#yXXcAp6yd0Bw#d2P7hil4uD$L$A*t@+`$daF%RSrJN#UDC--o_;+#aThv0^~~ z$3wHGO-*0wpMU>l`d~>LovL#`{~~ni*fDU?o-5E~!(2O8iAGCcr9A8I8@wWM-$m%ecO7g^15^WEpc6t zMLi&=a{p3A5AxtZX~+*eMpsc<9ujquH>;UgG;YnM1NJY@mO=?jXf;#WdBV#U5d}m}q`qxgtCzWeNY2 zZeI+|5|#(Bv7po6wKv`%6Gcsp!DhnPaoPmEYC1Bb>3SSa``2H4oiR$SP}-hvVb>{@ zUdd|#xKRN_sYZ+cfXym%TZ6#H6yqrui6 zNY|%bHs@lXWSKoDL{4T5NEA3j3Tw1SvoP?o-T=(a`bL^Ng 
z;qLqHwH?1_-1G9w!Xb|x5?Z!sVX|uO&pp+}mt7j(AN8IgGwuPY4=16y3+IOur22`) z7Raieo*T~gL}aZD zjrv_K8j|qK0h|e^oyK8TjjvG(k6%i3Ba4PBm55QD3!ThXbiH?CA<+WA?$b}v7^mKj zFZT8$mw#BeO!!(16TkizLx?D5X6N5sViT}R1BByo#1~CI`VIe2KKaB9m_G8*Ll&%a z*Dj_4u8ZUvrSsJgQ8Ydz&x4LTI#k`MQrNr0-iD;ehjc+iOBQ|;dOdfx$-4Q%8I8}o zmyP%S10D|}-g!sHPOl2b9dm4GAtcP{>84xygfb!zb8^JSC7j*gij+J5%Q=Bl)_wi@ zg@a`SvTohFB$HUAL82dus})$SdoJ3&Bmm_wrljq+2{k0}jFymgojU5?PofV3$eGRe z*rQO(OP(-GRFll?)M-=0&a#)dQ^kt5qbJYgvA}QQj1m_rB-H&5JRppo{9!o#%ri|U zk(Ts6^*)5;eXq3Qe7*GhP(vFs7Q~X$=F~9-MbiRsp+gyHBiY7^8dJchpMGMluLsQE zMG{SGe8!mpQH~sd;RO<8y+2hCh_PhdgQ!3y2}&hmR0C>#FxKOpbi&(_LzTd;DwfUpRGAYg^|6JnDV%|$YOQtG&JcS4LVg5)cbuE?X$hs!R#%=(&B9!kP7 zH0mc#K7eS*8v);^b&5Dga+n}vc7Vh+d%I_}Y`rKzTJ z|BdakdtM-g1*ad^BkZUhG~}imue;vvRU2tMAfj^2(MKCct+!8Ml$niGJ8340Lh@MP znLUF$}7xD0>-FoSFJA1H`kkKEDsY+Yv{0H;UQUdN1m0&kM)PvPP9ERZMpE4UL8RO&TY(9Jz+7&K)~h17tRSl3K>Eo(@wUHMD+wBR5@c zjAWD)cys?*S`j0i%!=Js1EiYVbwUJXKpq`BcQWyYoI2J9AP7<72Z&ujYHTT_^mP1$ zxLeW*5k!>P!sG*z@9gM@z4ev}=#&3-(q?kdfd_@gGCYecn*%2e5F!s|gYJ;(xBibl zYNS(6KF%CN+E}_6qq~J3C!H92$#Mhvc%GhK)b%p%rU`XN-`gz@zB>}0HR}rt{@%O8 zL%k-ALLcc#?bfYZXhIpqW$Qp5UT59`6q$cAeTHdVfka#qfU=d>DSz?8uR}X+JV1&7 zI!61-9M3gZUmey+#0HaEoYLuc>Qz|aZ9{sb>yM{Te)P#I_4z}OJQ8M3on~S=a3D{e zHqBIek%sr1zt8$&RygL2o&gncE5)4S-#y)wyXm%oU1==&LDwuTt>52z`>init+$PH zeT6g~!BswH+T?KF1s8`Xa>5@K1c4yC< z8=BUx8*YG1etk27WubS--Qi02Er*UNvC z#xyPD0NQu(_ju;H5&lqy*u7|sWZI&63oC_h0%X54d?FIm3o`tSm2?neVVBw4DYNQR zK8H7CfV%Ikw}t}`IXJvC>@DLsZr!TYdhO_a%LeN%DHGpTD&)$Egbqcau*>b`O1UMh z1%#@6+_V%U-rTRg5~JS*9piplOrP zUz~&cOO&Ce)NuiX<6*%g?PQ34=i8T3uQuR`C&DM6eHt#4X8X>syqb_P3Vh0HeebKQr0bu)Q`& zVvK{5w2c_%#*P|o3Pn&B;bSb9t;0ZKVQ8!eGk&k=NipH55t3oUl|MN|3Hz;qP*w{F zh;d&PZ3M*=bH%#m&6}rnl6C835+4J{Pl)KDgp^gYUY&5wWmm`mEC{h?qJ}vwn55;4 z7x!)Ny!(!k7Y{t(z|dBTe_5a%NQ z5n4?9>EuvDNIR#pdA?b>(J~N*-(DZDQ)Y2JUl^Ode#6NQ?__4-b!an|3N|dod3|ex*Js*nC>RHqN3LUPeF)TRr@A#2{foF@(q+V*G^IP;z6O5LrY)HUQyxh?~>SoOID}^0By0u};xi zx-U;T`IJOF1}Q)|PsGxCSFEWc%Wt3M(MXPkAGP2jiR8y9Y 
z5{Z)MSMmVqf)u$UMfWIxf3!}#;|GJx*s){7|DAArm@#&Is8y?0p|ojM1^519tlWO* z9bwG)(Pn)3#g|?b6VNdh&BO4+-x*WqNHn8a%NDk+_r9_SsXv=V3()6m_!dha@xpJv zmJQdPOg4&WJi zP5S!#Z^Jl=B|PxpgJy(xpRQfQ!r60;tePAzKfgiF$SSszS*D&nds=)A9kfyiU69Wf|tMtLpa*tMF?6`56BgJ@l?h)^l?Bz$IXkcKO6GF87 zg-hYS(9VOQHVVb3m2i$8aWi+)%d9ugzW z=n?M;A)aNYLlz3wB;QBfBzRc?A_To zKjDHUEZ@=TxkQ>!#*H2m`rLX;cu$&B4wN!+zIz&$!AAH)odGWWzyA80z+DIRiPn?v zgoBKXPFG04d$w#9bc)v<>iKpjvV};)g$3k=?B&Pejy*yG^3~!B7qaS0%1Rs_IB$DM zW09BN-NG!Jr!{og&~WiZ7lo5g?HOKvVX&$3^Ky7u`OPK{c>39A;-VS*n1O2#W%Q0*|QSS(O9x^S`Byl zs+CB?UN+Jn%0YS1(>PCeg>|dd3~N<>2fnkjh4h1x@qd>6Y|H}xIqInObh&c6GuUu~ zdQy3E-4IlEWr+^uU&epTU8kZOU?2G$Y*drS8sjq##=xlWsMX=r*jrC((5Vp|N)2LErkOOTYfs1p5y=^w0z%Axt!VOKcQ%eB0ZbD1V_J^zA~A+xf2zhQ3? zMGhBUd{O8lQ&Y&HM%%*UdU%*~u8?^lG!9WOFQ11?xv3V&tOc;4%IWd<6{|w?28}|Q zhGoL7eQvSmv@^~yMIc=DaBYP49IhXd#=IlK(=_HD(6^uD+ErC%`E71~r3NuZI_5+z zaM0$A=?25DjbzX6L8)CsjhQ1v!u#R_X|*80V#O0hSO|>I{OF_bl5}N0|Jo~I)|44x z_uY55Y{>UP-LG9H+j@{ZV6wpvmTQm;r^HiEJ2hN+`4wiuiAa2B;70Q(NSO;WF35kG z5aYHwF?(A;&YAJ~XW=&qBEK+ra5zk+usAsa+?#qmKnig10&2nn1IYF_Z8BPkoqZQ^ zy5<#<7Kgk>&osP`Gi1(C=5XB&H-u}hy2>aFeDe*0NA~dCE27+C(xmaBbEht*Xrd^} z=lQ1GO|JzQFU(I|xbkIsFARBmXc#CHXaIg4+O-dj8a1-D5#6e2hmtm}bWy5-WC`NKH=n( z!U8Gs-bqX)oTLi^$VJZjYEC#(X0Tvvp)8(PLDJ1{TyG;Z7}b_S2N;fei7bjzbN?1i z+x^%upa&Jbsa@DSrQ{W(&6Il2M+-0ySgW{$hcJo_Pwpw_&zX}zcNj!i^y3fV;$G)6 zch~47nXY)VwQj+oxkq%yI z#V#QD0tTW3p-s`f(aW#CVv8|JBY?YYm7T*IFTZR^(aB}6z5co(MQH!rIby8?b#R}} z`7$h%n8y3>zaRFI87Lr{hk2CHKb&mB%D;`dhV}p{-hJ;qs^^(jHzp+D8wlRBfS4e-CaR}JbwOuNoXJ~Hm=>LplRkeK6Ru#3l}a7=UsGRct_mC2Z(VB@V%^`Stf-!;ahU7`np^t_>-7k)O@{$dZ}$@h2f{ zy+!CPs>YFrAD%92J8KlDzCQNUgS7)+YxTPMLTN#aPK5hTUB!{3tc(NyqX$lkL>(fF zi-pNgx3XyYf`^ONlBb?|Iy~~w!(p3UDv31F#W*eBefw>*%-*bNv&0<|$PXw333lvrhFz(op@7}P1seLS~bqU5B9+%xFFXknpRO7@AEhL#I_Syu$&tm2 zg$_V1*F`cG%+8Ai41~yl0|~5@3kTsDr=6BS3Y>)Qx%XZvtezP<%Z?)UAX(hUj~N@D z8#KtQZNL?VU-F`98J5sL-X3tI0%&fDBE_ug;%&)hVeo51LhF`oY%<1LH|xq0kIe+S zB2fy!&i3bDeoip45EBk#pmOEP=5Fjiv=Qaz$*eu_cYqXsy2cF*Sy)Mc%$`%5c-)km 
zYJr?u07SU(yKlpR`|NABK?%>A69vXAn@+3;9ed0%e!by?^jwYCrxga3n{qZ^HF4|= zN(InKbXL5>rs((o^i0x|K#EjEzfp0-CEip$AbyZzwxBc3YS2~ro8*MJCu9Lb z{*W-p9-^6~k4U0e;Q(O*XtjAj#(leV>Z*abE-d?Ld6+tRQo!)=2@)tTjYu)tCbQ?v z4wqhjdHBy!|FQb!gy7W&77o(#qWKHfLu~dTlRu;hSL=sUKGfYvx6q<_^YDQT`63tU zlbdvr{oG>biV1ndOp+>AFK?&JkRk=~@5m}1^Pi(-7Ok^!pHin3QY_Z4+N3kHai$aF z56hOy3eS#-`N5oFZ2H4*!ZQAFxgHTfSpzw?)Z^Oi^F(b^yiE#94PvC#)v0V_)Sv6% zq$(G8%g8l^8F$Mo49N0aJzy@7PO4RLcdcAQm~r*0Nthd5%sBO-5S&vYT&T_{;$av_OBQ|;dYySzLc0wM+!Iee8J2zX zU8p4GzN4m03@w}PVJg6|L_u60m1VtrOVba;2r$X@gIK?FnsC(1+v}r`KMpTRzOklG z)*!R2T7dAR3)8?fiL#6uH9AzPR4H6{?X{-3!TW)yiJ6fNORrXZAn`R5xI`j8hDc;yR*BkN3+TfxTPhJ_F8aq$h`kl|8$IRpY(P@=#qA3z``H4=2?@DFNPD=b z^qTp_tZ>O?m&u|ASC}!t2H+jy?m9q<1;)yv_Q74chsPg%)a2Qs9kZtCAxFF87B@BN5XIf#a-z@E}D~pf26EgPwmrbdg!B;Zh0@;Lkl=6!@K9 zCOx5JH2SSL-@OfnNKfxiKmTM|Q=eoQ@!oskzI*PC-z%=j^@5e0PU?561#)TuwBZ0K zq4!O!kJFsu>N|VaRq=3V#N7=Cbo|4n?m%YT!+O>m&-j%6?|8ZlT>Z=u!ymv z=hXMq(@%#Liz;I2FGluf}-W^FvP@Pwr;LA)q@AE=XekK>Ow zxyQR7=oebFX>H2OqbE%!K6k#28#D;pNH;E!S=#oY2>*Eh&YnFxy!Fo8c8?q_i+@Ms z&y9U<4tum}Y2#&Ml(h((A(eCqY5_1S6qFgh5V6PHbVlP^`;{UT_zLrWO}Z;NA<}r&VP0 ztV)$Cp@9};rDcopj2SbexOYH|+$VC`1JYswQUuZZZp|_%g@3$`ko{mr0xIlSA={b| z$2S_UzV@1hWupH5PfR=|8kw*lM0G+KKYEPGT)9Pil)}&dVz!aA(J$-$WbxHytEI^U zgi2l;1H7K({kYU_oqXa+#$06jc;jXAGGiEd#^fm`ulW0l-`B6p#wdI$&rP-km`koz zs%EWP`K>3DifbnnKA)iHWqajDau)u=Q zcsjIik5`VsJ=be*ydJg@cjS~$KMGSvjZ2VEOFL4e@+|%=?ikK}eb}4U9=_OT)i@sK z)6YH&x7>Vl=qMeHFw-Q*BOD?bkVGfSATzcnF|!3M7dP*7&%Y4rG-{CPT^w<@mJ#F? 
zD^}Wfw|0_MnTo>S>xtrr1q&BQX>*@gHt~&?@6y?1U4Ltz(5mg8IxWV`j1@(BQfa1I zfEGYh1BSO|qE1wmC`KW`i5ly7YySQrmxDyscOr$5SxHM`<7~T&sTI_j>eF>rN4=k~ zY#-6XZ+1Zmh8#|LAUj-P1%3J#@Lw*i5iEE-z*kZh{p1VJhm}7p4=)XT#)N5=&jv`~ z-&9K1o1ezsllr;^i@vqOBb%8OPkvfu3%dj#C~3%T9Y5Opp*cEsYsFHe7z?6=9at zn)!mKI02@cpT1iZ{(IfEW+)UtAfE#<7P*A{b>xr!&r5E+E_7|%(Mq<%_S>7A86tAO zsX9g7(f4+##M|(+NEpbcxRc^Y5wRD3({6}@V5MW1n$=?)lq}kVuxwyF&n$u!iMQuT zJ*9NX%uA7x_q%^bYto~-la1YzZg^@glES4D-F#a>`%lK;FUx;0L+ItSmK6Y48+{ji zay%XXU4UduSJVQvU}>qU28|&{Ts^(DfPJa5b9&_BY~7qWbJ9MS9lbTOZ?;78b6854 zHe-4iE75_2WhEYAa{%PHLib*rq=3^;JKYc}$Rvt5?PDqZ!vq6Hjv5*M)`{3nNo7U0 z1Q}3wn30|sI51SMQBC{T^~F>O!^VS@Ka3zn@}ce6kbLs#r{P(h!a7U#bLF2a6*g85 z!Z?u5mgTk#i+@;}I5GOSqISKWxX%rGE}U}G$+j3}*7qMua}tUm5dGQUp0ub7no57O zYXRmgYA7>m?QTlLWndv}tNG@0D<2KwOgP;&-kT`>&*W}aOgNhbVq{>bb3u-*wsi1d zy(4#VnV!ESo>5*LglVr!mT`pcyrOW?1s9s!BIGdW_}_f{O?d8^XQUqOrBFK~!%i-n z3R3h%{yQ1Z?I-(|&04k)XDBLY|2-8&+lF_wqag@R*G~)>U?!Bf>tM3_cMu;W)Nx|8 zfg!~eB>2R|eRnvF!%9#8_@4=0w7g^`CoGsZ$5mNboZ|T7_2jhp#~*)&R&lYNXgwJR zC~O`g=A&J8@w&7%MF$VKE4xz9i z-d0`@OgJzvZdQ;l#27~~tbDQ6uXHsTyCFKA%P_R#Y*kaX9M9?1Yu#z5o|=Y5b#!!2 zo;oF9jJS2HRtoZOML++1mf2w3fB*f>-xo~#QHuU#R&Jj>qehQ1Ta)OCg{%y0vu#W~ zB3eb#02#Hej0JyUQuQ{K_kuu(4|k;yQe-c7As73#L6NI{|p&7`a>0 zR?a!>X%waF^<h_F1WZlbJ0c#uTJlptxG#?|=R_ zX0Nj@I6pip`Qd{OJTRdMgRu(u9CtUHviaKXq8nJX5j_h?v?q0+Y{n(serO{SQn|@jpi$WoabmkLU{`8r6h|UF}>}nWYKQ z0zXRIOmTsiclz|{VWE)tUTxc2*4~DEcLX_Z+&KFN=@^ohhbgYIrt;Z*Tfjxds_j}O z9R7d*7cRN)q9z)LG0T$;7$>U6FHAVy^bFN|so3eMUf?wlWWMi4b3}~F z#1Nt~0U7cb$RBPDR?8jqSV83aE<>*+8W<5E`QP>QQQgMKJzP&))eEh-H(A3mr1U(qm-&|A7{Q%vsh41 zhcA zSK)rQ)i)_g7VSKgL*=#wPi6PUwBv|=Bj;h?So~PbA>pCPq-BeiVP_34{D7jacXsJh z{@j44;q}Fx`O!zAaid0df68RygoDn{Cqyc2(6oukAaa8K=Bs(3$?0cUIByg3^fviO z=?r=KmGu}+2#01GkmA+XTocYc=NzeVi)k5hB;)bn;C$)z*Fx)tP3+z<=Q_p4*|KHJ zt?FK;oa5!Dr8W%X<-}6Rfd2hMAIWF}o-eKgZ81g`RXTO;V$4YHpK?pAP9vL$#8+HJ-XPm}eL;MXdVf~kKi9i<+Tm@vAImjm z==zo?=AldxqY|N%B;=U)c==(gtE2o9I}P3)qVMik zY*jAIn?KL8E9!oroSbU=-g!rOSStV6aYxJV?ZHOloC`0oJ08E6wScKVsTSA>Ex@>R 
zQhtY4ZO!QSA5t_ia{O4k1K*%XTW=wq(3MR$^iJFF5N3+2dgQ24Sk+;b1tTfR%of}Y zQp%QYn0*>Kq@P9pS_3#0Zm+bVV%F4ikZ@IRJuFW5%YV3BjNSV|Pd~GV4G^Oub83Q= z2&QeL_j~koid1Mdzo4lwtJj^D4PtRZ-FZ)GM)C!WsGJpR&Qo zsZgE4XOIVi{p_w5YB5IuJ<=vcQaADi$&}jy`M-a&V0@sQn|{a<6cOaX-5d)%$c}o9 zpD-cRtXaeA#<==J*5v;;@QKj0rO1GP#qP#f;E^kw_2n1V4*%$KU&H~$S*!MtL1B?u zhf7F5q1QA%a6w-GsC{pcjnB@nYri8IdkSr@9w=NI}5r106+jqL_t(^d&k@F zyrXNtWXiSC7wrF7;A`Ewwer#XAIR{b!_-qxA=eIpPqu-QFi#J8^695=&fs$tYFv*27?~%k+&gJXZN)DW}hYB6ix=2bg zJ>7)2wSkisIuoat9nNYGRTvEycRwy&Bt?6;luu^6ZW7|`z& zR6ZenjUo_BcT-NEHe<6+i#|C0Kci8 zeC5k`%TuK|FTUval{g?S{PEy{df-4IRPD8|MW|96Y$l6=N@Bo#zp*i~n2J)NR(At6 zRHe|MQB}NX`C=S>R4l#v_mf+0xRIX8C{0qDp#;frOuc1PT*0ySoLq$GP7tnZV_Zo?|3l4{P{u9GO?$4*K|#CJE_Z*a*n~4gWrWNXF1v zax%HWV;KW&crG&AM z&TFx~U~KK&!;$N&G2Z64-;f9*(oju3hTZ=gxs&>S0l1=U&k=<@V|N)GBSrI8^HGP_ z7!>xBu~w0QgdTi%P#&$*ULy>f(Le_r`qa=osycjoqDYq0{jz1AL_>xS52TuP4e()v`1x37nWvG~7zPgb{ZDkGg+gl8%$CF!L$lc=(0zhJ z#7!}!+?ast#pY*!F2|UB#ac_eB<7uCZo3F+fyb-9UtCL`$!p+|JD7ViWR}KWEa?9i zcUaJCLs=2Gx^QYuj#Kg+7uRNI(UFw z>a^SUOy%+DVE-G@&H($CW_FyB6;)rT80+Pn}|7!^zbg*bP3+?8BX6vV~BIq(7F7v+G^VQ1(zlb)T zlmycfbWwPFFflvaExUnh@u30uZ{X(v{Y0qZ6tn01K5yxN)8|S=Lcg$wmBKQi55b8t zXw)mT-k+3aD?(wcZ2tFcSkU5N*k%{9JXzYbF8#czMOL7xo?V7{Q2r|8Fp!5C%)-Lr zI2rIXgMWD8fYF8Y|N9>zY%aTMTbf!Jz42*dCGWTOv+pvb!Xj5K4!eeAi`XcJ7``G0 z$u{U;^6|nm+5g)}4FJ9@F!#HKQ6^@a`xqAAL9`8YmsJQJ!79@uNX)V4&-sgfcu15b z_+{w-Ci0efH^d`e%;xAyN)(2N7bUqvUDpS{SiUm7Rh>zMNpp?-f14$a1lvwZ%<-N= z&{=q)`ZiZUh>?|FG~Y<_-ydK>e`nendiSyzl?+T~ezrFv`oF=m1SA^yYm3vdv@+W? 
zMNaGYZqWbV$ofH!0neHKyAVp6fK+y7MmR00*Sqg z*Tg19*k%wo&^;XmnQCry6*@2GVrxL49TZHPF>JVHKHz5(DQ-2j^7DzwzNT43${`qo z^mjg}_tcy-^B=qHK2t6RiZL?=<~#dTQlPQ&4}c0JbS6=oM3jFloe`k_*PFMBfnN|u zt0MI00p{&ntHn2(h+AX{haO1j3gP@RDZ6EC2L}gqf_)GV+`gazNqg+KADQa#6nXW5({hP zlZGIMF-PouEh4{i5;cEo(YmxVzbJ{>Hi z%WfEnzEcP~jySe=C`IgN=8LMt%V3KDjagBs$N>}*C`yiwgGa_3iz^+`4TLrg>16kJ z6Yur+`8MP6To+Gp0&Pr`2~;Thx|s;K-Tu^uvpM<&TL;u<+o{Q2Z^ifp4taQ`!D>G+ zMj-yOD~mdRFgorv0BQ_B>udc~Lp0=H!w(#=RDEPEw|TK;iD-aGT#w>2VrcRoM8Qmy z9rJ4Fw3jkvk4N{UBXP;TWP4H~IRYf(1vaqIe1z!B4G3yVTinT7 zOj@qzyrFka0I(*qwoC%twrK*^S65i&ABz!@fAYTKXIisG+#9qj;!05(PIjG2e zS4(+SG3(Pmq90jVOlov%BQ{+s036!=d}1v?A$DJ}L2R{|O;Jmqeljk=q?7e78|1{^ z{jqoYJyjQm0>$L5IPJu_E;?w`;g!{LNr8O8b<GX9#?jf2KF8fL);05g8hga_)mgN7LCOuE_I`rw|zs zkv43K{^NPC1iHUW{M$nNkbG*a$i!>0(UR4ln@v7#ytUoK6d89Sy6y2J^{F?|=SVs3 z8_~8b=Rb*Zu`q|)+W1bJj7*<}G_b`CC|CumG*hX7LbBFmyzy z@;r&PT(Nz^l3Bq75cFrT1pN0oF>X9=y!a1x6AG(E76@sjF?I$zxZ!uvUL&+rgY4{z zWM4mwM1uy(fR=pW$>_f_iblFZ{;1P*b1T*bYmN#nda@9d6pAQ3S_w*JV;CIJ)R589 zQf+>w^Nq@Ki&;uParNi7F%$|K7`U$@lFYmSb1=d}6%y?3upR-Rsgi7G4V=?xXWUF^ zqBuc^$Om;Pq`RJ%#M3W+cPSz? 
z$0E_ZPbJDrIu71QzGEB*o}U+U+_BV-YjB9cOH<9Rb|I`G4ss>VJvXav5&r4_S$%B5 zBT~I29YsmTyK3|WisHb_Z*vvgNlF&*py{Cc`w8R7;}u}95Oomm=X_kHNhu$5h;f(Pg}}QCEw@i*z9Io zoX8@nC3_e?{Wd%RAt~}U%`7Kr5BdDBBAkJrQ5G#eyth~V!gQ5A(O@=tU3if3B3(nJ zbzl+=j!D$#uFFaIOIk9qh+1P4KCiPZ?uHIE0umb#`?TMeF>$3xwy&O|Q?yWAoayza z-h5i9QZ6lc+-bK+_NE_0ei)HafHGffN%ozgq+XFN7!n%^Js--&;&Yt8^_D|~W_>pW z=44Vu@ifJ~{ctVUn>qmxf0f7<)oh99t#9gQv(fgQFM;$%=X`|LUCn9mgGA3VERhQJ z=hF3$dTp1_=9Ell?b*<2m)VSGh~dBf)lb2i5|>Q9AcUe3>jPByeTuCdO?^{o68X-_ zUEf8uY>>rV^c}iHnMZ1}%sI?<4K*TWz5HnR1H#EjOGAeVO-Ls84ee z5BF>un;ANcZrmBPcBHGWwKwr}34+*HC*KVp3qQ%N)a%`Px_dYr3i(AUK9~d!vkdPS zYQ<_L@r>ohbaTe-AdIzB5tIZIF{vOYs?%?B9WhI+_|A~yXw68!YSDKMF7?hSnfk=S zCJHPTf5zA0Dj6xKmaE5ocU1Ph&gPXQoWpnl@!6luh!=KByN`ylHX7&d06rJXP!=r8%w zy>cXbt&J*yG@;v7MmTq!jG4Z>HQs5DYtQH}(X2VLXqLK%w5O4+C1f-G4EUJ+y7h6r z$a{P$-dwryFldjXuqZ2tHIcGud9*d2KPXnTkI#@U;rF;?n0e=!#Her;7!JK24&%%n z(d7tN%0KQZW9%#Y-I_fKh#={&e&h8^-PT9PbNNPzQ1+{LsDWUb(_Jj#?6`@o$z6n_ z%0fSW>#ij$#s7fVCC>JM6q;wkDvlI6-R_3Vd*@5_6+BAJ{Rg4FI3UuSMer3%dqq?v zup~ZLEt!aOCc#aO94OSt!6Lj+r9~aU9hc_W(gp{j55Gky5I$s>X*Kx+P8>}X-I+K? 
zx>ctae=*1d1RPtGH$i5~Y)D5eb0WW(uA&H&oZ)Los7A2nAXfpWPyA>a zEn>6szT_@&iBns`5)x~-+=U%kY3zGosC1RLNLZ6fQ!!aRf!?{veS;8NgfLT@QGBk@ zI|$$U!IWoJO&Fj0S0-&Mg#P%CC(D(IPy#HtUrZ1sGZXqkM0- zRy8?wTtN&4ql;slWU1UBllbT6=EhjR3(|2^QkgGk)$e8}*4(&tz0Gy3mpmfrv37kM z#wQWf4KItamma~WTpBeK<5RAS+8)=f9IsjL7PfaWRT<0m$4g4$dWorvkwe|**)&A$ zFnm*+IuI5)zP?I_oEa@d&wg>Cz(Uz1yFy$}F8XYC7MUFQMU!OI{V~CeweIaHEb=q;XeI;_KQw{W_0wYh!vFxlu z2PH&4yf{F#s`A0MH3bgR?atscHa322dxz#9S2j>|%UJj0!){^>1tF$&YUEIT_5iA_ z3DoHj2I;|MHrd0*EeDtTiHbTIT2a0;_;85p3W-}tEE=a0J+!ep0IHN!yVW*I`{%v8 zDlVjAj8O_xwd&yixgC zgV8wo{E{PH0m1AxE04Mau4@i7d20bI-+a?~)O9`fP15f7)xqXRwJW+{GO%7#?sdt~ zpQzLQiACKMTMG+>v1NvY7#q*)LhOAG^>?ws-!fpIA~FCtT*vcal<4mTgH9{edzOji zkG!929iA!L&Lf5KkR#ddrg9%*Rh<`FTpw$<1$vvEqX1d=NzZcQm@)3bEC{p16kW%# z@eB@m!q>$j54PuXRcLznd|zQx%xiPLAaRDP4Ws8lWjEfCXF^o~&my@yJLo|)z6I5ki!`WtxWs&9 z2u;A4+kq(q1=NfA4da;&Hd`u;SO)H=sq`2LAIL;P&tk2$P-|Ezd|%WkA{=U&j9jX< ze%EJwkKf|YTZ+jarv-GA=>GDmfsn8=ri7&O0uij)hlgRnhv*7a z1*1b$`RBcfJ~HLUmZwyas0uBd1!@{p8dqVoXxDg(IH9-7B=;$uMMnG%^hj;b7|g7g z8U7|SKXN=a4mu+^y1H04VROT=uAi*as#tEcpS1ME1#ws}2>MScKd3^XnT*U#@Q!}#Z&w6QibxcA;@A1h!ahlrT`(t>mE^EWw)O6= zU2Kq6?^r7O)Z!GkZ8y0`@!!E zA@v^Sz6I3FIy4$-mzxp}Du(%2S*lbxI0zW?&aoN;ZF9nskt7++PUVu6=KmJP+aRRl z-+kJ{#hX*l<>^du>CO~ICKM^4vDmNva`+gCY5pC7M2xYonbJ?Te&~|Msx&j15yU0| zw9Lg%#>UTkXG02i{V?A5N%I0Z15`+G157U>XI(c~w-+Q?@*v8f;K#ZRG-25<{Y{iB zPq57Cgkftf#<1AL(%7t$*WCJ$bc>Gsh28r3()?Ov^inquCf$_zFV`}CH(Cxy*FNv9 z8WMb)-yJOiT)ry{G;uq0B+^L3a;%{-K*wYtU}>NPW(Wd{tQXvgKoemJlw&V;UG&Pu zGKxjR5A9H^GWh8~)7y#(v;V2!Y+e@&jPUusWJsB-NM^LcmKTSax$`RS9hykzM%z$3 zk@)L6-OZ1F%X$P87cn>-YpNhAaDPJ7(T;l<}|m3bf4?N3I{cy;I7&fTROjmxzW_W*``+|MrLcOBosayzR$T&rsEMm>QV>8Yg$f9jXxeq;Fa9-f4!_S4hNY;dwaVSqo>uKG{Ueb3r$`_ z9=Kj1B_W%%W`y#3;R+LcFJl&Ea>YOWTq;X!eFV>Aa)Bi<`$!0VonM0?3-vWFE#?&N z4bQIx8RnQ%zZk-#j1>|wtC;gT?CPVLE-_e)n%sz$;5IlwOCI^%kqGxdqbEEn8SbaA zcd{LOXgDtIs6kY8=NQYerib-%TRu2ZNY+K$s0tPI$jV9+$)^q!u#2-C+HwDt zd^;ge_rEKX?`n|~Ptu|Qu)n`8?OJLJlUk+<6ElqS?66&dFQINZ(_jmSUM^okGx_#a zG!e(-%A^C)4{rK80QIh!>W0lJ`}+w^m!;tySV}nLGz*(dLEpHV 
z03F-@XWfH^^JlT4JPz5dC?;yn_di~xnV!#@H%nwO5^as=uc`Gvvb$T;HvKK0GS0y^ zCyUiN?e{xle~jOuqIR+H@XBI7k-74izQ3rv_4oj4shgCG*fj#rL}u803>5=FsyK7>93Su7(Tai-;TX*fYc!aR-A5SU+bxVdprm$OX(AY z34n|Ae9~#B1l|k&oy<#zF~4xe#NM2^r6iu6(#5?UJa7fmY8#6s{%DZmt~zCsA2`4k?f2_{X`Dr^K3{22E*h2;(P!FcM#)0A zUTzhgnQd~Tvqf73{NUj3woHngPc(U+=mxv&z9lNlC(%|vN3~yX!_9Pw%WuLA6*Raq z!oIfAQE7c*v*GFK-rvR0R*TY%D>p!2hVE76IcyhlU-chJ>;vEL(-0NH5IoR{zBK9f zQ|t&D;QA2onbupfuf$TGu^D?{wSFl)0u4DBPA;M1#& zC(12N68&4>qMg#aJTvgk!<2}4D2@eH4>l*K+wk_mw_R3^UZ+Shy|DWR8!K_Oc0SKs zzGyJZb^D9sVa^9BU#qLSc1Alag)d$aGY%}HqCJe@C4HosW+)lw>E{OKbTA$#&;O>< z`S#9}HiUE+ObFj>RhVn%`eReeW|`E7y-TIC zrwcw$GTgmpMD~Wx1J#TvhuQ9Gq#pO+>T^C+=vVt*V0tn$ENNA;PlCSF+j(~*V5Fz$ zPK-XiExF(=#DB7qAd06U`IP`?I8^kG7|YYRd0_b|o|9M>Hx)We2MZ@DR%kt~!5|{> zOu)~EfY6SmBB;};QWhA8Z(QRii?bqMIB$sCmn+u;6(;m86=&9@Lc1DP@`<>=l^_0? z6ZwTB!!WB}bTN7+H*IJck}83glRO|6cQ+wj`qNM6&;BhJ`XVe=i++i14?*~k(Ubn* z>v>hUcIgdsrh$q~y5rD^9dF_e)o-28&sJPGLjn$rJ4^6G)CTkPtVbEh8YuY<6or`_ zo4!9f)47xnguIsqtj5tLj*w=L64PlG^`5QV_nwMP_Lu`zZ%RuD8?HB)ENvHjkG4bi z{C(F@ERPgxgm=J8$3Hi4ff(-Cz1HTqqqH{rFQs zb1p`$-T9CGtt`ig@B2&Z>tQfWxmMWqJg) z%5{N!1Y$eCZP>aIGe3O4>4V>MerC>lg>G?=m$XtTQ@mbn@DkX{^DU|Wh4e}f`w&}X zuW?b<>*A-+ca+aUbTO#*VHZ;2vCp#Hrr+@GY&>yi&1;PX-0bv4+;8|2)J5XJ|Pt+Y55YX?TX8HZy~$R;Wp4L$<`n>U7>jbG2VhV^d} z6ZW=!`9VZpj|P8N&{JT2*Gc$T6ofeZFPn7&V3`*6yrb&xrCj|Jz9wWppa?u`r0sCZ z$P8{Z)00NduwPGg=GKpSXAI>qN&Y-rp-$0$w_5J>Zo8*Fy#!R`90&eKkPSwTe{HU5 zlJ~v^0Nt(EvW*+ClWbBiKi>zTKHF^L56y|&&*M37Ix;nk&{`bVAyOKVoJB(~M2cwq zQ;W<8Rbwi`YQ4P|I&1{y!Df^G>9`GqKAvVN4Yr#4RYJ;4=^q5jfWfphMlG^t65r{$ zF;8lH?mLdJwU1)8-b5$#*-?%ERQ5u50lfC#7{>~gn5nlvm+s|GR?yzIeObsPbj1B0 zwaAKensLQfie4`5m_o%7D=v~*N8fLC-mfeS_llgfR2M#d**>^}V@xidxXa$t75X%Gc=?d=sf&Kte7 zRHe^wsSZXFlwZnvm>Kqxzhcs=pNRPqZjG}|Y0JorTi{ag-PTQdenLa#KTtH8T(jHX zH$P6Sk`G~X7)zOt1l(e4)&~s&{4FjofDjC0Na|z2bq7_W+HeO2W^nXZR9`7y=hfsa zNC13ytp5!Vm62DcZTDKAH(_#pH(QASOXPjzagUfRA%f$Qh>*)2Z zw9NXxNwQ*YGSHJK=N^*nDs+>FF^1b8d1d;Ca03!)sQI`Voqmf3!V+>g5&D));#w|f 
zy%_<&EPf+;TO=F(#sX$h9YqR3987p(4L15&g=BjJ{!qXy^i>UsN00Hx8!I&Cu$zo= z+%N1>>Lc$&p-VU({`<>UD;BIUT4W3y*My!`xEWhcArlsljvdNz*J8I_8$NQ3oK8#u z6vI%;Wr89|I%bB4L&BLM){^q+u$(Y=D`Az~z9vbTKcePmuqH;TW^C}J znvN%Zm3_#_vLGK#@rDGAv+08CNIg7!rUqpF{+GtfxGVwsq~^qfla|4_{tvVgVy8W7 zGR-j#sf{Z-iQ#kxl@lSfZ6K4ETd&T-`HP<^56 z<;F9ANk*q*%c-JBYEH39WT>GGmFS=@O&Lr?&s8_jY0+-r7|l~vr;H|<_%4^H;Rq(X z9H_4lfa0&KhudgV>LY>V;QS@9p6iFJfwX85YxlbZ*f0r82O|_GjQ!eZ&tN$9ri=+Y z^4Oer#>{2~p)UtYm9jkW79re_fEx1Pd_rXq2<8fdd+i*gV83?j%PVj0Lk}>>Go{^q z?X)O~o6F0p&}oBSuDmsz?v?OoW1fs=Z6GAtf z-3*7@h&mC4(H_ITRJ}ogNO}}Csn5Bk+SDplX>L4;4r0=~wOtB8xNR!mjt(l(qK|*~ z%Y;aT;yAl)bXs9QGM}Tf@;Hpte5FjfGO`F70WCVndi{-rWK^rxj7p^*bwjFSlN`76 zcr(~Sk4Az7>T?A7V~qi!uhf1BJm~<9u*(Fc?Kku(=tMA9nx;|cYP9cjshH1}o?PCL38R!ASWKJJ`5Hmm%YuYjD1#NM9@ z?1@V^K*65$mhn@{8L%?CHN5Bcuf05}i^N_LalaOYy2IH#fmClYl|)-OQeIXSAC9b* zx6=Pq+phDj==JSd+@YpHeDq|#tD1Fa#fWt=V!D{OB@p0u^@F0Z8z*-J>ENWtD~2B3Vd>VsI;m94w~2w!elLIP})q zPYa>Bl^AAnhoeZG4y)aN?N?Y|X>=;97+}_S?iGXwYvkTlu=?$D%+;&D4+cUC%>xVna%{b&>Yef?bszZ-Gyt}Mpuisri8vGm0HZ_$_6N{=FylquYOO=Ucs9!qC1vHeq4=gAInt4iFkkW%Vy9g#`Uj%> z_X)c@5Rx8gN~lbU`^UqwnFr@}q{Xa`k_vuJ%W(~%Mb z@=Tgt)La8B%(*6GA?xC4d}$I*#Ju5Z^TMd-u*Yu$?Y^jip!|B&W@ihGRo#)~kkFXE z%y7;Ft*vgO&|$uhW)!oxsF}KV2~_H7&OifgQ-a770o~#rCn;Du;wGEbkiGzpk#Nj0 zhxZ-cr>nOIHqTl1k(qMo^z1c7h1hBub?7npE`DB~p--8LzefvK3Vr$o!f$9-p)w~8 zDSFcyy<2Vo4ko@AI@koOvQ8YRYdfFWCfb-rsUfUvw~DZxJylt|kLJ&u0_+@@CuTr| zrxM-|BjA+}r90z>x*v2H z53E3}Mxju%-xy=8j6{*5K5&a+SV~$2j8@g-UZ|;76z_lj%U@5dF`}OsG_*aAOXQr8 zcvEyi5^?7UHmb$tt`tWoLhJ|>y$I^|Jqis{i%JPCO6*2ZvfNwQc_l}j<6G_Eq#2Pw z^dFCNryi)=1dy2F64v#X`+mGTBH%+98`1WyyRMUu>0ezV&DBo{yi{QE=noT1n$M^# zvPgc;YAOBo-ZyVIrlA$puDu`_I%*jBew1sOOYV;CzQ#uAm*hhStTLL0Mrod#?R?+S z2}KD48%`MD)({$G(p(QZ7|n|+F-Tt}p}{c$e|SXILL4Wuj^R-kXXviGzaF`)lhu6; z8T4~=wtzR7Ve8&)z9e=@qtT~8A791)#7Dr)J;{Tju5X-8zXS81})g}WR)r-SHtaYUtkYN(B_%_-5= z*l=bgcW?NDeM-lkNqK7=XQJE=G}2jl)gy8Z6=yJj|a$2OC{;*xtZGw1D9bCUw@_7%iWy4 z^_8M)Z$=Eg_dF&~Bv~Y$szS_gO8nBqYc%X422qs=1j{VrMIsEpR{bN$r5V#eawu`u 
zOq$#ZLBmY65@ata0h@4EQJKhgQTjX_o-fWGe0-8gb8#mUKVH@^L2PWQ5v0@XGK@8h zkE=LlU)v?8Iz0GVAO2d6N>Yx96Zr-Ox>mLSRq*=T4Zue=sNZm)WWuKtQ zl`us2QVY+TC#-b^))>qs1OXrRVFmBR9QnR$yJT3@$eH%%oum!B8y*K!7)?-25sIKE z`)Lw}Va_M>b~n4bHvj4?dPdg{Tjwsrq)0Q(oh#}GM5Z!qfz=EzN!iDY%F2#;FYRGa zHq?1iLsaqp=0PeD4Z{pcGc*!Ja@n@OzRaf>?*(1g@Ow)yqC8DX{1P){1~ApYr7omp zAAg!SB_@7rd2Ui4+aZ@OfQwd7Z?mq&{Q%}C3Tn7~xZa72HRnKGt7l+4$Ud?M#XxSA zuR+vAGJCzMauxQpg2`^7>Ef--rlatr?oXUYrpE6pUYZko{U^_HKA~3Pf&nV)wk`G; z1^*fRDW3wwW=FZ>OVaSqW*GDz01Y+P)(jW7ZV|GFzw>$|V9NdS!s1TM*cFB=4 zgpS8QV|x=En~Aba726&loC1URibjLPkf$%~10x_=Ttbe!vsw8u9Lh|p+Z4uG7yA3Y zIP5aLLxl~i%tt1S=#^$Xi$ogRUJeXQE$h{-0Y5|>`(%UrqVpL}_?H;%0E#adHUSg=97ZNURiXi{5&59w-MVP6XT`0cFaRg5I%XMl zfS19puLv{f^!`ETcO3b+~bg#{$N2KC}TxHKa_ ztLU{Weyo|@6{OR>*W0NSQ=8S{By3%}e6gr_LOu@(gbM8AgK<<2Lj_PKu{L<#jwr3K< zsiEKxUZc*=$Y&-daTT{#I^&&{0u>U(Dp$s7k}6L)YBmbi{!;9W0@a-QKONC>rXgmy zx&4D2i9uE8%@9|oj@U^#; zuHj~fUEU$)@1&JdSB!ZysAcKJ&*Zbq9Kyuez#&Q4>$L1_hOV}|kF9RL9u^D4%ob(` zHYfpFsj9iJ6~U)1dsy`I*j8+U3#%2$%!IvF#V!`PRKlloR2q>NKj5$(mVV{@B@q=U z=$4Gy_wVjmh6DOk#vk_=aft_?5{|rx^Uk@?tjPNc!Bx>aR;_;d zsDy*;2Q2R*rD;M{%O@!6jwJm)uYh5+af)(VpH$eLMDo1)=pR;2FLMG6(nrmWfabm`LHi`t$T! zNySx-pNe&AFF<3bQ>F`IB;CQd6%u-#QM2;BghuyGV0=_1P9`Fw^Vl#_mXX7+{fL+h z6iC-+XN+h`7hRz+&wnceTw2hv9~ct!g4qdk0Aw0xI^0m-EWlk#YZc28KDa*@o%oKr za*PfMWUnaRsi}7GyYLqyD*YAMH;k!A(>}b`EksbtLXf0%nirGASwdVWy4rxkOnv&q znnQ>m&vY)B$JXnxVfi>?mrB09L8yGdrOAUbF~6OSCjNeV(xrsnKdwybXNTC$v)ArTs;1T&z7IrI=Btofegj&!TeduW17ltpwq-5Y9^+t720kE zX{)E#nRgPR<0FV160VcO`>>ASBSBI^vYqNCzNZWCkIo)jWHz{7>T}sr}uJl2Uc~vtA3z^=B};WLK@eEkC~! zGlUK{d4dT~evf&?{dwpf0>uv;PRt~XtL(FhEjD~LY`Bqd|6&zjKnYsjI|K(|=!^-v zM9JQytI8C8Uny?Pnp4idi*>~Q#=kB?qEN5%i+W>Lc3spu$~@F8tQd?=W<+s{H-@4~M@*IALf5he+6b~)-Fuv&@lTS{45X64cC2JZC+qPU z9|ZK-2~$YndKH>qQ{R0;JnH*we4^!u6pFnUFmoRp6Te$C29J+n%iK@thyVV-7Jxr9 zb-QfAC2_2gNw)7mwy>|?KHV|Bm(Ep|e;WoTBERTovE6*2OCQOwXe^yY1;!^0Y}`N40W5QGQq!(dOwns! 
zT*LHlyRFAwsqh^h_6&6ZyYD$cX=hUQ_>b2U0{#5?Dw~5&t8-lFR+{I!ghs`P??CfuCO&ZZHDX-BC|_4T#<^g0qT8 z%`}!auOGk~Lut_+OUb=)>eiOLQtZiOa4j{q7;xrY^A$-)&u82?T5_F7ve=1IqGwQt zu=psJcU}nV(%V1gal*|dT`mO0YBqii>iFk?isWbhYbODn{QMt0H%=}kaWL9J?n4sB zAoS@(9Rg0H&1RJ=$H9OzfcM61Db~njpF&2ChFIlH?;u}(b}g$dDGdkP3N{Vn8=#!vt z_=~xZSV6{2KBuwe`C>ZA@tqQ-ztA9Hjh)5u45-;)8^sztrODDsV~KpLJi2#`Q)lW! zsjlZx$a}rC>qyj&Kap{p-#^%2rquy<&i@ULX?;KjW0ct>Gl%p}=6Q*ehRz-$Hky@0 zG=JrOsS^47KDa_OeEY`vOO z6v@ra&GDLFt0}-Op~Vdc;ENrIhIKI+|GPrlVxg_>U&{`w#d=d)FV!HoS{U>epi~6%x$S;%gslU5`#5dt7)#EDG5M8d9?J8>uMg(r*(~L3enac)QpoGG%M(Asd z%~FGVgmd3dWUxJKuPfpjM~m0FrD_i@#a8l#c{mw^8~)<(cj5j20Py8%l`{t|+sSX= z+v_dXwX)wHl>cCNTB}3og&iSEdFBOxMVhmAH8S7l7jLFQ3FGmqyS^8G+)nbT^FLle zKfSNo&7&u2!8F|n_f&IS_GZ@3gYPOhj;d-y;+KsBrW2KHzPONyxee6Bg~Y3p01VvX z#hjHM3i&mA6+^k`7e%;_KHf|rq92X1k2f!tXfv{VWE0smzG~ftwn? zq|i3AXDvne-}ijeCWA6K0hRQ|&dXd}tqi9)lt?a)J#4z~rh4`> z*Yty)%d`#e6kLp`k00o zNLBe>ZZZ}3p~ zNPBjxw5x}V33Cp@ii-$R5OEYn`A&YY6X_DGB;JLpT^CB9YNIW8fkZ56gb!YWz1)jV zwgrd8rUwpSq-2ph>vI0}g2zaq<;Q7P^wrb!{=s5;7xLp^UWCtAT0_D#z3DOg#p`S2QWK5 zbAVkhv_F%JqAg#9V_OE<&g-W3`7=AOcWxQYm9hbw;My-L4$P@A-)IeV+H4}eT~5T? 
zdV7vNZ2B_tZIg%EIF;udCwxJ~qH335S}a4|M-YqtQD;e2hLEbzqisHumIg|y$M#z^0D*0dqXKUj*Q2YK7Og6A{#pO z4WaTG1)VVe$AKgQT_Yc3o@^m zYQ+9C4bU$ilS5Lyo7zov%d2fRuQYa9jfTJ6K~?J}p<4_ohx z`9Z6x5^*j59!k5*@uGA%94f~MFto)h5qxIuS5C``h%I*FL`6-Qtn?{*W7Sycva05m zfirWIF0*;a!EU8+ilPfjiT5FXX6$+<;(Du}Ex?qsrZjLW$J^Cl+&DIkQm@h1Pvw<4 zgk{n28BAbqv`HY%@%mT$`Vhsjw_;VL`1@Ea5kBKudlMg#I!OKEU3fOfedlMQ@DN|;%C%%?dFlF%SNhb}=XT#*TG)0VUS`G%EkizRoJBY9h)sV8FY z!o?i3>PAT2&upWop;cSN6%=bo92~xRdzqQ^mM?S>d>LE;O;eTf%P188;iqQ1AB*CE zy;8YRnXd0I1_W`+gnkz|dSt}ro{gnUR+vc1qk(C(bn>kgU%*SJ@0vbc&r^)KOf+`XkP>dol6TR3feBuTI#@p|_ znhygHf9&>6@5=hz3fW1V)Gx@JXcW+2EEcSvULzP;Sx!$uQgXiz2;gxTi&#=vqWj}4 zaLiVS07V#`PBKh-gpKhELb-|0`zd;sO62W+ctGG3Sc5{?NF%GruePjOMdA;=fuFvTAhof^8hy9i=N1o#-y?C)h(6L`8}^?^v*f8vt3F#%FcV5BVZ=xeLDMD!W9Ps4 zg|tU@D}VneeZrkFE{aSG8G*D;QaUrHx9-OzTJ_OFbzPF_nu8(=QjN69*Sm}5!rNbc z-CHpx-aq&i^(kK3elmAg!mYtf35R@t3rs+EPQuOWMqo3V5DN=RfKX_UO>)A)CYcRA z=&3&ZuNJ@^x?rhNR7k=twC0QPK98Q%LXi!jZw08=SS7CnrG1WE;tmlEBF_S-@

NhWjJf+uY^NnM*HM&Kz3S0e4M zQ~wbjAEQWKhUlt;1*o0zAf3#Rbj#TJbyXCB{EOA<9O+foXO5^FXrl0%+(G*LH8vPN zV$;rN!er!pcDu)~Od0Rd-%)jIgji1g35L<}T&@3{u-C zd+41v;F_)~X;neJh-#Dt0ndnRSDPq%4{9h(j6clO93y_LjMXoYfx^t=YRAI~JYZZ8 zN27=rEGU;4{lzJh)!}#h^NJP!@A1pE%IN>;C%Bm}%6c0}aR<+Pb9Iw{>7my4Z1Evd zpM#uS8|%28C_nkdqj&PV^vjC=Cp{P@(f=uP!qd0$g7ccP?ODm|VKivF3ocDKj_Ky# z@=Qp%{2J5MF&v5(%0l<@-#*?=HJ1EK8rud{ypt?Q&|yCdrBMUfId^$ zKk*iUS1|-847)EobspOpd zgzofzQe1}2V4XNDGF^LF$1OnZYub+21OG$)smEedqMl_+^?*s4l% z;SEsc6q8cA715}tD2ogAKd(qwt*ZkxJ@o9ztY$q;IJNNRwf)g{6oWBku90m?A6!ML zf`B5uy}1n%_kP0=_sViU8M#4QFsQ3@_?Mo4i4^VbntbTRZUHpA`BTDXwJ@CJx*7FW z{r@rb&rz9p-~T_HZQHhO*JK-$ZM$Y_vTL%<$+m5GW!tXb_4>TO_geQl|DSb?z4!C6 zlZst8JhV&g@~>WaYV4dwKyF7a83@;#vERPTMf8yzh-TjK_F21wyA{{D6=Y}#dHvkX zu&{QmRYs=k^1%>xeiW=H4SpFnc924mI*F+Dld6ow!3Fsk6@sTHrj|rxuMe1TQJ|+D zr~H8lBNlG(P<=S15b&s-l7IY;Vei@O<{hS-Z@{!>A6OdcC|y66ec+Z^r#;T-fyoQY%2+r zt4@rmmF_t0<+KN~q6DyLQ{`-+wrU){69>eX<_2m8mm`ilCla!w}pT=Y1^Vs#9RB1Q?vdIY)p>Wp9fxC5)sO#eWt>XYu*q z;dJ0bj3u5HIJRp$=#O%XQp6`fBZMO^@l)dpOj@haY0xYQWH)g59_do8bB&7z^{bd` zE9IqA*e7zqYUB_8+={Kj~QkXH3vt_#qt1X|5;K`T`ScI zLiEtoim;_sQ)AO=+0?--H|cmT;=jY#76PVS1Z&oJvhdItakUEC-TfF41oyw@j)N{OmJIS^Q+2$N4e!_me3dKq9i70m0=_v?dXGn+NG#y9Tm=d1Up z(C>J@NUhdu9#^_Q8n>-8)Ne8?uYsS()aRr4=W1fJ-DInkdtkXfa@QL10phnCaJN=k z?>~aHl|^g=HX-1~)XVkF;#v~Icb?^HM!=wQ>Lh@h$Q&nDu9ywwy-!z> zS*&JLaTTM`3p#Aad_ExHhN%@Dfrcy!06&JE(5i|jwwOuOGW3)ELBQwo>ngptF9{LT zG>9k05rcVq>M!ZH*G@p&N?EBP;{wr%sp911?BZMh(i5SLbB)0|XX5J;VCEB@ z`YJHb6sL=vJD!OJls|vge?)?*<#qTU(>g$z06MYIX&rGOe=JK7a4x}2V|ejFAqK(S z!Bfyv>sd^{&v_PF5s*jF#ob>AORV8~jYY+%P+Ti|I>z-D|@nBiMXr^CJEmK23ZgLkdr~B-yi5 z#^RVa5Uy^w>A0Fl`ou+L{8P2azpt8u{rH zL-JLQ_=z>mA{4F$iBp~ctE%7xIlKbzMI72uVK<5+y*GqB+2J>I7ux0f7>fgnjFgqX z26Us+9Mbf0VvI6Ic|-4&Vr4j3iw&x*tDjlyuvB|XS%$B-;2>P&m$m4vwHwO0BcBpc z!EYWo;t*1rQT{PDt|HDR#0eX3%n|>ym8XdUU2qnP&QOk>Tf)?r*fw+@S}1{12>+Jw z;(;W?g@dr9r+BA`ZRj>2m1bKUZ|EJHlN5xG#O=NS3z91+s>8>DyYUtoVa3_%T>MB8K&4+;l%a*_2liJJQu5rk zos&ILxy^$VQdD!$X&_iME7N`1>CAi{MOZya6+W+wo0-6KIpNY$uo6$c=hREo`vTpD 
zkmk<~1-cK>_)vrMgf!JUMyb=F^E)~mqiC-4rgoV2E78sixltk2?u3#M<8Qi5F0S_* zMug!eyqN$ra$c%Zrq9{ap}@BD=@Z}YC(k9lg$r$1jl*+eS8aq};h@Y3kDH@XFR#9k z&BhpVYKv3C6n+#QKHeJ^HgaGx{o@?@-SpkhkfCVTg6cv>IdH^0V)pntG^rOmEKGHO@4*yMCOxEFKHbXVa%V9p7+vZ0dUrV2<@=nf z!h`jDQ-eR8_y3?oJQb`WdnyLQF6!hbD4emMy~gha7AT5-s$!)cRM4ugjWazwFlo^e zY%4vAkU7Fq8CIJypt0S?r;D{U1A=(5_EFa6&*0PVHegM0=Ft*yK!Ss`Fcbr64ZieKMei@FO(v zWuN$w2O|e_Sfs}G|A#0F8tP350=;8k8?u0%I>@Xo9TP_u2A<7rTq2lsnNIlMUfy}c zq2wzJeeefyUhp&DeV9viCZPqw!K?sarT`xYJ#>aI7gWGWTf*@FA2y0F0`!g!ufw@x zp2G{O{XNI`AfvLDs&GNlb@RIa5!{Oy(ZP>SeF3Lo7$)ir#!3OXpUS9vm=jKmRKCSD z$LHg-6pyD2MFULIw8%qGq)Jfo-J8pnB0`|JdN` zDv?v^Vvh_Dj=xRen=hO--2oF>kJmDFT#)QpJB4;>-x1B{4uyID?stiKa7Ln-9g^Pk zhS_n`|8&M=Cr9duemm|VO09M3wN6?dn{RN%zJ}5=dlnTrYe$=vzZ+QmICSiNaFA9N>Z?THl1sX~z zRERM8AI#;w{R|0?<&7;J8s5?0APM!#oIM5or#MIP5wr3&d4 zU#8*6_?mFws%(<2|K@MVz6?PPgN`6ABjry=aBxlj7Cyhmz4%ju4Bc{zv2tVYs|Sxh zJUqa>C`RW+QB-J*dDVf-p`+5|P8yJR1^9&FtjL2*Pi46Ve5hbC=pbM^ z)jJ6Oo1DYcSPh_2lAqM{v zdGfq6`niuVb>z?(gFth(%Ow2i5_wO4E)=M4q5r*}tV2VZSZW^n^i8yK57-u^l|k4ld&qYoy8r;r$EF_ySC>@s^PO9;p` zJC_r*7yN&tg$55EMpAEjDmMq7J${`OFjkJN);me1eH`(6gu6DhBS5^>e%z?G1zvT z6SZ$dRrXHbcpQ&m2mkN?IAT0#tRmr04Rvw;tVsJn6*w~_FVZnsR@pt&U zUaO<=5bDgn?dawfgeKK3Jf;gQND#5Z-Ty;w{pts?>fjsvHu(~0 zdmXPl{CdACluK;%N52zP^1-7E)Bv>IBDU~wugZsu5NSqNT=r!@>cbwjpn4)2eo3;< z;q3g?BE3E+APoh@QSP-d+taWNO$*)*T2mUE;s)S+EX6CZmSA$|YbsAPEu}3Uzt%SlQ zNobc1*yr*OI8;|>PQ0)#24G;P=AyT@qkzNStxo{aJOtOw*Z&}JWu7>BN_I7jcPNKF z+wLtvw0PFH{vPnDe zzary(lQ0puV(GXHJg!V=Xc<*c{y6_l~LP>eK z(V`VYa&owmm+N|yQ_;j%O<8rCQ_)D*R6?&jaicyHJ>7+wJ@NThZ3j001jxpZUaNC1 zWYB98Yj=}hN@`$*v1y~j@?$|jd3GDk10|;QztyC=sf-!+wyh(WwG%+#N#+XrMG|RF zhCE@xG<++fowGJ<8J!MS+3PkrQ=jdm&7}l;EU3>TFU?bhoy+Tl8N0&lOhPby^C+TM zR9V(#_a}J9ayj!70ipXz!g~W8gM}&UsqXs^IQMEa_vZxq33yz+X@`z*>Ok|VlND?4 z7cCv#q8PP8)?mKnpIui#hH@{BurZQ7C$hCQX5eRdBOj*`j*z;$W{|I`$Tujqe=k;z zUXRd|{LB2rP+tmdweHwgr@&{OZ8sFwPa1@Hl6f3DVxp86$wXF;*f=dPe7f9wLX*1h z(P$hzlTbE>vygtP#k%}<*v%%(7CWH4POiS5l9f^t2u-Jrm_q9#`2n%vM`vd9u&HFx 
z4h_iVy4qy=?aNk7>jzuf3okt(arG0;9{U`dAwB^G2`;G%7q{O2nQ1HxzNQuUs*PWi zOVn)yQHy}e@)Jf`dw1!b71aDCke+2ItBu`KFR!n7CFI11On#^u62Tqhe>t{Fujg@@ z*lFOq$n7GhinhKWws1UvZEh^|W$_6IAC(M>)RwB>lA-bY8FLm^&Zf*?3m9DkPDrwA98>`f-Z zO4Fm+XM3kZIry43BEBwhHmQ#7M_+J=3lm6s>SSWH=d$NE{c3q>ju~W{)P~*38~h zn=kyegofby9iVtbW_JE`Igb$@*--NfA4gl!!XvfOeoJb5#B?tqJ*9Xu_u329vqTVG z@h^n|pL=RauQk?u^>&?M8c~r+bTlrPYPvry0ap?$d!0O*)?S(_F*R)oQ75|ALy1_d zmI&0@jlm=zJ1{5GEznk4C-~$!WfJr`aC@`G&2F}QvBviE#c7(Uyt@t3|DL&1>wAq; zYcBzG?7LE!?M%N|w;&YJl>h`}d#|AIp#W&49f{6$DQL9GDH|MSlQup?fe45f(0G3b zYU_cBXd25L=Mp(Yit{*uyFZ)vD?Q^hEZ}cP?(iAg{0m*@R~?6M=QRsv6#b3^W1ojT z-TLVfas@O|zXFM);Zqs3X}*0PPK7+|^vluq7MW#EKv+?6F)ce;C@J_Rvi)$$ROZ2gl5mpnwsSZE z|Ib#obmHE4;r9<$TtS=U{x?*iU}}X7h+)3zxby}BxovjQ2Y}DJeY#&i?08TrlOEN+ zQaagZdr>)G>il{qukg6hRi~O_PUMr@nFP1-c+eUVo_U?%wf{}GfY&!To8JRX939q8 zp^BCwm3H{o=ljIfR>$FU3xLk`_?s-7&n+Hzp=1I$B_zi=uCdFHz_^zr+uClaVM1Zi zX1gsq|DF2Vd}yL%phaRyKIIGH z?-xd#dvG8-)3qxJE8?WkKW2{m@9bDI&KLpqUI1QA?G3>~k5SJaZf*dPpz)^FDZJPc z1F2{PJZu_bQxs^ZDW0ebAFIjT!Pq#?lw%A3+jL9G=tG3z66FFK5%_Ll?xS6^9SePZ zF%TU*%5q`p4n?Gej!3nEq6M4?^BvX^8D^9V;g9X_F)J2)#Z+w~N=~++2f~dMR)q{6 z72KF$ca(WFSj1Y2GU7PUEKqQ$QA%th2J$|FQ|*wxF^uo`VSJb2)6&ZF&S3HKsFM56 z)tax*H~IzM!-J$IA$xw$M4%E+c{n7($;FvKfbSDm{*p)@g~t7AW}`NbCT7~GTb-%r zPA(lTOnNN(<;7=xC9%gfZt(F69+nYV{+9anp<(6#f<>zVO9mtvPQh1KY)ZgsN91&+ zC0E~XPSR=yN5<=Sw=DR%^*SLwJ~{La6I)^k^m}0ho9_K2Kcy#M$q_|R-P-aIKxnbs zoDAOv6MT^Uoz(Q)gZH?qVKq=a&E@nIQ{OD|QB{{>{v6TN%ye$eEcW8U(K$Ed99}>= ziI?f)>HTECBnN_X6Vy7CQ@TbFuYanOYY_}eqzE_ihrs!pl)`F@5;5<0Km1`h@v#hl z)u)OLEJ88@w@3!xbIASg*gahN0C42b%3nFQVH2rLiLb85^bJ4Pf$WK~G@dp5*durQ z#6jPSRaQ2e0~+&r?k%TKGm$Pi>TxL-O4M(lH>>Zphz)e|4=FplTFL%%r%^!Iq7H?# zaiklCGXV}pF%w$?Lm8`M>y=L{1wpMU{R#x_Py%gEAix_gTy-tlu}5Pp>P#Y)(`knl z%oIkWS~b!4J=^mjt6?S;y;L#5GB-df41tJDOFy8k`S#A}TNm(o13uQbPM(fXEYcjf z9lIs=!;TH;Snn0y4hWY^GwU^r%niK#Etlk{gjv~a2@o*%Gppg5QH%|*HL2KS66sDz&BsS3Qo0aa#z|Nex3VdhUm{u662YihTB zWtQ`k&dI)sStRQTG9MtIPtT|8@{i9D&f40E+m&2`%<+l@hl@OfBP?HJH=2hElg*>vw>T?+TU?1s+9Qr1WdG 
z839Dv+S=3xMB4<vv5%w7YH2^d^AUP*?KOm8#wvU%O=|a&Jg3u@tG{bnF6ibDu-IglP{94|| zx0VsXNem4epBi7YIwAb)7KGE>ELuIYJJ=Q+aKFYLRbDN(Xwqs<-i)MGUs4%{RC*wj z(&roG&&I@LCK(!p;eY>{7O%QKE{{Sy-N_qdS7v~KgOLP!bZ}s0a-&W$Mz~RVaHW2I zO9@MhD8To8Q8SOb|2HnR2my-demBS-7+hyxkoMwAc>5&0oiCpt#a5VVm^pD;no}a5;qLztL7hyi$md)iO(cZ3f`jXS;k4^$ z2b;L2yusGk@12L8|Fe6YXuW}BWRVQc)g?!(5%p90--TiHN-xxH0Zu;xeY1 zoo7{auhgbgqW_D%$zKQoC$F*9_u2tBN_O$@T;Ujob%fm9r>vtpk862Lz9E~`6PKkp z4ootEaNkV17nMfLX=13ZP|PB?W6?L)jOHB?(zK>8=bP%8o77mbphY z;Omw;do5!=Jyn%tKhXcSc%CQ`ZJ-nAP{Ab5ZCf4@Ir`_eOU4U6S%tZ{k8poHllBY< zkfsWXh_{|^4bwteCW$$?V>f1v_d>vB)6V6#-y&Nv^h;;^Ayox(?+0R|Ss`5>@X$M^ zMrDo=*8SZ#AoDliCQr0;>?xaH!#!oWI`eDTWczLusMA!&SvCu8J2sh`arHi1todP? z9xI!n!u@o)ptYw*ZM`LpNWd^7^w?PO3pRTG)P2Q}{pt<z7`-IT%Lk_<5kBdMD zXNTkWu+;<>Q)(qskC<+wRemC=)h>T4);@ma$~>8G1AtA6izu|WidS*uo=2j1Kk{rg zU;MNSNE)i)Oc;ffV~{ZwNBl|B&(9qb6OqDO=Wvh;%|=^e%lcAzINnS_fDDUlR9AC9 zl_B_p(r;=9@{iC`uAtAuEPuqyX72H6!`kd5N2k@V-li_at&UUOZJ;N;{PnFwA+y*w z7f_E%SK1c}{XVW;?^fO~YGIB9g>pTPs=~|!tFkqCjj_ecolnTz|tvB8kI|a8@ts!-(cAT9xJ>=%z z3Sz{>$1uzVbRE_Pwu?J-zEU;mno~UM01(~nS^!-bROi}>3Dp(mc#|wI>*PcqeCML% znTFZzHojJf6F#%`O+b8LcatRK&u;=$oE?us1V7UOr=a(j+kM`K;4h6Y%k4Qc&u)hJOyS+dXwOcOHE9#%_oJys5|Qm zubBJK$18Rp40QBwDW2qvG5{dHTW7lC2FyUulu&k&)>}o)Ev$enPDM%Yz?@`XugfQ< zF(fioK74Q+*NSos@i8im>9)Fq(`HFE?3rH*$N-@lQ;uMZr?p?Jm&~Q@gM#XIB-qxT zPY4@%w(8B;dQmZb{HKt~)m@J$ZfXECp+n8)g2o!gKEoq^{lV_Fs@xP`y!%6-O7*$K zF$LSmPvz=YdlHWTJ`~??WqN$r7r`n=#n61z78&KkNe+auv7ATIWzVfQcRXqMt=`^e zhH$)OTIHDx7PkW!dR9C$0#Z*OLYwF@l$g=4PoxssX5Is6OG9pQ35R-r1D z^#S_yMgOL@^LKl76Rs@f`=6lEX-vMbzL?w665q*EbbRnT7XT*}B*hI13w}Ao&ngAC*y)sR1N1ZPIi5Iz- z#@dj|N5n_x{fB9JST%k8v?$|iD-I#Di~d!_JW1?C5J)96HreUclN9W-fyB&)xbQ7i zJn^irW(W2|gDu)P_!@ygu8io1yfRrW#;6~Z2MZ2W&Y4a>oBZ4$yYRlXwaBS*|I;)SxdC`g6KrwvFs1&?m<(mAATMi~&RMZVqiSO)Q#b=+;pTk@xi8 zmkN}ic8##iCc%l1AQ2wo)uJLj<5kQ}QKrd^#1cLVb``TYMTMJ07p-)oWGufYwh6|^ z-&8oj{8H2SlYiFld6uy{W6-YamDL>xHd#=I2Pbk?Vve@mjab0h9!VSX||QbETmeb9zLs-p>W+ zfppY+IgP0YKvK6y*Iw9*JJi0L!M-;2ivc3$-|aP`>JB}umR9AU%MRTU5NZXtWs$T$ 
zEBcRmeSF-M;!lfdCo8xBIBn~>s6ReaIJYLI@+ZxRfF&iIPwTHgu4C&S!^19U-5@{r zhBCXBz@YxvIuu!Y;SN(w7{88+DDk9yoU!4=`vHGK9_2F&Q$Q(R-Sw4lMv3>mMf+2+ zaOcIiOuWfHrx|Rl>?2R?S~EbRnRUX1HvOA zVSy{~?`$wHcz@Br_TF*5He$t$3 zda%<*^P_e<-ccoUXCTf9XP?Dt;w><*=R~GsS2>aEWXap5Vad?JD&D~uBoz8+{7dGg znc%7X&QuE^kvhju6&I~o5D$EgU(M#3gGb@JX*sGF5b#Xs{bKZeC%bCdr#6+WrkFZ) zvp67_2U2E!1?ZB#wo|ehQTIox0w-@v`E45|(08zQoU`QPbUAODY#~HCZso9LLd8JnA#mO^ZS{njkk{cKaWu`PB7wX;XX#K3ND2dSqa8Ktr%A*F~%gz1Bj zxdVK^Pj}qkp`po>;EzpSPi6WZPiOPHUTwY}O=j^P58v)|=p;(L?txa6 zE6rYe1!Jd|f|`+VVO@wci)9t<;K=0)v`B3?OZ>InVUIo&cmHMjMQNbTVBa*K_sEr# zc8JGe{u251?9)+xy`Q+PxyBVqIPcLd(U9AJhW- z!#KGjbRSW#+2OZbtASXPU2ctTIiFTcJO9;!pZD1WTFB|i7mkd;{fDc~$KPia)ECzo z;aV>2OcVd)70apg%Ua~;XaMNvzbqjOK(e;Ejy5r!_{?^$q@I5OC^Zt=Nl1ttcEK;J zyg-6gr;=Z1?^pfnWDsyue0v*KnI79eDX*uRZHP_U*+t0jW@##kOqarA#@pZWK~|1o zY_2=`^+aO`IhLgw3y+eIE}j8Nt3?v>I;u%VL$Tfx(h}oU-_zHkscVi zCOs6z4=#1fsUcG@QAf=2(*F)kK$Z_r=2;(Kj`Y~gNtVrF)VHe)3sX70VvKKRYrm0w zzcVJju^7}Z*(Rf;LXQI<{%s1Ffc^T#WURWy{WN{y+0Hjh(-7YoipWgDl$JK_@ zjAot8!_O@}ksN!v4M{wsHbJx)CpS`!2anB|c(f{9PF1DcBdZK*$zaK#->7UDaC(il zUEL9K_JJ&%*kTf%5WCRP_|W$moxY|sPc%O1>A1nvZiKyJ-F+rX0~`0}vc@g1-_=HX ztT#X*H8d2&W!|lkTQ;A%JC_Yizy~vsI#y*86MvZ%vWzBc95B*Xh4E{Zrb{Zj?87R@ zCGsbw3cVM#0wGRQE|_H=yv6~_B;UMjF)1BMQ4Aj;98T+v!WJvF;;p23`nx_z9%?_U zQl3v!o9j`TH_HNR8sXsIkIoFGYtYZb2w_25>uJC!D$C6~r(4N%_xopJuKq$H95>zd(HjY8*QU=g^T*l3sJXpC6H+Sm7fr;h|9Dq2c^DG-3Ku4n zf38Xt4g)(T%4GI^jzrMLAwVqS-`myz@!%YOe0?swzMk6N2a9#T?KqW6J9IIrZN zCDoEHEyF4!wDveig5Q1`aK_^jARQk43U|9zp6V0Geb+0)XFA$bm%>mvSj!RFJ_~= zBpaHwvl%kg*XOY|iYltJnYv|ryy}^l5`K)$2$Na1*u+fNS1C)PlPhJbuYhc`YA+cd zD~^qbOffCi^&@$mMbB5`9}ty(*#b?FZ1Ru6biN_;MEoB5_jwrf_xYyBld);bbkmn> zdRV=3kfjj3gz1PT5FrdH-fBF*^JGl>^!a@evR0JxC4y{Lt|w&*)@N7W5RbUUERo=|xgIVUNU0($5i_3!!y z{JoM62qzpyohg?$N^*Z)fJ|C?I;9Tp8l1;uXsk6XSEJf!xx{)tz6-z`jR>p8n?*^k zb{fcNT!DX5fKah=h;?b2!UU51Mo9N?AxJa6$#q z*4)=-3LzjnoG_no@4AZomsU|itbq`l9@W@o4U~twQWC#Y@?OeHJDF~wvs_wJ$ZjNmWp1qHi>+3{UHr^ zc0Vk8HqbsKAsu>-G#8Axz%Ja)A4Uk2Idm)0rJx~DJe~V6eHM~2Wk z>BrA*OkOoAMGa>(uTg_ 
z$e@D179zGSAYA_~M^n<}o$Fqz2s%$7kv!FrWWF_jxQRCCAC-094S&5uoX6ZI*^qX- zNWA#xy6R=ZaS#)!3J(uvJskdrd`=roAjG@JP|{Tw_sb_F@eedRz`q7bza7S>)-c*$ z#dv*iK|edQfq<*l-brC4TOBu5?DXxPDNv5JiyR&?t(|KwY(uhdX8#|{svoK zUn?32@;w!ZhKJ*;U!(qL`01jxk~I0gih|8zt1$s!+x{n8kEE#$+V^#IYW)= zrpIPAy4f$AA|tL23wga#?q>t~Wjepr3Uxi57N{(%@d0}52V<>~oD``rsG_?%aKIaMC|ri`NEyuGm? zYVv@mOYy^Afd5{s0?(TNlP;o4keg+=jH5mY4O^@=jN8MJK5~|@A@$$%$KJ^KWThFT z)_TRiuNp%s%v!I$6-i3oco0V#Dnh}w|K#;lGI|AIl&Lq?f8Oo%hIW%_t+D%sqGlZP z-7*0Nf)&U91wHG>ZH8~Rf;HO4yyJ7OP(Q3Zy&OySXoyDm<7t-p7QU4NY!ieZb5^u2)yFoP>O2C87#3b%Cj1{}se!FbK&SNoJjow$10> zPnW(j6$@V|>?mT_Nc)aPy6&D5q-YCTGxXBEosf1ZXaVHrI44CX;1vrk$@=tVQ08;&?lw{_Uz z@XZLx04#VEkfV$)zQg0pseNx_m5IrJM^9CmlI(-IGhZ=j@Xa9cjPo-DSPE23L=O2N zP(C8BOHD^&0a45Z)xKLaj3ZK(*~LM?A|hS{@=ksALqT?fd_{~LwUPAS02>&0kLb2$ z5C=x3H!U+!eHHNe=FQZx=dzZTUY?mzDXW&b z`M*h7rWJ8W7-vfDuUc=Y>*2LqWdS33%(o!_b&BntEYY-Jh{Rff$Kh=(QP`Jbw#1ej zd%B1$+d4Ztn+U7?(P_V60V%~La{m9C1qZoG?ObjdsEOrv5Syy!&^EL!o3G>xA%tjM zCA6sKXQFw5_RAGdnYBNk$3Rxs!ra_D@FOYHAl=HVvD26W5&inb zPbOL||KW;N?o!3GEeSODo}fe{!XiduRj>5-j9PK@Cca;HDQzlaN=KcjCORZ*!|Aema2WBDKV8_ba&Nveo4cW6+-2^k$0mDmmVk>Cr=MEi9 zEr|QNPhAHT5(;q1{lDQp%K#%tR)ce2+oNRjA4cHyoF!rZEl0a(R4*{S19&iXHyD(< z7ucA9?v#serOB6*Wy-+lnc79ljA=VaOw=;+Rlo|oqs6nx4C24Y|KGyAR10E5ss@eB zEkw@nnxViA96*E+CKup^_1C0(qgTwY=#;ojD{~g^Luqzisn2L~?M+at^xR+W85p2b zjyNCc`C9ti0Y@H{G-XmrabB8S{=dEgd&>pMBXYcqfY(5UH=X$&EFmR@N4uy666Q&U z_)MQ6xV97!)|Jly8Qm+prtK?*A7f=%lZHwaLXc#fSm6cC3Rtr*DOnRv>?6a}`9fyk zSwtvU)PT_d97kZ?m~c+7R5&h3_{%j5)wrN}twhM3wVFihgX~Re<(M2GAlZQ#U?a~Y zW0^O7DBZMS>y+2qrQKC(cvGyAg}-p5-IjI!w^BGDS6i~SN@Dzi-G>5uUP}UPq4um5 zZNT12#R&8a$~fh{;G2k2fQB33mgKa7^M+kW;s^IuczTk#8n{u7TPm`B-7&-z>redKDSJmI-b|H1 zILw$WZa4C&q?9|8&%Hw6u~b-gTNIqK(O%EMSAwtO>7+2>^Zynp6y&O>Q*U-sCcP+* zo!TlC)M8BCl;uej(@zf>OGt+b(Umibre1AHn}4YFj0Gxtv%ZWU90cJcH84Kvp;!$1 zCpjMn=y#84Mq_{Ar9@-bDSu1Dr-Vwh(+Lex{Wp77;}{}61!!f9Q=LACY4r|Ue5czz zJ)lE`c~h?^O7#h`=aH#;RmHX>WX*ycyW$tAZ$*|pCsmo z<*PfYC{4|lLHTcGDA1r9#}+&1nN04P$20r|O7jgQw{=)FOrjb>PSJTME`{m^ 
zsOn_-DsEnp5Mm74vSYXT+x?M)n*{QDTJOx!U9s;>8*G52&(oijIwpgnA89P7tN%i? zmM$1B^?68me=&0KCwh(-nExmyVV4#o?=RA_>vTyCvvss-d_tcg;-G;uR{Gn>Q`x5I z#bEUI8%O|^9yV}Ad=B{MSP}?g?+(|E=@Lss@GgXxrLEJh9i1q(Fyf5WQy1sxtpiw zK5+^;fYT`@pM7uTbZ^11ID9(`tuSEosCqcx&_Y8;EB1YMFmwt9#1is(%^<%Xf7W|O_*rn7myu;Ao5fR35K$1jnVDCn^RcX?jsv**Py zcTjpln`0N_F*0#Y2-h@vK)B!zEQ zfp3x>T)Pv>wY9Yx`;8XpYMlltA)|V2A+KwHBec$R!_PN4JsPCEB0OP=3wTs8D(Q@2 zRIjs*e|EzO;6;YgCo>D)Bq97M#MdLbJ70?&N%C*Go;6v*ZeVZRT5H?-tBHz z7rgixLcOr{swrS3IccHiIydP&(-1dDK~#`1aU?a2e$a!@605`y)?x9AZBGXWW_Hy& zI~gC73yBTX2B7tji*L?TiH}w=4-c3tU))k~burF@|IDLO>wy}?!|}@6MC=@2!=I+6 z;G%Q#xp({`NsZ%u=k1J(fwj6ToHd2VyWlk7@qP?@VdSm2yO>rNbD)wYr3NA85}r14 zk-ihlj5STlIGrv_T6Fmq^Un75;_pvyDy-A+oNme86v}ur$Vw)3AXSQ`4bSFkaDJ0f zIr@1nA&R4>slia$4K6MejNWOq%@m+JGnc|CgWPE#AZXCC5~AuNKF?Q>DAD)BgM(h6 zA%NMZd;r#z9*b)T$*IOLX^)=(J!XQQN^P|l6c+w(-?sT(SLbTCT~yRyq|eFbF>BzmN~D4x#m1<+7gM=>COxdnb^>sP!cI9Kn2wj18~ zF8c0}tatcb-E449G3`P=31ZumEeasCGmOU@CH@C6whHwFXo@~e2oEfsgOK4<{8#@4 z9G5I6$d{>?)&!m-D$JO%g%qNX=O&319}g zo)5kP0X%-|>V;6N54xdoY4UFNkc6AGJueDB0nzfTLVKBO;N_+dr2Je>PRfIo0D&eW zDH$1b8y)(2jhIhn|1FeLJAijHA zRwW9|pohV6U)^#548ls}%H|)>1Nax=hzVf8?YST9{;&uBzba%voACoo576$Bh*RAw zS4O*@x2cn;6cdTII-nJ^nZ#0iMDRDFd@qEC=;m9vlGbXwxXSfp0ahO+h14Q1{O*G- z{fwLFyaXfIz0X78mJYKUyj{;UZ&WB<%WO4Zu8f*=0?(pJ5 z%|aJ5-8T z{eJ0tA*>1Cqk!#Y0|{b-ITqYCO}NUsJ}pW|4qlMl0Y;Bzt1{9$tKc=FEaYt(joBlP z>g`4+)i$C{m#?N)>v~&*BOMzGjyyrhL!uTTZY}^aHot0XH{JNRtR8E-Cs&VQf)j^>=AetrqkvoYI}?ECLJ&g}Ba56xB3HrJxI+YGAS0*Q zjO=<#dI16sa$+s72H9T|k`RlX5LO_N$prc!7vok&VbMCzH~+r?01f~0p&V@YL3qA$ zkT@r&eg{0KSR>%+iotQam#ZUZytI`Cj8_l5IXB^2E0K`hj#ZY?))Rd0-SSTc-pffr6sXPH9-Ljj5R^U zTF3(*4Bw6M1WbNB<^wD9MCaq`fXH*;37la7FNU4qJ9=FJOmS1-rwx27q^A@N0hEBi z57TgH-{H6z++zWrd<4h&@VQ9858#)M4r~_%>mmJ1>Kf;L zaC#rYpCHZvWHD9yty;a>{hyitl^L&3(+RWTLr3U#7z#a6;0fvLqYCz!HADftb02ru zyal#0c_ejjnxyUAC9zQ7c-L*W%a}{XaL;ZDgl+iE7!L6u-uvq~pMo%hML|BiRz?b) z@zym=3fQlFm<@x8H)5vp& z4#EDl)|hIu>yS#2GvtpzNJq7XySgW6>Z(NEojlAMXrCm{+jIA{H;XC_P{!1q;ihbrURiU%SAT8_Jen+f;8N+L@hNs 
zRSM}+Ia;iH5N{rTqg*g*lq4o5a{n#?(FoKK{C1!nV9G$IACltks9N3F|CB z0~nGs9MHeNH#$0++bwua|9F6V_TwGT!Qp=pu3#J;;_Z~Fj}(pK-vVQ!c#XCa^+5r2 zhMX=#|L9i~!0j(r!2bf=;27+2qm(Nh^|5-P*H(pf{`biz^4cq}=mlC#3GdPc5|3^u zXD?Rqe{zy^$X7%ASA7bA+H*%Loh`{*sFOmZ&%gol!!cvIW+;98 z^dV>H#WoDTAPv*teNdsrGWeM|{Se`P#?#?B77%@;XjK39%WG7x(^jTFD1gq90O;Qb z$uc0WKZ2hkq{RfIO^26?DX~hskY9fFm23XO1v2~XcNi~GJ-SO|c(^Khr|nI0lek~6 z*}tk$fE^kIW}1N;4+?VYK;yunL$YPVMt=t5uDeb~jvOKV`}LRjxHwN|*a77|aDkj5 zM<-w7-OWWf7vUx*pu;=S_l-jv7R)<~|#60-bIQFscIKi{|?)Yx~aX7cG5mLYcVK(vx>+ zNg}VnJR0pW=%rGlMp%YwCMd+jtrWX6(j|Yt3WI9>FT3J$8GP<}(yJE@W5;?UBO^WS zVGq=q+4ggVObq(b!pj=uzYyX7;J?AKpQyO5tR&(In*Mm4Ti1vwV1cm_zg%0j`lkRo zRstjymir}a5Jx2e!{Nu__%d9JEmzWF+QWqTFgFY-yS`b!-nDGS3i5Hl(l8m?y@8uBkX)i{~##NWTHc1?D+KRWOb90xj;aD_#9S5{`GD|OEv*REZ=xFtk3fBUU0 z#wgxaA&W!7Eba;l3yJTo?)ahApY6z(Y&2$pxIypHX7m64sA5Fu& zkq0eJFj})AzsJ1c4e)V`L>{I4HQV2^`lDTpBWtwKnJrWYaV0}ne*3L;onN&#)p zrKk4DQovH6E-3)OFkDUuC7K87_;x7+1g>qOf8qlHqE2R+&T71h%8{-d_7WKdEciHV99aCy?Db7#R`L228z zt$_0*_(`kQtqFjhme?|Zy@rajxc>wIWS%G^r#koIa2rC3opM9s2KZ#0(^8Dzfp*Nx zaFe996`=7`#~zfYfCa|#6tU-)0`)`zr}I`2K*y~{luc1^jewX0e;)i0oDYJJ15nb& zW_d@j1a5e|-0}hSW-i#26l@c59m+c-IoUZbfSZfcmH<-WH43p56c*@O-mzmxb$#!< zf&$4saKKsXLypW9Jq7`+IX1`>JUN9K6cjA2TD634B_U^rDCo6r-3puNn|oWgZY}LG z)k*M!RSGYG7RLlVRlVkHpl3f+A^0I)9iDXDEPAx zcQU8I=_K%S(>p-mI9Un4S)jZ_zlPqXb6-iOj;rh1$iPMXcGs3iUrWcAzcjO!f-7dVO#iC1Tchk z=J0F}JdIv|2TzksIS8{6?n4`0AJ{& v$6@6T&obPysUH)wo)!cvC!4)eLlpRbXR-u@$xR9500000NkvXXu0mjfEP=)E literal 0 HcmV?d00001 From b0ca20ab853ae4c2765193b71b4d8c85bfa89a3a Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:40:59 +0100 Subject: [PATCH 31/37] :memo: Add documentation tutorials source code --- docs/docs_src/BasicClassification/__init__.py | 0 .../BasicClassification/tutorial001.py | 145 ++++++++++++++++++ docs/docs_src/Configurations/__init__.py | 0 docs/docs_src/Configurations/tutorial001.py | 112 ++++++++++++++ docs/docs_src/GettingStarted/__init__.py | 0 docs/docs_src/GettingStarted/tutorial001.py | 14 ++ 
docs/docs_src/GettingStarted/tutorial002.py | 17 ++ docs/docs_src/MelusineDetectors/__init__.py | 0 .../docs_src/MelusineDetectors/tutorial001.py | 39 +++++ .../docs_src/MelusineDetectors/tutorial002.py | 79 ++++++++++ .../docs_src/MelusineDetectors/tutorial003.py | 99 ++++++++++++ .../docs_src/MelusineDetectors/tutorial004.py | 39 +++++ docs/docs_src/MelusinePipeline/__init__.py | 0 docs/docs_src/MelusinePipeline/tutorial001.py | 6 + docs/docs_src/MelusineRegex/__init__.py | 0 docs/docs_src/MelusineRegex/tutorial001.py | 6 + .../docs_src/MelusineTransformers/__init__.py | 0 .../MelusineTransformers/tutorial001.py | 6 + docs/docs_src/Models/__init__.py | 0 docs/docs_src/Models/tutorial001.py | 6 + docs/docs_src/__init__.py | 0 21 files changed, 568 insertions(+) create mode 100644 docs/docs_src/BasicClassification/__init__.py create mode 100644 docs/docs_src/BasicClassification/tutorial001.py create mode 100644 docs/docs_src/Configurations/__init__.py create mode 100644 docs/docs_src/Configurations/tutorial001.py create mode 100644 docs/docs_src/GettingStarted/__init__.py create mode 100644 docs/docs_src/GettingStarted/tutorial001.py create mode 100644 docs/docs_src/GettingStarted/tutorial002.py create mode 100644 docs/docs_src/MelusineDetectors/__init__.py create mode 100644 docs/docs_src/MelusineDetectors/tutorial001.py create mode 100644 docs/docs_src/MelusineDetectors/tutorial002.py create mode 100644 docs/docs_src/MelusineDetectors/tutorial003.py create mode 100644 docs/docs_src/MelusineDetectors/tutorial004.py create mode 100644 docs/docs_src/MelusinePipeline/__init__.py create mode 100644 docs/docs_src/MelusinePipeline/tutorial001.py create mode 100644 docs/docs_src/MelusineRegex/__init__.py create mode 100644 docs/docs_src/MelusineRegex/tutorial001.py create mode 100644 docs/docs_src/MelusineTransformers/__init__.py create mode 100644 docs/docs_src/MelusineTransformers/tutorial001.py create mode 100644 docs/docs_src/Models/__init__.py create mode 100644 
docs/docs_src/Models/tutorial001.py create mode 100644 docs/docs_src/__init__.py diff --git a/docs/docs_src/BasicClassification/__init__.py b/docs/docs_src/BasicClassification/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/BasicClassification/tutorial001.py b/docs/docs_src/BasicClassification/tutorial001.py new file mode 100644 index 0000000..e9aca83 --- /dev/null +++ b/docs/docs_src/BasicClassification/tutorial001.py @@ -0,0 +1,145 @@ +from typing import List + +# --8<-- [start:create_dataset] +import pandas as pd +from transformers import pipeline + +from melusine.base import MelusineDetector + + +def create_dataset(): + df = pd.DataFrame( + [ + { + "header": "Dossier 123456", + "body": "Merci beaucoup pour votre gentillesse et votre écoute !", + }, + { + "header": "Réclamation (Dossier 987654)", + "body": ("Bonjour, je ne suis pas satisfait de cette situation, " "répondez-moi rapidement svp!"), + }, + ] + ) + + return df + + +# --8<-- [end:create_dataset] + + +def transformers_standalone(): + # --8<-- [start:transformers] + model_name_or_path = "cmarkea/distilcamembert-base-nli" + + sentences = [ + "Quelle belle journée aujourd'hui", + "La marée est haute", + "Ce film est une catastrophe, je suis en colère", + ] + + classifier = pipeline(task="zero-shot-classification", model=model_name_or_path, tokenizer=model_name_or_path) + + result = classifier( + sequences=sentences, candidate_labels=", ".join(["positif", "négatif"]), hypothesis_template="Ce texte est {}." + ) + # --8<-- [end:transformers] + + return result + + +# --8<-- [start:detector_init] +class DissatisfactionDetector(MelusineDetector): + """ + Detect if the text expresses dissatisfaction. 
+ """ + + # Dataframe column names + OUTPUT_RESULT_COLUMN = "dissatisfaction_result" + TMP_DETECTION_INPUT_COLUMN = "detection_input" + TMP_DETECTION_OUTPUT_COLUMN = "detection_output" + + # Model inference parameters + POSITIVE_LABEL = "positif" + NEGATIVE_LABEL = "négatif" + HYPOTHESIS_TEMPLATE = "Ce texte est {}." + + def __init__(self, model_name_or_path: str, text_columns: List[str], threshold: float): + self.text_columns = text_columns + self.threshold = threshold + self.classifier = pipeline( + task="zero-shot-classification", model=model_name_or_path, tokenizer=model_name_or_path + ) + + super().__init__(input_columns=text_columns, output_columns=[self.OUTPUT_RESULT_COLUMN], name="dissatisfaction") + # --8<-- [end:detector_init] + + # --8<-- [start:pre_detect] + def pre_detect(self, row, debug_mode=False): + # Assemble the text columns into a single text + effective_text = "" + for col in self.text_columns: + effective_text += "\n" + row[col] + row[self.TMP_DETECTION_INPUT_COLUMN] = effective_text + + # Store the effective detection text in the debug data + if debug_mode: + row[self.debug_dict_col] = {"detection_input": row[self.TMP_DETECTION_INPUT_COLUMN]} + + return row + # --8<-- [end:pre_detect] + + # --8<-- [start:detect] + def detect(self, row, debug_mode=False): + # Run the classifier on the text + pipeline_result = self.classifier( + sequences=row[self.TMP_DETECTION_INPUT_COLUMN], + candidate_labels=", ".join([self.POSITIVE_LABEL, self.NEGATIVE_LABEL]), + hypothesis_template=self.HYPOTHESIS_TEMPLATE, + ) + # Format classification result + result_dict = dict(zip(pipeline_result["labels"], pipeline_result["scores"])) + row[self.TMP_DETECTION_OUTPUT_COLUMN] = result_dict + + # Store ML results in the debug data + if debug_mode: + row[self.debug_dict_col].update(result_dict) + + return row + # --8<-- [end:detect] + + # --8<-- [start:post_detect] + def post_detect(self, row, debug_mode=False): + # Compare classification score to the detection threshold + 
if row[self.TMP_DETECTION_OUTPUT_COLUMN][self.NEGATIVE_LABEL] > self.threshold: + row[self.OUTPUT_RESULT_COLUMN] = True + else: + row[self.OUTPUT_RESULT_COLUMN] = False + + return row + + # --8<-- [end:post_detect] + + +def run(): + # --8<-- [start:run] + df = create_dataset() + + detector = DissatisfactionDetector( + model_name_or_path="cmarkea/distilcamembert-base-nli", + text_columns=["header", "body"], + threshold=0.7, + ) + + df = detector.transform(df) + # --8<-- [end:run] + + # Debug mode + df = create_dataset() + df.debug = True + _ = detector.transform(df) + + return df + + +if __name__ == "__main__": # pragma no cover + run() diff --git a/docs/docs_src/Configurations/__init__.py b/docs/docs_src/Configurations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/Configurations/tutorial001.py b/docs/docs_src/Configurations/tutorial001.py new file mode 100644 index 0000000..4231efd --- /dev/null +++ b/docs/docs_src/Configurations/tutorial001.py @@ -0,0 +1,112 @@ +""" +Default : Use default configuration to explore functionalities +Specify a config_dict +Specify a config path +Specify a MELUSINE_CONFIG_DIR environment variable +""" + + +def from_config(): + # --8<-- [start:from_config] + from melusine.pipeline import MelusinePipeline + + pipeline = MelusinePipeline.from_config(config_key="demo_pipeline") + + +# --8<-- [end:from_config] + + +def from_config_dict(): + # --8<-- [start:from_config_dict] + from melusine.processors import Normalizer + + normalizer_conf = { + "input_columns": ["text"], + "output_columns": ["normalized_text"], + "form": "NFKD", + "lowercase": False, + } + + normalizer = Normalizer.from_config(config_dict=normalizer_conf) + + +# --8<-- [end:from_config_dict] + + +def print_config(): + # --8<-- [start:print_config] + from melusine import config + + print(config["demo_pipeline"]) + + +# --8<-- [end:print_config] + + +def modify_conf_with_dict(): + # --8<-- [start:modify_conf_with_dict] + from melusine 
import config + + # Get a dict of the existing conf + new_conf = config.dict() + + # Add/Modify a config key + new_conf["my_conf_key"] = "my_conf_value" + + # Reset Melusine configurations + config.reset(new_conf) + + +# --8<-- [end:modify_conf_with_dict] + + +def modify_conf_with_path(): # pragma: no cover + """Tested in conf/test_config""" + # --8<-- [start:modify_conf_with_path] + from melusine import config + + # Specify the path to a conf folder + conf_path = "path/to/conf/folder" + + # Reset Melusine configurations + config.reset(config_path=conf_path) + + # >> Using config_path : path/to/conf/folder + + +# --8<-- [end:modify_conf_with_path] + + +def modify_conf_with_env(): # pragma: no cover + """Tested in conf/test_config""" + # --8<-- [start:modify_conf_with_env] + import os + + from melusine import config + + # Specify the MELUSINE_CONFIG_DIR environment variable + os.environ["MELUSINE_CONFIG_DIR"] = "path/to/conf/folder" + + # Reset Melusine configurations + config.reset() + + # >> Using config_path from env variable MELUSINE_CONFIG_DIR + # >> Using config_path : path/to/conf/folder + + +# --8<-- [end:modify_conf_with_env] + + +def export_config(): # pragma: no cover + """Tested in conf/test_config""" + # --8<-- [start:export_config] + from melusine import config + + # Specify the path a folder (created if it doesn't exist) + conf_path = "path/to/conf/folder" + + # Export default configurations to the folder + files_created = config.export_default_config(path=conf_path) + + +# --8<-- [end:export_config] diff --git a/docs/docs_src/GettingStarted/__init__.py b/docs/docs_src/GettingStarted/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/GettingStarted/tutorial001.py b/docs/docs_src/GettingStarted/tutorial001.py new file mode 100644 index 0000000..5122633 --- /dev/null +++ b/docs/docs_src/GettingStarted/tutorial001.py @@ -0,0 +1,14 @@ +def run(): + # --8<-- [start:simple_pipeline] + from melusine.data import load_email_data 
+ from melusine.pipeline import MelusinePipeline + + # Load an email dataset + df = load_email_data() + + # Load a pipeline + pipeline = MelusinePipeline.from_config("demo_pipeline") # (1)! + + # Run the pipeline + df = pipeline.transform(df) + # --8<-- [end:simple_pipeline] diff --git a/docs/docs_src/GettingStarted/tutorial002.py b/docs/docs_src/GettingStarted/tutorial002.py new file mode 100644 index 0000000..08ba297 --- /dev/null +++ b/docs/docs_src/GettingStarted/tutorial002.py @@ -0,0 +1,17 @@ +def run(): + # --8<-- [start:debug_pipeline] + from melusine.data import load_email_data + from melusine.pipeline import MelusinePipeline + + # Load an email dataset + df = load_email_data() + + # Activate debug mode + df.debug = True + + # Load the default pipeline + pipeline = MelusinePipeline.from_config("demo_pipeline") + + # Run the pipeline + df = pipeline.transform(df) + # --8<-- [end:debug_pipeline] diff --git a/docs/docs_src/MelusineDetectors/__init__.py b/docs/docs_src/MelusineDetectors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/MelusineDetectors/tutorial001.py b/docs/docs_src/MelusineDetectors/tutorial001.py new file mode 100644 index 0000000..0d714a5 --- /dev/null +++ b/docs/docs_src/MelusineDetectors/tutorial001.py @@ -0,0 +1,39 @@ +from typing import Callable, List + +import pandas as pd + +from melusine.base import BaseMelusineDetector + + +# --8<-- [start:detector] +class MyCustomDetector(BaseMelusineDetector): + @property + def transform_methods(self) -> List[Callable]: + return [self.prepare, self.run] + + def prepare(self, row, debug_mode=False): + return row + + def run(self, row, debug_mode=False): + row[self.output_columns[0]] = "12345" + return row + + +# --8<-- [end:detector] + + +def run(): + # --8<-- [start:run] + df = pd.DataFrame( + [ + {"input_col": "test1"}, + {"input_col": "test2"}, + ] + ) + + detector = MyCustomDetector(input_columns=["input_col"], output_columns=["output_col"], name="custom") + + 
df = detector.transform(df) + # --8<-- [end:run] + + return df diff --git a/docs/docs_src/MelusineDetectors/tutorial002.py b/docs/docs_src/MelusineDetectors/tutorial002.py new file mode 100644 index 0000000..b645060 --- /dev/null +++ b/docs/docs_src/MelusineDetectors/tutorial002.py @@ -0,0 +1,79 @@ +import pandas as pd + +from melusine.base import MelusineDetector + + +# --8<-- [start:detector] +# --8<-- [start:detector_init] +class MyVirusDetector(MelusineDetector): + """ + Detect if the text expresses dissatisfaction. + """ + + # Dataframe column names + OUTPUT_RESULT_COLUMN = "virus_result" + TMP_DETECTION_INPUT_COLUMN = "detection_input" + TMP_POSITIVE_REGEX_MATCH = "positive_regex_match" + TMP_NEGATIVE_REGEX_MATCH = "negative_regex_match" + + def __init__(self, body_column: str, header_column: str): + self.body_column = body_column + self.header_column = header_column + + super().__init__( + input_columns=[self.body_column, self.header_column], + output_columns=[self.OUTPUT_RESULT_COLUMN], + name="virus", + ) + # --8<-- [end:detector_init] + + # --8<-- [start:pre_detect] + def pre_detect(self, df, debug_mode=False): + # Assemble the text columns into a single column + df[self.TMP_DETECTION_INPUT_COLUMN] = df[self.header_column] + "\n" + df[self.body_column] + + return df + # --8<-- [end:pre_detect] + + # --8<-- [start:detect] + def detect(self, df, debug_mode=False): + text_column = df[self.TMP_DETECTION_INPUT_COLUMN] + positive_regex = r"(virus)" + negative_regex = r"(corona[ _]virus)" + + # Pandas str.extract method on columns + df[self.TMP_POSITIVE_REGEX_MATCH] = text_column.str.extract(positive_regex).apply(pd.notna) + df[self.TMP_NEGATIVE_REGEX_MATCH] = text_column.str.extract(negative_regex).apply(pd.notna) + + return df + # --8<-- [end:detect] + + # --8<-- [start:post_detect] + def post_detect(self, df, debug_mode=False): + # Boolean operation on pandas column + df[self.OUTPUT_RESULT_COLUMN] = df[self.TMP_POSITIVE_REGEX_MATCH] & 
~df[self.TMP_NEGATIVE_REGEX_MATCH] + return df + + # --8<-- [end:post_detect] + + +# --8<-- [end:detector] + + +def run(): + # --8<-- [start:run] + df = pd.DataFrame( + [ + {"body": "This is a dangerous virus", "header": "test"}, + {"body": "test", "header": "test"}, + {"body": "test", "header": "viruses are dangerous"}, + {"body": "corona virus is annoying", "header": "test"}, + ] + ) + + detector = MyVirusDetector(body_column="body", header_column="header") + + df = detector.transform(df) + # --8<-- [end:run] + + return df diff --git a/docs/docs_src/MelusineDetectors/tutorial003.py b/docs/docs_src/MelusineDetectors/tutorial003.py new file mode 100644 index 0000000..67d7b75 --- /dev/null +++ b/docs/docs_src/MelusineDetectors/tutorial003.py @@ -0,0 +1,99 @@ +import re + +import pandas as pd + +from melusine.base import MelusineDetector + + +# --8<-- [start:detector] +# --8<-- [start:detector_init] +class MyVirusDetector(MelusineDetector): + OUTPUT_RESULT_COLUMN = "virus_result" + TMP_DETECTION_INPUT_COLUMN = "detection_input" + TMP_POSITIVE_REGEX_MATCH = "positive_regex_match" + TMP_NEGATIVE_REGEX_MATCH = "negative_regex_match" + + def __init__(self, body_column: str, header_column: str): + self.body_column = body_column + self.header_column = header_column + + super().__init__( + input_columns=[self.body_column, self.header_column], + output_columns=[self.OUTPUT_RESULT_COLUMN], + name="virus", + ) + # --8<-- [end:detector_init] + + # --8<-- [start:pre_detect] + def pre_detect(self, row, debug_mode=False): + effective_text = row[self.header_column] + "\n" + row[self.body_column] + row[self.TMP_DETECTION_INPUT_COLUMN] = effective_text + + if debug_mode: + row[self.debug_dict_col] = {"detection_input": row[self.TMP_DETECTION_INPUT_COLUMN]} + + return row + # --8<-- [end:pre_detect] + + # --8<-- [start:detect] + def detect(self, row, debug_mode=False): + text = row[self.TMP_DETECTION_INPUT_COLUMN] + positive_regex = r"virus" + negative_regex = r"corona[ _]virus" + + 
positive_match = re.search(positive_regex, text) + negative_match = re.search(negative_regex, text) + + row[self.TMP_POSITIVE_REGEX_MATCH] = bool(positive_match) + row[self.TMP_NEGATIVE_REGEX_MATCH] = bool(negative_match) + + if debug_mode: + positive_match_text = ( + positive_match.string[positive_match.start() : positive_match.end()] if positive_match else None + ) + negative_match_text = ( + positive_match.string[negative_match.start() : negative_match.end()] if negative_match else None + ) + row[self.debug_dict_col].update( + { + "positive_match_data": {"result": bool(positive_match), "match_text": positive_match_text}, + "negative_match_data": {"result": bool(negative_match), "match_text": negative_match_text}, + } + ) + + return row + # --8<-- [end:detect] + + # --8<-- [start:post_detect] + def post_detect(self, row, debug_mode=False): + if row[self.TMP_POSITIVE_REGEX_MATCH] and not row[self.TMP_NEGATIVE_REGEX_MATCH]: + row[self.OUTPUT_RESULT_COLUMN] = True + else: + row[self.OUTPUT_RESULT_COLUMN] = False + + return row + + # --8<-- [end:post_detect] + + +# --8<-- [end:detector] + + +def run(): + # --8<-- [start:run] + detector = MyVirusDetector(body_column="body", header_column="header") + + df = pd.DataFrame( + [ + {"body": "This is a dangerous virus", "header": "test"}, + {"body": "test", "header": "test"}, + {"body": "test", "header": "viruses are dangerous"}, + {"body": "corona virus is annoying", "header": "test"}, + ] + ) + df.debug = True + + df = detector.transform(df) + # --8<-- [end:run] + + return df diff --git a/docs/docs_src/MelusineDetectors/tutorial004.py b/docs/docs_src/MelusineDetectors/tutorial004.py new file mode 100644 index 0000000..0d714a5 --- /dev/null +++ b/docs/docs_src/MelusineDetectors/tutorial004.py @@ -0,0 +1,39 @@ +from typing import Callable, List + +import pandas as pd + +from melusine.base import BaseMelusineDetector + + +# --8<-- [start:detector] +class MyCustomDetector(BaseMelusineDetector): + @property + def 
transform_methods(self) -> List[Callable]: + return [self.prepare, self.run] + + def prepare(self, row, debug_mode=False): + return row + + def run(self, row, debug_mode=False): + row[self.output_columns[0]] = "12345" + return row + + +# --8<-- [end:detector] + + +def run(): + # --8<-- [start:run] + df = pd.DataFrame( + [ + {"input_col": "test1"}, + {"input_col": "test2"}, + ] + ) + + detector = MyCustomDetector(input_columns=["input_col"], output_columns=["output_col"], name="custom") + + df = detector.transform(df) + # --8<-- [end:run] + + return df diff --git a/docs/docs_src/MelusinePipeline/__init__.py b/docs/docs_src/MelusinePipeline/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/MelusinePipeline/tutorial001.py b/docs/docs_src/MelusinePipeline/tutorial001.py new file mode 100644 index 0000000..c053d1b --- /dev/null +++ b/docs/docs_src/MelusinePipeline/tutorial001.py @@ -0,0 +1,6 @@ +""" +In construction ... +Tutorial code +""" + +assert "coucou" diff --git a/docs/docs_src/MelusineRegex/__init__.py b/docs/docs_src/MelusineRegex/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/MelusineRegex/tutorial001.py b/docs/docs_src/MelusineRegex/tutorial001.py new file mode 100644 index 0000000..c053d1b --- /dev/null +++ b/docs/docs_src/MelusineRegex/tutorial001.py @@ -0,0 +1,6 @@ +""" +In construction ... +Tutorial code +""" + +assert "coucou" diff --git a/docs/docs_src/MelusineTransformers/__init__.py b/docs/docs_src/MelusineTransformers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/MelusineTransformers/tutorial001.py b/docs/docs_src/MelusineTransformers/tutorial001.py new file mode 100644 index 0000000..c053d1b --- /dev/null +++ b/docs/docs_src/MelusineTransformers/tutorial001.py @@ -0,0 +1,6 @@ +""" +In construction ... 
+Tutorial code +""" + +assert "coucou" diff --git a/docs/docs_src/Models/__init__.py b/docs/docs_src/Models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docs/docs_src/Models/tutorial001.py b/docs/docs_src/Models/tutorial001.py new file mode 100644 index 0000000..c053d1b --- /dev/null +++ b/docs/docs_src/Models/tutorial001.py @@ -0,0 +1,6 @@ +""" +In construction ... +Tutorial code +""" + +assert "coucou" diff --git a/docs/docs_src/__init__.py b/docs/docs_src/__init__.py new file mode 100644 index 0000000..e69de29 From 0205723bd7fe0f7244a1e5b8707f25d3c4e0b75a Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:41:59 +0100 Subject: [PATCH 32/37] :memo: Add documentation for advanced tutorials --- docs/advanced/ContentTagger.md | 1 + docs/advanced/CustomDetector.md | 5 +++++ docs/advanced/ExchangeConnector.md | 1 + docs/advanced/PreTrainedModelsHF.md | 1 + 4 files changed, 8 insertions(+) create mode 100644 docs/advanced/ContentTagger.md create mode 100644 docs/advanced/CustomDetector.md create mode 100644 docs/advanced/ExchangeConnector.md create mode 100644 docs/advanced/PreTrainedModelsHF.md diff --git a/docs/advanced/ContentTagger.md b/docs/advanced/ContentTagger.md new file mode 100644 index 0000000..405e844 --- /dev/null +++ b/docs/advanced/ContentTagger.md @@ -0,0 +1 @@ +# Use custom message tags diff --git a/docs/advanced/CustomDetector.md b/docs/advanced/CustomDetector.md new file mode 100644 index 0000000..bef8a83 --- /dev/null +++ b/docs/advanced/CustomDetector.md @@ -0,0 +1,5 @@ +# Use a custom MelusineDetector template + +## Specify abstract methods + +## Row transformations vs dataframe transformations diff --git a/docs/advanced/ExchangeConnector.md b/docs/advanced/ExchangeConnector.md new file mode 100644 index 0000000..d59b5db --- /dev/null +++ b/docs/advanced/ExchangeConnector.md @@ -0,0 +1 @@ +# Connect melusine to a Microsoft Exchange Mailbox diff --git a/docs/advanced/PreTrainedModelsHF.md 
b/docs/advanced/PreTrainedModelsHF.md new file mode 100644 index 0000000..d3a9a18 --- /dev/null +++ b/docs/advanced/PreTrainedModelsHF.md @@ -0,0 +1 @@ +# Use pre-trained models from HuggingFace From c76272fdbfe8bd7b96fb046a469f0f9450f3d273 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:42:58 +0100 Subject: [PATCH 33/37] :memo: Add documentation about contributions to the melusine package --- docs/contribute/how_to_contribute.md | 1 + docs/contribute/maif.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 docs/contribute/how_to_contribute.md create mode 100644 docs/contribute/maif.md diff --git a/docs/contribute/how_to_contribute.md b/docs/contribute/how_to_contribute.md new file mode 100644 index 0000000..63654cf --- /dev/null +++ b/docs/contribute/how_to_contribute.md @@ -0,0 +1 @@ +# How to contribute to Melusine diff --git a/docs/contribute/maif.md b/docs/contribute/maif.md new file mode 100644 index 0000000..e5251db --- /dev/null +++ b/docs/contribute/maif.md @@ -0,0 +1 @@ +# MAIF From 048f77dc2e14a5c3d647f1d56753493bfc0a5b10 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:43:30 +0100 Subject: [PATCH 34/37] :memo: Add documentation melusine history --- docs/history/history.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 docs/history/history.md diff --git a/docs/history/history.md b/docs/history/history.md new file mode 100644 index 0000000..68c1c38 --- /dev/null +++ b/docs/history/history.md @@ -0,0 +1,24 @@ +# Project history + +Melusine originated at [MAIF](https://www.maif.fr/) in 2019. +MAIF is a mutual insurance company founded in 1934 and based in Niort (France). +MAIF is a "Société à mission" meaning that its activities are + +## Motivation +MAIF receives a large number of emails every day and needs solutions to process them +efficiently while maximizing customer satisfaction.
Typical applications of automated email +processing include: + +- Email routing: Making sure that emails reach the most suited service to be processed +- Email prioritization: Treating the most urgent emails first +- Email summarization: Making it easy and fast for MAIF employees to grasp the email intention + +## Open sourcing +bla bla + +## Refactoring +From 2018 to 2023 + + + + From b9b7496485fbbc6bbb1a058694bb56c2a978efa6 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:44:11 +0100 Subject: [PATCH 35/37] :memo: Add documentation about the melusine code philosophy --- docs/philosophy/philosophy.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/philosophy/philosophy.md diff --git a/docs/philosophy/philosophy.md b/docs/philosophy/philosophy.md new file mode 100644 index 0000000..5b39a08 --- /dev/null +++ b/docs/philosophy/philosophy.md @@ -0,0 +1,5 @@ +# Code philosophy + +## What is a code philosophy and why do I need it ? + +## Design patterns From 4c1378fc041d76b1ee5d0deb4d28202bc0c05fa7 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:44:49 +0100 Subject: [PATCH 36/37] :memo: Add tutorials in the documentation --- docs/tutorials/00_GettingStarted.md | 134 ++++++++++++++++++ docs/tutorials/01_MelusinePipeline.md | 10 ++ docs/tutorials/02_MelusineTransformers.md | 1 + docs/tutorials/03_MelusineRegex.md | 1 + docs/tutorials/04_UsingModels.md | 1 + docs/tutorials/05a_MelusineDetectors.md | 120 ++++++++++++++++ .../05b_MelusineDetectorsAdvanced.md | 100 +++++++++++++ docs/tutorials/06_Configurations.md | 84 +++++++++++ docs/tutorials/07_BasicClassification.md | 122 ++++++++++++++++ 9 files changed, 573 insertions(+) create mode 100644 docs/tutorials/00_GettingStarted.md create mode 100644 docs/tutorials/01_MelusinePipeline.md create mode 100644 docs/tutorials/02_MelusineTransformers.md create mode 100644 docs/tutorials/03_MelusineRegex.md create mode 100644 docs/tutorials/04_UsingModels.md create mode 
100644 docs/tutorials/05a_MelusineDetectors.md create mode 100644 docs/tutorials/05b_MelusineDetectorsAdvanced.md create mode 100644 docs/tutorials/06_Configurations.md create mode 100644 docs/tutorials/07_BasicClassification.md diff --git a/docs/tutorials/00_GettingStarted.md b/docs/tutorials/00_GettingStarted.md new file mode 100644 index 0000000..888515b --- /dev/null +++ b/docs/tutorials/00_GettingStarted.md @@ -0,0 +1,134 @@ +# Getting started with Melusine + +Let's run **emergency detection** with melusine : + +* Load a fake email dataset +* Load a demonstration pipeline +* Run the pipeline + * Apply email cleaning transformations + * Apply emergency detection + +## Input data + +Email datasets typically contain information about: + +- Email sender +- Email recipients +- Email subject/header +- Email body +- Attachments data + +The present tutorial only makes use of the **body** and **header** data. + +| | body | header | +|:---|:---------------------------------|:------------| +| 0 | This is an ëmèrgénçy | Help | +| 1 | How is life ? | Hey ! | +| 2 | Urgent update about Mr. Annoying | Latest news | +| 3 | Please call me now | URGENT | + +## Code + +Typical code for a melusine-based application looks like this : + +```Python +--8<-- +docs_src/GettingStarted/tutorial001.py:simple_pipeline +--8<-- +``` + +1. This tutorial uses one of the default pipeline configurations `demo_pipeline`. Melusine users will typically define their own pipeline configuration. + See more in the [Configurations tutorial](06_Configurations.md){target=_blank} + +## Output data + +The pipeline created extra columns in the dataset. +Some columns are temporary variables required by detectors (ex: `normalized_body`) +and some are detection results with direct business value (ex: `emergency_result`).
+ +| | body | header | normalized_body | emergency_result | +|:---|:---------------------------------|:------------|:---------------------------------|:-------------------| +| 0 | This is an ëmèrgénçy | Help | This is an emergency | True | +| 1 | How is life ? | Hey ! | How is life ? | False | +| 2 | Urgent update about Mr. Annoying | Latest news | Urgent update about Mr. Annoying | False | +| 3 | Please call me now | URGENT | Please call me now | True | + +## Pipeline steps + +Illustration of the pipeline used in the present tutorial : + +``` mermaid +--- +title: Demonstration pipeline +--- +flowchart LR + Input[[Email]] --> A(Cleaner) + A(Cleaner) --> C(Normalizer) + C --> F(Emergency\nDetector) + F --> Output[[Qualified Email]] +``` + +* `Cleaner` : Cleaning transformations such as uniformization of line breaks (`\r\n` -> `\n`) +* `Normalizer` : Text normalisation to delete/replace non-ASCII characters (`éöà` -> `eoa`) +* `EmergencyDetector` : Detection of urgent emails + + +!!! info + This demonstration pipeline is kept minimal but typical pipelines include more complex preprocessing and a variety of detectors. + For example, pipelines may contain: + + - Email Segmentation : Split email conversation into unitary messages + - ContentTagging : Associate tags (SIGNATURE, FOOTER, BODY) to parts of messages + - Appointment detection : For example, detect "construction work will take place on 01/01/2024" as an appointment email. + - More on preprocessing in the [MelusineTransformers tutorial](02_MelusineTransformers.md){target=_blank} + - More on detectors in the [MelusineDetector tutorial](05a_MelusineDetectors.md){target=_blank} + + +## Debug mode + +End users typically want to know what led melusine to a specific detection result. The debug mode generates additional explainability info. + +```Python +--8<-- +docs_src/GettingStarted/tutorial002.py:debug_pipeline +--8<-- +``` + + +A new column `debug_emergency` is created. + +| | ...
| emergency_result | debug_emergency | +|:---|:----|:-------------------|:------------------| +| 0 | ... | True | [details_below] | +| 1 | ... | False | [details_below] | +| 2 | ... | False | [details_below] | +| 3 | ... | True | [details_below] | + +Inspecting the debug data gives a lot of info: + +- `text` : Effective text considered for detection. +- `EmergencyRegex` : melusine used an `EmergencyRegex` object to run detection. +- `match_result` : The `EmergencyRegex` did not match the text +- `positive_match_data` : The `EmergencyRegex` matched **positively** the text pattern "Urgent" (Required condition) +- `negative_match_data` : The `EmergencyRegex` matched **negatively** the text pattern "Mr. Annoying" (Forbidden condition) +- `BLACKLIST` : Detection groups can be defined to easily link a matching pattern to the corresponding regex. DEFAULT is used if no detection group is specified. + + +```Python +# print(df.iloc[2]["debug_emergency"]) +{ + 'text': 'Latest news\nUrgent update about Mr. Annoying'}, + 'EmergencyRegex': { + 'match_result': False, + 'negative_match_data': { + 'BLACKLIST': [ + {'match_text': 'Mr. Annoying', 'start': 32, 'stop': 44} + ]}, + 'neutral_match_data': {}, + 'positive_match_data': { + 'DEFAULT': [ + {'match_text': 'Urgent', 'start': 12, 'stop': 18} + ] + } + } +``` diff --git a/docs/tutorials/01_MelusinePipeline.md b/docs/tutorials/01_MelusinePipeline.md new file mode 100644 index 0000000..2752716 --- /dev/null +++ b/docs/tutorials/01_MelusinePipeline.md @@ -0,0 +1,10 @@ +# MelusinePipeline + +The `MelusinePipeline` class is at the core of melusine. 
It inherits from the `sklearn.pipeline.Pipeline` class and adds extra functionalities such as : + +- Instantiation from configurations +- Input/output coherence check +- Debug mode + +## Code + diff --git a/docs/tutorials/02_MelusineTransformers.md b/docs/tutorials/02_MelusineTransformers.md new file mode 100644 index 0000000..e8d0a18 --- /dev/null +++ b/docs/tutorials/02_MelusineTransformers.md @@ -0,0 +1 @@ +# MelusineTransformers diff --git a/docs/tutorials/03_MelusineRegex.md b/docs/tutorials/03_MelusineRegex.md new file mode 100644 index 0000000..debc560 --- /dev/null +++ b/docs/tutorials/03_MelusineRegex.md @@ -0,0 +1 @@ +# MelusineRegex diff --git a/docs/tutorials/04_UsingModels.md b/docs/tutorials/04_UsingModels.md new file mode 100644 index 0000000..31296db --- /dev/null +++ b/docs/tutorials/04_UsingModels.md @@ -0,0 +1 @@ +# Using AI models diff --git a/docs/tutorials/05a_MelusineDetectors.md b/docs/tutorials/05a_MelusineDetectors.md new file mode 100644 index 0000000..e5202b4 --- /dev/null +++ b/docs/tutorials/05a_MelusineDetectors.md @@ -0,0 +1,120 @@ +# Melusine Detectors + +The `MelusineDetector` component aims at standardizing how detection +is performed in a `MelusinePipeline`. + +!!! tip + Projects running over several years (such as email automation) + may accumulate technical debt over time. Standardizing code practices + can limit the technical debt and ease the onboarding of new developers. + +The `MelusineDetector` class splits detection into three steps: + +- `pre_detect`: Select/combine the inputs needed for detection. +Ex: Select the text parts tagged as `BODY` and combine them with the text +in the email header. +- `detect`: Use regular expressions, ML models or heuristics to run detection +on the input text. +- `post_detect`: Run detection rules such as thresholding or combine results from multiple models. 
+ +The method `transform` is defined by the base class `MelusineDetector` and will call +the pre_detect/detect/post_detect methods in turn (Template pattern). + +```Python +# Instantiate Detector +detector = MyDetector() + +# Run pre_detect, detect and post_detect on input data +data_with_detection = detector.transform(data) +``` + +Here is the full code of a MelusineDetector to detect emails related to viruses. +The next sections break down the different parts of the code. + +```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:detector +--8<-- +``` + +The detector is run on a simple dataframe: +```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:run +--8<-- +``` + +The output is a dataframe with a new `virus_result` column. + +| | body | header | virus_result | +|---:|:--------------------------|:----------------------|:---------------| +| 0 | This is a dangerous virus | test | True | +| 1 | test | test | False | +| 2 | test | viruses are dangerous | True | +| 3 | corona virus is annoying | test | False | + +!!! tip + Columns that are not declared in the `output_columns` are dropped automatically. + + +## Detector init +In the init method, you should call the superclass init and provide: + +- A name for the detector +- Input columns +- Output columns + +```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:detector_init +--8<-- +``` + +!!! tip + If the init method of the super class is enough (parameters `name`, `input_columns` and `output_columns`) + you may skip the init method entirely when defining your `MelusineDetector`. + + +## Detector pre_detect +The `pre_detect` method simply combines the header text and the body text +(separated by a line break). 
+```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:pre_detect +--8<-- +``` + +## Detector detect +The `detect` method applies two regexes on the selected text: +- A positive regex to catch mentions of viruses +- A negative regex to avoid false positive detections +```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:detect +--8<-- +``` + +## Detector post_detect +The `post_detect` method combines the regex detection results to determine the final result. +```Python +--8<-- +docs_src/MelusineDetectors/tutorial001.py:post_detect +--8<-- +``` + +## Are MelusineDetectors mandatory for melusine? +No. + +You can use any scikit-learn compatible component in your `MelusinePipeline`. +However, we recommend using the `MelusineDetector` (and `MelusineTransformer`) +classes to benefit from: + +- Code standardization +- Input columns validation +- Dataframe backend variabilization + Today dict and pandas backends are supported but more backends may be added (e.g. polars) +- Debug mode +- Multiprocessing + +Check out the [next tutorial](05b_MelusineDetectorsAdvanced.md){target=_blank} +to discover advanced features of the `MelusineDetector` class. \ No newline at end of file diff --git a/docs/tutorials/05b_MelusineDetectorsAdvanced.md b/docs/tutorials/05b_MelusineDetectorsAdvanced.md new file mode 100644 index 0000000..1f208a2 --- /dev/null +++ b/docs/tutorials/05b_MelusineDetectorsAdvanced.md @@ -0,0 +1,100 @@ +# Advanced Melusine Detectors + +This tutorial presents the advanced features of the `MelusineDetector` class: + +- Debug mode +- Row wise methods vs DataFrame wise methods +- Custom transform methods + +## Debug mode +`MelusineDetector` objects are designed to be easily debugged. For that purpose, the +pre-detect/detect/post-detect methods all have a `debug_mode` argument. +The debug mode is activated by setting the debug attribute of a dataframe to True. + +```Python hl_lines="3" +import pandas as pd +df = pd.DataFrame({"bla": [1, 2, 3]}) +df.debug = True +``` + +!!! 
warning + Debug mode activation is backend dependent. With a DictBackend, you should use `my_dict["debug"] = True` + +When debug mode is activated, a column named "debug_DETECTOR_NAME" containing an empty +dictionary is automatically created. +Populating this debug dict with debug info is then left to the user's responsibility. + +Example of a detector with debug data +```Python hl_lines="21 22 37-53" +--8<-- +docs_src/MelusineDetectors/tutorial003.py:detector +--8<-- +``` + +In the end, an extra column is created containing debug data: + +| | virus_result | debug_virus | +|---:|:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0 | True | {'detection_input': '...', 'positive_match_data': {'result': True, 'match_text': 'virus'}, 'negative_match_data': {'result': False, 'match_text': None}} | +| 1 | False | {'detection_input': '...', 'positive_match_data': {'result': False, 'match_text': None}, 'negative_match_data': {'result': False, 'match_text': None}} | +| 2 | True | {'detection_input': '...', 'positive_match_data': {'result': True, 'match_text': 'virus'}, 'negative_match_data': {'result': False, 'match_text': None}} | +| 3 | False | {'detection_input': '...', 'positive_match_data': {'result': True, 'match_text': 'virus'}, 'negative_match_data': {'result': True, 'match_text': 'corona virus'}} | + + +## Row methods vs dataframe methods +There are two ways to use the pre-detect/detect/post-detect methods: + +- Row wise: The method works on a single row of a DataFrame. 
+In that case, a map-like method is used to apply it on an entire dataframe +(typically pandas.DataFrame.apply is used with the PandasBackend) +- Dataframe wise: The method works directly on the entire DataFrame. + +!!! tip + Using row wise methods makes your code backend independent. You may + switch from a `PandasBackend` to a `DictBackend` at any time. + The `PandasBackend` also supports multiprocessing for row wise methods. + +To use row wise methods, you just need to name the first parameter of the method "row". +Otherwise, dataframe wise transformations are used. + +Example of a Detector with dataframe wise method (works with a PandasBackend only). +```Python hl_lines="22 28 39" +--8<-- +docs_src/MelusineDetectors/tutorial002.py:detector +--8<-- +``` + +## Custom transform methods +If you are not happy with the `pre_detect`/`detect`/`post_detect` transform methods, you can: + +- Use custom template methods +- Use regular pipeline steps (not inheriting from the `MelusineDetector` class) + +In this example, the `prepare`/`run` custom transform methods are used +instead of the default `pre_detect`/`detect`/`post_detect`. + +```Python +--8<-- +docs_src/MelusineDetectors/tutorial004.py:detector +--8<-- +``` + +To configure custom transform methods you need to: + +- inherit from the melusine.base.BaseMelusineDetector class +- define the `transform_methods` property + +The `transform` method will now call `prepare` and `run`. + +```Python +--8<-- +docs_src/MelusineDetectors/tutorial004.py:run +--8<-- +``` + +We can check that the `run` method was indeed called. + +| | input_col | output_col | +|---:|:------------|-------------:| +| 0 | test1 | 12345 | +| 1 | test2 | 12345 | diff --git a/docs/tutorials/06_Configurations.md b/docs/tutorials/06_Configurations.md new file mode 100644 index 0000000..3b63499 --- /dev/null +++ b/docs/tutorials/06_Configurations.md @@ -0,0 +1,84 @@ +# Configurations + +Melusine components can be instantiated using parameters defined in configurations. 
+The `from_config` method accepts a `config_dict` argument +```Python +--8<-- +docs_src/Configurations/tutorial001.py:from_config_dict +--8<-- +``` + +or a `config_key` argument. +```Python +--8<-- +docs_src/Configurations/tutorial001.py:from_config +--8<-- +``` +When `demo_pipeline` is given as argument, parameters are read from the `melusine.config` object at key `demo_pipeline`. + +## Access configurations + +The melusine configurations can be accessed with the `config` object. +```Python +--8<-- +docs_src/Configurations/tutorial001.py:print_config +--8<-- +``` + +The configuration of the `demo_pipeline` can then be easily inspected. + +```Python +{ + 'steps': [ + {'class_name': 'Cleaner', 'config_key': 'body_cleaner', 'module': 'melusine.processors'}, + {'class_name': 'Cleaner', 'config_key': 'header_cleaner', 'module': 'melusine.processors'}, + {'class_name': 'Segmenter', 'config_key': 'segmenter', 'module': 'melusine.processors'}, + {'class_name': 'ContentTagger', 'config_key': 'content_tagger', 'module': 'melusine.processors'}, + {'class_name': 'TextExtractor', 'config_key': 'text_extractor', 'module': 'melusine.processors'}, + {'class_name': 'Normalizer', 'config_key': 'demo_normalizer', 'module': 'melusine.processors'}, + {'class_name': 'EmergencyDetector', 'config_key': 'emergency_detector', 'module': 'melusine.detectors'} + ] +} +``` + +## Modify configurations +The simplest way to modify configurations is to create a new dictionary directly. +```Python +--8<-- +docs_src/Configurations/tutorial001.py:modify_conf_with_dict +--8<-- +``` + +To deliver code in a production environment, using configuration files should be preferred to +modifying the configurations on the fly. +Melusine lets you specify the path to a folder containing *yaml* files and loads them (the `OmegaConf` package is used behind the scenes). 
+```Python +--8<-- +docs_src/Configurations/tutorial001.py:modify_conf_with_path +--8<-- +``` + +When the `MELUSINE_CONFIG_DIR` environment variable is set, Melusine directly loads the configuration files located at +the path specified by the environment variable. +```Python +--8<-- +docs_src/Configurations/tutorial001.py:modify_conf_with_env +--8<-- +``` + +!!! tip + If the `MELUSINE_CONFIG_DIR` environment variable is set before melusine is imported (e.g., before starting the program), you don't need to call `config.reset()`. + +## Export configurations + +Creating your configuration folder from scratch would be cumbersome. +It is advised to export the default configurations and then modify just the files you need. + +```Python +--8<-- +docs_src/Configurations/tutorial001.py:export_config +--8<-- +``` + +!!! tip + The `export_default_config` method returns a list of paths to all the files created. diff --git a/docs/tutorials/07_BasicClassification.md b/docs/tutorials/07_BasicClassification.md new file mode 100644 index 0000000..857c1f2 --- /dev/null +++ b/docs/tutorials/07_BasicClassification.md @@ -0,0 +1,122 @@ +# Zero Shot Classification + +Machine Learning is commonly used to classify data into pre-defined categories. + +``` mermaid +--- +title: Email classification +--- +flowchart LR + Input[[Email]] --> X(((Classifier))) + X --> A(Car) + X --> B(Boat) + X --> C(Housing) + X --> D(Health) +``` + +Typically, to reach high classification performance, +models need to be trained on context specific labeled data. +Zero-shot classification is a type of classification that +uses a pre-trained model and does not require further training on context specific data. + +## Tutorial intro +In this tutorial we want to detect dissatisfaction in an email dataset. 
+Let's create a basic dataset: +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:create_dataset +--8<-- +``` + +| | header | body | +|---:|:-----------------------------|:-----------------------------------------------------------------------------------| +| 0 | Dossier 123456 | Merci beaucoup pour votre gentillesse et votre écoute ! | +| 1 | Réclamation (Dossier 987654) | Bonjour, je ne suis pas satisfait de cette situation, répondez-moi rapidement svp! | + + +## Classify with Zero-Shot-Classification + +The `transformers` library makes it really simple to use pre-trained models for zero shot classification. + +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:transformers +--8<-- +``` + +The classifier returns a score for the "positif" and "négatif" label for each input text: + +```Json +[ + { + 'sequence': "Quelle belle journée aujourd'hui", + 'labels': ['positif', 'négatif'], + 'scores': [0.95, 0.05] + }, + { + 'sequence': 'La marée est haute', + 'labels': ['positif', 'négatif'], + 'scores': [0.76, 0.24] + }, + {'sequence': 'Ce film est une catastrophe, je suis en colère', + 'labels': ['négatif', 'positif'], + 'scores': [0.97, 0.03] + } +] +``` + + +## Implement a Dissatisfaction detector + +A full email processing pipeline could contain multiple models. +Melusine uses the MelusineDetector template class to standardise how models are integrated into a pipeline. + +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:detector_init +--8<-- +``` + +The `pre_detect` method assembles the text that we want to use for classification. + +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:pre_detect +--8<-- +``` + +The `detect` method runs the classification model on the text. + +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:detect +--8<-- +``` + +The `post_detect` method applies a threshold on the prediction score to determine the detection result. 
+ +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:post_detect +--8<-- +``` + +On top of that, the detector takes care of building debug data to make the result explicable. + +## Run detection + +Putting it all together, we run the detector on the input dataset. + +```Python +--8<-- +docs_src/BasicClassification/tutorial001.py:run +--8<-- +``` + +As a result, we get a new column `dissatisfaction_result` with the detection result. +We could have detection details by running the detector in debug mode. + +| | header | body | dissatisfaction_result | +|---:|:-----------------------------|:-----------------------------------------------------------------------------------|:-------------------------| +| 0 | Dossier 123456 | Merci beaucoup pour votre gentillesse et votre écoute ! | False | +| 1 | Réclamation (Dossier 987654) | Bonjour, je ne suis pas satisfait de cette situation, répondez-moi rapidement svp! | True | \ No newline at end of file From de2f0399944fbb2d93bce4f4912d5ba0d1868c93 Mon Sep 17 00:00:00 2001 From: Hugo Perrier Date: Tue, 12 Dec 2023 16:56:56 +0100 Subject: [PATCH 37/37] :white_check_mark: Refactored melusine tests --- tests/pipeline/test_pipeline.py | 37 ++------------------------- tests/pipeline/test_pipeline_basic.py | 6 ++--- 2 files changed, 5 insertions(+), 38 deletions(-) diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 92a5226..507530e 100644 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -35,7 +35,7 @@ def test_pipeline_with_processors(): df = pd.DataFrame({"a": [dum0, dum0]}) # Fit the pipeline and transform the data - df_transformed = pipe.fit_transform(df) + df_transformed = pipe.transform(df) # Most basic test, check that the pipeline returns a pandas DataFrame assert isinstance(df_transformed, pd.DataFrame) @@ -48,39 +48,6 @@ def test_pipeline_with_processors(): assert df_transformed["c"].iloc[0] == dum2 -def test_pipeline_with_arbitrary_transformer(): - 
class ArbitraryTransformer: - def __init__(self, dummy_attr=dum1): - self.dummy_attr = dummy_attr - - def add_dummy_col(self, col_a_data): - return self.dummy_attr - - def fit(self, x, y=None): - return self - - def transform(self, x): - x["b"] = x["a"].apply(self.add_dummy_col) - - return x - - d1 = ArbitraryTransformer() - - # Create pipeline - pipe = MelusinePipeline(steps=[("d1", d1)], verbose=True) - - # Create data - df = pd.DataFrame({"a": [dum0, dum0]}) - - # Fit the pipeline and transform the data - df_transformed = pipe.fit_transform(df) - - # Most basic test, check that the pipeline returns a pandas DataFrame - assert isinstance(df_transformed, pd.DataFrame) - assert "a" in df_transformed.columns - assert "b" in df_transformed.columns - - def test_meta_pipeline(): d1 = DummyProcessor() d2 = DummyProcessor(output_columns=("c",), dummy_attr=dum2) @@ -95,7 +62,7 @@ def test_meta_pipeline(): df = pd.DataFrame({"a": [dum0, dum0]}) # Fit the pipeline and transform the data - df_transformed = meta_pipe.fit_transform(df) + df_transformed = meta_pipe.transform(df) # Most basic test, check that the pipeline returns a pandas DataFrame assert isinstance(df_transformed, pd.DataFrame) diff --git a/tests/pipeline/test_pipeline_basic.py b/tests/pipeline/test_pipeline_basic.py index 63646f2..5eb94ae 100644 --- a/tests/pipeline/test_pipeline_basic.py +++ b/tests/pipeline/test_pipeline_basic.py @@ -24,7 +24,7 @@ def test_pipeline_basic(dataframe_basic): pipe = MelusinePipeline(steps=[("normalizer", normalizer), ("tokenizer", tokenizer)], verbose=True) # Fit the pipeline and transform the data - df_transformed = pipe.fit_transform(df) + df_transformed = pipe.transform(df) # Most basic test, check that the pipeline returns a pandas DataFrame assert isinstance(df_transformed, pd.DataFrame) @@ -67,7 +67,7 @@ def test_pipeline_from_config(dataframe_basic): pipe = MelusinePipeline.from_config(config_key=pipeline_key, verbose=True) # Fit the pipeline and transform the data - 
df_transformed = pipe.fit_transform(df) + df_transformed = pipe.transform(df) # Make basic tests assert isinstance(df_transformed, pd.DataFrame) @@ -113,7 +113,7 @@ def test_pipeline_from_dict(dataframe_basic): pipe = MelusinePipeline.from_config(config_dict=conf_pipeline_basic, verbose=True) # Fit the pipeline and transform the data - df_transformed = pipe.fit_transform(df) + df_transformed = pipe.transform(df) # Make basic tests assert isinstance(df_transformed, pd.DataFrame)