From adca484088886121195260b935fa518639ef8860 Mon Sep 17 00:00:00 2001 From: Nikhil Ramchandani Date: Tue, 14 May 2024 13:28:34 -0400 Subject: [PATCH 1/4] draft compile tester --- .github/workflows/productionworkflow.yaml | 27 +++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/productionworkflow.yaml diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml new file mode 100644 index 0000000..c4cf74c --- /dev/null +++ b/.github/workflows/productionworkflow.yaml @@ -0,0 +1,27 @@ +name: Test Production on Pull Request + +on: + pull_request: + branches: + - main + +jobs: + test-production: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.8' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run deployment + run: | + python main.py \ No newline at end of file From b822ccd3b667491a747148044e8bc7134e8f72b2 Mon Sep 17 00:00:00 2001 From: Nikhil Ramchandani Date: Fri, 17 May 2024 01:44:16 -0400 Subject: [PATCH 2/4] updated env variables and test prod workflow --- .github/workflows/productionworkflow.yaml | 13 +++++++--- .../ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py | 5 ++-- .../ML_Service_GKE/Mongo_Utils/mongo_funcs.py | 6 ++--- .../Mongo_Utils/production_mongo_funcs.py | 26 ++++++++++--------- .../ML_GKE/ML_Service_GKE/bootstrappers.py | 3 +-- .../ML_GKE/ML_Service_GKE/csv_funcs.py | 4 +-- .../ML_GKE/ML_Service_GKE/main.py | 3 +-- 7 files changed, 33 insertions(+), 27 deletions(-) diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml index c4cf74c..8af03ee 100644 --- a/.github/workflows/productionworkflow.yaml +++ b/.github/workflows/productionworkflow.yaml @@ -1,4 +1,4 @@ -name: Test Production on Pull Request +name: Test Production on: pull_request:
@@ -8,6 +8,11 @@ on: jobs: test-production: runs-on: ubuntu-latest + env: + db_name: ${{ secrets.DB_NAME }} + MONGO_URI_NAACP: ${{ secrets.MONGO_URI_NAACP }} + GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + steps: - name: Checkout code uses: actions/checkout@v2 @@ -15,13 +20,13 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.8' + python-version: '3.9' - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install -r ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/requirements.txt - name: Run deployment run: | - python main.py \ No newline at end of file + python ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py \ No newline at end of file diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py index 1710c35..ca75b62 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py @@ -1,7 +1,8 @@ import pandas as pd from tqdm import tqdm +import os + -import secret from global_state import global_instance from processingUtils import get_sentences, get_snippet, check_snippets, run_entity_recognition, run_pipeline @@ -70,7 +71,7 @@ def process_data(chunk, df, data_schema, data_packaging_scheme, nlp_ner): ] for (entities, method) in check_order: - check_text, location_geocode, existing_loc_geocode = check_snippets(secret.API_KEY, entities[1], entities[0]) + check_text, location_geocode, existing_loc_geocode = check_snippets(os.environ['API_KEY'], entities[1], entities[0]) if not check_text: discarded_articles.append(df['Tagging'][idx]) break diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py 
b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py index 6706b42..2162efa 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py @@ -1,9 +1,9 @@ -import secret +import os from pymongo import MongoClient def connect_MongoDB_Prod(): try: - client = MongoClient(secret.MONGO_URI_NAACP) + client = MongoClient(os.environ['MONGO_URI_NAACP']) db = client['se_naacp_db'] return db except Exception as err: @@ -12,7 +12,7 @@ def connect_MongoDB_Prod(): def update_job_status(client, upload_id, user_id, timestamp, article_cnt, status, message): try: - db = client[secret.db_name] + db = client[os.environ['db_name']] upload_collection = db["uploads"] if (upload_collection.find_one({'uploadID': upload_id})): diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py index 9dbbeaf..dc8bd66 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py @@ -1,7 +1,7 @@ -import secret from datetime import datetime from global_state import global_instance from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod +import os def convert_to_datesum(s): date_formatted = s.replace('-', '').replace(' ', '').replace(':', '') @@ -34,7 +34,7 @@ def addExistingTracts(tract_collection): def send_Discarded(client, discard_list): try: # Pack and send all articles - db_prod = client[secret.db_name] + db_prod = client[os.environ['db_name']] discarded_collection_name = "discarded" discarded_collection = db_prod[discarded_collection_name] @@ -62,7 +62,7 @@ def send_Discarded(client, discard_list): # ==== Packing Funcs ==== def send_to_production(client, df): try: - db_prod = 
client[secret.db_name] + db_prod = client[os.environ['db_name']] # Pack and send all articles pack_articles(db_prod, df) @@ -84,22 +84,24 @@ def pack_articles(db_prod, df): collection_list = db_prod.list_collection_names() if articles_collection_name not in collection_list: - db_prod.create_collection(articles_collection_name) - print(f"[INFO] Collection '{articles_collection_name}' created.") + db_prod.create_collection(articles_collection_name) + print(f"[INFO] Collection '{articles_collection_name}' created.") article_df = df.set_index('id') article_dict = article_df.T.to_dict('dict') for article_key in article_dict.keys(): - article = article_dict[article_key] - if ('openai_labels' not in article): - article["openai_labels"] = [] - else: - article["openai_labels"] = string_to_list(article["openai_labels"]) - article["dateSum"] = convert_to_datesum(article["pub_date"]) - article_payload.append(article) + article = article_dict[article_key] + if ('openai_labels' not in article): + article["openai_labels"] = [] + else: + article["openai_labels"] = string_to_list(article["openai_labels"]) + article["dateSum"] = convert_to_datesum(article["pub_date"]) + article_payload.append(article) articles_collection.insert_many(article_payload) + + print("[INFO] Articles Successfully inserted!") return except Exception as err: diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py index 3905ff0..e2242d2 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py @@ -1,7 +1,6 @@ import os import json import zipfile -import secret from art import * from bson import ObjectId from pymongo import MongoClient @@ -235,7 +234,7 @@ def bootstrap_MongoDB_Prod(client, defined_collection_names): if (client == None): raise Exception("No database was given!") - db_prod = 
client[secret.db_name] + db_prod = client[os.environ['db_name']] # Here we check for the upload collection and make it if it doesn't exist collection_list = db_prod.list_collection_names() diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py index 5defbc6..9a54d3e 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py @@ -1,4 +1,4 @@ -import secret +import os import pandas as pd from io import StringIO # Import StringIO from fastapi import UploadFile # For typing @@ -26,7 +26,7 @@ def is_duplicate_discarded(tag, discarded_collection): return discarded_collection.count_documents(queryDiscarded) > 0 def run_validation(client, df): - db_prod = client[secret.db_name] + db_prod = client[os.environ['db_name']] collection_list = db_prod.list_collection_names() if ('articles_data' in collection_list): diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py index 361036e..446315e 100644 --- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py +++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py @@ -8,7 +8,6 @@ from google.cloud import pubsub_v1 import nltk -import secret from ML_Entry import run_pipeline from global_state import global_instance from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod @@ -73,7 +72,7 @@ def startup_event(): db_prod = connect_MongoDB_Prod() db_manager = global_instance.get_data("db_manager") # We then create our first MongoDB connection - db_manager.init_connection(uri=secret.MONGO_URI_NAACP) + db_manager.init_connection(uri=os.environ['MONGO_URI_NAACP']) db_manager.run_job( bootstrap_MongoDB_Prod, From c38e1e5e3dd114e033dbb8ee069f06fdd9cab9fa Mon Sep 17 00:00:00 2001 From: Nikhil Ramchandani Date: Mon, 20 May 2024 13:31:37 -0400 Subject: [PATCH 
3/4] env files ignored --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0663cfb..939444f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ secret.py testing-ner.ipynb keys/ env.sh +.env # Mill combine_rss_articles.ipynb From b7ec72d8307910d2264cfccc4905f4857384e0fb Mon Sep 17 00:00:00 2001 From: Nikhil Ramchandani Date: Tue, 21 May 2024 14:57:16 -0400 Subject: [PATCH 4/4] load google cloud key --- .github/workflows/productionworkflow.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml index 8af03ee..ae92b88 100644 --- a/.github/workflows/productionworkflow.yaml +++ b/.github/workflows/productionworkflow.yaml @@ -11,7 +11,6 @@ jobs: env: db_name: ${{ secrets.DB_NAME }} MONGO_URI_NAACP: ${{ secrets.MONGO_URI_NAACP }} - GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} steps: - name: Checkout code @@ -27,6 +26,12 @@ jobs: python -m pip install --upgrade pip pip install -r ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/requirements.txt + - name: Load Google Service File + env: + GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + run: | + echo $GOOGLE_APPLICATION_CREDENTIALS > ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/credentials.json + - name: Run deployment run: | python ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py \ No newline at end of file