From adca484088886121195260b935fa518639ef8860 Mon Sep 17 00:00:00 2001
From: Nikhil Ramchandani <ramchandaninikhil01@gmail.com>
Date: Tue, 14 May 2024 13:28:34 -0400
Subject: [PATCH 1/4] draft compile tester

---
 .github/workflows/productionworkflow.yaml | 27 +++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 .github/workflows/productionworkflow.yaml

diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml
new file mode 100644
index 0000000..c4cf74c
--- /dev/null
+++ b/.github/workflows/productionworkflow.yaml
@@ -0,0 +1,27 @@
+name: Test Production on Pull Request
+
+on:
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test-production:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.8'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Run deployment
+        run: |
+          python main.py
\ No newline at end of file

From b822ccd3b667491a747148044e8bc7134e8f72b2 Mon Sep 17 00:00:00 2001
From: Nikhil Ramchandani <ramchandaninikhil01@gmail.com>
Date: Fri, 17 May 2024 01:44:16 -0400
Subject: [PATCH 2/4] updated env variables and test prod workflow

---
 .github/workflows/productionworkflow.yaml     | 13 +++++++---
 .../ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py  |  5 ++--
 .../ML_Service_GKE/Mongo_Utils/mongo_funcs.py |  6 ++---
 .../Mongo_Utils/production_mongo_funcs.py     | 26 ++++++++++---------
 .../ML_GKE/ML_Service_GKE/bootstrappers.py    |  3 +--
 .../ML_GKE/ML_Service_GKE/csv_funcs.py        |  4 +--
 .../ML_GKE/ML_Service_GKE/main.py             |  3 +--
 7 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml
index c4cf74c..8af03ee 100644
--- a/.github/workflows/productionworkflow.yaml
+++ b/.github/workflows/productionworkflow.yaml
@@ -1,4 +1,4 @@
-name: Test Production on Pull Request
+name: Test Production
 
 on:
   pull_request:
@@ -8,6 +8,11 @@ on:
 jobs:
   test-production:
     runs-on: ubuntu-latest
+    env:
+      db_name: ${{ secrets.DB_NAME }}
+      MONGO_URI_NAACP: ${{ secrets.MONGO_URI_NAACP }}
+      GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
+
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
@@ -15,13 +20,13 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
-          python-version: '3.8'
+          python-version: '3.9'
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -r requirements.txt
+          pip install -r ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/requirements.txt
 
       - name: Run deployment
         run: |
-          python main.py
\ No newline at end of file
+          python ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
\ No newline at end of file
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
index 1710c35..ca75b62 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/ML_Pred_Funcs/ML_funcs.py
@@ -1,7 +1,8 @@
 import pandas as pd
 from tqdm import tqdm
+import os
+
 
-import secret
 from global_state import global_instance
 from processingUtils import get_sentences, get_snippet, check_snippets, run_entity_recognition, run_pipeline
 
@@ -70,7 +71,7 @@ def process_data(chunk, df, data_schema, data_packaging_scheme, nlp_ner):
                     ]
             
                     for (entities, method) in check_order:
-                        check_text, location_geocode, existing_loc_geocode = check_snippets(secret.API_KEY, entities[1], entities[0])
+                        check_text, location_geocode, existing_loc_geocode = check_snippets(os.environ['API_KEY'], entities[1], entities[0])
                         if not check_text:
                             discarded_articles.append(df['Tagging'][idx])
                             break 
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
index 6706b42..2162efa 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/mongo_funcs.py
@@ -1,9 +1,9 @@
-import secret
+import os
 from pymongo import MongoClient
 
 def connect_MongoDB_Prod():
     try:
-        client = MongoClient(secret.MONGO_URI_NAACP)
+        client = MongoClient(os.environ['MONGO_URI_NAACP'])
         db = client['se_naacp_db']
         return db
     except Exception as err:
@@ -12,7 +12,7 @@ def connect_MongoDB_Prod():
 
 def update_job_status(client, upload_id, user_id, timestamp, article_cnt, status, message):
     try:
-        db = client[secret.db_name]
+        db = client[os.environ['db_name']]
         
         upload_collection = db["uploads"]
         if (upload_collection.find_one({'uploadID': upload_id})):
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
index 9dbbeaf..dc8bd66 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/Mongo_Utils/production_mongo_funcs.py
@@ -1,7 +1,7 @@
-import secret
 from datetime import datetime
 from global_state import global_instance
 from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod
+import os
 
 def convert_to_datesum(s):
 	date_formatted = s.replace('-', '').replace(' ', '').replace(':', '')
@@ -34,7 +34,7 @@ def addExistingTracts(tract_collection):
 def send_Discarded(client, discard_list):
 	try:
 		# Pack and send all articles
-		db_prod = client[secret.db_name]
+		db_prod = client[os.environ['db_name']]
 		
 		discarded_collection_name = "discarded"
 		discarded_collection = db_prod[discarded_collection_name]
@@ -62,7 +62,7 @@ def send_Discarded(client, discard_list):
 # ==== Packing Funcs ====
 def send_to_production(client, df):
 	try:
-		db_prod = client[secret.db_name]
+		db_prod = client[os.environ['db_name']]
 
 		# Pack and send all articles
 		pack_articles(db_prod, df)
@@ -84,22 +84,24 @@ def pack_articles(db_prod, df):
 		collection_list = db_prod.list_collection_names()
 
 		if articles_collection_name not in collection_list:
-		    db_prod.create_collection(articles_collection_name)
-		    print(f"[INFO] Collection '{articles_collection_name}' created.")
+			db_prod.create_collection(articles_collection_name)
+			print(f"[INFO] Collection '{articles_collection_name}' created.")
 
 		article_df = df.set_index('id')
 		article_dict = article_df.T.to_dict('dict')
 
 		for article_key in article_dict.keys():
-		    article = article_dict[article_key]
-		    if ('openai_labels' not in article):
-		        article["openai_labels"] = []
-		    else:
-		        article["openai_labels"] = string_to_list(article["openai_labels"])
-		    article["dateSum"] = convert_to_datesum(article["pub_date"])
-		    article_payload.append(article)
+			article = article_dict[article_key]
+			if ('openai_labels' not in article):
+				article["openai_labels"] = []
+			else:
+				article["openai_labels"] = string_to_list(article["openai_labels"])
+			article["dateSum"] = convert_to_datesum(article["pub_date"])
+			article_payload.append(article)
 
 		articles_collection.insert_many(article_payload)
+
+
 		print("[INFO] Articles Successfully inserted!")
 		return
 	except Exception as err:
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
index 3905ff0..e2242d2 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/bootstrappers.py
@@ -1,7 +1,6 @@
 import os
 import json
 import zipfile
-import secret
 from art import *
 from bson import ObjectId
 from pymongo import MongoClient
@@ -235,7 +234,7 @@ def bootstrap_MongoDB_Prod(client, defined_collection_names):
         if (client == None):
             raise Exception("No database was given!")
 
-        db_prod = client[secret.db_name]
+        db_prod = client[os.environ['db_name']]
             
         # Here we check for the upload collection and make it if it doesn't exist
         collection_list = db_prod.list_collection_names()
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
index 5defbc6..9a54d3e 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/csv_funcs.py
@@ -1,4 +1,4 @@
-import secret
+import os
 import pandas as pd
 from io import StringIO  # Import StringIO
 from fastapi import UploadFile # For typing
@@ -26,7 +26,7 @@ def is_duplicate_discarded(tag, discarded_collection):
 	return discarded_collection.count_documents(queryDiscarded) > 0
 
 def run_validation(client, df):
-	db_prod = client[secret.db_name]
+	db_prod = client[os.environ['db_name']]
 	collection_list = db_prod.list_collection_names()
 
 	if ('articles_data' in collection_list):
diff --git a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
index 361036e..446315e 100644
--- a/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
+++ b/se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
@@ -8,7 +8,6 @@
 from google.cloud import pubsub_v1
 
 import nltk
-import secret
 from ML_Entry import run_pipeline
 from global_state import global_instance
 from Mongo_Utils.mongo_funcs import connect_MongoDB_Prod
@@ -73,7 +72,7 @@ def startup_event():
         db_prod = connect_MongoDB_Prod()
         db_manager = global_instance.get_data("db_manager")
         # We then create our first MongoDB connection
-        db_manager.init_connection(uri=secret.MONGO_URI_NAACP)
+        db_manager.init_connection(uri=os.environ['MONGO_URI_NAACP'])
 
         db_manager.run_job(
             bootstrap_MongoDB_Prod, 

From c38e1e5e3dd114e033dbb8ee069f06fdd9cab9fa Mon Sep 17 00:00:00 2001
From: Nikhil Ramchandani <ramchandaninikhil01@gmail.com>
Date: Mon, 20 May 2024 13:31:37 -0400
Subject: [PATCH 3/4] env files ignored

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 0663cfb..939444f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@ secret.py
 testing-ner.ipynb
 keys/
 env.sh
+.env
 
 # Mill
 combine_rss_articles.ipynb

From b7ec72d8307910d2264cfccc4905f4857384e0fb Mon Sep 17 00:00:00 2001
From: Nikhil Ramchandani <ramchandaninikhil01@gmail.com>
Date: Tue, 21 May 2024 14:57:16 -0400
Subject: [PATCH 4/4] load google cloud key

---
 .github/workflows/productionworkflow.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/productionworkflow.yaml b/.github/workflows/productionworkflow.yaml
index 8af03ee..ae92b88 100644
--- a/.github/workflows/productionworkflow.yaml
+++ b/.github/workflows/productionworkflow.yaml
@@ -11,7 +11,6 @@ jobs:
     env:
       db_name: ${{ secrets.DB_NAME }}
       MONGO_URI_NAACP: ${{ secrets.MONGO_URI_NAACP }}
-      GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
 
     steps:
       - name: Checkout code
@@ -27,6 +26,12 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/requirements.txt
 
+      - name: Load Google Service File  # writes the secret to credentials.json; quoted so the shell does not word-split or glob it
+        env:
+          GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
+        run: |
+          printf '%s\n' "$GOOGLE_APPLICATION_CREDENTIALS" > ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/credentials.json
+
       - name: Run deployment
         run: |
           python ./se_ml_production/ML_backend_GKE/ML_GKE/ML_Service_GKE/main.py
\ No newline at end of file