adding dotenv for credentials management
GokuMohandas committed Sep 15, 2023
1 parent 0133795 commit 097ffaa
Showing 9 changed files with 1,656 additions and 1,735 deletions.
42 changes: 15 additions & 27 deletions README.md
@@ -101,6 +101,17 @@ We'll start by setting up our cluster with the environment and compute configura

</details>

### Credentials
```bash
touch .env
```
```bash
# Inside .env
GITHUB_USERNAME="CHANGE_THIS_TO_YOUR_USERNAME" # ← CHANGE THIS
```
```bash
source .env
```
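
Note that sourcing a plain `KEY=value` file sets a shell variable without exporting it to child processes; the package also reads the file directly with python-dotenv (added in `madewithml/__init__.py` below), so Python code sees the value either way. A minimal sketch of the downstream read, assuming python-dotenv is installed and the `.env` file above sits in the working directory:

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into os.environ
print(os.environ.get("GITHUB_USERNAME", "<not set>"))
```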

### Git setup

Create a repository by following these instructions: [Create a new repository](https://github.com/new) → name it `Made-With-ML` → Toggle `Add a README file` (**very important** as this creates a `main` branch) → Click `Create repository` (scroll down)
@@ -109,7 +120,7 @@ Now we're ready to clone the repository that has all of our code:
```bash
git clone https://github.com/GokuMohandas/Made-With-ML.git .
git remote set-url origin https://github.com/GITHUB_USERNAME/Made-With-ML.git # <-- CHANGE THIS to your username
git remote set-url origin https://github.com/$GITHUB_USERNAME/Made-With-ML.git # <-- CHANGE THIS to your username
git checkout -b dev
```
@@ -317,15 +328,7 @@ python madewithml/predict.py predict \
python madewithml/serve.py --run_id $RUN_ID
```
While the application is running, we can use it via cURL, Python, etc.:
```bash
# via cURL
curl -X POST -H "Content-Type: application/json" -d '{
"title": "Transfer learning with transformers",
"description": "Using transformers for transfer learning on text classification tasks."
}' http://127.0.0.1:8000/predict
```
Once the application is running, we can use it via cURL, Python, etc.:
```python
# via Python
@@ -341,13 +344,6 @@ python madewithml/predict.py predict \
ray stop # shutdown
```
```bash
export HOLDOUT_LOC="https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/holdout.csv"
curl -X POST -H "Content-Type: application/json" -d '{
"dataset_loc": "https://raw.githubusercontent.com/GokuMohandas/Made-With-ML/main/datasets/holdout.csv"
}' http://127.0.0.1:8000/evaluate
```
</details>
<details open>
@@ -362,15 +358,7 @@ curl -X POST -H "Content-Type: application/json" -d '{
python madewithml/serve.py --run_id $RUN_ID
```

While the application is running, we can use it via cURL, Python, etc.:
```bash
# via cURL
curl -X POST -H "Content-Type: application/json" -d '{
"title": "Transfer learning with transformers",
"description": "Using transformers for transfer learning on text classification tasks."
}' http://127.0.0.1:8000/predict
```
Once the application is running, we can use it via cURL, Python, etc.:

```python
# via Python
@@ -399,7 +387,7 @@ export RUN_ID=$(python madewithml/predict.py get-best-run-id --experiment-name $
pytest --run-id=$RUN_ID tests/model --verbose --disable-warnings
# Coverage
python3 -m pytest --cov madewithml --cov-report html
python3 -m pytest tests/code --cov madewithml --cov-report html --disable-warnings
```

## Production
3 changes: 3 additions & 0 deletions madewithml/__init__.py
@@ -0,0 +1,3 @@
from dotenv import load_dotenv

load_dotenv()
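
Because this runs at package import, every CLI entry point (`train.py`, `serve.py`, etc.) picks up the `.env` values without any extra wiring. A small sketch of python-dotenv's default precedence, assuming its standard behavior (real environment variables win unless `override=True` is passed); the values are hypothetical:

```python
import os
from dotenv import load_dotenv

os.environ["GITHUB_USERNAME"] = "from-shell"  # hypothetical pre-existing value
load_dotenv()                                 # default: existing env vars are kept
print(os.environ["GITHUB_USERNAME"])          # -> from-shell
load_dotenv(override=True)                    # force the values defined in .env instead
print(os.environ["GITHUB_USERNAME"])          # -> value from .env
```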
4 changes: 3 additions & 1 deletion madewithml/config.py
@@ -1,5 +1,6 @@
# config.py
import logging
import os
import sys
from pathlib import Path

@@ -10,9 +11,10 @@
ROOT_DIR = Path(__file__).parent.parent.absolute()
LOGS_DIR = Path(ROOT_DIR, "logs")
LOGS_DIR.mkdir(parents=True, exist_ok=True)
EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")

# Config MLflow
MODEL_REGISTRY = Path("/tmp/mlflow")
MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")
Path(MODEL_REGISTRY).mkdir(parents=True, exist_ok=True)
MLFLOW_TRACKING_URI = "file://" + str(MODEL_REGISTRY.absolute())
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
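
With the username in the environment, both the shared EFS directory and the MLflow registry become per-user paths instead of a shared `/tmp/mlflow`. For a hypothetical `GITHUB_USERNAME=octocat`, the lines above resolve roughly as follows (illustrative sketch, not part of the repo):

```python
import os
from pathlib import Path

os.environ["GITHUB_USERNAME"] = "octocat"  # hypothetical value for illustration
EFS_DIR = Path(f"/efs/shared_storage/madewithml/{os.environ.get('GITHUB_USERNAME', '')}")
MODEL_REGISTRY = Path(f"{EFS_DIR}/mlflow")
MLFLOW_TRACKING_URI = "file://" + str(MODEL_REGISTRY.absolute())
print(MLFLOW_TRACKING_URI)  # file:///efs/shared_storage/madewithml/octocat/mlflow
```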
2 changes: 1 addition & 1 deletion madewithml/predict.py
@@ -125,7 +125,7 @@ def predict(
# Load components
best_checkpoint = get_best_checkpoint(run_id=run_id)
predictor = TorchPredictor.from_checkpoint(best_checkpoint)
preprocessor = predictor.get_preprocessor()
# preprocessor = predictor.get_preprocessor()

# Predict
sample_df = pd.DataFrame([{"title": title, "description": description, "tag": "other"}])
3 changes: 2 additions & 1 deletion madewithml/serve.py
@@ -1,4 +1,5 @@
import argparse
import os
from http import HTTPStatus
from typing import Dict

@@ -75,5 +76,5 @@ async def _predict(self, request: Request) -> Dict:
parser.add_argument("--run_id", help="run ID to use for serving.")
parser.add_argument("--threshold", type=float, default=0.9, help="threshold for `other` class.")
args = parser.parse_args()
ray.init()
ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}})
serve.run(ModelDeployment.bind(run_id=args.run_id, threshold=args.threshold))
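
Passing `runtime_env={"env_vars": ...}` to `ray.init` is what makes the credential visible inside Ray worker processes, which do not automatically inherit it from the driver's shell. A standalone sketch of the pattern, assuming Ray is installed (the fallback username is hypothetical so the snippet runs on its own):

```python
import os
import ray

os.environ.setdefault("GITHUB_USERNAME", "octocat")  # hypothetical fallback for the sketch
ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}})

@ray.remote
def whoami() -> str:
    # Workers see the variable because runtime_env injects it into each worker process.
    return os.environ.get("GITHUB_USERNAME", "<not set>")

print(ray.get(whoami.remote()))
ray.shutdown()
```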
10 changes: 4 additions & 6 deletions madewithml/train.py
@@ -1,5 +1,6 @@
import datetime
import json
import os
from typing import Tuple

import numpy as np
@@ -23,7 +24,7 @@
from typing_extensions import Annotated

from madewithml import data, models, utils
from madewithml.config import MLFLOW_TRACKING_URI, logger
from madewithml.config import EFS_DIR, MLFLOW_TRACKING_URI, logger

# Initialize Typer CLI app
app = typer.Typer()
@@ -200,10 +201,7 @@ def train_model(
)

# Run config
run_config = RunConfig(
callbacks=[mlflow_callback],
checkpoint_config=checkpoint_config,
)
run_config = RunConfig(callbacks=[mlflow_callback], checkpoint_config=checkpoint_config, storage_path=EFS_DIR)

# Dataset
ds = data.load_data(dataset_loc=dataset_loc, num_samples=train_loop_config["num_samples"])
@@ -252,5 +250,5 @@ def train_model(
if __name__ == "__main__": # pragma: no cover, application
if ray.is_initialized():
ray.shutdown()
ray.init()
ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}})
app()
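
The other substantive change in this file points Ray's `RunConfig` at the shared EFS directory via `storage_path`, so checkpoints and results land on storage that every node can reach. A rough sketch of the construction, assuming a Ray 2.x release where `RunConfig` accepts `storage_path` (the path and checkpoint settings are hypothetical, and the MLflow callback is omitted for brevity):

```python
from ray.air import CheckpointConfig, RunConfig

checkpoint_config = CheckpointConfig(num_to_keep=1)  # hypothetical retention policy
run_config = RunConfig(
    checkpoint_config=checkpoint_config,
    storage_path="/efs/shared_storage/madewithml/octocat",  # hypothetical EFS_DIR
)
```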
10 changes: 4 additions & 6 deletions madewithml/tune.py
@@ -1,5 +1,6 @@
import datetime
import json
import os

import ray
import typer
@@ -19,7 +20,7 @@
from typing_extensions import Annotated

from madewithml import data, train, utils
from madewithml.config import MLFLOW_TRACKING_URI, logger
from madewithml.config import EFS_DIR, MLFLOW_TRACKING_URI, logger

# Initialize Typer CLI app
app = typer.Typer()
@@ -117,10 +118,7 @@ def tune_models(
experiment_name=experiment_name,
save_artifact=True,
)
run_config = RunConfig(
callbacks=[mlflow_callback],
checkpoint_config=checkpoint_config,
)
run_config = RunConfig(callbacks=[mlflow_callback], checkpoint_config=checkpoint_config, storage_path=EFS_DIR)

# Hyperparameters to start with
initial_params = json.loads(initial_params)
@@ -178,5 +176,5 @@ def tune_models(
if __name__ == "__main__": # pragma: no cover, application
if ray.is_initialized():
ray.shutdown()
ray.init()
ray.init(runtime_env={"env_vars": {"GITHUB_USERNAME": os.environ["GITHUB_USERNAME"]}})
app()