Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for Scheduler job and job definition creation with input folder, refactor execution manager test #513

Merged
merged 4 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
DB_FILE_PATH = f"{HERE}/jupyter_scheduler/tests/testdb.sqlite"
DB_URL = f"sqlite:///{DB_FILE_PATH}"

TEST_ROOT_DIR = f"{HERE}/jupyter_scheduler/tests/test_root_dir"

Comment on lines +16 to +17
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. os.path.join() should be used to construct file paths, as the directory delimiter can be platform-dependent.

    • Windows uses backslashes instead of forward slashes, e.g. C:\some_dir\child_dir.
  2. When using directories in tests, we should not commit the test dirs into the source repo, as this doesn't scale well when more tests are added. Instead, you should use the tmp_path fixture provided by pytest. When used, this will create a new temporary directory (under /tmp on Linux) and return the path to that temporary directory.

    So here you would want to replace TEST_ROOT_DIR with something like:

    @pytest.fixture
    def scheduler_root_dir(tmp_path):
        return os.path.join(tmp_path, 'workspace_root')

For DB_FILE_PATH, you should use the jp_data_dir fixture provided by jupyter_server.pytest_plugin. This fixture is already under tmp_path.


@pytest.fixture
def jp_server_config(jp_server_config):
Expand Down Expand Up @@ -44,7 +46,7 @@ def jp_scheduler_db():


@pytest.fixture
def jp_scheduler(jp_data_dir):
def jp_scheduler():
return Scheduler(
db_url=DB_URL, root_dir=str(jp_data_dir), environments_manager=MockEnvironmentManager()
db_url=DB_URL, root_dir=str(TEST_ROOT_DIR), environments_manager=MockEnvironmentManager()
)
63 changes: 34 additions & 29 deletions jupyter_scheduler/tests/test_execution_manager.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,55 @@
import os
from contextlib import contextmanager
from pathlib import Path
from unittest.mock import PropertyMock, patch

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from conftest import DB_URL
from jupyter_scheduler.executors import DefaultExecutionManager
from jupyter_scheduler.orm import Base, Job
from jupyter_scheduler.orm import Job

NOTEBOOK_DIR = Path(__file__).resolve().parent / "test_staging_dir" / "job-3"
JOB_ID = "69856f4e-ce94-45fd-8f60-3a587457fce7"
NOTEBOOK_NAME = "side_effects.ipynb"
SIDE_EFECT_FILE_NAME = "output_side_effect.txt"

NOTEBOOK_DIR = Path(__file__).resolve().parent / "test_staging_dir" / "job-4"
NOTEBOOK_PATH = NOTEBOOK_DIR / NOTEBOOK_NAME
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should have a fixture for the staging directory which uses the default staging directory name in Jupyter Scheduler:

@pytest.fixture
def staging_dir(jp_data_dir):
    return os.path.join(jp_data_dir, "scheduler_staging_area")

For the side effect test:

  1. Move output_side_effect.txt and side_effects.ipynb under a new directory containing static files used in tests, e.g. jupyter_scheduler/tests/static.
  2. Write a new staging_dir_with_side_effects fixture that copies these files to staging_dir, and returns a two-tuple of the input URI and the side effect file. So this fixture should return ('side_effects.ipynb', 'output_side_effect.txt')

SIDE_EFFECT_FILE = NOTEBOOK_DIR / "output_side_effect.txt"
SIDE_EFFECT_FILE = NOTEBOOK_DIR / SIDE_EFECT_FILE_NAME


def test_execution_manager_with_side_effects():
db_url = "sqlite://"
engine = create_engine(db_url, echo=False)
Base.metadata.create_all(engine)
db_session = sessionmaker(bind=engine)
with db_session() as session:
@pytest.fixture
def load_job(jp_scheduler_db):
with jp_scheduler_db() as session:
job = Job(
runtime_environment_name="abc",
input_filename=NOTEBOOK_NAME,
job_id="123",
job_id=JOB_ID,
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of hard-coding the job ID, you should modify this fixture to return the ID of the job record it created. This is more similar to how jobs are created by the Scheduler backend.

session.add(job)
session.commit()

manager = DefaultExecutionManager(
job_id="123",
root_dir=str(NOTEBOOK_DIR),
db_url=db_url,
staging_paths={"input": str(NOTEBOOK_PATH)},

@pytest.fixture
def load_job(jp_scheduler_db):
with jp_scheduler_db() as session:
job = Job(
runtime_environment_name="abc",
input_filename=NOTEBOOK_NAME,
job_id=JOB_ID,
)
session.add(job)
session.commit()


def test_add_side_effects_files(jp_scheduler_db, load_job):
manager = DefaultExecutionManager(
job_id=JOB_ID,
root_dir=str(NOTEBOOK_DIR),
db_url=DB_URL,
staging_paths={"input": str(NOTEBOOK_PATH)},
)
manager.add_side_effects_files(str(NOTEBOOK_DIR))

with patch.object(
DefaultExecutionManager,
"db_session",
new_callable=PropertyMock,
) as mock_db_session:
mock_db_session.return_value = db_session
manager.add_side_effects_files(str(NOTEBOOK_DIR))

assert (
"output_side_effect.txt" in job.packaged_files
), "Side effect file was not added to packaged_files"
with jp_scheduler_db() as session:
job = session.query(Job).filter(Job.job_id == JOB_ID).one()
assert SIDE_EFECT_FILE_NAME in job.packaged_files
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Hello world!
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "dbbca65f-4b6a-4490-94b6-f7cdd87e7023",
"metadata": {},
"outputs": [],
"source": [
"file_path = 'a/b/helloworld.txt'\n",
"\n",
"with open(file_path, 'r') as file:\n",
" content = file.read()\n",
"\n",
"print(content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
46 changes: 45 additions & 1 deletion jupyter_scheduler/tests/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import pytest

from jupyter_scheduler.models import (
CreateJob,
CreateJobDefinition,
ListJobDefinitionsQuery,
SortDirection,
SortField,
UpdateJobDefinition,
)
from jupyter_scheduler.orm import JobDefinition
from jupyter_scheduler.orm import Job, JobDefinition


def test_create_job_definition(jp_scheduler):
Expand All @@ -39,6 +40,49 @@ def test_create_job_definition(jp_scheduler):
assert "hello world" == definition.name


def test_create_job_definition_with_input_folder(jp_scheduler):
job_definition_id = jp_scheduler.create_job_definition(
CreateJobDefinition(
input_uri="job-5/import-helloworld.ipynb",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to my previous comment, you can replace this with a input_file_with_input_dir fixture that moves import-helloworld.ipynb and a/b/helloworld.txt to scheduler_root_dir, and then return the tuple ('import-helloworld.ipynb', 'a/b/helloworld.txt').

runtime_environment_name="default",
name="import hello world",
output_formats=["ipynb"],
package_input_folder=True,
)
)

with jp_scheduler.db_session() as session:
definitions = session.query(JobDefinition).all()
assert 1 == len(definitions)
definition = definitions[0]
assert job_definition_id
assert job_definition_id == definition.job_definition_id
assert "import hello world" == definition.name
assert "a/b/helloworld.txt" in definition.packaged_files


def test_create_job_with_input_folder(jp_scheduler):
job_id = jp_scheduler.create_job(
CreateJob(
input_uri="job-5/import-helloworld.ipynb",
runtime_environment_name="default",
name="import hello world",
output_formats=["ipynb"],
package_input_folder=True,
)
)

with jp_scheduler.db_session() as session:
jobs = session.query(Job).all()
assert 1 == len(jobs)
job = jobs[0]
assert job_id
assert job_id == job.job_id
assert "import hello world" == job.name
assert "default" == job.runtime_environment_name
assert "a/b/helloworld.txt" in job.packaged_files


job_definition_1 = {
"job_definition_id": "f4f8c8a9-f539-429a-b69e-b567f578646e",
"name": "hello world 1",
Expand Down
Loading