diff --git a/pyright/alt-1/requirements-pinned.txt b/pyright/alt-1/requirements-pinned.txt
index 665140cf6d402..67893e6fa7963 100644
--- a/pyright/alt-1/requirements-pinned.txt
+++ b/pyright/alt-1/requirements-pinned.txt
@@ -1,15 +1,14 @@
 agate==1.9.1
 aiobotocore==2.15.1
 aiofile==3.8.8
-aiohappyeyeballs==2.4.2
-aiohttp==3.10.6
+aiohappyeyeballs==2.4.3
+aiohttp==3.10.8
 aioitertools==0.12.0
 aiosignal==1.3.1
 alembic==1.13.3
 aniso8601==9.0.1
 annotated-types==0.7.0
 anyio==4.6.0
-appnope==0.1.4
 argon2-cffi==23.1.0
 argon2-cffi-bindings==21.2.0
 arrow==1.3.0
@@ -25,9 +24,9 @@ backports-tarfile==1.2.0
 beautifulsoup4==4.12.3
 bleach==6.1.0
 boto3==1.35.23
-boto3-stubs-lite==1.35.29
+boto3-stubs-lite==1.35.30
 botocore==1.35.23
-botocore-stubs==1.35.29
+botocore-stubs==1.35.30
 buildkite-test-collector==0.1.9
 cachetools==5.5.0
 caio==0.9.17
@@ -79,7 +78,7 @@ decorator==5.1.1
 deepdiff==8.0.1
 defusedxml==0.7.1
 deltalake==0.20.1
-dill==0.3.8
+dill==0.3.9
 distlib==0.3.8
 docker==7.1.0
 docstring-parser==0.16
@@ -108,7 +107,8 @@ gql==3.5.0
 graphene==3.3
 graphql-core==3.2.4
 graphql-relay==3.2.0
-grpcio==1.66.1
+greenlet==3.1.1
+grpcio==1.66.2
 grpcio-health-checking==1.62.3
 grpcio-status==1.62.3
 grpcio-tools==1.62.3
@@ -118,7 +118,7 @@ httplib2==0.22.0
 httptools==0.6.1
 httpx==0.27.2
 humanfriendly==10.0
-hypothesis==6.112.1
+hypothesis==6.112.2
 idna==3.10
 importlib-metadata==6.11.0
 iniconfig==2.0.0
@@ -131,6 +131,7 @@ jaraco-classes==3.4.0
 jaraco-context==6.0.1
 jaraco-functools==4.1.0
 jedi==0.19.1
+jeepney==0.8.0
 jinja2==3.1.4
 jmespath==1.0.1
 joblib==1.4.2
@@ -170,6 +171,7 @@ multidict==6.1.0
 multimethod==1.10
 mypy==1.11.2
 mypy-boto3-ecs==1.35.21
+mypy-boto3-emr==1.35.18
 mypy-boto3-emr-serverless==1.35.25
 mypy-boto3-glue==1.35.25
 mypy-extensions==1.0.0
@@ -257,6 +259,7 @@ s3transfer==0.10.2
 scikit-learn==1.5.2
 scipy==1.14.1
 seaborn==0.13.2
+secretstorage==3.3.3
 send2trash==1.8.3
 setuptools==75.1.0
 shellingham==1.5.4
@@ -268,11 +271,11 @@ snowflake-sqlalchemy==1.5.1
 sortedcontainers==2.4.0
 soupsieve==2.6
 sqlalchemy==1.4.54
-sqlglot==25.24.0
+sqlglot==25.24.1
 sqlglotrs==0.2.12
 sqlparse==0.5.1
 stack-data==0.6.3
-starlette==0.39.1
+starlette==0.39.2
 structlog==24.4.0
 syrupy==4.7.1
 tabulate==0.9.0
@@ -284,7 +287,7 @@ tomli==2.0.1
 tomlkit==0.13.2
 toposort==1.10
 tornado==6.4.1
-tox==4.20.0
+tox==4.21.0
 tqdm==4.66.5
 traitlets==5.14.3
 typeguard==4.3.0
@@ -297,7 +300,7 @@ types-chardet==5.0.4.6
 types-croniter==3.0.3.20240731
 types-cryptography==3.3.23.2
 types-mock==5.1.0.20240425
-types-paramiko==3.5.0.20240918
+types-paramiko==3.5.0.20240928
 types-protobuf==5.28.0.20240924
 types-pyopenssl==24.1.0.20240722
 types-python-dateutil==2.9.0.20240906
diff --git a/pyright/master/requirements-pinned.txt b/pyright/master/requirements-pinned.txt
index 6041cafe00c87..c23797ae55316 100644
--- a/pyright/master/requirements-pinned.txt
+++ b/pyright/master/requirements-pinned.txt
@@ -2,8 +2,8 @@ acryl-datahub==0.14.1
 agate==1.9.1
 aiofile==3.8.8
 aiofiles==24.1.0
-aiohappyeyeballs==2.4.2
-aiohttp==3.10.6
+aiohappyeyeballs==2.4.3
+aiohttp==3.10.8
 aiohttp-retry==2.8.3
 aiosignal==1.3.1
 alabaster==1.0.0
@@ -25,7 +25,6 @@ apache-airflow-providers-sqlite==3.8.2
 apeye==1.4.1
 apeye-core==1.1.5
 apispec==6.6.1
-appnope==0.1.4
 argcomplete==3.5.0
 argon2-cffi==23.1.0
 argon2-cffi-bindings==21.2.0
@@ -57,10 +56,10 @@ billiard==4.2.1
 bleach==6.1.0
 blinker==1.8.2
 bokeh==3.6.0
-boto3==1.35.29
-boto3-stubs-lite==1.35.29
-botocore==1.35.29
-botocore-stubs==1.35.29
+boto3==1.35.30
+boto3-stubs-lite==1.35.30
+botocore==1.35.30
+botocore-stubs==1.35.30
 buildkite-test-collector==0.1.9
 cachecontrol==0.14.0
 cached-property==1.5.2
@@ -72,7 +71,7 @@ cattrs==23.1.2
 celery==5.4.0
 certifi==2024.8.30
 cffi==1.17.1
-cfn-lint==1.15.1
+cfn-lint==1.15.2
 chardet==5.2.0
 charset-normalizer==3.3.2
 click==8.1.7
@@ -167,8 +166,8 @@ daff==1.3.46
 -e python_modules/libraries/dagster-wandb
 -e python_modules/dagster-webserver
 -e python_modules/libraries/dagstermill
-dask==2024.9.0
-dask-expr==1.1.14
+dask==2024.9.1
+dask-expr==1.1.15
 dask-jobqueue==0.9.0
 dask-kubernetes==2022.9.0
 dask-yarn==0.9
@@ -194,9 +193,9 @@ deltalake==0.17.4
 deprecated==1.2.14
 -e examples/development_to_production
 dict2css==0.3.0.post1
-dill==0.3.8
+dill==0.3.9
 distlib==0.3.8
-distributed==2024.9.0
+distributed==2024.9.1
 distro==1.9.0
 dlt==1.1.0
 dnspython==2.6.1
@@ -256,7 +255,8 @@ graphql-core==3.2.4
 graphql-relay==3.2.0
 graphviz==0.20.3
 great-expectations==0.17.11
-grpcio==1.66.1
+greenlet==3.1.1
+grpcio==1.66.2
 grpcio-health-checking==1.62.3
 grpcio-status==1.62.3
 grpcio-tools==1.62.3
@@ -270,7 +270,7 @@ httptools==0.6.1
 httpx==0.27.2
 humanfriendly==10.0
 humanize==4.10.0
-hypothesis==6.112.1
+hypothesis==6.112.2
 idna==3.10
 ijson==3.3.0
 imagesize==1.4.1
@@ -360,6 +360,7 @@ msgpack==1.1.0
 multidict==6.1.0
 multimethod==1.10
 mypy-boto3-ecs==1.35.21
+mypy-boto3-emr==1.35.18
 mypy-boto3-emr-serverless==1.35.25
 mypy-boto3-glue==1.35.25
 mypy-extensions==1.0.0
@@ -376,13 +377,25 @@ nodeenv==1.9.1
 notebook==7.2.2
 notebook-shim==0.2.4
 numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.68
+nvidia-nvtx-cu12==12.1.105
 oauth2client==4.1.3
 oauthlib==3.2.2
 objgraph==3.6.1
 onnx==1.16.2
 onnxconverter-common==1.13.0
 onnxruntime==1.19.2
-openai==1.50.1
+openai==1.50.2
 openapi-schema-validator==0.6.2
 openapi-spec-validator==0.7.1
 opentelemetry-api==1.27.0
@@ -448,7 +461,7 @@ pydata-google-auth==1.8.2
 pyflakes==3.2.0
 pygments==2.18.0
 pyjwt==2.9.0
-pymdown-extensions==10.11
+pymdown-extensions==10.11.1
 pynacl==1.5.0
 pyopenssl==24.2.1
 pyparsing==3.1.4
@@ -471,7 +484,7 @@ python-dotenv==1.0.1
 python-frontmatter==1.1.0
 python-jose==3.3.0
 python-json-logger==2.0.7
-python-multipart==0.0.10
+python-multipart==0.0.12
 python-nvd3==0.16.0
 python-slugify==8.0.4
 python-utils==3.9.0
@@ -519,7 +532,7 @@ skein==0.8.2
 skl2onnx==1.17.0
 slack-sdk==3.33.1
 sling==1.2.20
-sling-mac-arm64==1.2.20
+sling-linux-amd64==1.2.20
 smmap==5.0.1
 sniffio==1.3.1
 snowballstemmer==2.2.0
@@ -542,13 +555,13 @@ sphinxcontrib-serializinghtml==2.0.0
 sqlalchemy==1.4.54
 sqlalchemy-jsonfield==1.0.2
 sqlalchemy-utils==0.41.2
-sqlglot==25.24.0
+sqlglot==25.24.1
 sqlglotrs==0.2.12
 sqlparse==0.5.1
 sshpubkeys==3.3.1
 sshtunnel==0.4.0
 stack-data==0.6.3
-starlette==0.39.1
+starlette==0.39.2
 structlog==24.4.0
 sympy==1.13.3
 syrupy==4.7.1
@@ -575,6 +588,7 @@ tqdm==4.66.5
 traitlets==5.14.3
 trio==0.26.2
 trio-websocket==0.11.1
+triton==3.0.0
 -e examples/experimental/dagster-airlift/examples/tutorial-example
 -e examples/tutorial_notebook_assets
 twilio==9.3.2
@@ -590,7 +604,7 @@ types-chardet==5.0.4.6
 types-croniter==3.0.3.20240731
 types-cryptography==3.3.23.2
 types-mock==5.1.0.20240425
-types-paramiko==3.5.0.20240918
+types-paramiko==3.5.0.20240928
 types-protobuf==5.28.0.20240924
 types-pyopenssl==24.1.0.20240722
 types-python-dateutil==2.9.0.20240906
diff --git a/python_modules/libraries/dagster-aws/dagster_aws/pipes/__init__.py b/python_modules/libraries/dagster-aws/dagster_aws/pipes/__init__.py
index 3902a5a642f31..ef95c70ae21d3 100644
--- a/python_modules/libraries/dagster-aws/dagster_aws/pipes/__init__.py
+++ b/python_modules/libraries/dagster-aws/dagster_aws/pipes/__init__.py
@@ -1,5 +1,6 @@
 from dagster_aws.pipes.clients import (
     PipesECSClient,
+    PipesEMRClient,
     PipesEMRServerlessClient,
     PipesGlueClient,
     PipesLambdaClient,
@@ -19,6 +20,7 @@
     "PipesGlueClient",
     "PipesLambdaClient",
     "PipesECSClient",
+    "PipesEMRClient",
     "PipesS3ContextInjector",
     "PipesLambdaEventContextInjector",
     "PipesS3MessageReader",
diff --git a/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/__init__.py b/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/__init__.py
index a6711bbf1ce82..e3895ac00d101 100644
--- a/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/__init__.py
+++ b/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/__init__.py
@@ -1,6 +1,13 @@
 from dagster_aws.pipes.clients.ecs import PipesECSClient
+from dagster_aws.pipes.clients.emr import PipesEMRClient
 from dagster_aws.pipes.clients.emr_serverless import PipesEMRServerlessClient
 from dagster_aws.pipes.clients.glue import PipesGlueClient
 from dagster_aws.pipes.clients.lambda_ import PipesLambdaClient
 
-__all__ = ["PipesGlueClient", "PipesLambdaClient", "PipesECSClient", "PipesEMRServerlessClient"]
+__all__ = [
+    "PipesGlueClient",
+    "PipesLambdaClient",
+    "PipesECSClient",
+    "PipesEMRServerlessClient",
+    "PipesEMRClient",
+]
diff --git a/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/emr.py b/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/emr.py
new file mode 100644
index 0000000000000..ade955cefc596
--- /dev/null
+++ b/python_modules/libraries/dagster-aws/dagster_aws/pipes/clients/emr.py
@@ -0,0 +1,292 @@
+import os
+import sys
+import time
+from typing import TYPE_CHECKING, Any, Dict, Optional
+
+import boto3
+import dagster._check as check
+from dagster import PipesClient
+from dagster._annotations import public
+from dagster._core.definitions.resource_annotation import TreatAsResourceParam
+from dagster._core.errors import DagsterExecutionInterruptedError
+from dagster._core.execution.context.compute import OpExecutionContext
+from dagster._core.pipes.client import (
+    PipesClientCompletedInvocation,
+    PipesContextInjector,
+    PipesMessageReader,
+)
+from dagster._core.pipes.utils import PipesEnvContextInjector, PipesSession, open_pipes_session
+
+from dagster_aws.emr.emr import EMR_CLUSTER_TERMINATED_STATES
+from dagster_aws.pipes.message_readers import (
+    PipesS3LogReader,
+    PipesS3MessageReader,
+    gzip_log_decode_fn,
+)
+
+if TYPE_CHECKING:
+    from mypy_boto3_emr import EMRClient
+    from mypy_boto3_emr.literals import ClusterStateType
+    from mypy_boto3_emr.type_defs import (
+        DescribeClusterOutputTypeDef,
+        RunJobFlowInputRequestTypeDef,
+        RunJobFlowOutputTypeDef,
+    )
+
+
+class PipesEMRClient(PipesClient, TreatAsResourceParam):
+    """A pipes client for running jobs on AWS EMR.
+
+    Args:
+        message_reader (PipesMessageReader): A message reader to use to read messages
+            from the EMR jobs.
+            Recommended to use :py:class:`PipesS3MessageReader` with `expect_s3_message_writer` set to `True`.
+        client (Optional[boto3.client]): The boto3 EMR client used to interact with AWS EMR.
+        context_injector (Optional[PipesContextInjector]): A context injector to use to inject
+            context into the AWS EMR job.
+            Defaults to :py:class:`PipesEnvContextInjector`.
+        forward_termination (bool): Whether to cancel the EMR job if the Dagster process receives a termination signal.
+        wait_for_s3_logs_seconds (int): The number of seconds to wait for S3 logs to be written after execution completes.
+    """
+
+    def __init__(
+        self,
+        message_reader: PipesMessageReader,
+        client=None,
+        context_injector: Optional[PipesContextInjector] = None,
+        forward_termination: bool = True,
+        wait_for_s3_logs_seconds: int = 10,
+    ):
+        self._client = client or boto3.client("emr")
+        self._message_reader = message_reader
+        self._context_injector = context_injector or PipesEnvContextInjector()
+        self.forward_termination = check.bool_param(forward_termination, "forward_termination")
+        self.wait_for_s3_logs_seconds = wait_for_s3_logs_seconds
+
+    @property
+    def client(self) -> "EMRClient":
+        return self._client
+
+    @property
+    def context_injector(self) -> PipesContextInjector:
+        return self._context_injector
+
+    @property
+    def message_reader(self) -> PipesMessageReader:
+        return self._message_reader
+
+    @classmethod
+    def _is_dagster_maintained(cls) -> bool:
+        return True
+
+    @public
+    def run(
+        self,
+        *,
+        context: OpExecutionContext,
+        run_job_flow_params: "RunJobFlowInputRequestTypeDef",
+        extras: Optional[Dict[str, Any]] = None,
+    ) -> PipesClientCompletedInvocation:
+        """Run a job on AWS EMR, enriched with the pipes protocol.
+
+        Starts a new EMR cluster for each invocation.
+
+        Args:
+            context (OpExecutionContext): The context of the currently executing Dagster op or asset.
+            run_job_flow_params (dict): Parameters for the ``run_job_flow`` boto3 EMR client call.
+                See `Boto3 API Documentation <https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr/client/run_job_flow.html>`_
+            extras (Optional[Dict[str, Any]]): Additional information to pass to the Pipes session in the external process.
+
+        Returns:
+            PipesClientCompletedInvocation: Wrapper containing results reported by the external
+                process.
+        """
+        with open_pipes_session(
+            context=context,
+            message_reader=self.message_reader,
+            context_injector=self.context_injector,
+            extras=extras,
+        ) as session:
+            run_job_flow_params = self._enrich_params(session, run_job_flow_params)
+            start_response = self._start(context, run_job_flow_params)
+            try:
+                self._read_messages(context, start_response)
+                wait_response = self._wait_for_completion(context, start_response)
+                self._read_remaining_logs(context, wait_response)
+                return PipesClientCompletedInvocation(session)
+
+            except DagsterExecutionInterruptedError:
+                if self.forward_termination:
+                    context.log.warning(
+                        "[pipes] Dagster process interrupted! Will terminate external EMR job."
+                    )
+                    self._terminate(context, start_response)
+                raise
+
+    def _enrich_params(
+        self, session: PipesSession, params: "RunJobFlowInputRequestTypeDef"
+    ) -> "RunJobFlowInputRequestTypeDef":
+        # add Pipes env variables
+        pipes_env_vars = session.get_bootstrap_env_vars()
+
+        configurations = list(params.get("Configurations", []))
+
+        master_env_properties = {
+            f"spark.yarn.appMasterEnv.{var}": value for var, value in pipes_env_vars.items()
+        }
+
+        # a classification can only be added once
+        # spark-defaults works for setting env vars with yarn
+        # TODO: support other cluster/execution types
+        for config in configurations:
+            if config.get("Classification") == "spark-defaults":
+                properties = {**config.get("Properties", {})}
+                properties.update(master_env_properties)
+                config["Properties"] = properties
+                break
+        else:
+            configurations.append(
+                {"Classification": "spark-defaults", "Properties": master_env_properties}
+            )
+
+        params["Configurations"] = configurations
+
+        tags = list(params.get("Tags", []))
+
+        for key, value in session.default_remote_invocation_tags.items():
+            tags.append({"Key": key, "Value": value})
+
+        params["Tags"] = tags
+
+        return params
+
+    def _start(
+        self, context: OpExecutionContext, params: "RunJobFlowInputRequestTypeDef"
+    ) -> "RunJobFlowOutputTypeDef":
+        response = self._client.run_job_flow(**params)
+        cluster_id = response["JobFlowId"]
+
+        context.log.info(f"[pipes] EMR steps started in cluster {cluster_id}")
+        return response
+
+    def _wait_for_completion(
+        self, context: OpExecutionContext, response: "RunJobFlowOutputTypeDef"
+    ) -> "DescribeClusterOutputTypeDef":
+        cluster_id = response["JobFlowId"]
+        self._client.get_waiter("cluster_running").wait(ClusterId=cluster_id)
+        context.log.info(f"[pipes] EMR cluster {cluster_id} running")
+        # now wait for the job to complete
+        self._client.get_waiter("cluster_terminated").wait(ClusterId=cluster_id)
+
+        cluster = self._client.describe_cluster(ClusterId=cluster_id)
+
+        state: "ClusterStateType" = cluster["Cluster"]["Status"]["State"]
+
+        context.log.info(f"[pipes] EMR cluster {cluster_id} completed with state: {state}")
+
+        if state in EMR_CLUSTER_TERMINATED_STATES:
+            context.log.error(f"[pipes] EMR job {cluster_id} failed")
+            raise Exception(f"[pipes] EMR job {cluster_id} failed:\n{cluster}")
+
+        return cluster
+
+    def _read_messages(self, context: OpExecutionContext, response: "RunJobFlowOutputTypeDef"):
+        cluster = self.client.describe_cluster(ClusterId=response["JobFlowId"])
+
+        cluster_id = cluster["Cluster"]["Id"]  # type: ignore
+        logs_uri = cluster.get("Cluster", {}).get("LogUri")
+
+        if isinstance(self.message_reader, PipesS3MessageReader) and logs_uri is None:
+            context.log.warning(
+                "[pipes] LogUri is not set in the EMR cluster configuration. Won't be able to read logs."
+            )
+        elif isinstance(self.message_reader, PipesS3MessageReader) and isinstance(logs_uri, str):
+            bucket = logs_uri.split("/")[2]
+            prefix = "/".join(logs_uri.split("/")[3:])
+
+            steps = self.client.list_steps(ClusterId=cluster_id)
+
+            # forward stdout and stderr from each step
+
+            for step in steps["Steps"]:
+                step_id = step["Id"]  # type: ignore
+
+                for stdio in ["stdout", "stderr"]:
+                    # at this stage we can't know if this key will be created;
+                    # for example, if a step doesn't have any stdout/stderr logs,
+                    # the PipesS3LogReader won't be able to start,
+                    # which may result in some unnecessary warnings.
+                    # there is not much we can do about it except read the step logs
+                    # after the job is completed, which is not ideal either
+                    key = os.path.join(prefix, f"{cluster_id}/steps/{step_id}/{stdio}.gz")
+
+                    self.message_reader.add_log_reader(
+                        name=f"{step_id}/{stdio}",
+                        log_reader=PipesS3LogReader(
+                            client=self.message_reader.client,
+                            bucket=bucket,
+                            key=key,
+                            decode_fn=gzip_log_decode_fn,
+                            target_stream=sys.stdout if stdio == "stdout" else sys.stderr,
+                        ),
+                    )
+
+    def _read_remaining_logs(
+        self, context: OpExecutionContext, response: "DescribeClusterOutputTypeDef"
+    ):
+        cluster_id = response["Cluster"]["Id"]  # type: ignore
+        logs_uri = response.get("Cluster", {}).get("LogUri")
+
+        if isinstance(self.message_reader, PipesS3MessageReader) and isinstance(logs_uri, str):
+            bucket = logs_uri.split("/")[2]
+            prefix = "/".join(logs_uri.split("/")[3:])
+
+            # discover container (application) logs (e.g. Python logs) and forward all of them
+            # ex. /containers/application_1727881613116_0001/container_1727881613116_0001_01_000001/stdout.gz
+            containers_prefix = os.path.join(prefix, f"{cluster_id}/containers/")
+
+            context.log.debug(
+                f"[pipes] Waiting for {self.wait_for_s3_logs_seconds} seconds to allow EMR to dump all logs to S3. "
+                "Consider increasing this value if some logs are missing."
+            )
+
+            time.sleep(self.wait_for_s3_logs_seconds)  # give EMR a chance to dump all logs to S3
+
+            context.log.debug(
+                f"[pipes] Looking for application logs in s3://{os.path.join(bucket, containers_prefix)}"
+            )
+
+            all_keys = [
+                obj["Key"]
+                for obj in self.message_reader.client.list_objects_v2(
+                    Bucket=bucket, Prefix=containers_prefix
+                ).get("Contents", [])
+            ]
+
+            # filter keys which include stdout.gz or stderr.gz
+
+            container_log_keys = {}
+            for key in all_keys:
+                if "stdout.gz" in key:
+                    container_log_keys[key] = "stdout"
+                elif "stderr.gz" in key:
+                    container_log_keys[key] = "stderr"
+
+            # forward application logs
+
+            for key, stdio in container_log_keys.items():
+                container_id = key.split("/")[-2]
+                self.message_reader.add_log_reader(
+                    name=f"{container_id}/{stdio}",
+                    log_reader=PipesS3LogReader(
+                        client=self.message_reader.client,
+                        bucket=bucket,
+                        key=key,
+                        decode_fn=gzip_log_decode_fn,
+                        target_stream=sys.stdout if stdio == "stdout" else sys.stderr,
+                    ),
+                )
+
+    def _terminate(self, context: OpExecutionContext, start_response: "RunJobFlowOutputTypeDef"):
+        cluster_id = start_response["JobFlowId"]
+        context.log.info(f"[pipes] Terminating EMR job {cluster_id}")
+        self._client.terminate_job_flows(JobFlowIds=[cluster_id])
diff --git a/python_modules/libraries/dagster-aws/ruff.toml b/python_modules/libraries/dagster-aws/ruff.toml
index 244f818c1fc43..64c0e85362012 100644
--- a/python_modules/libraries/dagster-aws/ruff.toml
+++ b/python_modules/libraries/dagster-aws/ruff.toml
@@ -8,7 +8,10 @@ extend-select = [
 
 [lint.flake8-tidy-imports]
 banned-module-level-imports = [
+    "mypy_boto3_s3",
+    "mypy_boto3_logs",
     "mypy_boto3_ecs",
     "mypy_boto3_glue",
-    "mypy_boto3_emr_serverless"
+    "mypy_boto3_emr_serverless",
+    "mypy_boto3_emr"
 ]
diff --git a/python_modules/libraries/dagster-aws/setup.py b/python_modules/libraries/dagster-aws/setup.py
index 3616471fc37bf..e210bc683b1fb 100644
--- a/python_modules/libraries/dagster-aws/setup.py
+++ b/python_modules/libraries/dagster-aws/setup.py
@@ -37,6 +37,7 @@ def get_version() -> str:
     python_requires=">=3.8,<3.13",
     install_requires=[
         "boto3",
+        "boto3-stubs-lite[ecs,glue,emr,emr-serverless]",
         f"dagster{pin}",
         "packaging",
         "requests",
@@ -45,7 +46,7 @@ def get_version() -> str:
         "redshift": ["psycopg2-binary"],
         "pyspark": ["dagster-pyspark"],
         "stubs": [
-            "boto3-stubs-lite[ecs,glue,emr-serverless]",
+            "boto3-stubs-lite[ecs,glue,emr,emr-serverless]",
         ],
         "test": [
            "botocore!=1.32.1",
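
For reviewers, a hedged usage sketch of the new client. The bucket name, IAM roles, script path, and release label below are illustrative assumptions, not part of this diff; `expect_s3_message_writer` follows the recommendation in the class docstring.

```python
# Hypothetical usage of PipesEMRClient; names marked "assumption" are not from this PR.
import boto3
from dagster import AssetExecutionContext, Definitions, asset
from dagster_aws.pipes import PipesEMRClient, PipesS3MessageReader


@asset
def emr_pipes_asset(context: AssetExecutionContext, pipes_emr_client: PipesEMRClient):
    # run() starts a fresh cluster per invocation; LogUri must point at S3
    # for the step/container log forwarding added in this PR to work.
    return pipes_emr_client.run(
        context=context,
        run_job_flow_params={
            "Name": "dagster-pipes-example",
            "ReleaseLabel": "emr-7.1.0",  # assumption
            "LogUri": "s3://my-pipes-bucket/emr/logs",  # assumption
            "JobFlowRole": "EMR_EC2_DefaultRole",  # assumption: default EMR roles exist
            "ServiceRole": "EMR_DefaultRole",
            "Instances": {
                "MasterInstanceType": "m5.xlarge",
                "InstanceCount": 1,
                "KeepJobFlowAliveWhenNoSteps": False,
            },
            "Steps": [
                {
                    "Name": "pipes-step",
                    "ActionOnFailure": "TERMINATE_CLUSTER",
                    "HadoopJarStep": {
                        "Jar": "command-runner.jar",
                        "Args": ["spark-submit", "s3://my-pipes-bucket/script.py"],
                    },
                }
            ],
        },
    ).get_materialize_result()


defs = Definitions(
    assets=[emr_pipes_asset],
    resources={
        "pipes_emr_client": PipesEMRClient(
            # the class docstring recommends PipesS3MessageReader with
            # expect_s3_message_writer=True for this client
            message_reader=PipesS3MessageReader(
                client=boto3.client("s3"),
                bucket="my-pipes-bucket",  # assumption
                expect_s3_message_writer=True,
            )
        )
    },
)
```

Because the class subclasses `TreatAsResourceParam`, the client can be injected as a plain asset parameter, matching the pattern of the other pipes clients in this package.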
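
And a minimal standalone sketch of the `spark-defaults` merge performed by `_enrich_params`, with made-up env var values (real ones come from `session.get_bootstrap_env_vars()`), showing why the `for`/`else` is needed: a classification may appear at most once.

```python
# Standalone illustration of the configuration-enrichment logic above;
# inject_pipes_env is a hypothetical helper, not part of the diff.
from typing import Any, Dict, List


def inject_pipes_env(
    configurations: List[Dict[str, Any]], pipes_env_vars: Dict[str, str]
) -> List[Dict[str, Any]]:
    props = {f"spark.yarn.appMasterEnv.{k}": v for k, v in pipes_env_vars.items()}
    for config in configurations:
        if config.get("Classification") == "spark-defaults":
            # merge into the existing classification, since it may appear only once
            config["Properties"] = {**config.get("Properties", {}), **props}
            break
    else:
        # no spark-defaults classification yet: append a new one
        configurations.append({"Classification": "spark-defaults", "Properties": props})
    return configurations


configs = [{"Classification": "spark-defaults", "Properties": {"spark.executor.memory": "2g"}}]
enriched = inject_pipes_env(configs, {"DAGSTER_PIPES_CONTEXT": "<payload>"})
assert enriched[0]["Properties"]["spark.executor.memory"] == "2g"
assert enriched[0]["Properties"]["spark.yarn.appMasterEnv.DAGSTER_PIPES_CONTEXT"] == "<payload>"
```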