diff --git a/sources/unstructured_data/google_drive/__init__.py b/sources/unstructured_data/google_drive/__init__.py index 1eecaa3a3..5429ef1b5 100644 --- a/sources/unstructured_data/google_drive/__init__.py +++ b/sources/unstructured_data/google_drive/__init__.py @@ -7,7 +7,7 @@ from dlt.common import logger from dlt.extract.source import TDataItem, TDataItems from dlt.sources.credentials import GcpOAuthCredentials, GcpServiceAccountCredentials -from googleapiclient.discovery import build +from googleapiclient.discovery import build # type: ignore from .helpers import download_file_from_google_drive from .settings import FOLDER_IDS, STORAGE_FOLDER_PATH diff --git a/sources/unstructured_data/google_drive/helpers.py b/sources/unstructured_data/google_drive/helpers.py index 4503cf843..cb103c9cb 100644 --- a/sources/unstructured_data/google_drive/helpers.py +++ b/sources/unstructured_data/google_drive/helpers.py @@ -1,8 +1,8 @@ import io from typing import Any -from googleapiclient.errors import HttpError -from googleapiclient.http import MediaIoBaseDownload +from googleapiclient.errors import HttpError # type: ignore +from googleapiclient.http import MediaIoBaseDownload # type: ignore def download_file_from_google_drive(service: Any, file_id: str, file_path: str) -> None: diff --git a/sources/unstructured_data/inbox/__init__.py b/sources/unstructured_data/inbox/__init__.py index 29a53de6f..74e0246bc 100644 --- a/sources/unstructured_data/inbox/__init__.py +++ b/sources/unstructured_data/inbox/__init__.py @@ -103,6 +103,7 @@ def get_message_uids(criterias: Sequence[str]) -> Optional[TDataItems]: if not message_uids: logger.warning("No emails found.") + return None else: return [ {"message_uid": int(message_uid)} for message_uid in message_uids diff --git a/sources/unstructured_data/local_folder/__init__.py b/sources/unstructured_data/local_folder/__init__.py index 831cb7506..28ba6354e 100644 --- a/sources/unstructured_data/local_folder/__init__.py +++ b/sources/unstructured_data/local_folder/__init__.py @@ -12,8 +12,8 @@ } -@dlt.resource(write_disposition="replace") -def local_folder_source( +@dlt.resource(write_disposition="replace", name="local_folder") +def local_folder_resource( data_dir: Union[str, Sequence[str]], ) -> TDataItem: """ diff --git a/sources/unstructured_data_pipeline.py b/sources/unstructured_data_pipeline.py index 4786c2f78..2d5ba5fcf 100644 --- a/sources/unstructured_data_pipeline.py +++ b/sources/unstructured_data_pipeline.py @@ -3,7 +3,7 @@ def from_local_folder_to_structured(data_dir: str) -> None: - from unstructured_data.local_folder import local_folder_source + from unstructured_data.local_folder import local_folder_resource # configure the pipeline with your destination details pipeline = dlt.pipeline( @@ -13,7 +13,7 @@ def from_local_folder_to_structured(data_dir: str) -> None: full_refresh=True, ) - data_resource = local_folder_source(data_dir) + data_resource = local_folder_resource(data_dir) filtered_data_resource = data_resource.add_filter( lambda item: item["content_type"] == "application/pdf" )