Skip to content

dataset factory

dataset factory #5445

# Tests destinations that can run without credentials.
# i.e. local postgres, duckdb, filesystem (with local fs/memory bucket)
name: dest | postgres, duckdb and fs
on:
pull_request:
branches:
- master
- devel
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
# NOTE: this workflow can't use github secrets!
# DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }}
RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\", \"weaviate\", \"qdrant\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\", \"file\", \"sftp\"]"
DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary
DESTINATION__WEAVIATE__MODULE_CONFIG: "{\"text2vec-contextionary\": {\"vectorizeClassName\": false, \"vectorizePropertyName\": true}}"
jobs:
get_docs_changes:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
run_loader:
name: dest | postgres, duckdb and fs local tests
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
strategy:
fail-fast: false
defaults:
run:
shell: bash
runs-on: "ubuntu-latest"
# Service containers to run with `container-job`
services:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
# Provide the password for postgres
env:
POSTGRES_DB: dlt_data
POSTGRES_USER: loader
POSTGRES_PASSWORD: loader
ports:
- 5432:5432
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
qdrant:
image: qdrant/qdrant:v1.8.4
ports:
- 6333:6333
steps:
- name: Check out
uses: actions/checkout@master
- name: Start weaviate
run: docker compose -f "tests/load/weaviate/docker-compose.yml" up -d
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10.x"
- name: Install Poetry
uses: snok/[email protected]
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations
- name: Install dependencies
run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake
- name: Start SFTP server
run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d
- name: Configure SSH Agent for sftp tests
run: |
mkdir -p /home/runner/.ssh
cp tests/load/filesystem_sftp/bootstrap/bobby_rsa /home/runner/.ssh/id_rsa
cp tests/load/filesystem_sftp/bootstrap/bobby_rsa.pub /home/runner/.ssh/id_rsa.pub
# always run full suite, also on branches
- name: Run tests Linux
run: |
eval "$(ssh-agent -s)"
poetry run pytest tests/load --ignore tests/load/sources
poetry run pytest tests/cli
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data
DESTINATION__QDRANT__CREDENTIALS__location: http://localhost:6333
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PORT: 2222
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_USERNAME: foo
DESTINATION__FILESYSTEM__CREDENTIALS__SFTP_PASSWORD: pass
- name: Stop weaviate
if: always()
run: docker compose -f "tests/load/weaviate/docker-compose.yml" down -v
- name: Stop SFTP server
if: always()
run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" down -v