Skip to content

data pond: expose readable datasets as dataframes and arrow tables #508

data pond: expose readable datasets as dataframes and arrow tables

data pond: expose readable datasets as dataframes and arrow tables #508

# Tests sources against a couple of local destinations
name: src | rest_api, sql_database, filesystem
on:
pull_request:
branches:
- master
- devel
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR
RUNTIME__DLTHUB_TELEMETRY_ENDPOINT: ${{ secrets.RUNTIME__DLTHUB_TELEMETRY_ENDPOINT }}
ACTIVE_DESTINATIONS: "[\"duckdb\", \"postgres\", \"filesystem\"]"
ALL_FILESYSTEM_DRIVERS: "[\"file\"]"
jobs:
get_docs_changes:
name: docs changes
uses: ./.github/workflows/get_docs_changes.yml
run_loader:
name: src | rest_api, sql_database, filesystem
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
strategy:
fail-fast: false
defaults:
run:
shell: bash
runs-on: "ubuntu-latest"
# Service containers to run with `container-job`
services:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
# Provide the password for postgres
env:
POSTGRES_DB: dlt_data
POSTGRES_USER: loader
POSTGRES_PASSWORD: loader
ports:
- 5432:5432
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- name: Check out
uses: actions/checkout@master
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10.x"
- name: Install Poetry
uses: snok/[email protected]
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-sources
# TODO: which deps should we enable?
- name: Install dependencies
run: poetry install --no-interaction -E postgres -E duckdb -E parquet -E filesystem -E cli -E sql_database --with sentry-sdk,pipeline,sources
# run sources tests in load against configured destinations
- run: poetry run pytest tests/load/sources
name: Run tests Linux
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data
# here we upgrade sql alchemy to 2 an run the sql_database tests again
- name: Upgrade sql alchemy
run: poetry run pip install sqlalchemy==2.0.32
- run: poetry run pytest tests/load/sources/sql_database
name: Run tests Linux
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data