From e7df5220adf120b3719049cd06ac6a6dce0836e5 Mon Sep 17 00:00:00 2001 From: Sven Fillinger Date: Thu, 16 Jul 2020 10:10:36 +0200 Subject: [PATCH 1/8] Add CL items for 1.3 release --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df854e47..a2f15e06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 1.3 + +* Provide metadata schema in JSON for the IMGAG dropbox +* Register checksums for Oxford Nanopore datasets +* Register unclassified read data for Oxford Nanopore datasets + ## 1.2 * Provide ETL routine for Oxford Nanopore NGS data From f7c46f3f054a77bacf242104f4778d4d3e12b9fe Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Thu, 16 Jul 2020 18:19:49 +0200 Subject: [PATCH 2/8] try to find existing ms sample in case of replicate data --- .../register-mzML-dropbox/register-mzml-dropbox.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py index b2647ac5..cd4511ce 100644 --- a/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py +++ b/drop-boxes/register-mzML-dropbox/register-mzml-dropbox.py @@ -81,6 +81,10 @@ def process(transaction): experimentIDs.append(exp.getExperimentIdentifier()) if exp.getExperimentType() == expType: msExperiment = exp + msSampleID = '/' + space + '/' + 'MS' + parentCode + msSample = transaction.getSampleForUpdate(msSampleID) + if msSample: + msExperiment = msSample.getExperiment() # no existing experiment for samples of this sample preparation found if not msExperiment: expID = experimentIDs[0] @@ -91,13 +95,14 @@ def process(transaction): expID = '/' + space + '/' + project + '/' + project + 'E' + str(expNum) msExperiment = transaction.createNewExperiment(expID, expType) - newMSSample = transaction.createNewSample('/' + space + '/' + 'MS'+ parentCode, "Q_MS_RUN") - 
newMSSample.setParentSampleIdentifiers([sa.getSampleIdentifier()]) - newMSSample.setExperiment(msExperiment) + if not msSample: + msSample = transaction.createNewSample('/' + space + '/' + 'MS'+ parentCode, "Q_MS_RUN") + msSample.setParentSampleIdentifiers([sa.getSampleIdentifier()]) + msSample.setExperiment(msExperiment) # create new dataset dataSet = transaction.createNewDataSet("Q_MS_MZML_DATA") dataSet.setMeasuredData(False) - dataSet.setSample(newMSSample) + dataSet.setSample(msSample) transaction.moveFile(incomingPath, dataSet) From d4490b007e56917a96700e41f102caf63dccb6b5 Mon Sep 17 00:00:00 2001 From: Sven Fillinger Date: Fri, 17 Jul 2020 08:10:37 +0200 Subject: [PATCH 3/8] Add CL information for patch 1.3.1 Affects the register-mzml-dropbox. Checks the given project for existing samples with the provided sample code of type "Q_MS_RUN". A new sample will be only created in openBIS, if this sample does not yet exist. --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2f15e06..f6ada1fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 1.3.1 + +* Avoid sample registration for existing mass spectrometry data + ## 1.3 * Provide metadata schema in JSON for the IMGAG dropbox From 667ab74188eb404921bd98885b348a869b25db06 Mon Sep 17 00:00:00 2001 From: Sven F Date: Mon, 20 Jul 2020 17:32:08 +0200 Subject: [PATCH 4/8] Provide format documentation for single-end / paired-end data registration (#40) This CL introduces some documentation for the data structure in preparation to transfer data to QBIC and register them into qPortal. 
--- README.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/README.md b/README.md index 0d835565..057b258e 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,55 @@ This repository holds a collection of Jython ETL (extract-transform-load) scripts that are used at QBiC that define the behaviour of openBIS dropboxes. The ETL processes combine some quality control measures for incoming data and data transformation to facilitate the registration in openBIS. + +## Data format guidelines + +These guidelines describe the necessary file structure for different +data types to be met in order to ingest and register them correctly in +openBIS. + +Formats: + +- [NGS single-end / paired-end data](#ngs-single-end--paired-end-data) + +### NGS single-end / paired-end data + +**Responsible dropbox:** +[QBiC-register-fastq-dropbox](drop-boxes/register-fastq-dropbox) + +**Resulting data model in openBIS** +Q_TEST_SAMPLE -> Q_NGS_RAW_DATA (with sample code) -> DataSet (directory +with files contained) + +**Description** +For paired-end sequencing reads in FASTQ format, the file structure +needs to look like this + +``` +.fastq // Directory + |-- _R1.fastq + |-- _R1.fastq.sha256sum + |-- _R2.fastq + |-- _R2.fastq.sha256sum +``` + +or in the case of gzipped FASTQ files: + +``` +.fastq.gz // Directory + |-- _R1.fastq.gz + |-- _R1.fastq.gz.sha256sum + |-- _R2.fastq.gz + |-- _R2.fastq.gz.sha256sum +``` + +In the case of single-end sequencing data, the file structure needs to +look like this: + +``` +.fastq.gz // Directory + |-- .fastq.gz + |-- .fastq.gz.sha256sum +``` + + From 296f3497574b51b715b1a4d2409caf0fbb088725 Mon Sep 17 00:00:00 2001 From: wow-such-code Date: Tue, 18 Aug 2020 16:57:15 +0200 Subject: [PATCH 5/8] Update Changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6ada1fa..f662a142 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 
1.4.0 + +* Register unclassified pooling data of Nanopore experiments directly at the experiment level (no copies are added to sample-based datasets) + ## 1.3.1 * Avoid sample registration for existing mass spectrometry data From 60d23dc1d636bcce766a6526241d9c7a9ec47453 Mon Sep 17 00:00:00 2001 From: Sven F Date: Tue, 13 Oct 2020 17:25:34 +0200 Subject: [PATCH 6/8] Release/1.4.0 (#46) * Update development with hot fix and changelog (#44) * Add CL items for 1.3 release * try to find existing ms sample in case of replicate data * Add CL information for patch 1.3.1 Affects the register-mzml-dropbox. Checks the given project for existing samples with the provided sample code of type "Q_MS_RUN". A new sample will be only created in openBIS, if this sample does not yet exist. * Provide format documentation for single-end / paired-end data registration (#40) This CL introduces some documentation for the data structure in preparation to transfer data to QBIC and register them into qPortal. * Update Changelog Co-authored-by: Sven Fillinger Co-authored-by: Sven F * Add environment config for conda * Update CL Co-authored-by: wow-such-code --- CHANGELOG.md | 4 ++++ environment.yaml | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 environment.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index f662a142..5130ba87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## 1.4.0 +* Provide first imaging registration support with OMERO server +* Provide `environment.yaml` that can be used to configure a conda + environment for the proper setup for the register-omero-metadata + dropbox * Register unclassified pooling data of Nanopore experiments directly at the experiment level (no copies are added to sample-based datasets) ## 1.3.1 diff --git a/environment.yaml b/environment.yaml new file mode 100644 index 00000000..bdcca4ec --- /dev/null +++ b/environment.yaml @@ -0,0 +1,39 @@ +name: etl +channels: + - bioconda + - sven1103 + 
- hargup/label/pypi + - anaconda + - defaults +dependencies: + - _libgcc_mutex=0.1 + - bzip2=1.0.8 + - ca-certificates=2020.7.22 + - certifi=2019.11.28 + - freetype=2.10.2 + - hashlib=20081119 + - jpeg=9b + - libedit=3.1.20191231 + - libffi=3.3 + - libgcc-ng=9.1.0 + - libpng=1.6.37 + - libstdcxx-ng=9.1.0 + - libtiff=4.1.0 + - lz4-c=1.9.2 + - ncurses=6.2 + - olefile=0.46 + - omero-importer-cli=v1.0.0 + - openjdk=8.0.152 + - openssl=1.0.2u + - pillow=6.2.1 + - pip=19.3.1 + - python=2.7.18 + - readline=8.0 + - setuptools=44.0.0 + - sqlite=3.33.0 + - tk=8.6.10 + - wheel=0.33.6 + - xz=5.2.5 + - zeroc-ice=3.6.3 + - zlib=1.2.11 + - zstd=1.4.4 \ No newline at end of file From 8aeead9fbeeac8b3e436de99676178a9ddb5c2de Mon Sep 17 00:00:00 2001 From: Sven F Date: Tue, 13 Oct 2020 17:43:49 +0200 Subject: [PATCH 7/8] Update README.md --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 057b258e..320fcd65 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,28 @@ This repository holds a collection of Jython ETL (extract-transform-load) scripts that are used at QBiC that define the behaviour of openBIS dropboxes. The ETL processes combine some quality control measures for incoming data and data transformation to facilitate the registration in openBIS. +## Environment setup + +**1. Conda environment for the register-omero-metadata dropbox** + +To provide the dependencies for the register-omero-metadata dropbox to work properly, you can build a conda environment based on the provided [`environment.yaml`](./environment.yaml): + +```bash +conda env create -f environment.yaml +``` +Make sure that the paths to the executables provided in the environment are referenced properly in the register-omero-metadata Python script. + +**2. Dependencies for sample tracking functionality** + +OpenBIS loads Java libraries on startup, if they are provided in a `lib` folder of an openBIS dropbox. 
For the sample-tracking to work, you need to provide the +[sample-tracking-helper](https://github.com/qbicsoftware/sample-tracking-helper-lib) library and deploy it in one of the lib folders. + + +**3. Dependencies for data transfer objects and parsers** + +We decoupled some shared functionality in the [data-model-lib](https://github.com/qbicsoftware/data-model-lib) and the [core-utils-lib](https://github.com/qbicsoftware/core-utils-lib). Please make sure to deploy them as well in one of the lib folders, such that the classes are loaded by the etlserver class loader and available during runtime. + + ## Data format guidelines These guidelines describe the necessary file structure for different From 56c4f4d52087e02c775333c9e6d9e0370c12def0 Mon Sep 17 00:00:00 2001 From: Sven F Date: Tue, 13 Oct 2020 17:46:42 +0200 Subject: [PATCH 8/8] Provide DOI badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 320fcd65..5e3f33c7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![DOI](https://zenodo.org/badge/45912621.svg)](https://zenodo.org/badge/latestdoi/45912621) + # ETL openBIS dropboxes This repository holds a collection of Jython ETL (extract-transform-load) scripts that are used at QBiC that define the behaviour of openBIS dropboxes.