Remove mandatory requirement of run_id during parsing of nf-core datasets #132

Merged
pom.xml (2 changes: 1 addition & 1 deletion)
@@ -128,7 +128,7 @@
<dependency>
<artifactId>data-model-lib</artifactId>
<groupId>life.qbic</groupId>
<version>2.27.0</version>
<version>2.28.0</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
src/main/groovy/life/qbic/utils/BioinformaticAnalysisParser.groovy (106 changes: 58 additions & 48 deletions)
@@ -17,7 +17,6 @@ import java.nio.file.NotDirectoryException
import java.nio.file.Path
import java.text.ParseException


/**
* <h1>Parser storing the fileTree of a nf-core pipeline output directory into JSON format</h1>
* <br>
@@ -27,7 +26,7 @@ import java.text.ParseException
* @param directory path of nf-core directory whose fileTree should be converted into a JSON String
*
*/
class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>{
class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult> {

/**
* Contains the associated keys of the required root directory subFolders
@@ -36,19 +35,28 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
* @since 1.8.0
*/
enum RequiredRootFolderKeys {
QUALITY_CONTROL("qualityControl"),
PIPELINE_INFORMATION("pipelineInformation"),
PROCESS_FOLDERS("processFolders")
QUALITY_CONTROL("qualityControl", "multiqc"),
PIPELINE_INFORMATION("pipelineInformation", "pipeline_info"),
//Process folder names can vary, so no directory name can be assumed for now
PROCESS_FOLDERS("processFolders", null)

private String keyName

RequiredRootFolderKeys(String keyName) {
private String folderName

RequiredRootFolderKeys(String keyName, String folderName) {
this.keyName = keyName
this.folderName = folderName
}

String getKeyName() {
return this.keyName
}

String getFolderName() {
return this.folderName
}

}

/**
@@ -58,18 +66,24 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
* @since 1.8.0
*/
enum RequiredRootFileKeys {
RUN_ID("runId"),
SAMPLE_ID("sampleIds"),
RUN_ID("runId", "run_id.txt"),
SAMPLE_ID("sampleIds", "sample_ids.txt")

private String keyName
private String fileName

RequiredRootFileKeys(String keyName) {
RequiredRootFileKeys(String keyName, String fileName) {
this.keyName = keyName
this.fileName = fileName
}

String getKeyName() {
return this.keyName
}

String getFileName() {
return this.fileName
}
}

/**
@@ -79,19 +93,25 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
* @since 1.8.0
*/
enum RequiredPipelineFileKeys {
SOFTWARE_VERSIONS("softwareVersions"),
EXECUTION_REPORT("executionReport"),
SOFTWARE_VERSIONS("softwareVersions", "software_versions.yml"),
EXECUTION_REPORT("executionReport", "execution_report"),

private String keyName

RequiredPipelineFileKeys(String keyName) {
private String fileName

RequiredPipelineFileKeys(String keyName, String fileName) {
this.keyName = keyName
this.fileName = fileName
}

String getKeyName() {
return this.keyName
}

String getFileName() {
return this.fileName
}
}

/** {@InheritDoc} */
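
The hunk below replaces the hard-coded switch statements with lookups over the enums defined above, which now pair each JSON key with the folder or file name expected on disk. As a rough sketch (not part of the diff, assuming the enums exactly as declared above), the lookup pattern works like this:

    String fileName = "sample_ids.txt"
    def match = RequiredRootFileKeys.values().find { it.getFileName() == fileName }
    if (match) {
        // only files that map to a known key are inserted into the result map;
        // anything else, including a missing run_id.txt, is simply skipped
        println "insert child under key '${match.getKeyName()}'"
    }

Because an absent run_id.txt just produces no match, parsing no longer fails when the file is missing.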
@@ -139,32 +159,23 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
List<Map> processFolders = []
rootChildren.each { currentChild ->
if (currentChild.containsKey("children")) {
//folder
//directory
String folderName = currentChild.get("name")
switch (folderName) {
case "multiqc":
insertAsProperty(map, currentChild, RequiredRootFolderKeys.QUALITY_CONTROL.getKeyName())
break
case "pipeline_info":
RequiredRootFolderKeys requiredRootFolderKeys = RequiredRootFolderKeys.values().find { rootFolderKeys -> (rootFolderKeys.getFolderName() == folderName) }
if (requiredRootFolderKeys) {
if (requiredRootFolderKeys == RequiredRootFolderKeys.PIPELINE_INFORMATION) {
parsePipelineInformation(currentChild)
insertAsProperty(map, currentChild, RequiredRootFolderKeys.PIPELINE_INFORMATION.getKeyName())
break
default:
processFolders.add(currentChild)
break
}
insertAsProperty(map, currentChild, requiredRootFolderKeys.getKeyName())
} else {
processFolders.add(currentChild)
}
} else if (currentChild.containsKey("fileType")) {
//file
switch (currentChild.get("name")) {
case "run_id.txt":
insertAsProperty(map, currentChild, RequiredRootFileKeys.RUN_ID.getKeyName())
break
case "sample_ids.txt":
insertAsProperty(map, currentChild, RequiredRootFileKeys.SAMPLE_ID.getKeyName())
break
default:
//ignore other files
break
String fileName = currentChild.get("name")
RequiredRootFileKeys requiredRootFileKeys = RequiredRootFileKeys.values().find { rootFileKeys -> (rootFileKeys.getFileName() == fileName) }
if (requiredRootFileKeys) {
insertAsProperty(map, currentChild, requiredRootFileKeys.getKeyName())
}
}
}
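
Note that PROCESS_FOLDERS carries a null folderName, so it can never be returned by the find above; any directory whose name matches neither "multiqc" nor "pipeline_info" therefore falls through to the processFolders list. A quick illustration (hypothetical folder name, not taken from the diff):

    String folderName = "salmon"
    def known = RequiredRootFolderKeys.values().find { it.getFolderName() == folderName }
    assert known == null   // "salmon" matches no configured folder name
    // -> the parser adds this child to processFolders instead of a named property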
@@ -200,12 +211,10 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
private static void parsePipelineInformation(Map pipelineInformation) {

pipelineInformation.get("children").each { Map child ->
String filename = child.get("name")
if(filename.equals("software_versions.yml")){
insertAsProperty(pipelineInformation, child, RequiredPipelineFileKeys.SOFTWARE_VERSIONS.getKeyName())
}
else if(filename.matches("^execution_report.*")) {
insertAsProperty(pipelineInformation, child, RequiredPipelineFileKeys.EXECUTION_REPORT.getKeyName())
String fileName = child.get("name")
RequiredPipelineFileKeys requiredPipelineFileKeys = RequiredPipelineFileKeys.values().find { pipelineFileKeys -> (fileName.contains(pipelineFileKeys.fileName)) }
if (requiredPipelineFileKeys) {
insertAsProperty(pipelineInformation, child, requiredPipelineFileKeys.getKeyName())
}
}
}
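
The file matching here switches from the previous equals()/regex pair to contains(), so timestamped execution reports still resolve to the executionReport key. A trivial check (illustrative only, mirroring the file names used in the spec):

    assert "execution_report_1234-56-78_90-12-34.html".contains("execution_report")
    assert "software_versions.yml".contains("software_versions.yml")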
@@ -218,7 +227,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
* @since 1.8.0
*/
private static void insertAsProperty(Map parent, Object content, String propertyName) {
parent.put(propertyName,content)
parent.put(propertyName, content)
}

/**
@@ -236,7 +245,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
/**
* Method which checks if a given Json String matches a given Json schema
* @param json Json String which will be compared to schema
* @param path to Json schema for validation of Json String
* @param path to Json schema for validation of Json String
* @throws org.everit.json.schema.ValidationException
*/
private static void validateJson(String json) throws ValidationException {
@@ -258,6 +267,7 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>
/*
* Converts a file tree into a json object.
*/

private static class DirectoryConverter {

/**
@@ -321,11 +331,11 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>

private static Map convertToRelativePaths(Map content, Path root) {
//Since each value in the root map is a map we need to iterate over each key/value pair
content["path"] = toRelativePath(content["path"] as String, root)
if (content["children"]) {
// Children always contains a map, so convert recursively
content["children"] = (content["children"] as List).collect { convertToRelativePaths(it as Map, root) }
}
content["path"] = toRelativePath(content["path"] as String, root)
if (content["children"]) {
// Children always contains a map, so convert recursively
content["children"] = (content["children"] as List).collect { convertToRelativePaths(it as Map, root) }
}
return content

}
@@ -351,8 +361,8 @@ class BioinformaticAnalysisParser implements DatasetParser<NfCorePipelineResult>


def convertedFile = [
"name" : name,
"path" : path,
"name" : name,
"path" : path,
"fileType": fileType
]
return convertedFile
src/main/groovy/life/qbic/utils/NanoporeParser.groovy (2 changes: 0 additions & 2 deletions)
@@ -42,9 +42,7 @@ class NanoporeParser {
for (File hiddenFile : hiddenFiles) {
deleteFile(hiddenFile)
}

return convertedExperiment

}

private static void deleteFile(File file) {
@@ -62,8 +62,45 @@ class BioinformaticAnalysisSpec extends Specification {
ExecutionReport executionReport = pipelineInfo.getExecutionReport()
assert executionReport.getRelativePath() == "./pipeline_info/execution_report_1234-56-78_90-12-34.html"
assert executionReport.getName() == "execution_report_1234-56-78_90-12-34.html"
}

def "parsing a valid file structure without a run_id also returns a NfCorePipelineResult object"() {
given: "A valid nf-core pipeline output data structure"
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates-no-run-id")
when: "we parse this valid structure"
NfCorePipelineResult nfCorePipelineResult = bioinformaticAnalysisParser.parseFrom(pathToDirectory)
then: "we expect no exception should be thrown"
assert nfCorePipelineResult instanceof NfCorePipelineResult
//Root files can be parsed
assert !nfCorePipelineResult.runId
assert nfCorePipelineResult.sampleIds.getRelativePath() == "./sample_ids.txt"
assert nfCorePipelineResult.sampleIds.getName() == "sample_ids.txt"
//Root Folder can be parsed
QualityControlFolder multiQc = nfCorePipelineResult.getQualityControlFolder()
assert multiQc.getRelativePath() == "./multiqc"
assert multiQc.getName() == "multiqc"
assert multiQc instanceof DataFolder

PipelineInformationFolder pipelineInfo = nfCorePipelineResult.getPipelineInformation()
assert pipelineInfo.getRelativePath() == "./pipeline_info"
assert pipelineInfo.getName() == "pipeline_info"
assert pipelineInfo instanceof DataFolder

List<DataFolder> processFolders = nfCorePipelineResult.getProcessFolders()
assert processFolders[0].getRelativePath() == "./salmon"
assert processFolders[0].getName() == "salmon"
assert processFolders[0] instanceof DataFolder

//Files in Root folders can be parsed

SoftwareVersions softwareVersions = pipelineInfo.getSoftwareVersions()
assert softwareVersions.getRelativePath() == "./pipeline_info/software_versions.yml"
assert softwareVersions.getName() == "software_versions.yml"

ExecutionReport executionReport = pipelineInfo.getExecutionReport()
assert executionReport.getRelativePath() == "./pipeline_info/execution_report_1234-56-78_90-12-34.html"
assert executionReport.getName() == "execution_report_1234-56-78_90-12-34.html"
}

def "parsing an invalid file structure throws DatasetValidationException"() {
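
Taken together, the change means a consumer can now parse an nf-core output directory that lacks run_id.txt. A hypothetical usage sketch follows; the directory path and the no-argument constructor are assumptions, not taken from the diff, and the remaining imports mirror those of the spec:

    import java.nio.file.Paths

    def parser = new BioinformaticAnalysisParser()
    NfCorePipelineResult result = parser.parseFrom(Paths.get("/data/nf-core-output-without-run-id"))
    assert !result.runId                                      // run_id is optional now
    assert result.sampleIds.getName() == "sample_ids.txt"     // other root files are still parsed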