Skip to content

Commit

Permalink
Support metadata file for registration (#5)
Browse files Browse the repository at this point in the history
Adds support for a metadata file, that contains a tab-separated
list of measurement id and file pairs, which will be used for registration.
Files that have the same measurement ID entry will be aggregated and processed as one task.
  • Loading branch information
sven1103 authored May 14, 2024
1 parent 5c46a6c commit 8a7e941
Show file tree
Hide file tree
Showing 12 changed files with 259 additions and 60 deletions.
9 changes: 6 additions & 3 deletions src/main/java/life/qbic/data/processing/AppConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,18 @@ ScannerConfiguration scannerConfiguration(
RegistrationWorkersConfig registrationWorkersConfig(
@Value("${registration.threads}") int amountOfWorkers,
@Value("${registration.working.dir}") String workingDirectory,
@Value("${registration.target.dir}") String targetDirectory) {
return new RegistrationWorkersConfig(amountOfWorkers, workingDirectory, targetDirectory);
@Value("${registration.target.dir}") String targetDirectory,
@Value("${registration.metadata.filename}") String metadataFileName) {
return new RegistrationWorkersConfig(amountOfWorkers, workingDirectory, targetDirectory,
metadataFileName);
}

@Bean
RegistrationConfiguration registrationConfiguration(
RegistrationWorkersConfig registrationWorkersConfig) {
return new RegistrationConfiguration(registrationWorkersConfig.workingDirectory().toString(),
registrationWorkersConfig.targetDirectory().toString());
registrationWorkersConfig.targetDirectory().toString(),
registrationWorkersConfig.metadataFileName());
}

@Bean
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/life/qbic/data/processing/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public static void main(String[] args) {

List<ProcessRegistrationRequest> registrationWorkers = new LinkedList<>();
for (int i=0; i<registrationWorkersConfig.amountOfWorkers(); i++) {
registrationWorkers.add(new ProcessRegistrationRequest(requestQueue, registrationConfiguration));
registrationWorkers.add(new ProcessRegistrationRequest(requestQueue, registrationConfiguration, globalConfig));
}

log.info("Registering {} processing workers...", processingWorkersConfig.threads());
Expand Down
21 changes: 21 additions & 0 deletions src/main/java/life/qbic/data/processing/Provenance.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
Expand Down Expand Up @@ -42,6 +44,9 @@ public class Provenance {
@JsonProperty("measurementId")
public String qbicMeasurementID;

@JsonProperty("datasetFiles")
public List<String> datasetFiles;

/**
* A list of ordered processing folder stops the dataset has traversed and passed successfully.
* <p>
Expand Down Expand Up @@ -95,6 +100,22 @@ public enum ERROR_CODE {
IO_ERROR
}

public void addDatasetFiles(Collection<String> datasetFiles) {
Objects.requireNonNull(datasetFiles);
if (this.datasetFiles == null) {
this.datasetFiles = new ArrayList<>();
}
this.datasetFiles.addAll(datasetFiles);
}

public void addDatasetFile(String datasetFile) {
addDatasetFiles(Collections.singletonList(datasetFile));
}

public Collection<String> datasetFiles() {
return datasetFiles.stream().toList();
}

public static class ProvenanceException extends RuntimeException {

private final ERROR_CODE code;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ public class RegistrationWorkersConfig {

private final Path targetDirectory;

public RegistrationWorkersConfig(int threads, String workingDirectory, String targetDirectory) {
private final String metadataFileName;

public RegistrationWorkersConfig(int threads, String workingDirectory, String targetDirectory, String metadataFileName) {
if (threads < 1) {
throw new IllegalArgumentException("Number of threads must be greater than 0");
}
Expand All @@ -26,6 +28,7 @@ public RegistrationWorkersConfig(int threads, String workingDirectory, String ta
this.workingDirectory = directory;
this.amountOfWorkers = threads;
this.targetDirectory = targetDirectoryPath;
this.metadataFileName = metadataFileName;
}

public int amountOfWorkers() {
Expand All @@ -39,4 +42,8 @@ public Path workingDirectory() {
public Path targetDirectory() {
return this.targetDirectory;
}

public String metadataFileName() {
return this.metadataFileName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import life.qbic.data.processing.ErrorSummary;
import life.qbic.data.processing.Provenance;
import life.qbic.data.processing.Provenance.ProvenanceException;
Expand Down Expand Up @@ -146,17 +147,8 @@ private void evaluateDirectory(File taskDir) {
return;
}

var datasetSearch = findDataset(taskDir);
if (datasetSearch.isEmpty()) {
LOG.error("No dataset found: {}", taskDir.getAbsolutePath());
moveBackToOrigin(taskDir, provenance, "No dataset directory found.");
return;
}
var dataset = datasetSearch.get();
Matcher matcher = measurementIdPattern.matcher(dataset.getName());
var measurementIdResult = matcher.results().map(MatchResult::group).findFirst();
var measurementIdResult = provenance.qbicMeasurementID == null || provenance.qbicMeasurementID.isBlank() ? Optional.empty() : Optional.of(provenance.qbicMeasurementID);
if (measurementIdResult.isPresent()) {
provenance.qbicMeasurementID = measurementIdResult.get();
provenance.addToHistory(taskDir.getAbsolutePath());
try {
updateProvenanceFile(provenanceSearch.get(), provenance);
Expand All @@ -173,12 +165,13 @@ private void evaluateDirectory(File taskDir) {
}
return;
}
var errorMessage = ErrorSummary.createSimple(taskDir.getName(), dataset.getName(),
var errorMessage = ErrorSummary.createSimple(taskDir.getName(),
String.join(", ", provenance.datasetFiles),
"Missing QBiC measurement ID",
"For a successful registration please provide the pre-registered QBiC measurement ID");
LOG.error(
"Missing measurement identifier: no known measurement id was found in the content of directory '{}' in task '{}'",
dataset.getName(), taskDir.getName());
String.join(", ", provenance.datasetFiles), taskDir.getName());
moveBackToOrigin(taskDir, provenance, errorMessage.toString());
}

Expand Down Expand Up @@ -208,7 +201,8 @@ private void moveToSystemIntervention(File taskDir, String reason) {
}

private void moveBackToOrigin(File taskDir, Provenance provenance, String reason) {
LOG.info("Moving back to original user directory: " + taskDir.getAbsolutePath());
LOG.info("Moving back to original user directory: {}",
Paths.get(provenance.userWorkDirectoryPath).resolve(usersErrorDirectory));
try {
var errorFile = taskDir.toPath().resolve("error.txt").toFile();
errorFile.createNewFile();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ private void processFile(File taskDir) {
}

Provenance finalProvenance = provenance;
packageDataset(taskDir);
taskDirContent.stream().filter(file -> !file.getName().equals(Provenance.FILE_NAME)).findFirst()
.ifPresent(file -> {
finalProvenance.addToHistory(taskDir.getAbsolutePath());
Expand All @@ -153,22 +152,6 @@ private void processFile(File taskDir) {
});
}

private void packageDataset(File taskDir) {
Optional<File> datasetSearch = Arrays.stream(taskDir.listFiles())
.filter(file -> !file.getName().equals(Provenance.FILE_NAME)).findFirst();
datasetSearch.ifPresent(file -> {
if (file.isFile()) {
File datasetDir = taskDir.toPath().resolve(file.getName() + "_dataset").toFile();
datasetDir.mkdir();
try {
Files.move(file.toPath(), datasetDir.toPath().resolve(file.getName()));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
}

private Optional<File> findProvenanceFile(List<File> taskDirContent) {
Optional<File> provenanceFileSearch = taskDirContent.stream()
.filter(file -> file.getName().equals(Provenance.FILE_NAME)).findFirst();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package life.qbic.data.processing.registration;

/**
* <b><enum short description - 1 Line!></b>
*
* <p><More detailed description - When to use, what it solves, etc.></p>
*
* @since <version tag>
*/
public enum ErrorCode {
METADATA_FILE_NOT_FOUND,
INCOMPLETE_METADATA, FILE_NOT_FOUND, MISSING_FILE_ENTRY, IO_EXCEPTION
}
Loading

0 comments on commit 8a7e941

Please sign in to comment.