-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
66 lines (59 loc) · 1.7 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// Pipeline parameters: the input data directory and the results directory
params.input = "data"
params.output = "results"

// Training and testing datasets, located under the input directory
trainingData = file("$params.input/train.csv")
testingData = file("$params.input/test.csv")

// Training script and the model file it is pointed at
trainModelScript = file('models/train_model.py')
modelOutput = file("$params.output/model.pkl")

// Prediction script and the file that receives the predicted values
predictValuesScript = file('models/predict_values.py')
predictedValuesOutput = file("$params.output/predicted_values.csv")

// Dependency file passed to the conda setting of every process below
condaEnvironment = 'models/requirements.txt'
// Train the model.
//
// Takes the training data and the training script as file inputs, runs the
// script with Python, and declares the model file as the process output.
process trainModel {
    // `conda` is a process directive, not a section label: it has to be
    // declared at the top of the process body, before input/output/script.
    conda "${condaEnvironment}"

    input:
    file trainingData
    file trainModelScript

    output:
    file modelOutput

    script:
    // NOTE(review): modelOutput is not an input of this process, so
    // ${modelOutput} below presumably resolves to the script-level path under
    // params.output — confirm the declared output is found where the script
    // writes it.
    """
    python ${trainModelScript} --data ${trainingData} --output ${modelOutput}
    """
}
// Predict the values using the trained model.
//
// Takes the testing data, the model file and the prediction script as file
// inputs, runs the script with Python, and declares the predicted-values file
// as the process output.
process predictValues {
    // `conda` is a process directive, not a section label: it has to be
    // declared at the top of the process body, before input/output/script.
    conda "${condaEnvironment}"

    input:
    file testingData
    file modelOutput
    file predictValuesScript

    output:
    file predictedValuesOutput

    script:
    // NOTE(review): predictedValuesOutput is not an input of this process, so
    // ${predictedValuesOutput} below presumably resolves to the script-level
    // path under params.output — confirm the declared output is found where
    // the script writes it.
    """
    python ${predictValuesScript} --data ${testingData} --model ${modelOutput} --output ${predictedValuesOutput}
    """
}
// Merge the predicted values with the testing data.
//
// Reads the testing data and the predicted values as CSV, concatenates them
// column-wise (axis=1), and writes the result to
// "${params.output}/merged_results.csv", which is the declared output.
process mergeResults {
    // `conda` is a process directive, not a section label: it has to be
    // declared at the top of the process body, before input/output/script.
    conda "${condaEnvironment}"

    input:
    file testingData
    file predictedValuesOutput

    output:
    file "${params.output}/merged_results.csv"

    script:
    // The body is Python, so it needs a shebang (otherwise it is run by the
    // default shell) and an explicit pandas import for the `pd` alias.
    // The output directory is created first because to_csv does not create
    // missing parent directories.
    """
    #!/usr/bin/env python
    import os
    import pandas as pd

    data = pd.read_csv('${testingData}')
    predicted = pd.read_csv('${predictedValuesOutput}')
    merged = pd.concat([data, predicted], axis=1)

    out_path = '${params.output}/merged_results.csv'
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    merged.to_csv(out_path, index=False)
    """
}