-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
134 lines (112 loc) · 4.27 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
################################################################################
####### Replace these paths with their actual locations on your machine ########
# NOTE: every variable here uses recursive (=) assignment on purpose: several
# values reference gen_dir, which is only assigned further down, so expansion
# must be deferred until the variable is actually used in a recipe.
# STAR binary
star=./STAR/bin/Linux_x86_64/STAR
# ConsDB main Python script
consdb=./ConsDB/consdb/ConsDB.py
# Location for the variant database files
db_dir=./db_files/
# Location for the parsed ConsDB files
consdb_dir=./consdb_files/
# Location for the consensus VCF files
vcf_dir=./vcfs/
# Location for the variant type VCF files
vt_vcf_dir=./vt_vcfs/
# Location for the full personal genomes
pers_vcf_dir=./pers_full_vcfs/
# Location for mapping results
map_dir=./mapping/
# Location for the unmasked reference FASTA file (input of the mask_ref target).
# This must NOT be the same path as h38_fa_mask: mask_ref reads this file and
# writes h38_fa_mask, so identical paths would overwrite the input in place.
h38_fa=${gen_dir}/h38/ref.fa
# Location for the masked reference FASTA file (output of the mask_ref target)
h38_fa_mask=${gen_dir}/h38/ref.maskPAR.fa
# Location for GTF file
h38_gtf=${gen_dir}/h38/genes.gtf
################################################################################
# Location for STAR genome directories
gen_dir=./genomes/
# Location for reads to map
reads_dir=./reads/
# Location for all other scripts
scripts_dir=./scripts/
# All individuals used in this analysis
individuals=HG00512 HG00513 HG00731 HG00732 HG00733 NA19238 NA19239 NA19240
# Top-level target: depends on every analysis step defined in this file.
all: \
    mask_ref \
    make_all_vcfs \
    make_all_genomes \
    map_everyone \
    make_pers_full_vcf_all \
    split_vcfs_to_hom_het \
    find_read_var_overlap \
    compare_mapping_all \
    metrics_all \
    metrics_print_all \
    find_et_vt_overlap_all \
    get_all_overlap_reads \
    calc_norm_values \
    count_read_categories_all \
    make_all_paper_figs
# None of these targets names a file that its recipe creates, so declare them
# all phony; a stray file with the same name then cannot suppress a step.
.PHONY: \
    all \
    mask_ref \
    make_all_vcfs \
    make_all_genomes \
    map_everyone \
    make_pers_full_vcf_all \
    split_vcfs_to_hom_het \
    find_read_var_overlap \
    compare_mapping_all \
    metrics_all \
    metrics_print_all \
    find_et_vt_overlap_all \
    get_all_overlap_reads \
    calc_norm_values \
    count_read_categories_all \
    make_all_paper_figs
# Mask the intervals listed in ${gen_dir}/h38/par_pos.bed within the reference
# FASTA ${h38_fa} and write the masked sequence to ${h38_fa_mask}.
# (The tool is "bedtools"; the previous spelling "betools" was a typo that made
# this step fail with "command not found".)
mask_ref:
	bedtools maskfasta -fi ${h38_fa} -bed ${gen_dir}/h38/par_pos.bed \
		-fo ${h38_fa_mask}
make_all_vcfs:
${scripts_dir}/make_all_vcf.sh ./id_table.csv ${db_dir} \
${vcf_dir} ${consdb} ${consdb_dir}
# Run make_all_genomes.sh with the STAR binary, the masked reference FASTA, the
# GTF file, the genome directory and every *.vcf file found under ${vcf_dir}.
make_all_genomes:
	# The -name pattern is quoted so that find, not the shell, expands it;
	# unquoted, a .vcf file in the current directory would replace the pattern.
	${scripts_dir}/make_all_genomes.sh ${star} ${h38_fa_mask} ${h38_gtf} \
		${gen_dir} $$(find ${vcf_dir} -name '*.vcf')
# For each individual, run map_individ.sh with the STAR binary, the individual
# ID, the ID table, the reads directory, the genome directory and that
# individual's directory under map_dir; print the ID when the script succeeds.
map_everyone:
	for indiv in $(individuals); do \
		$(scripts_dir)/map_individ.sh \
			$(star) \
			$$indiv \
			./id_table.csv \
			$(reads_dir) \
			$(gen_dir) \
			$(map_dir)/$${indiv}/ \
			&& echo $$indiv; \
	done
# For each individual, run make_full_pers_vcfs.sh with the individual ID, the
# ID table, the personal VCF directory and the make_pers.awk script; print the
# ID when the script succeeds.
make_pers_full_vcf_all:
	for indiv in $(individuals); do \
		$(scripts_dir)/make_full_pers_vcfs.sh \
			$$indiv \
			./id_table.csv \
			$(pers_vcf_dir) \
			$(scripts_dir)/make_pers.awk \
			&& echo $$indiv; \
	done
# Run split_vcf_to_hom_het_all.sh with the ID table, the personal, consensus
# and variant-type VCF directories, and the paths of the two per-VCF helper
# scripts (shell and Python).
split_vcfs_to_hom_het:
	$(scripts_dir)/split_vcf_to_hom_het_all.sh \
		./id_table.csv \
		$(pers_vcf_dir) \
		$(vcf_dir) \
		$(vt_vcf_dir) \
		$(scripts_dir)/split_vcf_to_hom_het.sh \
		$(scripts_dir)/split_vcf_to_hom_het.py
# For each individual, run find_read_var_overlaps.sh with that individual's
# directory under vt_vcf_dir, the mapping directory, the individual ID and the
# literal argument "hh_overlap"; print the ID when the script succeeds.
find_read_var_overlap:
	for indiv in $(individuals); do \
		$(scripts_dir)/find_read_var_overlaps.sh \
			$(vt_vcf_dir)/$${indiv}/ \
			$(map_dir) \
			$$indiv \
			hh_overlap \
			&& echo $$indiv; \
	done
# Create ./sam_frags, then for each individual run
# compare_aln_individ_parallel.sh with that individual's directory under
# map_dir, the compareAligns.awk script and the ./sam_frags/frag_ prefix;
# print the ID when the script succeeds.
compare_mapping_all:
	mkdir -p ./sam_frags
	for indiv in $(individuals); do \
		$(scripts_dir)/compare_aln_individ_parallel.sh \
			$(map_dir)/$${indiv}/ \
			$(scripts_dir)/compareAligns.awk \
			./sam_frags/frag_ \
			&& echo $$indiv; \
	done
# For each individual, run metrics.sh with that individual's directory under
# map_dir and the arguments "all_reads_aln_comp_" and
# "all_reads_comp_summary_"; print the ID when the script succeeds.
metrics_all:
	for indiv in $(individuals); do \
		$(scripts_dir)/metrics.sh \
			$(map_dir)/$${indiv}/ \
			all_reads_aln_comp_ \
			all_reads_comp_summary_ \
			&& echo $$indiv; \
	done
# For each individual, run metrics_print.sh with that individual's directory
# under map_dir and the arguments "all_reads_aln_comp_" and "reads"; print the
# ID when the script succeeds.
metrics_print_all:
	for indiv in $(individuals); do \
		$(scripts_dir)/metrics_print.sh \
			$(map_dir)/$${indiv}/ \
			all_reads_aln_comp_ \
			reads \
			&& echo $$indiv; \
	done
# For each individual, run err_type_var_type_overlap_hom_het.sh with that
# individual's directory under map_dir and the arguments "reads",
# "hh_vt_overlap" and "hh_overlap"; print the ID when the script succeeds.
find_et_vt_overlap_all:
	for indiv in $(individuals); do \
		$(scripts_dir)/err_type_var_type_overlap_hom_het.sh \
			$(map_dir)/$${indiv}/ \
			reads \
			hh_vt_overlap \
			hh_overlap \
			&& echo $$indiv; \
	done
# For each individual, concatenate every *.hh_overlap file two levels below
# that individual's mapping directory, sort the lines numerically, drop
# adjacent duplicates and write the result to
# all_mapped_overlapping_reads.csv in the same directory; print the ID when
# the pipeline succeeds.
get_all_overlap_reads:
	for indiv in $(individuals); do \
		cat $(map_dir)/$${indiv}/*/*.hh_overlap \
			| sort -n \
			| uniq > $(map_dir)/$${indiv}/all_mapped_overlapping_reads.csv \
			&& echo $$indiv; \
	done
# Run calc_paper_norm_values.sh with the mapping directory followed by the
# full list of individuals.
calc_norm_values:
	$(scripts_dir)/calc_paper_norm_values.sh \
		$(map_dir) \
		$(individuals)
# Start one background job per individual that runs count_read_categories.py
# on that individual's *.hh_vt_overlap files (printing the ID when the script
# succeeds), then wait for all background jobs to finish.
# Prerequisites: every *.hh_vt_overlap file that exists under map_dir when the
# Makefile is parsed.
count_read_categories_all: $(wildcard $(map_dir)/*/*/*.hh_vt_overlap)
	for indiv in $(individuals); do \
		python $(scripts_dir)/count_read_categories.py \
			-i $(map_dir)/$${indiv}/*/*.hh_vt_overlap \
			&& echo $$indiv & \
	done && wait
# Run make_all_paper_figs.py on every *_var_type_counts.csv file (two levels
# below ${map_dir}) whose name does not start with "a", passing the ID table
# as -pop, ${map_dir}/paper_fig_norms.csv as -norm and ./paper_figs/fig as -o.
make_all_paper_figs:
	# Use [!a] rather than [^a]: "!" is the POSIX negation for shell bracket
	# patterns, while "^" is unspecified and may be taken literally by
	# /bin/sh (Make's default shell), inverting the intended match.
	python ${scripts_dir}/make_all_paper_figs.py \
		-i ${map_dir}/*/*/[!a]*_var_type_counts.csv \
		-pop ./id_table.csv -norm ${map_dir}/paper_fig_norms.csv \
		-o ./paper_figs/fig