-
Notifications
You must be signed in to change notification settings - Fork 0
/
code_snippets.txt.sh
89 lines (56 loc) · 2.71 KB
/
code_snippets.txt.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
$ bgzip -c WES_TLX3_TAP_Tum7-8.vcf > WES_TLX3_TAP_Tum7-8.vcf.gz
$ tabix -p vcf WES_TLX3_TAP_Tum7-8.vcf.gz
$ bcftools stats WES_TLX3_TAP_Tum7-8_TAP-01.vcf.gz WES_TLX3_TAP_Tum7-8_TLX3-01.vcf.gz > joined_stats.txt
$ plot-vcfstats joined_stats.txt -p WES_joined
####### Split VCF file by samples
#!/bin/bash
[ ! -f "$1" ] && printf "\n\n>> ERROR : File Not Found : $1 \n" && exit 1;
FILE="$1";
for sample in `bcftools query -l $FILE`; do
bcftools view -c1 -Oz -s $sample -o ${FILE/.vcf*/.$sample.vcf.gz} $FILE
done
#######################
# Split VCF file by samples
$ bcftools view -c1 -Oz -s AC3812,AC3813 -o TAP_WGS.vcf.gz FERRIER_09_Germline.allchr.snpEff.p.SAL.SAL10_1.vcf.gz
# Intersect VCF
bcftools isec -p dir A.vcf.gz B.vcf.gz
bcftools isec -p dir A.vcf.gz B.vcf.gz C.vcf.gz D.vcf.gz E.vcf.gz F.vcf.gz -n=6
########################################
# Call variants by bcftools
# http://samtools.github.io/bcftools/howtos/variant-calling.html
bcftools mpileup -Ou -f reference.fa alignments.bam | bcftools call -mv -Ob -o calls.bcf
bcftools view calls.bcf | vcfutils.pl varFilter - > calls_filter.vcf
########################################
## === get DAN seqs from bed coordinates from Python
## ATTENTION!!! gives 0-based fasta not compatible with bcftools consensus
mm9 = "/home/sergio/media/NAS4/PFlab/TLX3_project/ChiP-Seq/references/mm9/chromFa/mm9.fa"
# ---
bd = "tracks/HMMenh_tlx_notFantom_topScore40.bed"
fa = "tracks/HMMenh_tlx_notFantom_topScore40.fa"
bd2fa = "bedtools getfasta -fi {} -bed {} -fo {}".format(mm9,bd,fa)
print('Running process ........ \n')
print(bd2fa)
########################################
# == get DNA coordinates and apply mutation in VCF
faidx -b test.bed reference_genome.fa -o in.fa
bcftools consensus -f in.fa mut.vcf.gz > out.fa
# Pythonic way
# https://github.com/mdshw5/pyfaidx
from pyfaidx import Fasta
fn = 'reference_genome.fa'
fa = Fasta(fn)
reg = fa['chr1'][85280067:85281276]
out = open('out.fa',"w")
out.write('>'+reg.longname+'\n'+reg.seq)
out.close()
# etc.
# The FastaVariant class provides a way to integrate single nucleotide variant calls to generate a consensus sequence.
consensus = FastaVariant('tests/data/chr22.fasta', 'tests/data/chr22.vcf.gz', het=True, hom=True)
RuntimeWarning: Using sample NA06984 genotypes.
####################################
# apply learnet model to binarized data
#Description
#This command takes a saved model file and binarized data and outputs files with the segmentation state assignments
#and/or the posterior state probabilities. These files can also be outputted by LearnModel, but this command enables
#producing the segmentations without the need to relearn the model.
ChromHMM.sh MakeSegmentation model_6.txt binarized_RAG out_RAG