coverageBased.smk 5.66 KB
Newer Older
Philipp Spohr's avatar
Philipp Spohr committed
1
2
rule estimateKmerCoverage:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
3
4
        read1 = 'data/auxiliary/{dataset}/{id}'+'.qc_internal_R1.fq',
        read2 = 'data/auxiliary/{dataset}/{id}'+'.qc_internal_R2.fq'
Philipp Spohr's avatar
Philipp Spohr committed
5
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
6
7
8
9
        histogram = report('data/output/{dataset}/kmers/{kmer}/{id}/kmers.histo.png',category='Coverage-Based Method',caption='../report/kmerCoverageHistogram.rst'),
        histogramRaw = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/kmers.histo.raw.png',
        mean = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/coverage_estimate_kmercountbased.txt',
        kmererror = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/kmer_error.txt'
Philipp Spohr's avatar
Philipp Spohr committed
10
11
12
13
14
15
16
17
    params:
        k = lambda wildcards: wildcards.kmer,
        #cluster execution
        cpus = '1', #TODO: This could in theory be sped up significantly using a shared cache and multithreading
        gpus = '0',
        mem = '64G',
        walltime = '00:45:00'
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
18
        'logs/{dataset}/kmers/{kmer}/{id}/estimateKmerCoverage.log'
Philipp Spohr's avatar
Philipp Spohr committed
19
    benchmark:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
20
        'benchmarks/{dataset}/kmers/{kmer}/{id}/estimateKmerCoverage.log'
Philipp Spohr's avatar
Philipp Spohr committed
21
22
23
24
25
26
27
28
29
    conda:
        '../envs/biopythonworkbench.yaml'
    script:
        '../scripts/estimateKmerCoverage.py'


'''
rule estimateKmerCoverageFiltered:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
30
        reads = 'data/auxiliary/{dataset}/{id}/filteredReads.fastq'
Philipp Spohr's avatar
Philipp Spohr committed
31
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
32
        histogram = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/kmers.histo.regionXOnly.png'
Philipp Spohr's avatar
Philipp Spohr committed
33
34
35
36
    params:
        k = lambda wildcards: wildcards.kmer
    #TODO: Threads = 2 ?
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
37
        'logs/{dataset}/kmers/{kmer}/{id}/estimateKmerCoverageFiltered.log'
Philipp Spohr's avatar
Philipp Spohr committed
38
39
40
41
42
43
44
45
46
    conda:
        '../envs/main.yaml'
    script:
        '../scripts/estimateKmerCoverageFiltered.py'
'''


rule estimateKmerCoverageAlignment:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
47
48
49
        coverageEstimate = 'data/auxiliary/{dataset}/{id}/coverageEstimate.txt',
        readLengthEstimate = 'data/auxiliary/{dataset}/{id}/readLengthEstimate.txt',
        baseErrorEstimate = 'data/auxiliary/{dataset}/{id}/base_error_estimate.txt'
Philipp Spohr's avatar
Philipp Spohr committed
50
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
51
        kmerCoverage = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/coverage_estimate_alignmentbased.txt'
Philipp Spohr's avatar
Philipp Spohr committed
52
53
54
55
56
57
58
59
    params:
        k = lambda wildcards: wildcards.kmer,
        # cluster execution
        cpus = '1',
        gpus = '0',
        mem = '16G',
        walltime = '00:30:30'
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
60
        'logs/{dataset}/kmers/{kmer}/{id}/estimateKmerCoverage.log'
Philipp Spohr's avatar
Philipp Spohr committed
61
62
63
64
65
66
67
    conda:
        '../envs/biopythonworkbench.yaml'
    script:
        '../scripts/estimateKmerCoverage_alignment.py'

rule estimateCoverageAlignment:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
68
        filteredAlignment = 'data/auxiliary/{dataset}/{id}/alignment.sorted.bam'
Philipp Spohr's avatar
Philipp Spohr committed
69
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
70
        coverageEstimate = 'data/auxiliary/{dataset}/{id}/coverageEstimate.txt'
Philipp Spohr's avatar
Philipp Spohr committed
71
72
73
74
75
76
77
    params:
        # cluster execution
        cpus = '1',
        gpus = '0',
        mem = '16G',
        walltime = '00:30:30'
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
78
        'logs/{dataset}/{id}/estimateKmerCoverage_alignment.log'
Philipp Spohr's avatar
Philipp Spohr committed
79
80
81
82
83
84
85
    conda:
        '../envs/biopythonworkbench.yaml'
    shell:
        'samtools depth  {input.filteredAlignment} |  awk \' $1 == "maskref" {{sum+=$3}} END {{ print "Average = ",sum/NR}}\' | grep -Eo \'[+-]?[0-9]+([.][0-9]+)?\' > {output.coverageEstimate}'

rule calcPriorProbabilitiesCoverage:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
86
        likelihoods = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/likelihoods_cov.json'
Philipp Spohr's avatar
Philipp Spohr committed
87
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
88
        priorFilePath = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/prior_cov.txt'
Philipp Spohr's avatar
Philipp Spohr committed
89
90
91
92
93
94
95
96
97
    params:
        k = lambda wildcards: wildcards.kmer,
        dps = config['dps'],
        # cluster execution
        cpus = '1',
        gpus = '0',
        mem = '2G',
        walltime = '00:05:30'
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
98
        'logs/{dataset}/{iterset}/probabilistic/kmers/{kmer}/{id}/calcPrior_cov.log'
Philipp Spohr's avatar
Philipp Spohr committed
99
100
101
102
103
104
105
    conda:
        '../envs/biopythonworkbench.yaml'
    script:
        '../scripts/calcPriorProbabilities.py'

rule calcProbabilitiesCoverage:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
106
107
        likelihoods = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/likelihoods_cov.json',
        prior = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/prior_cov.txt'
Philipp Spohr's avatar
Philipp Spohr committed
108
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
109
        probabilities = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/scores.probabilistic_cov.tsv'
Philipp Spohr's avatar
Philipp Spohr committed
110
111
112
113
114
115
116
117
118
119
    params:
        dps = config['dps'],
        # cluster execution
        cpus = '1',
        gpus = '0',
        mem = '8G',
        walltime = '00:10:30'
    conda:
        '../envs/biopythonworkbench.yaml'
    log:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
120
        'logs/{dataset}/{iterset}/probabilistic/kmers/{kmer}/{id}/probabilities_cov.log'
Philipp Spohr's avatar
Philipp Spohr committed
121
122
123
124
125
126
    script:
        '../scripts/calcSpaTypeProbabilities.py'


rule createFitnessPlots:
    input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
127
        counts = 'data/auxiliary/{dataset}/kmers/{kmer}/{id}/alignment.counts.json',
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
128
        probabilities = 'data/auxiliary/{dataset}/{iterset}/kmers/{kmer}/{id}/scores.probabilistic_cov.tsv',
Philipp Spohr's avatar
Philipp Spohr committed
129
130
        ratios = 'data/auxiliary/kmers/{kmer}/spaSequencesRatios.json'
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
131
        report('data/output/{dataset}/{iterset}/kmers/{kmer}/{id}_top3fit.svg',category='Coverage-Based Method',caption='../report/fitnessPlot.rst')
Philipp Spohr's avatar
Philipp Spohr committed
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
    params:
        dps = config['dps'],
        # cluster execution
        cpus = '1',
        gpus = '0',
        mem = '1G',
        walltime = '00:20:30'
    conda:
        '../envs/biopythonworkbench.yaml'
    script:
        '../scripts/createFitnessPlots.py'


rule calcExpectedCounts:
    input:
        kmerCoverageEstimate = determineKmerCoverageEstimateFile(),
        counts = 'data/auxiliary/kmers/{kmer}/spaSequences.counts.json'
    output:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
150
        'data/auxiliary/{dataset}/kmers/{kmer}/{id}/expected_counts.json'
Philipp Spohr's avatar
Philipp Spohr committed
151
152
153
154
155
156
157
158
159
160
161
162
    params:
        k = lambda wildcards: wildcards.kmer,
        # cluster execution
        cpus = '1',
        mem = '8G',
        gpus = '0',
        walltime = '00:30:00' 
    conda:
        '../envs/biopythonworkbench.yaml'
    script:
        '../scripts/calcExpectedCounts.py'