Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Jan Hoeckesfeld
Snakemake Ngs Spa Typing
Commits
43c43755
Commit
43c43755
authored
Oct 20, 2020
by
Jan Hoeckesfeld
Browse files
added venndiagrams
parent
7b196446
Changes
5
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
43c43755
...
...
@@ -39,6 +39,7 @@ def get_input():
input_list.append(expand('data/output/kmers/{kmer}/uniquenessTest.tsv',kmer=kmer_lengths))
if config['kmer_stats_analysis']:
input_list.append(expand('data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/stats.tsv',kmer=kmer_lengths,id=inputIDs))
input_list.append(expand('data/output/'+config['input_folder']+'/kmers/{kmer}/{id}/spaTypesGroundTruthVennDia.svg',kmer=kmer_lengths,id=inputIDs))
return input_list
...
...
envs/biopythonworkbench.yaml
View file @
43c43755
...
...
@@ -9,9 +9,10 @@ dependencies:
-
biopython=1.71
-
python=3.7
-
regex
-
openssl =
1.
0
-
samtools =
1.
9
-
seaborn = 0.
9
.0
-
scipy = 1.
2.1
-
pysam = 0.1
5.2
-
openssl = 1.
1.1h
-
samtools =
1.
11
-
seaborn = 0.
11
.0
-
scipy = 1.
5.2
-
pysam = 0.1
6.0.1
-
mpmath = 1.1.0
-
matplotlib-venn=0.11.5
rules/kmerApproach.snk
View file @
43c43755
...
...
@@ -340,8 +340,25 @@ rule calcAverageReadLength:
'../envs/biopythonworkbench.yaml'
shell:
'awk \' {{ if(NR%4==2) {{count++; bases += length}} }} END{{print bases/count}} \' {input.read1} > {output}' #todo: use both read files?
rule createSpaTypeVennDiagram:
input:
expected = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/expected_counts.json',
observed = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/alignment.counts.json',
scores = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/scores.probabilistic_cov.tsv'
output:
venngtt = 'data/output/'+config['input_folder']+'/kmers/{kmer}/{id}/spaTypesGroundTruthVennDia.svg',
venntopsix = 'data/output/'+config['input_folder']+'/kmers/{kmer}/{id}/spaTypesTopSixVennDia.svg',
vennrandomsix = 'data/output/'+config['input_folder']+'/kmers/{kmer}/{id}/spaTypesRandomSixVennDia.svg'
params:
gtt = lambda wildcards : getGroundTruthType(wildcards.id)
log:
'logs/'+config['input_folder']+'/probabilistic/kmers/{kmer}/{id}/spaTypeVennDia.log'
conda:
'../envs/biopythonworkbench.yaml'
script:
'../scripts/spaTypeVennDia.py'
rule createValidKmerHistogram:
...
...
rules/probabilistic.snk
View file @
43c43755
...
...
@@ -64,7 +64,7 @@ rule calcLikelihoods:
input:
expected = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/expected_counts.json',
observed = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/alignment.counts.json',
iterationset = determineIterationset({id}}),
#
iterationset = determineIterationset({id}}),
kmerError = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/kmer_error.txt',
kmerCoverageEstimate = determineKmerCoverageEstimateFile()
output:
...
...
scripts/spaTypeVennDia.py
0 → 100644
View file @
43c43755
import
json
import
logging
import
random
from
textwrap
import
wrap
import
regex
as
re
from
matplotlib
import
pyplot
as
plt
from
matplotlib_venn
import
venn3
logging
.
basicConfig
(
filename
=
snakemake
.
log
[
0
],
level
=
logging
.
DEBUG
,
format
=
"%(asctime)s:%(levelname)s:%(message)s"
)
# Read counts
expected_counts
=
json
.
load
(
open
(
snakemake
.
input
[
'expected'
],
'r'
))
actual_counts
=
json
.
load
(
open
(
snakemake
.
input
[
'observed'
],
'r'
))
# without assessing the actual counts! just if it exists!
V
=
set
(
k
for
spaTypeID
in
expected_counts
for
k
in
expected_counts
[
spaTypeID
])
O
=
set
(
k
for
k
in
actual_counts
)
def
generate_six_venndiagrams
(
type_counts
,
type_names
,
filepath
):
figure
,
axes
=
plt
.
subplots
(
2
,
3
,
figsize
=
(
11.69
,
8.27
))
x
=
[
0
,
0
,
0
,
1
,
1
,
1
]
y
=
[
0
,
1
,
2
,
0
,
1
,
2
]
for
i
in
range
(
6
):
v
=
venn3
([
O
,
type_counts
[
i
],
V
],
set_labels
=
(
'O'
,
type_names
[
i
],
'V'
),
set_colors
=
(
'darkorange'
,
'blue'
,
'green'
),
alpha
=
0.8
,
ax
=
axes
[
x
[
i
]][
y
[
i
]])
for
idx
,
subset
in
enumerate
(
v
.
subset_labels
):
if
subset
is
not
None
:
v
.
subset_labels
[
idx
].
set_visible
(
True
)
axes
[
1
,
2
].
axis
(
'off'
)
figure
.
savefig
(
filepath
)
##############################################################
# Si = groundtruth spatype!
Si
=
set
(
k
for
k
in
expected_counts
[
snakemake
.
params
[
'gtt'
]])
fig
=
plt
.
figure
()
v
=
venn3
([
O
,
Si
,
V
],
set_labels
=
(
'O'
,
'Si'
,
'V'
),
set_colors
=
(
'darkorange'
,
'blue'
,
'green'
),
alpha
=
0.8
)
fig
.
suptitle
(
"
\n
"
.
join
(
wrap
(
"Distribution of kmers in spa-type "
+
snakemake
.
params
[
'gtt'
]
+
" (expected spa-type)"
,
60
)))
fig
.
savefig
(
snakemake
.
output
[
'venngtt'
])
################################################################
# take the 6 most feasible spatypes and plot their venn diagrams!
f
=
open
(
snakemake
.
input
[
'scores'
],
'r'
)
t
=
[]
type_names
=
[]
type_kmer_counts
=
[]
for
i
,
l
in
enumerate
(
f
.
readlines
()):
if
i
>
5
:
break
t
.
append
(
re
.
split
(
r
'\t+'
,
l
.
rstrip
(
'
\t\n
'
)))
type_names
.
append
(
t
[
i
][
0
])
type_kmer_counts
.
append
(
set
(
k
for
k
in
expected_counts
[
type_names
[
i
]]))
generate_six_venndiagrams
(
type_kmer_counts
,
type_names
,
snakemake
.
output
[
'venntopsix'
])
##################################################################
# random six types
random_spatypes
=
random
.
sample
(
list
(
expected_counts
),
6
)
type_kmer_counts
=
[
set
(
k
for
k
in
expected_counts
[
s
])
for
s
in
random_spatypes
]
generate_six_venndiagrams
(
type_kmer_counts
,
random_spatypes
,
snakemake
.
output
[
'vennrandomsix'
])
##################################################################
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment