Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Jan Hoeckesfeld
Snakemake Ngs Spa Typing
Commits
35ec954c
Commit
35ec954c
authored
Nov 22, 2020
by
Jan Hoeckesfeld
Browse files
updated rule likelihood inputs
parent
a5e6c2d7
Changes
3
Hide whitespace changes
Inline
Side-by-side
rules/kmerApproach.snk
View file @
35ec954c
...
...
@@ -456,7 +456,8 @@ rule createDistanceMatrixOverKmersOfV:
input:
kmers = 'data/auxiliary/kmers/{kmer}/spaSequences.counts.json'
output:
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz'
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz',
V_kmers = 'data/auxiliary/kmers/{kmer}/V_kmers.json'
params:
k = lambda wildcards: wildcards.kmer,
hamming_distance_cutoff = 5,
...
...
rules/probabilistic.snk
View file @
35ec954c
...
...
@@ -65,7 +65,9 @@ rule calcLikelihoods:
expected = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/expected_counts.json',
observed = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/alignment.counts.json',
kmerError = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/kmer_error.txt',
kmerCoverageEstimate = determineKmerCoverageEstimateFile()
kmerCoverageEstimate = determineKmerCoverageEstimateFile(),
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz',
V_kmers = 'data/auxiliary/kmers/{kmer}/V_kmers.json'
output:
likelihoods = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/likelihoods_cov.json',
unexpectedLikelihoods = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/unexpected_likelihoods_cov.json'
...
...
@@ -86,7 +88,7 @@ rule calcLikelihoods:
singularity:
'docker://phspo/ckmertools:iterationset-tests'
shell:
'c_kmertools --e {input.expected} --c {params.cpus} --m 0 --o {input.observed} --kmererror {params.e} --d {params.deviationCutoff} --target {output.likelihoods} --unexpected {output.unexpectedLikelihoods} --log {log} --itersetType {params.itersetType}'
'c_kmertools --e {input.expected} --c {params.cpus} --m 0 --o {input.observed} --kmererror {params.e} --d {params.deviationCutoff} --target {output.likelihoods} --unexpected {output.unexpectedLikelihoods} --log {log} --itersetType {params.itersetType} --hammingdist {input.V_kmers_distances} --kmersindex {input.V_kmers}'
rule calcLikelihoods_Generative:
...
...
scripts/calcDistanceMatrixVkmers.py
View file @
35ec954c
...
...
@@ -5,8 +5,13 @@ from scipy.sparse import coo_matrix, vstack, save_npz
from
sklearn.metrics
import
pairwise_distances_chunked
,
pairwise_distances
##############INPUT######################################
cutoff
=
snakemake
.
params
[
'hamming_distance_cutoff'
]
kmers_file
=
snakemake
.
input
[
'kmers'
]
mem
=
snakemake
.
params
[
'mem'
][:
-
1
]
V_kmers_distances_file
=
snakemake
.
output
[
'V_kmers_distances'
]
V_kmers_file
=
snakemake
.
output
[
'V_kmers'
]
# eg "spaSequences.counts.json"
f
=
open
(
snakemake
.
input
[
'kmers'
]
,
"r"
)
f
=
open
(
kmers_file
,
"r"
)
expected_json
=
f
.
read
()
person_dict
=
json
.
loads
(
expected_json
)
...
...
@@ -19,7 +24,7 @@ m = len(kmers[0])
#########################################################
# eg 5
ALLOWED_DISTANCE
=
int
(
snakemake
.
params
[
'hamming_distance_cutoff'
]
)
/
m
ALLOWED_DISTANCE
=
int
(
cutoff
)
/
m
def reduce_func(D_chunk, start):
    """Convert one chunk of distances into small integer counts.

    Used as the ``reduce_func`` callback of ``pairwise_distances_chunked``.

    Args:
        D_chunk: array of distances for the current chunk. Entries that
            are not strictly below the module-level ``ALLOWED_DISTANCE``
            are replaced by 0.
        start: part of the callback signature; not used here.

    Returns:
        An array of dtype ``'u1'`` with the same shape as ``D_chunk``,
        holding the kept distances multiplied by the module-level ``m``
        and rounded to the nearest integer.
    """
    neigh = np.where(D_chunk < ALLOWED_DISTANCE, D_chunk, 0)
    # Round before casting: a float product such as (1/49)*49 evaluates to
    # 0.9999999999999999, which a bare astype('u1') would truncate to 0.
    return np.rint(neigh * m).astype('u1')
...
...
@@ -44,7 +49,7 @@ print(M.shape)
cpus
=
-
1
# working_memory = 1024
# eg snakemake.params['mem'] = 1G
working_memory
=
int
(
snakemake
.
params
[
'mem'
][:
-
1
]
)
*
1000
-
500
working_memory
=
int
(
mem
)
*
1000
-
500
gen
=
pairwise_distances_chunked
(
kmers_int
,
reduce_func
=
reduce_func
,
metric
=
"hamming"
,
n_jobs
=
cpus
,
working_memory
=
working_memory
)
...
...
@@ -74,7 +79,8 @@ print("Matrix shape:", M.shape)
print
(
"Density of sparse matrix: "
,
M
.
getnnz
()
/
np
.
prod
(
M
.
shape
))
M
=
M
.
tocoo
()
# eg 'V_kmer_distances.npz'
save_npz
(
snakemake
.
output
[
'V_kmers_distances'
],
M
)
# https://github.com/rogersce/cnpy
# https://stackoverflow.com/questions/36433030/load-scipy-sparse-csr-matrix-in-c
save_npz
(
V_kmers_distances_file
,
M
)
with
open
(
V_kmers_file
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
json
.
dump
(
kmers
,
f
,
ensure_ascii
=
False
,
indent
=
4
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment