Commit 35ec954c authored by Jan Hoeckesfeld
Browse files

updated rule likelihood inputs

parent a5e6c2d7
......@@ -456,7 +456,8 @@ rule createDistanceMatrixOverKmersOfV:
input:
kmers = 'data/auxiliary/kmers/{kmer}/spaSequences.counts.json'
output:
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz'
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz',
V_kmers = 'data/auxiliary/kmers/{kmer}/V_kmers.json'
params:
k = lambda wildcards: wildcards.kmer,
hamming_distance_cutoff = 5,
......
......@@ -65,7 +65,9 @@ rule calcLikelihoods:
expected = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/expected_counts.json',
observed = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/alignment.counts.json',
kmerError = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/kmer_error.txt',
kmerCoverageEstimate = determineKmerCoverageEstimateFile()
kmerCoverageEstimate = determineKmerCoverageEstimateFile(),
V_kmers_distances = 'data/auxiliary/kmers/{kmer}/V_kmers.distances.npz',
V_kmers = 'data/auxiliary/kmers/{kmer}/V_kmers.json'
output:
likelihoods = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/likelihoods_cov.json',
unexpectedLikelihoods = 'data/auxiliary/'+config['input_folder']+'/kmers/{kmer}/{id}/unexpected_likelihoods_cov.json'
......@@ -86,7 +88,7 @@ rule calcLikelihoods:
singularity:
'docker://phspo/ckmertools:iterationset-tests'
shell:
'c_kmertools --e {input.expected} --c {params.cpus} --m 0 --o {input.observed} --kmererror {params.e} --d {params.deviationCutoff} --target {output.likelihoods} --unexpected {output.unexpectedLikelihoods} --log {log} --itersetType {params.itersetType}'
'c_kmertools --e {input.expected} --c {params.cpus} --m 0 --o {input.observed} --kmererror {params.e} --d {params.deviationCutoff} --target {output.likelihoods} --unexpected {output.unexpectedLikelihoods} --log {log} --itersetType {params.itersetType} --hammingdist {input.V_kmers_distances} --kmersindex {input.V_kmers}'
rule calcLikelihoods_Generative:
......
......@@ -5,8 +5,13 @@ from scipy.sparse import coo_matrix, vstack, save_npz
from sklearn.metrics import pairwise_distances_chunked, pairwise_distances
##############INPUT######################################
cutoff = snakemake.params['hamming_distance_cutoff']
kmers_file = snakemake.input['kmers']
mem = snakemake.params['mem'][:-1]
V_kmers_distances_file = snakemake.output['V_kmers_distances']
V_kmers_file = snakemake.output['V_kmers']
# eg "spaSequences.counts.json"
f = open(snakemake.input['kmers'], "r")
f = open(kmers_file, "r")
expected_json = f.read()
person_dict = json.loads(expected_json)
......@@ -19,7 +24,7 @@ m = len(kmers[0])
#########################################################
# eg 5
ALLOWED_DISTANCE = int(snakemake.params['hamming_distance_cutoff'])/m
ALLOWED_DISTANCE = int(cutoff)/m
def reduce_func(D_chunk, start):
    """Turn a chunk of normalized Hamming distances into mismatch counts.

    Passed as the ``reduce_func`` callback to ``pairwise_distances_chunked``
    (called with ``metric="hamming"``), so each distance in ``D_chunk`` is a
    fraction of the k-mer length ``m``.

    Args:
        D_chunk: array of pairwise distances for the current chunk.
        start: unused here; part of the callback signature.

    Returns:
        An unsigned 8-bit array with the same shape as ``D_chunk``. Entries
        whose distance is below ``ALLOWED_DISTANCE`` hold ``distance * m``
        rounded to the nearest integer; all other entries are 0.
    """
    neigh = np.where(D_chunk < ALLOWED_DISTANCE, D_chunk, 0)
    # Round before the integer cast: astype() truncates toward zero, so a
    # product such as (1 / 49) * 49 == 0.9999999999999999 would otherwise be
    # stored as 0 and a real neighbour would be silently dropped.
    return np.rint(neigh * m).astype('u1')
......@@ -44,7 +49,7 @@ print(M.shape)
cpus = -1
# working_memory = 1024
# eg snakemake.params['mem'] = 1G
working_memory = int(snakemake.params['mem'][:-1]) * 1000 - 500
working_memory = int(mem) * 1000 - 500
gen = pairwise_distances_chunked(kmers_int, reduce_func=reduce_func, metric="hamming", n_jobs=cpus,
working_memory=working_memory)
......@@ -74,7 +79,8 @@ print("Matrix shape:", M.shape)
print("Density of sparse matrix: ", M.getnnz() / np.prod(M.shape))
M = M.tocoo()
# eg 'V_kmer_distances.npz'
save_npz(snakemake.output['V_kmers_distances'], M)
# https://github.com/rogersce/cnpy
# https://stackoverflow.com/questions/36433030/load-scipy-sparse-csr-matrix-in-c
save_npz(V_kmers_distances_file, M)
with open(V_kmers_file, 'w', encoding='utf-8') as f:
json.dump(kmers, f, ensure_ascii=False, indent=4)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment