Commit 2e0bad9b authored by Jan Hoeckesfeld's avatar Jan Hoeckesfeld
Browse files

small bug fixes

parent 7f68613e
......@@ -8,21 +8,21 @@ configfile: "config.yaml"
validate(config, "schemas/config.schema.yaml")
def get_input_ids():
inputIDs, = glob_wildcards('data/input/'+wildcards.dataset+'/{id}'+config["datasets"][wildcards.dataset]['input_read_1_ending'])
def get_input_ids(dataset):
    """Return the sample IDs present under data/input/<dataset>/.

    An ID is the filename stem of every file whose name ends with the
    dataset's configured read-1 ending
    (config["datasets"][dataset]['input_read_1_ending']).

    NOTE(review): relies on the Snakemake globals `glob_wildcards` and
    `config` being in scope at workflow-parse time.
    """
    # glob_wildcards yields a namedtuple of wildcard lists; the trailing
    # comma unpacks its single 'id' field.
    inputIDs, = glob_wildcards('data/input/'+dataset+'/{id}'+config["datasets"][dataset]['input_read_1_ending'])
    return inputIDs
#Generate Input/Output Files from specified folder
kmer_lengths = config['kmers']
dataset_inputIDs = {}
for d in config['datasets'].keys():
inputIDs = get_input_ids()
inputIDs = get_input_ids(d)
dataset_inputIDs[d] = [str(x) for x in inputIDs]
#kmer_lengths = [24]
def get_general_inputs(wildcards):
run_dir = wildcards.dataset
inputIDs = get_input_ids()
inputIDs = get_input_ids(wildcards.dataset)
possible_params = {
'assembly_model': expand('data/output/'+run_dir+'/{id}/exactMatches.tsv',id=inputIDs),
......@@ -40,7 +40,7 @@ def get_iterset_inputs(wildcards):
possible_params = {
'generative_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset +'/kmers/{kmer}/predictions.probabilistic_gen.tsv',kmer=kmer_lengths),
'distance_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset +'/kmers/{kmer}/predictions.euclidean.tsv',kmer=kmer_lengths),
'probabilistic_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset + '/kmers/{kmer}/predictions.probabilistic_cov.csv',kmer=kmer_lengths),
'probabilistic_model': expand('data/output/' + wildcards.dataset + '/kmers/{kmer}/predictions.probabilistic_cov.csv',kmer=kmer_lengths),
# if above:
'plot_top3_fit': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset + '/kmers/{kmer}/{id}_top3fit.svg',kmer=kmer_lengths,id=inputIDs),
'kmer_stats_analysis': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset +'/kmers/{kmer}/{id}/spaTypesGroundTruthVennDia.svg',kmer=kmer_lengths,id=inputIDs)
......@@ -61,7 +61,7 @@ rule run_dataset:
general = get_general_inputs,
summarys = expand('data/auxiliary/{dataset}/{iterset}_summary.md', iterset=use_itersets(), allow_missing=True)
output:
summary = 'data/output/{dataset}_summary.md'
summary = touch('data/output/{dataset}_summary.md')
params:
# cluster execution
cpus = '1',
......@@ -69,14 +69,12 @@ rule run_dataset:
gpus = '0',
walltime = '00:01:00'
# TODO create summary
shell:
'touch {output.out}'
rule run_iterset:
input:
get_iterset_inputs
output:
out = 'data/auxiliary/{dataset}/{iterset}_summary.md'
out = touch('data/auxiliary/{dataset}/{iterset}_summary.md')
params:
# cluster execution
cpus = '1',
......@@ -84,8 +82,6 @@ rule run_iterset:
gpus = '0',
walltime = '00:01:00'
# TODO create summary
shell:
'touch {output.out}'
##### load rules #####
include: "rules/assembly.smk"
......
......@@ -124,6 +124,8 @@ rule summarize_coverage:
mem = '1G',
gpus = '0',
walltime = '00:05:00'
log:
'logs/{dataset}/kmers/{kmer}/predictions.probabilistic_cov.csv'
conda:
'../envs/biopythonworkbench.yaml'
script:
......
......@@ -7,10 +7,10 @@ def getReadIDFromPath(path):
def get_wildcards(path):
    """Recover the workflow wildcard values encoded in an output path.

    Expects a path laid out as
        .../<dataset>/<iterset>/kmers/<kmer>/<id>/<file>
    and returns the individual path components as strings.

    Bug fix: the previous version returned str(path.parent) /
    str(path.parents[k]), i.e. the full path *prefixes*
    (e.g. "data/output/ds1") rather than the single wildcard components
    ("ds1") that the summary code uses as dict keys and column names.
    Using `.name` yields just the final component of each prefix.
    """
    path = Path(path)
    return {
        "id": path.parent.name,          # .../<id>
        "kmer": path.parents[1].name,    # .../<kmer>
        "iterset": path.parents[3].name, # .../<iterset>
        "dataset": path.parents[4].name, # .../<dataset>
    }
def update_in(d, path, value):
......@@ -47,14 +47,14 @@ def summarize(input,benchmarks,groundTruthFile,output):
results = update_in(results, [w["iterset"], w["kmer"], w["id"], "time"], time)
results = update_in(results, [w["iterset"], w["kmer"], w["id"], "mean_load"], mean_load)
with open(f,'r') as groundTruthFile:
lines = f.readlines()
with open(groundTruthFile,'r') as gt:
lines = gt.readlines()
for line in lines:
if line[0] in ids:
results = update_in(results, ["expected", line[0]], line[1])
#table_cols = ["expected"] + zip(*[([i,k],[i,k],[i,k]) for i in results.keys() for k in results[i].keys()])
table_col_names = ["expected"] + zip(*[("observed_" + i + "_" + k, i + k +"_estimate", i + k + "_time", i + k + "_cpu%") for i in results.keys() for k in results[i].keys()])
cols_tuples = [("observed_" + i + "_" + k, i + k +"_estimate", i + k + "_time", i + k + "_cpu%") for i in results.keys() for k in results[i].keys()]
table_col_names = ["expected"] + [c for ct in cols_tuples for c in ct]
with open(output,'w') as outfile:
outfile.write("\t".join(table_col_names) + '\n')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.