Snakefile 3.54 KB
Newer Older
Philipp Spohr's avatar
Philipp Spohr committed
1
2
3
4
5
6
7
8
9
10
# Pull in shared helper functions used across the workflow.
include: "scripts/shared.py"

from snakemake.utils import validate

#Validate configuration files

# Load the workflow configuration and check it against the schema;
# validate() raises a WorkflowError if required keys are missing or mistyped.
configfile: "config.yaml"
validate(config, "schemas/config.schema.yaml")


Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
11
12
13
def get_input_ids(dataset):
	"""Return the sample IDs present in ``data/input/<dataset>/``.

	Globs for files ending in the dataset's configured first-read suffix
	(``input_read_1_ending``) and extracts the ``{id}`` portion of each
	file name.

	Parameters
	----------
	dataset : str
		Key into ``config['datasets']``.

	Returns
	-------
	list
		One wildcard value per matching input file.
	"""
	# BUG FIX: the original body referenced a global ``wildcards`` object that
	# does not exist at module scope (NameError for every caller). The dataset
	# name is now passed explicitly, so the helper works both at parse time
	# and from within rule input functions.
	inputIDs, = glob_wildcards('data/input/'+dataset+'/{id}'+config["datasets"][dataset]['input_read_1_ending'])
	return inputIDs
Philipp Spohr's avatar
Philipp Spohr committed
14

Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
15
#Generate Input/Output Files from specified folder
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
16
17
# k-mer sizes to evaluate (taken from the validated config) and a
# per-dataset map of discovered input sample IDs, filled in below.
kmer_lengths = config['kmers']
dataset_inputIDs = {}
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
18
19
20
# Discover the input sample IDs for every configured dataset up front so
# downstream rules can expand over them.
for d in config['datasets'].keys():
	# Glob directly here: the original called get_input_ids(), whose body
	# dereferenced an undefined global ``wildcards`` (NameError).
	inputIDs, = glob_wildcards('data/input/'+d+'/{id}'+config["datasets"][d]['input_read_1_ending'])
	dataset_inputIDs[d] = [str(x) for x in inputIDs]
Philipp Spohr's avatar
Philipp Spohr committed
21
22

#kmer_lengths = [24]
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
23
24
def get_general_inputs(wildcards):
	"""Input function for ``rule run_dataset``.

	Builds the list of per-dataset target files. Each key of
	``possible_params`` doubles as a boolean flag in the config; only the
	files of enabled analyses are returned.

	Parameters
	----------
	wildcards : snakemake.io.Wildcards
		Provides ``wildcards.dataset``, the dataset being summarised.

	Returns
	-------
	list
		Flat list of required input file paths.
	"""
	run_dir = wildcards.dataset
	# BUG FIX: the original called get_input_ids(), which read an undefined
	# global ``wildcards`` (NameError). Glob the IDs for this dataset
	# directly, mirroring get_iterset_inputs below.
	inputIDs, = glob_wildcards('data/input/'+run_dir+'/{id}'+config["datasets"][run_dir]['input_read_1_ending'])

	possible_params = {
	'assembly_model': expand('data/output/'+run_dir+'/{id}/exactMatches.tsv',id=inputIDs),
	'calc_strand_bias': expand('data/output/'+run_dir+'/{id}/strandbias.txt',id=inputIDs),
	'mapping_diff_analysis': expand('data/output/'+run_dir+'/methodAnalysis/{id}/mapping.comparison',id=inputIDs),
	'map_filtered_reads': expand('data/output/'+run_dir+'/methodAnalysis/{id}/alignmentToGroundTruthType.sorted.bam.bai',id=inputIDs),
	'verifyUniqueness': expand('data/output/kmers/{kmer}/uniquenessTest.tsv',kmer=kmer_lengths),
	'kmer_stats_analysis': expand('data/auxiliary/'+run_dir+'/kmers/{kmer}/{id}/stats.tsv',kmer=kmer_lengths,id=inputIDs)
	}

	# Keep only the file lists whose config flag is truthy, flattened.
	return [item for k in possible_params.keys() if config[k] for item in possible_params[k]]

def get_iterset_inputs(wildcards):
	"""Input function for ``rule run_iterset``.

	Like :func:`get_general_inputs`, each key of ``possible_params`` is also
	a boolean config flag; only files belonging to enabled models are
	returned. Uses ``wildcards.dataset`` and ``wildcards.iterset``.

	Returns
	-------
	list
		Flat list of required input file paths for this dataset/iterset.
	"""
	inputIDs, = glob_wildcards('data/input/'+wildcards.dataset+'/{id}'+config["datasets"][wildcards.dataset]['input_read_1_ending'])
	possible_params = {
		'generative_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset +'/kmers/{kmer}/predictions.probabilistic_gen.tsv',kmer=kmer_lengths),
		'distance_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset +'/kmers/{kmer}/predictions.euclidean.tsv',kmer=kmer_lengths),
		'probabilistic_model': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset + '/kmers/{kmer}/predictions.probabilistic_cov.csv',kmer=kmer_lengths),
		# if above:
		'plot_top3_fit': expand('data/output/' + wildcards.dataset +'/' + wildcards.iterset + '/kmers/{kmer}/{id}_top3fit.svg',kmer=kmer_lengths,id=inputIDs),
		'kmer_stats_analysis': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset +'/kmers/{kmer}/{id}/spaTypesGroundTruthVennDia.svg',kmer=kmer_lengths,id=inputIDs)
	}
	# Keep only the file lists whose config flag is truthy, flattened.
	return [item for k in possible_params.keys() if config[k] for item in possible_params[k]]
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
49

Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
50
51
52
53
def use_itersets():
	"""Return the itersets to evaluate.

	When the probabilistic model is enabled and itersets are configured,
	use the configured list; in every other case fall back to the default
	single iterset ``'O'``.
	"""
	configured = config['itersets'] if config['probabilistic_model'] else None
	return configured or ['O']
Philipp Spohr's avatar
Philipp Spohr committed
54
55
56

rule all:
	input:
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
57
		run_datasets = expand('data/output/{dataset}_summary.md', dataset=config['datasets'].keys())
Philipp Spohr's avatar
Philipp Spohr committed
58

Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
59
60
61
62
63
64
rule run_dataset:
	input:
		general = get_general_inputs,
		summarys = expand('data/auxiliary/{dataset}/{iterset}_summary.md', iterset=use_itersets(), allow_missing=True)
	output:
		summary = 'data/output/{dataset}_summary.md'
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
65
66
67
68
69
70
	params:
		# cluster execution
		cpus = '1',
		mem = '1G',
		gpus = '0',
		walltime = '00:01:00'
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
71
72
73
74
75
76
77
78
79
	#  TODO create summary
	shell:
		'touch {output.out}'

rule run_iterset:
	input:
		get_iterset_inputs
	output:
		out = 'data/auxiliary/{dataset}/{iterset}_summary.md'
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
80
81
82
83
84
85
	params:
		# cluster execution
		cpus = '1',
		mem = '1G',
		gpus = '0',
		walltime = '00:01:00'
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
86
87
88
	# TODO create summary
	shell:
		'touch {output.out}'
Philipp Spohr's avatar
Philipp Spohr committed
89
90

##### load rules #####
Jan Hoeckesfeld's avatar
Jan Hoeckesfeld committed
91
92
93
94
95
96
include: "rules/assembly.smk"
include: "rules/shared.smk"
include: "rules/kmerApproach.smk"
include: "rules/coverageBased.smk"
include: "rules/probabilistic.smk"
include: "rules/euclidean.smk"