include: "scripts/shared.py"

from snakemake.utils import validate

# Validate configuration files against the schema before anything else runs.
configfile: "config.yaml"
validate(config, "schemas/config.schema.yaml")

# Generate Input/Output Files from specified folder
kmer_lengths = config['kmers']
# Cache of discovered sample IDs per dataset, filled lazily by get_general_inputs.
dataset_inputIDs = {}
#kmer_lengths = [24]


def get_general_inputs(wildcards):
    """Return the per-dataset target files enabled by boolean config flags.

    Discovers sample IDs by globbing the dataset's input folder for files
    ending in the configured read-1 suffix, records them in the module-level
    dataset_inputIDs cache, then collects the output paths of every analysis
    whose flag (e.g. config['assembly_model']) is truthy.
    """
    run_dir = wildcards.dataset
    inputIDs, = glob_wildcards('data/input/'+wildcards.dataset+'/{id}'+config["datasets"][wildcards.dataset]['input_read_1_ending'])
    dataset_inputIDs[wildcards.dataset] = inputIDs
    possible_params = {
        'assembly_model': expand('data/output/'+run_dir+'/{id}/exactMatches.tsv', id=inputIDs),
        'calc_strand_bias': expand('data/output/'+run_dir+'/{id}/strandbias.txt', id=inputIDs),
        'mapping_diff_analysis': expand('data/output/'+run_dir+'/methodAnalysis/{id}/mapping.comparison', id=inputIDs),
        'map_filtered_reads': expand('data/output/'+run_dir+'/methodAnalysis/{id}/alignmentToGroundTruthType.sorted.bam.bai', id=inputIDs),
        'verifyUniqueness': expand('data/output/kmers/{kmer}/uniquenessTest.tsv', kmer=kmer_lengths),
        'kmer_stats_analysis': expand('data/auxiliary/'+run_dir+'/kmers/{kmer}/{id}/stats.tsv', kmer=kmer_lengths, id=inputIDs),
    }
    # Flatten the file lists of every analysis whose config flag is enabled.
    return [item for k in possible_params if config[k] for item in possible_params[k]]


def get_iterset_inputs(wildcards):
    """Return the per-dataset, per-iterset target files enabled by config flags.

    Mirrors get_general_inputs but for outputs that additionally depend on the
    {iterset} wildcard (model predictions and their diagnostic plots).
    """
    inputIDs, = glob_wildcards('data/input/'+wildcards.dataset+'/{id}'+config["datasets"][wildcards.dataset]['input_read_1_ending'])
    possible_params = {
        'generative_model': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset + '/kmers/{kmer}/predictions.probabilistic_gen.tsv', kmer=kmer_lengths),
        'distance_model': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset + '/kmers/{kmer}/predictions.euclidean.tsv', kmer=kmer_lengths),
        'probabilistic_model': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset + '/kmers/{kmer}/predictions.probabilistic_cov.tsv', kmer=kmer_lengths),
        # if above:
        'plot_top3_fit': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset + '/kmers/{kmer}/{id}_top3fit.svg', kmer=kmer_lengths, id=inputIDs),
        'kmer_stats_analysis': expand('data/output/' + wildcards.dataset + '/' + wildcards.iterset + '/kmers/{kmer}/{id}/spaTypesGroundTruthVennDia.svg', kmer=kmer_lengths, id=inputIDs),
    }
    # Flatten the file lists of every analysis whose config flag is enabled.
    return [item for k in possible_params if config[k] for item in possible_params[k]]


def use_itersets():
    """Return the configured itersets when the probabilistic model is enabled,
    otherwise the default single iterset ['O']."""
    if config['probabilistic_model'] and config['itersets']:
        return config['itersets']
    return ['O']


rule all:
    input:
        run_datasets = expand('data/output/{dataset}_summary.md', dataset=config['datasets'].keys())


rule run_dataset:
    input:
        general = get_general_inputs,
        # allow_missing keeps {dataset} as a wildcard while expanding {iterset}.
        summarys = expand('data/auxiliary/{dataset}/{iterset}_summary.md', iterset=use_itersets(), allow_missing=True)
    output:
        summary = 'data/output/{dataset}_summary.md'
    # TODO create summary
    shell:
        # BUGFIX: this rule's output is named 'summary', not 'out' —
        # '{output.out}' raised an error when the shell command was formatted.
        'touch {output.summary}'


rule run_iterset:
    input:
        get_iterset_inputs
    output:
        out = 'data/auxiliary/{dataset}/{iterset}_summary.md'
    # TODO create summary
    shell:
        'touch {output.out}'


##### load rules #####
include: "rules/assembly.smk"
include: "rules/shared.smk"
include: "rules/kmerApproach.smk"
include: "rules/coverageBased.smk"
include: "rules/probabilistic.smk"
include: "rules/euclidean.smk"