Commit 7b196446 authored by Jan Hoeckesfeld
Browse files

added proper spatype.txt corruption errormessage

parent 49b482e9
# Snakemake script: expand every spa type listed in the types file into the
# concatenation of its repeat sequences and write the result as FASTA records.
#
# Reads  snakemake.input['repeats']    - FASTA file with the repeat sequences
#        snakemake.input['types']      - one "<name>,<repeat>-<repeat>-..." per line
#        snakemake.input['filterList'] - comma-separated spa type names to keep
#                                        (only read when the membership test
#                                        below succeeds)
# Writes snakemake.output['out']       - one ">name" / sequence pair per spa type
#
# Raises ValueError when a processed line has no repeat column or references
# a repeat that is not present in the repeats file.
import re

from Bio import SeqIO

# Sanity pattern for a line of the types file. `match` only anchors at the
# start of the line, so this checks the "t<digits>,<two digits>" prefix and
# does not validate whatever follows it.
regSpaTypes = re.compile(r'((t[0-9]+,)([0-9][0-9])+(-[0-9])*)')

# Load repeat sequences in fasta format
repeats = SeqIO.parse(snakemake.input['repeats'], 'fasta')
repeatsDict = {}
filterList = []

# Pre-Process Repeats
# NOTE(review): this relies on `in` testing the *names* of snakemake.input;
# confirm against the Snakemake version in use, the check is kept as-is.
if 'filterList' in snakemake.input:
    with open(snakemake.input['filterList'], 'r') as infile:
        # Strip surrounding whitespace so that a trailing newline in the
        # file does not make the last name impossible to match.
        filterList = [entry.strip() for entry in infile.read().split(',')]

for repeat in repeats:
    # The first character of the record id is dropped to obtain the key
    # used in the types file.
    repeatsDict[repeat.id[1:]] = str(repeat.seq)

with open(snakemake.input['types'], 'r') as infile, \
        open(snakemake.output['out'], 'w') as outfile:
    for lineNumber, spaType in enumerate(infile.read().splitlines(), start=1):
        if not regSpaTypes.match(spaType):
            # Warning only: the line is still processed if it can be split.
            print("spaType in spatypes does not fit into the pattern: "
                  + spaType + " in line " + str(lineNumber))

        fields = spaType.split(',')
        name = fields[0]

        # An active filter is applied before the repeat column is looked at,
        # so filtered-out lines are skipped even when they are malformed.
        if filterList and name not in filterList:
            continue

        if len(fields) < 2:
            # Fail with context instead of a bare IndexError.
            raise ValueError(
                "spaType in spatypes has no repeat column: "
                + spaType + " in line " + str(lineNumber))

        sptRepeats = fields[1].split('-')

        for repeat in sptRepeats:
            if repeat not in repeatsDict:
                # Fail with context instead of a bare KeyError.
                raise ValueError(
                    "Unrecognized repeat in spatype " + repeat
                    + ", found in line " + str(lineNumber))

        sequence = ''.join(repeatsDict[repeat] for repeat in sptRepeats)
        outfile.write('>' + name + '\n' + sequence + '\n')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment