Skip to content
Snippets Groups Projects
Commit a20b5769 authored by Sara Schulte's avatar Sara Schulte
Browse files
parents 6fa67bd1 5e17f211
Branches
No related tags found
No related merge requests found
......@@ -28,8 +28,12 @@ def get_parser():
help='If provided, binding affinities are converted to binary data.')
parser.add_argument('--binding-affinities', '-ba', dest='ba_matrix', required=True, type=str,
help='Binding affinity file for input peptides and alleles.')
parser.add_argument('--required_epitopes', '-epi', dest='required_epitopes',
help='File of peptides you want to be present in vaccine')
parser.add_argument('--min-hits', '-mh', dest='min_hits', default=1, type=int,
help='Minimum number of hits for an allele to be covered')
parser.add_argument('--maximize-peptides', dest='maximize_peptides', action='store_const', const=True,
default=False, help='Maximize number of peptides in the vaccine in a second optimization')
parser.add_argument('--embedding-length', default=0, type=int, help='Set length of embedding if used')
parser.add_argument('--embedded-peptides', type=str, help='File containing embedded peptides')
parser.add_argument('--embedded-epitope_features', type=str, help='Path to embedded epitope features')
......@@ -70,8 +74,13 @@ def main():
peptides = read_peptides(args.peptides)
pep_count = len(peptides)
required_peptides = []
if args.required_epitopes:
print('Reading required epitopes')
required_peptides = read_peptides(args.required_epitopes)
drawing_enabled = False
if pep_count <= 30:
if pep_count < 30:
print('Number of peptides below 30 -> drawing enabled')
drawing_enabled = True
......@@ -100,8 +109,6 @@ def main():
leaves_dict, hog = linear_time_hog.compute_hog(str(pep_count), peptides, args.outdir, drawing_enabled)
print('Binarize ba predictions')
if args.ba_threshold:
print(args.ba_threshold)
bin_matrix = binarize_entries(mod_ba_df, args.ba_threshold)
full_known_alleles = gp.tuplelist(key for key in f_data.keys() if key in bin_matrix.keys())
......@@ -117,6 +124,8 @@ def main():
min_hits=args.min_hits,
populations=args.populations,
path=args.outdir,
optional_peptides=required_peptides,
maximize_peptides=args.maximize_peptides,
logging=args.logging_enabled,
coloring=drawing_enabled)
......
......@@ -25,14 +25,9 @@ def subtour_elim_boekler_paper(model, where):
subtour = sorted(list(subtour), key=int)
log('## Subtour ' + ', '.join(subtour))
save_for_later.append(subtour)
# for each subtour that does not contain the root node, we require an incoming edge from the outside
for node in subtour:
model.cbLazy(gp.quicksum(model._x_edges.select('*', node)) <=
gp.quicksum(model._x_edges[node_outer, node_inner] for node_outer, node_inner in sorted(model._x_edges)
model.cbLazy(1 <= gp.quicksum(model._x_edges[node_outer, node_inner] for node_outer, node_inner in sorted(model._x_edges)
if node_inner in subtour and node_outer not in subtour),
'At least one incoming edge to subtour ' + '_'.join(subtour) + ' from the outside')
# first add constraint only for single node, if this does not work, add it for all nodes in cycle
break
def find_subtour(vals, graph):
......@@ -51,7 +46,7 @@ def find_subtour(vals, graph):
return subtours
def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, min_hits=1, populations=['World'], logging=False, coloring=False):
def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, min_hits=1, populations=['World'], optional_peptides=[], maximize_peptides=False, logging=False, coloring=False):
global logging_enabled
logging_enabled = logging
......@@ -62,8 +57,9 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
m.setParam('Seed', 42)
random.seed(42)
numpy.random.seed(42)
if not os.path.exists(path + 'lp_out/'):
os.mkdir(path + 'lp_out/')
# only for debugging, lp files becomes very large
# if not os.path.exists(path + 'lp_out/'):
# os.mkdir(path + 'lp_out/')
# create hit variables
log('Create hit variables')
......@@ -71,6 +67,12 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
for allele in alleles:
hit[allele] = m.addVar(vtype=GRB.BINARY, name='_'.join(allele).replace(':', '-'))
# create node variables
log('Create node variables for peptide nodes')
x_nodes = gp.tupledict()
for node in leaves.values():
x_nodes[node] = m.addVar(vtype=GRB.BINARY, name=graph.nodes[node]['string'])
# create edge variables
log('Create edge variables')
x_edges = gp.tupledict()
......@@ -87,16 +89,25 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
log('Add >>At least one outgoing edge from root<< constraint')
m.addConstr(x_edges.sum('0', '*') >= 1, 'At least one outgoing edge from root')
# optionally force include peptides in vaccine
if optional_peptides:
for p in optional_peptides:
n = leaves[p]
m.addConstr(x_nodes[n] >= 1, 'Must include peptide ' + p)
log('Add >>Sum of edge weights below threshold k<< constraint')
m.addConstr(gp.quicksum(x_edges[node_a, node_b] * graph.edges[node_a, node_b]['weight']
for node_a, node_b in sorted(graph.edges)) <= k, 'Sum of edge weights below threshold k')
log('Add flow conservation for nodes')
for node in sorted(graph.nodes):
m.addConstr(x_edges.sum('*', node) == x_edges.sum(node, '*'),
'Flow conservation at node ' + str(node))
log('Add peptide node visiting constraint')
for pep, node in leaves.items():
m.addConstr(x_nodes[node] <= x_edges.sum('*', node), 'Node visiting constraint for ' + pep)
'''
without this update function, the data will be cached in some intermediate storage that might require more memory
than the final storage to which the ilp will be transferred by update() and optimize().
......@@ -105,9 +116,9 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
log('Add allele hit constraint')
for allele in alleles:
# check for peptides (leaf nodes) with non-zero entry in B matrix for current allele
hit_leaves = list(filter(lambda x: 1.0 == B_matrix[allele][x], leaves.keys()))
hit_pep = list(filter(lambda x: 1.0 == B_matrix[allele][x], leaves.keys()))
# only sum over such peptides instead of summing over all peptides -> only beneficial for ba threshold > 0.xx
m.addConstr(hit[allele] * min_hits <= gp.quicksum(x_edges.sum('*', leaves[leaf]) for leaf in hit_leaves),
m.addConstr(hit[allele] * min_hits <= gp.quicksum(x_nodes[leaves[pep]] for pep in hit_pep),
'Peptide-hit for allele ' + '_'.join(allele))
m.update()
......@@ -123,6 +134,20 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
log('Now optimize:')
m.optimize(subtour_elim_boekler_paper)
# 2nd optimization: maximize number of peptides
if maximize_peptides:
# require optimal solution for first objective function
assert m.Status == GRB.Status.OPTIMAL
opt_coverage = m.getObjective()
# set new objective function
# maximize number of selected peptides
m.setObjective(gp.quicksum(x_nodes), GRB.MAXIMIZE)
# add previous objective function with objective value as constraint
m.addConstr(gp.quicksum(hit[allele] * freq_vector[allele][pop] for pop in populations for allele in alleles) >= opt_coverage.getValue(),
'Add maximal population coverage as constraint')
m.optimize(subtour_elim_boekler_paper)
solution_edges = []
if m.Status == GRB.OPTIMAL:
sol = m.getAttr('X', x_edges)
......@@ -138,10 +163,11 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
if node_b in leaves.values():
chosen_pep.append(graph.nodes[node_b]['string'])
if not os.path.exists(path + 'lp_out/'):
os.mkdir(path + 'lp_out/')
m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.sol')
m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.json')
# also only for more details and debugging
# if not os.path.exists(path + 'lp_out/'):
# os.mkdir(path + 'lp_out/')
# m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.sol')
# m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.json')
if not os.path.exists(path + 'pep_out/'):
os.mkdir(path + 'pep_out/')
......@@ -165,9 +191,10 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
if coloring:
dot = nx.drawing.nx_pydot.to_pydot(sub_hog)
dot.write_png(path + 'pep_out/' + pep_count + '_sub_hog.png')
draw_hog.draw_hog(graph, leaves, sol, path + 'pep_out/' + pep_count + '_colored_sub_hog.png')
hogvaxine = ''
for e in nx.eulerian_circuit(sub_hog):
for e in nx.eulerian_circuit(sub_hog, source='0'):
if graph.edges[e]['is_slink'] == 'yes':
continue
else:
......@@ -184,14 +211,11 @@ def hogvax(k, alleles, freq_vector, B_matrix, leaves, pep_count, graph, path, mi
# after solving add lazy constraints to lp file
for i, subtour in enumerate(save_for_later):
for node in subtour:
m.addConstr(gp.quicksum(m._x_edges.select('*', node)) <=
gp.quicksum(m._x_edges[node_outer, node_inner] for node_outer, node_inner in m._x_edges
m.addConstr(1 <= gp.quicksum(x_edges[node_outer, node_inner] for node_outer, node_inner in sorted(x_edges)
if node_inner in subtour and node_outer not in subtour),
'Subtour elimination ' + node)
# first add constraint only for single node, if this does not work, add it for all nodes in cycle
break
'At least one incoming edge to subtour ' + str(i) + ' from the outside')
m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.lp')
# only for debugging, lp file becomes quite large
# m.write(path + 'lp_out/' + pep_count + '_vaccine_ilp_hog.lp')
return chosen_pep
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment