From 74f7ad545f03e9b86559fc3451f2f1a55c4f065c Mon Sep 17 00:00:00 2001
From: Sajjad Ghaffarinasabsharabiani <ghaffas@hhu.de>
Date: Tue, 9 Aug 2022 14:41:16 +0000
Subject: [PATCH] Upload New File

---
 networkComplexityBigg/plot/makePlots.R | 738 +++++++++++++++++++++++++
 1 file changed, 738 insertions(+)
 create mode 100644 networkComplexityBigg/plot/makePlots.R

diff --git a/networkComplexityBigg/plot/makePlots.R b/networkComplexityBigg/plot/makePlots.R
new file mode 100644
index 0000000..636b8ca
--- /dev/null
+++ b/networkComplexityBigg/plot/makePlots.R
@@ -0,0 +1,738 @@
+#!/usr/bin/Rscript
+
+library(methods)
+library(parallel)
+library(sybil)
+library(dplyr)
+library(ggplot2)
+library(gridExtra)
+library(grid)
+library(gtable)
+library(ggrepel)
+library(nls2)
+library(xlsx)
+library(xtable)
+
+#install.packages("devtools")
+#devtools::install_github("slowkow/ggrepel")
+
+
+
+modelSelector <- scan(text="iAM_Pb448 iCN718 iCN900 iYS1720 iNF517 iLB1027_lipid iJB785 iIS312 iEK1008 iAF692 iIT341 iLJ478 iSM199 iSB619 iHN637 iJN678 iJN746 iYO844 iAF987 iMM904 iPC815 iRC1080 iYL1228 STM_v1_0 iML1515",
+					what="character", sep=" ")
+# missing here e_coli_core, iJR904, iAF1260b, smaller yeast model
+myBlue <- "#006ab3"
+myBlue2 <- "#008cc2"
+myGreen <- "#97bf0d"
+myGrey <- "#D9DADB"
+myRed <- "#BE0A26"
+myYellow <- "#FFCC00"
+
+file.remove(list.files("~/plots/", full.names=T), showWarnings=F)
+file.remove("~/plot/plots.pdf", showWarnings=F)
+
+
+if(!exists("dataLoaded")){
+	print(load("~/DATA/universalBiGG.ver2.Rdata"))
+	print(load("~/DATA/mergedResults.Rdata"))
+	print(load("~/DATA/compareDf.Rdata"))
+	print(load("~/DATA/innovationIndex.Rdata"))
+	print(load("~/DATA/innovationIndexClusters.Rdata"))
+	print(load("~/DATA/exaptation.Rdata"))
+	print(load("~/DATA/metaboliteParticipation.Rdata"))
+	
+	growthCounts <- compareDf %>% group_by(model, bof, mediumType) %>% summarize(NN=sum(NN)/n(), NG=sum(NG)/n(), GN=sum(GN)/n(), GG=sum(GG)/n())
+	
+	
+	
+	modelTable <- read.csv("~/DATA/modelTable.csv") %>% filter(bigg_id %in% names(modelReactMap))
+	modelTable <- modelTable %>% left_join(read.csv("~/DATA/shortOrganismNames.csv") %>% select(bigg_id, shortO, sO))
+	
+	modelTable <- modelTable %>% mutate(label= paste0(organism, " (", bigg_id, ")"))
+	modelTable <- modelTable %>% mutate(labelSO= paste0(sO, " (", bigg_id, ")"))
+	modelTable <- modelTable %>% mutate(labelSOE= paste0("italic(", gsub(" ", "~", sO), ")", "~(", bigg_id, ")"))
+	modelTable <- modelTable %>% mutate(ecoLabel= factor(ifelse(isEco55, "62~italic(E.Coli)", "other~species"), levels=c("other~species", "62~italic(E.Coli)")))
+	modelTable <- modelTable %>% mutate(#shortO = sapply(strsplit(as.character(organism), "\\s"), function(x) paste(x[1:2], collapse=" ")),
+										shortLabel = paste0(shortO, " (", bigg_id, ")"))
+	
+	auxotrophic <- c("ic_1306", "iECDH10B_1368", "iECIAI39_1322", "iECUMN_1333", 
+		"iSbBS512_1146", "iSBO_1134", "iSFV_1184", "iSFxv_1172", "iS_1188", 
+		"iSF_1195", "iSSON_1240", "iSDY_1059",
+		"iSM199", "iPC815", "iIT341", "iAF692", "iNJ661", "iSB619","iEcDH1_1363") # manually added for non eco55
+	modelTable <- modelTable %>% mutate(auxo = bigg_id %in% auxotrophic)
+	modelTable <- modelTable %>% left_join(metaboliteParticipation %>% filter(y==2) %>% select(name, x) %>% rename(bigg_id=name, twoReactMetRatio = x))
+	
+	addSummary <- addSummary %>% mutate(auxo = model %in% auxotrophic)
+	
+	
+	resultWithModel <- resultWithModel %>% left_join(modelTable %>% select(bigg_id, shortLabel, labelSO, labelSOE, ecoLabel), by=c(model="bigg_id"))
+	growthCounts <- growthCounts %>% left_join(modelTable %>% select(bigg_id, shortLabel, labelSO, labelSOE), by=c(model="bigg_id"))
+	
+	
+	
+	df <- result %>% filter(bof=="general", mediumType=="random")
+	df2 <- innovationIndex %>% filter(bof=="general", mediumType=="random") %>% select(-NN, -NG, -GG, -GN)
+	
+	innovationIndexSingle <- merge.data.frame(df, df2, by.x=c("model", "bof", "mediumType", "medium"), by.y=c("model", "bof", "mediumType", "addedForMedium"), all.x=T)
+#	innovationIndexSingle <- innovationIndexSingle %>% select(-c(organism.y, X.y, label.y, ))
+	innovationIndexSingle <- merge.data.frame(innovationIndexSingle, modelTable, by.x="model", by.y="bigg_id", all.x)
+	
+	innovationIndex <- merge.data.frame(innovationIndex, modelTable, by.x="model", by.y="bigg_id", all.x)
+	df <- innovationIndex %>% group_by(model, bof, mediumType) %>% summarize(innoIndexRelMean=mean(innoIndexRel), innoIndexDistinctRelMean=mean(innoIndexDistinctRel))
+	addSummary <- merge.data.frame(addSummary, df, by=c("model", "bof", "mediumType"), all.x=T)
+	addSummary <- addSummary %>% left_join(modelTable %>% select(bigg_id, labelSO, labelSOE, ecoLabel) %>% rename(model=bigg_id))
+	
+	innovationIndexWithClusters <- innovationIndexWithClusters %>% left_join(modelTable %>% select(bigg_id, gene_count, labelSOE, ecoLabel), by=c(model="bigg_id"))
+	
+	
+	
+	innovationIndex <- innovationIndex %>% left_join(exaptation, by=c(model="model", addedForMedium="second"))
+	innovationIndex <- innovationIndex %>% left_join(result %>% select(model, medium, bof, mediumType, add, addCor), c(model="model", mediumType="mediumType", addedForMedium="medium", bof="bof"))
+	
+	
+	dataLoaded <- T
+}
+
+
+table1 <- modelTable %>% arrange(gene_count) %>% select(bigg_id, organism, ends_with("_count"), isEco55, taxid, pubMedId, year)
+#write.csv(table1, row.names = F, file="tableS1.csv")
+rownames(table1) <- NULL
+table1 <- table1 %>% select(-year) %>% mutate(isEco55=ifelse(isEco55, "X", "")) %>%
+					 rename("Model ID"=bigg_id, 
+							"Organism"=organism, 
+							"Gene count"=gene_count, 
+							"Metabolite count"=metabolite_count, 
+							"Reaction count"=reaction_count, 
+							"62\\textit{E.coli}"=isEco55,
+							"Taxonomy ID"=taxid,
+							"PubMed ID"=pubMedId)
+write.xlsx(table1, showNA=F, file=paste0("Data/plot/tableS1.xlsx"))
+
+
+addtorow          <- list()
+addtorow$pos      <- list()
+addtorow$pos[[1]] <- c(0)
+addtorow$command  <- c(paste("\\hline \n",
+							"\\endhead \n",
+							"\\hline \n",
+							"{\\footnotesize Continued on next page} \n",
+							"\\endfoot \n",
+							"\\endlastfoot \n",sep=""))
+print(xtable(table1, align=c("l", "p{4cm}", "p{6cm}", "r", "r", "r", "c", "r", "r"),
+					 caption="Organism specific models and their properties"
+					),
+					file="Data/plot/tableS1.tex", table.placement="",
+											include.rownames=F,
+											rotate.colnames=F,
+											floating=F,
+											sanitize.colnames.function = function(x) {x},
+											tabular.environment = "longtable",
+											floating.environment="",
+											caption.placement = "top",
+											add.to.row = addtorow,
+											hline.after=c(-1))
+
+#labels=parse(text=modelTable %>% arrange(reaction_count) %>% select(labelSOE) %>% .[[1]])
+#labeller(isEco55=function(x) parse(text=ecoLabeller(x)))
+myParse <- function(x) parse(text=x)
+
+gS1 <- ggplot(modelTable %>% arrange(reaction_count), aes(x=reorder(labelSOE, order(reaction_count)), y=reaction_count, fill=isEco55)) +
+	geom_bar(stat="identity") + 
+	labs(x="", y="reaction count") + 
+	scale_fill_manual(name="is E.Coli 62", values=c(myBlue, myBlue2), breaks=c(FALSE, TRUE)) +
+	scale_x_discrete(labels=myParse) +
+	coord_flip() +
+	facet_grid(ecoLabel ~ ., scales="free_y", space="free_y", labeller=label_parsed) +
+	theme(#axis.text.x = element_text(size=10, angle = 0, hjust = 1, vjust=0.5),
+			legend.position="none") +
+	ggtitle(bold("Figure S1.")~"Networks sizes")
+gS1
+ggsave(gS1, filename="FigS1.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+myLabels <- modelTable %>% filter(bigg_id %in% modelSelector) %>% arrange(reaction_count) %>% select(labelSOE) %>% .[[1]]
+g1 <- ggplot(modelTable %>% filter(bigg_id %in% modelSelector) %>% arrange(reaction_count), aes(x=reorder(labelSOE, order(reaction_count)), y=reaction_count)) +
+	geom_bar(stat="identity", fill=myBlue) + 
+	scale_x_discrete(labels=parse(text=myLabels)) +
+	coord_flip() +
+	labs(x="", y="reaction count") + 
+	theme(	axis.text.y = element_text(size=10, angle = 0, hjust = 1, vjust=0.5),
+			legend.position="none")# +
+#	ggtitle("Number of reactions per model")
+
+ggsave(g1, filename="Fig1.pdf", path="Data/plot/", width=210/2, height=297/2, units="mm")
+
+
+
+count <- table(met_comp(uni2))
+compCount <- data.frame(id=mod_compart(uni2)[as.integer(names(count))], value=as.integer(count)) %>% 
+			arrange(desc(value)) %>% mutate(id=factor(id, levels=id))
+
+gS2a <- ggplot(compCount, aes(x=id, y=value)) +
+	geom_bar(stat="identity", fill=myBlue) + 
+	labs(x="compartment id", y="metabolite count") + 
+#	scale_y_log10() +
+	ggtitle(bold("Figure S2a.")~"Number of metabolites in compartments") +
+	theme(plot.title=element_text(hjust=0, size=10))
+ggsave(gS2a, filename="FigS2a.pdf", path="Data/plot", width=210, height=297/2, units="mm")
+
+gS2b <- ggplot(modelTable %>% filter(bigg_id %in% modelSelector) %>% group_by(year) %>% summarize(yearCount=n()), aes(x=year, y=yearCount)) +
+	geom_bar(stat="identity", fill=myBlue) +
+	xlab("year") +
+	ylab("count") +
+	ggtitle(bold("Figure S2b.")~"published models per year") +
+	scale_x_continuous(breaks=seq(from=2005, to=2020, by=2), minor_breaks=seq(from=2005+1, to=2020, by=2), labels=seq(from=2005, to=2020, by=2)) +
+	theme(plot.title=element_text(hjust=0, size=15))
+ggsave(gS2b, filename="FigS2d.pdf", path="New analysis bacillus/New/plot", width=210, height=297/2, units="mm")
+
+gS2 <- grid.arrange(gS2a, gS2b, ncol=1, nrow =2)
+ggsave(gS2, filename="FigS2.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+
+
+
+
+
+count <- table(unlist(lapply(modelReactMap, function(x) unique(gsub("_((LR)|(RL)|B)", "", x)))))
+reactionCount <- data.frame(id=names(count), value=as.integer(count))
+a <- reactionCount %>% filter(id %in% gsub("_((LR)|(RL)|B)", "", modelReactMap$e_coli_core))
+
+b <- a %>% filter(value < 20) %>% select(id) %>%  .[,1] %>% as.character()
+c <- table(unlist(lapply(b, function(y) names(which(sapply(modelReactMap, function(x) y %in% x))))))
+
+# alternatives for singleton E.coli reactions.
+lapply(b, grep ,react_id(uni2), value=T)
+
+
+
+
+g2 <- ggplot(reactionCount, aes(x=value)) +
+	geom_histogram(binwidth=2, fill=myBlue) + 
+	geom_histogram(data=a, binwidth=2, aes(x=value), fill=myGreen) +
+	scale_y_log10() +
+	labs(y="frequency (log)", x="# occurrences of reaction in all models") +
+	#ggtitle("Reaction frequencies (log scale) over all models") +
+	theme(plot.title=element_text(hjust=0, size=9),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+
+ggsave(g2, filename="Fig2.pdf", path="Data/plot", width=297/2, height=210/2, units="mm")
+
+
+
+
+
+a %>% arrange(value) %>% 
+		mutate(count=value >= 60) %>% 
+		group_by(count) %>% 
+		summarize(n=n()/nrow(.)) %>% 
+		print()
+
+
+nbof <- c(
+	normal="model specific biomass reaction",
+	energy="energy production",
+	general="general biomass reaction"
+)
+nmt <- c(
+	minimal="minimal environments",
+	random="random minimal environments",
+	seed="wet lab (the seed) environments"
+)
+
+gS3 <- (ggplot(reshape2::melt(growthCounts %>% select(-GN), id.vars=c("model", "bof", "mediumType", "shortLabel", "labelSO", "labelSOE"), variable.name="type"), aes(x=model, y=value, fill=type))+ 
+		geom_bar(stat="identity", alpha=1) +
+#		coord_flip() +
+		scale_fill_manual(name = "subset growth / full growth:", 
+			labels = c(GG="growth/growth", NG="no growth/growth", NN="no growth/ no growth"),
+			values = c(myBlue, myGreen, myGrey)
+			) +
+		#facet_grid(bof ~ mediumType) +
+		facet_wrap(bof ~ mediumType, nrow=9, ncol=1, labeller = labeller(bof=nbof, mediumType=nmt, .multi_line=FALSE)) +
+		xlab("") +
+		ylab("percent") +
+		ggtitle(bold("Figure S3.")~"Growth for submodels and supermodel") +
+		theme(#text = element_text(size = 20),
+			#legend.position=c(1, 1),
+			#legend.justification = c(1, 1),
+			legend.position="bottom",
+			legend.background = element_rect(fill="#ffffff77"),
+			axis.text.x=element_text(angle=90, hjust=1, size=8),
+			plot.title=element_text(hjust=0, size=10),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+)
+
+ggsave(gS3, filename="FigS3.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+
+cat("not growing in FULLmedium:\n")
+resultWithModel %>% filter(medium == "FULLmedium", bof == "general", !subG) %>% select(model, organism) %>% print()
+
+
+initialGrowth <- addSummary %>% filter(mediumType=="seed", bof=="general") %>% mutate(initialGrowthRatio = GG/(NN+NG+GG))
+midPoint <- with(initialGrowth, min(initialGrowthRatio) + (max(initialGrowthRatio) - min(initialGrowthRatio)) / 2)
+midPoint=0.75
+specialists <- initialGrowth %>% filter(initialGrowthRatio <= midPoint) %>% select(model) %>% .[[1]] %>% as.character()
+addSummary <- addSummary %>% mutate(sgCat=as.factor(ifelse(model %in% specialists, "specialist", "generalist")))
+resultWithModel <- resultWithModel %>% mutate(sgCat=as.factor(ifelse(model %in% specialists, "specialist", "generalist")))
+innovationIndex <- innovationIndex %>% mutate(sgCat=as.factor(ifelse(model %in% specialists, "specialist", "generalist")))
+modelTable <- modelTable %>% mutate(sgCat=as.factor(ifelse(bigg_id %in% specialists, "specialist", "generalist")))
+
+
+
+
+# order growth percentage
+#order <- growthCounts %>% filter(model %in% modelSelector, bof=="general", mediumType=="seed") %>% arrange(GG, NN) %>% ungroup() %>% select(labelSO) %>% .[[1]] %>% as.character()
+
+order <- modelTable %>% filter(bigg_id %in% modelSelector) %>% arrange(gene_count) %>% select(labelSOE) %>% .[[1]] %>% as.character()
+g3 <- (ggplot(reshape2::melt(
+			growthCounts %>% filter(model %in% modelSelector, bof=="general", mediumType %in% c("random", "seed")) %>% 
+								select(-GN, -NN, -NG) %>% mutate(GG=GG*ifelse(mediumType == "random", -1, 1)),
+								id.vars=c("model", "bof", "mediumType", "shortLabel", "labelSO", "labelSOE"), variable.name="type"),
+ 								aes(x=factor(labelSOE, order), y=value, group=mediumType, fill=paste0(mediumType, model %in% specialists) ))+ 
+		geom_bar(stat="identity", alpha=1) +
+		geom_hline(yintercept=midPoint) +
+		coord_flip() +
+		guides(fill=F) +
+		scale_x_discrete(labels=parse(text=order)) +
+#		scale_fill_manual(name = "submodel/fullmodel", 
+#			labels = c(GG="growth/growth", NG="no growth/growth", NN="no growth/ no growth"),
+#			values = c(myBlue, myGreen, myGrey)
+#			) +
+		scale_fill_manual(name = "medium type", 
+			values = c(randomFALSE=myGreen, seedFALSE=myBlue, seedTRUE=myRed, randomTRUE=myGreen)) +
+		scale_y_continuous(breaks=c(-0.2, 0, midPoint, 0.3), labels=c("0.2", "0", round(midPoint, digits=2), "0.3")) +
+		xlab("") +
+		ylab("fraction") +
+		ggtitle(~"random environments | wet lab environments") +
+		theme(#text = element_text(size = 20),
+			legend.position=c(1, 0),
+			legend.justification = c(1, 0),
+			legend.background = element_rect(fill="#ffffff77"),
+			plot.title=element_text(hjust=0.43, size=10),
+			axis.text.y=element_text(angle=0, hjust=1,lineheight = 2, size=12),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+)
+
+ggsave(g3, filename="Fig3.pdf", path="Data/plot", width=220, height=160, units="mm")
+
+
+gS4 <- ggplot(initialGrowth %>% filter(model %in% modelSelector | isEco55), aes(x=initialGrowthRatio, fill=ifelse(initialGrowthRatio <=midPoint, "S", "G"))) +
+	geom_histogram(binwidth=0.02) +
+	geom_vline(xintercept=midPoint) +
+	scale_fill_manual(values=c(G=myBlue, S=myRed)) +
+	guides(fill=FALSE) +
+	xlab("fraction of viable seed environments") +
+	ylab("count") +
+	#coord_flip() +
+	ggtitle(bold("Figure S4.")~"growth in wet lab media") +
+	theme(plot.title=element_text(hjust=0, size=10))
+	
+	
+ggsave(gS4, filename="FigS4.pdf", path="Data/plot", width=210, height=297/2, units="mm")
+
+
+
+#       (b-a)(x - min)
+#f(x) = --------------  + a
+#          max - min
+#adddf <- adddf %>% mutate(x=(7768^2 - (reaction_count^2)    ),
+#				a= min(avg/max(avg)),
+#				b= max(avg/max(avg)),
+#				toolboxScaled= ((b-a) * (x - min(x)) / (max(x) - min(x))) + a
+#) %>% select(-x, -a, -b)
+
+
+
+
+
+# average number of added reactions vs reaction count
+adddf <- addSummary %>% filter(bof=="general", mediumType=="seed")
+
+
+adddf=adddf[complete.cases(adddf$gene_count), ]
+adddf=adddf[complete.cases(adddf$avg), ]
+order <- resultWithModel %>% filter(model %in% modelSelector, bof=="general", mediumType=="seed") %>% arrange(gene_count) %>% select(labelSOE) %>% .[[1]]
+g4a <- ggplot(resultWithModel %>% filter(model %in% modelSelector, bof=="general", mediumType=="seed"), 
+				aes(x=reorder(factor(labelSOE), gene_count), y=add, fill=sgCat)) +
+	geom_violin(colour=NA, scale="width") +
+	stat_summary(fun.y=mean, geom="point", shape="|", size=5, colour="black") +
+	scale_fill_manual(values=c(specialist=myRed, generalist=myBlue)) +
+	scale_x_discrete(labels=parse(text=modelTable %>% filter(bigg_id %in% modelSelector) %>% arrange(gene_count) %>% select(labelSOE) %>% .[[1]])) +
+	guides(colour=guide_legend(title.position = "top", ncol=9), 
+			shape=guide_legend(title.position = "top", nrow=1)) +
+	ylab("# added reactions") +
+	xlab("model - ordered by network size") + 
+	theme(legend.position = "none",
+		axis.text.y = element_text(angle = 0, hjust = 1, size=10)) +
+	#ggtitle(bold(a)~"distribution of # added reactions") +
+  ggtitle(bold(a)~"") +
+	theme(plot.title=element_text(hjust=0, size=10),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm")) +
+	coord_flip()
+ggsave(g4a, filename="Fig4a.pdf", path="Data/plot", width=210/2, height=297/2, units="mm")
+
+adddf=adddf[complete.cases(adddf$gene_count), ]
+adddf=adddf[complete.cases(adddf$avg), ]
+modelNLS <- nls(adddf, formula="avg ~ 1/(2*c*gene_count)", start=list(c=1), control=nls.control(minFactor=1e-10))
+adddf$fit <- predict(modelNLS)
+
+
+#modelLog <- nls(adddf, formula="log(avg) ~ lc - log(gene_count)", start=list(lc=1), control=nls.control(minFactor=1e-10))
+#adddf$fitLog <- exp(predict(modelLog))
+#modelLm <- lm(adddf, formula="log(avg) ~  offset(log(gene_count))")
+#adddf$fitLm <- exp(predict(modelLm))
+#cat("fittet value c is:\n")
+#print(fitValueC <- exp(-modelLog$m$getPars()["lc"]) / 2)
+
+#adddf <- adddf %>% filter(!auxo, model!="e_coli_core")
+
+
+modelLm <- lm(adddf %>% filter(model !="iAF1260", model %in% modelSelector ), formula="log(avg) ~  offset(-log(gene_count))")
+adddf=adddf[complete.cases(adddf$gene_count), ]
+#modelFit <- adddf %>% filter(model !="iAF1260", model %in% modelSelector ) %>% select(model, avg, gene_count)
+modelFit <- data.frame(gene_count=min(adddf$gene_count):max(adddf$gene_count),rm.na=T)
+#modelLmPred <- as.data.frame(exp(predict(modelLm, interval="confidence")))
+modelLmPred <- data.frame(fit=exp(predict(modelLm, newdata=modelFit)))
+modelFit$fitLm <- modelLmPred$fit
+#modelFit$fitLmLwr <- modelLmPred$lwr
+#modelFit$fitLmUpr <- modelLmPred$upr
+
+
+modelLmF <- lm(adddf %>% filter(model !="iAF1260", model %in% modelSelector ), formula="log(avg) ~  log(gene_count)")
+#modelFit$fitLmF <- exp(predict(modelLmF))
+modelFit$fitLmF <- exp(predict(modelLmF, newdata=modelFit))
+
+
+cat("fittet value c is:\n")
+print(fitValueC <- exp(-modelLm$coefficients["(Intercept)"]) / 2)
+cat("fittet value c for free model is:\n")
+print(fitValueCF <- exp(-modelLmF$coefficients["(Intercept)"]) / 2)
+cat("factor for free model is:\n")
+print(modelLmF$coefficients["log(gene_count)"])
+cat("alpha for free model:\n")
+calcAlpha <- function(m) -(m -1)
+print( calcAlpha(modelLmF$coefficients["log(gene_count)"]))
+cat("confidence interval for slope and alpha:\n")
+print(confint(modelLmF, 'log(gene_count)', level=0.95))
+print(calcAlpha(confint(modelLmF, 'log(gene_count)', level=0.95)))
+cat("MSE for normal fit:\n")
+print(mean(modelLm$residuals^2))
+cat("MSE for free fit:\n")
+print(mean(modelLmF$residuals^2))
+
+addReactSpearman <- with(adddf %>% filter(model !="iAF1260", model %in% modelSelector ), cor(x=gene_count, y=avg, method="spearman"))
+print(with(adddf %>% filter(model !="iAF1260", model %in% modelSelector ), cor.test(x=gene_count, y=avg, method="spearman")))
+
+g4b <- ggplot(adddf %>% filter(model %in% modelSelector), aes(x=gene_count, y=avg, colour=sgCat,
+				shape=I(ifelse(isEco55, 17, 16) - ifelse(auxo, 15, 0)),
+				alpha=I(ifelse(model %in% modelSelector, 1, 0.5)))) +
+	geom_line(data=modelFit, aes(x=gene_count, y=fitLmF, colour=NULL, shape=NULL, alpha=NULL), size=.7, colour=myGreen) +
+	geom_line(data=modelFit, aes(x=gene_count, y=fitLm, colour=NULL, shape=NULL, alpha=NULL), size=.7, colour=myYellow) +
+	geom_point() +
+	scale_colour_manual(name="is E.Coli 62", values=c(myRed,myBlue), breaks=c("specialist", "generalist")) +
+	guides(shape=FALSE, alpha=FALSE,
+		colour=FALSE
+		#colour = guide_legend(order = 1, override.aes = list(linetype = c(1, 1), color = c(myGreen, myYellow))) 
+		) +
+	xlab("gene count") + 
+	ylab("average # added reactions") +
+	scale_y_log10(limits=c(2, 150), breaks=c(2, 5, 10, 100, 150), minor_breaks=c(1:10, seq(from=10, to=150, by=10))) +
+	theme(legend.position = "bottom",
+		axis.text.x = element_text(angle = 0, hjust = 0.5)) +
+	geom_text_repel(aes(label=ifelse(!isEco55 | model %in% c(modelSelector, "e_coli_core"), as.character(model), "")), 
+							segment.color = "#cccccc",
+							colour="black",
+							max.iter=5000,
+							max.overlaps = Inf,
+							segment.curvature = 0,
+							segment.ncp = 3,
+							segment.angle = 20,
+							point.padding= unit(0.5, 'lines'),
+							box.padding = unit(0.5, 'lines')) +
+	#ggtitle(bold(b)~"average # added reactions and model size") +
+  ggtitle(bold(b)~"") +
+	annotate("text", x = 1200, y = 100, vjust=0, label = paste0("spearmans correlation = ", format(addReactSpearman, digits=2)), parse=F, size=4) +
+	#annotate("text", x = 100, y = 3, hjust=0, label = "f(g) == frac(g^{1-alpha}, alpha*c)", parse=T, size=4) +
+	theme(plot.title=element_text(hjust=0, size=10),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+g4b
+
+
+#
+
+ggsave(g4b, filename="Fig4b.pdf", path="Data/plot", width=297/2, height=210/2, units="mm")
+
+
+
+
+
+gS5 <- ggplot(adddf %>% filter(isEco55), aes(x=gene_count, y=avg, shape=auxo, colour=sgCat)) +
+	geom_smooth(method="lm", aes(x=gene_count, y=avg, color=NULL, shape=NULL), colour = "#777777", alpha=0.7, method.args = list(), se=F) +
+	geom_point() +
+	guides(colour=FALSE, shape=FALSE, alpha=FALSE) +
+	xlab("gene count") + 
+	ylab("average # added reactions") +
+	scale_shape_manual(name="is auxotrophic", values=c(17, 2), breaks=c(FALSE, TRUE)) +
+	scale_y_continuous(breaks=c(1:10), labels=1:10) +
+	scale_colour_manual(values=c(specialist=myRed, generalist=myBlue)) +
+#	coord_trans(y="log10", limy=c(2, 200)) +
+	theme(legend.position = "bottom",
+		axis.text.x = element_text(angle = 0, hjust = 0.5)) +
+	geom_text_repel(aes(label=ifelse(model %in% c("iML1515", "iEcDH1_1363") | auxo, as.character(model), "")), 
+							segment.color = "#cccccc",
+							max.overlaps = Inf,
+							colour="black",
+							point.padding= unit(0.5, 'lines'),
+							box.padding = unit(0.5, 'lines')) +
+	#ggtitle(bold(a)~"") +
+	annotate("text", x = 1300, y = 5.5, vjust=0, hjust=0, label = paste0("spearman correlation = ", format(with(adddf %>% filter(isEco55,model!="iEcDH1_1363"), cor(x=gene_count, y=avg, method="spearman")), digits=2)), parse=F, size=4) +
+	ggtitle(bold("Figure S5.")~"average # added reactions and model size for 62"~italic(E.coli)~"only") +
+	theme(plot.title=element_text(hjust=0, size=10),
+			plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+
+ggsave(gS5, filename="FigS5.pdf", path="Data/plot", width=210, height=297/2, units="mm")
+
+###
+###without auxotrophic
+gS51 <- ggplot(adddf %>% filter(isEco55,!(model %in% auxotrophic)), aes(x=gene_count, y=avg, shape=auxo, colour=sgCat)) +
+  geom_smooth(method="lm", aes(x=gene_count, y=avg, color=NULL, shape=NULL), colour = "#777777", alpha=0.7, method.args = list(), se=F) +
+  geom_point() +
+  guides(colour=FALSE, shape=FALSE, alpha=FALSE) +
+  xlab("gene count") + 
+  ylab("average # added reactions") +
+  scale_shape_manual(name="is auxotrophic", values=c(17, 2), breaks=c(FALSE, TRUE)) +
+  scale_y_continuous(limits=c(1,4),breaks=c(1:3), labels=1:3) +
+  scale_colour_manual(values=c(specialist=myRed, generalist=myBlue)) +
+  #	coord_trans(y="log10", limy=c(2, 200)) +
+  theme(legend.position = "bottom",
+        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
+  geom_text_repel(aes(label=ifelse(model %in% c("iML1515", "iEcDH1_1363") | auxo, as.character(model), "")), 
+                  segment.color = "#cccccc",
+                  max.overlaps = Inf,
+                  colour="black",
+                  point.padding= unit(0.5, 'lines'),
+                  box.padding = unit(0.5, 'lines')) +
+  #ggtitle(bold(a)~"") +
+  annotate("text", x = 1450, y = 3.5, vjust=0, hjust=0, label = paste0("Spearman correlation = ", format(with(adddf %>% filter(isEco55,!(model %in% auxotrophic)), cor(x=gene_count, y=avg, method="spearman")), digits=2)), parse=F, size=4) +
+  ggtitle(bold("Figure S5.")~"average # added reactions and model size for 62"~italic(E.coli)~"only") +
+  theme(plot.title=element_text(hjust=0, size=10),
+        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5), "cm"))
+
+ggsave(gS51, filename="FigS51.pdf", path="Data/plot", width=210, height=297/2, units="mm")
+
+####
+
+indexData <- innovationIndex %>% filter(bof=="general", mediumType=="seed") %>% group_by(model, bof, mediumType, isEco55, organism, sgCat, auxo, gene_count) %>% 
+		summarize(innoIndexDistinctRelMean = mean(innoIndexDistinctRel),
+					innoIndexDistinctMean = mean(innoIndexDistinct, na.rm=T),
+					v1=mean(startEnv/distinctEnv), 
+					v50rel=mean(startEnv50/distinctEnv),
+					v50=mean(startEnv50),
+					vMean=mean(startEnvMean),
+					meanAdd=mean(addCor)) %>% ungroup()
+indexData <- indexData %>% ungroup() %>% left_join(modelTable %>% select(bigg_id, twoReactMetRatio) %>% rename(model=bigg_id))
+
+
+
+g4 <- grid.arrange(g4a, g4b, ncol = 1, nrow = 2)
+ggsave(g4, filename="Fig4.pdf", path="Data/plot", width=297/2, height=210, units="mm")
+
+
+
+
+
+
+networkLinaritySpearman <- with(modelTable %>% filter(bigg_id !="iAF1260", bigg_id %in% modelSelector ), cor.test(x=gene_count, y=twoReactMetRatio, method="spearman"))
+print(networkLinaritySpearman)
+
+g5 <- ggplot(modelTable %>% filter(bigg_id %in% c(modelSelector, "e_coli_core")), aes(x=gene_count, y=twoReactMetRatio, colour=sgCat, 
+		shape=I(ifelse(bigg_id == "e_coli_core", 3, ifelse(isEco55, 17, 16) - ifelse(auxo, 15, 0)))
+		#alpha=I(ifelse(bigg_id %in% modelSelector, 1, 0.5))
+		)) +
+	geom_hline(mapping=NULL, yintercept=0.4, colour="#cccccc") +
+	geom_point() +
+	ylab("network linearity") + 
+	xlab("gene count") +
+	xlim(c(100, 1710))+
+#	annotate("text", x = 800, y = 0.6, vjust=0, label = paste0("spearman correlation = ", 
+#						format(cor(x=modelTable$gene_count, y=modelTable$twoReactMetRatio, method="spearman"), digits=2)),#
+#						parse=F, size=4) +
+	guides(colour=FALSE, shape=FALSE) +
+	scale_colour_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+	geom_text_repel(aes(label=as.character(bigg_id)), colour="black",max.overlaps = Inf, segment.color = "#cccccc",
+		 point.padding= unit(0.5, 'lines'), box.padding = unit(0.5, 'lines')) +
+  annotate("text", x = 1000, y = 0.6, vjust=0, hjust=0, label = paste0("spearman correlation = ", format(networkLinaritySpearman[["estimate"]], digits=2)), parse=F, size=4)+
+	#ggtitle("network structure and gene count") +
+	theme(plot.title=element_text(hjust=0, size=10))
+ggsave(g5, filename="Fig5.pdf", path="Data/plot", width=297/2, height=210/2, units="mm")
+####
+#.vs. added reaction
+#preparing dataframe
+SJ=adddf
+rownames(SJ)=SJ$model
+SJ2=modelTable
+rownames(SJ2)=modelTable$bigg_id
+SAJ=merge(data.frame(SJ),data.frame(SJ2),by=0,all=T)[-1]
+####
+networkLinaritySpearman <- with(SAJ %>% filter(bigg_id !="iAF1260", bigg_id %in% modelSelector ), cor.test(x=avg, y=twoReactMetRatio, method="spearman"))
+print(networkLinaritySpearman)
+
+g5 <- ggplot(SAJ %>% filter(bigg_id %in% c(modelSelector, "e_coli_core")), aes(x=avg, y=twoReactMetRatio, colour=sgCat.y, 
+                                                                                      shape=I(ifelse(bigg_id == "e_coli_core", 3, ifelse(isEco55.y, 17, 16) - ifelse(auxo.y, 15, 0)))
+                                                                                      #alpha=I(ifelse(bigg_id %in% modelSelector, 1, 0.5))
+)) +
+  geom_hline(mapping=NULL, yintercept=0.4, colour="#cccccc") +
+  geom_point() +
+  ylab("network linearity") + 
+  xlab("added reactions") +
+  xlim(c(0, 100))+
+  #	annotate("text", x = 800, y = 0.6, vjust=0, label = paste0("spearman correlation = ", 
+  #						format(cor(x=modelTable$gene_count, y=modelTable$twoReactMetRatio, method="spearman"), digits=2)),#
+  #						parse=F, size=4) +
+  guides(colour=FALSE, shape=FALSE) +
+  scale_colour_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+  geom_text_repel(aes(label=as.character(bigg_id)), colour="black",max.overlaps = Inf, segment.color = "#cccccc",
+                  point.padding= unit(0.5, 'lines'), box.padding = unit(0.5, 'lines')) +
+  annotate("text", x = 60, y = 0.6, vjust=0, hjust=0, label = paste0("spearman correlation = ", format(networkLinaritySpearman[["estimate"]], digits=2)), parse=F, size=4)+
+  #ggtitle("network structure and gene count") +
+  theme(plot.title=element_text(hjust=0, size=10))
+ggsave(g5, filename="Fig51.pdf", path="New analysis bacillus/New/plot", width=297/2, height=210/2, units="mm")
+###
+#reactionwithoutnames
+networkLinaritySpearman <- with(SAJ %>% filter(bigg_id !="iAF1260", bigg_id %in% modelSelector ), cor.test(x=avg, y=twoReactMetRatio, method="spearman"))
+print(networkLinaritySpearman)
+
+g5 <- ggplot(SAJ %>% filter(bigg_id %in% c(modelSelector, "e_coli_core")), aes(x=avg, y=twoReactMetRatio, colour=sgCat.y, 
+                                                                               shape=I(ifelse(bigg_id == "e_coli_core", 3, ifelse(isEco55.y, 17, 16) - ifelse(auxo.y, 15, 0)))
+                                                                               #alpha=I(ifelse(bigg_id %in% modelSelector, 1, 0.5))
+)) +
+  geom_hline(mapping=NULL, yintercept=0.4, colour="#cccccc") +
+  geom_point() +
+  ylab("network linearity") + 
+  xlab("added reactions") +
+  xlim(c(0, 100))+
+  #	annotate("text", x = 800, y = 0.6, vjust=0, label = paste0("spearman correlation = ", 
+  #						format(cor(x=modelTable$gene_count, y=modelTable$twoReactMetRatio, method="spearman"), digits=2)),#
+  #						parse=F, size=4) +
+  guides(colour=FALSE, shape=FALSE) +
+  scale_colour_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+  geom_text_repel(aes(label=ifelse(bigg_id %in% c("iML1515", "iEcDH1_1363"),  as.character(bigg_id), "")), colour="black",max.overlaps = Inf, segment.color = "#cccccc",
+                  point.padding= unit(0.5, 'lines'), box.padding = unit(0.5, 'lines')) +
+  annotate("text", x = 60, y = 0.6, vjust=0, hjust=0, label = paste0("spearman correlation = ", format(networkLinaritySpearman[["estimate"]], digits=2)), parse=F, size=4)+
+  #ggtitle("network structure and gene count") +
+  theme(plot.title=element_text(hjust=0, size=10))
+ggsave(g5, filename="Fig51withoutnames.pdf", path="Data/plot", width=297/2, height=210/2, units="mm")
+####
+#without namesgenecount
+g5 <- ggplot(modelTable %>% filter(bigg_id %in% c(modelSelector, "e_coli_core")), aes(x=gene_count, y=twoReactMetRatio, colour=sgCat, 
+                                                                                      shape=I(ifelse(bigg_id == "e_coli_core", 3, ifelse(isEco55, 17, 16) - ifelse(auxo, 15, 0)))
+                                                                                      #alpha=I(ifelse(bigg_id %in% modelSelector, 1, 0.5))
+)) +
+  geom_hline(mapping=NULL, yintercept=0.4, colour="#cccccc") +
+  geom_point() +
+  ylab("network linearity") + 
+  xlab("gene count") +
+  xlim(c(100, 1710))+
+  #	annotate("text", x = 800, y = 0.6, vjust=0, label = paste0("spearman correlation = ", 
+  #						format(cor(x=modelTable$gene_count, y=modelTable$twoReactMetRatio, method="spearman"), digits=2)),#
+  #						parse=F, size=4) +
+  guides(colour=FALSE, shape=FALSE) +
+  scale_colour_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+  geom_text_repel(aes(label=ifelse(bigg_id %in% c("iML1515", "iEcDH1_1363"),  as.character(bigg_id), "")), colour="black",max.overlaps = Inf, segment.color = "#cccccc",
+                  point.padding= unit(0.5, 'lines'), box.padding = unit(0.5, 'lines')) +
+  annotate("text", x = 1000, y = 0.6, vjust=0, hjust=0, label = paste0("spearman correlation = ", format(networkLinaritySpearman[["estimate"]], digits=2)), parse=F, size=4)+
+  
+  #ggtitle("network structure and gene count") +
+  theme(plot.title=element_text(hjust=0, size=10))
+ggsave(g5, filename="Fig5.pdf", path="Data/plot", width=297/2, height=210/2, units="mm")
+####
+
+gS6 <- ggplot(resultWithModel %>% filter(bof=="general", mediumType=="random"), aes(x=reorder(factor(labelSOE), gene_count), y=add, fill=sgCat)) +
+	geom_violin(colour=NA, scale="width") +
+	stat_summary(fun.y=mean, geom="point", shape="|", size=4, colour="black") +
+	guides(colour=guide_legend(title.position = "top", ncol=9), 
+			shape=guide_legend(title.position = "top", nrow=1)) +
+	#scale_fill_manual(name="is E.Coli 55", values=c(myBlue, myGreen), breaks=c(FALSE, TRUE)) +
+	scale_fill_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+	scale_y_continuous(name="# added reactions", breaks=c(seq(from=0, to=150, by=10))) + #, labels=c(1:5, rep("", 4), 10, rep("", 8), 100) ) +
+	scale_x_discrete(label=myParse) +
+	xlab("model - ordered by network size") + 
+	facet_grid(ecoLabel ~ ., scales="free_y", space="free_y", labeller=label_parsed) +
+	ggtitle(bold("Figure S6.")~"Distributions of added reactions per submodel") +
+	theme(legend.position = "none",
+		axis.text.y = element_text(angle = 0, hjust = 1, size=8)) +
+	theme(plot.title=element_text(hjust=0, size=10)) +
+	coord_flip()
+	
+ggsave(gS6, filename="FigS6.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+
+gS7 <- ggplot(innovationIndex %>% filter(bof=="general", mediumType=="random"), aes(x=reorder(factor(labelSOE), gene_count), y=innoIndexRel, fill=sgCat)) +
+	geom_violin(colour=NA, scale="width") +
+	stat_summary(fun=mean, geom="point", shape="|", size=4, colour="black") +
+	guides(colour=guide_legend(title.position = "top", ncol=9), 
+			shape=guide_legend(title.position = "top", nrow=1)) +
+	scale_fill_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+	ylab("collateral adaptation index") +
+	xlab("model - ordered by network size") + 
+	scale_x_discrete(label=myParse) +
+	ggtitle(bold("Figure S7.")~"Distributions of the collateral adaptation index per submodel") +
+	facet_grid(ecoLabel ~ ., scales="free_y", space="free_y", labeller=label_parsed) +
+	theme(legend.position = "none",
+		axis.text.y = element_text(angle = 0, hjust = 1, size=8)) +
+	theme(plot.title=element_text(hjust=0, size=10)) +
+	coord_flip()
+ggsave(gS7, filename="FigS7.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+
+
+gS8 <- ggplot(innovationIndex %>% filter(bof=="general", mediumType=="random"), aes(x=reorder(factor(labelSOE), gene_count), y=startEnvMean, fill=sgCat)) +
+	geom_violin(colour=NA, scale="width") +
+	stat_summary(fun.y=mean, geom="point", shape="|", size=4, colour="black") +
+	guides(colour=guide_legend(title.position = "top", ncol=9), 
+			shape=guide_legend(title.position = "top", nrow=1)) +
+	scale_fill_manual(values=c(specialist=myRed, generalist=myBlue), breaks=c("specialist", "generalist")) +
+	ylab("exaptation index") +
+	xlab("model - ordered by network size") + 
+	scale_x_discrete(label=myParse) +
+	ggtitle(bold("Figure S8.")~"Distributions of the exaptation index per submodel") +
+	facet_grid(ecoLabel ~ ., scales="free_y", space="free_y", labeller=label_parsed) +
+	theme(legend.position = "none",
+		axis.text.y = element_text(angle = 0, hjust = 1, size=8)) +
+	theme(plot.title=element_text(hjust=0, size=10)) +
+	coord_flip()
+ggsave(gS8, filename="FigS8.pdf", path="Data/plot", width=210, height=297, units="mm")
+
+
+
+
+system("pdfunite plots/Fig{,S}?.pdf plots.pdf")
+cat("converting to jpg\n")
+file.remove(list.files("jpg/", full.names=T))
+system("./makeJpg.sh")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-- 
GitLab