diff --git a/03-analyse_outputs/03-01-compile_results.ipynb b/03-analyse_outputs/03-01-compile_results.ipynb index f788182206ef9b06b8c4b207c9333e2230bf8435..0aad2d0c89a104a8438b30e67cb8756dafafb897 100644 --- a/03-analyse_outputs/03-01-compile_results.ipynb +++ b/03-analyse_outputs/03-01-compile_results.ipynb @@ -62,6 +62,16 @@ " data_dir}/1236_wol_tree_pruned_with_internal_labels.nwk\"" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash -s \"$res_dir\" \n", + "mkdir -p $1/angst $1/ale $1/ranger $1/gloome_ml $1/gloome_mp $1/count_ml $1/count_mp $1/wn" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -71,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -145,38 +155,38 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>47440</th>\n", + " <th>47479</th>\n", " <td>ERT8F</td>\n", " <td>207954-619304</td>\n", " <td>1897630</td>\n", " </tr>\n", " <tr>\n", - " <th>47441</th>\n", + " <th>47480</th>\n", " <td>ERT8F</td>\n", " <td>207954-619304</td>\n", " <td>1543721</td>\n", " </tr>\n", " <tr>\n", - " <th>47442</th>\n", + " <th>47481</th>\n", " <td>ERT8F</td>\n", " <td>207954-619304</td>\n", " <td>62101</td>\n", " </tr>\n", " <tr>\n", - " <th>47443</th>\n", + " <th>47482</th>\n", " <td>ERT8F</td>\n", " <td>1897630</td>\n", " <td>1859457</td>\n", " </tr>\n", " <tr>\n", - " <th>47444</th>\n", + " <th>47483</th>\n", " <td>ERT8F</td>\n", " <td>1859457</td>\n", " <td>167879</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>47445 rows × 3 columns</p>\n", + "<p>47484 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ @@ -187,11 +197,11 @@ "3 ER9VY 1548547 \n", "4 ER9VY 1548547 \n", "... ... ... 
\n", - "47440 ERT8F 207954-619304 \n", - "47441 ERT8F 207954-619304 \n", - "47442 ERT8F 207954-619304 \n", - "47443 ERT8F 1897630 \n", - "47444 ERT8F 1859457 \n", + "47479 ERT8F 207954-619304 \n", + "47480 ERT8F 207954-619304 \n", + "47481 ERT8F 207954-619304 \n", + "47482 ERT8F 1897630 \n", + "47483 ERT8F 1859457 \n", "\n", " recipient_branch \n", "0 216142-243924-1028989-223283-205918-157783-384... \n", @@ -200,13 +210,13 @@ "3 697282-1116472-1091494-857087-1538553 \n", "4 167879-58049 \n", "... ... \n", - "47440 1897630 \n", - "47441 1543721 \n", - "47442 62101 \n", - "47443 1859457 \n", - "47444 167879 \n", + "47479 1897630 \n", + "47480 1543721 \n", + "47481 62101 \n", + "47482 1859457 \n", + "47483 167879 \n", "\n", - "[47445 rows x 3 columns]" + "[47484 rows x 3 columns]" ] }, "metadata": {}, @@ -215,7 +225,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d2c91bf8cca64392af403c2215f315db", + "model_id": "62c4daa9e4e946049144138c0113b307", "version_major": 2, "version_minor": 0 }, @@ -269,26 +279,26 @@ " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>46</td>\n", + " <td>EQRDS</td>\n", + " <td>25</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>28</td>\n", + " <td>EQRFZ</td>\n", + " <td>47</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>39</td>\n", + " <td>EQRG2</td>\n", + " <td>28</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>42</td>\n", + " <td>EQRGC</td>\n", + " <td>39</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", @@ -299,14 +309,14 @@ " </tr>\n", " <tr>\n", " <th>1295</th>\n", - " <td>ETCI9</td>\n", - " <td>30</td>\n", + " <td>ETCI3</td>\n", + " <td>53</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1296</th>\n", - " <td>ETCIB</td>\n", - " <td>24</td>\n", + " <td>ETCI9</td>\n", + " <td>30</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", @@ -335,13 
+345,13 @@ "text/plain": [ " nog_id transfers transfer_threshold\n", "0 EQRBG 33 1\n", - "1 EQRFZ 46 1\n", - "2 EQRG2 28 1\n", - "3 EQRGC 39 1\n", - "4 EQRGG 42 1\n", + "1 EQRDS 25 1\n", + "2 EQRFZ 47 1\n", + "3 EQRG2 28 1\n", + "4 EQRGC 39 1\n", "... ... ... ...\n", - "1295 ETCI9 30 1\n", - "1296 ETCIB 24 1\n", + "1295 ETCI3 53 1\n", + "1296 ETCI9 30 1\n", "1297 ETCIZ 46 1\n", "1298 ETCJF 50 1\n", "1299 ETCUH 32 1\n", @@ -430,35 +440,35 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>47440</th>\n", + " <th>47479</th>\n", " <td>ERT8F</td>\n", " <td>N203</td>\n", " <td>1897630</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>47441</th>\n", + " <th>47480</th>\n", " <td>ERT8F</td>\n", " <td>N203</td>\n", " <td>1543721</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>47442</th>\n", + " <th>47481</th>\n", " <td>ERT8F</td>\n", " <td>N203</td>\n", " <td>62101</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>47443</th>\n", + " <th>47482</th>\n", " <td>ERT8F</td>\n", " <td>1897630</td>\n", " <td>1859457</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", - " <th>47444</th>\n", + " <th>47483</th>\n", " <td>ERT8F</td>\n", " <td>1859457</td>\n", " <td>167879</td>\n", @@ -466,7 +476,7 @@ " </tr>\n", " </tbody>\n", "</table>\n", - "<p>47445 rows × 4 columns</p>\n", + "<p>47484 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ @@ -477,13 +487,13 @@ "3 ER9VY 1548547 N72 1\n", "4 ER9VY 1548547 N229 1\n", "... ... ... ... 
...\n", - "47440 ERT8F N203 1897630 1\n", - "47441 ERT8F N203 1543721 1\n", - "47442 ERT8F N203 62101 1\n", - "47443 ERT8F 1897630 1859457 1\n", - "47444 ERT8F 1859457 167879 1\n", + "47479 ERT8F N203 1897630 1\n", + "47480 ERT8F N203 1543721 1\n", + "47481 ERT8F N203 62101 1\n", + "47482 ERT8F 1897630 1859457 1\n", + "47483 ERT8F 1859457 167879 1\n", "\n", - "[47445 rows x 4 columns]" + "[47484 rows x 4 columns]" ] }, "metadata": {}, @@ -497,8 +507,8 @@ "display(nogwise_angst_df)\n", "print(\"NOGwise branchwise DF:\")\n", "display(nogwise_branchwise_angst_df)\n", - "nogwise_angst_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.angst.tsv\", index=False, header=True, sep='\\t')\n", - "nogwise_branchwise_angst_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.branchwise.angst.tsv\", index=False, header=True, sep='\\t')" + "nogwise_angst_df.to_csv(f\"{res_dir}/angst/compiled_transfers.nogwise.angst.tsv\", index=False, header=True, sep='\\t')\n", + "nogwise_branchwise_angst_df.to_csv(f\"{res_dir}/angst/compiled_transfers.nogwise.branchwise.angst.tsv\", index=False, header=True, sep='\\t')" ] }, { @@ -510,7 +520,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -525,7 +535,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5dbc4543cac24aac91257dd2e23e292c", + "model_id": "c9b19ec8c2694333bba472f99f3ab157", "version_major": 2, "version_minor": 0 }, @@ -573,37 +583,37 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>ERA5M</td>\n", - " <td>1006000</td>\n", - " <td>1177154</td>\n", + " <td>ERC6U</td>\n", + " <td>1005057</td>\n", + " <td>N52</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>ERA5M</td>\n", + " <td>ERC6U</td>\n", " <td>1006000</td>\n", - " <td>N119</td>\n", - " <td>0.01</td>\n", + " <td>349521</td>\n", + " <td>0.02</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>ERA5M</td>\n", - " <td>1006000</td>\n", - " <td>N211</td>\n", + " 
<td>ERC6U</td>\n", + " <td>1009858</td>\n", + " <td>247634</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>ERA5M</td>\n", + " <td>ERC6U</td>\n", " <td>1009858</td>\n", - " <td>1513271</td>\n", + " <td>743720</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>ERA5M</td>\n", + " <td>ERC6U</td>\n", " <td>1009858</td>\n", - " <td>N125</td>\n", + " <td>N168</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", @@ -614,60 +624,60 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>2357745</th>\n", + " <th>2351035</th>\n", " <td>ERZXU</td>\n", - " <td>N349</td>\n", - " <td>N228</td>\n", + " <td>N348</td>\n", + " <td>N207</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", - " <th>2357746</th>\n", + " <th>2351036</th>\n", " <td>ERZXU</td>\n", - " <td>N349</td>\n", - " <td>N207</td>\n", - " <td>0.02</td>\n", + " <td>N348</td>\n", + " <td>N332</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", - " <th>2357747</th>\n", + " <th>2351037</th>\n", " <td>ERZXU</td>\n", - " <td>N349</td>\n", - " <td>N209</td>\n", - " <td>0.01</td>\n", + " <td>N348</td>\n", + " <td>N344</td>\n", + " <td>0.02</td>\n", " </tr>\n", " <tr>\n", - " <th>2357748</th>\n", + " <th>2351038</th>\n", " <td>ERZXU</td>\n", - " <td>N350</td>\n", - " <td>N212</td>\n", + " <td>N349</td>\n", + " <td>N59</td>\n", " <td>0.01</td>\n", " </tr>\n", " <tr>\n", - " <th>2357749</th>\n", + " <th>2351039</th>\n", " <td>ERZXU</td>\n", - " <td>N350</td>\n", - " <td>N346</td>\n", + " <td>N349</td>\n", + " <td>N207</td>\n", " <td>0.01</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>2357750 rows × 4 columns</p>\n", + "<p>2351040 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id source_branch recipient_branch transfers\n", - "0 ERA5M 1006000 1177154 0.01\n", - "1 ERA5M 1006000 N119 0.01\n", - "2 ERA5M 1006000 N211 0.01\n", - "3 ERA5M 1009858 1513271 0.01\n", - "4 ERA5M 1009858 N125 0.01\n", + "0 ERC6U 1005057 N52 0.01\n", + "1 ERC6U 1006000 349521 0.02\n", 
+ "2 ERC6U 1009858 247634 0.01\n", + "3 ERC6U 1009858 743720 0.01\n", + "4 ERC6U 1009858 N168 0.01\n", "... ... ... ... ...\n", - "2357745 ERZXU N349 N228 0.01\n", - "2357746 ERZXU N349 N207 0.02\n", - "2357747 ERZXU N349 N209 0.01\n", - "2357748 ERZXU N350 N212 0.01\n", - "2357749 ERZXU N350 N346 0.01\n", + "2351035 ERZXU N348 N207 0.01\n", + "2351036 ERZXU N348 N332 0.05\n", + "2351037 ERZXU N348 N344 0.02\n", + "2351038 ERZXU N349 N59 0.01\n", + "2351039 ERZXU N349 N207 0.01\n", "\n", - "[2357750 rows x 4 columns]" + "[2351040 rows x 4 columns]" ] }, "metadata": {}, @@ -710,31 +720,31 @@ " <tr>\n", " <th>0</th>\n", " <td>EQRBG</td>\n", - " <td>33.56</td>\n", + " <td>33.18</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>49.61</td>\n", + " <td>EQRDS</td>\n", + " <td>28.04</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>30.19</td>\n", + " <td>EQRFZ</td>\n", + " <td>50.38</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>43.15</td>\n", + " <td>EQRG2</td>\n", + " <td>29.78</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>43.22</td>\n", + " <td>EQRGC</td>\n", + " <td>43.20</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", @@ -746,53 +756,53 @@ " <tr>\n", " <th>0</th>\n", " <td>EREPP</td>\n", - " <td>2.23</td>\n", - " <td>2.140303</td>\n", + " <td>2.27</td>\n", + " <td>2.178687</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>EREPP</td>\n", - " <td>2.23</td>\n", - " <td>2.162727</td>\n", + " <td>2.27</td>\n", + " <td>2.201515</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>EREPP</td>\n", - " <td>2.23</td>\n", - " <td>2.185152</td>\n", + " <td>2.27</td>\n", + " <td>2.224343</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>EREPP</td>\n", - " <td>2.23</td>\n", - " <td>2.207576</td>\n", + " <td>2.27</td>\n", + " 
<td>2.247172</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>EREPP</td>\n", - " <td>2.23</td>\n", - " <td>2.230000</td>\n", + " <td>2.27</td>\n", + " <td>2.270000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>46722 rows × 3 columns</p>\n", + "<p>45616 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "0 EQRBG 33.56 0.010000\n", - "1 EQRFZ 49.61 0.010000\n", - "2 EQRG2 30.19 0.010000\n", - "3 EQRGC 43.15 0.010000\n", - "4 EQRGG 43.22 0.010000\n", + "0 EQRBG 33.18 0.010000\n", + "1 EQRDS 28.04 0.010000\n", + "2 EQRFZ 50.38 0.010000\n", + "3 EQRG2 29.78 0.010000\n", + "4 EQRGC 43.20 0.010000\n", ".. ... ... ...\n", - "0 EREPP 2.23 2.140303\n", - "0 EREPP 2.23 2.162727\n", - "0 EREPP 2.23 2.185152\n", - "0 EREPP 2.23 2.207576\n", - "0 EREPP 2.23 2.230000\n", + "0 EREPP 2.27 2.178687\n", + "0 EREPP 2.27 2.201515\n", + "0 EREPP 2.27 2.224343\n", + "0 EREPP 2.27 2.247172\n", + "0 EREPP 2.27 2.270000\n", "\n", - "[46722 rows x 3 columns]" + "[45616 rows x 3 columns]" ] }, "metadata": {}, @@ -824,13 +834,13 @@ "\n", " # write it out\n", " nogwise_branchwise_ale_df.to_csv(\n", - " f\"{res_dir}/compiled_transfers.nogwise.branchwise.{ale_dir.lower()}.tsv\",\n", + " f\"{res_dir}/ale/compiled_transfers.nogwise.branchwise.{ale_dir.lower()}.tsv\",\n", " index=False,\n", " header=True,\n", " sep=\"\\t\",\n", " )\n", " nogwise_ale_df.to_csv(\n", - " f\"{res_dir}/compiled_transfers.nogwise.{ale_dir.lower()}.tsv\",\n", + " f\"{res_dir}/ale/compiled_transfers.nogwise.{ale_dir.lower()}.tsv\",\n", " index=False,\n", " header=True,\n", " sep=\"\\t\",\n", @@ -852,7 +862,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -866,7 +876,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2a59330cc744406a9f0a2c1bbabb6899", + "model_id": "a4f1fbe62dd4441894142ebc6798d871", "version_major": 2, "version_minor": 0 }, @@ -880,7 +890,7 @@ { "data": { 
"application/vnd.jupyter.widget-view+json": { - "model_id": "e0c73fc010f94a8ab3b2e69b3b728b40", + "model_id": "580afe8a685241558c734fedaa88fe57", "version_major": 2, "version_minor": 0 }, @@ -933,26 +943,26 @@ " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>49.00</td>\n", + " <td>EQRDS</td>\n", + " <td>26.65</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>29.02</td>\n", + " <td>EQRFZ</td>\n", + " <td>50.00</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>41.48</td>\n", + " <td>EQRG2</td>\n", + " <td>28.96</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>43.42</td>\n", + " <td>EQRGC</td>\n", + " <td>41.41</td>\n", " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", @@ -964,53 +974,53 @@ " <tr>\n", " <th>0</th>\n", " <td>ET4RU</td>\n", - " <td>3.68</td>\n", - " <td>3.531717</td>\n", + " <td>3.71</td>\n", + " <td>3.560505</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4RU</td>\n", - " <td>3.68</td>\n", - " <td>3.568788</td>\n", + " <td>3.71</td>\n", + " <td>3.597879</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4RU</td>\n", - " <td>3.68</td>\n", - " <td>3.605859</td>\n", + " <td>3.71</td>\n", + " <td>3.635253</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4RU</td>\n", - " <td>3.68</td>\n", - " <td>3.642929</td>\n", + " <td>3.71</td>\n", + " <td>3.672626</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4RU</td>\n", - " <td>3.68</td>\n", - " <td>3.680000</td>\n", + " <td>3.71</td>\n", + " <td>3.710000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>37893 rows × 3 columns</p>\n", + "<p>37850 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", "0 EQRBG 33.21 0.010000\n", - "1 EQRFZ 49.00 0.010000\n", - "2 EQRG2 29.02 0.010000\n", - "3 EQRGC 41.48 0.010000\n", - "4 EQRGG 43.42 0.010000\n", + "1 EQRDS 
26.65 0.010000\n", + "2 EQRFZ 50.00 0.010000\n", + "3 EQRG2 28.96 0.010000\n", + "4 EQRGC 41.41 0.010000\n", ".. ... ... ...\n", - "0 ET4RU 3.68 3.531717\n", - "0 ET4RU 3.68 3.568788\n", - "0 ET4RU 3.68 3.605859\n", - "0 ET4RU 3.68 3.642929\n", - "0 ET4RU 3.68 3.680000\n", + "0 ET4RU 3.71 3.560505\n", + "0 ET4RU 3.71 3.597879\n", + "0 ET4RU 3.71 3.635253\n", + "0 ET4RU 3.71 3.672626\n", + "0 ET4RU 3.71 3.710000\n", "\n", - "[37893 rows x 3 columns]" + "[37850 rows x 3 columns]" ] }, "metadata": {}, @@ -1054,37 +1064,37 @@ " <tr>\n", " <th>0</th>\n", " <td>ERCE7</td>\n", - " <td>380703</td>\n", - " <td>N297</td>\n", - " <td>0.88</td>\n", + " <td>1187848</td>\n", + " <td>N334</td>\n", + " <td>0.32</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>ERCE7</td>\n", - " <td>N254</td>\n", - " <td>1799789</td>\n", - " <td>0.01</td>\n", + " <td>N322</td>\n", + " <td>N309</td>\n", + " <td>0.51</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>ERCE7</td>\n", - " <td>1879031</td>\n", - " <td>247633</td>\n", - " <td>0.10</td>\n", + " <td>326297</td>\n", + " <td>211586</td>\n", + " <td>0.43</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>ERCE7</td>\n", + " <td>698738</td>\n", " <td>N231</td>\n", - " <td>N54</td>\n", - " <td>0.03</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>ERCE7</td>\n", - " <td>N296</td>\n", - " <td>910964</td>\n", - " <td>0.48</td>\n", + " <td>N109</td>\n", + " <td>247634</td>\n", + " <td>0.03</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -1094,60 +1104,60 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>185391</th>\n", + " <th>184761</th>\n", " <td>ERVXT</td>\n", - " <td>N13</td>\n", - " <td>1797696</td>\n", - " <td>0.02</td>\n", + " <td>768671</td>\n", + " <td>1300345</td>\n", + " <td>0.04</td>\n", " </tr>\n", " <tr>\n", - " <th>185392</th>\n", + " <th>184762</th>\n", " <td>ERVXT</td>\n", - " <td>N114</td>\n", - " <td>448</td>\n", - " <td>0.24</td>\n", + " <td>743721</td>\n", + " <td>N13</td>\n", + 
" <td>0.10</td>\n", " </tr>\n", " <tr>\n", - " <th>185393</th>\n", + " <th>184763</th>\n", " <td>ERVXT</td>\n", - " <td>1895767</td>\n", - " <td>N155</td>\n", - " <td>0.09</td>\n", + " <td>N63</td>\n", + " <td>1300345</td>\n", + " <td>0.16</td>\n", " </tr>\n", " <tr>\n", - " <th>185394</th>\n", + " <th>184764</th>\n", " <td>ERVXT</td>\n", - " <td>N341</td>\n", - " <td>550540</td>\n", - " <td>1.00</td>\n", + " <td>N14</td>\n", + " <td>765910</td>\n", + " <td>0.13</td>\n", " </tr>\n", " <tr>\n", - " <th>185395</th>\n", + " <th>184765</th>\n", " <td>ERVXT</td>\n", " <td>N13</td>\n", - " <td>N107</td>\n", - " <td>0.03</td>\n", + " <td>631362</td>\n", + " <td>0.25</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>185396 rows × 4 columns</p>\n", + "<p>184766 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id source_branch recipient_branch transfers\n", - "0 ERCE7 380703 N297 0.88\n", - "1 ERCE7 N254 1799789 0.01\n", - "2 ERCE7 1879031 247633 0.10\n", - "3 ERCE7 N231 N54 0.03\n", - "4 ERCE7 N296 910964 0.48\n", + "0 ERCE7 1187848 N334 0.32\n", + "1 ERCE7 N322 N309 0.51\n", + "2 ERCE7 326297 211586 0.43\n", + "3 ERCE7 698738 N231 0.05\n", + "4 ERCE7 N109 247634 0.03\n", "... ... ... ... 
...\n", - "185391 ERVXT N13 1797696 0.02\n", - "185392 ERVXT N114 448 0.24\n", - "185393 ERVXT 1895767 N155 0.09\n", - "185394 ERVXT N341 550540 1.00\n", - "185395 ERVXT N13 N107 0.03\n", + "184761 ERVXT 768671 1300345 0.04\n", + "184762 ERVXT 743721 N13 0.10\n", + "184763 ERVXT N63 1300345 0.16\n", + "184764 ERVXT N14 765910 0.13\n", + "184765 ERVXT N13 631362 0.25\n", "\n", - "[185396 rows x 4 columns]" + "[184766 rows x 4 columns]" ] }, "metadata": {}, @@ -1164,7 +1174,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1882476c4e8b41cdaba3c3dc88884239", + "model_id": "a18da30df4d4415793b8113093437370", "version_major": 2, "version_minor": 0 }, @@ -1178,7 +1188,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dec6427a9ecc44d3ac96df95f0fc7b8b", + "model_id": "a80a7a05c5fe4d05b03a97c21a8fe0c0", "version_major": 2, "version_minor": 0 }, @@ -1227,31 +1237,31 @@ " <th>0</th>\n", " <td>EQRBG</td>\n", " <td>32.0</td>\n", - " <td>0.030000</td>\n", + " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>49.0</td>\n", - " <td>0.030000</td>\n", + " <td>EQRDS</td>\n", + " <td>26.0</td>\n", + " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>27.0</td>\n", - " <td>0.030000</td>\n", + " <td>EQRFZ</td>\n", + " <td>50.0</td>\n", + " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>41.0</td>\n", - " <td>0.030000</td>\n", + " <td>EQRG2</td>\n", + " <td>27.0</td>\n", + " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>42.0</td>\n", - " <td>0.030000</td>\n", + " <td>EQRGC</td>\n", + " <td>41.0</td>\n", + " <td>0.010000</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -1263,25 +1273,25 @@ " <th>0</th>\n", " <td>ET4FF</td>\n", " <td>4.0</td>\n", - " <td>3.839596</td>\n", + " <td>3.838788</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " 
<td>ET4FF</td>\n", " <td>4.0</td>\n", - " <td>3.879697</td>\n", + " <td>3.879091</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4FF</td>\n", " <td>4.0</td>\n", - " <td>3.919798</td>\n", + " <td>3.919394</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", " <td>ET4FF</td>\n", " <td>4.0</td>\n", - " <td>3.959899</td>\n", + " <td>3.959697</td>\n", " </tr>\n", " <tr>\n", " <th>0</th>\n", @@ -1291,24 +1301,24 @@ " </tr>\n", " </tbody>\n", "</table>\n", - "<p>36547 rows × 3 columns</p>\n", + "<p>36542 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "0 EQRBG 32.0 0.030000\n", - "1 EQRFZ 49.0 0.030000\n", - "2 EQRG2 27.0 0.030000\n", - "3 EQRGC 41.0 0.030000\n", - "4 EQRGG 42.0 0.030000\n", + "0 EQRBG 32.0 0.010000\n", + "1 EQRDS 26.0 0.010000\n", + "2 EQRFZ 50.0 0.010000\n", + "3 EQRG2 27.0 0.010000\n", + "4 EQRGC 41.0 0.010000\n", ".. ... ... ...\n", - "0 ET4FF 4.0 3.839596\n", - "0 ET4FF 4.0 3.879697\n", - "0 ET4FF 4.0 3.919798\n", - "0 ET4FF 4.0 3.959899\n", + "0 ET4FF 4.0 3.838788\n", + "0 ET4FF 4.0 3.879091\n", + "0 ET4FF 4.0 3.919394\n", + "0 ET4FF 4.0 3.959697\n", "0 ET4FF 4.0 4.000000\n", "\n", - "[36547 rows x 3 columns]" + "[36542 rows x 3 columns]" ] }, "metadata": {}, @@ -1351,38 +1361,38 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>ERCE7</td>\n", - " <td>1736225</td>\n", - " <td>1484157</td>\n", - " <td>1.0</td>\n", + " <td>ERDFX</td>\n", + " <td>N278</td>\n", + " <td>667129</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>ERCE7</td>\n", - " <td>N109</td>\n", - " <td>247634</td>\n", - " <td>1.0</td>\n", + " <td>ERDFX</td>\n", + " <td>N94</td>\n", + " <td>1797696</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>ERCE7</td>\n", - " <td>523791</td>\n", - " <td>1897630</td>\n", - " <td>1.0</td>\n", + " <td>ERDFX</td>\n", + " <td>N293</td>\n", + " <td>N326</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " 
<td>ERCE7</td>\n", - " <td>1331007</td>\n", - " <td>N343</td>\n", - " <td>1.0</td>\n", + " <td>ERDFX</td>\n", + " <td>406818</td>\n", + " <td>406817</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>ERCE7</td>\n", - " <td>N296</td>\n", - " <td>910964</td>\n", - " <td>1.0</td>\n", + " <td>ERDFX</td>\n", + " <td>1681196</td>\n", + " <td>N317</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -1392,60 +1402,60 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>49241</th>\n", + " <th>49266</th>\n", " <td>ERVXT</td>\n", - " <td>N230</td>\n", - " <td>N338</td>\n", - " <td>1.0</td>\n", + " <td>N13</td>\n", + " <td>216778</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>49242</th>\n", + " <th>49267</th>\n", " <td>ERVXT</td>\n", - " <td>N13</td>\n", - " <td>1301098</td>\n", - " <td>1.0</td>\n", + " <td>1392540</td>\n", + " <td>N128</td>\n", + " <td>0.54</td>\n", " </tr>\n", " <tr>\n", - " <th>49243</th>\n", + " <th>49268</th>\n", " <td>ERVXT</td>\n", - " <td>N114</td>\n", - " <td>448</td>\n", - " <td>1.0</td>\n", + " <td>225848</td>\n", + " <td>314608</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>49244</th>\n", + " <th>49269</th>\n", " <td>ERVXT</td>\n", - " <td>N12</td>\n", - " <td>1300345</td>\n", - " <td>1.0</td>\n", + " <td>N14</td>\n", + " <td>N230</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>49245</th>\n", + " <th>49270</th>\n", " <td>ERVXT</td>\n", - " <td>N341</td>\n", - " <td>550540</td>\n", - " <td>1.0</td>\n", + " <td>N230</td>\n", + " <td>765910</td>\n", + " <td>1.00</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>49246 rows × 4 columns</p>\n", + "<p>49271 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id source_branch recipient_branch transfers\n", - "0 ERCE7 1736225 1484157 1.0\n", - "1 ERCE7 N109 247634 1.0\n", - "2 ERCE7 523791 1897630 1.0\n", - "3 ERCE7 1331007 N343 1.0\n", - "4 ERCE7 N296 910964 1.0\n", + "0 ERDFX N278 667129 1.00\n", + 
"1 ERDFX N94 1797696 1.00\n", + "2 ERDFX N293 N326 1.00\n", + "3 ERDFX 406818 406817 1.00\n", + "4 ERDFX 1681196 N317 1.00\n", "... ... ... ... ...\n", - "49241 ERVXT N230 N338 1.0\n", - "49242 ERVXT N13 1301098 1.0\n", - "49243 ERVXT N114 448 1.0\n", - "49244 ERVXT N12 1300345 1.0\n", - "49245 ERVXT N341 550540 1.0\n", + "49266 ERVXT N13 216778 1.00\n", + "49267 ERVXT 1392540 N128 0.54\n", + "49268 ERVXT 225848 314608 1.00\n", + "49269 ERVXT N14 N230 1.00\n", + "49270 ERVXT N230 765910 1.00\n", "\n", - "[49246 rows x 4 columns]" + "[49271 rows x 4 columns]" ] }, "metadata": {}, @@ -1467,8 +1477,8 @@ " display(nogwise_ranger_df)\n", " print(\"NOGwise branchwise DF:\")\n", " display(nogwise_branchwise_ranger_df)\n", - " nogwise_ranger_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.{os.path.basename(ranger_dir).lower()}.tsv\", index=False, header=True, sep='\\t')\n", - " nogwise_branchwise_ranger_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.branchwise.{os.path.basename(ranger_dir).lower()}.tsv\", index=False, header=True, sep='\\t')" + " nogwise_ranger_df.to_csv(f\"{res_dir}/ranger/compiled_transfers.nogwise.{os.path.basename(ranger_dir).lower()}.tsv\", index=False, header=True, sep='\\t')\n", + " nogwise_branchwise_ranger_df.to_csv(f\"{res_dir}/ranger/compiled_transfers.nogwise.branchwise.{os.path.basename(ranger_dir).lower()}.tsv\", index=False, header=True, sep='\\t')" ] }, { @@ -1482,13 +1492,13 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "944445f7470e4dac8449c7ea6f5bea0e", + "model_id": "f87542c494f3493bb605f7f3b3c0f809", "version_major": 2, "version_minor": 0 }, @@ -1499,15 +1509,175 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "compiled_transfers.nogwise.branchwise.gloome.ml. 
df:\n" + ] + }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "5f52d7cc99234551b44f65e943902d32", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>nog_id</th>\n", + " <th>source_branch</th>\n", + " <th>recipient_branch</th>\n", + " <th>gloome_branch_name</th>\n", + " <th>transfers</th>\n", + " <th>transfer_threshold</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>ERJME</td>\n", + " <td>unknown</td>\n", + " <td>1896966</td>\n", + " <td>1896966</td>\n", + " <td>0.10960</td>\n", + " <td>0.10550</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>ERJME</td>\n", + " <td>unknown</td>\n", + " <td>N357</td>\n", + " <td>N2</td>\n", + " <td>0.12460</td>\n", + " <td>0.12360</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>ERJME</td>\n", + " <td>unknown</td>\n", + " <td>216778</td>\n", + " <td>216778</td>\n", + " <td>0.94510</td>\n", + " <td>0.94460</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>ERJME</td>\n", + " <td>unknown</td>\n", + " <td>870187</td>\n", + " <td>870187</td>\n", + " <td>0.73870</td>\n", + " <td>0.73370</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>ERJME</td>\n", + " <td>unknown</td>\n", + " <td>519989</td>\n", + " <td>519989</td>\n", + " <td>0.72170</td>\n", + " <td>0.72100</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>230858</th>\n", + " <td>ERVXT</td>\n", + " <td>unknown</td>\n", + " <td>N287</td>\n", + " <td>N314</td>\n", + " <td>0.07843</td>\n", + " <td>0.07701</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230862</th>\n", + " <td>ERVXT</td>\n", + " <td>unknown</td>\n", + " <td>515618</td>\n", + " <td>515618</td>\n", + " <td>0.16350</td>\n", + " <td>0.15470</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230864</th>\n", + " <td>ERVXT</td>\n", + " <td>unknown</td>\n", + " <td>N343</td>\n", + " <td>N348</td>\n", + " <td>0.24960</td>\n", + " <td>0.24870</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230871</th>\n", + " <td>ERVXT</td>\n", + " <td>unknown</td>\n", + " <td>762983</td>\n", + " <td>762983</td>\n", + " <td>0.12800</td>\n", + " <td>0.12200</td>\n", + " </tr>\n", + " <tr>\n", + " <th>230873</th>\n", + " <td>ERVXT</td>\n", + " <td>unknown</td>\n", + " <td>1810504</td>\n", + " <td>1810504</td>\n", + " <td>0.06005</td>\n", + " <td>0.05901</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>77213 rows × 6 columns</p>\n", + "</div>" + ], "text/plain": [ - "Processing transfer thresholds: 0%| | 0/1 [00:00<?, ?it/s]" + " nog_id source_branch recipient_branch gloome_branch_name transfers \\\n", + "0 ERJME unknown 1896966 1896966 0.10960 \n", + "2 ERJME unknown N357 N2 0.12460 \n", + "15 ERJME unknown 216778 216778 0.94510 \n", + "19 ERJME unknown 870187 870187 0.73870 \n", + "24 ERJME unknown 519989 519989 0.72170 \n", + "... ... ... ... ... ... \n", + "230858 ERVXT unknown N287 N314 0.07843 \n", + "230862 ERVXT unknown 515618 515618 0.16350 \n", + "230864 ERVXT unknown N343 N348 0.24960 \n", + "230871 ERVXT unknown 762983 762983 0.12800 \n", + "230873 ERVXT unknown 1810504 1810504 0.06005 \n", + "\n", + " transfer_threshold \n", + "0 0.10550 \n", + "2 0.12360 \n", + "15 0.94460 \n", + "19 0.73370 \n", + "24 0.72100 \n", + "... ... 
\n", + "230858 0.07701 \n", + "230862 0.15470 \n", + "230864 0.24870 \n", + "230871 0.12200 \n", + "230873 0.05901 \n", + "\n", + "[77213 rows x 6 columns]" ] }, "metadata": {}, @@ -1517,8 +1687,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Results compiled for GLOOME run with species tree.\n", - "compiled_transfers.nogwise.gloome.ml df:\n" + "compiled_transfers.nogwise.gloome.ml. df:\n" ] }, { @@ -1551,31 +1720,31 @@ " <tr>\n", " <th>0</th>\n", " <td>EQRBG</td>\n", - " <td>16.81498</td>\n", + " <td>16.85095</td>\n", " <td>0.050000</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>11.32676</td>\n", + " <td>EQRDS</td>\n", + " <td>15.71871</td>\n", " <td>0.050000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>1.44199</td>\n", + " <td>EQRFZ</td>\n", + " <td>11.55448</td>\n", " <td>0.050000</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>38.71439</td>\n", + " <td>EQRG2</td>\n", + " <td>1.43731</td>\n", " <td>0.050000</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>6.08199</td>\n", + " <td>EQRGC</td>\n", + " <td>38.81869</td>\n", " <td>0.050000</td>\n", " </tr>\n", " <tr>\n", @@ -1585,15 +1754,15 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>209</th>\n", - " <td>ETCDP</td>\n", - " <td>1.99490</td>\n", + " <th>213</th>\n", + " <td>ETC37</td>\n", + " <td>0.99770</td>\n", " <td>0.990107</td>\n", " </tr>\n", " <tr>\n", - " <th>210</th>\n", - " <td>ETCUH</td>\n", - " <td>0.99240</td>\n", + " <th>214</th>\n", + " <td>ETCDP</td>\n", + " <td>1.99540</td>\n", " <td>0.990107</td>\n", " </tr>\n", " <tr>\n", @@ -1616,24 +1785,38 @@ " </tr>\n", " </tbody>\n", "</table>\n", - "<p>117288 rows × 3 columns</p>\n", + "<p>118454 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "0 EQRBG 16.81498 0.050000\n", - "1 EQRFZ 11.32676 0.050000\n", - "2 EQRG2 1.44199 0.050000\n", - "3 EQRGC 38.71439 
0.050000\n", - "4 EQRGG 6.08199 0.050000\n", + "0 EQRBG 16.85095 0.050000\n", + "1 EQRDS 15.71871 0.050000\n", + "2 EQRFZ 11.55448 0.050000\n", + "3 EQRG2 1.43731 0.050000\n", + "4 EQRGC 38.81869 0.050000\n", ".. ... ... ...\n", - "209 ETCDP 1.99490 0.990107\n", - "210 ETCUH 0.99240 0.990107\n", + "213 ETC37 0.99770 0.990107\n", + "214 ETCDP 1.99540 0.990107\n", "0 EQYJP 1.00000 0.999700\n", "1 ES59Z 1.00000 0.999700\n", "2 ESXGB 1.00100 0.999700\n", "\n", - "[117288 rows x 3 columns]" + "[118454 rows x 3 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "55b0add5921444c79cf9ec49ee4054be", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Processing transfer thresholds: 0%| | 0/100 [00:00<?, ?it/s]" ] }, "metadata": {}, @@ -1643,7 +1826,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "compiled_transfers.nogwise.branchwise.gloome.ml df:\n" + "compiled_transfers.nogwise.branchwise.gloome.ml. 
df:\n" ] }, { @@ -1670,1386 +1853,56 @@ " <th>nog_id</th>\n", " <th>source_branch</th>\n", " <th>recipient_branch</th>\n", + " <th>gloome_branch_name</th>\n", " <th>transfers</th>\n", " <th>transfer_threshold</th>\n", - " <th>gloome_branch_name</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>1</th>\n", - " <td>ET9HP</td>\n", + " <th>0</th>\n", + " <td>ERJME</td>\n", " <td>unknown</td>\n", - " <td>N210</td>\n", - " <td>0.05797</td>\n", - " <td>0.05797</td>\n", - " <td>N165</td>\n", + " <td>N19</td>\n", + " <td>N19</td>\n", + " <td>0.05641</td>\n", + " <td>0.05574</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>ET9HP</td>\n", + " <td>ERJME</td>\n", " <td>unknown</td>\n", - " <td>N209</td>\n", - " <td>0.92970</td>\n", - " <td>0.92970</td>\n", - " <td>N185</td>\n", + " <td>N22</td>\n", + " <td>N22</td>\n", + " <td>0.08738</td>\n", + " <td>0.08582</td>\n", " </tr>\n", " <tr>\n", - " <th>6</th>\n", - " <td>ET9HP</td>\n", + " <th>5</th>\n", + " <td>ERJME</td>\n", " <td>unknown</td>\n", - " <td>1178482</td>\n", - " <td>0.95380</td>\n", - " <td>0.95370</td>\n", - " <td>1178482</td>\n", + " <td>N27</td>\n", + " <td>N27</td>\n", + " <td>0.08184</td>\n", + " <td>0.08038</td>\n", " </tr>\n", " <tr>\n", - " <th>8</th>\n", - " <td>ET9HP</td>\n", + " <th>7</th>\n", + " <td>ERJME</td>\n", " <td>unknown</td>\n", - " <td>N186</td>\n", - " <td>0.19190</td>\n", - " <td>0.19190</td>\n", - " <td>N203</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>314283</td>\n", - " <td>0.05668</td>\n", - " <td>0.05662</td>\n", - " <td>314283</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223911</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>N276</td>\n", - " <td>0.05297</td>\n", - " <td>0.05296</td>\n", - " <td>N297</td>\n", 
- " </tr>\n", - " <tr>\n", - " <th>223913</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>343509</td>\n", - " <td>0.08645</td>\n", - " <td>0.08644</td>\n", - " <td>343509</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223923</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>515618</td>\n", - " <td>0.05607</td>\n", - " <td>0.05505</td>\n", - " <td>515618</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223945</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>N350</td>\n", - " <td>0.10260</td>\n", - " <td>0.10260</td>\n", - " <td>N222</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223947</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>N349</td>\n", - " <td>0.87770</td>\n", - " <td>0.87750</td>\n", - " <td>N224</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>75638 rows × 6 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id source_branch recipient_branch transfers transfer_threshold \\\n", - "1 ET9HP unknown N210 0.05797 0.05797 \n", - "2 ET9HP unknown N209 0.92970 0.92970 \n", - "6 ET9HP unknown 1178482 0.95380 0.95370 \n", - "8 ET9HP unknown N186 0.19190 0.19190 \n", - "12 ET9HP unknown 314283 0.05668 0.05662 \n", - "... ... ... ... ... ... \n", - "223911 ETAWV unknown N276 0.05297 0.05296 \n", - "223913 ETAWV unknown 343509 0.08645 0.08644 \n", - "223923 ETAWV unknown 515618 0.05607 0.05505 \n", - "223945 ET5PB unknown N350 0.10260 0.10260 \n", - "223947 ET5PB unknown N349 0.87770 0.87750 \n", - "\n", - " gloome_branch_name \n", - "1 N165 \n", - "2 N185 \n", - "6 1178482 \n", - "8 N203 \n", - "12 314283 \n", - "... ... 
\n", - "223911 N297 \n", - "223913 343509 \n", - "223923 515618 \n", - "223945 N222 \n", - "223947 N224 \n", - "\n", - "[75638 rows x 6 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_losses.nogwise.branchwise.gloome.ml df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>source_branch</th>\n", - " <th>branch</th>\n", - " <th>losses</th>\n", - " <th>transfer_threshold</th>\n", - " <th>gloome_branch_name</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>unknown</td>\n", - " <td>1896966</td>\n", - " <td>0.05675</td>\n", - " <td>0.05611</td>\n", - " <td>1896966</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>unknown</td>\n", - " <td>N182</td>\n", - " <td>0.91170</td>\n", - " <td>0.91150</td>\n", - " <td>N188</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>unknown</td>\n", - " <td>N173</td>\n", - " <td>0.05437</td>\n", - " <td>0.05436</td>\n", - " <td>N189</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>unknown</td>\n", - " <td>N181</td>\n", - " <td>0.07854</td>\n", - " <td>0.07854</td>\n", - " <td>N190</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>unknown</td>\n", - " <td>N191</td>\n", - " <td>0.19540</td>\n", - " <td>0.19530</td>\n", - " <td>N201</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>223954</th>\n", - " <td>unknown</td>\n", - " <td>595494</td>\n", - " <td>0.99480</td>\n", - " <td>0.99430</td>\n", - " <td>595494</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223955</th>\n", - " <td>unknown</td>\n", - " <td>N332</td>\n", - " <td>0.79170</td>\n", - " <td>0.79170</td>\n", - " <td>N276</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223956</th>\n", - " <td>unknown</td>\n", - " <td>N268</td>\n", - " <td>0.20180</td>\n", - " <td>0.20160</td>\n", - " <td>N277</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223957</th>\n", - " <td>unknown</td>\n", - " <td>N331</td>\n", - " <td>0.18220</td>\n", - " <td>0.18220</td>\n", - " <td>N285</td>\n", - " </tr>\n", - " <tr>\n", - " <th>223958</th>\n", - " <td>unknown</td>\n", - " <td>762983</td>\n", - " <td>0.99040</td>\n", - " <td>0.98370</td>\n", - " <td>762983</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>148321 rows × 5 columns</p>\n", - "</div>" - ], - "text/plain": [ - " source_branch branch losses transfer_threshold gloome_branch_name\n", - "0 unknown 1896966 0.05675 0.05611 1896966\n", - "3 unknown N182 0.91170 0.91150 N188\n", - "4 unknown N173 0.05437 0.05436 N189\n", - "5 unknown N181 0.07854 0.07854 N190\n", - "7 unknown N191 0.19540 0.19530 N201\n", - "... ... ... ... ... 
...\n", - "223954 unknown 595494 0.99480 0.99430 595494\n", - "223955 unknown N332 0.79170 0.79170 N276\n", - "223956 unknown N268 0.20180 0.20160 N277\n", - "223957 unknown N331 0.18220 0.18220 N285\n", - "223958 unknown 762983 0.99040 0.98370 762983\n", - "\n", - "[148321 rows x 5 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_transfers.nogwise.gloome.mp df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>EQRBG</td>\n", - " <td>18</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>33</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>15</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>40</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>31</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1281</th>\n", - " <td>ETCI9</td>\n", - " <td>27</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1282</th>\n", - " <td>ETCIB</td>\n", - " <td>17</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1283</th>\n", - " <td>ETCIZ</td>\n", - " 
<td>28</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1284</th>\n", - " <td>ETCJF</td>\n", - " <td>41</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1285</th>\n", - " <td>ETCUH</td>\n", - " <td>23</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>1286 rows × 3 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id transfers transfer_threshold\n", - "0 EQRBG 18 1\n", - "1 EQRFZ 33 1\n", - "2 EQRG2 15 1\n", - "3 EQRGC 40 1\n", - "4 EQRGG 31 1\n", - "... ... ... ...\n", - "1281 ETCI9 27 1\n", - "1282 ETCIB 17 1\n", - "1283 ETCIZ 28 1\n", - "1284 ETCJF 41 1\n", - "1285 ETCUH 23 1\n", - "\n", - "[1286 rows x 3 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_transfers.nogwise.branchwise.gloome.mp df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>source_branch</th>\n", - " <th>recipient_branch</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " <th>gloome_branch_name</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1178482</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1178482</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>N183</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N198</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>ET9HP</td>\n", - " 
<td>unknown</td>\n", - " <td>966</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>966</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>349521</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>349521</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>N186</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N203</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40526</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>N343</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N348</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40527</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>1298881</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1298881</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40528</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>N216</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N229</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40529</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>N241</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N230</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40532</th>\n", - " <td>ET5PB</td>\n", - " <td>unknown</td>\n", - " <td>N346</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N255</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>34374 rows × 6 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id source_branch recipient_branch transfers transfer_threshold \\\n", - "0 ET9HP unknown 1178482 1 1 \n", - "1 ET9HP unknown N183 1 1 \n", - "2 ET9HP unknown 966 1 1 \n", - "3 ET9HP unknown 349521 1 1 \n", - "4 ET9HP unknown N186 1 1 \n", - "... ... ... ... ... ... 
\n", - "40526 ETAWV unknown N343 1 1 \n", - "40527 ET5PB unknown 1298881 1 1 \n", - "40528 ET5PB unknown N216 1 1 \n", - "40529 ET5PB unknown N241 1 1 \n", - "40532 ET5PB unknown N346 1 1 \n", - "\n", - " gloome_branch_name \n", - "0 1178482 \n", - "1 N198 \n", - "2 966 \n", - "3 349521 \n", - "4 N203 \n", - "... ... \n", - "40526 N348 \n", - "40527 1298881 \n", - "40528 N229 \n", - "40529 N230 \n", - "40532 N255 \n", - "\n", - "[34374 rows x 6 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_losses.nogwise.branchwise.gloome.mp df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>source_branch</th>\n", - " <th>branch</th>\n", - " <th>losses</th>\n", - " <th>transfer_threshold</th>\n", - " <th>gloome_branch_name</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>unknown</td>\n", - " <td>1232683</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1232683</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8</th>\n", - " <td>unknown</td>\n", - " <td>207954</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>207954</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>unknown</td>\n", - " <td>1331007</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1331007</td>\n", - " </tr>\n", - " <tr>\n", - " <th>11</th>\n", - " <td>unknown</td>\n", - " <td>N295</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N293</td>\n", - " </tr>\n", - " <tr>\n", - " <th>15</th>\n", - " <td>unknown</td>\n", - " 
<td>634500</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>634500</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40530</th>\n", - " <td>unknown</td>\n", - " <td>56804</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>56804</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40531</th>\n", - " <td>unknown</td>\n", - " <td>1535422</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1535422</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40533</th>\n", - " <td>unknown</td>\n", - " <td>357804</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>357804</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40534</th>\n", - " <td>unknown</td>\n", - " <td>595494</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>595494</td>\n", - " </tr>\n", - " <tr>\n", - " <th>40535</th>\n", - " <td>unknown</td>\n", - " <td>N332</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>N276</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>6162 rows × 5 columns</p>\n", - "</div>" - ], - "text/plain": [ - " source_branch branch losses transfer_threshold gloome_branch_name\n", - "7 unknown 1232683 1 1 1232683\n", - "8 unknown 207954 1 1 207954\n", - "10 unknown 1331007 1 1 1331007\n", - "11 unknown N295 1 1 N293\n", - "15 unknown 634500 1 1 634500\n", - "... ... ... ... ... 
...\n", - "40530 unknown 56804 1 1 56804\n", - "40531 unknown 1535422 1 1 1535422\n", - "40533 unknown 357804 1 1 357804\n", - "40534 unknown 595494 1 1 595494\n", - "40535 unknown N332 1 1 N276\n", - "\n", - "[6162 rows x 5 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8174030ce42843a2bba58edf6abd071e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Processing transfer thresholds: 0%| | 0/100 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "7ff3f8f560e4404eadf9d376753cb185", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Processing transfer thresholds: 0%| | 0/1 [00:00<?, ?it/s]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Results compiled for GLOOME run without species tree.\n", - "compiled_transfers.nogwise.gloome.ml df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>EQRBG</td>\n", - " <td>11.23777</td>\n", - " <td>0.05</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>17.30847</td>\n", - " <td>0.05</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>2.19190</td>\n", - " 
<td>0.05</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>26.10110</td>\n", - " <td>0.05</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>9.88948</td>\n", - " <td>0.05</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>582</th>\n", - " <td>ETCDP</td>\n", - " <td>2.04400</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " <tr>\n", - " <th>583</th>\n", - " <td>ETCEN</td>\n", - " <td>1.01900</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " <tr>\n", - " <th>584</th>\n", - " <td>ETCI9</td>\n", - " <td>2.05600</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " <tr>\n", - " <th>585</th>\n", - " <td>ETCIB</td>\n", - " <td>1.01100</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " <tr>\n", - " <th>586</th>\n", - " <td>ETCIZ</td>\n", - " <td>1.08200</td>\n", - " <td>1.00</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>123689 rows × 3 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id transfers transfer_threshold\n", - "0 EQRBG 11.23777 0.05\n", - "1 EQRFZ 17.30847 0.05\n", - "2 EQRG2 2.19190 0.05\n", - "3 EQRGC 26.10110 0.05\n", - "4 EQRGG 9.88948 0.05\n", - ".. ... ... 
...\n", - "582 ETCDP 2.04400 1.00\n", - "583 ETCEN 1.01900 1.00\n", - "584 ETCI9 2.05600 1.00\n", - "585 ETCIB 1.01100 1.00\n", - "586 ETCIZ 1.08200 1.00\n", - "\n", - "[123689 rows x 3 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_transfers.nogwise.branchwise.gloome.ml df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>source_branch</th>\n", - " <th>recipient_branch</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>6</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>515618</td>\n", - " <td>0.99810</td>\n", - " <td>0.9981</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1410383</td>\n", - " <td>0.99480</td>\n", - " <td>0.9948</td>\n", - " </tr>\n", - " <tr>\n", - " <th>19</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1178482</td>\n", - " <td>0.91850</td>\n", - " <td>0.9181</td>\n", - " </tr>\n", - " <tr>\n", - " <th>22</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1288826</td>\n", - " <td>0.65520</td>\n", - " <td>0.6551</td>\n", - " </tr>\n", - " <tr>\n", - " <th>28</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>349521</td>\n", - " <td>0.84130</td>\n", - " <td>0.8385</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", 
- " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275564</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>1122209</td>\n", - " <td>0.05587</td>\n", - " <td>0.0550</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275571</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>1656094</td>\n", - " <td>0.91420</td>\n", - " <td>0.9130</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275574</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>314608</td>\n", - " <td>0.93170</td>\n", - " <td>0.9314</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275577</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>1859457</td>\n", - " <td>0.95030</td>\n", - " <td>0.9481</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275579</th>\n", - " <td>ETAWV</td>\n", - " <td>unknown</td>\n", - " <td>1414654</td>\n", - " <td>0.76010</td>\n", - " <td>0.7533</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>55173 rows × 5 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id source_branch recipient_branch transfers transfer_threshold\n", - "6 ET9HP unknown 515618 0.99810 0.9981\n", - "7 ET9HP unknown 1410383 0.99480 0.9948\n", - "19 ET9HP unknown 1178482 0.91850 0.9181\n", - "22 ET9HP unknown 1288826 0.65520 0.6551\n", - "28 ET9HP unknown 349521 0.84130 0.8385\n", - "... ... ... ... ... 
...\n", - "275564 ETAWV unknown 1122209 0.05587 0.0550\n", - "275571 ETAWV unknown 1656094 0.91420 0.9130\n", - "275574 ETAWV unknown 314608 0.93170 0.9314\n", - "275577 ETAWV unknown 1859457 0.95030 0.9481\n", - "275579 ETAWV unknown 1414654 0.76010 0.7533\n", - "\n", - "[55173 rows x 5 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_losses.nogwise.branchwise.gloome.ml df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>source_branch</th>\n", - " <th>branch</th>\n", - " <th>losses</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>unknown</td>\n", - " <td>634500</td>\n", - " <td>0.9976</td>\n", - " <td>0.9975</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>unknown</td>\n", - " <td>1115515</td>\n", - " <td>0.9938</td>\n", - " <td>0.9936</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>unknown</td>\n", - " <td>1681196</td>\n", - " <td>0.9976</td>\n", - " <td>0.9974</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>unknown</td>\n", - " <td>207954</td>\n", - " <td>0.3386</td>\n", - " <td>0.3376</td>\n", - " </tr>\n", - " <tr>\n", - " <th>12</th>\n", - " <td>unknown</td>\n", - " <td>1118153</td>\n", - " <td>0.4660</td>\n", - " <td>0.4642</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275586</th>\n", - " 
<td>unknown</td>\n", - " <td>380703</td>\n", - " <td>1.0000</td>\n", - " <td>0.9961</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275589</th>\n", - " <td>unknown</td>\n", - " <td>56804</td>\n", - " <td>0.7820</td>\n", - " <td>0.7819</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275591</th>\n", - " <td>unknown</td>\n", - " <td>745411</td>\n", - " <td>1.0000</td>\n", - " <td>0.9980</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275592</th>\n", - " <td>unknown</td>\n", - " <td>357804</td>\n", - " <td>0.9990</td>\n", - " <td>0.9974</td>\n", - " </tr>\n", - " <tr>\n", - " <th>275593</th>\n", - " <td>unknown</td>\n", - " <td>1535422</td>\n", - " <td>0.9838</td>\n", - " <td>0.9829</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>73350 rows × 4 columns</p>\n", - "</div>" - ], - "text/plain": [ - " source_branch branch losses transfer_threshold\n", - "3 unknown 634500 0.9976 0.9975\n", - "4 unknown 1115515 0.9938 0.9936\n", - "5 unknown 1681196 0.9976 0.9974\n", - "10 unknown 207954 0.3386 0.3376\n", - "12 unknown 1118153 0.4660 0.4642\n", - "... ... ... ... 
...\n", - "275586 unknown 380703 1.0000 0.9961\n", - "275589 unknown 56804 0.7820 0.7819\n", - "275591 unknown 745411 1.0000 0.9980\n", - "275592 unknown 357804 0.9990 0.9974\n", - "275593 unknown 1535422 0.9838 0.9829\n", - "\n", - "[73350 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_transfers.nogwise.gloome.mp df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>EQRBG</td>\n", - " <td>17</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>28</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>15</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>28</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>19</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1269</th>\n", - " <td>ETCI9</td>\n", - " <td>22</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1270</th>\n", - " <td>ETCIB</td>\n", - " <td>12</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1271</th>\n", - " <td>ETCIZ</td>\n", - " <td>17</td>\n", - " <td>1</td>\n", - 
" </tr>\n", - " <tr>\n", - " <th>1272</th>\n", - " <td>ETCJF</td>\n", - " <td>29</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1273</th>\n", - " <td>ETCUH</td>\n", - " <td>26</td>\n", - " <td>1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>1274 rows × 3 columns</p>\n", - "</div>" - ], - "text/plain": [ - " nog_id transfers transfer_threshold\n", - "0 EQRBG 17 1\n", - "1 EQRFZ 28 1\n", - "2 EQRG2 15 1\n", - "3 EQRGC 28 1\n", - "4 EQRGG 19 1\n", - "... ... ... ...\n", - "1269 ETCI9 22 1\n", - "1270 ETCIB 12 1\n", - "1271 ETCIZ 17 1\n", - "1272 ETCJF 29 1\n", - "1273 ETCUH 26 1\n", - "\n", - "[1274 rows x 3 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "compiled_transfers.nogwise.branchwise.gloome.mp df:\n" - ] - }, - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>nog_id</th>\n", - " <th>source_branch</th>\n", - " <th>recipient_branch</th>\n", - " <th>transfers</th>\n", - " <th>transfer_threshold</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>515618</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>5</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1410383</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>10</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>314283</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " 
<th>11</th>\n", - " <td>ET9HP</td>\n", - " <td>unknown</td>\n", - " <td>1178482</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>N28</td>\n", + " <td>N28</td>\n", + " <td>0.20670</td>\n", + " <td>0.19580</td>\n", " </tr>\n", " <tr>\n", - " <th>12</th>\n", - " <td>ET9HP</td>\n", + " <th>9</th>\n", + " <td>ERJME</td>\n", " <td>unknown</td>\n", - " <td>1288826</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>N35</td>\n", + " <td>N35</td>\n", + " <td>0.05857</td>\n", + " <td>0.05849</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -3058,67 +1911,86 @@ " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", + " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>43633</th>\n", - " <td>ETAWV</td>\n", + " <th>278741</th>\n", + " <td>ERVXT</td>\n", " <td>unknown</td>\n", - " <td>214092</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>N346</td>\n", + " <td>N346</td>\n", + " <td>0.07211</td>\n", + " <td>0.07206</td>\n", " </tr>\n", " <tr>\n", - " <th>43636</th>\n", - " <td>ETAWV</td>\n", + " <th>278743</th>\n", + " <td>ERVXT</td>\n", " <td>unknown</td>\n", - " <td>1656094</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>1859457</td>\n", + " <td>1859457</td>\n", + " <td>0.05085</td>\n", + " <td>0.05009</td>\n", " </tr>\n", " <tr>\n", - " <th>43637</th>\n", - " <td>ETAWV</td>\n", + " <th>278745</th>\n", + " <td>ERVXT</td>\n", " <td>unknown</td>\n", - " <td>314608</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>1632859</td>\n", + " <td>1632859</td>\n", + " <td>0.05082</td>\n", + " <td>0.05006</td>\n", " </tr>\n", " <tr>\n", - " <th>43638</th>\n", - " <td>ETAWV</td>\n", + " <th>278747</th>\n", + " <td>ERVXT</td>\n", " <td>unknown</td>\n", - " <td>1859457</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>N356</td>\n", + " <td>N356</td>\n", + " <td>0.06755</td>\n", + " <td>0.06615</td>\n", " </tr>\n", " <tr>\n", - " <th>43639</th>\n", - " <td>ETAWV</td>\n", + " <th>278749</th>\n", + " <td>ERVXT</td>\n", " <td>unknown</td>\n", - " 
<td>1414654</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>247634</td>\n", + " <td>247634</td>\n", + " <td>1.19600</td>\n", + " <td>1.00000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>27173 rows × 5 columns</p>\n", + "<p>131596 rows × 6 columns</p>\n", "</div>" ], "text/plain": [ - " nog_id source_branch recipient_branch transfers transfer_threshold\n", - "4 ET9HP unknown 515618 1 1\n", - "5 ET9HP unknown 1410383 1 1\n", - "10 ET9HP unknown 314283 1 1\n", - "11 ET9HP unknown 1178482 1 1\n", - "12 ET9HP unknown 1288826 1 1\n", - "... ... ... ... ... ...\n", - "43633 ETAWV unknown 214092 1 1\n", - "43636 ETAWV unknown 1656094 1 1\n", - "43637 ETAWV unknown 314608 1 1\n", - "43638 ETAWV unknown 1859457 1 1\n", - "43639 ETAWV unknown 1414654 1 1\n", - "\n", - "[27173 rows x 5 columns]" + " nog_id source_branch recipient_branch gloome_branch_name transfers \\\n", + "0 ERJME unknown N19 N19 0.05641 \n", + "2 ERJME unknown N22 N22 0.08738 \n", + "5 ERJME unknown N27 N27 0.08184 \n", + "7 ERJME unknown N28 N28 0.20670 \n", + "9 ERJME unknown N35 N35 0.05857 \n", + "... ... ... ... ... ... \n", + "278741 ERVXT unknown N346 N346 0.07211 \n", + "278743 ERVXT unknown 1859457 1859457 0.05085 \n", + "278745 ERVXT unknown 1632859 1632859 0.05082 \n", + "278747 ERVXT unknown N356 N356 0.06755 \n", + "278749 ERVXT unknown 247634 247634 1.19600 \n", + "\n", + " transfer_threshold \n", + "0 0.05574 \n", + "2 0.08582 \n", + "5 0.08038 \n", + "7 0.19580 \n", + "9 0.05849 \n", + "... ... \n", + "278741 0.07206 \n", + "278743 0.05009 \n", + "278745 0.05006 \n", + "278747 0.06615 \n", + "278749 1.00000 \n", + "\n", + "[131596 rows x 6 columns]" ] }, "metadata": {}, @@ -3128,7 +2000,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "compiled_losses.nogwise.branchwise.gloome.mp df:\n" + "compiled_transfers.nogwise.gloome.ml. 
df:\n" ] }, { @@ -3152,110 +2024,98 @@ " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", - " <th>source_branch</th>\n", - " <th>branch</th>\n", - " <th>losses</th>\n", + " <th>nog_id</th>\n", + " <th>transfers</th>\n", " <th>transfer_threshold</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", + " <th>0</th>\n", + " <td>EQRBG</td>\n", + " <td>20.35391</td>\n", + " <td>0.05</td>\n", + " </tr>\n", + " <tr>\n", " <th>1</th>\n", - " <td>unknown</td>\n", - " <td>634500</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>EQRDS</td>\n", + " <td>35.32471</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>unknown</td>\n", - " <td>1115515</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>EQRFZ</td>\n", + " <td>27.24602</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>unknown</td>\n", - " <td>1681196</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>7</th>\n", - " <td>unknown</td>\n", - " <td>1232683</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <td>EQRG2</td>\n", + " <td>10.47929</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", - " <th>17</th>\n", - " <td>unknown</td>\n", - " <td>1331007</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>4</th>\n", + " <td>EQRGC</td>\n", + " <td>60.32069</td>\n", + " <td>0.05</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", - " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>43641</th>\n", - " <td>unknown</td>\n", - " <td>380703</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>931</th>\n", + " <td>ETCEW</td>\n", + " <td>1.01600</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>43643</th>\n", - " <td>unknown</td>\n", - " <td>56804</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>932</th>\n", + " <td>ETCI3</td>\n", + " <td>1.00400</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>43644</th>\n", - " 
<td>unknown</td>\n", - " <td>745411</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>933</th>\n", + " <td>ETCI9</td>\n", + " <td>2.01800</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>43645</th>\n", - " <td>unknown</td>\n", - " <td>357804</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>934</th>\n", + " <td>ETCIZ</td>\n", + " <td>2.03000</td>\n", + " <td>1.00</td>\n", " </tr>\n", " <tr>\n", - " <th>43646</th>\n", - " <td>unknown</td>\n", - " <td>1535422</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", + " <th>935</th>\n", + " <td>ETCJF</td>\n", + " <td>2.02600</td>\n", + " <td>1.00</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>4645 rows × 4 columns</p>\n", + "<p>129092 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ - " source_branch branch losses transfer_threshold\n", - "1 unknown 634500 1 1\n", - "2 unknown 1115515 1 1\n", - "3 unknown 1681196 1 1\n", - "7 unknown 1232683 1 1\n", - "17 unknown 1331007 1 1\n", - "... ... ... ... ...\n", - "43641 unknown 380703 1 1\n", - "43643 unknown 56804 1 1\n", - "43644 unknown 745411 1 1\n", - "43645 unknown 357804 1 1\n", - "43646 unknown 1535422 1 1\n", + " nog_id transfers transfer_threshold\n", + "0 EQRBG 20.35391 0.05\n", + "1 EQRDS 35.32471 0.05\n", + "2 EQRFZ 27.24602 0.05\n", + "3 EQRG2 10.47929 0.05\n", + "4 EQRGC 60.32069 0.05\n", + ".. ... ... 
...\n", + "931 ETCEW 1.01600 1.00\n", + "932 ETCI3 1.00400 1.00\n", + "933 ETCI9 2.01800 1.00\n", + "934 ETCIZ 2.03000 1.00\n", + "935 ETCJF 2.02600 1.00\n", "\n", - "[4645 rows x 4 columns]" + "[129092 rows x 3 columns]" ] }, "metadata": {}, @@ -3265,32 +2125,48 @@ "source": [ "pa_matrix_tsv_filepath = f\"{data_dir}/1236_pa_matrix.tsv\"\n", "\n", - "# first we compile results for the case with the species tree,\n", - "gloome_output_dir_wt = f\"{program_runs_dir}GLOOME_with_tree/Results_GLOOME_with_tree/\"\n", - "input_tree = ete3.Tree(input_tree_filepath, format=1)\n", + "# cases with species tree\n", + "gloome_output_dir_wt = f\"{program_runs_dir}/GLOOME_with_tree/Results_GLOOME_ML_with_tree/\"\n", "\n", - "gloome_wt_results_dict = read_and_compile_gloome_results(\n", - " gloome_output_dir=gloome_output_dir_wt, input_tree=input_tree, species_tree_bool=True,\n", - " pa_matrix_tsv_filepath=pa_matrix_tsv_filepath)\n", - "print(\"Results compiled for GLOOME run with species tree.\")\n", - "for key, df in gloome_wt_results_dict.items():\n", + "# ml\n", + "gloome_ml_results_dict = read_and_compile_gloome_results(\n", + " gloome_output_dir_wt, pa_matrix_tsv_filepath, \"ml\", input_tree_filepath\n", + ")\n", + "for key, df in gloome_ml_results_dict.items():\n", " print(f\"{key} df:\")\n", " display(df)\n", - " df.to_csv(f\"{res_dir}/{key}.tsv\", \n", - " index=False, header=True, sep='\\t')\n", - "\n", - "\n", - "# then the case without the species tree\n", - "gloome_output_dir_wot = f\"{program_runs_dir}GLOOME_without_tree/Results_GLOOME_without_tree/\"\n", - "gloome_wot_results_dict = read_and_compile_gloome_results(\n", - " gloome_output_dir=gloome_output_dir_wot, input_tree=input_tree, species_tree_bool=False,\n", - " pa_matrix_tsv_filepath=pa_matrix_tsv_filepath)\n", - "print(\"Results compiled for GLOOME run without species tree.\")\n", - "for key, df in gloome_wot_results_dict.items():\n", + " df.to_csv(f\"{res_dir}/gloome_ml/{key}tsv\", index=False, header=True, 
sep='\\t')\n", + "# mp\n", + "gloome_mp_output_dir_wt = [f\"{program_runs_dir}/GLOOME_with_tree/{fi}\" for fi in os.listdir(f\"{program_runs_dir}/GLOOME_with_tree/\") if fi.startswith('Results_GLOOME_MP_')]\n", + "gloome_mp_results_dict = read_and_compile_gloome_results(\n", + " gloome_mp_output_dir_wt, pa_matrix_tsv_filepath, \"mp\", input_tree_filepath\n", + ")\n", + "for key, df in gloome_mp_results_dict.items():\n", + " if key.endswith('8'): # only show the df for gain penalty ratio 8\n", + " print(f\"{key} df:\")\n", + " display(df)\n", + " df.to_csv(f\"{res_dir}/gloome_mp/{key}tsv\", index=False, header=True, sep='\\t')\n", + "# cases without species tree\n", + "gloome_output_dir_wot = f\"{program_runs_dir}/GLOOME_without_tree/Results_GLOOME_ML_without_tree/\"\n", + "# ml\n", + "gloome_ml_results_dict = read_and_compile_gloome_results(\n", + " gloome_output_dir_wot, pa_matrix_tsv_filepath, \"ml\", None\n", + ")\n", + "for key, df in gloome_ml_results_dict.items():\n", " print(f\"{key} df:\")\n", " display(df)\n", - " df.to_csv(f\"{res_dir}/{key}.without_tree.tsv\", \n", - " index=False, header=True, sep='\\t')" + " df.to_csv(f\"{res_dir}/gloome_ml/{key}without_tree.tsv\", index=False, header=True, sep='\\t')\n", + "# mp\n", + "gloome_mp_output_dir = [f\"{program_runs_dir}/GLOOME_without_tree/{fi}\" for fi in os.listdir(f\"{program_runs_dir}/GLOOME_without_tree/\") if fi.startswith('Results_GLOOME_MP_')]\n", + "gloome_mp_results_dict = read_and_compile_gloome_results(\n", + " gloome_mp_output_dir, pa_matrix_tsv_filepath, \"mp\", None\n", + ")\n", + "for key, df in gloome_mp_results_dict.items():\n", + " if key.endswith('8'):\n", + " print(f\"{key} df:\")\n", + " display(df)\n", + " key = key.replace('.gloome.mp.', '.gloome.mp.without_tree.')\n", + " df.to_csv(f\"{res_dir}/gloome_mp/{key}tsv\", index=False, header=True, sep='\\t')" ] }, { @@ -3309,13 +2185,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, 
"outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "32807662211547a1a513c3967378ec4a", + "model_id": "a34f2182a53a4f8d8c8c8adf147942f3", "version_major": 2, "version_minor": 0 }, @@ -3361,31 +2237,31 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>10</th>\n", + " <th>11</th>\n", " <td>EQRZ4</td>\n", " <td>1</td>\n", " <td>8</td>\n", " </tr>\n", " <tr>\n", - " <th>13</th>\n", + " <th>14</th>\n", " <td>EQSA4</td>\n", " <td>1</td>\n", " <td>8</td>\n", " </tr>\n", " <tr>\n", - " <th>14</th>\n", + " <th>15</th>\n", " <td>EQSAX</td>\n", " <td>1</td>\n", " <td>8</td>\n", " </tr>\n", " <tr>\n", - " <th>22</th>\n", + " <th>23</th>\n", " <td>EQSTQ</td>\n", " <td>2</td>\n", " <td>8</td>\n", " </tr>\n", " <tr>\n", - " <th>23</th>\n", + " <th>24</th>\n", " <td>EQSTU</td>\n", " <td>1</td>\n", " <td>8</td>\n", @@ -3397,55 +2273,55 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>1281</th>\n", - " <td>ETCI9</td>\n", - " <td>138</td>\n", + " <th>1295</th>\n", + " <td>ETCI3</td>\n", + " <td>126</td>\n", " <td>0.33</td>\n", " </tr>\n", " <tr>\n", - " <th>1282</th>\n", - " <td>ETCIB</td>\n", - " <td>90</td>\n", + " <th>1296</th>\n", + " <td>ETCI9</td>\n", + " <td>138</td>\n", " <td>0.33</td>\n", " </tr>\n", " <tr>\n", - " <th>1283</th>\n", + " <th>1297</th>\n", " <td>ETCIZ</td>\n", " <td>157</td>\n", " <td>0.33</td>\n", " </tr>\n", " <tr>\n", - " <th>1284</th>\n", + " <th>1298</th>\n", " <td>ETCJF</td>\n", " <td>180</td>\n", " <td>0.33</td>\n", " </tr>\n", " <tr>\n", - " <th>1285</th>\n", + " <th>1299</th>\n", " <td>ETCUH</td>\n", " <td>122</td>\n", " <td>0.33</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>10170 rows × 3 columns</p>\n", + "<p>10290 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "10 EQRZ4 1 8\n", - "13 EQSA4 1 8\n", - "14 EQSAX 1 8\n", - "22 EQSTQ 2 8\n", - "23 EQSTU 1 8\n", + "11 EQRZ4 1 8\n", + "14 EQSA4 1 8\n", + "15 EQSAX 1 8\n", + "23 EQSTQ 2 8\n", + "24 EQSTU 1 
8\n", "... ... ... ...\n", - "1281 ETCI9 138 0.33\n", - "1282 ETCIB 90 0.33\n", - "1283 ETCIZ 157 0.33\n", - "1284 ETCJF 180 0.33\n", - "1285 ETCUH 122 0.33\n", + "1295 ETCI3 126 0.33\n", + "1296 ETCI9 138 0.33\n", + "1297 ETCIZ 157 0.33\n", + "1298 ETCJF 180 0.33\n", + "1299 ETCUH 122 0.33\n", "\n", - "[10170 rows x 3 columns]" + "[10290 rows x 3 columns]" ] }, "metadata": {}, @@ -3457,13 +2333,13 @@ "count_MP_nogwise_transfers_df = compile_count_mp_nogwise_transfers(count_MP_output_dir, taxonomic_id, res_dir)\n", "print(\"Count MP nogwise transfers:\")\n", "display(count_MP_nogwise_transfers_df)\n", - "count_MP_nogwise_transfers_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.count.mp.tsv\", \n", + "count_MP_nogwise_transfers_df.to_csv(f\"{res_dir}/count_mp/compiled_transfers.nogwise.count.mp.tsv\", \n", " index=False, header=True, sep='\\t')" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -3471,29 +2347,29 @@ "output_type": "stream", "text": [ "Input dir is ../data/program_runs//Count/Count_MP/\n", - "Tree filepath is ../data/1236_wol_tree_pruned_with_internal_labels.nwk and output dir is ../data/compiled_results//count/\n", + "Tree filepath is ../data/1236_wol_tree_pruned_with_internal_labels.nwk and output dir is ../data/compiled_results//count_mp/\n", "Files are ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.33_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.5_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_1_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_2_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_3_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_4_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_5_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_6_families.tsv 
../data/program_runs//Count/Count_MP//1236_Count_output_gain_7_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_8_families.tsv\n", "Number of files is 10\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_2_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.2.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_6_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.6.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_7_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.7.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_1_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.1.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.33_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.0.33.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.5_families.tsv -t 
../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.0.5.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_5_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.5.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_3_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.3.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_8_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.8.tsv\n", - "Processing 920776 nog-branch pairs\n", - "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_4_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count//compiled_transfers.nogwise.branchwise.count.mp.4.tsv\n", - "Processing 920776 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.33_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.0.33.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_7_families.tsv -t 
../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.7.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.5_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.0.5.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_1_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.1.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_2_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.2.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_8_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.8.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_4_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.4.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_6_families.tsv 
-t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.6.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_3_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.3.tsv\n", + "Processing 930800 nog-branch pairs\n", + "Running: python3 lib/compile_count_mp_nw_bw_parallel.py -c ../data/program_runs//Count/Count_MP//1236_Count_output_gain_5_families.tsv -t ../data/1236_wol_tree_pruned_with_internal_labels.nwk -o ../data/compiled_results//count_mp//compiled_transfers.nogwise.branchwise.count.mp.5.tsv\n", + "Processing 930800 nog-branch pairs\n", " \r" ] } @@ -3504,7 +2380,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -3517,7 +2393,7 @@ { "data": { "text/plain": [ - "['6', '1', '3', '0.5', '5', '7', '4', '8', '2', '0.33']" + "['0.5', '8', '1', '4', '2', '3', '7', '5', '6', '0.33']" ] }, "metadata": {}, @@ -3561,35 +2437,35 @@ " <tr>\n", " <th>0</th>\n", " <td>EQRBG</td>\n", - " <td>N30</td>\n", + " <td>N146</td>\n", " <td>0.33</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>EQRBG</td>\n", - " <td>1304275</td>\n", + " <td>N236</td>\n", " <td>0.33</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>EQRBG</td>\n", - " <td>N116</td>\n", + " <td>1515746</td>\n", " <td>0.33</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>EQRBG</td>\n", - " <td>N215</td>\n", + " <td>N100</td>\n", " <td>0.33</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>EQRBG</td>\n", - " <td>745411</td>\n", + " <td>58049</td>\n", " <td>0.33</td>\n", " <td>unknown</td>\n", " </tr>\n", @@ -3601,60 +2477,60 @@ " 
<td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>702974</th>\n", + " <th>712639</th>\n", " <td>ETCUH</td>\n", - " <td>69222</td>\n", + " <td>N311</td>\n", " <td>8</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>702975</th>\n", - " <td>ETCEW</td>\n", - " <td>N87</td>\n", + " <th>712640</th>\n", + " <td>ETCUH</td>\n", + " <td>1926881</td>\n", " <td>8</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>702976</th>\n", - " <td>ETCEW</td>\n", - " <td>N41</td>\n", + " <th>712641</th>\n", + " <td>ETCUH</td>\n", + " <td>N304</td>\n", " <td>8</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>702977</th>\n", - " <td>ETCEW</td>\n", - " <td>1548547</td>\n", + " <th>712642</th>\n", + " <td>ETCUH</td>\n", + " <td>69222</td>\n", " <td>8</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>702978</th>\n", - " <td>ETCEW</td>\n", - " <td>N57</td>\n", + " <th>712643</th>\n", + " <td>ETCUH</td>\n", + " <td>634500</td>\n", " <td>8</td>\n", " <td>unknown</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>702979 rows × 4 columns</p>\n", + "<p>712644 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id recipient_branch transfers source_branch\n", - "0 EQRBG N30 0.33 unknown\n", - "1 EQRBG 1304275 0.33 unknown\n", - "2 EQRBG N116 0.33 unknown\n", - "3 EQRBG N215 0.33 unknown\n", - "4 EQRBG 745411 0.33 unknown\n", + "0 EQRBG N146 0.33 unknown\n", + "1 EQRBG N236 0.33 unknown\n", + "2 EQRBG 1515746 0.33 unknown\n", + "3 EQRBG N100 0.33 unknown\n", + "4 EQRBG 58049 0.33 unknown\n", "... ... ... ... 
...\n", - "702974 ETCUH 69222 8 unknown\n", - "702975 ETCEW N87 8 unknown\n", - "702976 ETCEW N41 8 unknown\n", - "702977 ETCEW 1548547 8 unknown\n", - "702978 ETCEW N57 8 unknown\n", + "712639 ETCUH N311 8 unknown\n", + "712640 ETCUH 1926881 8 unknown\n", + "712641 ETCUH N304 8 unknown\n", + "712642 ETCUH 69222 8 unknown\n", + "712643 ETCUH 634500 8 unknown\n", "\n", - "[702979 rows x 4 columns]" + "[712644 rows x 4 columns]" ] }, "metadata": {}, @@ -3685,13 +2561,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fd12d7fed26442c7bf0cbbbd77fa4368", + "model_id": "af86a9e274914528a070a236b7a11153", "version_major": 2, "version_minor": 0 }, @@ -3738,38 +2614,38 @@ " </thead>\n", " <tbody>\n", " <tr>\n", - " <th>46</th>\n", + " <th>33</th>\n", " <td>ERTPC</td>\n", " <td>1896966</td>\n", - " <td>0.997253</td>\n", + " <td>0.996428</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>51</th>\n", + " <th>57</th>\n", " <td>ET8I8</td>\n", " <td>1896966</td>\n", - " <td>0.996793</td>\n", + " <td>0.996019</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>155</th>\n", + " <th>148</th>\n", " <td>ERUVU</td>\n", " <td>1896966</td>\n", - " <td>0.997317</td>\n", + " <td>0.996502</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", " <th>158</th>\n", - " <td>ERQWV</td>\n", + " <td>ERJ56</td>\n", " <td>1896966</td>\n", - " <td>0.996674</td>\n", + " <td>0.996487</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>195</th>\n", - " <td>ERJ56</td>\n", + " <th>176</th>\n", + " <td>ERENP</td>\n", " <td>1896966</td>\n", - " <td>0.997302</td>\n", + " <td>0.996191</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", @@ -3780,60 +2656,60 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>920771</th>\n", + " <th>930795</th>\n", " <td>ER3QB</td>\n", " <td>N357</td>\n", - " <td>0.000005</td>\n", + " <td>0.000007</td>\n", " 
<td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>920772</th>\n", - " <td>ER3P3</td>\n", + " <th>930796</th>\n", + " <td>ER3QZ</td>\n", " <td>N357</td>\n", - " <td>0.000004</td>\n", + " <td>0.000005</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>920773</th>\n", - " <td>ERDWS</td>\n", + " <th>930797</th>\n", + " <td>ESNNU</td>\n", " <td>N357</td>\n", - " <td>0.000004</td>\n", + " <td>0.000017</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>920774</th>\n", - " <td>ETAWV</td>\n", + " <th>930798</th>\n", + " <td>ER6B2</td>\n", " <td>N357</td>\n", - " <td>0.000004</td>\n", + " <td>0.000005</td>\n", " <td>unknown</td>\n", " </tr>\n", " <tr>\n", - " <th>920775</th>\n", - " <td>ET5PB</td>\n", + " <th>930799</th>\n", + " <td>ERVXT</td>\n", " <td>N357</td>\n", - " <td>0.000004</td>\n", + " <td>0.000007</td>\n", " <td>unknown</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>546872 rows × 4 columns</p>\n", + "<p>552916 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id recipient_branch transfers source_branch\n", - "46 ERTPC 1896966 0.997253 unknown\n", - "51 ET8I8 1896966 0.996793 unknown\n", - "155 ERUVU 1896966 0.997317 unknown\n", - "158 ERQWV 1896966 0.996674 unknown\n", - "195 ERJ56 1896966 0.997302 unknown\n", + "33 ERTPC 1896966 0.996428 unknown\n", + "57 ET8I8 1896966 0.996019 unknown\n", + "148 ERUVU 1896966 0.996502 unknown\n", + "158 ERJ56 1896966 0.996487 unknown\n", + "176 ERENP 1896966 0.996191 unknown\n", "... ... ... ... 
...\n", - "920771 ER3QB N357 0.000005 unknown\n", - "920772 ER3P3 N357 0.000004 unknown\n", - "920773 ERDWS N357 0.000004 unknown\n", - "920774 ETAWV N357 0.000004 unknown\n", - "920775 ET5PB N357 0.000004 unknown\n", + "930795 ER3QB N357 0.000007 unknown\n", + "930796 ER3QZ N357 0.000005 unknown\n", + "930797 ESNNU N357 0.000017 unknown\n", + "930798 ER6B2 N357 0.000005 unknown\n", + "930799 ERVXT N357 0.000007 unknown\n", "\n", - "[546872 rows x 4 columns]" + "[552916 rows x 4 columns]" ] }, "metadata": {}, @@ -3876,32 +2752,32 @@ " <tr>\n", " <th>0</th>\n", " <td>EQRBG</td>\n", - " <td>15.669436</td>\n", - " <td>1.372267e-15</td>\n", + " <td>15.700713</td>\n", + " <td>1.107008e-15</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQRFZ</td>\n", - " <td>19.318403</td>\n", - " <td>1.372267e-15</td>\n", + " <td>EQRDS</td>\n", + " <td>17.821400</td>\n", + " <td>1.107008e-15</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQRG2</td>\n", - " <td>6.139847</td>\n", - " <td>1.372267e-15</td>\n", + " <td>EQRFZ</td>\n", + " <td>19.090728</td>\n", + " <td>1.107008e-15</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQRGC</td>\n", - " <td>32.660359</td>\n", - " <td>1.372267e-15</td>\n", + " <td>EQRG2</td>\n", + " <td>6.146018</td>\n", + " <td>1.107008e-15</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQRGG</td>\n", - " <td>17.762175</td>\n", - " <td>1.372267e-15</td>\n", + " <td>EQRGC</td>\n", + " <td>32.652265</td>\n", + " <td>1.107008e-15</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -3910,27 +2786,27 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>993</th>\n", - " <td>ETCIB</td>\n", - " <td>0.999963</td>\n", + " <th>996</th>\n", + " <td>ETCI9</td>\n", + " <td>3.989736</td>\n", " <td>9.898990e-01</td>\n", " </tr>\n", " <tr>\n", - " <th>994</th>\n", + " <th>997</th>\n", " <td>ETCIZ</td>\n", - " <td>0.999233</td>\n", + " <td>0.999188</td>\n", " <td>9.898990e-01</td>\n", " </tr>\n", " <tr>\n", - " <th>995</th>\n", + " 
<th>998</th>\n", " <td>ETCJF</td>\n", - " <td>1.982774</td>\n", + " <td>1.982222</td>\n", " <td>9.898990e-01</td>\n", " </tr>\n", " <tr>\n", - " <th>996</th>\n", + " <th>999</th>\n", " <td>ETCUH</td>\n", - " <td>1.993069</td>\n", + " <td>1.992892</td>\n", " <td>9.898990e-01</td>\n", " </tr>\n", " <tr>\n", @@ -3941,24 +2817,24 @@ " </tr>\n", " </tbody>\n", "</table>\n", - "<p>126136 rows × 3 columns</p>\n", + "<p>127500 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "0 EQRBG 15.669436 1.372267e-15\n", - "1 EQRFZ 19.318403 1.372267e-15\n", - "2 EQRG2 6.139847 1.372267e-15\n", - "3 EQRGC 32.660359 1.372267e-15\n", - "4 EQRGG 17.762175 1.372267e-15\n", + "0 EQRBG 15.700713 1.107008e-15\n", + "1 EQRDS 17.821400 1.107008e-15\n", + "2 EQRFZ 19.090728 1.107008e-15\n", + "3 EQRG2 6.146018 1.107008e-15\n", + "4 EQRGC 32.652265 1.107008e-15\n", ".. ... ... ...\n", - "993 ETCIB 0.999963 9.898990e-01\n", - "994 ETCIZ 0.999233 9.898990e-01\n", - "995 ETCJF 1.982774 9.898990e-01\n", - "996 ETCUH 1.993069 9.898990e-01\n", + "996 ETCI9 3.989736 9.898990e-01\n", + "997 ETCIZ 0.999188 9.898990e-01\n", + "998 ETCJF 1.982222 9.898990e-01\n", + "999 ETCUH 1.992892 9.898990e-01\n", "0 ER67V 1.000000 1.000000e+00\n", "\n", - "[126136 rows x 3 columns]" + "[127500 rows x 3 columns]" ] }, "metadata": {}, @@ -4000,33 +2876,33 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>ET9HP</td>\n", + " <td>ERJME</td>\n", " <td>1896966</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>ERPXQ</td>\n", + " <td>ESZHK</td>\n", " <td>1896966</td>\n", - " <td>0.000055</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>ET34D</td>\n", + " <td>ERZ77</td>\n", " <td>1896966</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>ESD2G</td>\n", + " <td>ESRQ3</td>\n", " <td>1896966</td>\n", - " <td>0.086637</td>\n", + " 
<td>0.000053</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>ERNQY</td>\n", + " <td>ESD2G</td>\n", " <td>1896966</td>\n", - " <td>0.022795</td>\n", + " <td>0.068212</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", @@ -4035,55 +2911,55 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>920771</th>\n", + " <th>930795</th>\n", " <td>ER3QB</td>\n", " <td>N357</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", - " <th>920772</th>\n", - " <td>ER3P3</td>\n", + " <th>930796</th>\n", + " <td>ER3QZ</td>\n", " <td>N357</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", - " <th>920773</th>\n", - " <td>ERDWS</td>\n", + " <th>930797</th>\n", + " <td>ESNNU</td>\n", " <td>N357</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", - " <th>920774</th>\n", - " <td>ETAWV</td>\n", + " <th>930798</th>\n", + " <td>ER6B2</td>\n", " <td>N357</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " <tr>\n", - " <th>920775</th>\n", - " <td>ET5PB</td>\n", + " <th>930799</th>\n", + " <td>ERVXT</td>\n", " <td>N357</td>\n", - " <td>0.000041</td>\n", + " <td>0.000053</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>832797 rows × 3 columns</p>\n", + "<p>841772 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id branch losses\n", - "0 ET9HP 1896966 0.000041\n", - "1 ERPXQ 1896966 0.000055\n", - "2 ET34D 1896966 0.000041\n", - "3 ESD2G 1896966 0.086637\n", - "4 ERNQY 1896966 0.022795\n", + "0 ERJME 1896966 0.000053\n", + "1 ESZHK 1896966 0.000053\n", + "2 ERZ77 1896966 0.000053\n", + "3 ESRQ3 1896966 0.000053\n", + "4 ESD2G 1896966 0.068212\n", "... ... ... 
...\n", - "920771 ER3QB N357 0.000041\n", - "920772 ER3P3 N357 0.000041\n", - "920773 ERDWS N357 0.000041\n", - "920774 ETAWV N357 0.000041\n", - "920775 ET5PB N357 0.000041\n", + "930795 ER3QB N357 0.000053\n", + "930796 ER3QZ N357 0.000053\n", + "930797 ESNNU N357 0.000053\n", + "930798 ER6B2 N357 0.000053\n", + "930799 ERVXT N357 0.000053\n", "\n", - "[832797 rows x 3 columns]" + "[841772 rows x 3 columns]" ] }, "metadata": {}, @@ -4098,15 +2974,15 @@ "# Display and write out the dataframes to TSV files\n", "print(\"Count_ML NOGwise branchwise transfers:\")\n", "display(count_ml_nw_bw_gains_df)\n", - "count_ml_nw_bw_gains_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.branchwise.count.ml.tsv\", index=False, header=True, sep='\\t')\n", + "count_ml_nw_bw_gains_df.to_csv(f\"{res_dir}/count_ml/compiled_transfers.nogwise.branchwise.count.ml.tsv\", index=False, header=True, sep='\\t')\n", "\n", "print(\"Count ML NOGwise gains:\")\n", "display(count_ml_nogwise_gains_df)\n", - "count_ml_nogwise_gains_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.count.ml.tsv\", index=False, header=True, sep='\\t')\n", + "count_ml_nogwise_gains_df.to_csv(f\"{res_dir}/count_ml/compiled_transfers.nogwise.count.ml.tsv\", index=False, header=True, sep='\\t')\n", "\n", "print(\"Count_ML NOGwise branchwise losses:\")\n", "display(count_ml_nw_bw_losses_df)\n", - "count_ml_nw_bw_losses_df.to_csv(f\"{res_dir}/compiled_losses.nogwise.branchwise.count.ml.tsv\", index=False, header=True, sep='\\t')" + "count_ml_nw_bw_losses_df.to_csv(f\"{res_dir}/count_ml/compiled_losses.nogwise.branchwise.count.ml.tsv\", index=False, header=True, sep='\\t')" ] }, { @@ -4118,7 +2994,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -4126,17 +3002,17 @@ "output_type": "stream", "text": [ "Gene to NOG map for some genes:\n", - "1005058.UMN179_02117: ET9HP\n", - "1006000.GKAS_03119: ET9HP\n", - "1051646.IX91_13535: ET9HP\n", - 
"1122206.SAMN02745753_03616: ET9HP\n", - "1122207.MUS1_13180: ET9HP\n" + "1051646.IX91_06790: ERJME\n", + "1051646.IX91_17690: ERJME\n", + "1076588.TBH_C2789: ERJME\n", + "1127673.GLIP_2986: ERJME\n", + "1177154.Y5S_01274: ERJME\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4c129d3a7d1c480fba5464b0a2055980", + "model_id": "a0e116e1b7ec4cf2b9d905e4c5ea9951", "version_major": 2, "version_minor": 0 }, @@ -4183,32 +3059,32 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>EQRVP</td>\n", + " <td>EQSTU</td>\n", " <td>1</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>EQSTU</td>\n", + " <td>EQTSP</td>\n", " <td>1</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>EQTSP</td>\n", + " <td>EQUN1</td>\n", " <td>1</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>EQUN1</td>\n", - " <td>1</td>\n", + " <td>EQUY3</td>\n", + " <td>2</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>EQUY3</td>\n", - " <td>2</td>\n", + " <td>EQUY4</td>\n", + " <td>1</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", @@ -4218,55 +3094,55 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>957</th>\n", - " <td>ETC37</td>\n", - " <td>1</td>\n", - " <td>4.0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>958</th>\n", + " <th>965</th>\n", " <td>ETCDP</td>\n", - " <td>33</td>\n", + " <td>32</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " <th>959</th>\n", + " <th>966</th>\n", " <td>ETCEN</td>\n", - " <td>43</td>\n", + " <td>41</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " <th>960</th>\n", + " <th>967</th>\n", " <td>ETCI9</td>\n", " <td>5</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " <th>961</th>\n", + " <th>968</th>\n", " <td>ETCIZ</td>\n", - " <td>30</td>\n", + " <td>27</td>\n", + " <td>4.0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>969</th>\n", + " <td>ETCUH</td>\n", + " <td>1</td>\n", " <td>4.0</td>\n", " </tr>\n", " </tbody>\n", 
"</table>\n", - "<p>3689 rows × 3 columns</p>\n", + "<p>3725 rows × 3 columns</p>\n", "</div>" ], "text/plain": [ " nog_id transfers transfer_threshold\n", - "0 EQRVP 1 13.0\n", - "1 EQSTU 1 13.0\n", - "2 EQTSP 1 13.0\n", - "3 EQUN1 1 13.0\n", - "4 EQUY3 2 13.0\n", + "0 EQSTU 1 13.0\n", + "1 EQTSP 1 13.0\n", + "2 EQUN1 1 13.0\n", + "3 EQUY3 2 13.0\n", + "4 EQUY4 1 13.0\n", ".. ... ... ...\n", - "957 ETC37 1 4.0\n", - "958 ETCDP 33 4.0\n", - "959 ETCEN 43 4.0\n", - "960 ETCI9 5 4.0\n", - "961 ETCIZ 30 4.0\n", + "965 ETCDP 32 4.0\n", + "966 ETCEN 41 4.0\n", + "967 ETCI9 5 4.0\n", + "968 ETCIZ 27 4.0\n", + "969 ETCUH 1 4.0\n", "\n", - "[3689 rows x 3 columns]" + "[3725 rows x 3 columns]" ] }, "metadata": {}, @@ -4309,37 +3185,37 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>EQRFZ</td>\n", + " <td>EQRDS</td>\n", " <td>unknown</td>\n", - " <td>966</td>\n", - " <td>6.0</td>\n", + " <td>585056</td>\n", + " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>EQRFZ</td>\n", " <td>unknown</td>\n", - " <td>28173</td>\n", - " <td>5.0</td>\n", + " <td>966</td>\n", + " <td>6.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>EQRFZ</td>\n", " <td>unknown</td>\n", - " <td>43263</td>\n", + " <td>28173</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>EQRFZ</td>\n", " <td>unknown</td>\n", - " <td>87626</td>\n", + " <td>43263</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>EQRFZ</td>\n", " <td>unknown</td>\n", - " <td>105559</td>\n", + " <td>69222</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", @@ -4350,60 +3226,60 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>16844</th>\n", + " <th>16695</th>\n", " <td>ETCIZ</td>\n", " <td>unknown</td>\n", - " <td>1232683</td>\n", + " <td>1134474</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " <th>16845</th>\n", + " <th>16696</th>\n", " <td>ETCIZ</td>\n", " <td>unknown</td>\n", " <td>1245471</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " 
<th>16846</th>\n", + " <th>16697</th>\n", " <td>ETCIZ</td>\n", " <td>unknown</td>\n", " <td>1354303</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", - " <th>16847</th>\n", + " <th>16698</th>\n", " <td>ETCIZ</td>\n", " <td>unknown</td>\n", " <td>1392540</td>\n", - " <td>4.0</td>\n", + " <td>5.0</td>\n", " </tr>\n", " <tr>\n", - " <th>16848</th>\n", - " <td>ETCIZ</td>\n", + " <th>16699</th>\n", + " <td>ETCUH</td>\n", " <td>unknown</td>\n", - " <td>1799789</td>\n", + " <td>314608</td>\n", " <td>4.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>16849 rows × 4 columns</p>\n", + "<p>16700 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " nog_id source_branch recipient_branch transfers\n", - "0 EQRFZ unknown 966 6.0\n", - "1 EQRFZ unknown 28173 5.0\n", - "2 EQRFZ unknown 43263 5.0\n", - "3 EQRFZ unknown 87626 5.0\n", - "4 EQRFZ unknown 105559 5.0\n", + "0 EQRDS unknown 585056 4.0\n", + "1 EQRFZ unknown 966 6.0\n", + "2 EQRFZ unknown 28173 5.0\n", + "3 EQRFZ unknown 43263 5.0\n", + "4 EQRFZ unknown 69222 5.0\n", "... ... ... ... 
...\n", - "16844 ETCIZ unknown 1232683 4.0\n", - "16845 ETCIZ unknown 1245471 4.0\n", - "16846 ETCIZ unknown 1354303 4.0\n", - "16847 ETCIZ unknown 1392540 4.0\n", - "16848 ETCIZ unknown 1799789 4.0\n", + "16695 ETCIZ unknown 1134474 4.0\n", + "16696 ETCIZ unknown 1245471 4.0\n", + "16697 ETCIZ unknown 1354303 4.0\n", + "16698 ETCIZ unknown 1392540 5.0\n", + "16699 ETCUH unknown 314608 4.0\n", "\n", - "[16849 rows x 4 columns]" + "[16700 rows x 4 columns]" ] }, "metadata": {}, @@ -4421,8 +3297,8 @@ "print(\"Wn NOGwise branchwise HGTs:\")\n", "display(wn_nogwise_branchwise_hgt_df)\n", "\n", - "wn_nogwise_hgt_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.wn.tsv\", index=False, header=True, sep='\\t')\n", - "wn_nogwise_branchwise_hgt_df.to_csv(f\"{res_dir}/compiled_transfers.nogwise.branchwise.wn.tsv\", index=False, header=True, sep='\\t')" + "wn_nogwise_hgt_df.to_csv(f\"{res_dir}/wn/compiled_transfers.nogwise.wn.tsv\", index=False, header=True, sep='\\t')\n", + "wn_nogwise_branchwise_hgt_df.to_csv(f\"{res_dir}/wn/compiled_transfers.nogwise.branchwise.wn.tsv\", index=False, header=True, sep='\\t')" ] }, { diff --git a/03-analyse_outputs/lib/compile_count_mp_nw_bw_parallel.sh b/03-analyse_outputs/lib/compile_count_mp_nw_bw_parallel.sh index 9d95b6867c387b958a0fc236d27d07e17b7999f7..7b38247a38e8f928ebec5ca73dba172955187525 100644 --- a/03-analyse_outputs/lib/compile_count_mp_nw_bw_parallel.sh +++ b/03-analyse_outputs/lib/compile_count_mp_nw_bw_parallel.sh @@ -18,7 +18,7 @@ res_dir=$4 input_dir="$program_runs_dir/Count/Count_MP/" echo "Input dir is $input_dir" # output_dir is the directory where all the output files will be written -output_dir="$res_dir/count/" +output_dir="$res_dir/count_mp/" # make the output dir if it doesn't exist mkdir -p $output_dir tree_filepath="$input_tree_filepath" diff --git a/03-analyse_outputs/lib/output_compilation_functions.py b/03-analyse_outputs/lib/output_compilation_functions.py index 
d32616d97e82da360d0b670952953461329431b6..4c95218045fb04fada4e099b65dabecf039df2cc 100644 --- a/03-analyse_outputs/lib/output_compilation_functions.py +++ b/03-analyse_outputs/lib/output_compilation_functions.py @@ -1,5 +1,6 @@ import os from multiprocessing import Pool +from typing import Union, List, Optional # to suppress warning from ete3 because it's not up to date with py3.12 import warnings @@ -182,7 +183,7 @@ def prepare_nogwise_transfer_thresholds_df(nogwise_transfers_df, threshold_colum def compile_angst_results(output_dir, input_tree_filepath): # for each nog_id/ read in the .events file inside it and store it nogwise_hgt_list = [] - for nog_id in os.listdir(output_dir): + for nog_id in [d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d))]: # read in the AnGST.events file inside it, but only lines that start with "[hgt]: " with open(os.path.join(output_dir, nog_id, "AnGST.events"), "r") as nog_fo: # lines look like `[hgt]: source_branch --> target_branch` @@ -213,6 +214,11 @@ def compile_angst_results(output_dir, input_tree_filepath): # we need to map these to the input tree internal node names input_tree = ete3.Tree(input_tree_filepath, format=1) angst_node_mapping = map_angst_to_input_nodes(input_tree, angst_internal_nodes) + # write this mapping to a tsv file + with open(os.path.join(output_dir, "angst_node_mapping.tsv"), "w") as angst_node_mapping_fo: + angst_node_mapping_fo.write("angst_node\tinput_tree_node\n") + for angst_node, input_tree_node in angst_node_mapping.items(): + angst_node_mapping_fo.write(f"{angst_node}\t{input_tree_node}\n") # now replace in the nogwise df, the names, based on this mapping nogwise_branchwise_df["source_branch"] = nogwise_branchwise_df["source_branch"].map( @@ -259,6 +265,11 @@ def compile_ale_outputs(output_dir, input_tree): ale_tree = ete3.Tree(ale_tree_string, format=1) ale_node_mapping = map_output_to_input_nodes(input_tree, ale_tree, ale=True) + # write this mapping to a tsv file + 
with open(os.path.join(output_dir, "ale_node_mapping.tsv"), "w") as ale_node_mapping_fo: + ale_node_mapping_fo.write("ale_node\tinput_tree_node\n") + for ale_node, input_tree_node in ale_node_mapping.items(): + ale_node_mapping_fo.write(f"{ale_node}\t{input_tree_node}\n") # now read in all the *uTs files in the output_dir, and process them. # each file contains the columns: 'source_branch', 'recipient_branch', and 'freq' (of transfer). @@ -414,6 +425,11 @@ def compile_ranger_results(ranger_dir, input_tree_filepath): ranger_tree = ete3.Tree(ranger_tree_string, format=1) input_tree = ete3.Tree(input_tree_filepath, format=1) ranger_node_mapping = map_output_to_input_nodes(input_tree, ranger_tree) + # write this mapping to a tsv file + with open(os.path.join(ranger_dir, "ranger_node_mapping.tsv"), "w") as ranger_node_mapping_fo: + ranger_node_mapping_fo.write("ranger_node\tinput_tree_node\n") + for ranger_node, input_tree_node in ranger_node_mapping.items(): + ranger_node_mapping_fo.write(f"{ranger_node}\t{input_tree_node}\n") # now replace in the nogwise_branchwise df, the names, based on this mapping nogwise_branchwise_hgt_df["source_branch"] = nogwise_branchwise_hgt_df[ @@ -679,171 +695,312 @@ def process_count_ml_output(count_ML_output_file: str): return count_ml_gains_df, count_ml_nogwise_gains_df, count_ml_losses_df -def compile_gloome_results( - expectations_df: pd.DataFrame, - gainloss_df: pd.DataFrame, - gloome_node_mapping: dict, - ml_mp: str, - species_tree_bool: bool, +def read_and_compile_gloome_results( + gloome_output_dir: Union[str, List[str]], # can be a list of gloome output dirs pa_matrix_tsv_filepath: str, + ml_mp: str, # either 'ml' or 'mp' + input_tree_filepath: str, ): + """ + Read and compile GLOOME results from the specified output directory. - if ml_mp not in ["ml", "mp"]: + Parameters: + gloome_output_dir: Path to the GLOOME output directory. 
+ This can be a list of gloome output dirs (in case of 'mp' mode) + pa_matrix_tsv_filepath (str): Path to the PA matrix TSV file. + ml_mp (str): Specify whether to use 'ml' or 'mp' for maximum-likelihood or maximum-parsimony. + In case of 'mp', gloome_output_dir must be a list of at least two directories (one per gain penalty ratio). + input_tree_filepath (str): Path to the input tree file. + This is required if GLOOME was run with a species tree. + If not available, set it as None, + and the function will not map the branch names to the input tree. + + Returns: + dict: A dictionary containing compiled GLOOME results. + The keys are the file names and the values are the corresponding DataFrames. + """ + + # if input_tree_filepath is not None, read in the tree + input_tree = None + if input_tree_filepath is not None: + input_tree = ete3.Tree(input_tree_filepath, format=1) + else: + input_tree = None + # make sure pa_matrix_tsv_filepath is a valid file + if not os.path.isfile(pa_matrix_tsv_filepath): raise ValueError( - "ml_mp must be either 'ml' or 'mp', for maximum-likelihood or maximum-parsimony" + f"PA matrix TSV file {pa_matrix_tsv_filepath} does not exist. Please provide a valid file." 
) - # if species_tree_bool is True, prepare branchwise file and nogwise.branchwise files also, apart from nogwise file - if not species_tree_bool: - # if species_tree_bool is False, we don't have a species tree comparable across methods - # so we keep the terminal branches only (internal branch labels start with 'N') - gainloss_df = gainloss_df[gainloss_df["branch"].str.startswith("N") == False] - expectations_df = expectations_df[ - expectations_df["branch"].str.startswith("N") == False - ] - - # prepare nogwise file - # Replace the branch names with the input tree branch names if species_tree_bool is True - if species_tree_bool: - gainloss_df = gainloss_df.rename(columns={"branch": "gloome_branch_name"}) - gainloss_df.loc[:, "recipient_branch"] = gainloss_df["gloome_branch_name"].map( - gloome_node_mapping + # if it's mp, make sure gloome_output_dir is a list of dirs + if ml_mp == "mp": + if isinstance(gloome_output_dir, str): + raise ValueError( + "For maximum-parsimony mode, gloome_output_dir must be a list of directories." + "These correspond to the different gain penalty ratios used in GLOOME." + ) + # if gloome_output_dir is a list, make sure it has at least 2 elements + if len(gloome_output_dir) < 2: + raise ValueError( + "For maximum-parsimony mode, gloome_output_dir must be a list of directories." + "These correspond to the different gain penalty ratios used in GLOOME." + ) + gloome_results_dict = read_and_compile_mp_gloome_results( + gloome_output_dir, pa_matrix_tsv_filepath, input_tree + ) + elif ml_mp == "ml": + if isinstance(gloome_output_dir, list): + raise ValueError( + "For maximum-likelihood mode, gloome_output_dir must be a single directory." 
+ ) + gloome_results_dict = read_and_compile_ml_gloome_results( + gloome_output_dir, pa_matrix_tsv_filepath, input_tree ) else: - gainloss_df.rename(columns={"branch": "recipient_branch"}, inplace=True) - gainloss_df["source_branch"] = "unknown" + raise ValueError( + "ml_mp must be either 'ml' or 'mp', for maximum-likelihood or maximum-parsimony" + ) + + # return the dictionary + return gloome_results_dict + + +def read_and_compile_mp_gloome_results( + gloome_output_dirs: List[str], + pa_matrix_tsv_filepath: str, + input_tree: Optional[ete3.Tree] = None, +): + """ + Read and compile GLOOME results for maximum-parsimony mode. - # we need to replace the POS column data with NOG-IDs. - # First read in the tsv file of the PA matrix we created for Count (since the matrix was the same for GLOOME also) - # find the _PA_matrix.tsv file in the Count dir, using the gloome_output_dir string + Parameters: + gloome_output_dirs (List[str]): List of paths to GLOOME output directories. + pa_matrix_tsv_filepath (str): Path to the PA matrix TSV file. + input_tree (ete3.Tree): ETE3 Tree object representing the input tree. + Returns: + dict: A dictionary containing compiled GLOOME results. + The keys are the file names and the values are the corresponding DataFrames. + """ + # read in the PA matrix TSV file pa_matrix_df = pd.read_csv(pa_matrix_tsv_filepath, sep="\t") - # the first column here contains the NOGs. 
The row number of the NOG corresponds to the POS IDs in the gainloss_df # create a dict of row number to NOG IDs in pa_matrix_df pos_nog_dict = {i + 1: nog for i, nog in enumerate(pa_matrix_df.iloc[:, 0])} - # now use the dict to replace POS column with NOG IDs - gainloss_df["POS"] = gainloss_df["POS"].map(pos_nog_dict) - # rename the POS column to nog_id and 'expectation' column to 'transfers' - gainloss_df = gainloss_df.rename( - columns={ - "POS": "nog_id", - "expectation": "transfers", - "probability": "transfer_threshold", - } - ) - # retain only the columns 'source_branch', 'recipient_branch', 'transfers', 'nog_id', 'gloome_branch_name' and 'G/L' in that order - column_names = [ - "nog_id", - "source_branch", - "recipient_branch", - "transfers", - "transfer_threshold", - "G/L", - ] - # retain 'gloome_branch_name' only if species_tree_bool is True - if species_tree_bool: - column_names.insert(-1, "gloome_branch_name") - gainloss_df = gainloss_df[column_names] - nw_bw_gains_df = gainloss_df[gainloss_df["G/L"] == "gain"][column_names[:-1]] - - # groupby nog_id and branch and sum the transfers to get nogwise transfers. 
- # set up 'transfer_threshold' column - nw_gains_df = prepare_nogwise_transfer_thresholds_df(nw_bw_gains_df) - - # do the same for a nw_bw_losses_df - nw_bw_losses_df = gainloss_df[gainloss_df["G/L"] == "loss"][column_names[1:-1]] - # rename transfers to losses - nw_bw_losses_df = nw_bw_losses_df.rename( - columns={"recipient_branch": "branch", "transfers": "losses"} - ) + # create a dict to store the compiled results + gloome_results_dict = {} + nogwise_gain_dfs = [] + nogwise_branchwise_gain_dfs = {} - return { - f"compiled_transfers.nogwise.gloome.{ml_mp}": nw_gains_df, - f"compiled_transfers.nogwise.branchwise.gloome.{ml_mp}": nw_bw_gains_df, - f"compiled_losses.nogwise.branchwise.gloome.{ml_mp}": nw_bw_losses_df, - } - - -def read_and_compile_gloome_results( + # process each gloome output dir with corresponding gain penalty ratio + for gloome_output_dir in gloome_output_dirs: + gloome_tree = ete3.Tree( + os.path.join(gloome_output_dir, "TheTree.INodes.ph"), format=1 + ) + if input_tree is not None: + # if input tree is provided, map the branch names to the input tree + gloome_node_mapping = map_output_to_input_nodes(input_tree, gloome_tree) + # write this mapping to a tsv file + with open( + os.path.join(gloome_output_dir, "gloome_mp_node_mapping.tsv"), + "w", + ) as gloome_node_mapping_fo: + gloome_node_mapping_fo.write("gloome_node\tinput_tree_node\n") + for gloome_node, input_tree_node in gloome_node_mapping.items(): + gloome_node_mapping_fo.write( + f"{gloome_node}\t{input_tree_node}\n" + ) + else: + gloome_node_mapping = {} + # read in the per-position-per-branch expectation files + all_res_files = [ + f + for f in os.listdir(gloome_output_dir) + if f.startswith("gainLossMP") and f.endswith(".txt") + ] + per_pos_per_branch_expectations_file_path = [ + os.path.join(gloome_output_dir, f) + for f in all_res_files + if f.endswith(".PerPosPerBranch.txt") + ][0] + # read in the file: skip commented (#) lines + per_pos_per_branch_expectations_df = pd.read_csv( 
+ per_pos_per_branch_expectations_file_path, comment="#", sep="\t" + ).rename(columns={"branch": "gloome_branch_name"}) + # if input tree is provided, map the gloome branch names to the input tree branch names + if input_tree is not None: + per_pos_per_branch_expectations_df["recipient_branch"] = ( + per_pos_per_branch_expectations_df["gloome_branch_name"] + .map(gloome_node_mapping) + .astype(str) + ) + else: + per_pos_per_branch_expectations_df["recipient_branch"] = ( + per_pos_per_branch_expectations_df["gloome_branch_name"] + ) + # use the pos_nog_dict to replace the POS column with NOG IDs + per_pos_per_branch_expectations_df["POS"] = per_pos_per_branch_expectations_df[ + "POS" + ].map(pos_nog_dict) + # rename the POS column to nog_id and the expectation column to transfers + per_pos_per_branch_expectations_df.rename( + columns={ + "POS": "nog_id", + "expectation": "transfers", + }, + inplace=True, + ) + # retain only rows where G/L is gain + per_pos_per_branch_expectations_df = per_pos_per_branch_expectations_df[ + per_pos_per_branch_expectations_df["G/L"] == "gain" + ] + # add a source_branch column + per_pos_per_branch_expectations_df["source_branch"] = "unknown" + # the transfer threshold column is the gain penalty ratio + # this is contained in the 3rd line of the per_pos_per_branch_expectations_file + # read the file again to get the gain penalty ratio + with open( + per_pos_per_branch_expectations_file_path, "r" + ) as f: + gain_penalty_ratio = f.readlines()[2].strip().split("=")[1].strip() + # add the gain penalty ratio to the df + per_pos_per_branch_expectations_df["transfer_threshold"] = gain_penalty_ratio + # retain only the columns nog_id, source_branch, recipient_branch, gloome_branch_name, transfers, transfer_threshold + nogwise_branchwise_gains_df = per_pos_per_branch_expectations_df[ + [ + "nog_id", + "source_branch", + "recipient_branch", + "gloome_branch_name", + "transfers", + "transfer_threshold", + ] + ] + # add this df to the dict, keyed by its gain penalty ratio + 
nogwise_branchwise_gain_dfs[gain_penalty_ratio] = nogwise_branchwise_gains_df + + # group by nog_id and sum the transfers, + nogwise_gains_df = nogwise_branchwise_gains_df.copy() + nogwise_gains_df = nogwise_gains_df.groupby("nog_id").sum().reset_index() + # add the transfer_threshold column to the nogwise_gains_df + nogwise_gains_df["transfer_threshold"] = gain_penalty_ratio + nogwise_gains_df = nogwise_gains_df[ + ["nog_id", "transfers", "transfer_threshold"] + ] + # add the nogwise_gains_df to the list + nogwise_gain_dfs.append(nogwise_gains_df) + + # concatenate the nogwise_gain_dfs + nogwise_gains_df = pd.concat(nogwise_gain_dfs, ignore_index=True) + gloome_results_dict[ + f"compiled_transfers.nogwise.gloome.mp."] = nogwise_gains_df + + # for nogwise branchwise, we create a file for each gain penalty ratio + for gain_penalty_ratio, nogwise_branchwise_gains_df in nogwise_branchwise_gain_dfs.items(): + gloome_results_dict[ + f"compiled_transfers.nogwise.branchwise.gloome.mp.{gain_penalty_ratio}." + ] = nogwise_branchwise_gains_df + # return the dictionary + return gloome_results_dict + + +def read_and_compile_ml_gloome_results( gloome_output_dir: str, - input_tree: ete3.Tree, - species_tree_bool: bool, # if species tree was given as input or not pa_matrix_tsv_filepath: str, + input_tree: Optional[ete3.Tree] = None, ): - # if species_tree_bool is True, there was a tree given to GLOOME as input, - # so we can find branchwise transfers and compare it to other programs like ALE, Ranger, etc. 
- # But if it's not available, branchwise transfers are not comparable, so we don't need to write out those files - - all_gloome_results_dict = {} - - gloome_tree = ete3.Tree( - os.path.join(gloome_output_dir, "TheTree.INodes.ph"), format=1 - ) - if species_tree_bool: - # if species tree is available, we can map the internal node names of the GLOOME tree to the input tree - gloome_node_mapping = map_output_to_input_nodes(input_tree, gloome_tree) - else: - # if species tree is not available, it's just the terminal branches that we care about so we don't need to map anything - gloome_node_mapping = {} + """ + Read and compile GLOOME results for maximum-likelihood mode. - # first we process the ML results, and then the MP results - # for each of them, we read and prepare expectations_df and probabilties_df - # case ML: branchwise expectations are from "ExpectationPerBranch.txt" and probabilities are from "gainLossProbExpPerPosPerBranch.txt" - # case MP: branchwise expectations are from "gainLossMP.1.PerBranch.txt" and probabilities are from "gainLossMP.1.PerPosPerBranch.txt" + Parameters: + gloome_output_dir (str): Path to the GLOOME output directory. + pa_matrix_tsv_filepath (str): Path to the PA matrix TSV file. + input_tree (ete3.Tree): ETE3 Tree object representing the input tree. - # case ML: branchwise expectations - ml_expectations_file_path = os.path.join( - gloome_output_dir, "ExpectationPerBranch.txt" - ) - ml_expectations_df = pd.read_csv(ml_expectations_file_path, skiprows=[0], sep="\t") + Returns: + dict: A dictionary containing compiled GLOOME results. + The keys are the file names and the values are the corresponding DataFrames. 
+ """ + # read in the PA matrix TSV file + pa_matrix_df = pd.read_csv(pa_matrix_tsv_filepath, sep="\t") + # create a dict of row number to NOG IDs in pa_matrix_df + pos_nog_dict = {i + 1: nog for i, nog in enumerate(pa_matrix_df.iloc[:, 0])} - # case ML: nogwise and nogwise.branchwise expectations - ml_gainloss_file_path = os.path.join( + # read in the per-position-per-branch expectation file + per_pos_per_branch_expectations_file_path = os.path.join( gloome_output_dir, "gainLossProbExpPerPosPerBranch.txt" ) - ml_gainloss_df = pd.read_csv(ml_gainloss_file_path, skiprows=[0], sep="\t") - - # prepare all compiled files for ML - all_gloome_results_dict.update( - compile_gloome_results( - ml_expectations_df, - ml_gainloss_df, - gloome_node_mapping, - "ml", - species_tree_bool, - pa_matrix_tsv_filepath, + # read in the file: skip commented (#) lines + per_pos_per_branch_expectations_df = pd.read_csv( + per_pos_per_branch_expectations_file_path, comment="#", sep="\t" + ).rename(columns={"branch": "gloome_branch_name"}) + # if input tree is provided, map the branch names to the input tree + if input_tree is not None: + gloome_tree = ete3.Tree( + os.path.join(gloome_output_dir, "TheTree.INodes.ph"), format=1 ) - ) - - # case MP: branchwise expectations - mp_expectations_file_path = os.path.join( - gloome_output_dir, "gainLossMP.1.PerBranch.txt" - ) - # this file has 6 rows to skip instead of just one - mp_expectations_df = pd.read_csv( - mp_expectations_file_path, skiprows=list(range(6)), sep="\t" - ) - - # case MP: nogwise and nogwise.branchwise expectations - mp_gainloss_file_path = os.path.join( - gloome_output_dir, "gainLossMP.1.PerPosPerBranch.txt" - ) - # skip first 5 rows and read in the file - mp_gainloss_df = pd.read_csv( - mp_gainloss_file_path, skiprows=list(range(5)), sep="\t" - ) - - # prepare all compiled files for MP - all_gloome_results_dict.update( - compile_gloome_results( - mp_expectations_df, - mp_gainloss_df, - gloome_node_mapping, - "mp", - 
species_tree_bool, - pa_matrix_tsv_filepath, + gloome_node_mapping = map_output_to_input_nodes(input_tree, gloome_tree) + # write this mapping to a tsv file + with open( + os.path.join(gloome_output_dir, "gloome_ml_node_mapping.tsv"), + "w", + ) as gloome_node_mapping_fo: + gloome_node_mapping_fo.write("gloome_node\tinput_tree_node\n") + for gloome_node, input_tree_node in gloome_node_mapping.items(): + gloome_node_mapping_fo.write( + f"{gloome_node}\t{input_tree_node}\n" + ) + # replace the gloome branch names with the input tree branch names + per_pos_per_branch_expectations_df["recipient_branch"] = ( + per_pos_per_branch_expectations_df["gloome_branch_name"] + .map(gloome_node_mapping) + .astype(str) ) + else: + per_pos_per_branch_expectations_df['recipient_branch'] = per_pos_per_branch_expectations_df[ + "gloome_branch_name" + ] + # use the pos_nog_dict to replace the POS column with NOG IDs + per_pos_per_branch_expectations_df["POS"] = per_pos_per_branch_expectations_df[ + "POS" + ].map(pos_nog_dict) + # rename the POS column to nog_id and expectation column to transfers + per_pos_per_branch_expectations_df.rename( + columns={ + "POS": "nog_id", + "expectation": "transfers", + "probability": "transfer_threshold", + }, + inplace=True, ) + # retain only rows where G/L is gain + per_pos_per_branch_expectations_df = per_pos_per_branch_expectations_df[ + per_pos_per_branch_expectations_df["G/L"] == "gain" + ] + # add a source_branch column + per_pos_per_branch_expectations_df["source_branch"] = "unknown" + # retain only the columns nog_id, source_branch, recipient_branch, gloome_branch_name, transfers, transfer_threshold + nogwise_branchwise_gains_df = per_pos_per_branch_expectations_df[ + [ + "nog_id", + "source_branch", + "recipient_branch", + "gloome_branch_name", + "transfers", + "transfer_threshold", + ] + ] + # add this df to dictionary + gloome_results_dict = { + f"compiled_transfers.nogwise.branchwise.gloome.ml.": nogwise_branchwise_gains_df + } - return 
all_gloome_results_dict + # copy the nogwise_branchwise_gains_df and use prepare_nogwise_transfer_thresholds_df to get nogwise_gains_df + nogwise_gains_df = nogwise_branchwise_gains_df.copy() + # Group by nog_id and sum the transfers + nogwise_gains_df = prepare_nogwise_transfer_thresholds_df(nogwise_gains_df) + # add the nogwise_gains_df to the dictionary + gloome_results_dict[ + f"compiled_transfers.nogwise.gloome.ml."] = nogwise_gains_df + # return the dictionary + return gloome_results_dict