Commit 6cfd931a authored by joweb106's avatar joweb106
Browse files

clusters from seurat

parent 46745d1f
def save_state(vlm, name):
    """Save the current analysis state of ``vlm`` via its ``to_hdf5`` method.

    Args:
        vlm: Analysis object exposing ``to_hdf5(name)`` (presumably a
            ``velocyto.VelocytoLoom`` -- confirm against the caller).
        name: Target name handed unchanged to ``vlm.to_hdf5``,
            e.g. "my_velocyto_analysis".
    """
    # Flush explicitly: the message has no trailing newline, so on a
    # line-buffered stdout it would otherwise only show up after the save
    # has already finished.
    print("save analysis state to", name, "... ", end="", flush=True)
    vlm.to_hdf5(name)  # e.g. "my_velocyto_analysis"
    print("Finished")
def set_clusters_from_10x(vlm, path_to_csv):
    """Assign cluster labels and colors from a two-column clusters CSV.

    The CSV is read with ``pd.read_csv``; its first column is matched against
    ``vlm.ca["CellID"]`` and its second column is formatted with ``'%d'`` to
    obtain the cluster label of each cell. Labels are reordered to follow the
    cell ordering of ``vlm`` before being passed to ``vlm.set_clusters``.
    CSV rows whose cell name does not occur in ``vlm`` are ignored.

    Args:
        vlm: Object exposing ``ca["CellID"]`` and
            ``set_clusters(clusters, cluster_colors_dict=...)``.
        path_to_csv: Path of the CSV file (header row, then one row per cell).

    Raises:
        ValueError: If a cell of ``vlm`` has no entry in the CSV file.
    """
    print("set clusters from 10x reanalyze ... ", end="", flush=True)

    # Read the clusters from the csv file.
    cluster_data = pd.read_csv(path_to_csv).values

    # One pass over the CSV builds a name -> label lookup; this replaces a
    # full-array scan per cell. If a name is listed more than once, the last
    # row wins.
    label_by_cell = {
        cell: "%d" % label  # elements to strings
        for cell, label in zip(cluster_data[:, 0], cluster_data[:, 1])
    }

    # Rearrange the labels to fit the cell ordering of the loom file.
    names = vlm.ca["CellID"]
    missing = [name for name in names if name not in label_by_cell]
    if missing:
        raise ValueError(
            "%d cell(s) of the loom file have no cluster in %s, e.g. %s"
            % (len(missing), path_to_csv, ", ".join(map(str, missing[:5])))
        )
    clusters = np.array([label_by_cell[name] for name in names], dtype=str)

    # Colors of the clusters (RGB, 0-255; scaled below).
    colors_dict = {
        '1': np.array([137, 34, 81]),
        '2': np.array([130, 162, 209]),
        '3': np.array([224, 139, 183]),
        '4': np.array([52, 127, 184]),
        '5': np.array([150, 81, 156]),
        '6': np.array([224, 139, 104]),
        '7': np.array([96, 97, 96]),
        '8': np.array([107, 194, 165]),
        '9': np.array([225, 34, 46]),
        '10': np.array([190, 190, 190]),
        '11': np.array([37, 139, 72]),
        '12': np.array([249, 214, 64]),
        '13': np.array([164, 87, 44]),
        '14': np.array([164, 209, 235]),
        '15': np.array([223, 127, 49]),
        '16': np.array([164, 187, 44]),
        '17': np.array([14, 87, 15]),
        '18': np.array([164, 187, 144]),
    }
    colors_dict = {k: v / 256 for k, v in colors_dict.items()}

    vlm.set_clusters(clusters, cluster_colors_dict=colors_dict)
    print("Finished")
def plot_fractions(vlm, fname = "../plots/fractions.png"):
    """Call ``vlm.plot_fractions(fname)`` and report progress on stdout.

    Args:
        vlm: Object exposing ``plot_fractions(fname)``.
        fname: Value passed on to ``vlm.plot_fractions``; defaults to
            "../plots/fractions.png".
    """
    # Flush explicitly: the message has no trailing newline, so on a
    # line-buffered stdout it would otherwise stay hidden until "Finished".
    print("plot", fname, "...", end="", flush=True)
    vlm.plot_fractions(fname)
    print("Finished")
#vlm.plot_fractions("test.png")
#plt.savefig("test2.png")
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import velocyto as vcy\n",
"\n",
"vlm = vcy.VelocytoLoom(\"../data/52_NGS_EPDC_merged.loom\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vlm.plot_fractions()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'CellID': array(['52_NGS_MI1_EPDC:AAAGATGCACTCGACGx',\n",
" '52_NGS_MI1_EPDC:AAACGGGCACCGTTGGx',\n",
" '52_NGS_MI1_EPDC:AAACCTGAGTGCAAGCx', ...,\n",
" '52_NGS_MI3_EPDC:TTGCGTCCAGAGTGTGx',\n",
" '52_NGS_MI3_EPDC:TTTCCTCCACACCGACx',\n",
" '52_NGS_MI3_EPDC:TTCTACAGTAAAGTCAx'], dtype=object)}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vlm.ca"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vlm.filter_cells(bool_array=vlm.initial_Ucell_size > \n",
" np.percentile(vlm.initial_Ucell_size, 0.5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
%% Cell type:code id: tags:
``` python
import numpy as np
import velocyto as vcy
vlm = vcy.VelocytoLoom("../data/52_NGS_EPDC_merged.loom")
```
%% Cell type:code id: tags:
``` python
vlm.plot_fractions()
```
%% Cell type:code id: tags:
``` python
vlm.ca
```
%%%% Output: execute_result
{'CellID': array(['52_NGS_MI1_EPDC:AAAGATGCACTCGACGx',
'52_NGS_MI1_EPDC:AAACGGGCACCGTTGGx',
'52_NGS_MI1_EPDC:AAACCTGAGTGCAAGCx', ...,
'52_NGS_MI3_EPDC:TTGCGTCCAGAGTGTGx',
'52_NGS_MI3_EPDC:TTTCCTCCACACCGACx',
'52_NGS_MI3_EPDC:TTCTACAGTAAAGTCAx'], dtype=object)}
%% Cell type:code id: tags:
``` python
vlm.filter_cells(bool_array=vlm.initial_Ucell_size >
np.percentile(vlm.initial_Ucell_size, 0.5))
```
%% Cell type:code id: tags:
``` python
```
......@@ -18,39 +18,22 @@ def load_data(fname = "../data/52_NGS_EPDC_merged.loom", ftype="loom"):
print("Finished")
return vlm
def save_state(vlm, name):
    """Save the current analysis state of ``vlm`` via its ``to_hdf5`` method.

    Args:
        vlm: Analysis object exposing ``to_hdf5(name)`` (presumably a
            ``velocyto.VelocytoLoom`` -- confirm against the caller).
        name: Target name handed unchanged to ``vlm.to_hdf5``,
            e.g. "my_velocyto_analysis".
    """
    # Flush explicitly: the message has no trailing newline, so on a
    # line-buffered stdout it would otherwise only show up after the save
    # has already finished.
    print("save analysis state to", name, "... ", end="", flush=True)
    vlm.to_hdf5(name)  # e.g. "my_velocyto_analysis"
    print("Finished")
def set_clusters_from_10x(vlm, path_to_csv):
print("set clusters from 10x reanalyze ... ", end = "")
# read the clusters from the csv file
# and rearrange them to fit the cell ordering of the loom file
cluster_data = pd.read_csv(path_to_csv).values
clusters = np.char.mod('%d', cluster_data[:,1]) # elements to strings
names = vlm.ca["CellID"]
def set_clusters_from_seurat(vlm, fname):
infile = open(fname,"r")
infile.readline()
#new_names = pd.read_csv("../data/EPDC.SCTransform.integrated_minus9_13_6_CellNames.csv").values[:,0]
mask = np.array([e in names for e in cluster_data[:,0]])
"""
names_cl = np.array(["52_NGS_MI"+n.split("-")[1]+"_EPDC:"+n.split("-")[0]+"x" for n in cluster_data[:,0]])
ind = np.array([np.where(names_cl == n) for n in names])
clusters = clusters[ind].reshape((clusters.shape[0]))
"""
ind = np.array([np.where(cluster_data[:,0][mask] == n) for n in names])
clusters = clusters[mask]
clusters = clusters[ind].reshape((clusters.shape[0]))
# colors of the clusters
colors_dict = {'1':np.array([137, 34, 81]),
clusters = []
for line in infile:
line = line.replace("\"", "").strip()
line = line.split(",")[1]
line = line.split("_")
if len(line) == 2:
clusters.append(line[1])
else:
clusters.append("13")
colors_dict = {'0':np.array([14,87,15]),
'1':np.array([137, 34, 81]),
'2':np.array([130, 162, 209]),
'3':np.array([224, 139, 183]),
'4':np.array([52, 127, 184]),
......@@ -62,22 +45,12 @@ def set_clusters_from_10x(vlm, path_to_csv):
'10':np.array([190,190,190]),
'11':np.array([37,139,72]),
'12':np.array([249,214,64]),
'13':np.array([164,87,44]),
'14':np.array([164,209,235]),
'15':np.array([223,127,49]),
'16':np.array([164,187,44]),
'17':np.array([14,87,15]),
'18':np.array([164,187,144]),}
'13':np.array([164,87,44]),}
colors_dict = {k:v/256 for k, v in colors_dict.items()}
vlm.set_clusters(clusters, cluster_colors_dict=colors_dict)
print("Finished")
def plot_fractions(vlm, fname = "../plots/fractions.png"):
print("plot",fname,"...", end="")
vlm.plot_fractions(fname)
print("Finished")
vlm.set_clusters(np.array(clusters), cluster_colors_dict=colors_dict)
def normalize(vlm):
print("normalize S ... ", end = "")
......@@ -164,10 +137,9 @@ def print_time(start):
total_start = time.time()
vlm = load_data(fname="../data/52_NGS_EPDC_merged_filtered_renamed.loom") #load loom file
#vlm.plot_fractions("test.png")
#plt.savefig("test2.png")
set_clusters_from_10x(vlm, "../data/52_NGS_EPDC_reanalyze/outs/analysis/clustering/graphclust/clusters.csv")
set_clusters_from_seurat(vlm, "../data/Seurat_data/clusters.csv")
#set_clusters_from_10x(vlm, "../data/52_NGS_EPDC_reanalyze/outs/analysis/clustering/graphclust/clusters.csv")
filter_data(vlm, verbose = True)
......@@ -182,9 +154,9 @@ vlm.perform_PCA()
print("Finished")
print_time(start)
#print("Plot PCA ... ", end = "")
#plot_PCA(vlm, '../plots/PCA.png')
#print("Finished")
print("Plot PCA ... ", end = "")
plot_PCA(vlm, '../plots/PCA.png')
print("Finished")
print("Do knn pooling ... ", end = "")
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Export gene names, cell names, cluster labels and the PCA/UMAP embeddings
# of a saved Seurat object to CSV files.

# Package setup. All installs run BEFORE Seurat is attached: replacing a
# package that is already loaded in the session can fail (on Windows the
# loaded DLL is locked), and the session would keep using the old build.
install.packages('devtools')
install.packages('Seurat')
install.packages("caTools")
devtools::install_github(repo = 'satijalab/seurat', ref = 'develop')
library(Seurat)

# load() restores the objects stored in the .Robj file into the workspace;
# the object used below is expected to be among them.
load("E:\\Uni\\Projektarbeit\\Datentransfer_klau\\EPDC.SCTransform.integrated_minus9_13_6_for_velocyto.Robj")

# rename
e <- EPDC.SCTransform.integrated_minus9_13_6

# write.csv() does not create missing directories, so make sure the output
# directory exists before the first write.
out_dir <- "Seurat_data_saved"
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

# gene names and cell names: dimnames of the "spliced" assay count matrix
# (presumably rows = genes, columns = cells -- confirm against the object)
print(e@assays$spliced@counts@Dimnames)
write.csv(e@assays$spliced@counts@Dimnames[1], file = file.path(out_dir, "genenames.csv"))
write.csv(e@assays$spliced@counts@Dimnames[2], file = file.path(out_dir, "cellnames.csv"))

# clusters
print(e$celltype_clusters)
write.csv(e$celltype_clusters, file = file.path(out_dir, "clusters.csv"))

# pca
print(e@reductions$pca@cell.embeddings)
write.csv(e@reductions$pca@cell.embeddings, file = file.path(out_dir, "pca.csv"))

# umap (use as tsne?)
print(e@reductions$umap@cell.embeddings)
write.csv(e@reductions$umap@cell.embeddings, file = file.path(out_dir, "umap.csv"))
This diff is collapsed.
plots/TSNE.png

261 KB | W: | H:

plots/TSNE.png

251 KB | W: | H:

plots/TSNE.png
plots/TSNE.png
plots/TSNE.png
plots/TSNE.png
  • 2-up
  • Swipe
  • Onion skin
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment