admin管理员组文章数量:1027628
内容复习
作者,Evil Genius
今天我们复习,banksy。
随着经验积累的不断增多,回头看一些方法又会有不一样的理解。
我们分享了很多邻域的分析,主要集中在细胞的细胞邻域,而banksy主要集中在细胞的分子邻域。
就是一个细胞周围环境细胞的基因表达的平均值,作为该细胞的特征进行聚类分析。
针对的平台包括MERFISH、CosMx、CODEX、Xenium等,但是随着panel数量的增多,banksy也慢慢退出舞台了,这说明其核心是一种数据增强的方法。
其中Seurat的HD教程也采用了banksy(8um)分析,说明HD本身数据信息不够,要进行一定程度的数据增强。
核心原理如下
核心就是考虑邻域的分子环境。数据增强之后对于本身的聚类分析是有好处的。
其中最核心的地方就是,依据分子邻域特征区分细胞亚型。
最后看看代码,我们现在基本都要转向Python版本了。
代码语言:python 代码运行次数:0 运行 复制
import os
# BANKSY walkthrough script: load a spatial AnnData object, run QC and
# filtering, normalise and keep highly variable genes, build the BANKSY
# neighbour-augmented matrix, then run PCA/UMAP, Leiden clustering and plotting.
# The script is written as notebook-style cells (`# %%`), so imports stay next
# to the cell that first needs them and statement order is significant.
import numpy as np
import warnings

# Silence all warnings. The filter is installed before scanpy is imported so
# that it is already active during that import.
warnings.filterwarnings("ignore")
import scanpy as sc

sc.logging.print_header()
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.settings.verbosity = 1  # errors (0), warnings (1), info (2), hints (3)
import random

# Note that BANKSY itself is deterministic, here the seeds affect the umap clusters and leiden partition
seed = 0
np.random.seed(seed)
random.seed(seed)

# %%
# Define file paths
file_path = os.path.join("data", "slide_seq", "v1")
gcm_filename = "Cerebellum_MappedDGEForR.csv"

# (Optional) Arguments for load_adata, only needed if the .h5ad file is not present
locations_filename = "locations.csv"
adata_filename = "spatial.h5ad"

from banksy_utils.load_data import load_adata

# To either load data from .h5ad directly or convert raw data to .h5ad format
load_adata_directly = True

# Keys to specify coordinate indexes in the anndata object
coord_keys = ('xcoord', 'ycoord', 'coord_xy')

raw_y, raw_x, adata = load_adata(
    file_path,
    load_adata_directly,
    adata_filename,
    gcm_filename,
    locations_filename,
    coord_keys,
)

# %%
adata.var_names_make_unique()
# NOTE(review): this prefix match is case-sensitive. If the dataset uses
# lower-case mitochondrial symbols ("mt-"), no gene is flagged here -- confirm
# against adata.var_names.
adata.var["mt"] = adata.var_names.str.startswith("MT-")

# Calculates QC metrics and stores them in place on the adata object
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["mt"],
    log1p=True,
    inplace=True,
)

# %%
from banksy_utils.filter_utils import filter_cells

# Filter cells with each respective filter
adata = filter_cells(
    adata,
    min_count=40,
    max_count=1000,
    MT_filter=20,
    gene_filter=10,
)

# %%
from banksy_utils.filter_utils import normalize_total, filter_hvg

# Normalizes the anndata dataset
adata = normalize_total(adata)

# %%
# Keep the top highly variable genes; the second return value is kept under
# the name adata_allgenes.
adata, adata_allgenes = filter_hvg(
    adata,
    n_top_genes=2000,
    flavor="seurat",
)

# %%
from banksy.main import median_dist_to_nearest_neighbour

# set params
# ==========
plot_graph_weights = True
k_geom = 15  # only for fixed type
max_m = 1  # azimuthal transform up to kth order
nbr_weight_decay = "scaled_gaussian"  # can also be "reciprocal", "uniform" or "ranked"

# Find median distance to closest neighbours, the median distance will be `sigma`
nbrs = median_dist_to_nearest_neighbour(adata, key=coord_keys[2])

from banksy.initialize_banksy import initialize_banksy

banksy_dict = initialize_banksy(
    adata,
    coord_keys,
    k_geom,
    nbr_weight_decay=nbr_weight_decay,
    max_m=max_m,
    plt_edge_hist=True,
    plt_nbr_weights=True,
    plt_agf_angles=False,
)

from banksy.main import concatenate_all
from banksy.embed_banksy import generate_banksy_matrix

# The following are the main hyperparameters for BANKSY
resolutions = [0.7]  # clustering resolution(s) passed to the Leiden partition below
pca_dims = [20]  # Dimensionality in which PCA reduces to
lambda_list = [0.2]  # list of lambda parameters

banksy_dict, banksy_matrix = generate_banksy_matrix(
    adata,
    banksy_dict,
    lambda_list,
    max_m,
)

banksy_dict["nonspatial"] = {
    # Here we simply append the nonspatial matrix (adata.X) to obtain the nonspatial clustering results
    0.0: {"adata": concatenate_all([adata.X], 0, adata=adata), }
}
print(banksy_dict['nonspatial'][0.0]['adata'])

from banksy_utils.umap_pca import pca_umap

pca_umap(
    banksy_dict,
    pca_dims=pca_dims,
    add_umap=True,
)

from banksy.cluster_methods import run_Leiden_partition

results_df, max_num_labels = run_Leiden_partition(
    banksy_dict,
    resolutions,
    num_nn=50,
    num_iterations=-1,
    partition_seed=1234,
    match_labels=True,
)

from banksy.plot_banksy import plot_results

c_map = 'tab20'  # specify color map
# Look the weights up under the configured decay type instead of a hard-coded
# 'scaled_gaussian' key, so changing nbr_weight_decay above does not break
# this lookup.
# NOTE(review): index 1 presumably selects the m = 1 graph and therefore needs
# max_m >= 1 -- confirm against initialize_banksy.
weights_graph = banksy_dict[nbr_weight_decay]['weights'][1]

plot_results(
    results_df,
    weights_graph,
    c_map,
    match_labels=True,
    coord_keys=coord_keys,
    max_num_labels=max_num_labels,
    save_path=os.path.join(file_path, 'tmp_png'),
    save_fig=False,  # Save Spatial Plot Only
    save_fullfig=True,  # Save Full Plot
)
生活很好,有你更好
内容复习
作者,Evil Genius
今天我们复习,banksy。
随着经验积累的不断增多,回头看一些方法又会有不一样的理解。
我们分享了很多邻域的分析,主要集中在细胞的细胞邻域,而banksy主要集中在细胞的分子邻域。
就是一个细胞周围环境细胞的基因表达的平均值,作为该细胞的特征进行聚类分析。
针对的平台包括MERFISH、CosMx、CODEX、Xenium等,但是随着panel数量的增多,banksy也慢慢退出舞台了,这说明其核心是一种数据增强的方法。
其中Seurat的HD教程也采用了banksy(8um)分析,说明HD本身数据信息不够,要进行一定程度的数据增强。
核心原理如下
核心就是考虑邻域的分子环境。数据增强之后对于本身的聚类分析是有好处的。
其中最核心的地方就是,依据分子邻域特征区分细胞亚型。
最后看看代码,我们现在基本都要转向Python版本了。
代码语言:python 代码运行次数:0 运行 复制
import os
# BANKSY walkthrough script: load a spatial AnnData object, run QC and
# filtering, normalise and keep highly variable genes, build the BANKSY
# neighbour-augmented matrix, then run PCA/UMAP, Leiden clustering and plotting.
# The script is written as notebook-style cells (`# %%`), so imports stay next
# to the cell that first needs them and statement order is significant.
import numpy as np
import warnings

# Silence all warnings. The filter is installed before scanpy is imported so
# that it is already active during that import.
warnings.filterwarnings("ignore")
import scanpy as sc

sc.logging.print_header()
sc.set_figure_params(facecolor="white", figsize=(8, 8))
sc.settings.verbosity = 1  # errors (0), warnings (1), info (2), hints (3)
import random

# Note that BANKSY itself is deterministic, here the seeds affect the umap clusters and leiden partition
seed = 0
np.random.seed(seed)
random.seed(seed)

# %%
# Define file paths
file_path = os.path.join("data", "slide_seq", "v1")
gcm_filename = "Cerebellum_MappedDGEForR.csv"

# (Optional) Arguments for load_adata, only needed if the .h5ad file is not present
locations_filename = "locations.csv"
adata_filename = "spatial.h5ad"

from banksy_utils.load_data import load_adata

# To either load data from .h5ad directly or convert raw data to .h5ad format
load_adata_directly = True

# Keys to specify coordinate indexes in the anndata object
coord_keys = ('xcoord', 'ycoord', 'coord_xy')

raw_y, raw_x, adata = load_adata(
    file_path,
    load_adata_directly,
    adata_filename,
    gcm_filename,
    locations_filename,
    coord_keys,
)

# %%
adata.var_names_make_unique()
# NOTE(review): this prefix match is case-sensitive. If the dataset uses
# lower-case mitochondrial symbols ("mt-"), no gene is flagged here -- confirm
# against adata.var_names.
adata.var["mt"] = adata.var_names.str.startswith("MT-")

# Calculates QC metrics and stores them in place on the adata object
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["mt"],
    log1p=True,
    inplace=True,
)

# %%
from banksy_utils.filter_utils import filter_cells

# Filter cells with each respective filter
adata = filter_cells(
    adata,
    min_count=40,
    max_count=1000,
    MT_filter=20,
    gene_filter=10,
)

# %%
from banksy_utils.filter_utils import normalize_total, filter_hvg

# Normalizes the anndata dataset
adata = normalize_total(adata)

# %%
# Keep the top highly variable genes; the second return value is kept under
# the name adata_allgenes.
adata, adata_allgenes = filter_hvg(
    adata,
    n_top_genes=2000,
    flavor="seurat",
)

# %%
from banksy.main import median_dist_to_nearest_neighbour

# set params
# ==========
plot_graph_weights = True
k_geom = 15  # only for fixed type
max_m = 1  # azimuthal transform up to kth order
nbr_weight_decay = "scaled_gaussian"  # can also be "reciprocal", "uniform" or "ranked"

# Find median distance to closest neighbours, the median distance will be `sigma`
nbrs = median_dist_to_nearest_neighbour(adata, key=coord_keys[2])

from banksy.initialize_banksy import initialize_banksy

banksy_dict = initialize_banksy(
    adata,
    coord_keys,
    k_geom,
    nbr_weight_decay=nbr_weight_decay,
    max_m=max_m,
    plt_edge_hist=True,
    plt_nbr_weights=True,
    plt_agf_angles=False,
)

from banksy.main import concatenate_all
from banksy.embed_banksy import generate_banksy_matrix

# The following are the main hyperparameters for BANKSY
resolutions = [0.7]  # clustering resolution(s) passed to the Leiden partition below
pca_dims = [20]  # Dimensionality in which PCA reduces to
lambda_list = [0.2]  # list of lambda parameters

banksy_dict, banksy_matrix = generate_banksy_matrix(
    adata,
    banksy_dict,
    lambda_list,
    max_m,
)

banksy_dict["nonspatial"] = {
    # Here we simply append the nonspatial matrix (adata.X) to obtain the nonspatial clustering results
    0.0: {"adata": concatenate_all([adata.X], 0, adata=adata), }
}
print(banksy_dict['nonspatial'][0.0]['adata'])

from banksy_utils.umap_pca import pca_umap

pca_umap(
    banksy_dict,
    pca_dims=pca_dims,
    add_umap=True,
)

from banksy.cluster_methods import run_Leiden_partition

results_df, max_num_labels = run_Leiden_partition(
    banksy_dict,
    resolutions,
    num_nn=50,
    num_iterations=-1,
    partition_seed=1234,
    match_labels=True,
)

from banksy.plot_banksy import plot_results

c_map = 'tab20'  # specify color map
# Look the weights up under the configured decay type instead of a hard-coded
# 'scaled_gaussian' key, so changing nbr_weight_decay above does not break
# this lookup.
# NOTE(review): index 1 presumably selects the m = 1 graph and therefore needs
# max_m >= 1 -- confirm against initialize_banksy.
weights_graph = banksy_dict[nbr_weight_decay]['weights'][1]

plot_results(
    results_df,
    weights_graph,
    c_map,
    match_labels=True,
    coord_keys=coord_keys,
    max_num_labels=max_num_labels,
    save_path=os.path.join(file_path, 'tmp_png'),
    save_fig=False,  # Save Spatial Plot Only
    save_fullfig=True,  # Save Full Plot
)
生活很好,有你更好
本文标签: 内容复习
版权声明:本文标题:内容复习 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://it.en369.cn/jiaocheng/1747424490a2165720.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论