Source code for spatialcells.spatial._estimateInitialDistance

"""
@authors: Guihong Wan
@date: July 22, 2023
"""

import scanpy as sc
from sklearn.cluster import AgglomerativeClustering


[docs] def estimateInitialDistance(adata, markers_of_interest, sampling_ratio=0.1): """ Use hierarchical clustering to get the checkpoints to estimate the distance parameter for density-based clustering algorithms, e.g., DBSCAN. :param adata: the anndata object :param markers_of_interest: the list of marker names to subset the data :param sampling_ratio: the sampling ratio to subsample the data :returns: the list of distances """ # get the subset data assert len(markers_of_interest) > 0, "markers_of_interest is empty?" condition = False for target_marker in markers_of_interest: try: condition = condition | (adata.obs[target_marker]) except: print(target_marker, "is not found.") return None adata_tmp = adata[condition] assert (sampling_ratio >= 0) and ( sampling_ratio <= 1 ), "sampling_ratio should be [0, 1]" if sampling_ratio < 1: sc.pp.subsample(adata_tmp, fraction=sampling_ratio, random_state=42) X = adata_tmp.obs[["X_centroid", "Y_centroid"]].to_numpy() # compute the distances model = AgglomerativeClustering( distance_threshold=0, n_clusters=None, compute_distances=True ) model = model.fit(X) print("Computing distances...") nsamples = X.shape[0] newnodes = [] layer = 0 distances = [] for i, item in enumerate(model.children_): # print("i="+str(i)+":", item[0], item[1],"d:", model.distances_[i], "-->", nsamples+i) newnodes.append(nsamples + i) if (item[0] in newnodes) and (item[1] in newnodes): # print("-------",layer, model.distances_[i]) newnodes = [] layer += 1 distances.append(model.distances_[i]) # print(min(model.distances_), max(model.distances_), np.median(model.distances_), np.mean(model.distances_)) return distances