The goal of SPOTlight is to provide a tool that enables the deconvolution of cell types and cell type proportions present within each capture locations comprising mixtures of cells, originally developed for 10X’s Visium - spatial trancsiptomics- technology, it can be used for all technologies returning mixtures of cells. SPOTlight is based on finding topic profile signatures, by means of an NMFreg model, for each cell type and finding which combination fits best the spot we want to deconvolute.
You can install the latest stable version from the GitHub repository SPOTlight with:
# install.packages("devtools")
devtools::install_github("https://github.com/MarcElosua/SPOTlight")
devtools::install_git("https://github.com/MarcElosua/SPOTlight")
Or the latest version in development by downloading the devel branch
devtools::install_github("https://github.com/MarcElosua/SPOTlight", ref = "devel")
devtools::install_git("https://github.com/MarcElosua/SPOTlight", ref = "devel")
library(Matrix)
library(data.table)
library(Seurat)
library(SeuratData)
library(dplyr)
library(gt)
library(SPOTlight)
library(igraph)
library(RColorBrewer)
For the purpose of this tutorial we are going to use adult mouse brain data. The scRNAseq data can be downloaded here while the spatial data is the one put out publicly by 10X and the processed object can be downloaded using SeuratData as shown below.
Load single-cell reference dataset.
path_to_data <- system.file(package = "SPOTlight")
cortex_sc <- readRDS(glue::glue("{path_to_data}/allen_cortex_dwn.rds"))
Load Spatial data
if (! "stxBrain" %in% SeuratData::AvailableData()[, "Dataset"]) {
# If dataset not downloaded proceed to download it
SeuratData::InstallData("stxBrain")
}
# Load data
anterior <- SeuratData::LoadData("stxBrain", type = "anterior1")
set.seed(123)
cortex_sc <- Seurat::SCTransform(cortex_sc, verbose = FALSE) %>%
Seurat::RunPCA(., verbose = FALSE) %>%
Seurat::RunUMAP(., dims = 1:30, verbose = FALSE)
Visualize the clustering
Seurat::DimPlot(cortex_sc,
group.by = "subclass",
label = TRUE) + Seurat::NoLegend()
cortex_sc@meta.data %>%
dplyr::count(subclass) %>%
gt::gt(.[-1, ]) %>%
gt::tab_header(
title = "Cell types present in the reference dataset",
) %>%
gt::cols_label(
subclass = gt::html("Cell Type")
)
Cell types present in the reference dataset | |
---|---|
Cell Type | n |
Astro | 70 |
CR | 7 |
Endo | 70 |
L2/3 IT | 70 |
L4 | 70 |
L5 IT | 70 |
L5 PT | 70 |
L6 CT | 70 |
L6 IT | 70 |
L6b | 70 |
Lamp5 | 70 |
Macrophage | 51 |
Meis2 | 45 |
NP | 70 |
Oligo | 70 |
Peri | 32 |
Pvalb | 70 |
Serpinf1 | 27 |
SMC | 55 |
Sncg | 70 |
Sst | 70 |
Vip | 70 |
VLMC | 67 |
To determine the most important marker genes we can use the function Seurat::FindAllMarkers
which will return the markers for each cluster.
Seurat::Idents(object = cortex_sc) <- cortex_sc@meta.data$subclass
cluster_markers_all <- Seurat::FindAllMarkers(object = cortex_sc,
assay = "SCT",
slot = "data",
verbose = TRUE,
only.pos = TRUE)
saveRDS(object = cluster_markers_all,
file = here::here("inst/markers_sc.RDS"))
set.seed(123)
spotlight_ls <- spotlight_deconvolution(
se_sc = cortex_sc,
counts_spatial = anterior@assays$Spatial@counts,
clust_vr = "subclass", # Variable in sc_seu containing the cell-type annotation
cluster_markers = cluster_markers_all, # Dataframe with the marker genes
cl_n = 100, # number of cells per cell type to use
hvg = 3000, # Number of HVG to use
ntop = NULL, # How many of the marker genes to use (by default all)
transf = "uv", # Perform unit-variance scaling per cell and spot prior to factorzation and NLS
method = "nsNMF", # Factorization method
min_cont = 0 # Remove those cells contributing to a spot below a certain threshold
)
saveRDS(object = spotlight_ls, file = here::here("inst/spotlight_ls.rds"))
Read RDS object
spotlight_ls <- readRDS(file = here::here("inst/spotlight_ls.rds"))
nmf_mod <- spotlight_ls[[1]]
decon_mtrx <- spotlight_ls[[2]]
Before even looking at the decomposed spots we can gain insight on how well the model performed by looking at the topic profiles for the cell types.
The first thing we can do is look at how specific the topic profiles are for each cell type.
h <- NMF::coef(nmf_mod[[1]])
rownames(h) <- paste("Topic", 1:nrow(h), sep = "_")
topic_profile_plts <- SPOTlight::dot_plot_profiles_fun(
h = h,
train_cell_clust = nmf_mod[[2]])
topic_profile_plts[[2]] + ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90),
axis.text = ggplot2::element_text(size = 12))
Next we can take a look at the how the individual topic profiles of each cell within each cell-type behave.
Here we expect that all the cells from the same cell type show a similar topic profile distribution, if not there might be a bit more substructure in that cluster and we may only be capturing one or the other.
topic_profile_plts[[1]] + theme(axis.text.x = element_text(angle = 90),
axis.text = element_text(size = 12))
Lastly we can take a look at which genes are the most important for each topic and therefore get an insight into which genes are driving them.
basis_spotlight <- data.frame(NMF::basis(nmf_mod[[1]]))
colnames(basis_spotlight) <- unique(stringr::str_wrap(nmf_mod[[2]], width = 30))
basis_spotlight %>%
dplyr::arrange(desc(Astro)) %>%
round(., 5) %>%
DT::datatable(., filter = "top")
Join decomposition with metadata
# This is the equivalent to setting min_cont to 0.04
decon_mtrx_sub <- decon_mtrx[, colnames(decon_mtrx) != "res_ss"]
decon_mtrx_sub[decon_mtrx_sub < 0.08] <- 0
decon_mtrx <- cbind(decon_mtrx_sub, "res_ss" = decon_mtrx[, "res_ss"])
rownames(decon_mtrx) <- colnames(anterior)
decon_df <- decon_mtrx %>%
data.frame() %>%
tibble::rownames_to_column("barcodes")
anterior@meta.data <- anterior@meta.data %>%
tibble::rownames_to_column("barcodes") %>%
dplyr::left_join(decon_df, by = "barcodes") %>%
tibble::column_to_rownames("barcodes")
we can use the standard Seurat::SpatialFeaturePlot
to view predicted celltype proportions one at a time.
Seurat::SpatialFeaturePlot(
object = anterior,
features = c("L2.3.IT", "L6b", "Meis2", "Oligo"),
alpha = c(0.1, 1))