| name | single-cell-rna |
| description | Analyze single-cell RNA sequencing data using Scanpy, Seurat, and other single-cell tools. |
Single-Cell RNA-seq Analysis
Analyze gene expression at single-cell resolution to identify cell types and states.
Scanpy Workflow (Python)
import scanpy as sc
import numpy as np
import pandas as pd
# Read data
# Load the 10x Genomics count matrix. The two readers below are alternatives:
# keep the one that matches the files you have. Running both would only
# discard the first result and require both inputs to exist.
adata = sc.read_10x_mtx('filtered_feature_bc_matrix/')
# Or from H5
# adata = sc.read_10x_h5('filtered_feature_bc_matrix.h5')
# Gene symbols can be duplicated (notably with the H5 reader); make them
# unique so that later label-based gene lookups are unambiguous.
adata.var_names_make_unique()
# Basic filtering
# Drop cells expressing fewer than 200 genes and genes seen in fewer than 3 cells.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
# Quality control
# Flag mitochondrial genes by symbol prefix. 'MT-' matches human gene symbols;
# other species use a different prefix (e.g. 'mt-' for mouse) -- adjust as needed.
adata.var['mt'] = adata.var_names.str.startswith('MT-')
# Adds per-cell metrics such as n_genes_by_counts and pct_counts_mt to adata.obs.
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True)
# Filter by QC metrics
# Keep cells with < 5000 detected genes and < 20% mitochondrial counts.
# .copy() materialises the subset: slicing an AnnData returns a view, and the
# in-place steps below would otherwise trigger an implicit copy plus warning.
keep_cells = (adata.obs.n_genes_by_counts < 5000) & (adata.obs.pct_counts_mt < 20)
adata = adata[keep_cells, :].copy()
# Normalization
# Scale every cell to 10,000 total counts, then log(1 + x) transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Highly variable genes
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
# Freeze the log-normalised, all-gene matrix in .raw before subsetting so it
# stays available for marker detection, plotting and annotation.
adata.raw = adata
# Restrict to highly variable genes; copy so that scaling acts on real data,
# not on a view of the full object.
adata = adata[:, adata.var.highly_variable].copy()
# Scale data
# Unit variance / zero mean per gene, clipping values above 10.
sc.pp.scale(adata, max_value=10)
Dimensionality Reduction
# PCA
# Number of principal components fed into the neighbour graph and t-SNE.
n_pcs_used = 40
sc.tl.pca(adata, svd_solver='arpack')
# Variance explained per component, shown for the first 50 PCs.
sc.pl.pca_variance_ratio(adata, n_pcs=50)
# Neighborhood graph
# k-nearest-neighbour graph (k = 10) built on the leading PCs.
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=n_pcs_used,
)
# UMAP
sc.tl.umap(adata)
# Colour the embedding by QC covariates to spot technical structure.
qc_covariates = ['n_genes_by_counts', 'pct_counts_mt']
sc.pl.umap(adata, color=qc_covariates)
# t-SNE
sc.tl.tsne(adata, n_pcs=n_pcs_used)
Clustering
# Leiden clustering
# Cluster labels are stored in adata.obs under this key.
cluster_key = 'leiden'
sc.tl.leiden(adata, resolution=0.5, key_added=cluster_key)
# Visualize clusters
sc.pl.umap(adata, color=cluster_key)
# Find marker genes
# Rank genes for each cluster against the rest with a Wilcoxon rank-sum test.
sc.tl.rank_genes_groups(
    adata,
    groupby=cluster_key,
    method='wilcoxon',
)
# Top 20 ranked genes per cluster, each panel with its own y-axis.
sc.pl.rank_genes_groups(adata, n_genes=20, sharey=False)
# Get marker genes table
# Ranked-gene results for cluster '0' as a DataFrame.
markers = sc.get.rank_genes_groups_df(adata, group='0')
Seurat Workflow (R)
library(Seurat)
# Read data ----
# Load the 10x count matrix and wrap it in a Seurat object, keeping genes seen
# in at least 3 cells and cells with at least 200 detected genes.
data <- Read10X(data.dir = "filtered_feature_bc_matrix/")
seurat_obj <- CreateSeuratObject(
  counts = data,
  project = "scRNA",
  min.cells = 3,
  min.features = 200
)

# QC metrics ----
# Percentage of counts from genes whose symbol starts with "MT-".
seurat_obj[["percent.mt"]] <- PercentageFeatureSet(
  seurat_obj,
  pattern = "^MT-"
)
qc_features <- c("nFeature_RNA", "nCount_RNA", "percent.mt")
VlnPlot(seurat_obj, features = qc_features)

# Filter ----
# Keep cells with 200 < detected genes < 5000 and < 20% mitochondrial counts.
seurat_obj <- subset(
  seurat_obj,
  subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 20
)

# Normalize ----
seurat_obj <- NormalizeData(seurat_obj)
# Select the 2000 most variable features with the "vst" method.
seurat_obj <- FindVariableFeatures(
  seurat_obj,
  selection.method = "vst",
  nfeatures = 2000
)

# Scale and PCA ----
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj)

# Clustering ----
# The same leading principal components drive the graph and the UMAP.
pca_dims <- 1:30
seurat_obj <- FindNeighbors(seurat_obj, dims = pca_dims)
seurat_obj <- FindClusters(seurat_obj, resolution = 0.5)
seurat_obj <- RunUMAP(seurat_obj, dims = pca_dims)

# Visualization ----
DimPlot(seurat_obj, reduction = "umap")

# Marker genes ----
# Positive markers only, requiring expression in at least 25% of cells.
markers <- FindAllMarkers(
  seurat_obj,
  only.pos = TRUE,
  min.pct = 0.25
)
Cell Type Annotation
# Using marker genes
# Canonical marker genes per expected immune population; the dot plot shows
# their expression across Leiden clusters to support manual labelling.
marker_genes = {
    'T cells': ['CD3D', 'CD3E', 'CD4', 'CD8A'],
    'B cells': ['CD19', 'CD79A', 'MS4A1'],
    'Monocytes': ['CD14', 'LYZ', 'FCGR3A'],
    'NK cells': ['NKG7', 'GNLY', 'KLRD1']
}
sc.pl.dotplot(adata, marker_genes, groupby='leiden')
# Automated annotation with celltypist
import celltypist
# download_models() only fetches the model file to the local cache and returns
# nothing, so load the model explicitly to get a usable Model object.
celltypist.models.download_models(model='Immune_All_Low.pkl')
model = celltypist.models.Model.load(model='Immune_All_Low.pkl')
# celltypist expects log1p-normalised expression (10,000 counts per cell) over
# all genes. adata.X has been scaled and restricted to highly variable genes,
# so rebuild an AnnData from the log-normalised snapshot stored in adata.raw.
adata_lognorm = adata.raw.to_adata()
predictions = celltypist.annotate(adata_lognorm, model=model)
# predicted_labels is a DataFrame indexed by cell; take the label column so a
# single Series (aligned on cell names) is written to adata.obs.
adata.obs['cell_type'] = predictions.predicted_labels['predicted_labels']
Trajectory Analysis
# PAGA
# Cluster-level connectivity graph over the Leiden groups.
sc.tl.paga(adata, groups='leiden')
sc.pl.paga(adata)
# Diffusion pseudotime
sc.tl.diffmap(adata)
# Root the pseudotime at the first cell (by position) assigned to cluster '0'.
in_root_cluster = adata.obs['leiden'] == '0'
root_candidates = np.flatnonzero(in_root_cluster)
adata.uns['iroot'] = root_candidates[0]
sc.tl.dpt(adata)
# Show the resulting pseudotime on the UMAP embedding.
sc.pl.umap(adata, color='dpt_pseudotime')