name	gene-ontology
description	Perform Gene Ontology (GO) enrichment analysis to identify biological processes, molecular functions, and cellular components.

Gene Ontology Analysis

Name: gene-ontology
Author: benchflow-ai

Gene Ontology (GO) enrichment analysis identifies overrepresented biological terms in gene sets.

GO Categories

Biological Process (BP): Biological objectives (e.g., cell division, apoptosis)
Molecular Function (MF): Biochemical activities (e.g., kinase activity, DNA binding)
Cellular Component (CC): Cellular locations (e.g., nucleus, membrane)

R: clusterProfiler

library(clusterProfiler)
library(org.Hs.eg.db)  # Human annotation

# Convert gene symbols to Entrez IDs
gene_list <- c("TP53", "BRCA1", "EGFR", "MYC", "KRAS")
gene_ids <- bitr(gene_list, fromType = "SYMBOL", 
                 toType = "ENTREZID", OrgDb = org.Hs.eg.db)

# GO enrichment analysis
ego <- enrichGO(
    gene = gene_ids$ENTREZID,
    OrgDb = org.Hs.eg.db,
    ont = "BP",  # BP, MF, CC, or "ALL"
    pAdjustMethod = "BH",
    pvalueCutoff = 0.05,
    qvalueCutoff = 0.05,
    readable = TRUE
)

# View results
head(ego)

# Visualization
barplot(ego, showCategory = 20)
dotplot(ego, showCategory = 20)
cnetplot(ego, categorySize = "pvalue", foldChange = gene_fc)

GSEA with clusterProfiler

# Prepare ranked gene list
gene_list <- sort(deseq_results$log2FoldChange, decreasing = TRUE)
names(gene_list) <- rownames(deseq_results)

# Convert to Entrez IDs
gene_list_entrez <- bitr(names(gene_list), fromType = "SYMBOL",
                         toType = "ENTREZID", OrgDb = org.Hs.eg.db)

# GSEA
gsea_go <- gseGO(
    geneList = gene_list,
    OrgDb = org.Hs.eg.db,
    ont = "BP",
    minGSSize = 10,
    maxGSSize = 500,
    pvalueCutoff = 0.05
)

# GSEA plot
gseaplot2(gsea_go, geneSetID = 1:3)

Python: goatools

from goatools import obo_parser
from goatools.go_enrichment import GOEnrichmentStudy
from goatools.associations import read_associations
import pandas as pd

# Load GO ontology
obo_file = "go-basic.obo"
go = obo_parser.GODag(obo_file)

# Load gene-to-GO associations
associations = read_associations("gene2go.txt")

# Background gene set (all genes in experiment)
background_genes = set(expression_data.index)

# Study genes (differentially expressed)
study_genes = set(sig_genes.index)

# Run enrichment
goe = GOEnrichmentStudy(
    background_genes,
    associations,
    go,
    propagate_counts=True,
    alpha=0.05,
    methods=['fdr_bh']
)

results = goe.run_study(study_genes)

# Filter significant results
significant = [r for r in results if r.p_fdr_bh < 0.05]

# Convert to DataFrame
go_results = pd.DataFrame([{
    'GO_ID': r.GO,
    'name': r.name,
    'namespace': r.NS,
    'p_value': r.p_uncorrected,
    'p_fdr': r.p_fdr_bh,
    'study_count': r.study_count,
    'study_n': r.study_n,
    'enrichment': r.enrichment
} for r in significant])

gProfiler (Web API)

from gprofiler import GProfiler

gp = GProfiler(return_dataframe=True)

# Run enrichment
results = gp.profile(
    organism='hsapiens',
    query=gene_list,
    sources=['GO:BP', 'GO:MF', 'GO:CC', 'KEGG', 'REAC']
)

# Filter results
significant = results[results['p_value'] < 0.05]
print(significant[['source', 'native', 'name', 'p_value', 'intersection_size']])

Visualization

import matplotlib.pyplot as plt
import seaborn as sns

def plot_go_enrichment(results, top_n=20):
    """Create GO enrichment bar plot."""
    top_results = results.nsmallest(top_n, 'p_value')

    plt.figure(figsize=(10, 8))
    plt.barh(range(len(top_results)), -np.log10(top_results['p_value']))
    plt.yticks(range(len(top_results)), top_results['name'])
    plt.xlabel('-log10(p-value)')
    plt.title('GO Enrichment Analysis')
    plt.tight_layout()
    plt.savefig('go_enrichment.png', dpi=150)

Semantic Similarity

library(GOSemSim)

# Calculate semantic similarity between GO terms
hsGO <- godata('org.Hs.eg.db', ont = "BP")

# Between two terms
sim <- goSim("GO:0004060", "GO:0003824", semData = hsGO, measure = "Wang")

# Between gene products
geneSim("TP53", "MDM2", semData = hsGO, measure = "Wang", combine = "BMA")

gene-ontology

Install Skill

SKILL.md

Gene Ontology Analysis

GO Categories

R: clusterProfiler

GSEA with clusterProfiler

Python: goatools

gProfiler (Web API)

Visualization

Semantic Similarity