| name | bioinformatics |
| description | Computational biology analysis techniques. Use when analyzing genomic data, sequences, or biological datasets. |
Bioinformatics
Core concepts and patterns for biological data analysis.
Variant Analysis
# Classify variant types
def classify_variant(ref, alt):
    """Classify a single REF/ALT allele pair by variant type.

    Args:
        ref: Reference allele string.
        alt: Alternate allele string (one allele, not a comma-separated list).

    Returns:
        'SNP' when both alleles are exactly one base long.
        'deletion' when ``alt`` is shorter than ``ref`` and every base of
        ``alt`` is accounted for by the bases flanking the removed stretch.
        'insertion' when ``ref`` is shorter than ``alt`` under the same rule.
        'complex' for everything else: equal-length multi-base alleles and
        length changes that also substitute bases (e.g. ``ATG`` -> ``CC``).
    """
    ref_len, alt_len = len(ref), len(alt)
    if ref_len == alt_len == 1:
        return 'SNP'
    if ref_len == alt_len:
        return 'complex'

    # Compare case-insensitively so soft-masked (lower-case) bases still match.
    if alt_len < ref_len:
        shorter, longer, label = alt.upper(), ref.upper(), 'deletion'
    else:
        shorter, longer, label = ref.upper(), alt.upper(), 'insertion'

    # A length difference alone does not make a pure indel: the shorter allele
    # must be fully explained by shared left context plus shared right context.
    # Taking the longest shared prefix first is safe because whatever is left
    # of the shorter allele only gets shorter, and must then match the tail.
    prefix_len = 0
    for short_base, long_base in zip(shorter, longer):
        if short_base != long_base:
            break
        prefix_len += 1
    if not longer.endswith(shorter[prefix_len:]):
        return 'complex'
    return label
# Transition vs transversion
# Ordered (ref, alt) pairs that are transitions: A<->G and C<->T.
transitions = {('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')}
def is_transition(ref, alt):
    """Return True when the single-base change ``ref`` -> ``alt`` is a transition.

    Bases are compared case-insensitively, so lower-case (soft-masked) bases
    such as ``'a'`` -> ``'g'`` are recognised as well.

    Args:
        ref: Reference base.
        alt: Alternate base.

    Returns:
        True for A<->G or C<->T; False for any other pair, including
        multi-base alleles and non-string input.
    """
    # Non-string values can never be in the table; return False rather than
    # failing on the .upper() call below.
    if not (isinstance(ref, str) and isinstance(alt, str)):
        return False
    return (ref.upper(), alt.upper()) in transitions
Common Metrics
# Allele frequency: alternate count divided by the combined ref + alt count.
# NOTE(review): if the counts are plain numbers and both are 0 this division
# raises ZeroDivisionError -- confirm callers never pass zero coverage.
total_count = ref_count + alt_count
af = alt_count / total_count
# Read depth filtering: keep entries whose 'DP' value is at least min_depth.
min_depth = 10
depth_mask = df['DP'] >= min_depth
filtered = df[depth_mask]
# Quality filtering: keep entries whose 'QUAL' value is at least 30.
qual_mask = df['QUAL'] >= 30
high_qual = df[qual_mask]
Genomic Coordinates
# BED format: 0-based, half-open  -> [start, end)
# VCF format: 1-based, closed     -> [pos, pos + len(ref) - 1]
def vcf_to_bed(chrom, pos, ref):
    """Convert a VCF position and REF allele into a BED-style interval.

    Args:
        chrom: Chromosome name, passed through unchanged.
        pos: 1-based VCF position of the first REF base.
        ref: Reference allele; its length sets the interval width.

    Returns:
        Tuple ``(chrom, start, end)`` where ``start`` is ``pos - 1`` and
        ``end`` is ``start + len(ref)``.
    """
    bed_start = pos - 1  # shift from 1-based to 0-based
    bed_end = bed_start + len(ref)  # exclusive end covers every REF base
    return (chrom, bed_start, bed_end)
# Check overlap
def overlaps(start1, end1, start2, end2):
    """Return whether interval 1 and interval 2 share at least one position.

    Both comparisons are strict, so the ends behave as exclusive bounds:
    intervals that merely touch (``end1 == start2``) do not overlap.

    Args:
        start1: Start of the first interval.
        end1: End of the first interval.
        start2: Start of the second interval.
        end2: End of the second interval.

    Returns:
        True when each interval starts before the other one ends.
    """
    first_starts_in_time = start1 < end2
    return first_starts_in_time and start2 < end1