Claude Code Plugins

Community-maintained marketplace

Feedback

Parse, analyze, and manipulate VCF (Variant Call Format) files for variant interpretation. Use when programmatically accessing variant data, calculating statistics, or filtering by quality.

Install Skill

1Download skill
2Enable skills in Claude

Open claude.ai/settings/capabilities and find the "Skills" section

3Upload to Claude

Click "Upload skill" and select the downloaded ZIP file

Note: Please verify skill by going through its instructions before using it.

SKILL.md

name vcf-analysis
description Parse, analyze, and manipulate VCF (Variant Call Format) files for variant interpretation. Use when programmatically accessing variant data, calculating statistics, or filtering by quality.

VCF Analysis

Comprehensive analysis and manipulation of VCF (Variant Call Format) files for genetic variant data.

VCF File Structure

# VCF format
##fileformat=VCFv4.2
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  SAMPLE
chr1    12345   rs123   A       G       99      PASS    DP=30   GT:DP   0/1:15

Python: PyVCF3

import vcf

# Read VCF file
vcf_reader = vcf.Reader(filename='variants.vcf.gz')

# Iterate over records
for record in vcf_reader:
    print(f"Position: {record.CHROM}:{record.POS}")
    print(f"REF: {record.REF}, ALT: {record.ALT}")
    print(f"QUAL: {record.QUAL}")
    print(f"FILTER: {record.FILTER}")

    # Access INFO fields
    if 'DP' in record.INFO:
        print(f"Depth: {record.INFO['DP']}")

    # Access genotypes
    for sample in record.samples:
        print(f"  {sample.sample}: {sample['GT']}")

# Filter specific region
vcf_reader = vcf.Reader(filename='variants.vcf.gz')
for record in vcf_reader.fetch('chr1', 1000000, 2000000):
    print(record)

Python: cyvcf2 (Faster)

from cyvcf2 import VCF, Writer

# Read VCF
vcf = VCF('variants.vcf.gz')

# Get sample names
print(vcf.samples)

# Iterate with filtering
for variant in vcf:
    # Basic properties
    chrom = variant.CHROM
    pos = variant.POS
    ref = variant.REF
    alt = variant.ALT[0] if variant.ALT else None
    qual = variant.QUAL

    # INFO fields
    dp = variant.INFO.get('DP')
    af = variant.INFO.get('AF')

    # Genotypes (0=ref, 1=alt, -1=missing)
    gts = variant.gt_types  # 0=hom_ref, 1=het, 2=hom_alt, 3=unknown

    # Filter by quality
    if qual and qual >= 30:
        print(f"{chrom}:{pos} {ref}>{alt}")

vcf.close()

# Write filtered VCF
def filter_vcf(input_vcf, output_vcf, min_qual=30, min_dp=10):
    """Filter VCF by quality and depth."""
    vcf_in = VCF(input_vcf)
    vcf_out = Writer(output_vcf, vcf_in)

    for variant in vcf_in:
        if variant.QUAL >= min_qual:
            dp = variant.INFO.get('DP', 0)
            if dp >= min_dp:
                vcf_out.write_record(variant)

    vcf_out.close()
    vcf_in.close()

Variant Statistics

from collections import Counter
import numpy as np

def vcf_statistics(vcf_file):
    """Calculate comprehensive VCF statistics."""
    from cyvcf2 import VCF

    vcf = VCF(vcf_file)

    stats = {
        'total_variants': 0,
        'snps': 0,
        'indels': 0,
        'multi_allelic': 0,
        'transitions': 0,
        'transversions': 0,
        'qualities': [],
        'depths': []
    }

    transitions = [('A', 'G'), ('G', 'A'), ('C', 'T'), ('T', 'C')]

    for variant in vcf:
        stats['total_variants'] += 1

        if variant.is_snp:
            stats['snps'] += 1
            # Check Ti/Tv
            if len(variant.ALT) == 1:
                pair = (variant.REF, variant.ALT[0])
                if pair in transitions:
                    stats['transitions'] += 1
                else:
                    stats['transversions'] += 1
        elif variant.is_indel:
            stats['indels'] += 1

        if len(variant.ALT) > 1:
            stats['multi_allelic'] += 1

        if variant.QUAL:
            stats['qualities'].append(variant.QUAL)

        dp = variant.INFO.get('DP')
        if dp:
            stats['depths'].append(dp)

    # Calculate summary statistics
    stats['ti_tv_ratio'] = stats['transitions'] / stats['transversions'] \
        if stats['transversions'] > 0 else float('inf')
    stats['mean_quality'] = np.mean(stats['qualities']) if stats['qualities'] else 0
    stats['mean_depth'] = np.mean(stats['depths']) if stats['depths'] else 0

    return stats

stats = vcf_statistics('variants.vcf.gz')
print(f"Total variants: {stats['total_variants']}")
print(f"SNPs: {stats['snps']}, Indels: {stats['indels']}")
print(f"Ti/Tv ratio: {stats['ti_tv_ratio']:.2f}")

Variant Annotation

def annotate_variant_type(ref, alt):
    """Classify variant type."""
    if len(ref) == 1 and len(alt) == 1:
        return 'SNV'
    elif len(ref) > len(alt):
        return 'deletion'
    elif len(ref) < len(alt):
        return 'insertion'
    else:
        return 'MNV'  # Multi-nucleotide variant

def predict_impact(ref, alt, position, exon_coords):
    """Predict variant impact (simplified)."""
    # Check if in exon
    in_exon = any(start <= position <= end for start, end in exon_coords)

    if not in_exon:
        return 'intronic'

    var_type = annotate_variant_type(ref, alt)

    if var_type == 'SNV':
        return 'missense'  # Simplified - would need codon context
    elif var_type in ['insertion', 'deletion']:
        if (len(alt) - len(ref)) % 3 == 0:
            return 'inframe_indel'
        else:
            return 'frameshift'

    return 'unknown'

Genotype Analysis

def genotype_summary(vcf_file, sample_name):
    """Summarize genotypes for a sample."""
    from cyvcf2 import VCF

    vcf = VCF(vcf_file)
    sample_idx = vcf.samples.index(sample_name)

    counts = {'hom_ref': 0, 'het': 0, 'hom_alt': 0, 'missing': 0}

    for variant in vcf:
        gt_type = variant.gt_types[sample_idx]
        if gt_type == 0:
            counts['hom_ref'] += 1
        elif gt_type == 1:
            counts['het'] += 1
        elif gt_type == 2:
            counts['hom_alt'] += 1
        else:
            counts['missing'] += 1

    return counts

# Calculate heterozygosity
def calculate_heterozygosity(vcf_file, sample_name):
    counts = genotype_summary(vcf_file, sample_name)
    total = counts['hom_ref'] + counts['het'] + counts['hom_alt']
    return counts['het'] / total if total > 0 else 0

VCF to DataFrame

import pandas as pd
from cyvcf2 import VCF

def vcf_to_dataframe(vcf_file, info_fields=None, format_fields=None):
    """Convert VCF to pandas DataFrame."""
    vcf = VCF(vcf_file)

    records = []
    for variant in vcf:
        record = {
            'CHROM': variant.CHROM,
            'POS': variant.POS,
            'ID': variant.ID,
            'REF': variant.REF,
            'ALT': ','.join(variant.ALT) if variant.ALT else '.',
            'QUAL': variant.QUAL,
            'FILTER': ';'.join(variant.FILTER) if variant.FILTER else 'PASS'
        }

        # Add INFO fields
        if info_fields:
            for field in info_fields:
                record[field] = variant.INFO.get(field)

        records.append(record)

    return pd.DataFrame(records)

df = vcf_to_dataframe('variants.vcf.gz', info_fields=['DP', 'AF', 'MQ'])