SKILL.md

name: structural-variants
description: Detect and analyze structural variants including large deletions, duplications, inversions, and translocations.

Structural Variant Detection

Identify large genomic rearrangements including deletions, duplications, inversions, and translocations.

Types of Structural Variants

Type   Description                 Size
DEL    Deletion                    >50 bp
DUP    Duplication                 >50 bp
INV    Inversion                   >50 bp
INS    Insertion                   >50 bp
BND    Breakend (translocation)    N/A
CNV    Copy number variant         Variable
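
As a minimal illustration of how these categories are used downstream, the sketch below maps the abbreviations to descriptions and applies the conventional 50 bp lower bound; the helper is illustrative only and not tied to any particular caller's output.

# SV type abbreviations from the table above
SV_TYPES = {
    'DEL': 'Deletion',
    'DUP': 'Duplication',
    'INV': 'Inversion',
    'INS': 'Insertion',
    'BND': 'Breakend (translocation)',
    'CNV': 'Copy number variant',
}

MIN_SV_SIZE = 50  # conventional size separating SVs from small indels

def is_structural_variant(sv_type, sv_len):
    """Apply the >50 bp convention; breakends and CNVs have no single length."""
    if sv_type in ('BND', 'CNV'):
        return True
    return sv_len is not None and abs(sv_len) > MIN_SV_SIZE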

Manta SV Caller

# Configure Manta
configManta.py \
    --bam sample.bam \
    --referenceFasta reference.fasta \
    --runDir manta_output

# Run Manta
cd manta_output
./runWorkflow.py -j 8

# Output files:
# results/variants/diploidSV.vcf.gz - Diploid SV calls
# results/variants/candidateSV.vcf.gz - All candidate SVs
# results/variants/candidateSmallIndels.vcf.gz - Small indels
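
A quick way to sanity-check the Manta run is to count PASS calls per SV type. This sketch uses cyvcf2 (also used in the analysis section below) and the diploidSV.vcf.gz path listed above.

from collections import Counter
from cyvcf2 import VCF

# Count PASS calls per SV type in Manta's diploid output
type_counts = Counter()
for variant in VCF('manta_output/results/variants/diploidSV.vcf.gz'):
    if variant.FILTER is None:  # cyvcf2 reports PASS as None
        type_counts[variant.INFO.get('SVTYPE')] += 1
print(type_counts)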

DELLY SV Caller

# Call SVs
delly call -g reference.fasta -o delly_calls.bcf sample.bam

# Filter calls
delly filter -f germline -o delly_filtered.bcf delly_calls.bcf

# Convert to VCF
bcftools view delly_filtered.bcf > delly_sv.vcf
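
DELLY flags each call as PRECISE (breakpoints refined by split reads) or IMPRECISE (paired-end evidence only); the flag names below follow DELLY's VCF output, so verify them against your version. A quick tally per SV type over the delly_sv.vcf produced above:

from collections import Counter
from cyvcf2 import VCF

# Count precise vs imprecise DELLY calls per SV type
precision_counts = Counter()
for variant in VCF('delly_sv.vcf'):
    sv_type = variant.INFO.get('SVTYPE')
    precise = bool(variant.INFO.get('PRECISE'))  # flag absent on imprecise calls
    precision_counts[(sv_type, 'PRECISE' if precise else 'IMPRECISE')] += 1
print(precision_counts)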

Lumpy SV Caller

# Extract discordant read pairs
samtools view -b -F 1294 sample.bam > discordant.unsorted.bam

# Extract split reads (extractSplitReads_BwaMem expects SAM on stdin)
samtools view -h sample.bam | \
    extractSplitReads_BwaMem -i stdin | \
    samtools view -Sb - > splitters.unsorted.bam

# Sort the supporting-read BAMs
samtools sort -o discordant.bam discordant.unsorted.bam
samtools sort -o splitters.bam splitters.unsorted.bam

# Run Lumpy
lumpyexpress \
    -B sample.bam \
    -S splitters.bam \
    -D discordant.bam \
    -o lumpy_sv.vcf
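
Lumpy reports supporting-evidence counts per call: PE (discordant pairs), SR (split reads), and SU (their sum) are the INFO fields documented for its VCF output, though it is worth confirming on your version. A minimal evidence filter, keeping calls with at least 4 supporting reads:

from cyvcf2 import VCF, Writer

# Keep Lumpy calls with SU (total supporting reads) >= 4
vcf = VCF('lumpy_sv.vcf')
writer = Writer('lumpy_sv.filtered.vcf', vcf)
for variant in vcf:
    su = variant.INFO.get('SU')
    su = su[0] if isinstance(su, (tuple, list)) else su
    if su is not None and su >= 4:
        writer.write_record(variant)
writer.close()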

SURVIVOR for Merging Calls

# Create list of VCF files
ls *.vcf > vcf_files.txt

# Merge SV calls from multiple callers
SURVIVOR merge vcf_files.txt 1000 2 1 1 0 50 merged_sv.vcf

# Parameters: max_dist, min_callers, same_type, same_strand, estimate_dist, min_size
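
SURVIVOR's merged VCF records per-call support in the SUPP and SUPP_VEC INFO fields (names as in SURVIVOR's output; check your version). A sketch tallying how many callers support each merged call:

from collections import Counter
from cyvcf2 import VCF

# How many merged calls are supported by 1, 2, 3, ... callers
support_counts = Counter()
for variant in VCF('merged_sv.vcf'):
    supp = variant.INFO.get('SUPP')  # SUPP_VEC additionally encodes *which* callers agree
    if supp is not None:
        support_counts[int(supp)] += 1
print(support_counts)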

Python Analysis

from cyvcf2 import VCF
import pandas as pd
import matplotlib.pyplot as plt

def parse_sv_vcf(vcf_file):
    """Parse SV VCF and extract key information."""
    vcf = VCF(vcf_file)

    sv_records = []
    for variant in vcf:
        sv_type = variant.INFO.get('SVTYPE')
        sv_len = variant.INFO.get('SVLEN')
        # Some callers emit SVLEN as a tuple (one value per ALT allele)
        if isinstance(sv_len, (tuple, list)):
            sv_len = sv_len[0] if len(sv_len) else None
        end_pos = variant.INFO.get('END')

        # Prefer END; fall back to POS + |SVLEN|, then to POS itself
        if end_pos:
            end = end_pos
        elif sv_len:
            end = variant.POS + abs(sv_len)
        else:
            end = variant.POS

        sv_records.append({
            'chrom': variant.CHROM,
            'pos': variant.POS,
            'end': end,
            'sv_type': sv_type,
            'sv_len': abs(sv_len) if sv_len else None,
            'qual': variant.QUAL
        })

    return pd.DataFrame(sv_records)

# Analyze SVs
sv_df = parse_sv_vcf('structural_variants.vcf')

# Size distribution by type
def plot_sv_size_distribution(sv_df):
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    for ax, sv_type in zip(axes.flat, ['DEL', 'DUP', 'INV', 'INS']):
        subset = sv_df[sv_df['sv_type'] == sv_type]['sv_len'].dropna()
        if len(subset) > 0:
            ax.hist(subset, bins=50, edgecolor='black')
            ax.set_xlabel('SV Length (bp)')
            ax.set_ylabel('Count')
            ax.set_title(f'{sv_type} Size Distribution (n={len(subset)})')
            ax.set_xscale('log')

    plt.tight_layout()
    plt.savefig('sv_size_distribution.png')

plot_sv_size_distribution(sv_df)
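
The histograms show the size distributions; a compact per-type summary is often enough for a first look. This is a small pandas sketch over the sv_df built above.

# Per-type call count and median size
summary = (
    sv_df.groupby('sv_type')['sv_len']
    .agg(n_calls='size', median_bp='median')
    .sort_values('n_calls', ascending=False)
)
print(summary)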

CNV Analysis with CNVnator

# Extract mapping data
cnvnator -root sample.root -tree sample.bam

# Calculate read-depth histogram (the reference is needed for GC correction)
cnvnator -root sample.root -his 100 -fasta reference.fasta

# Calculate statistics
cnvnator -root sample.root -stat 100

# Partition
cnvnator -root sample.root -partition 100

# Call CNVs
cnvnator -root sample.root -call 100 > cnvs.txt

# Convert to VCF
cnvnator2VCF.pl cnvs.txt reference.fasta > cnvs.vcf
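
CNVnator's text output is tab-separated; the column layout assumed below (type, coordinates, size, normalized read depth, e-values, q0) follows the CNVnator README, but verify it against your version before relying on it.

import pandas as pd

# Parse CNVnator's tab-separated calls into a DataFrame
# Column names per the CNVnator README (check your version's output)
cnv_cols = ['cnv_type', 'coordinates', 'size', 'normalized_rd',
            'e_val1', 'e_val2', 'e_val3', 'e_val4', 'q0']
cnvs = pd.read_csv('cnvs.txt', sep='\t', names=cnv_cols)

# Split chr:start-end coordinates into separate columns
coords = cnvs['coordinates'].str.extract(r'(?P<chrom>[^:]+):(?P<start>\d+)-(?P<end>\d+)')
cnvs = pd.concat([cnvs, coords.astype({'start': int, 'end': int})], axis=1)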

Filtering and Annotation

def filter_sv_calls(sv_df, min_size=50, min_qual=20):
    """Filter SV calls by size and quality."""
    filtered = sv_df[
        (sv_df['sv_len'] >= min_size) &
        (sv_df['qual'] >= min_qual)
    ]
    return filtered

def annotate_sv_genes(sv_df, gene_bed):
    """Annotate SVs with overlapping genes."""
    import pybedtools

    sv_bed = pybedtools.BedTool.from_dataframe(
        sv_df[['chrom', 'pos', 'end']]
    )
    genes = pybedtools.BedTool(gene_bed)

    overlaps = sv_bed.intersect(genes, wa=True, wb=True)

    # Parse results
    annotated = []
    for overlap in overlaps:
        annotated.append({
            'sv_chrom': overlap[0],
            'sv_start': overlap[1],
            'sv_end': overlap[2],
            'gene': overlap[6]  # Assuming gene name in column 4 of gene_bed
        })

    return pd.DataFrame(annotated)
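
Putting the two helpers together, a usage sketch with a hypothetical genes.bed (a BED4 file with the gene name in column 4, as the function assumes):

# 'genes.bed' is a hypothetical file name for illustration
filtered_svs = filter_sv_calls(sv_df, min_size=100, min_qual=30)
sv_gene_overlaps = annotate_sv_genes(filtered_svs, 'genes.bed')
print(sv_gene_overlaps.head())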

Visualization

import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_sv_in_region(sv_df, chrom, start, end):
    """Visualize SVs in a genomic region."""
    region_svs = sv_df[
        (sv_df['chrom'] == chrom) &
        (sv_df['pos'] >= start) &
        (sv_df['end'] <= end)
    ]

    colors = {'DEL': 'red', 'DUP': 'blue', 'INV': 'green', 'INS': 'orange'}

    fig, ax = plt.subplots(figsize=(14, 4))

    for i, (_, sv) in enumerate(region_svs.iterrows()):
        color = colors.get(sv['sv_type'], 'gray')
        rect = patches.Rectangle(
            (sv['pos'], i - 0.3),
            sv['end'] - sv['pos'],
            0.6,
            facecolor=color,
            alpha=0.6
        )
        ax.add_patch(rect)

    ax.set_xlim(start, end)
    ax.set_ylim(-1, len(region_svs))
    ax.set_xlabel(f'Position on {chrom}')
    ax.set_ylabel('SV')
    plt.savefig('sv_region.png')
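
A usage sketch for the region plot (the coordinates are made up for illustration). Because plot_sv_in_region leaves its figure open, a simple color legend can still be attached to the current axes via proxy patches before saving again.

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Coordinates below are illustrative only
plot_sv_in_region(sv_df, 'chr1', 1_000_000, 2_000_000)

# Attach a legend to the figure created by plot_sv_in_region
colors = {'DEL': 'red', 'DUP': 'blue', 'INV': 'green', 'INS': 'orange'}
handles = [mpatches.Patch(facecolor=c, alpha=0.6, label=t) for t, c in colors.items()]
plt.gca().legend(handles=handles, loc='upper right')
plt.savefig('sv_region_with_legend.png')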