---
name: frequency-analysis
description: Perform letter frequency analysis to crack substitution ciphers. Use this skill when analyzing ciphertext patterns, identifying cipher types, or breaking monoalphabetic substitution ciphers using statistical methods.
---
# Frequency Analysis

Analyze letter and pattern frequencies to break classical ciphers.

## Letter Frequency Analysis
```python
from collections import Counter
import string

# English letter frequencies (approximate)
ENGLISH_FREQ = {
    'E': 12.7, 'T': 9.1, 'A': 8.2, 'O': 7.5, 'I': 7.0,
    'N': 6.7, 'S': 6.3, 'H': 6.1, 'R': 6.0, 'D': 4.3,
    'L': 4.0, 'C': 2.8, 'U': 2.8, 'M': 2.4, 'W': 2.4,
    'F': 2.2, 'G': 2.0, 'Y': 2.0, 'P': 1.9, 'B': 1.5,
    'V': 1.0, 'K': 0.8, 'J': 0.15, 'X': 0.15, 'Q': 0.1,
    'Z': 0.07
}

def analyze_frequency(text: str) -> dict:
    """Calculate letter frequency percentages."""
    letters = [c.upper() for c in text if c.isalpha()]
    total = len(letters)
    if total == 0:
        return {}
    counts = Counter(letters)
    return {char: (count / total) * 100 for char, count in counts.items()}

def compare_to_english(freq: dict) -> float:
    """Calculate chi-squared distance from English."""
    chi_sq = 0
    for letter in string.ascii_uppercase:
        observed = freq.get(letter, 0)
        expected = ENGLISH_FREQ.get(letter, 0)
        if expected > 0:
            chi_sq += ((observed - expected) ** 2) / expected
    return chi_sq

# Example usage
ciphertext = "XLMW MW E WIGVIX QIWWEKI"
freq = analyze_frequency(ciphertext)
for letter, pct in sorted(freq.items(), key=lambda x: -x[1]):
    print(f"{letter}: {pct:.1f}%")
```
## Bigram and Trigram Analysis
```python
def analyze_ngrams(text: str, n: int = 2) -> Counter:
    """Count n-grams in text."""
    text = ''.join(c.upper() for c in text if c.isalpha())
    ngrams = [text[i:i+n] for i in range(len(text) - n + 1)]
    return Counter(ngrams)

# Common English bigrams and trigrams
COMMON_BIGRAMS = ['TH', 'HE', 'IN', 'ER', 'AN', 'RE', 'ON', 'AT', 'EN', 'ND']
COMMON_TRIGRAMS = ['THE', 'AND', 'ING', 'HER', 'HAT', 'HIS', 'THA', 'ERE', 'FOR', 'ENT']

ciphertext = "XLMWMWEWIGVIXQIWWEKI"
bigrams = analyze_ngrams(ciphertext, 2)
trigrams = analyze_ngrams(ciphertext, 3)
print("Top bigrams:", bigrams.most_common(5))
print("Top trigrams:", trigrams.most_common(5))
```
## Index of Coincidence
```python
def index_of_coincidence(text: str) -> float:
    """Calculate IC to identify cipher type.
    English text: ~0.067
    Random text: ~0.038
    """
    letters = [c.upper() for c in text if c.isalpha()]
    n = len(letters)
    if n <= 1:
        return 0
    freq = Counter(letters)
    ic = sum(f * (f - 1) for f in freq.values()) / (n * (n - 1))
    return ic

# Use IC to detect cipher type
ciphertext = "XLMWMWEWIGVIXQIWWEKI"
ic = index_of_coincidence(ciphertext)
print(f"Index of Coincidence: {ic:.4f}")

if ic > 0.060:
    print("Likely monoalphabetic substitution")
elif ic > 0.045:
    print("Likely polyalphabetic (Vigenere)")
else:
    print("Likely random or very strong encryption")
```
## Kasiski Examination (Vigenere Key Length)
```python
from collections import defaultdict

def find_repeated_sequences(ciphertext: str, min_len: int = 3) -> dict:
    """Find repeated sequences and their spacings."""
    text = ''.join(c.upper() for c in ciphertext if c.isalpha())
    sequences = defaultdict(list)
    for seq_len in range(min_len, min(len(text) // 2, 10)):
        for i in range(len(text) - seq_len):
            seq = text[i:i + seq_len]
            for j in range(i + seq_len, len(text) - seq_len + 1):
                if text[j:j + seq_len] == seq:
                    sequences[seq].append(j - i)
    return sequences

def kasiski_key_length(ciphertext: str) -> list:
    """Estimate Vigenere key length; returns (candidate length, vote count) pairs."""
    sequences = find_repeated_sequences(ciphertext)
    spacings = []
    for positions in sequences.values():
        spacings.extend(positions)
    if not spacings:
        return []
    # Count small factors of the spacings; the true key length tends to divide most of them
    factors = defaultdict(int)
    for spacing in spacings:
        for i in range(2, min(spacing + 1, 20)):
            if spacing % i == 0:
                factors[i] += 1
    return sorted(factors.items(), key=lambda x: -x[1])[:5]

# Example
ciphertext = "VVHQWVVRHMUSGJGTHKIHTSSEJCHLSFCBGVWCRLRYQTFSVGAHWKCUHWAUGLQHNSLRLJSHBLTSPISPRDXLJSVEEGHLQWKASSKUWEPWQTWVSPGOELKCQYFNSVWLJSNIQKGNRGYBWLWGOVHAKSHXBAXMZSSHXBHGSHTCHLSTWDDWW"
key_lengths = kasiski_key_length(ciphertext)
print("Likely key lengths:", key_lengths)
```
## Pattern Word Matching
```python
def get_word_pattern(word: str) -> str:
    """Get pattern for word matching.
    HELLO -> 0.1.2.2.3
    """
    word = word.upper()
    mapping = {}
    pattern = []
    next_num = 0
    for char in word:
        if char not in mapping:
            mapping[char] = next_num
            next_num += 1
        pattern.append(str(mapping[char]))
    return '.'.join(pattern)

# Common English words by pattern
PATTERN_WORDS = {
    '0.1.2': ['THE', 'AND', 'FOR', 'ARE', 'BUT'],
    '0.1.2.3': ['WITH', 'HAVE', 'THIS', 'FROM'],
    '0.1.2.0': ['THAT', 'HIGH', 'DEAD'],
    '0.1.1.2': ['BEEN', 'FEET', 'GOOD'],
    '0': ['A', 'I'],
}

# Find possible decryptions for a pattern
cipher_word = "XYY"
pattern = get_word_pattern(cipher_word)  # "0.1.1"
print(f"Pattern: {pattern}")
```
## Automated Substitution Solver
```python
import random

def score_text(text: str) -> float:
    """Score text based on English frequency match."""
    freq = analyze_frequency(text)
    return -compare_to_english(freq)  # Higher is better

def apply_key(text: str, key: list) -> str:
    """Apply substitution key to text."""
    table = str.maketrans(string.ascii_uppercase, ''.join(key))
    table.update(str.maketrans(string.ascii_lowercase, ''.join(key).lower()))
    return text.translate(table)

def solve_substitution_hill_climbing(ciphertext: str, iterations: int = 10000):
    """Solve substitution cipher using hill climbing."""
    alphabet = list(string.ascii_uppercase)
    best_key = alphabet.copy()
    random.shuffle(best_key)
    best_score = score_text(apply_key(ciphertext, best_key))
    for _ in range(iterations):
        # Swap two random letters
        new_key = best_key.copy()
        i, j = random.sample(range(26), 2)
        new_key[i], new_key[j] = new_key[j], new_key[i]
        plaintext = apply_key(ciphertext, new_key)
        score = score_text(plaintext)
        if score > best_score:
            best_key = new_key
            best_score = score
    return ''.join(best_key), apply_key(ciphertext, best_key)
```
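
Hill climbing of this kind stalls in local optima, so it is usually run with several random restarts, and single-letter frequency scoring alone is often too weak for short texts (quadgram log-probability scoring is the common upgrade). A usage sketch with restarts; `solve_with_restarts` is a wrapper introduced here for illustration:

```python
def solve_with_restarts(ciphertext: str, restarts: int = 20, iterations: int = 2000):
    """Run the hill climber several times and keep the best-scoring result."""
    best = None
    for _ in range(restarts):
        key, plaintext = solve_substitution_hill_climbing(ciphertext, iterations)
        candidate = (score_text(plaintext), key, plaintext)
        if best is None or candidate[0] > best[0]:
            best = candidate
    return best

best_score, best_key, best_plaintext = solve_with_restarts("XLMW MW E WIGVIX QIWWEKI")
print(best_key, best_plaintext)
```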