Claude Code Plugins

Community-maintained marketplace


Implements vector embedding generation for the Model Indexer. Covers transformers.js setup, batch processing, content hashing for cache, and Voy HNSW index building.

Install Skill

1. Download skill
2. Enable skills in Claude: open claude.ai/settings/capabilities and find the "Skills" section
3. Upload to Claude: click "Upload skill" and select the downloaded ZIP file

Note: Please verify the skill by reviewing its instructions before using it.

SKILL.md

name: embedding-generation
description: Implements vector embedding generation for the Model Indexer. Covers transformers.js setup, batch processing, content hashing for cache, and Voy HNSW index building.

Embedding Generation Skill

This skill covers generating vector embeddings for semantic search and building the Voy HNSW index.

Architecture

Models ─▶ Text Builder ─▶ Embeddings ─▶ Voy Index ─▶ index.voy
              │                │
              │                └──▶ Embedding Cache (xxhash)
              │
              └──▶ name + description + features + modalities
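
Each stage maps onto a module in the sections that follow. As a minimal sketch of the wiring, assuming the APIs defined below (cache writes omitted for brevity):

// Illustrative wiring of the stages above; real implementations follow
const text = buildEmbeddingText(model);                        // Text Builder
const embedding =
  (await cache.get(text)) ?? (await generator.embed(text));    // Embeddings + cache
voyBuilder.add(model.canonicalSlug, embedding);                // Voy Index
await writeFile('output/index.voy', voyBuilder.serialize());   // index.voy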

Embedding Generator

// src/embeddings/generator.ts
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import pLimit from 'p-limit';
import { logger } from '../utils/logger.js';

const DEFAULT_MODEL = 'Xenova/gte-small';
const DEFAULT_BATCH_SIZE = 32;
const DEFAULT_CONCURRENCY = 8;

export class EmbeddingGenerator {
  private embedder: FeatureExtractionPipeline | null = null;
  private modelId: string;
  private concurrency: number;

  constructor(options?: { modelId?: string; concurrency?: number }) {
    this.modelId = options?.modelId ?? DEFAULT_MODEL;
    this.concurrency = options?.concurrency ?? DEFAULT_CONCURRENCY;
  }

  async initialize(): Promise<void> {
    logger.info(`Loading embedding model: ${this.modelId}`);
    this.embedder = await pipeline('feature-extraction', this.modelId, {
      dtype: 'fp32',
    });
    logger.success(`Embedding model loaded`);
  }

  async embed(text: string): Promise<Float32Array> {
    if (!this.embedder) throw new Error('Generator not initialized');

    const output = await this.embedder(text, {
      pooling: 'mean',
      normalize: true,
    });

    return output.data as Float32Array;
  }

  async embedBatch(
    items: Array<{ id: string; text: string }>,
    options?: {
      batchSize?: number;
      onProgress?: (completed: number, total: number) => void;
    },
  ): Promise<Map<string, Float32Array>> {
    const batchSize = options?.batchSize ?? DEFAULT_BATCH_SIZE;
    const limit = pLimit(this.concurrency);
    const results = new Map<string, Float32Array>();

    // Process in batches to manage memory
    for (let i = 0; i < items.length; i += batchSize) {
      const batch = items.slice(i, i + batchSize);

      const batchResults = await Promise.all(
        batch.map((item) =>
          limit(async () => {
            try {
              const embedding = await this.embed(item.text);
              return { id: item.id, embedding, error: null };
            } catch (error) {
              return { id: item.id, embedding: null, error };
            }
          }),
        ),
      );

      for (const result of batchResults) {
        if (result.embedding) {
          results.set(result.id, result.embedding);
        } else {
          logger.warn(`Failed to embed ${result.id}: ${result.error}`);
        }
      }

      options?.onProgress?.(Math.min(i + batchSize, items.length), items.length);
    }

    return results;
  }

  dispose(): void {
    this.embedder = null;
  }
}
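
Since embed() returns mean-pooled, L2-normalized vectors, cosine similarity reduces to a plain dot product. A usage sketch (the query strings are illustrative):

// Usage sketch: embed two texts and compare them with a dot product.
const generator = new EmbeddingGenerator();
await generator.initialize();

const a = await generator.embed('multimodal vision model');
const b = await generator.embed('image understanding');

// Vectors are normalized, so the dot product equals cosine similarity
let score = 0;
for (let i = 0; i < a.length; i++) score += a[i] * b[i];
console.log(`similarity: ${score.toFixed(3)}`);

generator.dispose();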

Text Builder

// src/embeddings/text-builder.ts
import type { AIModel } from '@ai-model-registry/spec';

const MAX_TEXT_LENGTH = 512; // characters, not tokens; keeps input comfortably within the embedding model's context window

/**
 * Build embedding text from model metadata.
 * Prioritizes searchable fields: name, description, provider, modalities.
 */
export function buildEmbeddingText(model: AIModel): string {
  const parts: string[] = [
    model.name,
    model.description,
    model.provider,
  ];

  // Add modalities
  if (model.modalities.inputs.length > 0) {
    parts.push(`inputs: ${model.modalities.inputs.join(' ')}`);
  }
  if (model.modalities.outputs.length > 0) {
    parts.push(`outputs: ${model.modalities.outputs.join(' ')}`);
  }

  // Add key features
  if (model.features.vision) parts.push('vision');
  if (model.features.tools.functionCalling) parts.push('function calling tools');
  if (model.features.reasoning.enabled) parts.push('reasoning chain of thought');
  if (model.features.embeddings) parts.push('embeddings');

  // Add tasks if present
  if (model.tasks?.length) {
    parts.push(model.tasks.join(' '));
  }

  // Add detailed description if present
  if (model.details?.description) {
    parts.push(model.details.description);
  }

  return parts.join(' ').slice(0, MAX_TEXT_LENGTH);
}
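
For orientation, here is a hypothetical model record and the text it yields (all field values are invented for illustration):

// Hypothetical input (invented values):
//   name: 'GPT-4o', description: 'Fast multimodal flagship', provider: 'openai'
//   modalities: { inputs: ['text', 'image'], outputs: ['text'] }
//   features: vision and function calling enabled, reasoning and embeddings off
//
// Resulting embedding text:
//   'GPT-4o Fast multimodal flagship openai inputs: text image outputs: text vision function calling tools'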

Embedding Cache

// src/embeddings/cache.ts
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { join } from 'node:path';
import xxhash from 'xxhash-wasm';

interface CacheEntry {
  hash: string;
  embedding: number[];
  createdAt: string;
}

export class EmbeddingCache {
  private cacheDir: string;
  private h64: ((input: string) => bigint) | null = null;

  constructor(cacheDir: string = '.cache/embeddings') {
    this.cacheDir = cacheDir;
  }

  async initialize(): Promise<void> {
    await mkdir(this.cacheDir, { recursive: true });
    const { h64 } = await xxhash();
    this.h64 = h64;
  }

  private hashText(text: string): string {
    if (!this.h64) throw new Error('Cache not initialized');
    return this.h64(text).toString(16);
  }

  async get(text: string): Promise<Float32Array | null> {
    const hash = this.hashText(text);
    const cachePath = join(this.cacheDir, `${hash}.json`);

    try {
      const data = await readFile(cachePath, 'utf-8');
      const entry: CacheEntry = JSON.parse(data);
      return new Float32Array(entry.embedding);
    } catch {
      return null;
    }
  }

  async set(text: string, embedding: Float32Array): Promise<void> {
    const hash = this.hashText(text);
    const cachePath = join(this.cacheDir, `${hash}.json`);

    const entry: CacheEntry = {
      hash,
      embedding: Array.from(embedding),
      createdAt: new Date().toISOString(),
    };

    await writeFile(cachePath, JSON.stringify(entry));
  }
}
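
A read-through usage sketch tying the cache to the generator from the earlier section: consult the cache first, embed and write back on a miss.

// Read-through caching: check the cache, embed on a miss, write back.
const cache = new EmbeddingCache();
await cache.initialize();

async function embedWithCache(text: string): Promise<Float32Array> {
  const cached = await cache.get(text);
  if (cached) return cached;

  const embedding = await generator.embed(text); // EmbeddingGenerator from above
  await cache.set(text, embedding);
  return embedding;
}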

Voy Index Builder

// src/output/voy-builder.ts
import { Voy } from 'voy-search';
import { logger } from '../utils/logger.js';

export interface IndexedModel {
  canonicalSlug: string;
  embedding: Float32Array;
}

export class VoyIndexBuilder {
  // Voy resources require id, title, url, and a plain number[] vector;
  // title carries the slug again and url stays empty since neither is used here.
  private documents: Array<{ id: string; title: string; url: string; embeddings: number[] }> = [];

  add(canonicalSlug: string, embedding: Float32Array): void {
    this.documents.push({
      id: canonicalSlug,
      title: canonicalSlug,
      url: '',
      embeddings: Array.from(embedding),
    });
  }

  addBatch(models: IndexedModel[]): void {
    for (const model of models) {
      this.add(model.canonicalSlug, model.embedding);
    }
  }

  build(): Voy {
    logger.info(`Building Voy index with ${this.documents.length} documents`);
    return new Voy({ embeddings: this.documents });
  }

  serialize(): string {
    const index = this.build();
    const serialized = index.serialize(); // Voy serializes to a string
    logger.success(`Voy index serialized: ${(Buffer.byteLength(serialized) / 1024).toFixed(1)} KB`);
    return serialized;
  }

  get size(): number {
    return this.documents.length;
  }
}
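
On the consumer side, the index round-trips through Voy.deserialize. A query sketch, assuming the query is embedded with the same model used at build time and `serialized` is the string produced above:

// Query-side sketch: load the serialized index and run a k-NN search.
import { Voy } from 'voy-search';

const index = Voy.deserialize(serialized);      // string from VoyIndexBuilder.serialize()
const query = await generator.embed('cheap vision model');
const { neighbors } = index.search(query, 5);   // top-5 nearest models

for (const n of neighbors) {
  console.log(n.id);                            // canonicalSlug
}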

Incremental Embeddings

// src/embeddings/incremental.ts
import type { AIModel } from '@ai-model-registry/spec';
import { EmbeddingGenerator } from './generator.js';
import { EmbeddingCache } from './cache.js';
import { buildEmbeddingText } from './text-builder.js';
import { logger } from '../utils/logger.js';

interface EmbeddingManifest {
  modelId: string;
  dimensions: number;
  models: Record<string, {
    hash: string;      // Content hash of the model's embedding text
    embeddedAt: string;
  }>;
}

export class IncrementalEmbedder {
  constructor(
    private generator: EmbeddingGenerator,
    private cache: EmbeddingCache,
    // Content hasher, e.g. the xxhash h64 used by EmbeddingCache
    private hashContent: (text: string) => string,
  ) {}

  async embedModels(
    models: AIModel[],
    previousManifest?: EmbeddingManifest,
  ): Promise<Map<string, Float32Array>> {
    const results = new Map<string, Float32Array>();
    const toEmbed: AIModel[] = [];
    const texts = new Map<string, string>();

    for (const model of models) {
      const text = buildEmbeddingText(model);
      texts.set(model.canonicalSlug, text);
      const hash = this.hashContent(text);

      // Unchanged since the last run: try to load from cache
      if (previousManifest?.models[model.canonicalSlug]?.hash === hash) {
        const cached = await this.cache.get(text);
        if (cached) {
          results.set(model.canonicalSlug, cached);
          continue;
        }
      }

      toEmbed.push(model);
    }

    logger.info(`Embedding ${toEmbed.length} models (${models.length - toEmbed.length} cached)`);

    // Embed only new/changed models
    const newEmbeddings = await this.generator.embedBatch(
      toEmbed.map((m) => ({ id: m.canonicalSlug, text: texts.get(m.canonicalSlug)! })),
    );

    for (const [id, embedding] of newEmbeddings) {
      results.set(id, embedding);
      await this.cache.set(texts.get(id)!, embedding);
    }

    return results;
  }
}
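
After a run, the next manifest can be rebuilt from the same texts and hashes so the following run skips unchanged models. A sketch, assuming hashContent is the same function handed to IncrementalEmbedder and the output path is illustrative:

// Sketch: rebuild the manifest after an embedding run
const manifest: EmbeddingManifest = {
  modelId: 'Xenova/gte-small',
  dimensions: 384,
  models: Object.fromEntries(
    models.map((m) => {
      const text = buildEmbeddingText(m);
      return [m.canonicalSlug, { hash: hashContent(text), embeddedAt: new Date().toISOString() }];
    }),
  ),
};
await writeFile('.cache/embeddings/manifest.json', JSON.stringify(manifest, null, 2));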

Code Patterns

Batched Processing

const BATCH_SIZE = 32;

for (let i = 0; i < items.length; i += BATCH_SIZE) {
  const batch = items.slice(i, i + BATCH_SIZE);
  await processBatch(batch);
  onProgress?.(Math.min(i + BATCH_SIZE, items.length), items.length);
}

Content Hash Caching

import xxhash from 'xxhash-wasm';

const { h64 } = await xxhash();
const hash = h64(text).toString(16);
const cached = cache.get(hash); // cache: e.g. an in-memory Map<string, Float32Array> keyed by hash
if (cached) return cached;

Embedding Model

Model                    Dimensions  Size   Notes
Xenova/gte-small         384         ~30MB  Fast, good quality
Xenova/bge-small-en      384         ~30MB  Alternative
Xenova/all-MiniLM-L6-v2  384         ~22MB  Lighter weight

The examples use @huggingface/transformers, the successor package to @xenova/transformers.
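
All three models produce 384-dimensional vectors, so swapping is a one-line constructor change:

// Swap models without touching the rest of the pipeline (dimensions stay 384)
const generator = new EmbeddingGenerator({ modelId: 'Xenova/all-MiniLM-L6-v2' });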

Pipeline Integration

// In main indexer (import paths assume this lives in src/index.ts)
import { mkdir, writeFile } from 'node:fs/promises';
import type { AIModel } from '@ai-model-registry/spec';
import { EmbeddingGenerator } from './embeddings/generator.js';
import { buildEmbeddingText } from './embeddings/text-builder.js';
import { VoyIndexBuilder } from './output/voy-builder.js';
import { logger } from './utils/logger.js';

async function generateEmbeddings(models: AIModel[]): Promise<void> {
  const generator = new EmbeddingGenerator();
  await generator.initialize();

  const embeddingItems = models.map((m) => ({
    id: m.canonicalSlug,
    text: buildEmbeddingText(m),
  }));

  const embeddings = await generator.embedBatch(embeddingItems, {
    onProgress: (done, total) => {
      logger.info(`Embedding: ${done}/${total} models`);
    },
  });

  const voyBuilder = new VoyIndexBuilder();
  for (const model of models) {
    const embedding = embeddings.get(model.canonicalSlug);
    if (embedding) {
      voyBuilder.add(model.canonicalSlug, embedding);
    }
  }

  const serialized = voyBuilder.serialize();
  await mkdir('output', { recursive: true }); // ensure the output directory exists
  await writeFile('output/index.voy', serialized);

  generator.dispose();
}

Output Files

File           Description                      Size (est.)
index.voy      Serialized HNSW index            ~100KB for 500 models
metadata.json  Embedding model info, checksums  ~1KB

Metadata Structure

interface EmbeddingMetadata {
  embeddingModel: string;        // "Xenova/gte-small"
  embeddingDimensions: number;   // 384
  totalModels: number;
  indexSizeBytes: number;
  generatedAt: string;
}
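
Continuing from generateEmbeddings above, a sketch of writing metadata.json next to the index (values match the defaults used throughout):

// Sketch: write metadata.json alongside index.voy
const metadata: EmbeddingMetadata = {
  embeddingModel: 'Xenova/gte-small',
  embeddingDimensions: 384,
  totalModels: voyBuilder.size,
  indexSizeBytes: Buffer.byteLength(serialized),
  generatedAt: new Date().toISOString(),
};
await writeFile('output/metadata.json', JSON.stringify(metadata, null, 2));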

Implementation Checklist

  • Set up EmbeddingGenerator with transformers.js
  • Create buildEmbeddingText function
  • Implement content hash caching with xxhash
  • Build VoyIndexBuilder for HNSW index
  • Add incremental embedding support
  • Process batches memory-efficiently
  • Add progress callbacks for the CLI
  • Write index.voy and metadata.json

Full Details

docs/specs/03-model-indexer/impl_03_model_indexer.md (Tasks 9-10)