| name | embedding-generation |
| description | Implements vector embedding generation for the Model Indexer. Covers transformers.js setup, batch processing, content-hash caching, and Voy HNSW index building. |
Embedding Generation Skill
This skill covers generating vector embeddings for semantic search and building the Voy HNSW index.
Architecture
Models ─▶ Text Builder ─▶ Embeddings ─▶ Voy Index ─▶ index.voy
                │              │
                │              └──▶ Embedding Cache (xxhash)
                │
                └──▶ name + description + features + modalities
Embedding Generator
// src/embeddings/generator.ts
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import pLimit from 'p-limit';
import { logger } from '../utils/logger.js';
const DEFAULT_MODEL = 'Xenova/gte-small';
const DEFAULT_BATCH_SIZE = 32;
const DEFAULT_CONCURRENCY = 8;
export class EmbeddingGenerator {
private embedder: FeatureExtractionPipeline | null = null;
private modelId: string;
private concurrency: number;
constructor(options?: { modelId?: string; concurrency?: number }) {
this.modelId = options?.modelId ?? DEFAULT_MODEL;
this.concurrency = options?.concurrency ?? DEFAULT_CONCURRENCY;
}
async initialize(): Promise<void> {
logger.info(`Loading embedding model: ${this.modelId}`);
this.embedder = await pipeline('feature-extraction', this.modelId, {
dtype: 'fp32',
});
logger.success(`Embedding model loaded`);
}
async embed(text: string): Promise<Float32Array> {
if (!this.embedder) throw new Error('Generator not initialized');
const output = await this.embedder(text, {
pooling: 'mean',
normalize: true,
});
return output.data as Float32Array;
}
async embedBatch(
items: Array<{ id: string; text: string }>,
options?: {
batchSize?: number;
onProgress?: (completed: number, total: number) => void;
},
): Promise<Map<string, Float32Array>> {
const batchSize = options?.batchSize ?? DEFAULT_BATCH_SIZE;
const limit = pLimit(this.concurrency);
const results = new Map<string, Float32Array>();
// Process in batches to manage memory
for (let i = 0; i < items.length; i += batchSize) {
const batch = items.slice(i, i + batchSize);
const batchResults = await Promise.all(
batch.map((item) =>
limit(async () => {
try {
const embedding = await this.embed(item.text);
return { id: item.id, embedding, error: null };
} catch (error) {
return { id: item.id, embedding: null, error };
}
}),
),
);
for (const result of batchResults) {
if (result.embedding) {
results.set(result.id, result.embedding);
} else {
logger.warn(`Failed to embed ${result.id}: ${result.error}`);
}
}
options?.onProgress?.(Math.min(i + batchSize, items.length), items.length);
}
return results;
}
dispose(): void {
this.embedder = null;
}
}
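A minimal usage sketch (the ids and texts below are placeholders, not registry data):

```ts
const generator = new EmbeddingGenerator({ concurrency: 4 });
await generator.initialize();

const embeddings = await generator.embedBatch(
  [
    { id: 'model-a', text: 'chat model with vision and tool use' },
    { id: 'model-b', text: 'small embedding model for semantic search' },
  ],
  { onProgress: (done, total) => logger.info(`Embedded ${done}/${total}`) },
);

// gte-small produces 384-dimensional vectors
console.log(embeddings.get('model-a')?.length);
generator.dispose();
```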
Text Builder
// src/embeddings/text-builder.ts
import type { AIModel } from '@ai-model-registry/spec';
const MAX_TEXT_LENGTH = 512;
/**
* Build embedding text from model metadata.
* Prioritizes searchable fields: name, description, provider, modalities.
*/
export function buildEmbeddingText(model: AIModel): string {
const parts: string[] = [
model.name,
model.description,
model.provider,
];
// Add modalities
if (model.modalities.inputs.length > 0) {
parts.push(`inputs: ${model.modalities.inputs.join(' ')}`);
}
if (model.modalities.outputs.length > 0) {
parts.push(`outputs: ${model.modalities.outputs.join(' ')}`);
}
// Add key features
if (model.features.vision) parts.push('vision');
if (model.features.tools.functionCalling) parts.push('function calling tools');
if (model.features.reasoning.enabled) parts.push('reasoning chain of thought');
if (model.features.embeddings) parts.push('embeddings');
// Add tasks if present
if (model.tasks?.length) {
parts.push(model.tasks.join(' '));
}
// Add detailed description if present
if (model.details?.description) {
parts.push(model.details.description);
}
return parts.join(' ').slice(0, MAX_TEXT_LENGTH);
}
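For illustration, a hypothetical model record (values invented; the `as AIModel` cast is used because the real spec has more fields) produces text like the comment below:

```ts
const example = {
  name: 'Example Chat Model',
  description: 'General-purpose chat model',
  provider: 'example-ai',
  modalities: { inputs: ['text', 'image'], outputs: ['text'] },
  features: {
    vision: true,
    tools: { functionCalling: true },
    reasoning: { enabled: false },
    embeddings: false,
  },
  tasks: ['chat', 'summarization'],
} as AIModel;

buildEmbeddingText(example);
// => "Example Chat Model General-purpose chat model example-ai inputs: text image
//     outputs: text vision function calling tools chat summarization"
```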
Embedding Cache
// src/embeddings/cache.ts
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { join } from 'node:path';
import xxhash from 'xxhash-wasm';
interface CacheEntry {
hash: string;
embedding: number[];
createdAt: string;
}
export class EmbeddingCache {
private cacheDir: string;
private h64: ((input: string) => bigint) | null = null;
constructor(cacheDir: string = '.cache/embeddings') {
this.cacheDir = cacheDir;
}
async initialize(): Promise<void> {
await mkdir(this.cacheDir, { recursive: true });
const { h64 } = await xxhash();
this.h64 = h64;
}
private hashText(text: string): string {
if (!this.h64) throw new Error('Cache not initialized');
return this.h64(text).toString(16);
}
async get(text: string): Promise<Float32Array | null> {
const hash = this.hashText(text);
const cachePath = join(this.cacheDir, `${hash}.json`);
try {
const data = await readFile(cachePath, 'utf-8');
const entry: CacheEntry = JSON.parse(data);
return new Float32Array(entry.embedding);
} catch {
return null;
}
}
async set(text: string, embedding: Float32Array): Promise<void> {
const hash = this.hashText(text);
const cachePath = join(this.cacheDir, `${hash}.json`);
const entry: CacheEntry = {
hash,
embedding: Array.from(embedding),
createdAt: new Date().toISOString(),
};
await writeFile(cachePath, JSON.stringify(entry));
}
}
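A get-or-compute sketch combining the cache with the generator (a helper, not part of either class above):

```ts
async function embedWithCache(
  generator: EmbeddingGenerator,
  cache: EmbeddingCache,
  text: string,
): Promise<Float32Array> {
  // Reuse the on-disk vector when this exact text was embedded before
  const cached = await cache.get(text);
  if (cached) return cached;

  const embedding = await generator.embed(text);
  await cache.set(text, embedding);
  return embedding;
}
```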
Voy Index Builder
// src/output/voy-builder.ts
import { Voy } from 'voy-search';
import { logger } from '../utils/logger.js';
export interface IndexedModel {
canonicalSlug: string;
embedding: Float32Array;
}
export class VoyIndexBuilder {
  // voy-search resources carry id, title, and url alongside the vector;
  // only the id matters here, so the slug doubles as a filler title/url
  private documents: Array<{ id: string; title: string; url: string; embeddings: number[] }> = [];
  add(canonicalSlug: string, embedding: Float32Array): void {
    this.documents.push({
      id: canonicalSlug,
      title: canonicalSlug,
      url: `/models/${canonicalSlug}`,
      embeddings: Array.from(embedding),
    });
  }
addBatch(models: IndexedModel[]): void {
for (const model of models) {
this.add(model.canonicalSlug, model.embedding);
}
}
build(): Voy {
logger.info(`Building Voy index with ${this.documents.length} documents`);
return new Voy({ embeddings: this.documents });
}
  serialize(): string {
    const index = this.build();
    // voy-search serializes the index to a string
    const serialized = index.serialize();
    logger.success(`Voy index serialized: ${(Buffer.byteLength(serialized) / 1024).toFixed(1)} KB`);
    return serialized;
  }
get size(): number {
return this.documents.length;
}
}
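On the consumer side, the index can be loaded and queried roughly as below. This is a sketch against the voy-search API as documented in its README (`Voy.deserialize`, `search(query, k)` returning `neighbors`); verify the exact shapes against the package typings. `serializedIndex` and `generator` are assumed to exist already.

```ts
import { Voy } from 'voy-search';

// Rebuild the index from the serialized form produced by VoyIndexBuilder
const index = Voy.deserialize(serializedIndex);

// Embed the query text and fetch the 5 nearest neighbors
const query = await generator.embed('multimodal model with function calling');
const { neighbors } = index.search(query, 5);

for (const neighbor of neighbors) {
  console.log(neighbor.id); // canonicalSlug of a matching model
}
```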
Incremental Embeddings
// src/embeddings/incremental.ts
import xxhash from 'xxhash-wasm';
import type { AIModel } from '@ai-model-registry/spec';
import { EmbeddingGenerator } from './generator.js';
import { EmbeddingCache } from './cache.js';
import { buildEmbeddingText } from './text-builder.js';
import { logger } from '../utils/logger.js';
interface EmbeddingManifest {
  modelId: string;
  dimensions: number;
  models: Record<string, {
    hash: string; // Content hash of the embedding text
    embeddedAt: string;
  }>;
}
export class IncrementalEmbedder {
  private generator: EmbeddingGenerator;
  private cache: EmbeddingCache;
  private h64: ((input: string) => bigint) | null = null;
  constructor(generator: EmbeddingGenerator, cache: EmbeddingCache) {
    this.generator = generator;
    this.cache = cache;
  }
  async initialize(): Promise<void> {
    const { h64 } = await xxhash();
    this.h64 = h64;
  }
  private hashContent(text: string): string {
    if (!this.h64) throw new Error('IncrementalEmbedder not initialized');
    return this.h64(text).toString(16);
  }
  async embedModels(
    models: AIModel[],
    previousManifest?: EmbeddingManifest,
  ): Promise<Map<string, Float32Array>> {
    const results = new Map<string, Float32Array>();
    const toEmbed: Array<{ model: AIModel; text: string }> = [];
    for (const model of models) {
      const text = buildEmbeddingText(model);
      const hash = this.hashContent(text);
      // Unchanged content hash: try to reuse the cached embedding
      if (previousManifest?.models[model.canonicalSlug]?.hash === hash) {
        const cached = await this.cache.get(text);
        if (cached) {
          results.set(model.canonicalSlug, cached);
          continue;
        }
      }
      toEmbed.push({ model, text });
    }
    logger.info(`Embedding ${toEmbed.length} models (${models.length - toEmbed.length} cached)`);
    // Embed only new/changed models
    const newEmbeddings = await this.generator.embedBatch(
      toEmbed.map(({ model, text }) => ({ id: model.canonicalSlug, text })),
    );
    for (const { model, text } of toEmbed) {
      const embedding = newEmbeddings.get(model.canonicalSlug);
      if (!embedding) continue;
      results.set(model.canonicalSlug, embedding);
      await this.cache.set(text, embedding);
    }
    return results;
  }
}
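A sketch of persisting the updated manifest after a run, so the next build can skip unchanged models. The manifest path and the standalone `h64` parameter are assumptions for illustration; `writeFile` comes from `node:fs/promises`.

```ts
async function writeManifest(
  models: AIModel[],
  h64: (input: string) => bigint,
  path = '.cache/embeddings/manifest.json',
): Promise<void> {
  const manifest: EmbeddingManifest = {
    modelId: 'Xenova/gte-small',
    dimensions: 384,
    models: {},
  };
  for (const model of models) {
    manifest.models[model.canonicalSlug] = {
      hash: h64(buildEmbeddingText(model)).toString(16),
      embeddedAt: new Date().toISOString(),
    };
  }
  await writeFile(path, JSON.stringify(manifest, null, 2));
}
```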
Code Patterns
Batched Processing
const BATCH_SIZE = 32;
for (let i = 0; i < items.length; i += BATCH_SIZE) {
const batch = items.slice(i, i + BATCH_SIZE);
await processBatch(batch);
onProgress?.(Math.min(i + BATCH_SIZE, items.length), items.length);
}
Content Hash Caching
import xxhash from 'xxhash-wasm';
const { h64 } = await xxhash();
const hash = h64(text).toString(16);
const cached = cache.get(hash);
if (cached) return cached;
Embedding Model
| Model | Dimensions | Size | Notes |
|---|---|---|---|
| Xenova/gte-small | 384 | ~30MB | Fast, good quality |
| Xenova/bge-small-en | 384 | ~30MB | Alternative |
| Xenova/all-MiniLM-L6-v2 | 384 | ~22MB | Lighter weight |
These models are loaded via @huggingface/transformers, which replaces the older @xenova/transformers package.
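For example, to trade a little quality for a smaller download, point the generator at a lighter model, or configure the pipeline with a quantized dtype (treat the 'q8' value as an assumption to verify against the installed transformers.js version):

```ts
import { pipeline } from '@huggingface/transformers';

// Lighter-weight default model
const generator = new EmbeddingGenerator({ modelId: 'Xenova/all-MiniLM-L6-v2' });

// Or build a pipeline directly with a quantized dtype
const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
  dtype: 'q8',
});
```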
Pipeline Integration
// In main indexer
async function generateEmbeddings(models: AIModel[]): Promise<void> {
const generator = new EmbeddingGenerator();
await generator.initialize();
const embeddingItems = models.map((m) => ({
id: m.canonicalSlug,
text: buildEmbeddingText(m),
}));
const embeddings = await generator.embedBatch(embeddingItems, {
onProgress: (done, total) => {
logger.info(`Embedding: ${done}/${total} models`);
},
});
const voyBuilder = new VoyIndexBuilder();
for (const model of models) {
const embedding = embeddings.get(model.canonicalSlug);
if (embedding) {
voyBuilder.add(model.canonicalSlug, embedding);
}
}
const serialized = voyBuilder.serialize();
await writeFile('output/index.voy', serialized);
generator.dispose();
}
Output Files
| File | Description | Size (est.) |
|---|---|---|
| index.voy | Serialized HNSW index | ~100KB for 500 models |
| metadata.json | Embedding model info, checksums | ~1KB |
Metadata Structure
interface EmbeddingMetadata {
embeddingModel: string; // "Xenova/gte-small"
embeddingDimensions: number; // 384
totalModels: number;
indexSizeBytes: number;
generatedAt: string;
}
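A sketch of emitting metadata.json at the end of the generateEmbeddings function above, reusing the `voyBuilder` and `serialized` values from that function:

```ts
const metadata: EmbeddingMetadata = {
  embeddingModel: 'Xenova/gte-small',
  embeddingDimensions: 384,
  totalModels: voyBuilder.size,
  indexSizeBytes: Buffer.byteLength(serialized),
  generatedAt: new Date().toISOString(),
};
await writeFile('output/metadata.json', JSON.stringify(metadata, null, 2));
```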
Implementation Checklist
- Set up EmbeddingGenerator with transformers.js
- Create buildEmbeddingText function
- Implement content hash caching with xxhash
- Build VoyIndexBuilder for HNSW index
- Add incremental embedding support
- Memory-efficient batch processing
- Progress callbacks for CLI
- Write index.voy and metadata.json
Full Details
→ docs/specs/03-model-indexer/impl_03_model_indexer.md (Tasks 9-10)