| name | embedding-generation |
| description | Implements vector embedding generation for the Model Indexer. Covers transformers.js setup, batch processing, content-hash caching, and Voy HNSW index building. |
Embedding Generation Skill
This skill covers generating vector embeddings for semantic search and building the Voy HNSW index.
Architecture
Models ─▶ Text Builder ─▶ Embeddings ─▶ Voy Index ─▶ index.voy
                │              │
                │              └──▶ Embedding Cache (xxhash)
                │
                └──▶ name + description + features + modalities
Embedding Generator
// src/embeddings/generator.ts
import { pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
import pLimit from 'p-limit';
import { logger } from '../utils/logger.js';
const DEFAULT_MODEL = 'Xenova/gte-small';
const DEFAULT_BATCH_SIZE = 32;
const DEFAULT_CONCURRENCY = 8;
export class EmbeddingGenerator {
private embedder: FeatureExtractionPipeline | null = null;
private modelId: string;
private concurrency: number;
constructor(options?: { modelId?: string; concurrency?: number }) {
this.modelId = options?.modelId ?? DEFAULT_MODEL;
this.concurrency = options?.concurrency ?? DEFAULT_CONCURRENCY;
}
async initialize(): Promise<void> {
logger.info(`Loading embedding model: ${this.modelId}`);
this.embedder = await pipeline('feature-extraction', this.modelId, {
dtype: 'fp32',
});
logger.success(`Embedding model loaded`);
}
async embed(text: string): Promise<Float32Array> {
if (!this.embedder) throw new Error('Generator not initialized');
const output = await this.embedder(text, {
pooling: 'mean',
normalize: true,
});
return output.data as Float32Array;
}
async embedBatch(
items: Array<{ id: string; text: string }>,
options?: {
batchSize?: number;
onProgress?: (completed: number, total: number) => void;
},
): Promise<Map<string, Float32Array>> {
const batchSize = options?.batchSize ?? DEFAULT_BATCH_SIZE;
const limit = pLimit(this.concurrency);
const results = new Map<string, Float32Array>();
// Process in batches to manage memory
for (let i = 0; i < items.length; i += batchSize) {
const batch = items.slice(i, i + batchSize);
const batchResults = await Promise.all(
batch.map((item) =>
limit(async () => {
try {
const embedding = await this.embed(item.text);
return { id: item.id, embedding, error: null };
} catch (error) {
return { id: item.id, embedding: null, error };
}
}),
),
);
for (const result of batchResults) {
if (result.embedding) {
results.set(result.id, result.embedding);
} else {
logger.warn(`Failed to embed ${result.id}: ${result.error}`);
}
}
options?.onProgress?.(Math.min(i + batchSize, items.length), items.length);
}
return results;
}
dispose(): void {
this.embedder = null;
}
}
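A minimal usage sketch (the ids and texts below are placeholders, not registry data):

```ts
const generator = new EmbeddingGenerator({ concurrency: 4 });
await generator.initialize();

const embeddings = await generator.embedBatch(
  [
    { id: 'model-a', text: 'chat model with vision and tool use' },
    { id: 'model-b', text: 'small embedding model for semantic search' },
  ],
  { onProgress: (done, total) => logger.info(`Embedded ${done}/${total}`) },
);

// gte-small produces 384-dimensional vectors
console.log(embeddings.get('model-a')?.length);
generator.dispose();
```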
Text Builder
// src/embeddings/text-builder.ts
import type { AIModel } from '@ai-model-registry/spec';
const MAX_TEXT_LENGTH = 512;
/**
* Build embedding text from model metadata.
* Prioritizes searchable fields: name, description, provider, modalities.
*/
export function buildEmbeddingText(model: AIModel): string {
const parts: string[] = [
model.name,
model.description,
model.provider,
];
// Add modalities
if (model.modalities.inputs.length > 0) {
parts.push(`inputs: ${model.modalities.inputs.join(' ')}`);
}
if (model.modalities.outputs.length > 0) {
parts.push(`outputs: ${model.modalities.outputs.join(' ')}`);
}
// Add key features
if (model.features.vision) parts.push('vision');
if (model.features.tools.functionCalling) parts.push('function calling tools');
if (model.features.reasoning.enabled) parts.push('reasoning chain of thought');
if (model.features.embeddings) parts.push('embeddings');
// Add tasks if present
if (model.tasks?.length) {
parts.push(model.tasks.join(' '));
}
// Add detailed description if present
if (model.details?.description) {
parts.push(model.details.description);
}
return parts.join(' ').slice(0, MAX_TEXT_LENGTH);
}
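For illustration, a hypothetical model record (values invented; the `as AIModel` cast is used because the real spec has more fields) produces text like the comment below:

```ts
const example = {
  name: 'Example Chat Model',
  description: 'General-purpose chat model',
  provider: 'example-ai',
  modalities: { inputs: ['text', 'image'], outputs: ['text'] },
  features: {
    vision: true,
    tools: { functionCalling: true },
    reasoning: { enabled: false },
    embeddings: false,
  },
  tasks: ['chat', 'summarization'],
} as AIModel;

buildEmbeddingText(example);
// => "Example Chat Model General-purpose chat model example-ai inputs: text image
//     outputs: text vision function calling tools chat summarization"
```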
Embedding Cache
// src/embeddings/cache.ts
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { join } from 'node:path';
import xxhash from 'xxhash-wasm';
interface CacheEntry {
hash: string;
embedding: number[];
createdAt: string;
}
export class EmbeddingCache {
private cacheDir: string;
private h64: ((input: string) => bigint) | null = null;
constructor(cacheDir: string = '.cache/embeddings') {
this.cacheDir = cacheDir;
}
async initialize(): Promise<void> {
await mkdir(this.cacheDir, { recursive: true });
const { h64 } = await xxhash();
this.h64 = h64;
}
private hashText(text: string): string {
if (!this.h64) throw new Error('Cache not initialized');
return this.h64(text).toString(16);
}
async get(text: string): Promise<Float32Array | null> {
const hash = this.hashText(text);
const cachePath = join(this.cacheDir, `${hash}.json`);
try {
const data = await readFile(cachePath, 'utf-8');
const entry: CacheEntry = JSON.parse(data);
return new Float32Array(entry.embedding);
} catch {
return null;
}
}
async set(text: string, embedding: Float32Array): Promise<void> {
const hash = this.hashText(text);
const cachePath = join(this.cacheDir, `${hash}.json`);
const entry: CacheEntry = {
hash,
embedding: Array.from(embedding),
createdAt: new Date().toISOString(),
};
await writeFile(cachePath, JSON.stringify(entry));
}
}
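A get-or-compute sketch combining the cache with the generator (a helper, not part of either class above):

```ts
async function embedWithCache(
  generator: EmbeddingGenerator,
  cache: EmbeddingCache,
  text: string,
): Promise<Float32Array> {
  // Reuse the on-disk vector when this exact text was embedded before
  const cached = await cache.get(text);
  if (cached) return cached;

  const embedding = await generator.embed(text);
  await cache.set(text, embedding);
  return embedding;
}
```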
Voy Index Builder
// src/output/voy-builder.ts
import { Voy } from 'voy-search';
import { logger } from '../utils/logger.js';
export interface IndexedModel {
canonicalSlug: string;
embedding: Float32Array;
}
export class VoyIndexBuilder {
  // voy-search resources carry id, title, and url alongside the vector;
  // only the id matters here, so the slug doubles as a filler title/url
  private documents: Array<{ id: string; title: string; url: string; embeddings: number[] }> = [];
  add(canonicalSlug: string, embedding: Float32Array): void {
    this.documents.push({
      id: canonicalSlug,
      title: canonicalSlug,
      url: `/models/${canonicalSlug}`,
      embeddings: Array.from(embedding),
    });
  }
addBatch(models: IndexedModel[]): void {
for (const model of models) {
this.add(model.canonicalSlug, model.embedding);
}
}
build(): Voy {
logger.info(`Building Voy index with ${this.documents.length} documents`);
return new Voy({ embeddings: this.documents });
}
  serialize(): string {
    const index = this.build();
    // voy-search serializes the index to a string
    const serialized = index.serialize();
    logger.success(`Voy index serialized: ${(Buffer.byteLength(serialized) / 1024).toFixed(1)} KB`);
    return serialized;
  }
get size(): number {
return this.documents.length;
}
}
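On the consumer side, the index can be loaded and queried roughly as below. This is a sketch against the voy-search API as documented in its README (`Voy.deserialize`, `search(query, k)` returning `neighbors`); verify the exact shapes against the package typings. `serializedIndex` and `generator` are assumed to exist already.

```ts
import { Voy } from 'voy-search';

// Rebuild the index from the serialized form produced by VoyIndexBuilder
const index = Voy.deserialize(serializedIndex);

// Embed the query text and fetch the 5 nearest neighbors
const query = await generator.embed('multimodal model with function calling');
const { neighbors } = index.search(query, 5);

for (const neighbor of neighbors) {
  console.log(neighbor.id); // canonicalSlug of a matching model
}
```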
Incremental Embeddings
// src/embeddings/incremental.ts
import xxhash from 'xxhash-wasm';
import type { AIModel } from '@ai-model-registry/spec';
import { EmbeddingGenerator } from './generator.js';
import { EmbeddingCache } from './cache.js';
import { buildEmbeddingText } from './text-builder.js';
import { logger } from '../utils/logger.js';
interface EmbeddingManifest {
  modelId: string;
  dimensions: number;
  models: Record<string, {
    hash: string; // Content hash of the embedding text
    embeddedAt: string;
  }>;
}
export class IncrementalEmbedder {
  private generator: EmbeddingGenerator;
  private cache: EmbeddingCache;
  private h64: ((input: string) => bigint) | null = null;
  constructor(generator: EmbeddingGenerator, cache: EmbeddingCache) {
    this.generator = generator;
    this.cache = cache;
  }
  async initialize(): Promise<void> {
    const { h64 } = await xxhash();
    this.h64 = h64;
  }
  private hashContent(text: string): string {
    if (!this.h64) throw new Error('IncrementalEmbedder not initialized');
    return this.h64(text).toString(16);
  }
  async embedModels(
    models: AIModel[],
    previousManifest?: EmbeddingManifest,
  ): Promise<Map<string, Float32Array>> {
    const results = new Map<string, Float32Array>();
    const toEmbed: Array<{ model: AIModel; text: string }> = [];
    for (const model of models) {
      const text = buildEmbeddingText(model);
      const hash = this.hashContent(text);
      // Unchanged content hash: try to reuse the cached embedding
      if (previousManifest?.models[model.canonicalSlug]?.hash === hash) {
        const cached = await this.cache.get(text);
        if (cached) {
          results.set(model.canonicalSlug, cached);
          continue;
        }
      }
      toEmbed.push({ model, text });
    }
    logger.info(`Embedding ${toEmbed.length} models (${models.length - toEmbed.length} cached)`);
    // Embed only new/changed models
    const newEmbeddings = await this.generator.embedBatch(
      toEmbed.map(({ model, text }) => ({ id: model.canonicalSlug, text })),
    );
    for (const { model, text } of toEmbed) {
      const embedding = newEmbeddings.get(model.canonicalSlug);
      if (!embedding) continue;
      results.set(model.canonicalSlug, embedding);
      await this.cache.set(text, embedding);
    }
    return results;
  }
}
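A sketch of persisting the updated manifest after a run, so the next build can skip unchanged models. The manifest path and the standalone `h64` parameter are assumptions for illustration; `writeFile` comes from `node:fs/promises`.

```ts
async function writeManifest(
  models: AIModel[],
  h64: (input: string) => bigint,
  path = '.cache/embeddings/manifest.json',
): Promise<void> {
  const manifest: EmbeddingManifest = {
    modelId: 'Xenova/gte-small',
    dimensions: 384,
    models: {},
  };
  for (const model of models) {
    manifest.models[model.canonicalSlug] = {
      hash: h64(buildEmbeddingText(model)).toString(16),
      embeddedAt: new Date().toISOString(),
    };
  }
  await writeFile(path, JSON.stringify(manifest, null, 2));
}
```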
Code Patterns
Batched Processing
const BATCH_SIZE = 32;
for (let i = 0; i < items.length; i += BATCH_SIZE) {
const batch = items.slice(i, i + BATCH_SIZE);
await processBatch(batch);
onProgress?.(Math.min(i + BATCH_SIZE, items.length), items.length);
}
Content Hash Caching
import xxhash from 'xxhash-wasm';
const { h64 } = await xxhash();
const hash = h64(text).toString(16);
const cached = cache.get(hash);
if (cached) return cached;
Embedding Model
| Model | Dimensions | Size | Notes |
|---|---|---|---|
| Xenova/gte-small | 384 | ~30MB | Fast, good quality |
| Xenova/bge-small-en | 384 | ~30MB | Alternative |
| Xenova/all-MiniLM-L6-v2 | 384 | ~22MB | Lighter weight |
These models are loaded via @huggingface/transformers, which replaces the older @xenova/transformers package.
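For example, to trade a little quality for a smaller download, point the generator at a lighter model, or configure the pipeline with a quantized dtype (treat the 'q8' value as an assumption to verify against the installed transformers.js version):

```ts
import { pipeline } from '@huggingface/transformers';

// Lighter-weight default model
const generator = new EmbeddingGenerator({ modelId: 'Xenova/all-MiniLM-L6-v2' });

// Or build a pipeline directly with a quantized dtype
const embedder = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
  dtype: 'q8',
});
```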
Pipeline Integration
// In main indexer
async function generateEmbeddings(models: AIModel[]): Promise<void> {
const generator = new EmbeddingGenerator();
await generator.initialize();
const embeddingItems = models.map((m) => ({
id: m.canonicalSlug,
text: buildEmbeddingText(m),
}));
const embeddings = await generator.embedBatch(embeddingItems, {
onProgress: (done, total) => {
logger.info(`Embedding: ${done}/${total} models`);
},
});
const voyBuilder = new VoyIndexBuilder();
for (const model of models) {
const embedding = embeddings.get(model.canonicalSlug);
if (embedding) {
voyBuilder.add(model.canonicalSlug, embedding);
}
}
const serialized = voyBuilder.serialize();
await writeFile('output/index.voy', serialized);
generator.dispose();
}
Output Files
| File | Description | Size (est.) |
|---|---|---|
| index.voy | Serialized HNSW index | ~100KB for 500 models |
| metadata.json | Embedding model info, checksums | ~1KB |
Metadata Structure
interface EmbeddingMetadata {
embeddingModel: string; // "Xenova/gte-small"
embeddingDimensions: number; // 384
totalModels: number;
indexSizeBytes: number;
generatedAt: string;
}
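A sketch of emitting metadata.json at the end of the generateEmbeddings function above, reusing the `voyBuilder` and `serialized` values from that function:

```ts
const metadata: EmbeddingMetadata = {
  embeddingModel: 'Xenova/gte-small',
  embeddingDimensions: 384,
  totalModels: voyBuilder.size,
  indexSizeBytes: Buffer.byteLength(serialized),
  generatedAt: new Date().toISOString(),
};
await writeFile('output/metadata.json', JSON.stringify(metadata, null, 2));
```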
Implementation Checklist
- Set up EmbeddingGenerator with transformers.js
- Create buildEmbeddingText function
- Implement content hash caching with xxhash
- Build VoyIndexBuilder for HNSW index
- Add incremental embedding support
- Memory-efficient batch processing
- Progress callbacks for CLI
- Write index.voy and metadata.json
Full Details
→ docs/specs/03-model-indexer/impl_03_model_indexer.md (Tasks 9-10)