| name | cloudflare-workers-ai |
| description | Cloudflare Workers AI essentials - text generation, embeddings, image generation, streaming. Trigger: When working with Workers AI, LLMs, text generation, embeddings, image generation. |
| license | Apache-2.0 |
| metadata | [object Object] |
Critical Patterns
Setup Binding
# wrangler.toml
[ai]
binding = "AI"
// Bindings made available to the Worker at runtime.
export interface Env {
// Workers AI binding; the property name matches `binding = "AI"` in the [ai] section of wrangler.toml.
AI: Ai
}
Text Generation
import { createWorkersAI } from "workers-ai-provider"
import { generateText } from "ai"
type Env = { AI: Ai }
export default {
  /**
   * Answers every request with a short generated poem.
   *
   * Builds the Workers AI provider from the `AI` binding, runs a single
   * non-streaming text generation, and returns the result as JSON under
   * the `generatedText` key.
   */
  async fetch(req: Request, env: Env) {
    const workersai = createWorkersAI({ binding: env.AI })
    const model = workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast")

    const generation = await generateText({
      model,
      prompt: "Write a short poem about clouds"
    })

    return Response.json({ generatedText: generation.text })
  }
}
Popular models:
- @cf/meta/llama-3.3-70b-instruct-fp8-fast - Llama 3.3 (large, fast)
- @cf/meta/llama-2-7b-chat-int8 - Llama 2 (smaller)
- @cf/mistral/mistral-7b-instruct-v0.1 - Mistral 7B
Streaming Text
import { streamText } from "ai"
import { Hono } from "hono"
const app = new Hono<{ Bindings: Env }>()

// POST /chat: forwards the `messages` array from the JSON body to the model
// and streams the generated text back to the caller.
app.post("/chat", async (c) => {
  const payload = await c.req.json()
  const messages = payload.messages

  const workersai = createWorkersAI({ binding: c.env.AI })
  const model = workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast")
  const result = await streamText({ model, messages })

  // Headers this guide lists as required for streamed responses.
  const headers = {
    "Content-Type": "text/x-unknown",
    "content-encoding": "identity",
    "transfer-encoding": "chunked"
  }
  return result.toTextStreamResponse({ headers })
})

export default { fetch: app.fetch }
Chat with System Prompt
// Persona and ground rules for the assistant; sent as the first message with role "system".
const systemPrompt = `You are a helpful financial advisor.
Provide accurate, concise advice about personal finance.`
// NOTE: `workersai` and `userMessage` are not defined in this fragment; they
// are expected to come from the surrounding handler.
const result = await streamText({
model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"),
messages: [
// The system message comes first, followed by the end user's input.
{ role: "system", content: systemPrompt },
{ role: "user", content: userMessage }
],
// Upper bound on the number of generated tokens.
maxTokens: 1000,
// Sampling randomness.
temperature: 0.7
})
Text Embeddings
import { embed } from "ai"
// NOTE: `env` is not defined in this fragment; it is expected to be the
// Worker's bindings object from the surrounding handler.
const workersai = createWorkersAI({ binding: env.AI })
// Embed a single string with the BGE base English model.
const { embedding } = await embed({
model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"),
value: "Search engines use embeddings for semantic understanding"
})
// `embedding` is a flat array of numbers, e.g. [0.123, -0.456, ...]
console.log(embedding.length) // 768 for bge-base-en-v1.5
Popular embedding models:
- @cf/baai/bge-base-en-v1.5 - 768 dimensions (recommended)
- @cf/baai/bge-small-en-v1.5 - 384 dimensions (faster)
- @cf/baai/bge-large-en-v1.5 - 1024 dimensions (more accurate)
Batch Embeddings
import { embedMany } from "ai"
// Embed several strings in one call instead of calling `embed` once per string.
// NOTE: `workersai` is not defined in this fragment; it is expected to be the
// provider created with createWorkersAI in the surrounding code.
const { embeddings } = await embedMany({
model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"),
values: [
"First document",
"Second document",
"Third document"
]
})
// `embeddings` is an array of vectors, one per input value.
console.log(embeddings.length) // 3
console.log(embeddings[0].length) // 768
Image Generation
// NOTE(review): some `ai` releases export this function as
// `experimental_generateImage`; confirm the export name for the installed version.
import { generateImage } from "ai"

// NOTE: `env` is not defined in this fragment; it is expected to be the
// Worker's bindings object from the surrounding handler.
const workersai = createWorkersAI({ binding: env.AI })

// Generate one image from a text prompt.
const { image } = await generateImage({
  model: workersai.image("@cf/black-forest-labs/flux-1-schnell"),
  prompt: "A serene mountain landscape at sunset",
  size: "1024x1024"
})

// `image` is a generated-file object, not a response body. Send its raw bytes
// (`image.uint8Array`); passing the object itself would not send the image.
return new Response(image.uint8Array, {
  headers: { "Content-Type": "image/png" }
})
Popular image models:
- @cf/black-forest-labs/flux-1-schnell - FLUX.1 (fast, recommended)
- @cf/stabilityai/stable-diffusion-xl-base-1.0 - SDXL
- @cf/bytedance/stable-diffusion-xl-lightning - SDXL Lightning
Sizes: "1024x1024", "1024x768", "768x1024"
Common Patterns
Complete Chat API
import { Hono } from "hono"
import { createWorkersAI } from "workers-ai-provider"
import { streamText } from "ai"
const app = new Hono<{ Bindings: Env }>()

/**
 * POST /api/chat
 *
 * Expects a JSON body of the form `{ messages: [...] }` and streams the
 * model's reply back as text.
 *
 * Responses:
 * - 400 when the body is not valid JSON or `messages` is not a non-empty array
 * - 500 when starting the generation throws
 */
app.post("/api/chat", async (c) => {
  // Malformed JSON is a client error: report it as 400 instead of letting the
  // parse exception escape the handler as an unhandled 500.
  let body: unknown
  try {
    body = await c.req.json()
  } catch {
    return c.json({ error: "Invalid JSON body" }, 400)
  }

  // Guard against `null` and primitive bodies before reading `messages`.
  const messages =
    typeof body === "object" && body !== null && "messages" in body
      ? body.messages
      : undefined
  if (!Array.isArray(messages) || messages.length === 0) {
    return c.json({ error: "Invalid messages" }, 400)
  }

  const workersai = createWorkersAI({ binding: c.env.AI })

  try {
    const result = await streamText({
      model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"),
      messages,
      maxTokens: 1000, // Upper bound on generated tokens
      temperature: 0.7
    })

    // Headers this guide lists as required for streamed responses.
    return result.toTextStreamResponse({
      headers: {
        "Content-Type": "text/x-unknown",
        "content-encoding": "identity",
        "transfer-encoding": "chunked"
      }
    })
  } catch (error) {
    console.error("AI error:", error)
    return c.json({ error: "Failed" }, 500)
  }
})

export default { fetch: app.fetch }
Semantic Search
/**
 * POST /search
 *
 * Expects a JSON body of the form `{ query: string }`, embeds the query and
 * returns the 10 nearest matches from the `VECTORIZE` binding.
 *
 * Responses:
 * - 400 when the body is not valid JSON or `query` is not a non-empty string
 * - 500 when embedding or the vector query throws
 */
app.post("/search", async (c) => {
  // Malformed JSON is a client error, not a server failure.
  let body: unknown
  try {
    body = await c.req.json()
  } catch {
    return c.json({ error: "Invalid JSON body" }, 400)
  }

  // Guard against `null` and primitive bodies before reading `query`.
  const query =
    typeof body === "object" && body !== null && "query" in body
      ? body.query
      : undefined
  if (typeof query !== "string" || query.trim().length === 0) {
    return c.json({ error: "Invalid query" }, 400)
  }

  const workersai = createWorkersAI({ binding: c.env.AI })

  try {
    // 1. Generate embedding for query
    const { embedding: queryEmbedding } = await embed({
      model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"),
      value: query
    })

    // 2. Search in vector database (e.g., Vectorize)
    const results = await c.env.VECTORIZE.query(queryEmbedding, {
      topK: 10
    })

    return c.json({ results })
  } catch (error) {
    console.error("Search error:", error)
    return c.json({ error: "Search failed" }, 500)
  }
})
Image Generation with Upload to R2
/**
 * POST /generate-image
 *
 * Expects a JSON body of the form `{ prompt: string }` (3-1000 characters),
 * generates an image and stores it in the `BUCKET` R2 binding under a random
 * `images/<uuid>.png` key.
 *
 * Responses:
 * - 200 `{ success: true, key }` once the upload has completed
 * - 400 when the body is not valid JSON or `prompt` fails validation
 * - 500 when generation or the upload throws
 */
app.post("/generate-image", async (c) => {
  // Malformed JSON is a client error, not a server failure.
  let body: unknown
  try {
    body = await c.req.json()
  } catch {
    return c.json({ error: "Invalid JSON body" }, 400)
  }

  // Guard against `null` and primitive bodies before reading `prompt`.
  const prompt =
    typeof body === "object" && body !== null && "prompt" in body
      ? body.prompt
      : undefined
  // The type check matters: a number has no `.length`, so both comparisons
  // below would be false and the value would slip through unvalidated.
  if (typeof prompt !== "string" || prompt.length < 3 || prompt.length > 1000) {
    return c.json({ error: "Invalid prompt" }, 400)
  }

  const workersai = createWorkersAI({ binding: c.env.AI })

  try {
    const { image } = await generateImage({
      model: workersai.image("@cf/black-forest-labs/flux-1-schnell"),
      prompt,
      size: "1024x1024"
    })

    // Upload to R2. `image` is a generated-file object; store its raw bytes.
    const key = `images/${crypto.randomUUID()}.png`
    await c.env.BUCKET.put(key, image.uint8Array, {
      httpMetadata: { contentType: "image/png" }
    })

    return c.json({ success: true, key })
  } catch (error) {
    // Log the cause so failures are diagnosable; the client gets a generic message.
    console.error("Image generation error:", error)
    return c.json({ error: "Generation failed" }, 500)
  }
})
Performance Tips
// ✅ Stream for long responses
const result = await streamText({...})
return result.toTextStreamResponse() // pass the streaming headers shown under "Streaming Text"
// ❌ Wait for entire response (slow)
const { text } = await generateText({...})
return Response.json({ text })
// ✅ Choose right model size
// Small tasks -> smaller models
workersai("@cf/meta/llama-2-7b-chat-int8")
// Complex reasoning -> larger models
workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast")
// ✅ Cache embeddings
const cached = await cache.get(text)
if (cached) return JSON.parse(cached)
const { embedding } = await embed({...})
await cache.put(text, JSON.stringify(embedding))
// ✅ Batch embeddings
const { embeddings } = await embedMany({
model: workersai.textEmbedding("@cf/baai/bge-base-en-v1.5"),
values: documents
})
// ❌ One at a time (slow)
for (const doc of documents) {
await embed({ value: doc })
}
Model Configuration
// Reference of the generation settings used in this guide.
// NOTE: `workersai` is not defined in this fragment; it is expected to be the
// provider created with createWorkersAI in the surrounding code.
const result = await generateText({
model: workersai("@cf/meta/llama-3.3-70b-instruct-fp8-fast"),
prompt: "Your prompt",
maxTokens: 500, // Max tokens to generate
temperature: 0.7, // Randomness; NOTE(review): the valid range may differ per model - confirm before relying on 0-1
topP: 0.9, // Nucleus sampling
frequencyPenalty: 0.5, // Reduce repetition
presencePenalty: 0.5, // Encourage diversity
stopSequences: ["\n\n", "END"] // Generation stops when one of these strings is produced
})
Common Mistakes
❌ Not handling errors
const { text } = await generateText({...}) // May throw!
✅ Always use try-catch
try {
const { text } = await generateText({...})
} catch (error) {
return c.json({ error: "Failed" }, 500)
}
❌ Missing streaming headers
return result.toTextStreamResponse()
✅ Include required headers
return result.toTextStreamResponse({
headers: {
"Content-Type": "text/x-unknown",
"content-encoding": "identity",
"transfer-encoding": "chunked"
}
})
Commands
# List models
wrangler ai models list
# Test model
wrangler ai run @cf/meta/llama-3.3-70b-instruct-fp8-fast --prompt "Hello"
# Deploy
wrangler deploy