name: serialization description: Data serialization formats and techniques. Use when implementing data exchange, API payloads, storage formats, or cross-language communication. Keywords: serialization, JSON, Protocol Buffers, protobuf, MessagePack, schema evolution, versioning, backward compatibility, forward compatibility, binary formats.
Serialization
Overview
Serialization is the process of converting data structures into a format that can be stored or transmitted and later reconstructed. This skill covers JSON best practices, binary formats like Protocol Buffers and MessagePack, schema evolution strategies, and performance considerations.
Key Concepts
JSON Serialization Best Practices
Consistent Naming Conventions:
// camelCase for JavaScript/TypeScript APIs
/**
 * Example user payload whose keys follow the camelCase convention.
 * Every field is a required string; `createdAt` is carried as text.
 */
interface UserResponse {
userId: string;
firstName: string;
lastName: string;
emailAddress: string;
createdAt: string;
}
// snake_case for Python/Ruby APIs
/**
 * The same five user fields as the camelCase example, spelled with
 * snake_case keys. Every field is a required string.
 */
interface UserResponseSnake {
user_id: string;
first_name: string;
last_name: string;
email_address: string;
created_at: string;
}
// Case conversion utilities

/**
 * Converts a camelCase or PascalCase identifier to snake_case.
 *
 * Each ASCII capital is lower-cased and prefixed with `_`, except a capital
 * at the very start of the string, which is only lower-cased so that
 * "UserId" becomes "user_id" rather than "_user_id".
 *
 * @param str Identifier to convert.
 * @returns The snake_case form of `str`.
 */
function toSnakeCase(str: string): string {
  return str.replace(
    /[A-Z]/g,
    (letter: string, offset: number) =>
      `${offset > 0 ? "_" : ""}${letter.toLowerCase()}`,
  );
}
/**
 * Converts a snake_case identifier to camelCase.
 *
 * Only an underscore immediately followed by a lowercase ASCII letter is
 * rewritten; any other underscore is left in place.
 *
 * @param str Identifier to convert.
 * @returns The camelCase form of `str`.
 */
function toCamelCase(str: string): string {
  const upperCaseCaptured = (_match: string, ch: string): string =>
    ch.toUpperCase();
  return str.replace(/_([a-z])/g, upperCaseCaptured);
}
/**
 * Reports whether `value` is a built-in whose data does not live in
 * enumerable own properties and must therefore be copied by reference.
 */
function isOpaqueBuiltin(value: object): boolean {
  return (
    value instanceof Date ||
    value instanceof RegExp ||
    value instanceof Map ||
    value instanceof Set ||
    value instanceof Error ||
    value instanceof ArrayBuffer ||
    ArrayBuffer.isView(value)
  );
}

/**
 * Recursively renames the keys of an object graph.
 *
 * Arrays are mapped element by element and objects are rebuilt with
 * `converter(key)` as the new key. Primitives, `null`, and built-ins such as
 * `Date`, `Map`, `Set`, `RegExp`, `Error` and binary buffers are returned
 * unchanged; walking them with `Object.entries` would silently discard
 * their contents.
 *
 * @param obj Value to convert.
 * @param converter Maps an original key to its replacement.
 * @returns A new structure with converted keys; leaf values are shared.
 */
function convertKeys(obj: any, converter: (key: string) => string): any {
  if (Array.isArray(obj)) {
    return obj.map((item) => convertKeys(item, converter));
  }
  if (obj === null || typeof obj !== "object" || isOpaqueBuiltin(obj)) {
    return obj;
  }
  return Object.fromEntries(
    Object.entries(obj).map(([key, value]) => [
      converter(key),
      convertKeys(value, converter),
    ]),
  );
}
Date/Time Handling:
// Always use ISO 8601 format
/**
 * Sample literals for the date/time representations discussed in this
 * section. The values are illustrative only; the two Unix timestamps are the
 * same instant expressed in seconds and in milliseconds.
 */
const dateFormats = {
// Preferred: Full ISO 8601 with timezone
iso8601: "2024-12-19T14:30:00.000Z",
// Date only
dateOnly: "2024-12-19",
// With timezone offset
withOffset: "2024-12-19T14:30:00+00:00",
// Unix timestamp (seconds) - use for precise timing
unixSeconds: 1703000000,
// Unix timestamp (milliseconds)
unixMillis: 1703000000000,
};
/**
 * Static helpers for moving `Date` values in and out of JSON.
 */
class DateSerializer {
  /**
   * Formats a date as an ISO 8601 UTC string.
   *
   * @throws RangeError (from `Date.prototype.toISOString`) for an invalid date.
   */
  static toJSON(date: Date): string {
    return date.toISOString();
  }

  /**
   * Parses a serialized date.
   *
   * Numbers are interpreted heuristically: a magnitude below 1e12 is taken
   * as Unix seconds, anything larger as milliseconds. The magnitude (not the
   * signed value) is compared so that negative millisecond timestamps are
   * not multiplied a second time. Millisecond values with a magnitude under
   * 1e12 are inherently ambiguous and are read as seconds.
   *
   * Strings are handed to the `Date` constructor; an unparseable string
   * yields an invalid `Date` rather than an exception.
   *
   * @param value ISO 8601 string or Unix timestamp.
   */
  static fromJSON(value: string | number): Date {
    if (typeof value === "number") {
      const millis = Math.abs(value) < 1e12 ? value * 1000 : value;
      return new Date(millis);
    }
    return new Date(value);
  }

  /** Returns whole Unix seconds for `date`, rounding down. */
  static toUnix(date: Date): number {
    return Math.floor(date.getTime() / 1000);
  }
}
Null vs Undefined vs Omission:
/**
 * Illustrates the three ways a field can appear in a payload: always
 * present (`id`), possibly omitted (`nickname`), and always present but
 * possibly `null` (`deletedAt`).
 */
interface ApiResponse {
// Required field - always present
id: string;
// Optional field - may be omitted
nickname?: string;
// Nullable field - present but may be null
deletedAt: string | null;
}
// Serialization strategies
/**
 * Three ways of dealing with `undefined` before a value goes on the wire.
 */
const serializationStrategies = {
  /** Strategy 1: a JSON round trip, which drops `undefined` properties and keeps `null`. */
  omitUndefined: (obj: any) => {
    const text = JSON.stringify(obj);
    return JSON.parse(text);
  },

  /** Strategy 2: a JSON round trip whose replacer turns every `undefined` into `null`. */
  undefinedToNull: (obj: any) => {
    const nullify = (_key: string, v: any) => (v === undefined ? null : v);
    return JSON.parse(JSON.stringify(obj, nullify));
  },

  /** Strategy 3: shallow copy of the own enumerable properties that are not `undefined`. */
  explicit: (obj: any) => {
    const kept: any = {};
    Object.entries(obj).forEach(([key, value]) => {
      if (value === undefined) {
        return;
      }
      kept[key] = value;
    });
    return kept;
  },
};
Custom JSON Serialization:
/**
 * JSON serializer that preserves `Date`, `Map`, `Set` and `bigint` values,
 * plus any class registered through `registerType`, by wrapping them in
 * `{ __type, value }` envelopes.
 */
class CustomSerializer {
  private serializers: Map<string, (value: any) => any> = new Map();
  private deserializers: Map<string, (value: any) => any> = new Map();

  /**
   * Registers a converter pair for a class.
   *
   * @param typeName Must equal the class's constructor name; it is used both
   *   to recognise instances while serializing and as the `__type` tag.
   * @param serialize Turns an instance into a JSON-compatible value.
   * @param deserialize Rebuilds an instance from that value.
   */
  registerType<T>(
    typeName: string,
    serialize: (value: T) => any,
    deserialize: (value: any) => T,
  ): void {
    this.serializers.set(typeName, serialize);
    this.deserializers.set(typeName, deserialize);
  }

  /**
   * Serializes `value` to a JSON string with type envelopes.
   */
  serialize(value: any): string {
    const registered = this.serializers;
    // A `function` replacer is required: `this` is the holder object, which
    // gives access to the value *before* JSON.stringify applied toJSON().
    return JSON.stringify(value, function (this: any, key: string, val: any) {
      const raw = this[key];
      if (raw instanceof Date) {
        // `val` is already the toJSON() string here, so test `raw` instead.
        // Invalid dates serialize as null, matching native JSON behaviour.
        return Number.isNaN(raw.getTime())
          ? null
          : { __type: "Date", value: raw.toISOString() };
      }
      if (val instanceof Map) {
        return { __type: "Map", value: Array.from(val.entries()) };
      }
      if (val instanceof Set) {
        return { __type: "Set", value: Array.from(val) };
      }
      // bigint is a primitive; `instanceof BigInt` never matches it.
      if (typeof val === "bigint") {
        return { __type: "BigInt", value: val.toString() };
      }
      // Plain objects are skipped so the envelopes created above are not
      // themselves looked up in the registry.
      if (raw !== null && typeof raw === "object" && raw.constructor !== Object) {
        const typeName: unknown = raw.constructor?.name;
        const custom =
          typeof typeName === "string" ? registered.get(typeName) : undefined;
        if (custom) {
          return { __type: typeName, value: custom(raw) };
        }
      }
      return val;
    });
  }

  /**
   * Parses a string produced by `serialize`, rebuilding enveloped values.
   * Envelopes with an unknown `__type` are returned as plain objects.
   */
  deserialize<T>(json: string): T {
    return JSON.parse(json, (_key, val) => {
      if (val && typeof val === "object" && "__type" in val) {
        switch (val.__type) {
          case "Date":
            return new Date(val.value);
          case "Map":
            return new Map(val.value);
          case "Set":
            return new Set(val.value);
          case "BigInt":
            return BigInt(val.value);
          default: {
            const custom = this.deserializers.get(val.__type);
            return custom ? custom(val.value) : val;
          }
        }
      }
      return val;
    });
  }
}
// Usage
// Round-trips an object holding a Date, a Set and a Map through
// CustomSerializer.serialize() and CustomSerializer.deserialize().
const serializer = new CustomSerializer();
const data = {
id: 1,
created: new Date(),
tags: new Set(["a", "b"]),
metadata: new Map([["key", "value"]]),
};
// `json` is the string form; `restored` is the value parsed back from it.
const json = serializer.serialize(data);
const restored = serializer.deserialize(json);
Protocol Buffers (Protobuf)
Schema Definition (.proto):
syntax = "proto3";
package myapp;

// Required for the google.protobuf.Timestamp fields used by the User and
// Order messages in this file.
import "google/protobuf/timestamp.proto";
// Enum definition
// Status values for an order. The zero value is the explicit
// "unspecified" member; every name carries the ORDER_STATUS_ prefix.
enum OrderStatus {
ORDER_STATUS_UNSPECIFIED = 0;
ORDER_STATUS_PENDING = 1;
ORDER_STATUS_CONFIRMED = 2;
ORDER_STATUS_SHIPPED = 3;
ORDER_STATUS_DELIVERED = 4;
ORDER_STATUS_CANCELLED = 5;
}
// Message definitions
// A user record. `phone` is the only field declared `optional`; `roles` is
// a repeated string and `metadata` a string-to-string map.
message User {
string id = 1;
string email = 2;
string name = 3;
optional string phone = 4;
repeated string roles = 5;
map<string, string> metadata = 6;
google.protobuf.Timestamp created_at = 7;
}
// A postal address made of five string fields. Order and
// CreateOrderRequest use it as their shipping_address.
message Address {
string street = 1;
string city = 2;
string state = 3;
string postal_code = 4;
string country = 5;
}
// An order: its owner (user_id), status, line items, shipping address,
// total and creation/update timestamps.
message Order {
string id = 1;
string user_id = 2;
OrderStatus status = 3;
repeated OrderItem items = 4;
Address shipping_address = 5;
// Integer amount; presumably minor currency units of `currency` - confirm.
int64 total_cents = 6;
string currency = 7;
google.protobuf.Timestamp created_at = 8;
google.protobuf.Timestamp updated_at = 9;
}
// One line item of an order: product reference, display name, quantity
// and an integer price.
message OrderItem {
string product_id = 1;
string name = 2;
int32 quantity = 3;
int64 price_cents = 4;
}
// Service definition (for gRPC)
// Four unary RPCs. Create, Get and UpdateOrderStatus return an Order;
// ListOrders returns a ListOrdersResponse.
service OrderService {
rpc CreateOrder(CreateOrderRequest) returns (Order);
rpc GetOrder(GetOrderRequest) returns (Order);
rpc ListOrders(ListOrdersRequest) returns (ListOrdersResponse);
rpc UpdateOrderStatus(UpdateOrderStatusRequest) returns (Order);
}
// Input of OrderService.CreateOrder: the ordering user, the items and the
// shipping address.
message CreateOrderRequest {
string user_id = 1;
repeated OrderItem items = 2;
Address shipping_address = 3;
}
// Input of OrderService.GetOrder: the id of the order to fetch.
message GetOrderRequest {
string order_id = 1;
}
// Input of OrderService.ListOrders. page_size and page_token suggest
// token-based pagination; the exact server semantics are not defined here.
message ListOrdersRequest {
string user_id = 1;
int32 page_size = 2;
string page_token = 3;
}
// Output of OrderService.ListOrders: a batch of orders plus a token,
// presumably passed back as ListOrdersRequest.page_token - confirm.
message ListOrdersResponse {
repeated Order orders = 1;
string next_page_token = 2;
}
// Input of OrderService.UpdateOrderStatus: the target order and its new
// status.
message UpdateOrderStatusRequest {
string order_id = 1;
OrderStatus status = 2;
}
TypeScript Usage with protobufjs:
import * as protobuf from "protobufjs";
/**
 * Thin wrapper around a protobufjs `Root` that encodes and decodes messages
 * by fully-qualified type name.
 */
class ProtobufSerializer {
  // Stays undefined until load() resolves.
  private root?: protobuf.Root;

  /** Loads and parses the .proto file at `protoPath`. */
  async load(protoPath: string): Promise<void> {
    this.root = await protobuf.load(protoPath);
  }

  /**
   * Encodes a plain object as the named message type.
   *
   * The payload is first converted with `fromObject`, which accepts enum
   * names as strings and 64-bit values as strings or numbers, and only then
   * verified. Calling `verify` on the raw payload would reject string enum
   * names such as "ORDER_STATUS_PENDING".
   *
   * @param typeName Fully-qualified message name, e.g. "myapp.Order".
   * @param payload Plain object using protobufjs field names.
   * @throws Error if the schema is not loaded or the message is invalid.
   */
  encode<T>(typeName: string, payload: T): Uint8Array {
    const MessageType = this.requireRoot().lookupType(typeName);
    const message = MessageType.fromObject(payload as { [k: string]: any });
    const errMsg = MessageType.verify(message);
    if (errMsg) throw new Error(errMsg);
    return MessageType.encode(message).finish();
  }

  /**
   * Decodes a buffer into a plain object. 64-bit integers and enums are
   * rendered as strings and unset fields are filled with their defaults.
   *
   * @param typeName Fully-qualified message name.
   * @param buffer Encoded message bytes.
   * @throws Error if the schema is not loaded.
   */
  decode<T>(typeName: string, buffer: Uint8Array): T {
    const MessageType = this.requireRoot().lookupType(typeName);
    const message = MessageType.decode(buffer);
    return MessageType.toObject(message, {
      longs: String,
      enums: String,
      defaults: true,
    }) as T;
  }

  /** Returns the loaded root or fails with an actionable message. */
  private requireRoot(): protobuf.Root {
    if (!this.root) {
      throw new Error(
        "ProtobufSerializer: call load() before encode() or decode()",
      );
    }
    return this.root;
  }
}
// Usage
// Loads ./schema.proto, encodes one order as "myapp.Order" and decodes the
// resulting bytes again.
const serializer = new ProtobufSerializer();
await serializer.load("./schema.proto");
// Keys are camelCase (userId, totalCents) while the .proto declares
// snake_case fields; the enum is given by name.
const order = {
id: "ord_123",
userId: "usr_456",
status: "ORDER_STATUS_PENDING",
items: [
{ productId: "prod_789", name: "Widget", quantity: 2, priceCents: 1999 },
],
totalCents: 3998,
currency: "USD",
};
const buffer = serializer.encode("myapp.Order", order);
const decoded = serializer.decode<typeof order>("myapp.Order", buffer);
Python Usage:
from google.protobuf import json_format
import myapp_pb2
# Create message
# Scalar fields are set through constructor keyword arguments.
order = myapp_pb2.Order(
id='ord_123',
user_id='usr_456',
status=myapp_pb2.ORDER_STATUS_PENDING,
total_cents=3998,
currency='USD'
)
# Add repeated field
# items.add() appends a new OrderItem and returns it for in-place population.
item = order.items.add()
item.product_id = 'prod_789'
item.name = 'Widget'
item.quantity = 2
item.price_cents = 1999
# Serialize
# Produces the binary wire-format representation of the message.
binary_data = order.SerializeToString()
# Deserialize
# Parses the bytes into a fresh, empty Order instance.
parsed_order = myapp_pb2.Order()
parsed_order.ParseFromString(binary_data)
# Convert to/from JSON
# json_format handles the JSON mapping in both directions.
json_str = json_format.MessageToJson(order)
from_json = json_format.Parse(json_str, myapp_pb2.Order())
MessagePack
Basic Usage:
import * as msgpack from "@msgpack/msgpack";
// Simple encode/decode
// Encodes a plain object with the module-level helpers and decodes it back.
const data = {
name: "Alice",
age: 30,
tags: ["developer", "typescript"],
active: true,
metadata: { key: "value" },
};
const encoded = msgpack.encode(data);
const decoded = msgpack.decode(encoded);
// With options
// Encoder and Decoder instances are built with the codec returned by
// createCustomCodec(); the encoder is also told to ignore undefined values.
const encoder = new msgpack.Encoder({
extensionCodec: createCustomCodec(),
ignoreUndefined: true,
});
const decoder = new msgpack.Decoder({
extensionCodec: createCustomCodec(),
});
Custom Extension Types:
import { ExtensionCodec } from "@msgpack/msgpack";
/**
 * Builds an ExtensionCodec that adds two application-defined types:
 * `Date` (extension type 0) and `bigint` (extension type 1).
 *
 * @returns A codec with both extensions registered.
 */
function createCustomCodec(): ExtensionCodec {
  const codec = new ExtensionCodec();

  // Date extension (type 0): epoch milliseconds as a big-endian int64.
  codec.register({
    type: 0,
    encode: (value: unknown): Uint8Array | null => {
      if (!(value instanceof Date)) {
        return null; // not handled by this extension
      }
      const bytes = new Uint8Array(8);
      new DataView(bytes.buffer).setBigInt64(0, BigInt(value.getTime()));
      return bytes;
    },
    decode: (data: Uint8Array): Date => {
      // `data` may be a view into a larger buffer, so the DataView has to
      // honour its byteOffset/byteLength instead of starting at offset 0 of
      // the underlying ArrayBuffer.
      const view = new DataView(data.buffer, data.byteOffset, data.byteLength);
      return new Date(Number(view.getBigInt64(0)));
    },
  });

  // BigInt extension (type 1): decimal digits as UTF-8 text.
  codec.register({
    type: 1,
    encode: (value: unknown): Uint8Array | null => {
      if (typeof value === "bigint") {
        return new TextEncoder().encode(value.toString());
      }
      return null;
    },
    decode: (data: Uint8Array): bigint => {
      return BigInt(new TextDecoder().decode(data));
    },
  });

  return codec;
}
Streaming Encoder/Decoder:
import { Encoder, Decoder, decodeMultiStream } from "@msgpack/msgpack";
// Encode multiple messages to a stream
/**
 * Encodes each message with MessagePack and writes it to `stream` in order,
 * then closes the stream.
 *
 * If encoding or writing fails, the stream is aborted with the error and the
 * error is rethrown. The writer lock is released on every path so the caller
 * is never left holding a permanently locked stream.
 *
 * @param messages Values to encode, written in array order.
 * @param stream Destination; must not already be locked.
 */
async function encodeStream(
  messages: any[],
  stream: WritableStream<Uint8Array>,
): Promise<void> {
  const encoder = new Encoder();
  const writer = stream.getWriter();
  try {
    for (const message of messages) {
      await writer.write(encoder.encode(message));
    }
    await writer.close();
  } catch (error) {
    // Best effort: a failure while aborting must not mask the original error.
    await writer.abort(error).catch(() => undefined);
    throw error;
  } finally {
    writer.releaseLock();
  }
}
// Decode from a stream
/**
 * Async generator that yields every value `decodeMultiStream` produces for
 * `stream`, typed as `T`. The cast is unchecked: no runtime validation is
 * performed.
 */
async function* decodeStream<T>(
  stream: ReadableStream<Uint8Array>,
): AsyncIterable<T> {
  const decodedValues = decodeMultiStream(stream);
  for await (const value of decodedValues) {
    yield value as T;
  }
}
Schema Evolution and Versioning
Field Numbering Strategy (Protobuf):
// Example field-number layout: low numbers for core fields, a middle range
// for common optional fields, and a high range for extension-style data.
message User {
// Core fields: 1-15 (1-byte tag, most efficient)
string id = 1;
string email = 2;
string name = 3;
// Common fields: 16-100
optional string phone = 16;
optional string avatar_url = 17;
// Reserved for future use: 101-200
// NOTE(review): a `reserved` range blocks these numbers from being declared
// at all, so this statement has to be edited before any number in the range
// can be used - confirm that is the intended workflow.
reserved 101 to 200;
// Extension fields: 201+
map<string, string> metadata = 201;
// Deprecated fields (never reuse numbers!)
reserved 50, 51;
reserved "old_field", "legacy_field";
}
JSON Schema Versioning:
/**
 * One registered schema version.
 *
 * `migrate`, when present, receives data and the version it is being
 * upgraded from, and returns the upgraded data.
 */
interface SchemaVersion {
version: number;
schema: object;
migrate?: (data: any, fromVersion: number) => any;
}
/**
 * Tags payloads with a schema version and upgrades older payloads one
 * version at a time when they are read back.
 */
class VersionedSerializer {
  private versions: Map<number, SchemaVersion> = new Map();
  private currentVersion = 1;

  /**
   * Stores `version` under its number; the highest number registered so far
   * becomes the current version.
   */
  registerVersion(version: SchemaVersion): void {
    const { version: versionNumber } = version;
    this.versions.set(versionNumber, version);
    if (versionNumber > this.currentVersion) {
      this.currentVersion = versionNumber;
    }
  }

  /** Wraps `data` in an envelope stamped with the current version. */
  serialize(data: any): { version: number; data: any } {
    return { version: this.currentVersion, data };
  }

  /**
   * Unwraps an envelope, stepping from the payload's version up to the
   * current one. At each step the target version's `migrate` is applied if
   * it exists; steps without a migration leave the data untouched. Payloads
   * at or above the current version are returned as-is.
   */
  deserialize(payload: { version: number; data: any }): any {
    let migrated = payload.data;
    for (let from = payload.version; from < this.currentVersion; from += 1) {
      const target = this.versions.get(from + 1);
      if (target?.migrate) {
        migrated = target.migrate(migrated, from);
      }
    }
    return migrated;
  }
}
// Example usage
// Registers two schema versions. Version 2 replaces the single `name`
// field with `firstName` / `lastName`.
const serializer = new VersionedSerializer();
serializer.registerVersion({
version: 1,
schema: { type: "object", properties: { name: { type: "string" } } },
});
serializer.registerVersion({
version: 2,
schema: {
type: "object",
properties: {
firstName: { type: "string" },
lastName: { type: "string" },
},
},
// Upgrades v1 data: the text before the first space becomes firstName and
// the remainder lastName. The returned object holds only those two keys.
migrate: (data, fromVersion) => {
if (fromVersion === 1) {
const [firstName, ...rest] = (data.name || "").split(" ");
return {
firstName,
lastName: rest.join(" "),
};
}
return data;
},
});
Backward/Forward Compatibility
Compatibility Rules:
// Rules for maintaining compatibility
/**
 * Checklist of schema changes grouped by their compatibility impact.
 *
 * The three lists are mutually consistent: no change is listed as both
 * compatible and breaking. Removing a field is only safe when it was
 * optional, and a type change is only safe when the two types share a wire
 * encoding (for example int32 -> int64).
 */
const compatibilityRules = {
  // BACKWARD COMPATIBLE (new code reads old data)
  backwardCompatible: [
    "Add optional field with default",
    "Add new enum value (not at position 0)",
    "Remove optional field (reserve its number and name)",
    "Widen numeric type (int32 -> int64)",
    "Add new message type",
  ],
  // FORWARD COMPATIBLE (old code reads new data)
  forwardCompatible: [
    "Add optional field (old code ignores)",
    "Add new enum value (old code uses default)",
    "Old code ignores unknown fields",
  ],
  // BREAKING CHANGES (avoid!)
  breakingChanges: [
    "Change field type (to a wire-incompatible type)",
    "Change field number",
    "Remove required field",
    "Rename field (in JSON)",
    "Change field from optional to required",
  ],
};
Defensive Deserialization:
/**
 * Parses JSON objects defensively against a small field schema: required
 * fields must be present, optional fields fall back to defaults, and
 * unknown fields are dropped.
 */
class SafeDeserializer<T> {
  /**
   * @param schema `required` and `optional` list the accepted field names;
   *   `defaults` supplies starting values (copied shallowly per call).
   */
  constructor(
    private schema: {
      required: string[];
      optional: string[];
      defaults: Partial<T>;
    },
  ) {}

  /**
   * @param json Text expected to contain a single JSON object.
   * @returns Defaults overlaid with the recognised fields from `json`.
   * @throws DeserializationError if the text is not valid JSON, is not a
   *   JSON object (arrays and primitives are rejected), or lacks a required
   *   field.
   */
  deserialize(json: string): T {
    let parsed: unknown;
    try {
      parsed = JSON.parse(json);
    } catch {
      throw new DeserializationError("Invalid JSON");
    }
    // Arrays are `typeof "object"` too, but they are not field containers.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new DeserializationError("Expected object");
    }
    const source = parsed as Record<string, unknown>;
    // Own-property check: `in` would also accept inherited members such as
    // "toString" or "constructor" as if they had been sent.
    const hasField = (field: string): boolean =>
      Object.prototype.hasOwnProperty.call(source, field);

    // Check required fields
    for (const field of this.schema.required) {
      if (!hasField(field)) {
        throw new DeserializationError(`Missing required field: ${field}`);
      }
    }

    // Apply defaults for missing optional fields
    const result = { ...this.schema.defaults } as T;
    for (const key of [...this.schema.required, ...this.schema.optional]) {
      if (hasField(key)) {
        (result as any)[key] = source[key];
      }
    }
    // Ignore unknown fields (forward compatibility)
    return result;
  }
}
/**
 * Error type for deserialization failures; `name` is set to
 * "DeserializationError".
 */
class DeserializationError extends Error {
  name = "DeserializationError";

  constructor(message: string) {
    super(message);
  }
}
Union Types and Discriminators:
// Using discriminated unions for extensibility
/**
 * Events discriminated by the literal `type` tag; each variant carries its
 * own payload type.
 */
type Event =
| { type: "user.created"; payload: UserCreatedPayload }
| { type: "user.updated"; payload: UserUpdatedPayload }
| { type: "order.created"; payload: OrderCreatedPayload };
/**
 * Parses a JSON-encoded event.
 *
 * Known event types are returned as `{ type, payload }`; the payload is
 * passed through without validation. Anything else, including JSON that is
 * not an object (`null`, numbers, strings), is logged with `console.warn`
 * and reported as `null` so newer producers do not break older consumers.
 *
 * @param json Text to parse.
 * @returns The event, or `null` when the type is not recognised.
 * @throws SyntaxError (from `JSON.parse`) if `json` is not valid JSON.
 */
function deserializeEvent(json: string): Event | null {
  const data = JSON.parse(json);
  // Optional chaining: JSON.parse can legitimately return null, on which a
  // plain property access would throw a TypeError.
  const type = data?.type;
  const payload = data?.payload;
  // Handle unknown event types gracefully
  switch (type) {
    case "user.created":
      return { type: "user.created", payload };
    case "user.updated":
      return { type: "user.updated", payload };
    case "order.created":
      return { type: "order.created", payload };
    default:
      // Forward compatibility: ignore unknown types
      console.warn(`Unknown event type: ${type}`);
      return null;
  }
}
Custom Serializers
Type-Safe Serializer Framework:
/**
 * A two-way converter between a typed value `T` and its raw serialized
 * representation.
 */
interface Serializer<T> {
serialize(value: T): any;
deserialize(raw: any): T;
}
/**
 * Name-keyed lookup table of serializers.
 */
class SerializerRegistry {
  private serializers: Map<string, Serializer<any>> = new Map();

  /** Stores `serializer` under `name`, replacing any previous entry. */
  register<T>(name: string, serializer: Serializer<T>): void {
    this.serializers.set(name, serializer);
  }

  /**
   * Looks up the serializer stored under `name`.
   *
   * @throws Error when nothing is registered for `name`.
   */
  get<T>(name: string): Serializer<T> {
    const found = this.serializers.get(name);
    if (found) {
      return found;
    }
    throw new Error(`No serializer registered for: ${name}`);
  }
}
// Built-in serializers
/** Serializes a `Date` as its ISO 8601 string and parses it back with the `Date` constructor. */
const dateSerializer: Serializer<Date> = {
  serialize(date) {
    return date.toISOString();
  },
  deserialize(raw) {
    return new Date(raw);
  },
};
/**
 * Serializes a number as a string with exactly two decimal places and
 * parses it back with `parseFloat`; digits beyond the second decimal are
 * lost in the round trip.
 */
const decimalSerializer: Serializer<number> = {
  serialize(num) {
    return num.toFixed(2);
  },
  deserialize(raw) {
    return parseFloat(raw);
  },
};
/**
 * Serializes a money value by multiplying `amount` by 100 and rounding to
 * an integer; deserializing divides by 100 again. `currency` is passed
 * through unchanged in both directions.
 */
const moneySerializer: Serializer<{ amount: number; currency: string }> = {
  serialize(money) {
    const scaledAmount = Math.round(money.amount * 100);
    return { amount: scaledAmount, currency: money.currency };
  },
  deserialize(raw) {
    const amount = raw.amount / 100;
    return { amount, currency: raw.currency };
  },
};
Decorator-Based Serialization:
import "reflect-metadata";
// Metadata keys under which the decorators below store their data.
const SERIALIZABLE_KEY = Symbol("serializable");
const PROPERTY_KEY = Symbol("property");
/**
 * Per-property settings accepted by the `Property` decorator.
 * `name` overrides the output key and `serializer` converts the value.
 */
interface PropertyOptions {
name?: string;
serializer?: Serializer<any>;
optional?: boolean;
default?: any;
}
/**
 * Class decorator factory. Records `options` (or an empty object when none
 * are given) on the decorated constructor under `SERIALIZABLE_KEY`.
 */
function Serializable(options?: { discriminator?: string }) {
  const recorded = options || {};
  return (constructor: Function): void => {
    Reflect.defineMetadata(SERIALIZABLE_KEY, recorded, constructor);
  };
}
/**
 * Property decorator factory. Appends `{ key, options }` for the decorated
 * property to the list stored on the class prototype under `PROPERTY_KEY`.
 *
 * The list is copied before it is extended. `Reflect.getMetadata` walks the
 * prototype chain, so for a subclass it returns the parent's array; pushing
 * into that array would leak the subclass's properties into the parent.
 *
 * @param options Optional per-property settings; defaults to `{}`.
 */
function Property(options?: PropertyOptions) {
  return function (target: any, propertyKey: string) {
    const inherited: Array<{ key: string; options: PropertyOptions }> =
      Reflect.getMetadata(PROPERTY_KEY, target) || [];
    const entry = { key: propertyKey, options: options || {} };
    Reflect.defineMetadata(PROPERTY_KEY, [...inherited, entry], target);
  };
}
/**
 * Example model wired up with the decorators above: `email` is emitted as
 * "email_address", `createdAt` goes through `dateSerializer`, and `tags` is
 * declared optional with an empty-array default.
 */
@Serializable()
class User {
@Property()
id: string;
@Property({ name: "email_address" })
email: string;
@Property({ serializer: dateSerializer })
createdAt: Date;
@Property({ optional: true, default: [] })
tags: string[];
}
/**
 * Builds a plain object from a decorated instance using the property list
 * stored on its prototype under `PROPERTY_KEY`.
 *
 * For every listed property the output key is `options.name` when set,
 * otherwise the property name. Properties that are `undefined` and marked
 * `optional` are skipped; values with a configured serializer are passed
 * through it, all others are copied as-is.
 *
 * @param instance Object whose prototype carries the property metadata.
 * @returns The assembled plain object (empty when no metadata exists).
 */
function serialize<T>(instance: T): any {
  const source = instance as any;
  const descriptors =
    Reflect.getMetadata(PROPERTY_KEY, Object.getPrototypeOf(instance)) || [];
  const output: any = {};
  for (const { key, options } of descriptors) {
    const current = source[key];
    const targetKey = options.name || key;
    const omit = current === undefined && options.optional;
    if (!omit) {
      output[targetKey] = options.serializer
        ? options.serializer.serialize(current)
        : current;
    }
  }
  return output;
}
Performance Considerations
Benchmark Comparison:
import Benchmark from "benchmark";
import * as msgpack from "@msgpack/msgpack";
// Sample payload shared by all benchmark cases.
const testData = {
  id: "user_123456789",
  email: "user@example.com",
  name: "Test User",
  age: 30,
  active: true,
  roles: ["admin", "user"],
  metadata: {
    lastLogin: "2024-12-19T00:00:00Z",
    preferences: { theme: "dark", language: "en" },
  },
};

// Encode once up front so the parse/decode cases time decoding only; encoding
// inside the measured callback would report encode + decode instead.
const jsonEncoded = JSON.stringify(testData);
const msgpackEncoded = msgpack.encode(testData);

const suite = new Benchmark.Suite();
suite
  .add("JSON.stringify", () => {
    JSON.stringify(testData);
  })
  .add("JSON.parse", () => {
    JSON.parse(jsonEncoded);
  })
  .add("MessagePack encode", () => {
    msgpack.encode(testData);
  })
  .add("MessagePack decode", () => {
    msgpack.decode(msgpackEncoded);
  })
  // Print each case's result as it completes.
  .on("cycle", (event: any) => {
    console.log(String(event.target));
  })
  .run();
Size Optimization:
// Strategies for reducing payload size
// 1. Field name shortening (with mapping)
/** Maps verbose field names to their short wire aliases. */
const fieldMap = {
userId: "u",
firstName: "fn",
lastName: "ln",
emailAddress: "e",
createdAt: "ca",
};
/**
 * Recursively renames object keys according to `map`.
 *
 * Keys without an entry in `map` (or with an empty replacement) are kept.
 * Arrays stay arrays and are processed element by element; primitives,
 * `null` and `Date` values are returned unchanged.
 *
 * Only the map's own entries are consulted, so data keys that collide with
 * inherited members such as "constructor" or "toString" are never replaced
 * by accident.
 *
 * @param obj Value to compress.
 * @param map Long-name to short-name mapping.
 * @returns A new structure with shortened keys.
 */
function compressKeys(obj: any, map: Record<string, string>): any {
  if (Array.isArray(obj)) {
    return obj.map((item) => compressKeys(item, map));
  }
  if (obj === null || typeof obj !== "object" || obj instanceof Date) {
    return obj;
  }
  const result: any = {};
  for (const [key, value] of Object.entries(obj)) {
    const alias = Object.prototype.hasOwnProperty.call(map, key)
      ? map[key]
      : undefined;
    result[alias || key] = compressKeys(value, map);
  }
  return result;
}
// 2. Array-based encoding for known schemas
/**
 * Positional encoding of a user: the index replaces the field name.
 */
interface UserTuple {
0: string; // id
1: string; // email
2: string; // name
3: number; // createdAt (unix timestamp)
}
/**
 * Packs a user into positional form: id, email, name, then `createdAt` as
 * the number returned by `Date.prototype.getTime()`.
 */
function toTuple(user: User): UserTuple {
  const { id, email, name, createdAt } = user;
  return [id, email, name, createdAt.getTime()];
}
/**
 * Unpacks the positional form: indices 0-2 become `id`, `email` and `name`,
 * and index 3 is passed to the `Date` constructor for `createdAt`.
 */
function fromTuple(tuple: UserTuple): User {
  const id = tuple[0];
  const email = tuple[1];
  const name = tuple[2];
  const createdAt = new Date(tuple[3]);
  return { id, email, name, createdAt };
}
// 3. Delta encoding for updates
/**
 * Collects the top-level keys of `updated` whose JSON text differs from the
 * same key in `original`, mapped to their new values. Keys that exist only
 * in `original` are not represented in the result.
 */
function createDelta(original: any, updated: any): any {
  const changes: any = {};
  Object.keys(updated).forEach((field) => {
    const before = JSON.stringify(original[field]);
    const after = JSON.stringify(updated[field]);
    if (before === after) {
      return;
    }
    changes[field] = updated[field];
  });
  return changes;
}
/**
 * Returns a new object holding the properties of `original` overlaid with
 * those of `delta`; the merge is shallow and neither input is modified.
 */
function applyDelta(original: any, delta: any): any {
  const merged = { ...original, ...delta };
  return merged;
}
Streaming for Large Payloads:
import { createReadStream, createWriteStream } from "fs";
import { Transform } from "stream";
import { StringDecoder } from "string_decoder";
/**
 * Object-mode Transform that turns each incoming value into one line of
 * JSON text terminated by "\n".
 */
class JSONLineSerializer extends Transform {
  constructor() {
    super({ objectMode: true });
  }

  /**
   * Stringifies `chunk` and hands the resulting line to `callback`; any
   * error raised while doing so is passed to `callback` instead.
   */
  _transform(chunk: any, encoding: string, callback: Function): void {
    try {
      const serialized = JSON.stringify(chunk);
      callback(null, `${serialized}\n`);
    } catch (failure) {
      callback(failure);
    }
  }
}
/**
 * Transform that reads newline-delimited JSON and emits one parsed value
 * per non-empty line.
 *
 * Bytes are decoded with a StringDecoder so a multi-byte UTF-8 character
 * split across two chunks is reassembled rather than corrupted. Malformed
 * lines are skipped (best effort). A line containing JSON `null` is skipped
 * as well, because pushing `null` into a stream signals end-of-stream.
 */
class JSONLineDeserializer extends Transform {
  // Text received so far that does not yet end in a newline.
  private buffer: string = "";
  // Holds back incomplete UTF-8 sequences between chunks.
  private decoder = new StringDecoder("utf8");

  constructor() {
    super({ objectMode: true });
  }

  /**
   * Appends the chunk's text, emits every complete line and keeps the
   * trailing partial line for the next call.
   */
  _transform(chunk: Buffer, encoding: string, callback: Function): void {
    // In object mode chunks arrive unconverted, so strings are possible too.
    this.buffer +=
      typeof chunk === "string" ? chunk : this.decoder.write(chunk);
    const lines = this.buffer.split("\n");
    this.buffer = lines.pop() ?? "";
    for (const line of lines) {
      this.pushLine(line);
    }
    callback();
  }

  /** Emits whatever remains once the input has ended. */
  _flush(callback: Function): void {
    this.buffer += this.decoder.end();
    this.pushLine(this.buffer);
    this.buffer = "";
    callback();
  }

  /** Parses one line and pushes its value; blank or invalid lines are ignored. */
  private pushLine(line: string): void {
    if (!line.trim()) {
      return;
    }
    let value: unknown;
    try {
      value = JSON.parse(line);
    } catch {
      // Skip malformed lines
      return;
    }
    if (value !== null) {
      this.push(value);
    }
  }
}
Best Practices
JSON
- Use consistent naming conventions (camelCase or snake_case)
- Always use ISO 8601 for dates and times, and include an explicit timezone (`Z` or a UTC offset) on timestamps
- Handle null/undefined explicitly
- Keep payloads reasonably sized
- Validate input before processing
Protocol Buffers
- Reserve field numbers for deprecated fields
- Use `optional` for fields where "not set" must be distinguishable from the type's default value
- Avoid changing field types
- Use well-known types for common patterns
- Version your .proto files
Schema Evolution
- Plan for schema changes from the start
- Always add new fields as optional
- Never reuse the field numbers or names of removed fields; mark them as `reserved` instead
- Test backward/forward compatibility
- Document breaking changes
Performance
- Choose the format based on the use case (JSON for human readability and debugging, binary formats for throughput and payload size)
- Use streaming for large payloads
- Consider compression for large JSON
- Profile serialization in your specific context
- Cache serializers/deserializers
Examples
Complete Serialization Layer
// Generic serialization layer supporting multiple formats
/**
 * A pluggable wire format: a short `name`, the content type it is matched
 * against, and the encode/decode pair between values and Buffers.
 */
interface SerializationFormat {
name: string;
contentType: string;
encode<T>(data: T): Buffer;
decode<T>(buffer: Buffer): T;
}
/** JSON format: values are stringified into a Buffer and parsed back from its text. */
const jsonFormat: SerializationFormat = {
  name: "json",
  contentType: "application/json",
  encode(data) {
    const text = JSON.stringify(data);
    return Buffer.from(text);
  },
  decode(buffer) {
    const text = buffer.toString();
    return JSON.parse(text);
  },
};
/** MessagePack format: delegates to `msgpack.encode` / `msgpack.decode`, wrapping the encoded bytes in a Buffer. */
const msgpackFormat: SerializationFormat = {
  name: "msgpack",
  contentType: "application/msgpack",
  encode(data) {
    const packed = msgpack.encode(data);
    return Buffer.from(packed);
  },
  decode(buffer) {
    return msgpack.decode(buffer) as any;
  },
};
/**
 * Registry of serialization formats with helpers for encoding, decoding
 * and HTTP content negotiation. JSON and MessagePack are pre-registered and
 * JSON is the default.
 */
class SerializationService {
  private formats: Map<string, SerializationFormat> = new Map();
  private defaultFormat: string = "json";

  constructor() {
    this.registerFormat(jsonFormat);
    this.registerFormat(msgpackFormat);
  }

  /** Adds `format`, replacing any format already registered under the same name. */
  registerFormat(format: SerializationFormat): void {
    this.formats.set(format.name, format);
  }

  /**
   * Encodes `data` with the named format, or the default when omitted.
   *
   * @returns The encoded bytes plus the content type to send with them.
   * @throws Error if no format is registered under the resolved name.
   */
  serialize<T>(
    data: T,
    formatName?: string,
  ): {
    buffer: Buffer;
    contentType: string;
  } {
    const resolvedName = formatName || this.defaultFormat;
    const format = this.formats.get(resolvedName);
    if (!format) {
      // Report the name actually looked up, not a possibly-undefined argument.
      throw new Error(`Unknown format: ${resolvedName}`);
    }
    return {
      buffer: format.encode(data),
      contentType: format.contentType,
    };
  }

  /**
   * Decodes `buffer` with the format registered for `contentType`.
   * Media-type parameters (e.g. `; charset=utf-8`) and letter case are
   * ignored when matching.
   *
   * @throws Error if no registered format has that content type.
   */
  deserialize<T>(buffer: Buffer, contentType: string): T {
    const format = this.findByContentType(contentType);
    if (!format) {
      throw new Error(`Unknown content type: ${contentType}`);
    }
    return format.decode(buffer);
  }

  // Content negotiation helper
  /**
   * Picks the first media type in `acceptHeader` (in listed order) that a
   * registered format can produce; falls back to the default format.
   * Quality values (`q=`) are ignored.
   */
  negotiate(acceptHeader: string): SerializationFormat {
    for (const candidate of acceptHeader.split(",")) {
      const format = this.findByContentType(candidate);
      if (format) return format;
    }
    return this.formats.get(this.defaultFormat)!;
  }

  /** Finds a format by media type, comparing only the lower-cased type/subtype. */
  private findByContentType(value: string): SerializationFormat | undefined {
    const wanted = SerializationService.normalizeMediaType(value);
    for (const format of this.formats.values()) {
      if (SerializationService.normalizeMediaType(format.contentType) === wanted) {
        return format;
      }
    }
    return undefined;
  }

  /** Strips parameters and surrounding whitespace and lower-cases the media type. */
  private static normalizeMediaType(value: string): string {
    return (value.split(";")[0] ?? "").trim().toLowerCase();
  }
}
// Express middleware
/**
 * Creates an Express middleware that makes `res.json()` honour the
 * request's `Accept` header.
 *
 * The response format is negotiated once per request ("application/json"
 * is assumed when the header is missing). `res.json` is then replaced with
 * a version that serializes through `service`, sets the matching content
 * type and sends the raw bytes.
 *
 * @param service Serialization service used for negotiation and encoding.
 * @returns The middleware function.
 */
function serializationMiddleware(service: SerializationService) {
  return (
    req: express.Request,
    res: express.Response,
    next: express.NextFunction,
  ) => {
    // Determine response format
    const format = service.negotiate(req.headers.accept || "application/json");

    // Override res.json; the original implementation is not needed because
    // every format, JSON included, is produced by the service.
    res.json = (data: any) => {
      const { buffer, contentType } = service.serialize(data, format.name);
      res.contentType(contentType);
      res.send(buffer);
      return res;
    };
    next();
  };
}