数据建模实践

数据模型是系统的基石。好的数据模型能让业务逻辑清晰、查询高效、扩展灵活。本文将分享数据建模的原则、模式和实战经验。

建模原则

1. 业务驱动

数据模型应该反映业务领域，而非技术实现。

❌ 错误：按技术分表
   - data_table_1
   - data_table_2
   - config_json

✅ 正确：按业务领域建模
   - users (用户)
   - orders (订单)
   - products (商品)
   - payments (支付)

2. 适度冗余

在读多写少的场景，适当冗余可以避免 JOIN，提升查询性能。

-- 订单表冗余用户名和商品名
CREATE TABLE orders (
    id BIGINT PRIMARY KEY,
    user_id BIGINT NOT NULL,
    user_name VARCHAR(64),      -- 冗余字段
    product_id BIGINT NOT NULL,
    product_name VARCHAR(128),  -- 冗余字段
    amount DECIMAL(10,2),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 查询时无需 JOIN
SELECT id, user_name, product_name, amount 
FROM orders 
WHERE user_id = 123;

3. 预留扩展

为未来变化预留空间。

-- 使用 JSON 字段存储扩展属性
CREATE TABLE products (
    id BIGINT PRIMARY KEY,
    name VARCHAR(128) NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    category VARCHAR(32),
    -- 扩展属性，不同品类有不同字段
    attributes JSON,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 电子产品
INSERT INTO products (name, price, category, attributes) VALUES 
('iPhone 15', 7999.00, 'electronics', '{"brand": "Apple", "storage": "256GB", "color": "black"}');

-- 服装
INSERT INTO products (name, price, category, attributes) VALUES 
('T-Shirt', 99.00, 'clothing', '{"size": "L", "material": "cotton", "color": "white"}');

常见建模模式

1. 主从表模式

一对多关系的经典模式。

-- 订单主表
CREATE TABLE orders (
    id BIGINT PRIMARY KEY,
    user_id BIGINT NOT NULL,
    total_amount DECIMAL(10,2) NOT NULL,
    status VARCHAR(20) DEFAULT 'pending',
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_user_id (user_id),
    INDEX idx_status_created (status, created_at)
);

-- 订单明细表
CREATE TABLE order_items (
    id BIGINT PRIMARY KEY,
    order_id BIGINT NOT NULL,
    product_id BIGINT NOT NULL,
    quantity INT NOT NULL,
    unit_price DECIMAL(10,2) NOT NULL,
    subtotal DECIMAL(10,2) NOT NULL,
    INDEX idx_order_id (order_id),
    FOREIGN KEY (order_id) REFERENCES orders(id)
);

// Go 模型定义
type Order struct {
    ID          int64       `json:"id" db:"id"`
    UserID      int64       `json:"user_id" db:"user_id"`
    TotalAmount float64     `json:"total_amount" db:"total_amount"`
    Status      string      `json:"status" db:"status"`
    Items       []OrderItem `json:"items" db:"-"` // 关联加载
    CreatedAt   time.Time   `json:"created_at" db:"created_at"`
}

type OrderItem struct {
    ID        int64   `json:"id" db:"id"`
    OrderID   int64   `json:"order_id" db:"order_id"`
    ProductID int64   `json:"product_id" db:"product_id"`
    Quantity  int     `json:"quantity" db:"quantity"`
    UnitPrice float64 `json:"unit_price" db:"unit_price"`
    Subtotal  float64 `json:"subtotal" db:"subtotal"`
}

2. 状态机模式

用于有明确状态流转的业务。

-- 订单状态表
CREATE TABLE order_status_logs (
    id BIGINT PRIMARY KEY AUTO_INCREMENT,
    order_id BIGINT NOT NULL,
    from_status VARCHAR(20),
    to_status VARCHAR(20) NOT NULL,
    operator_id BIGINT,
    reason VARCHAR(255),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_order_id (order_id)
);

// 状态机定义
type OrderStatus string

const (
    OrderStatusPending   OrderStatus = "pending"
    OrderStatusPaid      OrderStatus = "paid"
    OrderStatusShipped   OrderStatus = "shipped"
    OrderStatusDelivered OrderStatus = "delivered"
    OrderStatusCancelled OrderStatus = "cancelled"
    OrderStatusRefunded  OrderStatus = "refunded"
)

// 状态转换规则
var orderTransitions = map[OrderStatus][]OrderStatus{
    OrderStatusPending:   {OrderStatusPaid, OrderStatusCancelled},
    OrderStatusPaid:      {OrderStatusShipped, OrderStatusRefunded},
    OrderStatusShipped:   {OrderStatusDelivered, OrderStatusRefunded},
    OrderStatusDelivered: {OrderStatusRefunded},
    OrderStatusCancelled: {},
    OrderStatusRefunded:  {},
}

func (o *Order) CanTransitionTo(newStatus OrderStatus) bool {
    allowed, ok := orderTransitions[o.Status]
    if !ok {
        return false
    }
    for _, s := range allowed {
        if s == newStatus {
            return true
        }
    }
    return false
}

func (o *Order) TransitionTo(newStatus OrderStatus, operatorID int64, reason string) error {
    if !o.CanTransitionTo(newStatus) {
        return fmt.Errorf("invalid transition from %s to %s", o.Status, newStatus)
    }
    
    oldStatus := o.Status
    o.Status = newStatus
    
    // 记录状态变更日志
    log := &OrderStatusLog{
        OrderID:    o.ID,
        FromStatus: oldStatus,
        ToStatus:   newStatus,
        OperatorID: operatorID,
        Reason:     reason,
    }
    
    return o.repo.SaveWithLog(o, log)
}

3. 快照模式

保存历史状态，用于审计和回溯。

-- 商品快照表（订单创建时保存）
CREATE TABLE product_snapshots (
    id BIGINT PRIMARY KEY AUTO_INCREMENT,
    product_id BIGINT NOT NULL,
    name VARCHAR(128) NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    description TEXT,
    attributes JSON,
    snapshot_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_product_id (product_id)
);

-- 订单关联商品快照
CREATE TABLE order_items (
    id BIGINT PRIMARY KEY,
    order_id BIGINT NOT NULL,
    product_snapshot_id BIGINT NOT NULL,  -- 关联快照而非原商品
    quantity INT NOT NULL,
    unit_price DECIMAL(10,2) NOT NULL,
    FOREIGN KEY (product_snapshot_id) REFERENCES product_snapshots(id)
);

// 创建订单时保存商品快照
func (s *OrderService) CreateOrder(ctx context.Context, req *CreateOrderReq) (*Order, error) {
    var items []OrderItem
    var totalAmount float64
    
    for _, item := range req.Items {
        product, err := s.productRepo.GetByID(ctx, item.ProductID)
        if err != nil {
            return nil, err
        }
        
        // 创建商品快照
        snapshot := &ProductSnapshot{
            ProductID:   product.ID,
            Name:        product.Name,
            Price:       product.Price,
            Description: product.Description,
            Attributes:  product.Attributes,
        }
        if err := s.snapshotRepo.Create(ctx, snapshot); err != nil {
            return nil, err
        }
        
        subtotal := product.Price * float64(item.Quantity)
        items = append(items, OrderItem{
            ProductSnapshotID: snapshot.ID,
            Quantity:          item.Quantity,
            UnitPrice:         product.Price,
            Subtotal:          subtotal,
        })
        totalAmount += subtotal
    }
    
    order := &Order{
        UserID:      req.UserID,
        TotalAmount: totalAmount,
        Items:       items,
    }
    
    return s.orderRepo.Create(ctx, order)
}

4. 软删除模式

逻辑删除而非物理删除。

CREATE TABLE users (
    id BIGINT PRIMARY KEY,
    username VARCHAR(64) NOT NULL,
    email VARCHAR(128) NOT NULL,
    status VARCHAR(20) DEFAULT 'active',
    deleted_at TIMESTAMP NULL,  -- NULL 表示未删除
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    
    -- 唯一索引需要考虑软删除
    UNIQUE INDEX idx_username_deleted (username, deleted_at),
    UNIQUE INDEX idx_email_deleted (email, deleted_at)
);

// GORM 软删除
type User struct {
    ID        int64          `gorm:"primaryKey"`
    Username  string         `gorm:"uniqueIndex:idx_username_deleted"`
    Email     string         `gorm:"uniqueIndex:idx_email_deleted"`
    Status    string         `gorm:"default:active"`
    DeletedAt gorm.DeletedAt `gorm:"index"` // 软删除字段
    CreatedAt time.Time
    UpdatedAt time.Time
}

// 查询自动过滤已删除记录
db.Find(&users) // WHERE deleted_at IS NULL

// 包含已删除记录
db.Unscoped().Find(&users)

// 软删除
db.Delete(&user) // UPDATE users SET deleted_at = NOW() WHERE id = ?

// 物理删除
db.Unscoped().Delete(&user) // DELETE FROM users WHERE id = ?

5. 多态关联模式

一个表关联多种类型的实体。

-- 评论表（可以评论商品、文章、视频等）
CREATE TABLE comments (
    id BIGINT PRIMARY KEY AUTO_INCREMENT,
    user_id BIGINT NOT NULL,
    content TEXT NOT NULL,
    -- 多态关联
    target_type VARCHAR(32) NOT NULL,  -- 'product', 'article', 'video'
    target_id BIGINT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_target (target_type, target_id),
    INDEX idx_user_id (user_id)
);

-- 操作日志表
CREATE TABLE activity_logs (
    id BIGINT PRIMARY KEY AUTO_INCREMENT,
    user_id BIGINT NOT NULL,
    action VARCHAR(32) NOT NULL,  -- 'create', 'update', 'delete'
    target_type VARCHAR(32) NOT NULL,
    target_id BIGINT NOT NULL,
    changes JSON,  -- 变更内容
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_target (target_type, target_id),
    INDEX idx_user_created (user_id, created_at)
);

// 多态关联接口
type Commentable interface {
    GetTargetType() string
    GetTargetID() int64
}

type Comment struct {
    ID         int64     `json:"id"`
    UserID     int64     `json:"user_id"`
    Content    string    `json:"content"`
    TargetType string    `json:"target_type"`
    TargetID   int64     `json:"target_id"`
    CreatedAt  time.Time `json:"created_at"`
}

func (s *CommentService) Create(ctx context.Context, userID int64, target Commentable, content string) (*Comment, error) {
    comment := &Comment{
        UserID:     userID,
        Content:    content,
        TargetType: target.GetTargetType(),
        TargetID:   target.GetTargetID(),
    }
    return s.repo.Create(ctx, comment)
}

// Product 实现 Commentable
func (p *Product) GetTargetType() string { return "product" }
func (p *Product) GetTargetID() int64    { return p.ID }

索引设计

索引原则

选择性高的列优先 - 区分度越高越好
最左前缀原则 - 复合索引按查询条件顺序设计
覆盖索引 - 索引包含查询所需所有列
避免过多索引 - 影响写入性能

常见索引模式

-- 1. 单列索引
CREATE INDEX idx_user_id ON orders(user_id);

-- 2. 复合索引（注意顺序）
-- 查询: WHERE status = 'paid' AND created_at > '2025-01-01'
CREATE INDEX idx_status_created ON orders(status, created_at);

-- 3. 覆盖索引
-- 查询: SELECT id, status, amount FROM orders WHERE user_id = ?
CREATE INDEX idx_user_covering ON orders(user_id, status, amount);

-- 4. 前缀索引（长字符串）
CREATE INDEX idx_email_prefix ON users(email(20));

-- 5. 唯一索引
CREATE UNIQUE INDEX idx_username ON users(username);

索引优化案例

-- 原始查询（全表扫描）
EXPLAIN SELECT * FROM orders 
WHERE user_id = 123 
  AND status = 'paid' 
  AND created_at > '2025-01-01'
ORDER BY created_at DESC 
LIMIT 10;

-- 优化：创建复合索引
CREATE INDEX idx_user_status_created ON orders(user_id, status, created_at);

-- 优化后（索引范围扫描）
-- type: range, key: idx_user_status_created

分库分表设计

分片策略

策略	优点	缺点	适用场景
范围分片	扩容简单	热点问题	时间序列数据
哈希分片	数据均匀	扩容复杂	用户数据
一致性哈希	扩容平滑	实现复杂	分布式缓存

分表实现

// 分表路由
type ShardRouter struct {
    shardCount int
    tablePrefix string
}

func NewShardRouter(prefix string, count int) *ShardRouter {
    return &ShardRouter{
        shardCount:  count,
        tablePrefix: prefix,
    }
}

// 按用户 ID 分表
func (r *ShardRouter) GetTable(userID int64) string {
    shard := userID % int64(r.shardCount)
    return fmt.Sprintf("%s_%d", r.tablePrefix, shard)
}

// 分表仓储
type ShardedOrderRepo struct {
    db     *sql.DB
    router *ShardRouter
}

func (r *ShardedOrderRepo) GetByID(ctx context.Context, userID, orderID int64) (*Order, error) {
    table := r.router.GetTable(userID)
    query := fmt.Sprintf("SELECT * FROM %s WHERE id = ? AND user_id = ?", table)
    
    var order Order
    err := r.db.QueryRowContext(ctx, query, orderID, userID).Scan(
        &order.ID, &order.UserID, &order.TotalAmount, &order.Status, &order.CreatedAt,
    )
    return &order, err
}

func (r *ShardedOrderRepo) Create(ctx context.Context, order *Order) error {
    table := r.router.GetTable(order.UserID)
    query := fmt.Sprintf(
        "INSERT INTO %s (id, user_id, total_amount, status) VALUES (?, ?, ?, ?)",
        table,
    )
    _, err := r.db.ExecContext(ctx, query, order.ID, order.UserID, order.TotalAmount, order.Status)
    return err
}

全局 ID 生成

// Snowflake ID 生成器
type Snowflake struct {
    mu        sync.Mutex
    epoch     int64 // 起始时间戳
    nodeID    int64 // 节点 ID
    sequence  int64 // 序列号
    lastTime  int64
}

func NewSnowflake(nodeID int64) *Snowflake {
    return &Snowflake{
        epoch:  1704067200000, // 2024-01-01 00:00:00 UTC
        nodeID: nodeID,
    }
}

func (s *Snowflake) Generate() int64 {
    s.mu.Lock()
    defer s.mu.Unlock()
    
    now := time.Now().UnixMilli()
    
    if now == s.lastTime {
        s.sequence = (s.sequence + 1) & 0xFFF // 12 bit sequence
        if s.sequence == 0 {
            // 等待下一毫秒
            for now <= s.lastTime {
                now = time.Now().UnixMilli()
            }
        }
    } else {
        s.sequence = 0
    }
    
    s.lastTime = now
    
    // 41 bit timestamp + 10 bit node + 12 bit sequence
    id := ((now - s.epoch) << 22) | (s.nodeID << 12) | s.sequence
    return id
}

数据迁移

在线迁移方案

// 双写迁移
type MigrationOrderRepo struct {
    oldRepo *OldOrderRepo
    newRepo *NewOrderRepo
    phase   MigrationPhase
}

type MigrationPhase int

const (
    PhaseOldOnly    MigrationPhase = iota // 只写旧库
    PhaseDualWrite                         // 双写
    PhaseNewPrimary                        // 新库为主，旧库为备
    PhaseNewOnly                           // 只写新库
)

func (r *MigrationOrderRepo) Create(ctx context.Context, order *Order) error {
    switch r.phase {
    case PhaseOldOnly:
        return r.oldRepo.Create(ctx, order)
        
    case PhaseDualWrite:
        // 双写，旧库为主
        if err := r.oldRepo.Create(ctx, order); err != nil {
            return err
        }
        // 异步写新库
        go r.newRepo.Create(context.Background(), order)
        return nil
        
    case PhaseNewPrimary:
        // 双写，新库为主
        if err := r.newRepo.Create(ctx, order); err != nil {
            return err
        }
        go r.oldRepo.Create(context.Background(), order)
        return nil
        
    case PhaseNewOnly:
        return r.newRepo.Create(ctx, order)
    }
    return nil
}

func (r *MigrationOrderRepo) GetByID(ctx context.Context, id int64) (*Order, error) {
    switch r.phase {
    case PhaseOldOnly, PhaseDualWrite:
        return r.oldRepo.GetByID(ctx, id)
    case PhaseNewPrimary, PhaseNewOnly:
        order, err := r.newRepo.GetByID(ctx, id)
        if err != nil && r.phase == PhaseNewPrimary {
            // 降级到旧库
            return r.oldRepo.GetByID(ctx, id)
        }
        return order, err
    }
    return nil, errors.New("unknown phase")
}

总结

数据建模的核心要点：

业务驱动 - 模型反映业务领域，而非技术实现
适度冗余 - 读多写少场景，冗余换性能
状态机 - 明确状态流转，记录变更日志
快照模式 - 保存历史状态，支持审计回溯
索引优化 - 选择性、最左前缀、覆盖索引
分库分表 - 哈希分片、全局 ID、平滑迁移

好的数据模型是系统稳定性和可维护性的基础。

建模原则​

1. 业务驱动​

2. 适度冗余​

3. 预留扩展​

常见建模模式​

1. 主从表模式​

2. 状态机模式​

3. 快照模式​

4. 软删除模式​

5. 多态关联模式​

索引设计​

索引原则​

常见索引模式​

索引优化案例​

分库分表设计​

分片策略​

分表实现​

全局 ID 生成​

数据迁移​

在线迁移方案​

总结​