第 3 课:记忆系统拆解

课程目标

通过本课程,你将:

  • 深入理解三层记忆架构的设计原理和实现细节
  • 掌握记忆存储、检索、更新的完整生命周期管理
  • 学习语义搜索和向量嵌入的技术实现
  • 了解记忆压缩、冲突解决的算法原理
  • 能够设计和实现自己的记忆系统
  • 掌握隐私保护的技术方案

3.1 记忆系统架构总览

为什么需要记忆系统?

Claude Code 如果没有记忆,每次对话都会是"初次见面":

用户:我正在开发一个 Express 项目,使用 TypeScript
AI: 好的,我明白了。

[5 分钟后]
用户:我的项目用什么语言写的?
AI: 抱歉,我不确定。可能是 JavaScript、Python、Java...

有了记忆系统后:

用户:我正在开发一个 Express 项目,使用 TypeScript
AI: 好的,我已经记住了:你的项目使用 Express + TypeScript。

[5 分钟后]
用户:我的项目用什么语言写的?
AI: 你的项目使用 TypeScript,基于 Express 框架。

三层记忆架构

重要

不重要

用户输入

上下文分析器

短期记忆处理

长期记忆处理

全局记忆处理

会话缓存 Redis/LRU

工作记忆区

重要性过滤器

SQLite 数据库

丢弃队列

用户画像

偏好设置

技能库

查询引擎

向量搜索

关键词搜索

时间过滤

Embedding 模型

记忆索引

整理引擎

冲突检测

压缩引擎

遗忘曲线

各层职责

层级 作用 存储位置 生命周期 容量限制
短期记忆 当前对话上下文 LRU 缓存 (内存) 会话期间 ~1000 条
长期记忆 持久化知识 SQLite 数据库 永久(可清理) 无限制
全局记忆 用户画像、偏好 SQLite + 配置文件 永久 ~100 条核心

记忆数据类型

// packages/core/src/memory/types.ts

/**
 * Core record describing a single stored memory.
 */
export interface Memory {
  /** Unique identifier. */
  id: string;
  
  /** Memory category (see MemoryType). */
  type: MemoryType;
  
  /** Memory content as plain text. */
  content: string;
  
  /** Structured metadata attached to the memory. */
  metadata: MemoryMetadata;
  
  /** Tags used for quick filtering. */
  tags?: string[];
  
  /** 
   * Importance score in the range 0.0 - 1.0.
   * 0.0 = may be discarded at any time
   * 1.0 = must never be deleted
   */
  importance: number;
  
  /** Number of times this memory has been accessed. */
  accessCount: number;
  
  /** Creation time. */
  createdAt: Date;
  
  /** Time of the last update. */
  updatedAt: Date;
  
  /** 
   * Optional expiry time.
   * Intended for temporary memories.
   */
  expiresAt?: Date;
  
  /** 
   * Embedding vector used for semantic search.
   * Typically an array of 1536 floating-point numbers.
   */
  embedding?: number[];
  
  /** Where this memory came from. */
  source?: MemorySource;
}

/** Categories a memory can belong to; the string values are what gets persisted. */
export enum MemoryType {
  SHORT_TERM = 'short_term',    // short-term memory
  LONG_TERM = 'long_term',      // long-term memory
  GLOBAL = 'global',            // global memory
  EPISODIC = 'episodic',        // episodic memory (a specific event)
  SEMANTIC = 'semantic',        // semantic memory (factual knowledge)
  PROCEDURAL = 'procedural',    // procedural memory (how to do something)
}

/** Optional structured details attached to a Memory. */
export interface MemoryMetadata {
  /** Path of the related project. */
  projectPath?: string;
  
  /** Related files. */
  files?: string[];
  
  /** Related Git commit. */
  commitHash?: string;
  
  /** Sentiment marker (positive / negative / neutral). */
  sentiment?: 'positive' | 'negative' | 'neutral';
  
  /** Confidence score. */
  confidence?: number;
  
  /** Whether this memory is the result of compression. */
  isCompressed?: boolean;
  
  /** IDs of the original memories, when this one is a compressed summary. */
  compressedFrom?: string[];
  
  /** Free-form custom fields. */
  custom?: Record<string, any>;
}

/** Provenance of a Memory. */
export interface MemorySource {
  /** Kind of source that produced the memory. */
  type: 'user_input' | 'ai_response' | 'tool_result' | 'system_event';
  
  /** Session ID. */
  sessionId: string;
  
  /** Message ID. */
  messageId?: string;
  
  /** Timestamp. */
  timestamp: Date;
}

3.2 记忆存储机制

SQLite 数据库设计

// packages/core/src/memory/database.ts
import Database from 'better-sqlite3';
import { Memory, MemoryType } from './types';

/** Raw shape of a row read from the `memories` table. */
interface MemoryRow {
  id: string;
  type: string;
  content: string;
  metadata: string | null;
  tags: string | null;
  importance: number;
  access_count: number;
  created_at: string;
  updated_at: string;
  expires_at: string | null;
  is_compressed: number | null;
  compressed_from: string | null;
  source_type: string | null;
  source_session_id: string | null;
  source_message_id: string | null;
}

/**
 * SQLite-backed persistent store for memories.
 *
 * Owns the schema (main table, vector table, relation graph, access log and
 * an FTS5 index) and exposes CRUD, filtered queries and maintenance helpers.
 *
 * All timestamps written by this class are ISO-8601 UTC strings produced by
 * `Date.prototype.toISOString()`, so they sort lexicographically and parse
 * unambiguously with `new Date(...)`.
 */
export class MemoryDatabase {
  /** Columns that `findByFilters` accepts inside `orderBy`. */
  private static readonly SORTABLE_COLUMNS: ReadonlySet<string> = new Set([
    'id',
    'type',
    'importance',
    'access_count',
    'created_at',
    'updated_at',
    'expires_at',
  ]);

  private static readonly INSERT_SQL = `
      INSERT INTO memories (
        id, type, content, metadata, tags, importance,
        access_count, created_at, updated_at, expires_at,
        is_compressed, compressed_from, source_type,
        source_session_id, source_message_id
      ) VALUES (
        ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
      )
    `;

  private db: Database.Database;
  
  /**
   * Opens (or creates) the database file and makes sure the schema and
   * indexes exist.
   *
   * NOTE(review): the schema uses a `vec0` virtual table, which presumably
   * comes from the sqlite-vec extension; it has to be loaded into this
   * connection before `initializeSchema()` runs — confirm where that happens.
   *
   * @param dbPath Path of the SQLite database file.
   */
  constructor(dbPath: string) {
    this.db = new Database(dbPath);
    this.initializeSchema();
    this.createIndexes();
  }
  
  /**
   * Creates all tables, virtual tables and FTS synchronisation triggers.
   */
  private initializeSchema(): void {
    this.db.exec(`
      -- Main memory table
      CREATE TABLE IF NOT EXISTS memories (
        id TEXT PRIMARY KEY,
        type TEXT NOT NULL CHECK(type IN (
          'short_term', 'long_term', 'global',
          'episodic', 'semantic', 'procedural'
        )),
        content TEXT NOT NULL,
        metadata JSON,
        tags JSON,
        importance REAL DEFAULT 0.5 CHECK(importance >= 0 AND importance <= 1),
        access_count INTEGER DEFAULT 0,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        expires_at DATETIME,
        is_compressed BOOLEAN DEFAULT FALSE,
        compressed_from JSON,
        source_type TEXT,
        source_session_id TEXT,
        source_message_id TEXT
      );
      
      -- Embedding vectors (SQLite vector extension)
      CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings 
      USING vec0(
        memory_id TEXT PRIMARY KEY,
        embedding FLOAT[1536]
      );
      
      -- Memory relations (graph structure)
      CREATE TABLE IF NOT EXISTS memory_relations (
        source_id TEXT REFERENCES memories(id) ON DELETE CASCADE,
        target_id TEXT REFERENCES memories(id) ON DELETE CASCADE,
        relation_type TEXT NOT NULL,
        strength REAL DEFAULT 1.0,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        PRIMARY KEY (source_id, target_id, relation_type)
      );
      
      -- Access log
      CREATE TABLE IF NOT EXISTS memory_access_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        memory_id TEXT REFERENCES memories(id) ON DELETE SET NULL,
        access_type TEXT NOT NULL,
        context JSON,
        accessed_at DATETIME DEFAULT CURRENT_TIMESTAMP
      );
      
      -- Full-text search index (external-content FTS5 table)
      CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts 
      USING fts5(
        content,
        tags,
        content='memories',
        content_rowid='rowid'
      );
      
      -- Triggers keeping the FTS index in sync with the main table
      CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
        INSERT INTO memories_fts(rowid, content, tags) 
        VALUES (NEW.rowid, NEW.content, json(NEW.tags));
      END;
      
      CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
        INSERT INTO memories_fts(memories_fts, rowid, content, tags) 
        VALUES ('delete', OLD.rowid, OLD.content, json(OLD.tags));
      END;
      
      -- Without this trigger an edited content/tags value would leave the
      -- old text in the index; limited to the indexed columns so that
      -- access-count bumps do not re-index the row.
      CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE OF content, tags ON memories BEGIN
        INSERT INTO memories_fts(memories_fts, rowid, content, tags) 
        VALUES ('delete', OLD.rowid, OLD.content, json(OLD.tags));
        INSERT INTO memories_fts(rowid, content, tags) 
        VALUES (NEW.rowid, NEW.content, json(NEW.tags));
      END;
    `);
  }
  
  /**
   * Creates the secondary indexes used by the query paths below.
   */
  private createIndexes(): void {
    this.db.exec(`
      -- Lookup by type
      CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
      
      -- Ordering by importance
      CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance DESC);
      
      -- Time-range queries
      CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
      
      -- Expiry lookups (cleanup task)
      CREATE INDEX IF NOT EXISTS idx_memories_expires ON memories(expires_at) 
        WHERE expires_at IS NOT NULL;
      
      -- Ordering by access frequency
      CREATE INDEX IF NOT EXISTS idx_memories_access ON memories(access_count DESC);
      
      -- Relation table
      CREATE INDEX IF NOT EXISTS idx_relations_source ON memory_relations(source_id);
      CREATE INDEX IF NOT EXISTS idx_relations_target ON memory_relations(target_id);
      
      -- Access log
      CREATE INDEX IF NOT EXISTS idx_access_log_memory ON memory_access_log(memory_id);
      CREATE INDEX IF NOT EXISTS idx_access_log_time ON memory_access_log(accessed_at DESC);
    `);
  }
  
  /**
   * Inserts one memory together with its embedding (if any).
   *
   * The row and the embedding are written in a single transaction so a
   * failure cannot leave one without the other.
   *
   * @param memory Memory to persist.
   */
  insert(memory: Memory): void {
    const statement = this.db.prepare(MemoryDatabase.INSERT_SQL);
    const write = this.db.transaction(() => {
      this.writeMemory(statement, memory);
    });
    write();
  }
  
  /**
   * Inserts many memories atomically (initialisation / import).
   *
   * Every field is persisted exactly as `insert` would persist it, including
   * metadata, tags, source and embedding.
   *
   * @param memories Memories to persist.
   */
  insertBatch(memories: Memory[]): void {
    // Prepare once, reuse for every row.
    const statement = this.db.prepare(MemoryDatabase.INSERT_SQL);
    const insertMany = this.db.transaction((mems: Memory[]) => {
      for (const mem of mems) {
        this.writeMemory(statement, mem);
      }
    });
    
    insertMany(memories);
  }
  
  /**
   * Looks up a memory by primary key.
   *
   * @param id Memory ID.
   * @returns The memory, or `null` when no row has that ID.
   */
  findById(id: string): Memory | null {
    const row = this.db.prepare(`
      SELECT * FROM memories WHERE id = ?
    `).get(id) as MemoryRow | undefined;
    
    return row ? this.rowToMemory(row) : null;
  }
  
  /**
   * Returns the memories matching every supplied filter.
   *
   * @param filters Filter, ordering and paging options. `orderBy` accepts a
   *   comma-separated list of `<column> [ASC|DESC]` terms over the columns in
   *   `SORTABLE_COLUMNS`; it defaults to `created_at DESC`. `limit` defaults
   *   to 100 and `offset` to 0.
   * @returns Matching memories in the requested order.
   * @throws Error when `orderBy` contains anything other than allowed terms.
   */
  findByFilters(filters: MemoryFilters): Memory[] {
    const conditions: string[] = [];
    const params: unknown[] = [];
    
    if (filters.type) {
      conditions.push('type = ?');
      params.push(filters.type);
    }
    
    if (filters.tags && filters.tags.length > 0) {
      conditions.push(`
        EXISTS (
          SELECT 1 FROM json_each(tags) AS tag 
          WHERE tag.value IN (${filters.tags.map(() => '?').join(',')})
        )
      `);
      params.push(...filters.tags);
    }
    
    if (filters.minImportance !== undefined) {
      conditions.push('importance >= ?');
      params.push(filters.minImportance);
    }
    
    if (filters.timeRange) {
      conditions.push('created_at BETWEEN ? AND ?');
      params.push(
        filters.timeRange.start.toISOString(),
        filters.timeRange.end.toISOString()
      );
    }
    
    if (filters.searchQuery) {
      // The FTS table is keyed by the integer rowid of `memories`, not by
      // the TEXT `id`, so the join has to go through rowid.
      conditions.push(`
        rowid IN (
          SELECT rowid FROM memories_fts 
          WHERE memories_fts MATCH ?
        )
      `);
      params.push(filters.searchQuery);
    }
    
    const whereClause = conditions.length > 0 
      ? 'WHERE ' + conditions.join(' AND ') 
      : '';
    
    // ORDER BY cannot be bound as a parameter, so it is validated against
    // an allow-list before being interpolated.
    const orderBy = MemoryDatabase.buildOrderBy(filters.orderBy);
    const limit = filters.limit ?? 100;
    const offset = filters.offset ?? 0;
    
    const query = `
      SELECT * FROM memories
      ${whereClause}
      ORDER BY ${orderBy}
      LIMIT ? OFFSET ?
    `;
    
    params.push(limit, offset);
    
    const rows = this.db.prepare(query).all(...params) as MemoryRow[];
    return rows.map(row => this.rowToMemory(row));
  }
  
  /**
   * Applies a partial update to a memory and refreshes `updated_at`.
   *
   * Only `content`, `importance`, `tags`, `metadata` and `embedding` are
   * honoured; other keys of `updates` are ignored.
   *
   * @param id Memory ID.
   * @param updates Fields to change.
   */
  update(id: string, updates: Partial<Memory>): void {
    const fields: string[] = [];
    const params: unknown[] = [];
    
    if (updates.content !== undefined) {
      fields.push('content = ?');
      params.push(updates.content);
    }
    
    if (updates.importance !== undefined) {
      fields.push('importance = ?');
      params.push(updates.importance);
    }
    
    if (updates.tags !== undefined) {
      fields.push('tags = ?');
      params.push(JSON.stringify(updates.tags));
    }
    
    if (updates.metadata !== undefined) {
      fields.push('metadata = ?');
      params.push(JSON.stringify(updates.metadata));
    }
    
    // The timestamp is always refreshed, in the same ISO format insert uses.
    fields.push('updated_at = ?');
    params.push(MemoryDatabase.nowIso());
    
    params.push(id);
    
    const apply = this.db.transaction(() => {
      this.db.prepare(`
        UPDATE memories SET ${fields.join(', ')} WHERE id = ?
      `).run(...params);
      
      if (updates.embedding) {
        this.updateEmbedding(id, updates.embedding);
      }
    });
    apply();
  }
  
  /**
   * Deletes a memory and its embedding.
   *
   * The embedding lives in a virtual table without a foreign key, so it is
   * removed explicitly to avoid orphaned vectors.
   *
   * @param id Memory ID.
   */
  delete(id: string): void {
    const remove = this.db.transaction(() => {
      this.db.prepare('DELETE FROM memory_embeddings WHERE memory_id = ?').run(id);
      this.db.prepare('DELETE FROM memories WHERE id = ?').run(id);
    });
    remove();
  }
  
  /**
   * Increments the access counter, refreshes `updated_at` and appends a
   * `read` entry to the access log.
   *
   * @param id Memory ID.
   */
  incrementAccessCount(id: string): void {
    this.db.prepare(`
      UPDATE memories 
      SET access_count = access_count + 1,
          updated_at = ?
      WHERE id = ?
    `).run(MemoryDatabase.nowIso(), id);
    
    this.logAccess(id, 'read');
  }
  
  /**
   * Deletes every memory whose expiry time has passed, along with its
   * embedding.
   *
   * `expires_at` holds ISO-8601 text, so it is compared with an ISO-8601
   * "now"; comparing against CURRENT_TIMESTAMP ('YYYY-MM-DD HH:MM:SS') would
   * be a text comparison between two formats and would miss rows that
   * expired earlier on the current UTC day.
   *
   * @returns Number of memories removed.
   */
  cleanupExpired(): number {
    const now = MemoryDatabase.nowIso();
    const purge = this.db.transaction((): number => {
      this.db.prepare(`
        DELETE FROM memory_embeddings
        WHERE memory_id IN (
          SELECT id FROM memories
          WHERE expires_at IS NOT NULL AND expires_at < ?
        )
      `).run(now);
      
      const result = this.db.prepare(`
        DELETE FROM memories 
        WHERE expires_at IS NOT NULL 
          AND expires_at < ?
      `).run(now);
      
      return result.changes;
    });
    
    return purge();
  }
  
  /**
   * Aggregates per-type statistics and the overall row count.
   *
   * @returns Counts, average importance, total accesses and the oldest /
   *   newest creation time per memory type, plus the total number of rows.
   */
  getStats(): MemoryStats {
    const rows = this.db.prepare(`
      SELECT 
        type,
        COUNT(*) as count,
        AVG(importance) as avg_importance,
        SUM(access_count) as total_accesses,
        MIN(created_at) as oldest,
        MAX(created_at) as newest
      FROM memories
      GROUP BY type
    `).all() as Array<{
      type: string;
      count: number;
      avg_importance: number;
      total_accesses: number;
      oldest: string;
      newest: string;
    }>;
    
    const totalMemories = this.db.prepare(`
      SELECT COUNT(*) as count FROM memories
    `).get() as { count: number };
    
    const byType: MemoryStats['byType'] = {};
    for (const row of rows) {
      byType[row.type] = {
        count: row.count,
        avgImportance: row.avg_importance,
        totalAccesses: row.total_accesses,
        oldest: new Date(row.oldest),
        newest: new Date(row.newest),
      };
    }
    
    return { byType, total: totalMemories.count };
  }
  
  /**
   * Current time as an ISO-8601 UTC string.
   */
  private static nowIso(): string {
    return new Date().toISOString();
  }
  
  /**
   * Validates a caller-supplied sort expression and returns SQL that is safe
   * to interpolate into ORDER BY.
   */
  private static buildOrderBy(orderBy: string | undefined): string {
    if (!orderBy || orderBy.trim() === '') {
      return 'created_at DESC';
    }
    
    return orderBy
      .split(',')
      .map(term => {
        const match = /^\s*([A-Za-z_]+)(?:\s+(ASC|DESC))?\s*$/i.exec(term);
        const column = match?.[1]?.toLowerCase();
        if (!column || !MemoryDatabase.SORTABLE_COLUMNS.has(column)) {
          throw new Error(`Unsupported orderBy term: "${term.trim()}"`);
        }
        const direction = (match?.[2] ?? 'ASC').toUpperCase();
        return `${column} ${direction}`;
      })
      .join(', ');
  }
  
  /**
   * Encodes an embedding as the float32 blob stored in `memory_embeddings`.
   */
  private static toVectorBlob(embedding: number[]): Buffer {
    return Buffer.from(new Float32Array(embedding).buffer);
  }
  
  /**
   * Runs the prepared INSERT for one memory and stores its embedding.
   * Callers are expected to wrap this in a transaction.
   */
  private writeMemory(statement: Database.Statement, memory: Memory): void {
    const metadata = memory.metadata ?? {};
    // Imported data may lack updatedAt; fall back to the creation time
    // rather than failing the whole batch.
    const updatedAt = memory.updatedAt ?? memory.createdAt;
    
    statement.run(
      memory.id,
      memory.type,
      memory.content,
      JSON.stringify(metadata),
      JSON.stringify(memory.tags ?? []),
      memory.importance,
      memory.accessCount ?? 0,
      memory.createdAt.toISOString(),
      updatedAt.toISOString(),
      memory.expiresAt?.toISOString() ?? null,
      // Compression info is declared on MemoryMetadata, not on Memory.
      metadata.isCompressed ? 1 : 0,
      metadata.compressedFrom ? JSON.stringify(metadata.compressedFrom) : null,
      memory.source?.type ?? null,
      memory.source?.sessionId ?? null,
      memory.source?.messageId ?? null
    );
    
    if (memory.embedding) {
      this.insertEmbedding(memory.id, memory.embedding);
    }
  }
  
  /**
   * Converts a database row into a Memory object.
   *
   * The embedding is not loaded here; it lives in `memory_embeddings`.
   */
  private rowToMemory(row: MemoryRow): Memory {
    const metadata: Memory['metadata'] = JSON.parse(row.metadata || '{}');
    // The dedicated columns mirror the metadata fields; fold them back in so
    // the result matches the Memory interface.
    if (row.is_compressed) {
      metadata.isCompressed = true;
    }
    if (row.compressed_from) {
      metadata.compressedFrom = JSON.parse(row.compressed_from);
    }
    
    return {
      id: row.id,
      type: row.type as MemoryType,
      content: row.content,
      metadata,
      tags: JSON.parse(row.tags || '[]'),
      importance: row.importance,
      accessCount: row.access_count,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
      expiresAt: row.expires_at ? new Date(row.expires_at) : undefined,
      source: row.source_type ? {
        type: row.source_type as NonNullable<Memory['source']>['type'],
        sessionId: row.source_session_id ?? '',
        messageId: row.source_message_id ?? undefined,
        // No separate source timestamp is stored; the creation time is used.
        timestamp: new Date(row.created_at),
      } : undefined,
    };
  }
  
  /**
   * Stores the embedding vector of a memory.
   */
  private insertEmbedding(memoryId: string, embedding: number[]): void {
    this.db.prepare(`
      INSERT INTO memory_embeddings (memory_id, embedding) 
      VALUES (?, ?)
    `).run(memoryId, MemoryDatabase.toVectorBlob(embedding));
  }
  
  /**
   * Replaces the embedding vector of a memory, inserting it when the memory
   * had none (a bare UPDATE would silently do nothing in that case).
   */
  private updateEmbedding(memoryId: string, embedding: number[]): void {
    const result = this.db.prepare(`
      UPDATE memory_embeddings 
      SET embedding = ? 
      WHERE memory_id = ?
    `).run(MemoryDatabase.toVectorBlob(embedding), memoryId);
    
    if (result.changes === 0) {
      this.insertEmbedding(memoryId, embedding);
    }
  }
  
  /**
   * Appends an entry to the access log.
   *
   * @param memoryId Memory that was accessed.
   * @param accessType Kind of access, e.g. `read`.
   * @param context Optional JSON-serialisable context; `{}` when omitted.
   */
  private logAccess(memoryId: string, accessType: string, context?: unknown): void {
    this.db.prepare(`
      INSERT INTO memory_access_log (memory_id, access_type, context, accessed_at)
      VALUES (?, ?, ?, ?)
    `).run(
      memoryId,
      accessType,
      JSON.stringify(context || {}),
      MemoryDatabase.nowIso()
    );
  }
}

/** Query options accepted by MemoryDatabase.findByFilters; all are optional and combined with AND. */
export interface MemoryFilters {
  /** Restrict to one memory type. */
  type?: MemoryType;
  /** Match memories carrying at least one of these tags. */
  tags?: string[];
  /** Lower bound (inclusive) on the importance score. */
  minImportance?: number;
  /** Inclusive creation-time window. */
  timeRange?: { start: Date; end: Date };
  /** Full-text query matched against the FTS index. */
  searchQuery?: string;
  /** Sort expression for ORDER BY; `created_at DESC` when omitted. */
  orderBy?: string;
  /** Maximum number of rows; 100 when omitted. */
  limit?: number;
  /** Number of rows to skip; 0 when omitted. */
  offset?: number;
}

/** Aggregate statistics returned by MemoryDatabase.getStats. */
export interface MemoryStats {
  /** Per-type aggregates, keyed by the memory type string. */
  byType: Record<string, {
    count: number;
    avgImportance: number;
    totalAccesses: number;
    oldest: Date;
    newest: Date;
  }>;
  /** Total number of stored memories. */
  total: number;
}

LRU 缓存实现(短期记忆)

// packages/core/src/memory/lru-cache.ts

/**
 * Node of the doubly linked list that records recency order.
 */
class LRUNode<K, V> {
  prev: LRUNode<K, V> | null = null;
  next: LRUNode<K, V> | null = null;
  
  constructor(public key: K, public value: V) {}
}

/**
 * LRU cache: in-memory storage for short-term memories.
 *
 * - Reads and writes are O(1) (hash map + doubly linked list).
 * - When the capacity is exceeded the least recently used entry is evicted
 *   and reported through `onEvict`.
 */
export class LRUCache<K, V> {
  private readonly capacity: number;
  private readonly cache: Map<K, LRUNode<K, V>>;
  private readonly head: LRUNode<K, V>; // sentinel on the most-recently-used side
  private readonly tail: LRUNode<K, V>; // sentinel on the least-recently-used side
  
  /**
   * Eviction hook (can be used to persist evicted entries).
   * Called after the entry has been removed from the cache.
   */
  onEvict?: (key: K, value: V) => void;
  
  /**
   * @param capacity Maximum number of entries kept before eviction starts.
   */
  constructor(capacity: number) {
    this.capacity = capacity;
    this.cache = new Map();
    
    // Sentinels never expose their key/value, so placeholder nulls are safe.
    this.head = new LRUNode<K, V>(null!, null!);
    this.tail = new LRUNode<K, V>(null!, null!);
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }
  
  /**
   * Returns the value stored under `key` and marks it most recently used.
   *
   * @returns The value, or `undefined` when the key is absent.
   */
  get(key: K): V | undefined {
    const node = this.cache.get(key);
    if (!node) return undefined;
    
    // A read only changes recency, never the number of entries.
    this.moveToHead(node);
    
    return node.value;
  }
  
  /**
   * Stores `value` under `key` and marks it most recently used. Inserting a
   * new key beyond the capacity evicts the least recently used entry.
   */
  set(key: K, value: V): void {
    const existing = this.cache.get(key);
    
    if (existing) {
      // Overwriting keeps the entry count unchanged.
      existing.value = value;
      this.moveToHead(existing);
      return;
    }
    
    const node = new LRUNode(key, value);
    this.cache.set(key, node);
    this.addToHead(node);
    
    if (this.cache.size > this.capacity) {
      this.evictLRU();
    }
  }
  
  /**
   * Removes `key` from the cache without invoking `onEvict`.
   *
   * @returns `true` when an entry was removed, `false` when the key was absent.
   */
  delete(key: K): boolean {
    const node = this.cache.get(key);
    if (!node) return false;
    
    this.removeNode(node);
    this.cache.delete(key);
    
    return true;
  }
  
  /**
   * Reports whether `key` is present. Does not affect recency.
   */
  has(key: K): boolean {
    return this.cache.has(key);
  }
  
  /**
   * Number of entries currently stored.
   */
  getSize(): number {
    // The map is the single source of truth, so the count cannot drift.
    return this.cache.size;
  }
  
  /**
   * Removes every entry without invoking `onEvict`.
   */
  clear(): void {
    this.cache.clear();
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }
  
  /**
   * Iterates over the keys in insertion order of the underlying map.
   */
  keys(): IterableIterator<K> {
    return this.cache.keys();
  }
  
  /**
   * Returns all values ordered from most to least recently used.
   */
  valuesByRecency(): V[] {
    const values: V[] = [];
    this.forEach((_key, value) => {
      values.push(value);
    });
    return values;
  }
  
  /**
   * Visits every entry from most to least recently used.
   *
   * @param callback Receives the key, the value and the zero-based position.
   */
  forEach(callback: (key: K, value: V, index: number) => void): void {
    let current = this.head.next;
    let index = 0;
    
    while (current !== null && current !== this.tail) {
      callback(current.key, current.value, index);
      current = current.next;
      index++;
    }
  }
  
  /**
   * Links `node` directly after the head sentinel.
   */
  private addToHead(node: LRUNode<K, V>): void {
    const first = this.head.next!;
    node.prev = this.head;
    node.next = first;
    first.prev = node;
    this.head.next = node;
  }
  
  /**
   * Unlinks `node` from the list.
   */
  private removeNode(node: LRUNode<K, V>): void {
    node.prev!.next = node.next;
    node.next!.prev = node.prev;
  }
  
  /**
   * Moves an already linked node to the most-recently-used position.
   */
  private moveToHead(node: LRUNode<K, V>): void {
    this.removeNode(node);
    this.addToHead(node);
  }
  
  /**
   * Evicts the least recently used entry and notifies `onEvict`.
   */
  private evictLRU(): void {
    const lru = this.tail.prev;
    if (lru === null || lru === this.head) return;
    
    this.removeNode(lru);
    this.cache.delete(lru.key);
    
    this.onEvict?.(lru.key, lru.value);
  }
}

// Usage example
const shortTermMemory = new LRUCache<string, Memory>(1000);

// Register the eviction hook before adding entries so that no evicted
// memory is lost: evicted entries are persisted to the database.
// NOTE(review): `database` is assumed to be a MemoryDatabase instance
// already in scope — confirm against the surrounding module.
shortTermMemory.onEvict = (key, memory) => {
  console.log('记忆被淘汰,持久化到数据库:', key);
  database.insert(memory);
};

// Add a memory; every field required by the Memory interface is supplied so
// that the object can later be persisted as-is.
shortTermMemory.set('mem-1', {
  id: 'mem-1',
  type: MemoryType.SHORT_TERM,
  content: '用户喜欢使用 Tab 缩进',
  metadata: {},
  importance: 0.7,
  accessCount: 0,
  createdAt: new Date(),
  updatedAt: new Date(),
});

3.3 记忆生命周期管理

创建阶段

// packages/core/src/memory/memory-creator.ts
import { Memory, MemoryType, MemorySource } from './types';
import { generateUUID } from '../utils/uuid';

/**
 * Builds Memory objects from conversation content: extracts the relevant
 * text, scores its importance, tags it, embeds it and classifies it.
 */
export class MemoryCreator {
  /** Regex patterns used for lightweight entity extraction. */
  private static readonly ENTITY_PATTERNS: ReadonlyArray<{ type: string; regex: RegExp }> = [
    { type: 'technology', regex: /\b(TypeScript|JavaScript|Python|React|Vue|Node\.js)\b/g },
    { type: 'version', regex: /\b(\d+\.\d+\.\d+)\b/g },
    { type: 'file', regex: /[\w\-\/]+\.(ts|js|json|md|py)/g },
    { type: 'command', regex: /\$\s*\w+/g },
  ];

  private importanceCalculator: ImportanceCalculator;
  private embeddingGenerator: EmbeddingGenerator;
  private tagExtractor: TagExtractor;
  
  /**
   * @param config Configuration; `config.apiKey` is handed to the embedding
   *   generator.
   */
  constructor(config: MemoryConfig) {
    this.importanceCalculator = new ImportanceCalculator();
    this.embeddingGenerator = new EmbeddingGenerator(config.apiKey);
    this.tagExtractor = new TagExtractor();
  }
  
  /**
   * Creates a memory from a user message.
   *
   * @param content Raw message text.
   * @param context Context of the message (session, project, files, ...).
   * @returns A fully populated memory whose source type is `user_input`.
   */
  async createFromUserInput(
    content: string,
    context: MessageContext
  ): Promise<Memory> {
    return this.buildMemory(content, context, 'user_input');
  }
  
  /**
   * Creates a memory from an AI response.
   *
   * Responses containing code blocks or configuration become procedural
   * memories; everything else goes through the regular pipeline and is
   * recorded with source type `ai_response`.
   *
   * @param content Raw response text.
   * @param context Context of the response.
   */
  async createFromAIResponse(
    content: string,
    context: ResponseContext
  ): Promise<Memory> {
    const codeBlocks = this.extractCodeBlocks(content);
    const configurations = this.extractConfigurations(content);
    
    if (codeBlocks.length > 0 || configurations.length > 0) {
      return this.createProceduralMemory(codeBlocks, configurations, context);
    }
    
    // Same pipeline as user input, but attributed to the AI so that the
    // provenance stored with the memory is correct.
    return this.buildMemory(content, context, 'ai_response');
  }
  
  /**
   * Shared creation pipeline: extract, score, tag, embed, assemble.
   */
  private async buildMemory(
    content: string,
    context: MessageContext,
    sourceType: MemorySource['type']
  ): Promise<Memory> {
    // 1. Extract the key information.
    const extracted = await this.extractKeyInformation(content, context);
    
    // 2-4. Importance, tags and embedding only depend on the extracted
    // text, so they are computed concurrently.
    const [importance, tags, embedding] = await Promise.all([
      this.importanceCalculator.calculate({
        content: extracted.content,
        context,
        userFeedback: context.userFeedback,
      }),
      this.tagExtractor.extract(extracted.content),
      this.embeddingGenerator.generate(extracted.content),
    ]);
    
    // 5. Assemble the memory; one timestamp keeps all time fields equal.
    const now = new Date();
    const memory: Memory = {
      id: generateUUID(),
      type: this.determineMemoryType(extracted, importance),
      content: extracted.content,
      metadata: {
        projectPath: context.projectPath,
        files: context.files,
        confidence: extracted.confidence,
      },
      tags,
      importance,
      accessCount: 0,
      createdAt: now,
      updatedAt: now,
      embedding,
      source: {
        type: sourceType,
        sessionId: context.sessionId,
        messageId: context.messageId,
        timestamp: now,
      },
    };
    
    return memory;
  }
  
  /**
   * Reduces the content to its relevant part and estimates how reliable the
   * extraction is.
   *
   * @returns The filtered content and a confidence score.
   */
  private async extractKeyInformation(
    content: string,
    context: MessageContext
  ): Promise<{ content: string; confidence: number }> {
    // Entity and relation extraction are independent of each other.
    const [entities, relations] = await Promise.all([
      this.extractEntities(content),
      this.extractRelations(content),
    ]);
    
    // Drop content that is irrelevant given the entities found.
    const filtered = this.filterIrrelevantContent(content, entities);
    
    const confidence = this.calculateConfidence(entities, relations);
    
    return {
      content: filtered,
      confidence,
    };
  }
  
  /**
   * Extracts entities (technologies, versions, file names, shell commands)
   * using regular expressions.
   *
   * @returns One entity per match, with its type, matched text and offset.
   */
  private async extractEntities(text: string): Promise<Entity[]> {
    const entities: Entity[] = [];
    
    for (const pattern of MemoryCreator.ENTITY_PATTERNS) {
      // matchAll works on a copy of the regex, so the shared /g patterns
      // carry no state between calls.
      for (const match of text.matchAll(pattern.regex)) {
        entities.push({
          type: pattern.type,
          value: match[0],
          position: match.index,
        });
      }
    }
    
    return entities;
  }
  
  /**
   * Chooses the memory type from the extraction flags and the importance.
   *
   * NOTE(review): callers pass the `{ content, confidence }` result of
   * extractKeyInformation, which carries none of the flags read here
   * (`isFact`, `containsSteps`, `isEvent`, `isKnowledge`); unless they are
   * populated elsewhere only the importance-based fallback applies —
   * confirm against the ExtractedInfo definition.
   */
  private determineMemoryType(
    extracted: ExtractedInfo,
    importance: number
  ): MemoryType {
    // Highly important fact -> global memory
    if (importance > 0.9 && extracted.isFact) {
      return MemoryType.GLOBAL;
    }
    
    // Contains step-by-step instructions -> procedural memory
    if (extracted.containsSteps) {
      return MemoryType.PROCEDURAL;
    }
    
    // Describes a specific event -> episodic memory
    if (extracted.isEvent) {
      return MemoryType.EPISODIC;
    }
    
    // General knowledge -> semantic memory
    if (extracted.isKnowledge) {
      return MemoryType.SEMANTIC;
    }
    
    // Otherwise decide by importance.
    return importance > 0.7 ? MemoryType.LONG_TERM : MemoryType.SHORT_TERM;
  }
}

重要性评分算法

// packages/core/src/memory/importance-calculator.ts

/**
 * Scores how important a piece of content is as a weighted sum of five
 * component scores, each intended to lie in [0, 1].
 */
export class ImportanceCalculator {
  private weights: ImportanceWeights = {
    recency: 0.2,        // freshness
    frequency: 0.25,     // access frequency
    userMarked: 0.3,     // explicit / implicit user feedback
    contentLength: 0.1,  // content length
    entityDensity: 0.15, // entity density
  };
  
  /**
   * Computes the combined importance score.
   *
   * @param params Content, its context (the timestamp is read for recency)
   *   and optional user feedback.
   * @returns Importance clamped to [0, 1].
   */
  async calculate(params: ImportanceParams): Promise<number> {
    const scores: Record<string, number> = {};
    
    // 1. Recency (exponential decay)
    scores.recency = this.calculateRecencyScore(params.context.timestamp);
    
    // 2. Frequency (based on historical accesses)
    scores.frequency = await this.calculateFrequencyScore(params.content);
    
    // 3. User feedback
    scores.userMarked = this.calculateUserMarkedScore(params.userFeedback);
    
    // 4. Content length (logarithmic growth)
    scores.contentLength = this.calculateLengthScore(params.content);
    
    // 5. Entity density
    scores.entityDensity = this.calculateEntityDensityScore(params.content);
    
    // Weighted sum
    const totalScore = Object.entries(scores).reduce((sum, [key, score]) => {
      return sum + score * (this.weights[key as keyof ImportanceWeights] || 0);
    }, 0);
    
    // Clamp to [0, 1]
    return Math.min(1.0, Math.max(0.0, totalScore));
  }
  
  /**
   * Recency score following the Ebbinghaus forgetting curve R = e^(-t/S),
   * with the strength constant S set to 24 hours.
   */
  private calculateRecencyScore(timestamp: Date): number {
    // A timestamp slightly in the future (clock skew) must not score above 1.
    const hoursSinceCreation = Math.max(0, hoursBetween(timestamp, new Date()));
    
    return Math.exp(-hoursSinceCreation / 24);
  }
  
  /**
   * Frequency score: logarithmic in the access count so that very hot
   * entries do not dominate, capped at 1.
   */
  private async calculateFrequencyScore(content: string): Promise<number> {
    const accessCount = await this.getAccessCount(content);
    
    // log2(n + 1) / 10 reaches 1 at 1023 accesses; cap it there.
    return Math.min(1, Math.log2(accessCount + 1) / 10);
  }
  
  /**
   * Maps user feedback to a score; missing or unknown feedback is neutral.
   */
  private calculateUserMarkedScore(feedback?: UserFeedback): number {
    if (!feedback) return 0.5;
    
    const scores: Record<string, number> = {
      explicit_important: 1.0,      // user explicitly marked it important
      implicit_positive: 0.8,       // user appeared satisfied (e.g. "thanks")
      neutral: 0.5,                 // no feedback
      implicit_negative: 0.3,       // user appeared dissatisfied
      explicit_unimportant: 0.0,    // user marked it unimportant
    };
    
    // `??` rather than `||`: a legitimate score of 0.0 must not be replaced
    // by the neutral default.
    return scores[feedback.type] ?? 0.5;
  }
  
  /**
   * Length score: very short or very long content is penalised, anything in
   * between grows logarithmically with the word count.
   */
  private calculateLengthScore(content: string): number {
    const wordCount = this.countWords(content);
    
    if (wordCount < 10) return 0.3;
    if (wordCount > 1000) return 0.5;
    
    return Math.log2(wordCount) / 10;
  }
  
  /**
   * Entity-density score: a moderate density is rated best.
   *
   * NOTE(review): `extractEntities` is not defined in this class and is
   * expected to return an array synchronously — confirm where it comes from.
   */
  private calculateEntityDensityScore(content: string): number {
    const wordCount = this.countWords(content);
    
    if (wordCount === 0) return 0;
    
    const entities = this.extractEntities(content);
    const density = entities.length / wordCount;
    
    if (density < 0.05) return 0.3;
    if (density > 0.5) return 0.4;
    
    return 0.5 + density; // 0.55 - 1.0
  }
  
  /**
   * Counts whitespace-separated tokens; empty or blank content counts as 0.
   *
   * NOTE(review): text without whitespace (e.g. Chinese) counts as a single
   * token under this definition.
   */
  private countWords(content: string): number {
    const trimmed = content.trim();
    return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
  }
  
  /**
   * Nudges the weights along the gradient derived from user feedback and
   * re-normalises them so that they sum to 1.
   *
   * Weights are kept non-negative; if the adjustment would leave nothing to
   * normalise, the previous weights are kept.
   */
  adjustWeights(feedback: WeightAdjustmentFeedback): void {
    const gradient = this.calculateGradient(feedback);
    const keys = Object.keys(this.weights) as (keyof ImportanceWeights)[];
    const adjusted: ImportanceWeights = { ...this.weights };
    
    for (const key of keys) {
      const delta = gradient[key];
      const step = Number.isFinite(delta) ? delta * 0.01 : 0;
      adjusted[key] = Math.max(0, adjusted[key] + step);
    }
    
    const total = keys.reduce((sum, key) => sum + adjusted[key], 0);
    if (!(total > 0)) {
      // Dividing by zero would turn every weight into NaN.
      return;
    }
    
    for (const key of keys) {
      adjusted[key] /= total;
    }
    
    this.weights = adjusted;
  }
}

/** Weight of each component score in the combined importance score. */
interface ImportanceWeights {
  /** Weight of the recency score. */
  recency: number;
  /** Weight of the access-frequency score. */
  frequency: number;
  /** Weight of the user-feedback score. */
  userMarked: number;
  /** Weight of the content-length score. */
  contentLength: number;
  /** Weight of the entity-density score. */
  entityDensity: number;
}

更新阶段:后台记忆重写

// packages/core/src/memory/background-rewriting.ts

/**
 * Periodically consolidates stored memories: detects and resolves
 * conflicts, compresses redundant memories and refreshes importance scores.
 *
 * NOTE(review): `this.database` and `this.llmClient` are used below but not
 * declared or initialised in this class — confirm how they are injected.
 */
export class BackgroundRewriter {
  private conflictDetector: ConflictDetector;
  private merger: MemoryMerger;
  private compressor: MemoryCompressor;
  
  /** Handle of the running interval, if the background task is active. */
  private consolidationTimer?: ReturnType<typeof setInterval>;
  
  /** True while a consolidation pass is in flight. */
  private isConsolidating = false;
  
  /**
   * Starts the recurring consolidation task. Calling it again replaces the
   * previous schedule instead of stacking a second timer.
   *
   * @param intervalMs Delay between passes in milliseconds (default 5 min).
   */
  async startBackgroundTask(intervalMs: number = 300000): Promise<void> {
    this.stopBackgroundTask();
    
    this.consolidationTimer = setInterval(() => {
      void this.runScheduledConsolidation();
    }, intervalMs);
  }
  
  /**
   * Stops the recurring consolidation task, if it is running.
   */
  stopBackgroundTask(): void {
    if (this.consolidationTimer !== undefined) {
      clearInterval(this.consolidationTimer);
      this.consolidationTimer = undefined;
    }
  }
  
  /**
   * One scheduled pass. Skipped when the previous pass is still running so
   * that slow passes never overlap; failures are logged, not propagated.
   */
  private async runScheduledConsolidation(): Promise<void> {
    if (this.isConsolidating) return;
    
    this.isConsolidating = true;
    try {
      await this.consolidateMemories();
    } catch (error) {
      console.error('记忆整理失败:', error);
    } finally {
      this.isConsolidating = false;
    }
  }
  
  /**
   * Runs one consolidation pass over every active memory cluster.
   */
  async consolidateMemories(): Promise<void> {
    // 1. Find the active memory clusters.
    const clusters = await this.findActiveClusters();
    
    for (const cluster of clusters) {
      // 2. Detect and resolve conflicts.
      const conflicts = await this.conflictDetector.detect(cluster.memories);
      
      if (conflicts.length > 0) {
        await this.resolveConflicts(conflicts);
      }
      
      // 3. Compress redundant memories in larger clusters.
      if (cluster.memories.length > 5) {
        await this.compressRedundantMemories(cluster.memories);
      }
      
      // 4. Refresh importance scores.
      await this.updateImportanceScores(cluster.memories);
    }
  }
  
  /**
   * Pairwise conflict detection over a list of memories.
   *
   * Quadratic in the number of memories.
   * NOTE(review): not called within this class (consolidateMemories uses
   * `conflictDetector.detect`) — confirm whether it is still needed.
   */
  private async detectConflicts(
    memories: Memory[]
  ): Promise<MemoryConflict[]> {
    const conflicts: MemoryConflict[] = [];
    
    for (let i = 0; i < memories.length; i++) {
      for (let j = i + 1; j < memories.length; j++) {
        const conflict = await this.checkSemanticConflict(memories[i], memories[j]);
        
        if (conflict) {
          conflicts.push(conflict);
        }
      }
    }
    
    return conflicts;
  }
  
  /**
   * Checks two memories for a semantic conflict.
   *
   * @returns A `contradiction` conflict when both match the same
   *   contradiction pattern with contradictory claims, a `temporal_conflict`
   *   when they are highly similar but created more than 7 days apart, or
   *   `null` otherwise.
   */
  private async checkSemanticConflict(
    m1: Memory,
    m2: Memory
  ): Promise<MemoryConflict | null> {
    // Type 1: direct contradiction, e.g. "use X rather than Y".
    // The alternations must not contain spaces: a space inside a regex is a
    // literal character and would stop the keywords from matching.
    const contradictionPatterns = [
      /(使用|采用|是)(.+?)(而不是|而非|不是)(.+)/,
      /(不是|并非|绝不)(.+)/,
      /(应该|必须|不要)(.+?)(而不是|而非)(.+)/,
    ];
    
    for (const pattern of contradictionPatterns) {
      const match1 = m1.content.match(pattern);
      const match2 = m2.content.match(pattern);
      
      if (match1 && match2 && this.hasContradictoryClaims(match1, match2)) {
        return {
          type: 'contradiction',
          memory1: m1,
          memory2: m2,
          severity: 'high',
          description: '两条记忆存在直接矛盾',
        };
      }
    }
    
    // Type 2: temporal conflict (old vs. new information).
    const timeDiff = Math.abs(
      m1.createdAt.getTime() - m2.createdAt.getTime()
    );
    const sevenDaysMs = 7 * 24 * 60 * 60 * 1000;
    
    if (timeDiff > sevenDaysMs) {
      const similarity = this.calculateSimilarity(m1.content, m2.content);
      
      if (similarity > 0.8) {
        // Highly similar but far apart in time: one is probably an update.
        return {
          type: 'temporal_conflict',
          memory1: m1,
          memory2: m2,
          severity: 'medium',
          description: '可能存在过时的信息',
        };
      }
    }
    
    return null;
  }
  
  /**
   * Resolves each conflict with the strategy matching its type.
   */
  private async resolveConflicts(conflicts: MemoryConflict[]): Promise<void> {
    for (const conflict of conflicts) {
      switch (conflict.type) {
        case 'contradiction':
          await this.resolveContradiction(conflict);
          break;
          
        case 'temporal_conflict':
          await this.resolveTemporalConflict(conflict);
          break;
      }
    }
  }
  
  /**
   * Resolves a direct contradiction by keeping the more credible memory.
   *
   * Credibility = 0.6 * confidence + 0.4 * importance; a missing confidence
   * counts as 0.5. On a tie memory2 is kept.
   */
  private async resolveContradiction(conflict: MemoryConflict): Promise<void> {
    const { memory1, memory2 } = conflict;
    
    // `??` so that an explicit confidence of 0 is respected.
    const confidence1 = memory1.metadata?.confidence ?? 0.5;
    const confidence2 = memory2.metadata?.confidence ?? 0.5;
    
    const score1 = confidence1 * 0.6 + memory1.importance * 0.4;
    const score2 = confidence2 * 0.6 + memory2.importance * 0.4;
    
    if (score1 > score2) {
      await this.markAsResolved(memory2, 'superseded_by:' + memory1.id);
    } else {
      await this.markAsResolved(memory1, 'superseded_by:' + memory2.id);
    }
  }
  
  /**
   * Resolves a temporal conflict by flagging the older memory as possibly
   * outdated in favour of the newer one.
   */
  private async resolveTemporalConflict(conflict: MemoryConflict): Promise<void> {
    const { memory1, memory2 } = conflict;
    
    const older = memory1.createdAt < memory2.createdAt ? memory1 : memory2;
    const newer = older === memory1 ? memory2 : memory1;
    
    await this.markAsPotentiallyOutdated(older, newer);
  }
  
  /**
   * Groups highly similar memories and replaces each group with one
   * summarised long-term memory; the originals are marked as compressed.
   */
  private async compressRedundantMemories(memories: Memory[]): Promise<void> {
    // 1. Cluster highly similar memories.
    const clusters = this.clusterBySimilarity(memories, 0.85);
    
    for (const cluster of clusters) {
      if (cluster.length < 2) continue;
      
      // 2. Summarise the cluster.
      const summary = await this.generateSummary(cluster);
      
      // 3. Build the compressed memory.
      const now = new Date();
      const compressed: Memory = {
        id: generateUUID(),
        type: MemoryType.LONG_TERM,
        content: summary,
        metadata: {
          isCompressed: true,
          compressedFrom: cluster.map(m => m.id),
          // Bookkeeping that MemoryMetadata does not declare goes under
          // `custom`.
          custom: {
            compressionDate: now.toISOString(),
            originalCount: cluster.length,
          },
        },
        tags: this.mergeTags(cluster),
        importance: Math.max(...cluster.map(m => m.importance)),
        accessCount: cluster.reduce((sum, m) => sum + m.accessCount, 0),
        // The summary inherits the age of its oldest source.
        createdAt: new Date(Math.min(...cluster.map(m => m.createdAt.getTime()))),
        updatedAt: now,
        embedding: this.averageEmbeddings(cluster),
      };
      
      // 4. Persist it.
      await this.database.insert(compressed);
      
      // 5. Mark the originals as compressed.
      for (const memory of cluster) {
        await this.markAsCompressed(memory, compressed.id);
      }
    }
  }
  
  /**
   * Asks the LLM for a short summary of the five most important memories.
   *
   * @param memories Memories to summarise; the array is not modified.
   * @returns The trimmed summary text.
   */
  private async generateSummary(memories: Memory[]): Promise<string> {
    // Sort a copy: the caller keeps using its array afterwards.
    const topMemories = [...memories]
      .sort((a, b) => b.importance - a.importance)
      .slice(0, 5);
    
    const prompt = `
请为以下相关记忆创建一个简洁的摘要(不超过 150 字):

${topMemories.map(m => `- ${m.content}`).join('\n')}

要求:
1. 保留所有关键信息
2. 消除重复
3. 解决任何细微矛盾
4. 使用清晰的结构

摘要:
`.trim();
    
    const response = await this.llmClient.generate(prompt);
    return response.text.trim();
  }
}

3.4 记忆检索与验证

混合搜索系统

// packages/core/src/memory/search-engine.ts

/**
 * Hybrid memory search: runs vector and keyword retrieval in parallel,
 * optionally filters by time range, removes duplicates, re-ranks the
 * remaining results and truncates them to the requested limit.
 */
export class SearchEngine {
  private vectorSearch: VectorSearch;
  private keywordSearch: KeywordSearch;
  private temporalFilter: TemporalFilter;
  private reranker: ResultReranker;

  /**
   * Hybrid search combining several strategies.
   *
   * @param query - Free-text query.
   * @param options - Optional limit and time range; the same object is
   *   forwarded to both search strategies.
   * @returns Re-ranked results, at most `options.limit` entries (20 when no
   *   limit is given).
   */
  async search(query: string, options?: SearchOptions): Promise<SearchResult[]> {
    // 1. Parse the query intent.
    const intent = await this.parseQueryIntent(query);

    // 2. Run both search strategies in parallel.
    const [vectorResults, keywordResults] = await Promise.all([
      this.vectorSearch.search(query, options),
      this.keywordSearch.search(query, options),
    ]);

    // 3. Apply the time filter, if one was requested.
    let filteredResults = [...vectorResults, ...keywordResults];

    if (options?.timeRange) {
      filteredResults = this.temporalFilter.filter(
        filteredResults,
        options.timeRange
      );
    }

    // 4. De-duplicate (a memory can be found by both strategies).
    const deduplicated = this.deduplicateResults(filteredResults);

    // 5. Re-rank with the learned ranking model.
    const reranked = await this.reranker.rerank(
      deduplicated,
      query,
      intent
    );

    // 6. Truncate. `??` (not `||`) so an explicit limit of 0 is honoured.
    return reranked.slice(0, options?.limit ?? 20);
  }

  /**
   * Derives a coarse intent from the query text.
   *
   * The checks run in order and later ones overwrite `type`, so the effective
   * precedence is: question word > entities > time expression > 'general'.
   * `timeConstraint` and `entities` are kept whatever the final `type` is.
   *
   * @param query - Free-text query.
   * @returns The detected intent; `confidence` is left at 0.
   */
  private async parseQueryIntent(query: string): Promise<QueryIntent> {
    const intent: QueryIntent = {
      type: 'general',
      entities: [],
      timeConstraint: null,
      confidence: 0,
    };

    // Time constraint. The alternatives must not be padded with spaces:
    // inside a regex a space is a literal character, so "昨天 | 上周" would
    // only match "昨天 " or " 上周" and miss ordinary queries.
    const timeMatch = query.match(/(昨天|上周|最近|过去\s*\d+\s*天)/);
    if (timeMatch) {
      intent.timeConstraint = this.parseTimeExpression(timeMatch[0]);
      intent.type = 'temporal';
    }

    // Entities mentioned in the query.
    const entities = this.extractEntities(query);
    if (entities.length > 0) {
      intent.entities = entities;
      intent.type = 'entity_focused';
    }

    // Question type, decided by the leading question word.
    if (query.startsWith('如何') || query.startsWith('怎么')) {
      intent.type = 'procedural'; // "how to" question
    } else if (query.startsWith('什么') || query.startsWith('哪些')) {
      intent.type = 'factual'; // "what / which" question
    } else if (query.startsWith('为什么')) {
      intent.type = 'explanatory'; // "why" question
    }

    return intent;
  }

  /**
   * Vector-search strategy: embeds the query and looks up memories by cosine
   * similarity.
   *
   * NOTE(review): `search` delegates to the `vectorSearch` collaborator and
   * does not call this method. It reads `this.embeddingModel` and
   * `this.database`, which are not declared on this class in the visible
   * code — confirm where this logic is meant to live.
   *
   * @param query - Free-text query to embed.
   * @param options - Optional `limit` (default 50) and `similarityThreshold`
   *   (default 0.7).
   */
  private async runVectorSearch(
    query: string,
    options?: SearchOptions
  ): Promise<VectorSearchResult[]> {
    // 1. Embed the query.
    const queryEmbedding = await this.embeddingModel.encode(query);

    // 2. Search the vector index. `??` keeps an explicit threshold of 0
    //    ("accept everything") instead of silently replacing it with 0.7.
    const results = await this.database.searchVectors(
      queryEmbedding,
      {
        limit: options?.limit ?? 50,
        threshold: options?.similarityThreshold ?? 0.7,
        metric: 'cosine',
      }
    );

    // 3. Convert to the common result shape.
    return results.map(r => ({
      memory: r.memory,
      score: r.similarity,
      source: 'vector',
    }));
  }

  /**
   * Keyword-search strategy: full-text search scored with BM25.
   *
   * NOTE(review): `search` delegates to the `keywordSearch` collaborator and
   * does not call this method. It reads `this.database` and
   * `this.calculateBM25`, which are not declared on this class in the
   * visible code — confirm where this logic is meant to live.
   *
   * @param query - Free-text query.
   * @param options - Optional `limit` (default 50).
   */
  private async runKeywordSearch(
    query: string,
    options?: SearchOptions
  ): Promise<KeywordSearchResult[]> {
    // Full-text search (SQLite FTS5 according to the original comment).
    const results = await this.database.searchFullText(query, {
      limit: options?.limit ?? 50,
    });

    // Score each hit with BM25.
    return results.map(r => ({
      memory: r.memory,
      score: this.calculateBM25(query, r.memory.content),
      source: 'keyword',
      highlights: r.highlights,
    }));
  }

  /**
   * Collapses results that refer to the same memory id, keeping the entry
   * with the higher score. Output order follows the first appearance of each
   * id.
   */
  private deduplicateResults(results: SearchResult[]): SearchResult[] {
    const seen = new Map<string, SearchResult>();

    for (const result of results) {
      const existing = seen.get(result.memory.id);

      // First sighting, or a better-scoring duplicate.
      if (existing === undefined || result.score > existing.score) {
        seen.set(result.memory.id, result);
      }
    }

    return Array.from(seen.values());
  }
}

/**
 * Re-orders search results using a learned ranking model.
 */
class ResultReranker {
  private model: LearningToRankModel;

  constructor() {
    // LambdaMART or a comparable learning-to-rank algorithm.
    this.model = new LearningToRankModel();
  }

  /**
   * Scores every result with the model and returns them best-first.
   *
   * @param results - Candidate results; the input array is left untouched.
   * @param query - The original query text.
   * @param intent - Parsed query intent used for intent-matching features.
   * @returns Copies of the results, each extended with `finalScore`, sorted
   *   by that score in descending order.
   */
  async rerank(
    results: SearchResult[],
    query: string,
    intent: QueryIntent
  ): Promise<SearchResult[]> {
    // One feature vector per candidate, in input order.
    const featureVectors = [];
    for (const candidate of results) {
      featureVectors.push(this.extractFeatures(candidate, query, intent));
    }

    // The model returns one relevance score per feature vector.
    const predicted = await this.model.predict(featureVectors);

    // Attach the scores to copies of the candidates, then order best-first.
    const scored = results.map((candidate, index) => ({
      ...candidate,
      finalScore: predicted[index],
    }));
    scored.sort((left, right) => right.finalScore - left.finalScore);

    return scored;
  }

  /**
   * Builds the feature vector the ranking model consumes for one result.
   */
  private extractFeatures(
    result: SearchResult,
    query: string,
    intent: QueryIntent
  ): FeatureVector {
    const { memory } = result;

    // Text-similarity signal.
    const bm25Score = this.calculateBM25(query, memory.content);

    // Freshness signal.
    const recency = this.calculateRecency(memory.createdAt);

    // Intent-matching signals.
    const entityTypeMatch = this.checkEntityTypeMatch(memory, intent.entities);
    const questionTypeMatch = this.checkQuestionTypeMatch(memory, intent.type);

    // User-interaction signals.
    const userPreviouslyAccessed = this.checkUserAccessHistory(memory);
    const markedImportant = memory.tags?.includes('important');

    return {
      // 1. Text similarity
      bm25Score,
      cosineSimilarity: result.score,

      // 2. Memory quality
      importance: memory.importance,
      accessCount: memory.accessCount,
      contentLength: memory.content.length,

      // 3. Freshness
      recency,

      // 4. Intent match
      entityTypeMatch,
      questionTypeMatch,

      // 5. User interaction
      userPreviouslyAccessed,
      userMarkedImportant: markedImportant ? 1 : 0,
    };
  }
}

有效性验证

// packages/core/src/memory/validation.ts

/**
 * Validates individual memories and whole memory collections.
 */
export class MemoryValidator {
  /**
   * Validates a single memory.
   *
   * Empty content and an importance outside [0, 1] (including NaN) make the
   * memory invalid. Conflicts and failed accuracy checks are reported in
   * `issues`, and staleness/redundancy in `suggestions`, without changing
   * `isValid`.
   *
   * @param memory - The memory to check.
   * @returns The validity flag together with the collected issues and
   *   suggestions.
   */
  async validateMemory(memory: Memory): Promise<ValidationResult> {
    const result: ValidationResult = {
      isValid: true,
      issues: [],
      suggestions: [],
    };

    // 1. Basic checks.
    if (!memory.content || memory.content.trim().length === 0) {
      result.isValid = false;
      result.issues.push('记忆内容为空');
    }

    // Written as a negated in-range test so that NaN, which fails every
    // comparison, is rejected too.
    const importanceInRange = memory.importance >= 0 && memory.importance <= 1;
    if (!importanceInRange) {
      result.isValid = false;
      result.issues.push('重要性评分超出范围 [0, 1]');
    }

    // 2. Staleness: older than a year and never accessed.
    const daysSinceCreation = daysBetween(memory.createdAt, new Date());

    if (daysSinceCreation > 365 && memory.accessCount === 0) {
      result.suggestions.push('此记忆已超过 1 年未被访问,考虑删除');
    }

    // 3. Conflicts with existing memories.
    const conflicts = await this.detectConflictsWithExisting(memory);

    if (conflicts.length > 0) {
      result.issues.push(...conflicts.map(c => `与记忆"${c.otherMemory.content}"冲突`));
      result.suggestions.push('建议解决冲突后再使用此记忆');
    }

    // 4. Redundancy with existing memories.
    const redundant = await this.detectRedundancy(memory);

    if (redundant.length > 0) {
      result.suggestions.push(
        `${redundant.length} 条现有记忆高度相似,考虑合并`
      );
    }

    // 5. Factual accuracy, only for semantic and global memories.
    if (memory.type === MemoryType.SEMANTIC || memory.type === MemoryType.GLOBAL) {
      const accuracyCheck = await this.verifyFactualAccuracy(memory);

      if (!accuracyCheck.passed) {
        result.issues.push(`事实准确性存疑:${accuracyCheck.reason}`);
      }
    }

    return result;
  }

  /**
   * Validates every memory in a collection and aggregates statistics.
   *
   * @param memories - Memories to validate; may be empty.
   * @returns Aggregate stats, the per-memory results (same order as the
   *   input) and recommendations. `withIssues` counts memories whose
   *   `isValid` is false. For an empty collection both averages are 0.
   */
  async validateMemoryCollection(
    memories: Memory[]
  ): Promise<CollectionValidationResult> {
    const stats = {
      total: memories.length,
      valid: 0,
      withIssues: 0,
      avgImportance: 0,
      avgAccessCount: 0,
      duplicates: 0,
    };

    // Validate all memories concurrently.
    const validationPromises = memories.map(m => this.validateMemory(m));
    const results = await Promise.all(validationPromises);

    for (const result of results) {
      if (result.isValid) {
        stats.valid++;
      } else {
        stats.withIssues++;
      }
    }

    // Guard the division: 0 / 0 would put NaN into the averages for an
    // empty collection.
    if (memories.length > 0) {
      stats.avgImportance =
        memories.reduce((sum, m) => sum + m.importance, 0) / memories.length;

      stats.avgAccessCount =
        memories.reduce((sum, m) => sum + m.accessCount, 0) / memories.length;
    }

    // Count duplicates.
    stats.duplicates = this.countDuplicates(memories);

    return {
      stats,
      results,
      recommendations: this.generateRecommendations(stats, results),
    };
  }

  /**
   * Checks a memory's factual accuracy.
   *
   * @param memory - The memory to verify.
   * @returns `{ passed: true }`, or `{ passed: false, reason }` for the first
   *   failed check.
   */
  private async verifyFactualAccuracy(memory: Memory): Promise<AccuracyCheck> {
    // Method 1: cross-check against related memories.
    const relatedMemories = await this.findRelatedMemories(memory);

    for (const related of relatedMemories) {
      if (this.contradicts(memory, related)) {
        return {
          passed: false,
          reason: `与已有记忆"${related.content}"矛盾`,
        };
      }
    }

    // Method 2: external knowledge source (e.g. the Wikipedia API), only for
    // content explicitly marked as a fact or a definition.
    if (memory.content.includes('事实:') || memory.content.includes('定义:')) {
      const externalVerification = await this.verifyWithExternalSource(memory);

      if (!externalVerification.verified) {
        return {
          passed: false,
          reason: externalVerification.reason,
        };
      }
    }

    return { passed: true };
  }
}

3.5 隐私保护措施

敏感信息过滤

// packages/core/src/memory/privacy-filter.ts

/**
 * Redacts personal data, credentials and sensitive code fragments from text
 * before it is stored as a memory.
 */
export class PrivacyFilter {
  private piiDetector: PIIDetector;
  private secretDetector: SecretDetector;

  /**
   * Filters sensitive information out of `content` before storage.
   *
   * All detectors inspect the original text. Redactions are then applied from
   * the most specific to the least specific (code fragments, secrets, PII) so
   * that a short PII match can never break up a longer secret and leave part
   * of it behind, and every occurrence of a detected value is replaced.
   *
   * @param content - Raw text about to be stored.
   * @returns The original text, the redacted text, every detected item (PII
   *   first, then secrets, then code findings) and whether anything was
   *   detected.
   */
  async filterSensitiveContent(content: string): Promise<FilteredResult> {
    const result: FilteredResult = {
      originalContent: content,
      filteredContent: content,
      detectedItems: [],
      redacted: false,
    };

    // String.prototype.replace with a string pattern only touches the first
    // occurrence; split/join replaces all of them and does not interpret
    // `$` sequences in the placeholder. An empty value is skipped because
    // splitting on '' would break the text into single characters.
    const redactAll = (text: string, value: string, placeholder: string): string =>
      value ? text.split(value).join(placeholder) : text;

    // 1. PII (personally identifiable information).
    const piiItems = await this.piiDetector.detect(content);

    // 2. Keys and credentials.
    const secrets = await this.secretDetector.detect(content);

    // 3. Sensitive parts of code.
    const codeSensitivity = await this.detectCodeSensitivity(content);

    // Start from the code-sanitized text and layer the other redactions on
    // top, so no step discards the work of another.
    if (codeSensitivity.containsSensitiveCode) {
      result.filteredContent = codeSensitivity.sanitizedCode;
    }

    for (const secret of secrets) {
      result.filteredContent = redactAll(
        result.filteredContent,
        secret.value,
        `[SECRET_${secret.type}_REDACTED]`
      );
    }

    for (const item of piiItems) {
      result.filteredContent = redactAll(
        result.filteredContent,
        item.value,
        `[${item.type}_REDACTED]`
      );
    }

    result.detectedItems.push(...piiItems, ...secrets, ...codeSensitivity.items);
    result.redacted =
      piiItems.length > 0 ||
      secrets.length > 0 ||
      codeSensitivity.containsSensitiveCode;

    return result;
  }

  /**
   * PII detection: e-mail addresses, phone numbers, mainland-China ID card
   * numbers and person names.
   *
   * Not called from within this class in the visible code;
   * `filterSensitiveContent` uses the `piiDetector` collaborator.
   *
   * @param text - Text to scan.
   * @returns One item per match; `position` is the match's own offset in
   *   `text` (for names, the offset of the first occurrence).
   */
  private async detectPII(text: string): Promise<PIIItem[]> {
    const items: PIIItem[] = [];

    // E-mail addresses.
    const emailPattern = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g;

    for (const match of text.matchAll(emailPattern)) {
      items.push({
        type: 'email',
        value: match[0],
        confidence: 0.95,
        // The match's own index: indexOf would give every repeated address
        // the position of its first occurrence.
        position: match.index ?? text.indexOf(match[0]),
      });
    }

    // Phone numbers (several national formats).
    // NOTE(review): the international pattern accepts any run of four or
    // more digits, so years and port numbers are reported as phones —
    // consider tightening it.
    const phonePatterns = [
      /\+?\d{1,3}[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g, // international
      /1[3-9]\d{9}/g, // mainland-China mobile
      /\d{3}-\d{4}-\d{4}/g, // Japan
    ];

    // The patterns overlap, so the same number at the same offset can match
    // more than once; report it a single time.
    const seenPhones = new Set<string>();

    for (const pattern of phonePatterns) {
      for (const match of text.matchAll(pattern)) {
        const position = match.index ?? text.indexOf(match[0]);
        const key = `${position}:${match[0]}`;
        if (seenPhones.has(key)) continue;
        seenPhones.add(key);

        items.push({
          type: 'phone',
          value: match[0],
          confidence: 0.9,
          position,
        });
      }
    }

    // ID card numbers (mainland China).
    const idCardPattern = /[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]/g;

    for (const match of text.matchAll(idCardPattern)) {
      items.push({
        type: 'id_card',
        value: match[0],
        confidence: 0.98,
        position: match.index ?? text.indexOf(match[0]),
      });
    }

    // Person names (NLP model); only the name text is returned, so the
    // position is that of its first occurrence.
    const names = await this.extractPersonNames(text);

    for (const name of names) {
      items.push({
        type: 'person_name',
        value: name,
        confidence: 0.85,
        position: text.indexOf(name),
      });
    }

    return items;
  }

  /**
   * Secret detection: AWS access keys, GitHub tokens, JWTs and generic
   * API keys / bearer tokens.
   *
   * Not called from within this class in the visible code;
   * `filterSensitiveContent` uses the `secretDetector` collaborator.
   *
   * @param text - Text to scan.
   * @returns One item per match.
   */
  private async detectSecrets(text: string): Promise<SecretItem[]> {
    const items: SecretItem[] = [];

    // AWS access key id.
    const awsKeyPattern = /AKIA[0-9A-Z]{16}/g;
    const awsKeys = text.match(awsKeyPattern) || [];

    for (const key of awsKeys) {
      items.push({
        type: 'aws_access_key',
        value: key,
        confidence: 0.99,
        severity: 'critical',
      });
    }

    // AWS secret access keys (40 characters of [0-9a-zA-Z/+]) are not
    // detected here: the shape alone is too generic and needs surrounding
    // context to avoid false positives.

    // GitHub token.
    const githubTokenPattern = /ghp_[0-9a-zA-Z]{36}/g;
    const githubTokens = text.match(githubTokenPattern) || [];

    for (const token of githubTokens) {
      items.push({
        type: 'github_token',
        value: token,
        confidence: 0.99,
        severity: 'critical',
      });
    }

    // JWT.
    const jwtPattern = /eyJ[A-Za-z0-9-_]+\.eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_.+/=]*/g;
    const jwts = text.match(jwtPattern) || [];

    for (const jwt of jwts) {
      items.push({
        type: 'jwt_token',
        value: jwt,
        confidence: 0.95,
        severity: 'high',
      });
    }

    // Generic API keys (heuristic). `api[_-]?key` already covers the plain
    // "apikey" spelling, so a separate pattern for it would report every
    // such key twice.
    const apiKeyPatterns = [
      /api[_-]?key\s*[=:]\s*["']?[0-9a-zA-Z]{16,}["']?/gi,
      /bearer\s+[0-9a-zA-Z-_.+/=]+/gi,
    ];

    for (const pattern of apiKeyPatterns) {
      const matches = text.match(pattern) || [];
      for (const match of matches) {
        items.push({
          type: 'api_key',
          value: match,
          confidence: 0.8,
          severity: 'high',
        });
      }
    }

    return items;
  }

  /**
   * Detects and redacts sensitive code fragments: hard-coded passwords,
   * PEM private keys and database connection strings with credentials.
   *
   * @param content - Text (typically code) to scan.
   * @returns Whether anything was found, the sanitized text and the findings.
   */
  private async detectCodeSensitivity(content: string): Promise<CodeSensitivityResult> {
    const items: DetectedItem[] = [];
    let sanitizedContent = content;

    // Hard-coded passwords.
    const passwordPatterns = [
      /password\s*[=:]\s*["'][^"']+["']/gi,
      /passwd\s*[=:]\s*["'][^"']+["']/gi,
      /pwd\s*[=:]\s*["'][^"']+["']/gi,
    ];

    for (const pattern of passwordPatterns) {
      const matches = content.matchAll(pattern);
      for (const match of matches) {
        const fullMatch = match[0];
        const placeholder = `/* PASSWORD_REDACTED */`;

        // Each match removes one (the first remaining) occurrence, so
        // repeated identical assignments are all removed over the loop.
        sanitizedContent = sanitizedContent.replace(fullMatch, placeholder);

        items.push({
          type: 'hardcoded_password',
          value: fullMatch,
          confidence: 0.9,
          severity: 'critical',
        });
      }
    }

    // Private keys. Detect any PEM "… PRIVATE KEY" header (RSA, OPENSSH, EC,
    // PKCS#8, …) so that detection covers the same key types as the removal
    // regex below.
    const privateKeyHeader = /-----BEGIN[^-]+PRIVATE KEY-----/;

    if (privateKeyHeader.test(content)) {
      items.push({
        type: 'private_key',
        value: '(私钥内容)',
        confidence: 0.99,
        severity: 'critical',
      });

      // Remove complete key blocks entirely.
      sanitizedContent = sanitizedContent.replace(
        /-----BEGIN[^-]+PRIVATE KEY-----[\s\S]*?-----END[^-]+PRIVATE KEY-----/g,
        '/* PRIVATE_KEY_REDACTED */'
      );
    }

    // Database connection strings that embed credentials.
    const connectionStringPattern = /(mongodb|postgres|mysql|redis):\/\/[^:\s]+:[^@\s]+@[^\s]+/gi;
    const connections = content.match(connectionStringPattern) || [];

    for (const conn of connections) {
      sanitizedContent = sanitizedContent.replace(conn, '/* DB_CONNECTION_REDACTED */');

      items.push({
        type: 'database_connection_string',
        value: conn,
        confidence: 0.95,
        severity: 'critical',
      });
    }

    return {
      containsSensitiveCode: items.length > 0,
      sanitizedCode: sanitizedContent,
      items,
    };
  }
}

GDPR 合规支持

// packages/core/src/memory/gdpr-compliance.ts

/**
 * Implements the data-subject operations of the memory store: access,
 * erasure, portability and consent bookkeeping.
 */
export class GDPRComplianceManager {
  private database: MemoryDatabase;

  /**
   * Right of access: gathers everything stored for one user.
   *
   * @param userId - The data subject.
   * @returns The user's sanitized memories, the access log (newest first)
   *   and summary statistics. `earliestRecord` falls back to the current
   *   time when the user has no memories.
   */
  async exportUserData(userId: string): Promise<UserDataExport> {
    // All memories belonging to the user.
    const memories = await this.database.findByFilters({
      metadata: { userId },
    });

    const accessLogs = await this.database.query(`
      SELECT * FROM memory_access_log
      WHERE user_id = ?
      ORDER BY accessed_at DESC
    `, [userId]);

    const exportDate = new Date();
    const sanitizedMemories = memories.map((memory) => this.sanitizeForExport(memory));

    // Oldest creation time among the memories.
    let earliestRecord = memories[0]?.createdAt || new Date();
    for (const memory of memories) {
      if (memory.createdAt < earliestRecord) {
        earliestRecord = memory.createdAt;
      }
    }

    return {
      userId,
      exportDate,
      memories: sanitizedMemories,
      accessHistory: accessLogs,
      statistics: {
        totalMemories: memories.length,
        totalAccesses: accessLogs.length,
        earliestRecord,
      },
    };
  }

  /**
   * Right to erasure ("right to be forgotten").
   *
   * Deletes every memory of the user unless `shouldRetainMemory` finds a
   * ground to keep it, then deletes the user's access log.
   *
   * @param userId - The data subject.
   * @param options - Which retention grounds may be applied.
   * @returns Counts of deleted and retained memories, plus the reason for
   *   each retained one.
   */
  async eraseUserData(userId: string, options?: ErasureOptions): Promise<ErasureResult> {
    const outcome: ErasureResult = {
      deletedCount: 0,
      retainedCount: 0,
      reasons: [],
    };

    const userMemories = await this.database.findByFilters({
      metadata: { userId },
    });

    for (const candidate of userMemories) {
      if (!this.shouldRetainMemory(candidate, options)) {
        // No ground to keep it: delete.
        await this.database.delete(candidate.id);
        outcome.deletedCount += 1;
        continue;
      }

      // Kept: record why.
      outcome.retainedCount += 1;
      outcome.reasons.push({
        memoryId: candidate.id,
        reason: this.getRetentionReason(candidate, options),
      });
    }

    // The access log is removed as well.
    await this.database.execute(
      'DELETE FROM memory_access_log WHERE user_id = ?',
      [userId]
    );

    return outcome;
  }

  /**
   * Decides whether a memory must be kept despite an erasure request.
   *
   * A ground is only evaluated when the matching option is enabled.
   */
  private shouldRetainMemory(
    memory: Memory,
    options?: ErasureOptions
  ): boolean {
    // Legal obligation.
    if (options?.legalObligation && this.isRequiredByLaw(memory)) {
      return true;
    }

    // Public interest: scientific research, historical records, etc.
    if (options?.publicInterest && this.isPublicInterest(memory)) {
      return true;
    }

    // Freedom of expression and information.
    if (options?.freedomOfExpression && this.relatesToFreedomOfExpression(memory)) {
      return true;
    }

    // Default: do not retain.
    return false;
  }

  /**
   * Right to data portability: exports the user's data as text.
   *
   * @param userId - The data subject.
   * @param format - Target format; anything other than 'csv' or 'xml'
   *   produces pretty-printed JSON.
   */
  async exportInPortableFormat(
    userId: string,
    format: 'json' | 'csv' | 'xml'
  ): Promise<string> {
    const exported = await this.exportUserData(userId);

    if (format === 'csv') {
      return this.convertToCSV(exported);
    }
    if (format === 'xml') {
      return this.convertToXML(exported);
    }
    return JSON.stringify(exported, null, 2);
  }

  /**
   * Consent management: stores a consent decision stamped with the current
   * time.
   *
   * @param userId - The data subject.
   * @param consentType - What the consent is about.
   * @param granted - Whether consent was given.
   * @param metadata - Optional request context (IP, user agent, policy version).
   */
  async recordConsent(
    userId: string,
    consentType: ConsentType,
    granted: boolean,
    metadata?: ConsentMetadata
  ): Promise<void> {
    const record = {
      userId,
      type: consentType,
      granted,
      timestamp: new Date(),
      ip: metadata?.ip,
      userAgent: metadata?.userAgent,
      version: metadata?.policyVersion,
    };

    await this.database.insertConsentRecord(record);
  }

  /**
   * Checks whether the user's latest consent record of the given type is
   * granted and not expired.
   */
  async hasValidConsent(
    userId: string,
    consentType: ConsentType
  ): Promise<boolean> {
    const latest = await this.database.getLastConsentRecord(userId, consentType);

    if (!latest || !latest.granted) {
      return false;
    }

    // A record without an expiry date never expires.
    const expired = latest.expiresAt && latest.expiresAt < new Date();
    return !expired;
  }
}

课后练习

基础题

  1. 数据库设计:画出记忆系统的 ER 图,标注所有表和关系
  2. 重要性计算:实现一个简化版的重要性评分函数
  3. LRU 实现:手写一个 LRU 缓存(不使用库)

进阶题

  1. 向量搜索:使用 Transformers.js(npm 包 @xenova/transformers)实现简单的语义搜索
  2. 冲突检测:实现一个规则基础的冲突检测器
  3. 隐私过滤:编写正则表达式检测邮箱、电话、API Key

挑战题

  1. 完整系统:集成所有组件,实现可用的记忆系统
  2. 性能优化:对百万级记忆进行性能基准测试和优化
  3. 学习排序:使用用户反馈训练排序模型

下节预告

第 4 课:权限系统设计

  • 🔐 RBAC vs ABAC:权限模型深度对比
  • 🎯 四层权限体系架构
  • ⚠️ CVE-2026-21852 漏洞完整分析
  • 🛡️ 动态权限管理和审计
  • 🔑 最小权限原则的最佳实践

版权声明:本课程内容仅用于教育和技术研究目的,请勿将所学知识用于非法活动。所有案例分析均基于公开信息,旨在提升安全意识和技术水平。

Logo

欢迎加入DeepSeek 技术社区。在这里,你可以找到志同道合的朋友,共同探索AI技术的奥秘。

更多推荐