Claude Code 源码泄露之三:记忆系统拆解
本课程深入讲解 AI 记忆系统的三层架构设计,包含短期、长期和全局记忆的完整生命周期管理。课程首先展示记忆系统的必要性,通过对比有无记忆的对话效果差异。系统架构采用分层设计,短期记忆使用 Redis/LRU 缓存,长期记忆存储在 SQLite 数据库,全局记忆处理用户画像等核心数据。详细介绍了记忆数据结构类型,包括情景记忆、语义记忆等分类,以及重要性评分、嵌入向量等关键字段。数据库设计部分展示了 SQLite 表结构。
·
第 3 课:记忆系统拆解
课程目标
通过本课程,你将:
- 深入理解三层记忆架构的设计原理和实现细节
- 掌握记忆存储、检索、更新的完整生命周期管理
- 学习语义搜索和向量嵌入的技术实现
- 了解记忆压缩、冲突解决的算法原理
- 能够设计和实现自己的记忆系统
- 掌握隐私保护的技术方案
3.1 记忆系统架构总览
为什么需要记忆系统?
Claude Code 如果没有记忆,每次对话都会是"初次见面":
用户:我正在开发一个 Express 项目,使用 TypeScript
AI: 好的,我明白了。
[5 分钟后]
用户:我的项目用什么语言写的?
AI: 抱歉,我不确定。可能是 JavaScript、Python、Java...
有了记忆系统后:
用户:我正在开发一个 Express 项目,使用 TypeScript
AI: 好的,我已经记住了:你的项目使用 Express + TypeScript。
[5 分钟后]
用户:我的项目用什么语言写的?
AI: 你的项目使用 TypeScript,基于 Express 框架。
三层记忆架构
各层职责:
| 层级 | 作用 | 存储位置 | 生命周期 | 容量限制 |
|---|---|---|---|---|
| 短期记忆 | 当前对话上下文 | LRU 缓存 (内存) | 会话期间 | ~1000 条 |
| 长期记忆 | 持久化知识 | SQLite 数据库 | 永久(可清理) | 无限制 |
| 全局记忆 | 用户画像、偏好 | SQLite + 配置文件 | 永久 | ~100 条核心 |
记忆数据类型
// packages/core/src/memory/types.ts
/**
 * Core structure of a memory record.
 */
export interface Memory {
/** Unique identifier. */
id: string;
/** Memory type. */
type: MemoryType;
/** Memory content (text). */
content: string;
/** Metadata. */
metadata: MemoryMetadata;
/** Tags (used for quick filtering). */
tags?: string[];
/**
 * Importance score (0.0 - 1.0).
 * 0.0 = may be discarded at any time
 * 1.0 = must never be deleted
 */
importance: number;
/** Number of times the memory has been accessed. */
accessCount: number;
/** Creation time. */
createdAt: Date;
/** Time of the last update. */
updatedAt: Date;
/**
 * Expiry time (optional).
 * Used for temporary memories.
 */
expiresAt?: Date;
/**
 * Embedding vector (used for semantic search).
 * Usually an array of 1536 floating-point numbers.
 */
embedding?: number[];
/** Information about where the memory came from. */
source?: MemorySource;
}
/** Categories a memory can belong to; the string values are what gets stored. */
export enum MemoryType {
SHORT_TERM = 'short_term', // short-term memory
LONG_TERM = 'long_term', // long-term memory
GLOBAL = 'global', // global memory
EPISODIC = 'episodic', // episodic memory (a specific event)
SEMANTIC = 'semantic', // semantic memory (factual knowledge)
PROCEDURAL = 'procedural', // procedural memory (how to do something)
}
/** Optional descriptive data attached to a memory. */
export interface MemoryMetadata {
/** Related project path. */
projectPath?: string;
/** Related files. */
files?: string[];
/** Related Git commit. */
commitHash?: string;
/** Sentiment marker (positive / negative / neutral). */
sentiment?: 'positive' | 'negative' | 'neutral';
/** Confidence score. */
confidence?: number;
/** Marks a memory that was produced by compression. */
isCompressed?: boolean;
/** IDs of the original memories (when this one is a compression result). */
compressedFrom?: string[];
/** Custom fields. */
custom?: Record<string, any>;
}
/** Describes where a memory originated. */
export interface MemorySource {
/** Kind of origin. */
type: 'user_input' | 'ai_response' | 'tool_result' | 'system_event';
/** Session ID. */
sessionId: string;
/** Message ID. */
messageId?: string;
/** Timestamp. */
timestamp: Date;
}
3.2 记忆存储机制
SQLite 数据库设计
// packages/core/src/memory/database.ts
import Database from 'better-sqlite3';
import { Memory, MemoryType } from './types';
export class MemoryDatabase {
private db: Database;
/**
 * Opens the database at `dbPath`, then creates the schema followed by the
 * indexes.
 *
 * NOTE(review): the schema declares a `vec0` virtual table; presumably the
 * vector extension has to be loaded on this connection first. Nothing in this
 * constructor loads it, so confirm where that happens.
 */
constructor(dbPath: string) {
this.db = new Database(dbPath);
this.initializeSchema();
this.createIndexes();
}
/**
 * Creates every table, virtual table and trigger used by the memory store.
 * All statements use IF NOT EXISTS, so calling this on an existing database
 * is a no-op.
 *
 * `memories_fts` is an external-content FTS5 table over `memories`, which
 * means the index must be maintained by hand: besides the insert and delete
 * triggers there is an update trigger that removes the old index entry and
 * adds the new one. Without it, changes to `content` or `tags` would leave
 * full-text search returning stale matches.
 */
private initializeSchema(): void {
  this.db.exec(`
    -- 记忆主表
    CREATE TABLE IF NOT EXISTS memories (
      id TEXT PRIMARY KEY,
      type TEXT NOT NULL CHECK(type IN (
        'short_term', 'long_term', 'global',
        'episodic', 'semantic', 'procedural'
      )),
      content TEXT NOT NULL,
      metadata JSON,
      tags JSON,
      importance REAL DEFAULT 0.5 CHECK(importance >= 0 AND importance <= 1),
      access_count INTEGER DEFAULT 0,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      expires_at DATETIME,
      is_compressed BOOLEAN DEFAULT FALSE,
      compressed_from JSON,
      source_type TEXT,
      source_session_id TEXT,
      source_message_id TEXT
    );
    -- 嵌入向量表(使用 SQLite 向量扩展)
    CREATE VIRTUAL TABLE IF NOT EXISTS memory_embeddings
    USING vec0(
      memory_id TEXT PRIMARY KEY,
      embedding FLOAT[1536]
    );
    -- 记忆关系表(图谱结构)
    CREATE TABLE IF NOT EXISTS memory_relations (
      source_id TEXT REFERENCES memories(id) ON DELETE CASCADE,
      target_id TEXT REFERENCES memories(id) ON DELETE CASCADE,
      relation_type TEXT NOT NULL,
      strength REAL DEFAULT 1.0,
      created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      PRIMARY KEY (source_id, target_id, relation_type)
    );
    -- 访问日志表
    CREATE TABLE IF NOT EXISTS memory_access_log (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      memory_id TEXT REFERENCES memories(id) ON DELETE SET NULL,
      access_type TEXT NOT NULL,
      context JSON,
      accessed_at DATETIME DEFAULT CURRENT_TIMESTAMP
    );
    -- 全文搜索虚拟表
    CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
    USING fts5(
      content,
      tags,
      content='memories',
      content_rowid='rowid'
    );
    -- 触发器:同步 FTS 索引
    CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN
      INSERT INTO memories_fts(rowid, content, tags)
      VALUES (NEW.rowid, NEW.content, json(NEW.tags));
    END;
    CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN
      INSERT INTO memories_fts(memories_fts, rowid, content, tags)
      VALUES ('delete', OLD.rowid, OLD.content, json(OLD.tags));
    END;
    CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN
      INSERT INTO memories_fts(memories_fts, rowid, content, tags)
      VALUES ('delete', OLD.rowid, OLD.content, json(OLD.tags));
      INSERT INTO memories_fts(rowid, content, tags)
      VALUES (NEW.rowid, NEW.content, json(NEW.tags));
    END;
  `);
}
/**
 * Creates the secondary indexes on `memories`, `memory_relations` and
 * `memory_access_log`. Every statement uses IF NOT EXISTS, and the
 * `expires_at` index is partial (only rows that have an expiry).
 */
private createIndexes(): void {
this.db.exec(`
-- 按类型查询
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(type);
-- 按重要性排序
CREATE INDEX IF NOT EXISTS idx_memories_importance ON memories(importance DESC);
-- 按时间范围查询
CREATE INDEX IF NOT EXISTS idx_memories_created ON memories(created_at DESC);
-- 按过期时间查询(清理任务)
CREATE INDEX IF NOT EXISTS idx_memories_expires ON memories(expires_at)
WHERE expires_at IS NOT NULL;
-- 按访问频率查询
CREATE INDEX IF NOT EXISTS idx_memories_access ON memories(access_count DESC);
-- 关系表索引
CREATE INDEX IF NOT EXISTS idx_relations_source ON memory_relations(source_id);
CREATE INDEX IF NOT EXISTS idx_relations_target ON memory_relations(target_id);
-- 访问日志索引
CREATE INDEX IF NOT EXISTS idx_access_log_memory ON memory_access_log(memory_id);
CREATE INDEX IF NOT EXISTS idx_access_log_time ON memory_access_log(accessed_at DESC);
`);
}
/**
 * Inserts one memory and, when present, its embedding vector.
 *
 * The row and the embedding are written in a single transaction so a failure
 * in either leaves no partial record behind.
 *
 * The compression flags are read from `memory.metadata`, which is where the
 * `Memory` type declares them; reading them from the top-level object always
 * produced 0 / NULL. Absent optional values are bound as explicit NULLs.
 *
 * @param memory The memory to persist; timestamps are stored as ISO-8601 UTC.
 */
insert(memory: Memory): void {
  const statement = this.db.prepare(`
    INSERT INTO memories (
      id, type, content, metadata, tags, importance,
      access_count, created_at, updated_at, expires_at,
      is_compressed, compressed_from, source_type,
      source_session_id, source_message_id
    ) VALUES (
      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
    )
  `);
  const metadata = memory.metadata ?? {};
  const compressedFrom = metadata.compressedFrom;

  const persist = this.db.transaction(() => {
    statement.run(
      memory.id,
      memory.type,
      memory.content,
      JSON.stringify(metadata),
      JSON.stringify(memory.tags ?? []),
      memory.importance,
      memory.accessCount ?? 0,
      memory.createdAt.toISOString(),
      memory.updatedAt.toISOString(),
      memory.expiresAt?.toISOString() ?? null,
      metadata.isCompressed ? 1 : 0,
      compressedFrom ? JSON.stringify(compressedFrom) : null,
      memory.source?.type ?? null,
      memory.source?.sessionId ?? null,
      memory.source?.messageId ?? null
    );
    // Store the embedding alongside the row.
    if (memory.embedding) {
      this.insertEmbedding(memory.id, memory.embedding);
    }
  });
  persist();
}
/**
 * Inserts many memories atomically (used for initialisation or import).
 *
 * Each memory goes through `insert`, so metadata, tags, timestamps, source
 * information and embeddings are persisted exactly as for a single insert.
 * The previous implementation wrote only five columns and silently dropped
 * the rest. Everything runs inside one transaction: either the whole batch is
 * stored or none of it.
 *
 * @param memories The memories to persist.
 */
insertBatch(memories: Memory[]): void {
  const insertAll = this.db.transaction((batch: Memory[]) => {
    for (const memory of batch) {
      this.insert(memory);
    }
  });
  insertAll(memories);
}
/**
 * Looks up a single memory by its identifier.
 *
 * @param id Identifier of the memory.
 * @returns The matching memory, or `null` when no row has that id.
 */
findById(id: string): Memory | null {
  const lookup = this.db.prepare(`
SELECT * FROM memories WHERE id = ?
`);
  const match = lookup.get(id) as any;
  return match ? this.rowToMemory(match) : null;
}
/**
 * Returns the memories matching every supplied filter.
 *
 * Two defects of the earlier version are fixed here:
 * - The full-text filter compared the TEXT primary key `id` with the integer
 *   `rowid` of the FTS table, so it could never match; it now compares
 *   `rowid` with `rowid`.
 * - `orderBy` was interpolated into the SQL verbatim. It is now parsed as a
 *   comma-separated list of `<column> [ASC|DESC]` terms and every column must
 *   be a known column of `memories`.
 *
 * @param filters Filter, ordering and paging options. Defaults are
 *   `created_at DESC`, limit 100, offset 0.
 * @returns Matching memories in the requested order.
 * @throws Error when `orderBy` contains anything but known columns and
 *   ASC/DESC.
 */
findByFilters(filters: MemoryFilters): Memory[] {
  const sortableColumns = new Set([
    'id', 'type', 'content', 'importance', 'access_count',
    'created_at', 'updated_at', 'expires_at',
  ]);
  const conditions: string[] = [];
  const params: unknown[] = [];

  if (filters.type) {
    conditions.push('type = ?');
    params.push(filters.type);
  }
  if (filters.tags && filters.tags.length > 0) {
    const placeholders = filters.tags.map(() => '?').join(',');
    conditions.push(`
      EXISTS (
        SELECT 1 FROM json_each(tags) AS tag
        WHERE tag.value IN (${placeholders})
      )
    `);
    params.push(...filters.tags);
  }
  if (filters.minImportance !== undefined) {
    conditions.push('importance >= ?');
    params.push(filters.minImportance);
  }
  if (filters.timeRange) {
    conditions.push('created_at BETWEEN ? AND ?');
    params.push(
      filters.timeRange.start.toISOString(),
      filters.timeRange.end.toISOString()
    );
  }
  if (filters.searchQuery) {
    // memories_fts is keyed by the rowid of `memories`, not by the TEXT id.
    conditions.push(`
      rowid IN (
        SELECT rowid FROM memories_fts
        WHERE memories_fts MATCH ?
      )
    `);
    params.push(filters.searchQuery);
  }

  // Only whitelisted column names and ASC/DESC ever reach the SQL text.
  const requestedOrder = filters.orderBy?.trim() || 'created_at DESC';
  const orderBy = requestedOrder
    .split(',')
    .map(term => {
      const parsed = /^\s*([A-Za-z_]+)(?:\s+(ASC|DESC))?\s*$/i.exec(term);
      const column = parsed?.[1]?.toLowerCase();
      if (column === undefined || !sortableColumns.has(column)) {
        throw new Error(`Unsupported orderBy clause: ${requestedOrder}`);
      }
      const direction = parsed?.[2]?.toUpperCase();
      return direction ? `${column} ${direction}` : column;
    })
    .join(', ');

  const whereClause = conditions.length > 0
    ? 'WHERE ' + conditions.join(' AND ')
    : '';
  const limit = filters.limit || 100;
  const offset = filters.offset || 0;
  const query = `
    SELECT * FROM memories
    ${whereClause}
    ORDER BY ${orderBy}
    LIMIT ? OFFSET ?
  `;
  params.push(limit, offset);

  const rows = this.db.prepare(query).all(...params) as any[];
  return rows.map(row => this.rowToMemory(row));
}
/**
 * Applies a partial update to a memory.
 *
 * Only `content`, `importance`, `tags`, `metadata` and `embedding` are
 * written; other keys of `updates` are ignored. `updated_at` is always
 * refreshed.
 *
 * The timestamp is written as an ISO-8601 UTC string, the same format
 * `insert` uses. `CURRENT_TIMESTAMP` produces `YYYY-MM-DD HH:MM:SS`, which
 * `new Date(...)` interprets as local time when the row is read back.
 *
 * @param id Identifier of the memory to update.
 * @param updates Fields to change.
 */
update(id: string, updates: Partial<Memory>): void {
  const assignments: string[] = [];
  const params: unknown[] = [];

  if (updates.content !== undefined) {
    assignments.push('content = ?');
    params.push(updates.content);
  }
  if (updates.importance !== undefined) {
    assignments.push('importance = ?');
    params.push(updates.importance);
  }
  if (updates.tags !== undefined) {
    assignments.push('tags = ?');
    params.push(JSON.stringify(updates.tags));
  }
  if (updates.metadata !== undefined) {
    assignments.push('metadata = ?');
    params.push(JSON.stringify(updates.metadata));
  }
  // Always refresh the modification time.
  assignments.push('updated_at = ?');
  params.push(new Date().toISOString());

  params.push(id);
  this.db.prepare(`
    UPDATE memories SET ${assignments.join(', ')} WHERE id = ?
  `).run(...params);

  // Keep the stored vector in step with the new embedding, if one was given.
  if (updates.embedding) {
    this.updateEmbedding(id, updates.embedding);
  }
}
/**
 * Deletes a memory together with its embedding vector.
 *
 * `memory_embeddings` is a virtual table with no foreign key to `memories`,
 * so its row has to be removed explicitly or it would be orphaned. Both
 * deletes run in one transaction.
 *
 * @param id Identifier of the memory to delete.
 */
delete(id: string): void {
  const remove = this.db.transaction((memoryId: string) => {
    this.db.prepare('DELETE FROM memory_embeddings WHERE memory_id = ?').run(memoryId);
    this.db.prepare('DELETE FROM memories WHERE id = ?').run(memoryId);
  });
  remove(id);
}
/**
 * Increments the access counter of a memory, refreshes `updated_at` and
 * records a `'read'` entry in the access log.
 *
 * `updated_at` is written as an ISO-8601 UTC string so it matches the format
 * used by `insert`; the counter update and the log entry are committed
 * together.
 *
 * @param id Identifier of the memory that was accessed.
 */
incrementAccessCount(id: string): void {
  const touch = this.db.transaction((memoryId: string) => {
    this.db.prepare(`
      UPDATE memories
      SET access_count = access_count + 1,
          updated_at = ?
      WHERE id = ?
    `).run(new Date().toISOString(), memoryId);
    // Record the access.
    this.logAccess(memoryId, 'read');
  });
  touch(id);
}
/**
 * Deletes every memory whose expiry time has passed.
 *
 * `expires_at` is stored as an ISO-8601 string (`2024-01-01T10:00:00.000Z`)
 * while `CURRENT_TIMESTAMP` yields `2024-01-01 10:00:00`. SQLite compares the
 * two as text, and because 'T' sorts after ' ', a memory that expired earlier
 * today was not considered expired until the next UTC day. The cutoff is
 * therefore bound as an ISO-8601 string so both sides share one format.
 *
 * @returns Number of memories removed.
 */
cleanupExpired(): number {
  const cutoff = new Date().toISOString();
  const result = this.db.prepare(`
    DELETE FROM memories
    WHERE expires_at IS NOT NULL
      AND expires_at < ?
  `).run(cutoff);
  return result.changes;
}
/**
 * Collects aggregate statistics: per-type count, average importance, total
 * accesses and oldest/newest creation time, plus the overall row count.
 *
 * @returns The statistics keyed by memory type and the total count.
 */
getStats(): MemoryStats {
  const perTypeRows = this.db.prepare(`
SELECT
type,
COUNT(*) as count,
AVG(importance) as avg_importance,
SUM(access_count) as total_accesses,
MIN(created_at) as oldest,
MAX(created_at) as newest
FROM memories
GROUP BY type
`).all() as any[];
  const overall = this.db.prepare(`
SELECT COUNT(*) as count FROM memories
`).get() as any;

  const byType: MemoryStats['byType'] = {};
  for (const entry of perTypeRows) {
    byType[entry.type] = {
      count: entry.count,
      avgImportance: entry.avg_importance,
      totalAccesses: entry.total_accesses,
      oldest: new Date(entry.oldest),
      newest: new Date(entry.newest),
    };
  }
  return { byType, total: overall.count };
}
/**
 * Converts a `memories` row into a `Memory` object.
 *
 * The compression columns are folded back into `metadata`, which is where
 * the `Memory` type declares `isCompressed` and `compressedFrom`; they used
 * to be attached to the top-level object, where the type has no such fields.
 * `source.timestamp` is returned as a `Date`, as `MemorySource` requires,
 * rather than the raw column string.
 *
 * The embedding is not loaded here.
 *
 * @param row A row selected from `memories`.
 * @returns The reconstructed memory.
 */
private rowToMemory(row: any): Memory {
  const metadata: Memory['metadata'] = JSON.parse(row.metadata || '{}');
  metadata.isCompressed = Boolean(row.is_compressed);
  if (row.compressed_from) {
    metadata.compressedFrom = JSON.parse(row.compressed_from);
  }

  return {
    id: row.id,
    type: row.type as MemoryType,
    content: row.content,
    metadata,
    tags: JSON.parse(row.tags || '[]'),
    importance: row.importance,
    accessCount: row.access_count,
    createdAt: new Date(row.created_at),
    updatedAt: new Date(row.updated_at),
    expiresAt: row.expires_at ? new Date(row.expires_at) : undefined,
    source: row.source_type
      ? {
          type: row.source_type,
          sessionId: row.source_session_id,
          messageId: row.source_message_id ?? undefined,
          // The table has no dedicated source timestamp; creation time is used.
          timestamp: new Date(row.created_at),
        }
      : undefined,
  };
}
/**
 * Helper: stores the embedding of a memory as the raw bytes of a
 * `Float32Array`.
 *
 * @param memoryId Identifier of the owning memory.
 * @param embedding Vector components.
 */
private insertEmbedding(memoryId: string, embedding: number[]): void {
  const vectorBytes = Buffer.from(new Float32Array(embedding).buffer);
  const statement = this.db.prepare(`
INSERT INTO memory_embeddings (memory_id, embedding)
VALUES (?, ?)
`);
  statement.run(memoryId, vectorBytes);
}
/**
 * Helper: replaces the embedding of a memory, inserting it when the memory
 * has none yet.
 *
 * A plain UPDATE affects zero rows for a memory that was stored without an
 * embedding, which silently discarded the new vector. When nothing was
 * updated, the vector is inserted instead.
 *
 * @param memoryId Identifier of the owning memory.
 * @param embedding New vector components.
 */
private updateEmbedding(memoryId: string, embedding: number[]): void {
  const buffer = Buffer.from(new Float32Array(embedding).buffer);
  const result = this.db.prepare(`
    UPDATE memory_embeddings
    SET embedding = ?
    WHERE memory_id = ?
  `).run(buffer, memoryId);

  if (result.changes === 0) {
    this.insertEmbedding(memoryId, embedding);
  }
}
/**
 * Helper: appends an entry to the access log.
 *
 * @param memoryId Identifier of the accessed memory.
 * @param accessType Kind of access being recorded.
 * @param context Optional context, stored as JSON (`{}` when falsy).
 */
private logAccess(memoryId: string, accessType: string, context?: any): void {
  const serializedContext = JSON.stringify(context || {});
  const statement = this.db.prepare(`
INSERT INTO memory_access_log (memory_id, access_type, context, accessed_at)
VALUES (?, ?, ?, CURRENT_TIMESTAMP)
`);
  statement.run(memoryId, accessType, serializedContext);
}
}
/** Filter, ordering and paging options accepted by `findByFilters`. */
export interface MemoryFilters {
/** Only memories of this type. */
type?: MemoryType;
/** Only memories carrying at least one of these tags. */
tags?: string[];
/** Only memories whose importance is at least this value. */
minImportance?: number;
/** Only memories created between `start` and `end`. */
timeRange?: { start: Date; end: Date };
/** Full-text query matched against the FTS index. */
searchQuery?: string;
/** ORDER BY clause; `findByFilters` falls back to `created_at DESC`. */
orderBy?: string;
/** Maximum number of rows; `findByFilters` falls back to 100. */
limit?: number;
/** Number of rows to skip; `findByFilters` falls back to 0. */
offset?: number;
}
/** Aggregate statistics returned by `getStats`. */
export interface MemoryStats {
/** Statistics per memory type, keyed by the type's string value. */
byType: Record<string, {
count: number;
avgImportance: number;
totalAccesses: number;
oldest: Date;
newest: Date;
}>;
/** Total number of stored memories. */
total: number;
}
LRU 缓存实现(短期记忆)
// packages/core/src/memory/lru-cache.ts
/**
 * LRU cache used as the in-memory store for short-term memories.
 *
 * - `get`, `set` and `delete` run in O(1): a Map provides key lookup and a
 *   doubly linked list keeps entries ordered from most to least recently
 *   used.
 * - When an insertion pushes the number of entries above `capacity`, the
 *   least recently used entry is evicted and reported through `onEvict`.
 */
export class LRUCache<K, V> {
  private capacity: number;
  private cache: Map<K, LRUNode<K, V>>;
  private head: LRUNode<K, V>; // sentinel placed before the most recently used entry
  private tail: LRUNode<K, V>; // sentinel placed after the least recently used entry
  private size: number = 0;

  /** Eviction hook (can be used to persist the evicted entry). */
  onEvict?: (key: K, value: V) => void;

  /**
   * @param capacity Maximum number of entries kept before eviction starts.
   */
  constructor(capacity: number) {
    this.capacity = capacity;
    this.cache = new Map();
    // The sentinels' key/value are never read, so placeholders are safe.
    this.head = new LRUNode<K, V>(null!, null!);
    this.tail = new LRUNode<K, V>(null!, null!);
    this.head.next = this.tail;
    this.tail.prev = this.head;
  }

  /**
   * Returns the value stored under `key` and marks it as most recently used.
   * Reading does not change the number of entries.
   */
  get(key: K): V | undefined {
    const node = this.cache.get(key);
    if (!node) return undefined;
    this.moveToHead(node);
    return node.value;
  }

  /**
   * Stores `value` under `key` and marks it as most recently used. Only a new
   * key grows the cache; overwriting an existing key leaves the size alone.
   */
  set(key: K, value: V): void {
    const existing = this.cache.get(key);
    if (existing) {
      existing.value = value;
      this.moveToHead(existing);
      return;
    }

    const node = new LRUNode<K, V>(key, value);
    this.cache.set(key, node);
    this.addToHead(node);
    this.size++;
    // Over capacity: drop the least recently used entry.
    if (this.size > this.capacity) {
      this.evictLRU();
    }
  }

  /**
   * Removes `key`.
   * @returns `true` when the key existed.
   */
  delete(key: K): boolean {
    const node = this.cache.get(key);
    if (!node) return false;
    this.removeNode(node);
    this.cache.delete(key);
    this.size--;
    return true;
  }

  /** Reports whether `key` is present, without affecting recency. */
  has(key: K): boolean {
    return this.cache.has(key);
  }

  /** Returns the number of entries currently stored. */
  getSize(): number {
    return this.size;
  }

  /** Removes every entry. `onEvict` is not called. */
  clear(): void {
    this.cache.clear();
    this.head.next = this.tail;
    this.tail.prev = this.head;
    this.size = 0;
  }

  /** Returns the keys in the order they were first inserted into the Map. */
  keys(): IterableIterator<K> {
    return this.cache.keys();
  }

  /** Returns all values ordered from most to least recently used. */
  valuesByRecency(): V[] {
    const values: V[] = [];
    let current = this.head.next;
    while (current !== null && current !== this.tail) {
      values.push(current.value);
      current = current.next;
    }
    return values;
  }

  /** Visits every entry from most to least recently used. */
  forEach(callback: (key: K, value: V, index: number) => void): void {
    let current = this.head.next;
    let index = 0;
    while (current !== null && current !== this.tail) {
      callback(current.key, current.value, index);
      current = current.next;
      index++;
    }
  }

  /** Links `node` directly after the head sentinel. */
  private addToHead(node: LRUNode<K, V>): void {
    const first = this.head.next;
    node.prev = this.head;
    node.next = first;
    if (first) first.prev = node;
    this.head.next = node;
  }

  /** Unlinks `node` from the list. */
  private removeNode(node: LRUNode<K, V>): void {
    const { prev, next } = node;
    if (prev) prev.next = next;
    if (next) next.prev = prev;
    node.prev = null;
    node.next = null;
  }

  /** Marks `node` as most recently used. */
  private moveToHead(node: LRUNode<K, V>): void {
    this.removeNode(node);
    this.addToHead(node);
  }

  /** Evicts the least recently used entry and reports it to `onEvict`. */
  private evictLRU(): void {
    const lru = this.tail.prev;
    if (lru === null || lru === this.head) return;
    this.removeNode(lru);
    this.cache.delete(lru.key);
    this.size--;
    this.onEvict?.(lru.key, lru.value);
  }
}

/**
 * Node of the doubly linked list backing `LRUCache`.
 */
class LRUNode<K, V> {
  key: K;
  value: V;
  prev: LRUNode<K, V> | null = null;
  next: LRUNode<K, V> | null = null;

  constructor(key: K, value: V) {
    this.key = key;
    this.value = value;
  }
}
// Usage example
const shortTermMemory = new LRUCache<string, Memory>(1000);

// Register the eviction hook before adding entries so that no eviction can
// happen without being persisted.
// NOTE(review): `database` is not defined in this snippet; presumably it is a
// MemoryDatabase instance created elsewhere. Confirm.
shortTermMemory.onEvict = (key, memory) => {
  console.log('记忆被淘汰,持久化到数据库:', key);
  database.insert(memory);
};

// Add a memory. Every required field of `Memory` is supplied, including
// `metadata`, `accessCount` and `updatedAt`.
const exampleCreatedAt = new Date();
shortTermMemory.set('mem-1', {
  id: 'mem-1',
  type: MemoryType.SHORT_TERM,
  content: '用户喜欢使用 Tab 缩进',
  metadata: {},
  importance: 0.7,
  accessCount: 0,
  createdAt: exampleCreatedAt,
  updatedAt: exampleCreatedAt,
});
3.3 记忆生命周期管理
创建阶段
// packages/core/src/memory/memory-creator.ts
import { Memory, MemoryType, MemorySource } from './types';
import { generateUUID } from '../utils/uuid';
export class MemoryCreator {
private importanceCalculator: ImportanceCalculator;
private embeddingGenerator: EmbeddingGenerator;
private tagExtractor: TagExtractor;
/**
 * Creates the three collaborators used while building memories. Only
 * `config.apiKey` is read here, and it is handed to the embedding generator.
 */
constructor(config: MemoryConfig) {
this.importanceCalculator = new ImportanceCalculator();
this.embeddingGenerator = new EmbeddingGenerator(config.apiKey);
this.tagExtractor = new TagExtractor();
}
/**
 * Builds a memory from a piece of user input.
 *
 * The steps run one after another: key-information extraction, importance
 * scoring, tag extraction and embedding generation. The result is tagged
 * with a `'user_input'` source.
 *
 * @param content Raw user text.
 * @param context Conversation context (session, message, project, files).
 * @returns The assembled memory; it is not persisted here.
 */
async createFromUserInput(
  content: string,
  context: MessageContext
): Promise<Memory> {
  const keyInfo = await this.extractKeyInformation(content, context);
  const score = await this.importanceCalculator.calculate({
    content: keyInfo.content,
    context,
    userFeedback: context.userFeedback,
  });
  const extractedTags = await this.tagExtractor.extract(keyInfo.content);
  const vector = await this.embeddingGenerator.generate(keyInfo.content);

  return {
    id: generateUUID(),
    type: this.determineMemoryType(keyInfo, score),
    content: keyInfo.content,
    metadata: {
      projectPath: context.projectPath,
      files: context.files,
      confidence: keyInfo.confidence,
    },
    tags: extractedTags,
    importance: score,
    accessCount: 0,
    createdAt: new Date(),
    updatedAt: new Date(),
    embedding: vector,
    source: {
      type: 'user_input',
      sessionId: context.sessionId,
      messageId: context.messageId,
      timestamp: new Date(),
    },
  };
}
/**
 * Builds a memory from an AI response.
 *
 * Responses containing code blocks or configurations become procedural
 * memories. Anything else goes through the same pipeline as user input, but
 * the resulting memory's source is relabelled `'ai_response'`; previously it
 * was stored as `'user_input'`, misattributing the text to the user.
 *
 * @param content Text of the AI response.
 * @param context Context of the response.
 * @returns The assembled memory.
 */
async createFromAIResponse(
  content: string,
  context: ResponseContext
): Promise<Memory> {
  // Check for important artefacts (code snippets, configurations, ...).
  const codeBlocks = this.extractCodeBlocks(content);
  const configurations = this.extractConfigurations(content);
  if (codeBlocks.length > 0 || configurations.length > 0) {
    return this.createProceduralMemory(codeBlocks, configurations, context);
  }

  // Otherwise reuse the general pipeline and correct the provenance.
  const memory = await this.createFromUserInput(content, context);
  if (memory.source) {
    memory.source = { ...memory.source, type: 'ai_response' };
  }
  return memory;
}
/**
 * Reduces the input to its relevant content and rates how trustworthy the
 * extraction is.
 *
 * @param content Raw text.
 * @param context Conversation context (not consulted by this method).
 * @returns The filtered content and a confidence value computed from the
 *   extracted entities and relations.
 */
private async extractKeyInformation(
  content: string,
  context: MessageContext
): Promise<{ content: string; confidence: number }> {
  const foundEntities = await this.extractEntities(content);
  const foundRelations = await this.extractRelations(content);
  const relevantText = this.filterIrrelevantContent(content, foundEntities);
  return {
    content: relevantText,
    confidence: this.calculateConfidence(foundEntities, foundRelations),
  };
}
/**
 * Extracts entities (technologies, versions, file paths, shell commands)
 * from the text using regular expressions.
 *
 * @param text Text to scan.
 * @returns Entities grouped by pattern (technology, version, file, command),
 *   each group in order of appearance.
 */
private async extractEntities(text: string): Promise<Entity[]> {
  const matchers = [
    { type: 'technology', regex: /\b(TypeScript|JavaScript|Python|React|Vue|Node\.js)\b/g },
    { type: 'version', regex: /\b(\d+\.\d+\.\d+)\b/g },
    { type: 'file', regex: /[\w\-\/]+\.(ts|js|json|md|py)/g },
    { type: 'command', regex: /\$\s*\w+/g },
  ];
  return matchers.flatMap(({ type, regex }) =>
    Array.from(text.matchAll(regex), hit => ({
      type,
      value: hit[0],
      position: hit.index,
    }))
  );
}
/**
 * Chooses the memory type. Rules are checked in this order and the first
 * match wins: highly important fact -> global; contains steps -> procedural;
 * event -> episodic; knowledge -> semantic; otherwise long-term when the
 * importance exceeds 0.7, else short-term.
 *
 * @param extracted Extraction result with classification flags.
 * @param importance Importance score of the memory.
 */
private determineMemoryType(
  extracted: ExtractedInfo,
  importance: number
): MemoryType {
  const isHighlyImportantFact = importance > 0.9 && extracted.isFact;
  if (isHighlyImportantFact) return MemoryType.GLOBAL;
  if (extracted.containsSteps) return MemoryType.PROCEDURAL;
  if (extracted.isEvent) return MemoryType.EPISODIC;
  if (extracted.isKnowledge) return MemoryType.SEMANTIC;
  // No specific category matched: fall back on importance alone.
  if (importance > 0.7) return MemoryType.LONG_TERM;
  return MemoryType.SHORT_TERM;
}
}
重要性评分算法
// packages/core/src/memory/importance-calculator.ts
export class ImportanceCalculator {
private weights: ImportanceWeights = {
recency: 0.2, // 新鲜度
frequency: 0.25, // 访问频率
userMarked: 0.3, // 用户标记
contentLength: 0.1, // 内容长度
entityDensity: 0.15, // 实体密度
};
/**
 * Computes the overall importance score as the weighted sum of five
 * component scores (recency, frequency, user marking, content length and
 * entity density), clamped to [0, 1].
 *
 * @param params Content, context and optional user feedback.
 * @returns The importance score in [0, 1].
 */
async calculate(params: ImportanceParams): Promise<number> {
  // Evaluated and accumulated in this fixed order.
  const components: Array<[keyof ImportanceWeights, number]> = [
    ['recency', this.calculateRecencyScore(params.context.timestamp)],
    ['frequency', await this.calculateFrequencyScore(params.content)],
    ['userMarked', this.calculateUserMarkedScore(params.userFeedback)],
    ['contentLength', this.calculateLengthScore(params.content)],
    ['entityDensity', this.calculateEntityDensityScore(params.content)],
  ];

  let weightedSum = 0;
  for (const [name, value] of components) {
    weightedSum += value * (this.weights[name] || 0);
  }
  return Math.min(1.0, Math.max(0.0, weightedSum));
}
/**
 * Recency score following the Ebbinghaus forgetting curve R = e^(-t/S),
 * with the memory-strength constant S set to 24 hours.
 *
 * A timestamp in the future (e.g. clock skew) would give a negative elapsed
 * time and a score above 1, so elapsed time is floored at zero.
 *
 * @param timestamp When the content was created.
 * @returns A retention value in (0, 1].
 */
private calculateRecencyScore(timestamp: Date): number {
  const hoursSinceCreation = Math.max(0, hoursBetween(timestamp, new Date()));
  return Math.exp(-hoursSinceCreation / 24);
}
/**
 * Frequency score based on how often the content has been accessed.
 *
 * The logarithm keeps very frequently accessed content from dominating.
 * `log2(count + 1) / 10` reaches 1 at 1023 accesses and kept growing beyond
 * that, so the result is capped at 1 to stay in the same [0, 1] range as the
 * other component scores.
 *
 * @param content Content whose access history is looked up.
 * @returns A score in [0, 1].
 */
private async calculateFrequencyScore(content: string): Promise<number> {
  const accessCount = await this.getAccessCount(content);
  const score = Math.log2(Math.max(0, accessCount) + 1) / 10;
  return Math.min(1, score);
}
/**
 * Score derived from user feedback.
 *
 * The lookup falls back to 0.5 only for an unknown feedback type. Using `||`
 * here also replaced the legitimate score 0.0 of `explicit_unimportant` with
 * 0.5, so "not important" was treated like "no feedback".
 *
 * @param feedback Optional feedback from the user.
 * @returns A score in [0, 1]; 0.5 when there is no or unrecognised feedback.
 */
private calculateUserMarkedScore(feedback?: UserFeedback): number {
  if (!feedback) return 0.5;
  const scores: Record<string, number> = {
    explicit_important: 1.0, // user explicitly marked it "important"
    implicit_positive: 0.8, // user showed satisfaction (e.g. "thanks")
    neutral: 0.5, // no feedback
    implicit_negative: 0.3, // user showed dissatisfaction
    explicit_unimportant: 0.0, // user marked it "not important"
  };
  return scores[feedback.type] ?? 0.5;
}
/**
 * Score based on the number of whitespace-separated tokens: 0.3 below 10
 * tokens, 0.5 above 1000, and log2(tokens) / 10 in between.
 *
 * @param content Text to measure.
 */
private calculateLengthScore(content: string): number {
  const tokens = content.split(/\s+/).length;
  if (tokens < 10) {
    return 0.3;
  }
  return tokens > 1000 ? 0.5 : Math.log2(tokens) / 10;
}
/**
 * Score based on the number of entities per word.
 *
 * Words are counted after trimming and dropping empty tokens. A bare
 * `split(/\s+/)` returns `['']` for an empty string, so the zero-word guard
 * could never trigger, and leading or trailing whitespace inflated the count.
 *
 * NOTE(review): `extractEntities` is not defined in the visible part of this
 * class; this assumes a synchronous method returning an array. Confirm.
 *
 * @param content Text to analyse.
 * @returns 0 for empty content, 0.3 below a density of 0.05, 0.4 above 0.5,
 *   otherwise 0.5 + density.
 */
private calculateEntityDensityScore(content: string): number {
  const wordCount = content.trim().split(/\s+/).filter(Boolean).length;
  if (wordCount === 0) return 0;

  const entities = this.extractEntities(content);
  const density = entities.length / wordCount;
  // A moderate entity density scores best.
  if (density < 0.05) return 0.3;
  if (density > 0.5) return 0.4;
  return 0.5 + density;
}
/**
 * Adjusts the weights from user feedback and renormalises them to sum to 1.
 *
 * Each weight moves by 0.01 times its gradient component. Updated weights
 * are floored at 0 and non-finite gradient components are ignored. If the
 * adjusted weights sum to zero the update is discarded, because normalising
 * would divide by zero and turn every weight into NaN.
 *
 * @param feedback Feedback used to compute the gradient.
 */
adjustWeights(feedback: WeightAdjustmentFeedback): void {
  const learningRate = 0.01;
  const gradient = this.calculateGradient(feedback);
  const keys = Object.keys(this.weights) as (keyof ImportanceWeights)[];

  const adjusted = { ...this.weights };
  for (const key of keys) {
    const step = Number(gradient[key]);
    const delta = Number.isFinite(step) ? step * learningRate : 0;
    adjusted[key] = Math.max(0, this.weights[key] + delta);
  }

  const total = keys.reduce((sum, key) => sum + adjusted[key], 0);
  if (!(total > 0)) return; // keep the previous, still valid weights

  for (const key of keys) {
    this.weights[key] = adjusted[key] / total;
  }
}
}
/** Weight of each component score in the overall importance. */
interface ImportanceWeights {
/** Weight of the recency score. */
recency: number;
/** Weight of the access-frequency score. */
frequency: number;
/** Weight of the user-feedback score. */
userMarked: number;
/** Weight of the content-length score. */
contentLength: number;
/** Weight of the entity-density score. */
entityDensity: number;
}
更新阶段:背景记忆重写
// packages/core/src/memory/background-rewriting.ts
export class BackgroundRewriter {
private conflictDetector: ConflictDetector;
private merger: MemoryMerger;
private compressor: MemoryCompressor;
/**
 * Starts the recurring memory-consolidation task.
 *
 * A tick is skipped while the previous consolidation is still running;
 * otherwise a run longer than `intervalMs` would overlap with the next one
 * and both would rewrite the same memories. Failures are logged and do not
 * stop the schedule.
 *
 * The returned promise resolves as soon as the timer is scheduled, not when
 * a consolidation finishes.
 *
 * @param intervalMs Delay between runs in milliseconds (default 5 minutes).
 */
async startBackgroundTask(intervalMs: number = 300000): Promise<void> {
  let running = false;
  setInterval(async () => {
    if (running) return;
    running = true;
    try {
      await this.consolidateMemories();
    } catch (error) {
      console.error('记忆整理失败:', error);
    } finally {
      running = false;
    }
  }, intervalMs);
}
/**
 * Runs one consolidation pass. For every active cluster, in order: conflicts
 * are detected and resolved, clusters with more than five memories are
 * compressed, and importance scores are refreshed.
 */
async consolidateMemories(): Promise<void> {
  const activeClusters = await this.findActiveClusters();
  for (const cluster of activeClusters) {
    const found = await this.conflictDetector.detect(cluster.memories);
    if (found.length > 0) {
      await this.resolveConflicts(found);
    }

    const hasRedundancyPotential = cluster.memories.length > 5;
    if (hasRedundancyPotential) {
      await this.compressRedundantMemories(cluster.memories);
    }

    await this.updateImportanceScores(cluster.memories);
  }
}
/**
 * Finds conflicts between memories by checking every unordered pair once
 * (quadratic in the number of memories).
 *
 * @param memories Memories to compare.
 * @returns All detected conflicts, in pair order.
 */
private async detectConflicts(
  memories: Memory[]
): Promise<MemoryConflict[]> {
  const found: MemoryConflict[] = [];
  for (const [index, first] of memories.entries()) {
    for (const second of memories.slice(index + 1)) {
      const conflict = await this.checkSemanticConflict(first, second);
      if (conflict) {
        found.push(conflict);
      }
    }
  }
  return found;
}
/**
 * Checks two memories for a semantic conflict.
 *
 * Type 1, direct contradiction: both memories match the same contradiction
 * pattern and their claims disagree.
 * Type 2, temporal conflict: the memories were created more than 7 days
 * apart but are more than 80% similar, so the older one may be outdated.
 *
 * The patterns previously contained literal spaces inside their alternations
 * (`使用 | 采用 | 是`), so they only matched text with spaces around the
 * keywords. The spaces are removed.
 *
 * @returns The detected conflict, or `null` when there is none.
 */
private async checkSemanticConflict(
  m1: Memory,
  m2: Memory
): Promise<MemoryConflict | null> {
  const contradictionPatterns = [
    /(使用|采用|是)(.+?)(而不是|而非|不是)(.+)/,
    /(不是|并非|绝不)(.+)/,
    /(应该|必须|不要)(.+?)(而不是|而非)(.+)/,
  ];
  for (const pattern of contradictionPatterns) {
    const match1 = m1.content.match(pattern);
    const match2 = m2.content.match(pattern);
    if (match1 && match2 && this.hasContradictoryClaims(match1, match2)) {
      return {
        type: 'contradiction',
        memory1: m1,
        memory2: m2,
        severity: 'high',
        description: '两条记忆存在直接矛盾',
      };
    }
  }

  // Very similar content created far apart suggests newer information.
  const sevenDaysMs = 7 * 24 * 60 * 60 * 1000;
  const timeDiff = Math.abs(
    m1.createdAt.getTime() - m2.createdAt.getTime()
  );
  if (timeDiff > sevenDaysMs) {
    const similarity = this.calculateSimilarity(m1.content, m2.content);
    if (similarity > 0.8) {
      return {
        type: 'temporal_conflict',
        memory1: m1,
        memory2: m2,
        severity: 'medium',
        description: '可能存在过时的信息',
      };
    }
  }
  return null;
}
/**
 * Resolves conflicts one at a time, dispatching on the conflict type.
 * Conflicts of any other type are left untouched.
 */
private async resolveConflicts(conflicts: MemoryConflict[]): Promise<void> {
  for (const conflict of conflicts) {
    if (conflict.type === 'contradiction') {
      await this.resolveContradiction(conflict);
    } else if (conflict.type === 'temporal_conflict') {
      await this.resolveTemporalConflict(conflict);
    }
  }
}
/**
 * Resolves a direct contradiction by keeping the more credible memory.
 *
 * Each memory is scored as 60% confidence plus 40% importance; the lower
 * score is marked as superseded by the other, and on a tie memory1 is the
 * one superseded. A missing confidence defaults to 0.5. With `||` an
 * explicit confidence of 0 was also promoted to 0.5.
 */
private async resolveContradiction(conflict: MemoryConflict): Promise<void> {
  const { memory1, memory2 } = conflict;
  const credibility = (memory: Memory): number =>
    (memory.metadata?.confidence ?? 0.5) * 0.6 + memory.importance * 0.4;

  if (credibility(memory1) > credibility(memory2)) {
    await this.markAsResolved(memory2, 'superseded_by:' + memory1.id);
  } else {
    await this.markAsResolved(memory1, 'superseded_by:' + memory2.id);
  }
}
/**
 * Resolves a temporal conflict by keeping the newer memory and flagging the
 * older one as potentially outdated. When both were created at the same
 * instant, memory2 is treated as the older one.
 */
private async resolveTemporalConflict(conflict: MemoryConflict): Promise<void> {
  const firstIsOlder = conflict.memory1.createdAt < conflict.memory2.createdAt;
  const [older, newer] = firstIsOlder
    ? [conflict.memory1, conflict.memory2]
    : [conflict.memory2, conflict.memory1];
  await this.markAsPotentiallyOutdated(older, newer);
}
/**
 * Compresses groups of highly similar memories into one summary memory.
 *
 * Memories are clustered at a similarity threshold of 0.85. Each cluster of
 * two or more is summarised, stored as a new long-term memory, and its
 * members are marked as compressed.
 *
 * The compressed memory now satisfies the `Memory` type: it carries
 * `updatedAt`, sets `metadata.isCompressed`, and keeps the compression date
 * and original count under `metadata.custom`, because `MemoryMetadata`
 * declares no such top-level keys.
 *
 * NOTE(review): `this.database` is not among the fields visible on this
 * class; presumably it is declared elsewhere. Confirm.
 *
 * @param memories Candidate memories from one cluster.
 */
private async compressRedundantMemories(memories: Memory[]): Promise<void> {
  const clusters = this.clusterBySimilarity(memories, 0.85);
  for (const cluster of clusters) {
    if (cluster.length < 2) continue;

    const summary = await this.generateSummary(cluster);
    const compressionDate = new Date();
    const compressed: Memory = {
      id: generateUUID(),
      type: MemoryType.LONG_TERM,
      content: summary,
      metadata: {
        isCompressed: true,
        compressedFrom: cluster.map(m => m.id),
        custom: {
          compressionDate,
          originalCount: cluster.length,
        },
      },
      tags: this.mergeTags(cluster),
      // Keep the strongest signal from the originals.
      importance: Math.max(...cluster.map(m => m.importance)),
      accessCount: cluster.reduce((sum, m) => sum + m.accessCount, 0),
      // The summary dates back to the earliest original.
      createdAt: new Date(Math.min(...cluster.map(m => m.createdAt.getTime()))),
      updatedAt: compressionDate,
      embedding: this.averageEmbeddings(cluster),
    };

    await this.database.insert(compressed);
    for (const memory of cluster) {
      await this.markAsCompressed(memory, compressed.id);
    }
  }
}
/**
 * Asks the LLM for a short summary of the most important memories.
 *
 * The input is copied before sorting: `Array.prototype.sort` sorts in place,
 * so sorting `memories` directly reordered the caller's cluster. Only the
 * five most important memories are included in the prompt.
 *
 * @param memories Memories to summarise.
 * @returns The trimmed summary text returned by the LLM.
 */
private async generateSummary(memories: Memory[]): Promise<string> {
  const topMemories = [...memories]
    .sort((a, b) => b.importance - a.importance)
    .slice(0, 5);

  // Joined line by line so the prompt text does not depend on indentation.
  const prompt = [
    '请为以下相关记忆创建一个简洁的摘要(不超过 150 字):',
    topMemories.map(m => `- ${m.content}`).join('\n'),
    '要求:',
    '1. 保留所有关键信息',
    '2. 消除重复',
    '3. 解决任何细微矛盾',
    '4. 使用清晰的结构',
    '摘要:',
  ].join('\n').trim();

  const response = await this.llmClient.generate(prompt);
  return response.text.trim();
}
}
3.4 记忆检索与验证
混合搜索系统
// packages/core/src/memory/search-engine.ts
export class SearchEngine {
private vectorSearch: VectorSearch;
private keywordSearch: KeywordSearch;
private temporalFilter: TemporalFilter;
private reranker: ResultReranker;
/**
 * Hybrid search combining several strategies: the query intent is parsed,
 * vector and keyword search run in parallel, results are optionally filtered
 * by time range, de-duplicated, re-ranked and finally truncated.
 *
 * @param query Search text.
 * @param options Optional limit, time range and search tuning.
 * @returns At most `options.limit` results (20 by default), best first.
 */
async search(query: string, options?: SearchOptions): Promise<SearchResult[]> {
  const intent = await this.parseQueryIntent(query);

  const [fromVectors, fromKeywords] = await Promise.all([
    this.vectorSearch.search(query, options),
    this.keywordSearch.search(query, options),
  ]);

  const merged = [...fromVectors, ...fromKeywords];
  const withinWindow = options?.timeRange
    ? this.temporalFilter.filter(merged, options.timeRange)
    : merged;

  const unique = this.deduplicateResults(withinWindow);
  const ranked = await this.reranker.rerank(unique, query, intent);

  const maxResults = options?.limit || 20;
  return ranked.slice(0, maxResults);
}
/**
 * Derives the intent of a query: a time constraint, mentioned entities and
 * the question type. Later checks override the `type` set by earlier ones,
 * so a question word at the start of the query takes precedence over entity
 * and time detection.
 *
 * The time pattern previously had literal spaces inside its alternation
 * (`昨天 | 上周 | 最近`), so `上周` and `最近` only matched when surrounded by
 * spaces. The spaces are removed.
 *
 * @param query Search text.
 * @returns The parsed intent; `confidence` is left at 0.
 */
private async parseQueryIntent(query: string): Promise<QueryIntent> {
  const intent: QueryIntent = {
    type: 'general',
    entities: [],
    timeConstraint: null,
    confidence: 0,
  };

  // Time constraint.
  const timeMatch = query.match(/(昨天|上周|最近|过去\s*\d+\s*天)/);
  if (timeMatch) {
    intent.timeConstraint = this.parseTimeExpression(timeMatch[0]);
    intent.type = 'temporal';
  }

  // Entities.
  const entities = this.extractEntities(query);
  if (entities.length > 0) {
    intent.entities = entities;
    intent.type = 'entity_focused';
  }

  // Question type.
  if (query.startsWith('如何') || query.startsWith('怎么')) {
    intent.type = 'procedural'; // how-to question
  } else if (query.startsWith('什么') || query.startsWith('哪些')) {
    intent.type = 'factual'; // factual question
  } else if (query.startsWith('为什么')) {
    intent.type = 'explanatory'; // explanatory question
  }
  return intent;
}
/**
 * Vector search implementation: embeds the query, searches the vector index
 * by cosine similarity and converts the hits to the common result shape.
 *
 * The listing declared this as `vectorSearch.search(...)`, which is not a
 * valid method name (a dotted name is a syntax error). It is given a valid
 * identifier here.
 *
 * NOTE(review): presumably this body illustrates what the `vectorSearch`
 * collaborator's `search` does; `this.embeddingModel` and `this.database`
 * are not among the fields visible on this class. Confirm where it belongs.
 *
 * @param query Search text.
 * @param options Optional limit (default 50) and similarity threshold
 *   (default 0.7). A threshold of 0 is honoured.
 */
private async runVectorSearch(
  query: string,
  options?: SearchOptions
): Promise<VectorSearchResult[]> {
  const queryEmbedding = await this.embeddingModel.encode(query);
  const results = await this.database.searchVectors(
    queryEmbedding,
    {
      limit: options?.limit || 50,
      threshold: options?.similarityThreshold ?? 0.7,
      metric: 'cosine',
    }
  );
  return results.map(r => ({
    memory: r.memory,
    score: r.similarity,
    source: 'vector',
  }));
}
/**
 * Keyword search implementation: runs a full-text query and scores each hit
 * with BM25 against the query.
 *
 * The listing declared this as `keywordSearch.search(...)`, which is not a
 * valid method name (a dotted name is a syntax error). It is given a valid
 * identifier here.
 *
 * NOTE(review): presumably this body illustrates what the `keywordSearch`
 * collaborator's `search` does; `this.database` and `this.calculateBM25` are
 * not visible on this class. Confirm where it belongs.
 *
 * @param query Search text.
 * @param options Optional limit (default 50).
 */
private async runKeywordSearch(
  query: string,
  options?: SearchOptions
): Promise<KeywordSearchResult[]> {
  const results = await this.database.searchFullText(query, {
    limit: options?.limit || 50,
  });
  return results.map(r => ({
    memory: r.memory,
    score: this.calculateBM25(query, r.memory.content),
    source: 'keyword',
    highlights: r.highlights,
  }));
}
/**
 * Removes duplicate results for the same memory, keeping the one with the
 * higher score. Results keep the position of the first occurrence of their
 * memory id.
 */
private deduplicateResults(results: SearchResult[]): SearchResult[] {
  const bestById = new Map<string, SearchResult>();
  for (const candidate of results) {
    const id = candidate.memory.id;
    const current = bestById.get(id);
    if (current === undefined || candidate.score > current.score) {
      bestById.set(id, candidate);
    }
  }
  return [...bestById.values()];
}
}
/**
 * Re-orders search results with a learned ranking model.
 */
class ResultReranker {
  private model: LearningToRankModel;

  constructor() {
    // LambdaMART or a similar algorithm.
    this.model = new LearningToRankModel();
  }

  /**
   * Scores every result with the model and returns the results sorted by
   * that score, highest first. Each returned item carries a `finalScore`.
   */
  async rerank(
    results: SearchResult[],
    query: string,
    intent: QueryIntent
  ): Promise<SearchResult[]> {
    const featureRows = results.map(result =>
      this.extractFeatures(result, query, intent)
    );
    const predicted = await this.model.predict(featureRows);

    const scored = results.map((result, index) => ({
      ...result,
      finalScore: predicted[index],
    }));
    scored.sort((left, right) => right.finalScore - left.finalScore);
    return scored;
  }

  /**
   * Builds the feature vector for one result: text similarity, memory
   * quality, recency, intent match and user-interaction features.
   */
  private extractFeatures(
    result: SearchResult,
    query: string,
    intent: QueryIntent
  ): FeatureVector {
    const { memory } = result;
    return {
      // Text similarity
      bm25Score: this.calculateBM25(query, memory.content),
      cosineSimilarity: result.score,
      // Memory quality
      importance: memory.importance,
      accessCount: memory.accessCount,
      contentLength: memory.content.length,
      // Recency
      recency: this.calculateRecency(memory.createdAt),
      // Intent match
      entityTypeMatch: this.checkEntityTypeMatch(memory, intent.entities),
      questionTypeMatch: this.checkQuestionTypeMatch(memory, intent.type),
      // User interaction
      userPreviouslyAccessed: this.checkUserAccessHistory(memory),
      userMarkedImportant: memory.tags?.includes('important') ? 1 : 0,
    };
  }
}
有效性验证
// packages/core/src/memory/validation.ts
export class MemoryValidator {
/**
 * Validates a single memory.
 *
 * Empty content or an out-of-range importance makes the memory invalid.
 * Staleness, conflicts, redundancy and doubtful factual accuracy are
 * reported as issues or suggestions but do not change `isValid`.
 *
 * @param memory Memory to validate.
 * @returns Validity flag plus collected issues and suggestions.
 */
async validateMemory(memory: Memory): Promise<ValidationResult> {
  const report: ValidationResult = {
    isValid: true,
    issues: [],
    suggestions: [],
  };

  // Basic checks.
  const hasContent = Boolean(memory.content) && memory.content.trim().length !== 0;
  if (!hasContent) {
    report.isValid = false;
    report.issues.push('记忆内容为空');
  }
  const importanceInRange = !(memory.importance < 0 || memory.importance > 1);
  if (!importanceInRange) {
    report.isValid = false;
    report.issues.push('重要性评分超出范围 [0, 1]');
  }

  // Staleness.
  const ageInDays = daysBetween(memory.createdAt, new Date());
  if (ageInDays > 365 && memory.accessCount === 0) {
    report.suggestions.push('此记忆已超过 1 年未被访问,考虑删除');
  }

  // Conflicts with existing memories.
  const conflicts = await this.detectConflictsWithExisting(memory);
  if (conflicts.length > 0) {
    for (const conflict of conflicts) {
      report.issues.push(`与记忆"${conflict.otherMemory.content}"冲突`);
    }
    report.suggestions.push('建议解决冲突后再使用此记忆');
  }

  // Redundancy.
  const similar = await this.detectRedundancy(memory);
  if (similar.length > 0) {
    report.suggestions.push(
      `与 ${similar.length} 条现有记忆高度相似,考虑合并`
    );
  }

  // Factual accuracy, for factual memory types only.
  const isFactual =
    memory.type === MemoryType.SEMANTIC || memory.type === MemoryType.GLOBAL;
  if (isFactual) {
    const accuracy = await this.verifyFactualAccuracy(memory);
    if (!accuracy.passed) {
      report.issues.push(`事实准确性存疑:${accuracy.reason}`);
    }
  }

  return report;
}
/**
* 验证记忆集合的整体质量
*/
async validateMemoryCollection(
memories: Memory[]
): Promise<CollectionValidationResult> {
const stats = {
total: memories.length,
valid: 0,
withIssues: 0,
avgImportance: 0,
avgAccessCount: 0,
duplicates: 0,
};
const validationPromises = memories.map(m => this.validateMemory(m));
const results = await Promise.all(validationPromises);
for (const result of results) {
if (result.isValid) {
stats.valid++;
} else {
stats.withIssues++;
}
}
stats.avgImportance =
memories.reduce((sum, m) => sum + m.importance, 0) / memories.length;
stats.avgAccessCount =
memories.reduce((sum, m) => sum + m.accessCount, 0) / memories.length;
// 检测重复
stats.duplicates = this.countDuplicates(memories);
return {
stats,
results,
recommendations: this.generateRecommendations(stats, results),
};
}
/**
* 事实准确性验证
*/
private async verifyFactualAccuracy(memory: Memory): Promise<AccuracyCheck> {
// 方法 1:交叉验证其他记忆
const relatedMemories = await this.findRelatedMemories(memory);
for (const related of relatedMemories) {
if (this.contradicts(memory, related)) {
return {
passed: false,
reason: `与已有记忆"${related.content}"矛盾`,
};
}
}
// 方法 2:使用外部知识源验证(如 Wikipedia API)
if (memory.content.includes('事实:') || memory.content.includes('定义:')) {
const externalVerification = await this.verifyWithExternalSource(memory);
if (!externalVerification.verified) {
return {
passed: false,
reason: externalVerification.reason,
};
}
}
return { passed: true };
}
}
3.5 隐私保护措施
敏感信息过滤
// packages/core/src/memory/privacy-filter.ts
/**
 * Redacts personal data, credentials and sensitive code fragments from text
 * before it is stored as a memory.
 */
export class PrivacyFilter {
  private piiDetector: PIIDetector;
  private secretDetector: SecretDetector;

  /**
   * Filters sensitive information out of `content` before storage.
   *
   * All three detectors run against the ORIGINAL text so that no detector is
   * confused by another one's placeholders. The redactions are then composed:
   * the code sanitizer's output is the base, and every detected PII / secret
   * value is replaced on top of it. Every occurrence of a detected value is
   * replaced, not just the first one.
   *
   * @param content - Raw text about to be stored.
   * @returns The original text, the redacted text, every detected item
   *   (PII first, then secrets, then code findings) and a `redacted` flag.
   */
  async filterSensitiveContent(content: string): Promise<FilteredResult> {
    const result: FilteredResult = {
      originalContent: content,
      filteredContent: content,
      detectedItems: [],
      redacted: false,
    };

    // 1. PII, 2. secrets and credentials, 3. sensitive code.
    const piiItems = await this.piiDetector.detect(content);
    const secrets = await this.secretDetector.detect(content);
    const codeSensitivity = await this.detectCodeSensitivity(content);

    // Start from the code-sanitized text so those redactions are not lost
    // when the PII / secret placeholders are applied.
    let filtered = codeSensitivity.containsSensitiveCode
      ? codeSensitivity.sanitizedCode
      : content;

    for (const item of piiItems) {
      result.detectedItems.push(item);
      filtered = this.redactAll(filtered, item.value, `[${item.type}_REDACTED]`);
      result.redacted = true;
    }

    for (const secret of secrets) {
      result.detectedItems.push(secret);
      filtered = this.redactAll(filtered, secret.value, `[SECRET_${secret.type}_REDACTED]`);
      result.redacted = true;
    }

    if (codeSensitivity.containsSensitiveCode) {
      result.detectedItems.push(...codeSensitivity.items);
      result.redacted = true;
    }

    result.filteredContent = filtered;
    return result;
  }

  /**
   * Replaces every literal occurrence of `value` in `text` with `placeholder`.
   *
   * Uses split/join so the value is treated as plain text (no regex or `$`
   * replacement-pattern semantics). Empty values are ignored because
   * splitting on '' would insert the placeholder between every character.
   */
  private redactAll(text: string, value: string, placeholder: string): string {
    if (!value) {
      return text;
    }
    return text.split(value).join(placeholder);
  }

  /**
   * Regex- and NLP-based PII detection.
   *
   * For regex hits, `position` is the offset of that specific hit, so a value
   * that appears several times yields items with distinct positions.
   *
   * @param text - Text to scan.
   * @returns Items in the order: emails, phone numbers, ID cards, names.
   */
  private async detectPII(text: string): Promise<PIIItem[]> {
    const items: PIIItem[] = [];

    const collect = (
      pattern: RegExp,
      type: PIIItem['type'],
      confidence: number
    ): void => {
      for (const hit of text.matchAll(pattern)) {
        items.push({
          type,
          value: hit[0],
          confidence,
          position: hit.index ?? text.indexOf(hit[0]),
        });
      }
    };

    // Email addresses.
    collect(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g, 'email', 0.95);

    // Phone numbers (several national formats). The patterns overlap, so one
    // number can be reported by more than one of them.
    const phonePatterns = [
      /\+?\d{1,3}[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{1,4}[-.\s]?\d{1,9}/g, // international format
      /1[3-9]\d{9}/g, // mainland China mobile
      /\d{3}-\d{4}-\d{4}/g, // Japan format
    ];
    for (const pattern of phonePatterns) {
      collect(pattern, 'phone', 0.9);
    }

    // Mainland China ID card numbers.
    collect(
      /[1-9]\d{5}(18|19|20)\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])\d{3}[\dXx]/g,
      'id_card',
      0.98
    );

    // Person names come from an NLP helper that yields plain strings, so only
    // the first occurrence's offset can be reported here.
    const names = await this.extractPersonNames(text);
    for (const name of names) {
      items.push({
        type: 'person_name',
        value: name,
        confidence: 0.85,
        position: text.indexOf(name),
      });
    }

    return items;
  }

  /**
   * Pattern-based detection of keys, tokens and API credentials.
   *
   * AWS secret access keys are not detected: a bare 40-character base64-like
   * run is too ambiguous without surrounding context.
   *
   * @param text - Text to scan.
   * @returns Items in the order: AWS access keys, GitHub tokens, JWTs,
   *   generic API keys.
   */
  private async detectSecrets(text: string): Promise<SecretItem[]> {
    const items: SecretItem[] = [];

    const collect = (
      pattern: RegExp,
      type: SecretItem['type'],
      confidence: number,
      severity: SecretItem['severity']
    ): void => {
      for (const hit of text.match(pattern) || []) {
        items.push({ type, value: hit, confidence, severity });
      }
    };

    // AWS access key id.
    collect(/AKIA[0-9A-Z]{16}/g, 'aws_access_key', 0.99, 'critical');

    // GitHub personal access token.
    collect(/ghp_[0-9a-zA-Z]{36}/g, 'github_token', 0.99, 'critical');

    // JWT.
    collect(
      /eyJ[A-Za-z0-9-_]+\.eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_.+/=]*/g,
      'jwt_token',
      0.95,
      'high'
    );

    // Generic API keys (heuristic).
    const apiKeyPatterns = [
      /api[_-]?key\s*[=:]\s*["']?[0-9a-zA-Z]{16,}["']?/gi,
      /apikey\s*[=:]\s*["']?[0-9a-zA-Z]{16,}["']?/gi,
      /bearer\s+[0-9a-zA-Z-_.+/=]+/gi,
    ];
    for (const pattern of apiKeyPatterns) {
      collect(pattern, 'api_key', 0.8, 'high');
    }

    return items;
  }

  /**
   * Detects and redacts sensitive code: hard-coded passwords, PEM private
   * keys and database connection strings with embedded credentials.
   *
   * @param content - Text (typically source code) to scan.
   * @returns Whether anything was found, the sanitized text and the findings.
   */
  private async detectCodeSensitivity(content: string): Promise<CodeSensitivityResult> {
    const items: DetectedItem[] = [];
    let sanitizedContent = content;

    // Hard-coded passwords.
    const passwordPatterns = [
      /password\s*[=:]\s*["'][^"']+["']/gi,
      /passwd\s*[=:]\s*["'][^"']+["']/gi,
      /pwd\s*[=:]\s*["'][^"']+["']/gi,
    ];
    for (const pattern of passwordPatterns) {
      for (const match of content.matchAll(pattern)) {
        items.push({
          type: 'hardcoded_password',
          value: match[0],
          confidence: 0.9,
          severity: 'critical',
        });
      }
      sanitizedContent = sanitizedContent.replace(pattern, `/* PASSWORD_REDACTED */`);
    }

    // Private keys. Any "-----BEGIN <kind> PRIVATE KEY-----" header counts
    // (RSA, OPENSSH, EC, PKCS#8, ...), matching what the removal regex below
    // is able to strip.
    if (/-----BEGIN[^-]+PRIVATE KEY-----/.test(content)) {
      items.push({
        type: 'private_key',
        value: '(私钥内容)',
        confidence: 0.99,
        severity: 'critical',
      });
      // Remove complete BEGIN...END blocks entirely.
      sanitizedContent = sanitizedContent.replace(
        /-----BEGIN[^-]+PRIVATE KEY-----[\s\S]*?-----END[^-]+PRIVATE KEY-----/g,
        '/* PRIVATE_KEY_REDACTED */'
      );
    }

    // Database connection strings that embed credentials.
    const connectionStringPattern = /(mongodb|postgres|mysql|redis):\/\/[^:\s]+:[^@\s]+@[^\s]+/gi;
    for (const conn of content.match(connectionStringPattern) || []) {
      items.push({
        type: 'database_connection_string',
        value: conn,
        confidence: 0.95,
        severity: 'critical',
      });
    }
    sanitizedContent = sanitizedContent.replace(
      connectionStringPattern,
      '/* DB_CONNECTION_REDACTED */'
    );

    return {
      containsSensitiveCode: items.length > 0,
      sanitizedCode: sanitizedContent,
      items,
    };
  }
}
GDPR 合规支持
// packages/core/src/memory/gdpr-compliance.ts
/**
 * Implements GDPR data-subject rights on top of the memory database:
 * access, erasure, portability and consent tracking.
 */
export class GDPRComplianceManager {
  private database: MemoryDatabase;

  /**
   * Right of access: gathers everything stored for a user.
   *
   * @param userId - The data subject.
   * @returns The user's sanitized memories, access log rows and summary
   *   statistics. `earliestRecord` is the current time when the user has no
   *   memories.
   */
  async exportUserData(userId: string): Promise<UserDataExport> {
    const memories = await this.database.findByFilters({
      metadata: { userId },
    });

    const accessLogs = await this.database.query(`
SELECT * FROM memory_access_log
WHERE user_id = ?
ORDER BY accessed_at DESC
`, [userId]);

    const exportDate = new Date();
    const exportedMemories = memories.map(m => this.sanitizeForExport(m));

    // Oldest creation time across all memories.
    let earliestRecord = memories[0]?.createdAt || new Date();
    for (const m of memories) {
      if (m.createdAt < earliestRecord) {
        earliestRecord = m.createdAt;
      }
    }

    return {
      userId,
      exportDate,
      memories: exportedMemories,
      accessHistory: accessLogs,
      statistics: {
        totalMemories: memories.length,
        totalAccesses: accessLogs.length,
        earliestRecord,
      },
    };
  }

  /**
   * Right to erasure: deletes a user's memories unless a retention ground
   * applies, then deletes all of the user's access log rows.
   *
   * @param userId - The data subject.
   * @param options - Retention grounds to take into account.
   * @returns Counts of deleted and retained memories, with a reason for each
   *   retained one.
   */
  async eraseUserData(userId: string, options?: ErasureOptions): Promise<ErasureResult> {
    const result: ErasureResult = {
      deletedCount: 0,
      retainedCount: 0,
      reasons: [],
    };

    const memories = await this.database.findByFilters({
      metadata: { userId },
    });

    // Memories are processed one at a time, in order.
    for (const memory of memories) {
      if (!this.shouldRetainMemory(memory, options)) {
        await this.database.delete(memory.id);
        result.deletedCount++;
        continue;
      }

      result.retainedCount++;
      result.reasons.push({
        memoryId: memory.id,
        reason: this.getRetentionReason(memory, options),
      });
    }

    // Access logs are removed regardless of what was retained above.
    await this.database.execute(
      'DELETE FROM memory_access_log WHERE user_id = ?',
      [userId]
    );

    return result;
  }

  /**
   * Decides whether a memory is kept despite an erasure request.
   *
   * A ground is only evaluated when the matching option is set; the grounds
   * are tried in order (legal obligation, public interest, freedom of
   * expression) and the first hit wins. With no options nothing is retained.
   */
  private shouldRetainMemory(
    memory: Memory,
    options?: ErasureOptions
  ): boolean {
    return Boolean(
      (options?.legalObligation && this.isRequiredByLaw(memory)) ||
        (options?.publicInterest && this.isPublicInterest(memory)) ||
        (options?.freedomOfExpression && this.relatesToFreedomOfExpression(memory))
    );
  }

  /**
   * Right to data portability: serializes the user's export.
   *
   * @param userId - The data subject.
   * @param format - Target format; anything other than 'csv' or 'xml' yields
   *   pretty-printed JSON.
   */
  async exportInPortableFormat(
    userId: string,
    format: 'json' | 'csv' | 'xml'
  ): Promise<string> {
    const data = await this.exportUserData(userId);

    if (format === 'csv') {
      return this.convertToCSV(data);
    }
    if (format === 'xml') {
      return this.convertToXML(data);
    }
    return JSON.stringify(data, null, 2);
  }

  /**
   * Consent management: stores one consent decision, stamped with the
   * current time.
   */
  async recordConsent(
    userId: string,
    consentType: ConsentType,
    granted: boolean,
    metadata?: ConsentMetadata
  ): Promise<void> {
    const record = {
      userId,
      type: consentType,
      granted,
      timestamp: new Date(),
      ip: metadata?.ip,
      userAgent: metadata?.userAgent,
      version: metadata?.policyVersion,
    };
    await this.database.insertConsentRecord(record);
  }

  /**
   * Reports whether the user's most recent consent record for `consentType`
   * exists, is a grant, and has not expired.
   */
  async hasValidConsent(
    userId: string,
    consentType: ConsentType
  ): Promise<boolean> {
    const latest = await this.database.getLastConsentRecord(userId, consentType);
    if (!latest || !latest.granted) {
      return false;
    }

    // A record without an expiry never lapses.
    const expired = latest.expiresAt && latest.expiresAt < new Date();
    return !expired;
  }
}
课后练习
基础题
- 数据库设计:画出记忆系统的 ER 图,标注所有表和关系
- 重要性计算:实现一个简化版的重要性评分函数
- LRU 实现:手写一个 LRU 缓存(不使用库)
进阶题
- 向量搜索:使用 Transformers.js(npm 包 @xenova/transformers)实现简单的语义搜索
- 冲突检测:实现一个基于规则的冲突检测器
- 隐私过滤:编写正则表达式检测邮箱、电话、API Key
挑战题
- 完整系统:集成所有组件,实现可用的记忆系统
- 性能优化:对百万级记忆进行性能基准测试和优化
- 学习排序:使用用户反馈训练排序模型
下节预告
第 4 课:权限系统设计
- 🔐 RBAC vs ABAC:权限模型深度对比
- 🎯 四层权限体系架构
- ⚠️ CVE-2026-21852 漏洞完整分析
- 🛡️ 动态权限管理和审计
- 🔑 最小权限原则的最佳实践
版权声明:本课程内容仅用于教育和技术研究目的,请勿将所学知识用于非法活动。所有案例分析均基于公开信息,旨在提升安全意识和技术水平。
更多推荐



所有评论(0)