From 248492d68e1f821b13d08f9f9628071cb3113e81 Mon Sep 17 00:00:00 2001 From: zhaoawd Date: Tue, 9 Dec 2025 00:36:02 +0800 Subject: [PATCH] =?UTF-8?q?=E8=B0=83=E6=95=B4rag=5Fsnippet=E8=A1=A8?= =?UTF-8?q?=E5=AD=97=E6=AE=B5=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/services/table_snippet.py | 7 +++++++ file/tableschema/rag_snippet.sql | 15 ++++++++++++--- test/test_snippet_rag_ingest.py | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/app/services/table_snippet.py b/app/services/table_snippet.py index 345d296..70db2d3 100644 --- a/app/services/table_snippet.py +++ b/app/services/table_snippet.py @@ -435,6 +435,8 @@ def merge_snippet_records_from_db( for alias_id, alias_info in alias_map.items(): if alias_id in seen_ids: continue + if alias_action_id is None and snippet_action_id is None: + continue merged.append( { "id": alias_id, @@ -520,11 +522,14 @@ def _prepare_rag_payloads( else: updated_at = updated_at_raw if isinstance(updated_at_raw, datetime) else now + created_at = updated_at + row = { "rag_item_id": rag_item_id, "workspace_id": workspace_id, "table_id": table_id, "version_ts": version_ts, + "created_at": created_at, "action_result_id": action_result_id, "snippet_id": snippet_id, "rag_text": rag_text, @@ -557,6 +562,7 @@ def _upsert_rag_snippet_rows(engine: Engine, rows: Sequence[Dict[str, Any]]) -> workspace_id, table_id, version_ts, + created_at, action_result_id, snippet_id, rag_text, @@ -567,6 +573,7 @@ def _upsert_rag_snippet_rows(engine: Engine, rows: Sequence[Dict[str, Any]]) -> :workspace_id, :table_id, :version_ts, + :created_at, :action_result_id, :snippet_id, :rag_text, diff --git a/file/tableschema/rag_snippet.sql b/file/tableschema/rag_snippet.sql index e6f3c5a..abcc9ef 100644 --- a/file/tableschema/rag_snippet.sql +++ b/file/tableschema/rag_snippet.sql @@ -7,9 +7,18 @@ CREATE TABLE `rag_snippet` ( `snippet_id` varchar(255) COLLATE utf8mb4_bin NOT 
NULL COMMENT '原始 snippet id', `rag_text` text COLLATE utf8mb4_bin NOT NULL COMMENT '用于向量化的拼接文本', `merged_json` json NOT NULL COMMENT '合并后的 snippet 对象', - `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - PRIMARY KEY (`rag_item_id`), + `created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '写入时间,用于分区', + `updated_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + PRIMARY KEY (`rag_item_id`,`created_at`), KEY `idx_action_result` (`action_result_id`), KEY `idx_workspace` (`workspace_id`), KEY `idx_table_version` (`table_id`,`version_ts`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT='RAG snippet 索引缓存'; +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin COMMENT='RAG snippet 索引缓存' +PARTITION BY RANGE COLUMNS (`created_at`) ( + PARTITION p202401 VALUES LESS THAN ('2024-02-01'), + PARTITION p202402 VALUES LESS THAN ('2024-03-01'), + PARTITION p202403 VALUES LESS THAN ('2024-04-01'), + PARTITION p202404 VALUES LESS THAN ('2024-05-01'), + PARTITION p202405 VALUES LESS THAN ('2024-06-01'), + PARTITION p_future VALUES LESS THAN (MAXVALUE) +); diff --git a/test/test_snippet_rag_ingest.py b/test/test_snippet_rag_ingest.py index 1668ead..8af1e16 100644 --- a/test/test_snippet_rag_ingest.py +++ b/test/test_snippet_rag_ingest.py @@ -38,6 +38,7 @@ def _setup_sqlite_engine(): workspace_id INTEGER, table_id INTEGER, version_ts INTEGER, + created_at TEXT, snippet_id TEXT, rag_text TEXT, merged_json TEXT,