[ { "id": "snpt_topn_station", "desc": "按站点统计水表数量并取前N", "type": "topn", "title": "站点TopN水表数", "examples": [ "各站点水表数量排名前10", "站点水表覆盖情况排行" ], "variables": [ { "name": "top_n", "type": "int", "default": 10 } ], "dialect_sql": { "mysql": "SELECT station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY station\nORDER BY meter_cnt DESC\nLIMIT {{top_n}};" }, "applicability": { "constraints": { "notes": [ "TopN建议N<=36", "以service_point_id去重计数", "无时间列,无法做趋势" ], "fk_join_available": false, "dim_cardinality_hint": 36 }, "time_column": null, "required_columns": [ "station", "service_point_id" ] }, "business_caliber": "水表数=按service_point_id去重计数;粒度=站点。仅统计当前表中的有效记录(不含时间口径)。安全限制:用于分析排名,避免扩大LIMIT造成全量导出。" }, { "id": "snpt_share_district", "desc": "统计各辖区水表数及其占比", "type": "ratio", "title": "辖区水表占比", "examples": [ "各辖区水表占比", "哪个辖区水表最多" ], "variables": [], "dialect_sql": { "mysql": "WITH by_district AS (\n SELECT district, COUNT(DISTINCT service_point_id) AS meter_cnt\n FROM `data-ge`.`water_meter_info`\n GROUP BY district\n), tot AS (\n SELECT COUNT(DISTINCT service_point_id) AS total_cnt\n FROM `data-ge`.`water_meter_info`\n)\nSELECT b.district,\n b.meter_cnt,\n ROUND(b.meter_cnt / NULLIF(t.total_cnt, 0) * 100, 2) AS pct\nFROM by_district b\nCROSS JOIN tot t\nORDER BY pct DESC, b.district;" }, "applicability": { "constraints": { "notes": [ "占比分母为全表service_point_id去重总数", "service_point_id为空将被忽略" ], "fk_join_available": false, "dim_cardinality_hint": 13 }, "time_column": null, "required_columns": [ "district", "service_point_id" ] }, "business_caliber": "水表数=按service_point_id去重计数;粒度=辖区。占比=辖区水表数/全表水表总数。安全限制:仅基于本表,不代表全市/全网口径;无时间维度。" }, { "id": "snpt_dist_diameter", "desc": "按表径统计水表数量分布", "type": "aggregate", "title": "表径分布统计", "examples": [ "不同口径水表有多少", "查看表径分布情况" ], "variables": [], "dialect_sql": { "mysql": "SELECT meter_diameter,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY 
meter_diameter\nORDER BY meter_cnt DESC, meter_diameter;" }, "applicability": { "constraints": { "notes": [ "以service_point_id去重计数", "适合绘制条形图/饼图" ], "fk_join_available": false, "dim_cardinality_hint": 8 }, "time_column": null, "required_columns": [ "meter_diameter", "service_point_id" ] }, "business_caliber": "水表数=按service_point_id去重计数;粒度=表径。安全限制:仅用于分布分析,不含时间过滤;避免用于明细导出。" }, { "id": "snpt_type_subtype_matrix", "desc": "统计水表类型与子类组合的数量", "type": "aggregate", "title": "类型子类分布", "examples": [ "不同类型与子类的水表数量", "查看类型与子类的组合分布" ], "variables": [], "dialect_sql": { "mysql": "SELECT meter_type,\n meter_subtype,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_type, meter_subtype\nORDER BY meter_cnt DESC, meter_type, meter_subtype;" }, "applicability": { "constraints": { "notes": [ "组合基数<=5×9=45", "以service_point_id去重计数" ], "fk_join_available": false, "dim_cardinality_hint": 45 }, "time_column": null, "required_columns": [ "meter_type", "meter_subtype", "service_point_id" ] }, "business_caliber": "水表数=按service_point_id去重计数;粒度=类型×子类组合。安全限制:仅用于汇总分析,不包含时间或业务状态变化。" }, { "id": "snpt_quality_spid_uniq", "desc": "评估service_point_id的空值与重复情况", "type": "quality", "title": "服务点唯一性检查", "examples": [ "检查服务点ID是否唯一", "统计service_point_id空值与重复情况" ], "variables": [], "dialect_sql": { "mysql": "SELECT\n COUNT(*) AS total_rows,\n SUM(service_point_id IS NULL) AS null_cnt,\n COUNT(DISTINCT service_point_id) AS distinct_cnt,\n (COUNT(*) - COUNT(DISTINCT service_point_id)) AS duplicate_rows_est,\n (\n SELECT COUNT(*) FROM (\n SELECT service_point_id\n FROM `data-ge`.`water_meter_info`\n GROUP BY service_point_id\n HAVING COUNT(*) > 1\n ) AS dup\n ) AS dup_key_groups\nFROM `data-ge`.`water_meter_info`;" }, "applicability": { "constraints": { "notes": [ "用于键完整性检查", "重复行估算=总行数-去重数(空值行也计入差值)" ], "fk_join_available": false, "dim_cardinality_hint": null }, "time_column": null, "required_columns": [ "service_point_id" ] }, "business_caliber": 
"质量检查口径:在本表内评估service_point_id的非空与唯一性,不代表跨表全局唯一。安全限制:仅输出汇总指标,不暴露明细重复值。" }, { "id": "snpt_quality_account_nulls", "desc": "抽取account_id为空的记录用于排查", "type": "quality", "title": "账户ID缺失明细", "examples": [ "列出account_id为空的水表", "抽样查看账户缺失的数据行" ], "variables": [ { "name": "limit_n", "type": "int", "default": 50 } ], "dialect_sql": { "mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nWHERE account_id IS NULL\nLIMIT {{limit_n}};" }, "applicability": { "constraints": { "notes": [ "明细仅限小样本抽取", "建议LIMIT<=100,避免全量导出" ], "fk_join_available": false, "dim_cardinality_hint": null }, "time_column": null, "required_columns": [ "account_id" ] }, "business_caliber": "质量抽样:筛出账户ID缺失的水表记录,便于核对。安全限制:仅用于样本排查,不建议在生产中全量导出;如需口径统计请改为COUNT聚合。" }, { "id": "snpt_sample_random_rows", "desc": "随机抽取水表信息用于人工核验", "type": "sample", "title": "随机抽样明细", "examples": [ "抽样查看水表信息", "随机抽取20条做质检" ], "variables": [ { "name": "sample_size", "type": "int", "default": 20 } ], "dialect_sql": { "mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nORDER BY RAND()\nLIMIT {{sample_size}};" }, "applicability": { "constraints": { "notes": [ "使用RAND()随机,样本不可复现", "建议限制样本量" ], "fk_join_available": false, "dim_cardinality_hint": 300 }, "time_column": null, "required_columns": [ "service_point_id" ] }, "business_caliber": "样本抽取:从本表随机返回若干行明细。安全限制:避免扩大LIMIT进行全量下载;如需可复现样本,可将RAND()改为带固定种子的RAND(N)(结果仍依赖行读取顺序)。" }, { "id": "snpt_filter_office_type_where", "desc": "常用WHERE筛选条件片段:按营业所与类型且为有效", "type": "sample", "title": "机构类型筛选片段", "examples": [ "筛选A营业所的机械表", "仅查看某营业所的指定类型水表" ], "variables": [ { "name": "supply_office", "type": "string" }, { "name": "meter_type", "type": "string" } ], "dialect_sql": { "mysql": "WHERE supply_office = '{{supply_office}}'\n AND meter_type = '{{meter_type}}'\n AND meter_status = '有效'" }, "applicability": { "constraints": { "notes": [ "这是条件片段,可拼接到其他查询", "meter_status当前为单一值“有效”" ], "fk_join_available": false, "dim_cardinality_hint": 11 }, "time_column": null, "required_columns": [ "supply_office", "meter_type", 
"meter_status" ] }, "business_caliber": "过滤口径:仅保留指定营业所与指定水表类型、且状态为“有效”的记录。安全限制:为片段用途,需拼接在SELECT…FROM之后使用。" }, { "id": "snpt_office_station_dist", "desc": "按营业所与站点组合统计水表数", "type": "aggregate", "title": "所站层级分布", "examples": [ "按营业所查看各站点水表数", "所站两级的水表分布情况" ], "variables": [], "dialect_sql": { "mysql": "SELECT supply_office,\n station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY supply_office, station\nORDER BY supply_office, meter_cnt DESC, station;" }, "applicability": { "constraints": { "notes": [ "组合基数<=11×36=396", "以service_point_id去重计数", "如结果过长可再按TopN筛选" ], "fk_join_available": false, "dim_cardinality_hint": 396 }, "time_column": null, "required_columns": [ "supply_office", "station", "service_point_id" ] }, "business_caliber": "水表数=按service_point_id去重计数;粒度=营业所×站点。安全限制:结果行数可能较多,建议在可视化端增加筛选或分页。" }, { "id": "snpt_total_meter_baseline", "desc": "获取全表水表去重总量基线", "type": "aggregate", "title": "水表总量基线", "examples": [ "当前有多少只水表", "作为占比分析的分母基线" ], "variables": [], "dialect_sql": { "mysql": "SELECT COUNT(DISTINCT service_point_id) AS meter_total\nFROM `data-ge`.`water_meter_info`;" }, "applicability": { "constraints": { "notes": [ "作为其他占比/分摊分母基线", "忽略service_point_id为空的记录" ], "fk_join_available": false, "dim_cardinality_hint": 300 }, "time_column": null, "required_columns": [ "service_point_id" ] }, "business_caliber": "水表总量=按service_point_id去重计数;基于当前表的全量记录。安全限制:无时间维度,无法反映存量随时间变化。" } ]