demo数据

This commit is contained in:
zhaoawd
2025-11-14 00:58:00 +08:00
parent 7eb3c059a1
commit a72ca3593e
13 changed files with 2733 additions and 0 deletions

View File

@ -0,0 +1,330 @@
[
{
"id": "snpt_topn_station",
"desc": "按站点统计水表数量并取前N",
"type": "topn",
"title": "站点TopN水表数",
"examples": [
"各站点水表数量排名前10",
"站点水表覆盖情况排行"
],
"variables": [
{
"name": "top_n",
"type": "int",
"default": 10
}
],
"dialect_sql": {
"mysql": "SELECT station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY station\nORDER BY meter_cnt DESC\nLIMIT {{top_n}};"
},
"applicability": {
"constraints": {
"notes": [
"TopN建议N<=36",
"以service_point_id去重计数",
"无时间列,无法做趋势"
],
"fk_join_available": false,
"dim_cardinality_hint": 36
},
"time_column": null,
"required_columns": [
"station",
"service_point_id"
]
},
"business_caliber": "水表数=按service_point_id去重计数;粒度=站点。仅统计当前表中的有效记录,不含时间口径。安全限制:用于分析排名,避免扩大LIMIT造成全量导出。"
},
{
"id": "snpt_share_district",
"desc": "统计各辖区水表数及其占比",
"type": "ratio",
"title": "辖区水表占比",
"examples": [
"各辖区水表占比",
"哪个辖区水表最多"
],
"variables": [],
"dialect_sql": {
"mysql": "WITH by_district AS (\n SELECT district, COUNT(DISTINCT service_point_id) AS meter_cnt\n FROM `data-ge`.`water_meter_info`\n GROUP BY district\n), tot AS (\n SELECT COUNT(DISTINCT service_point_id) AS total_cnt\n FROM `data-ge`.`water_meter_info`\n)\nSELECT b.district,\n b.meter_cnt,\n ROUND(b.meter_cnt / NULLIF(t.total_cnt, 0) * 100, 2) AS pct\nFROM by_district b\nCROSS JOIN tot t\nORDER BY pct DESC, b.district;"
},
"applicability": {
"constraints": {
"notes": [
"占比分母为全表service_point_id去重总数",
"service_point_id为空将被忽略"
],
"fk_join_available": false,
"dim_cardinality_hint": 13
},
"time_column": null,
"required_columns": [
"district",
"service_point_id"
]
},
"business_caliber": "水表数=按service_point_id去重计数;粒度=辖区。占比=辖区水表数/全表水表总数。安全限制:仅基于本表,不代表全市/全网口径;无时间维度。"
},
{
"id": "snpt_dist_diameter",
"desc": "按表径统计水表数量分布",
"type": "aggregate",
"title": "表径分布统计",
"examples": [
"不同口径水表有多少",
"查看表径分布情况"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT meter_diameter,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_diameter\nORDER BY meter_cnt DESC, meter_diameter;"
},
"applicability": {
"constraints": {
"notes": [
"以service_point_id去重计数",
"适合绘制条形图/饼图"
],
"fk_join_available": false,
"dim_cardinality_hint": 8
},
"time_column": null,
"required_columns": [
"meter_diameter",
"service_point_id"
]
},
"business_caliber": "水表数=按service_point_id去重计数;粒度=表径。安全限制:仅用于分布分析,不含时间过滤;避免用于明细导出。"
},
{
"id": "snpt_type_subtype_matrix",
"desc": "统计水表类型与子类组合的数量",
"type": "aggregate",
"title": "类型子类分布",
"examples": [
"不同类型与子类的水表数量",
"查看类型与子类的组合分布"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT meter_type,\n meter_subtype,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_type, meter_subtype\nORDER BY meter_cnt DESC, meter_type, meter_subtype;"
},
"applicability": {
"constraints": {
"notes": [
"组合基数<=5×9=45",
"以service_point_id去重计数"
],
"fk_join_available": false,
"dim_cardinality_hint": 45
},
"time_column": null,
"required_columns": [
"meter_type",
"meter_subtype",
"service_point_id"
]
},
"business_caliber": "水表数=按service_point_id去重计数;粒度=类型×子类组合。安全限制:仅用于汇总分析,不包含时间或业务状态变化。"
},
{
"id": "snpt_quality_spid_uniq",
"desc": "评估service_point_id的空值与重复情况",
"type": "quality",
"title": "服务点唯一检查",
"examples": [
"检查服务点ID是否唯一",
"统计service_point_id空值与重复情况"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT\n COUNT(*) AS total_rows,\n SUM(service_point_id IS NULL) AS null_cnt,\n COUNT(DISTINCT service_point_id) AS distinct_cnt,\n (COUNT(*) - COUNT(DISTINCT service_point_id)) AS duplicate_rows_est,\n (\n SELECT COUNT(*) FROM (\n SELECT service_point_id\n FROM `data-ge`.`water_meter_info`\n GROUP BY service_point_id\n HAVING COUNT(*) > 1\n ) AS dup\n ) AS dup_key_groups\nFROM `data-ge`.`water_meter_info`;"
},
"applicability": {
"constraints": {
"notes": [
"用于键完整性检查",
"重复行估算=总行数-去重数(service_point_id为空的行也会计入)"
],
"fk_join_available": false,
"dim_cardinality_hint": null
},
"time_column": null,
"required_columns": [
"service_point_id"
]
},
"business_caliber": "质量检查口径:在本表内评估service_point_id的非空与唯一性,不代表跨表全局唯一。安全限制:仅输出汇总指标,不暴露明细重复值。"
},
{
"id": "snpt_quality_account_nulls",
"desc": "抽取account_id为空的记录用于排查",
"type": "quality",
"title": "账户ID缺失明细",
"examples": [
"列出account_id为空的水表",
"抽样查看账户缺失的数据行"
],
"variables": [
{
"name": "limit_n",
"type": "int",
"default": 50
}
],
"dialect_sql": {
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nWHERE account_id IS NULL\nLIMIT {{limit_n}};"
},
"applicability": {
"constraints": {
"notes": [
"明细仅限小样本抽取",
"建议LIMIT<=100避免全量导出"
],
"fk_join_available": false,
"dim_cardinality_hint": null
},
"time_column": null,
"required_columns": [
"account_id"
]
},
"business_caliber": "质量抽样:筛出账户ID缺失的水表记录,便于核对。安全限制:仅用于样本排查,不建议在生产中全量导出;如需口径统计,请改为COUNT聚合。"
},
{
"id": "snpt_sample_random_rows",
"desc": "随机抽取水表信息用于人工核验",
"type": "sample",
"title": "随机抽样明细",
"examples": [
"抽样查看水表信息",
"随机抽取20条做质检"
],
"variables": [
{
"name": "sample_size",
"type": "int",
"default": 20
}
],
"dialect_sql": {
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nORDER BY RAND()\nLIMIT {{sample_size}};"
},
"applicability": {
"constraints": {
"notes": [
"使用RAND()随机,样本不可复现",
"建议限制样本量"
],
"fk_join_available": false,
"dim_cardinality_hint": 300
},
"time_column": null,
"required_columns": [
"service_point_id"
]
},
"business_caliber": "样本抽取:从本表随机返回若干行明细。安全限制:避免扩大LIMIT进行全量下载;如需可复现样本,请改用带种子的随机方法(MySQL不原生支持)。"
},
{
"id": "snpt_filter_office_type_where",
"desc": "常用WHERE筛选条件片段:按营业所与类型,且为有效",
"type": "sample",
"title": "机构类型筛选",
"examples": [
"筛选A营业所的机械表",
"仅查看某营业所的指定类型水表"
],
"variables": [
{
"name": "supply_office",
"type": "string"
},
{
"name": "meter_type",
"type": "string"
}
],
"dialect_sql": {
"mysql": "WHERE supply_office = '{{supply_office}}'\n AND meter_type = '{{meter_type}}'\n AND meter_status = '有效'"
},
"applicability": {
"constraints": {
"notes": [
"这是条件片段,可拼接到其他查询",
"meter_status当前为单一值“有效”"
],
"fk_join_available": false,
"dim_cardinality_hint": 11
},
"time_column": null,
"required_columns": [
"supply_office",
"meter_type",
"meter_status"
]
},
"business_caliber": "过滤口径:仅保留指定营业所与指定水表类型、且状态为“有效”的记录。安全限制:为片段用途,需拼接在SELECT…FROM之后使用。"
},
{
"id": "snpt_office_station_dist",
"desc": "按营业所与站点组合统计水表数",
"type": "aggregate",
"title": "所站层级分布",
"examples": [
"按营业所查看各站点水表数",
"所站两级的水表分布情况"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT supply_office,\n station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY supply_office, station\nORDER BY supply_office, meter_cnt DESC, station;"
},
"applicability": {
"constraints": {
"notes": [
"组合基数<=11×36=396",
"以service_point_id去重计数",
"如结果过长可再按TopN筛选"
],
"fk_join_available": false,
"dim_cardinality_hint": 396
},
"time_column": null,
"required_columns": [
"supply_office",
"station",
"service_point_id"
]
},
"business_caliber": "水表数=按service_point_id去重计数;粒度=营业所×站点。安全限制:结果行数可能较多,建议在可视化端增加筛选或分页。"
},
{
"id": "snpt_total_meter_baseline",
"desc": "获取全表水表去重总量基线",
"type": "aggregate",
"title": "水表总量基线",
"examples": [
"当前有多少只水表",
"作为占比分析的分母基线"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT COUNT(DISTINCT service_point_id) AS meter_total\nFROM `data-ge`.`water_meter_info`;"
},
"applicability": {
"constraints": {
"notes": [
"作为其他占比/分摊分母基线",
"忽略service_point_id为空的记录"
],
"fk_join_available": false,
"dim_cardinality_hint": 300
},
"time_column": null,
"required_columns": [
"service_point_id"
]
},
"business_caliber": "水表总量=按service_point_id去重计数,基于当前表的全量记录。安全限制:无时间维度,无法反映存量随时间变化。"
}
]