330 lines
13 KiB
JSON
330 lines
13 KiB
JSON
[
|
||
{
|
||
"id": "snpt_topn_station",
|
||
"desc": "按站点统计水表数量并取前N",
|
||
"type": "topn",
|
||
"title": "站点TopN水表数",
|
||
"examples": [
|
||
"各站点水表数量排名前10",
|
||
"站点水表覆盖情况排行"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "top_n",
|
||
"type": "int",
|
||
"default": 10
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY station\nORDER BY meter_cnt DESC\nLIMIT {{top_n}};"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"TopN建议N<=36",
|
||
"以service_point_id去重计数",
|
||
"无时间列,无法做趋势"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 36
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"station",
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表数=按service_point_id去重计数;粒度=站点。仅统计当前表中的有效记录(不含时间口径)。安全限制:用于分析排名,避免扩大LIMIT造成全量导出。"
|
||
},
|
||
{
|
||
"id": "snpt_share_district",
|
||
"desc": "统计各辖区水表数及其占比",
|
||
"type": "ratio",
|
||
"title": "辖区水表占比",
|
||
"examples": [
|
||
"各辖区水表占比",
|
||
"哪个辖区水表最多"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "WITH by_district AS (\n SELECT district, COUNT(DISTINCT service_point_id) AS meter_cnt\n FROM `data-ge`.`water_meter_info`\n GROUP BY district\n), tot AS (\n SELECT COUNT(DISTINCT service_point_id) AS total_cnt\n FROM `data-ge`.`water_meter_info`\n)\nSELECT b.district,\n b.meter_cnt,\n ROUND(b.meter_cnt / NULLIF(t.total_cnt, 0) * 100, 2) AS pct\nFROM by_district b\nCROSS JOIN tot t\nORDER BY pct DESC, b.district;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"占比分母为全表service_point_id去重总数",
|
||
"service_point_id为空将被忽略"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 13
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"district",
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表数=按service_point_id去重计数;粒度=辖区。占比=辖区水表数/全表水表总数。安全限制:仅基于本表,不代表全市/全网口径;无时间维度。"
|
||
},
|
||
{
|
||
"id": "snpt_dist_diameter",
|
||
"desc": "按表径统计水表数量分布",
|
||
"type": "aggregate",
|
||
"title": "表径分布统计",
|
||
"examples": [
|
||
"不同口径水表有多少",
|
||
"查看表径分布情况"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT meter_diameter,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_diameter\nORDER BY meter_cnt DESC, meter_diameter;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"以service_point_id去重计数",
|
||
"适合绘制条形图/饼图"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 8
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"meter_diameter",
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表数=按service_point_id去重计数;粒度=表径。安全限制:仅用于分布分析,不含时间过滤;避免用于明细导出。"
|
||
},
|
||
{
|
||
"id": "snpt_type_subtype_matrix",
|
||
"desc": "统计水表类型与子类组合的数量",
|
||
"type": "aggregate",
|
||
"title": "类型子类分布",
|
||
"examples": [
|
||
"不同类型与子类的水表数量",
|
||
"查看类型与子类的组合分布"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT meter_type,\n meter_subtype,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY meter_type, meter_subtype\nORDER BY meter_cnt DESC, meter_type, meter_subtype;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"组合基数<=5×9=45",
|
||
"以service_point_id去重计数"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 45
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"meter_type",
|
||
"meter_subtype",
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表数=按service_point_id去重计数;粒度=类型×子类组合。安全限制:仅用于汇总分析,不包含时间或业务状态变化。"
|
||
},
|
||
{
|
||
"id": "snpt_quality_spid_uniq",
|
||
"desc": "评估service_point_id的空值与重复情况",
|
||
"type": "quality",
|
||
"title": "服务点唯一性检",
|
||
"examples": [
|
||
"检查服务点ID是否唯一",
|
||
"统计service_point_id空值与重复情况"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n COUNT(*) AS total_rows,\n SUM(service_point_id IS NULL) AS null_cnt,\n COUNT(DISTINCT service_point_id) AS distinct_cnt,\n (COUNT(*) - COUNT(DISTINCT service_point_id)) AS duplicate_rows_est,\n (\n SELECT COUNT(*) FROM (\n SELECT service_point_id\n FROM `data-ge`.`water_meter_info`\n GROUP BY service_point_id\n HAVING COUNT(*) > 1\n ) AS dup\n ) AS dup_key_groups\nFROM `data-ge`.`water_meter_info`;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"用于键完整性检查",
|
||
"重复行估算=总行数-去重数"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "质量检查口径:在本表内评估service_point_id的非空与唯一性,不代表跨表全局唯一。安全限制:仅输出汇总指标,不暴露明细重复值。"
|
||
},
|
||
{
|
||
"id": "snpt_quality_account_nulls",
|
||
"desc": "抽取account_id为空的记录用于排查",
|
||
"type": "quality",
|
||
"title": "账户ID缺失明细",
|
||
"examples": [
|
||
"列出account_id为空的水表",
|
||
"抽样查看账户缺失的数据行"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "limit_n",
|
||
"type": "int",
|
||
"default": 50
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nWHERE account_id IS NULL\nLIMIT {{limit_n}};"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"明细仅限小样本抽取",
|
||
"建议LIMIT<=100,避免全量导出"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"account_id"
|
||
]
|
||
},
|
||
"business_caliber": "质量抽样:筛出账户ID缺失的水表记录,便于核对。安全限制:仅用于样本排查,不建议在生产中全量导出;如需口径统计请改为COUNT聚合。"
|
||
},
|
||
{
|
||
"id": "snpt_sample_random_rows",
|
||
"desc": "随机抽取水表信息用于人工核验",
|
||
"type": "sample",
|
||
"title": "随机抽样明细",
|
||
"examples": [
|
||
"抽样查看水表信息",
|
||
"随机抽取20条做质检"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "sample_size",
|
||
"type": "int",
|
||
"default": 20
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT *\nFROM `data-ge`.`water_meter_info`\nORDER BY RAND()\nLIMIT {{sample_size}};"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"使用RAND()随机,样本不可复现",
|
||
"建议限制样本量"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 300
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "样本抽取:从本表随机返回若干行明细。安全限制:避免扩大LIMIT进行全量下载;如需可复现样本,请改用带种子的随机方法(MySQL不原生支持)。"
|
||
},
|
||
{
|
||
"id": "snpt_filter_office_type_where",
|
||
"desc": "常用WHERE筛选条件片段:按营业所与类型且为有效",
|
||
"type": "sample",
|
||
"title": "机构类型筛选片",
|
||
"examples": [
|
||
"筛选A营业所的机械表",
|
||
"仅查看某营业所的指定类型水表"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "supply_office",
|
||
"type": "string"
|
||
},
|
||
{
|
||
"name": "meter_type",
|
||
"type": "string"
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "WHERE supply_office = '{{supply_office}}'\n AND meter_type = '{{meter_type}}'\n AND meter_status = '有效'"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"这是条件片段,可拼接到其他查询",
|
||
"meter_status当前为单一值“有效”"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 11
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"supply_office",
|
||
"meter_type",
|
||
"meter_status"
|
||
]
|
||
},
|
||
"business_caliber": "过滤口径:仅保留指定营业所与指定水表类型、且状态为“有效”的记录。安全限制:为片段用途,需拼接在SELECT…FROM之后使用。"
|
||
},
|
||
{
|
||
"id": "snpt_office_station_dist",
|
||
"desc": "按营业所与站点组合统计水表数",
|
||
"type": "aggregate",
|
||
"title": "所站层级分布",
|
||
"examples": [
|
||
"按营业所查看各站点水表数",
|
||
"所站两级的水表分布情况"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT supply_office,\n station,\n COUNT(DISTINCT service_point_id) AS meter_cnt\nFROM `data-ge`.`water_meter_info`\nGROUP BY supply_office, station\nORDER BY supply_office, meter_cnt DESC, station;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"组合基数<=11×36=396",
|
||
"以service_point_id去重计数",
|
||
"如结果过长可再按TopN筛选"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 396
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"supply_office",
|
||
"station",
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表数=按service_point_id去重计数;粒度=营业所×站点。安全限制:结果行数可能较多,建议在可视化端增加筛选或分页。"
|
||
},
|
||
{
|
||
"id": "snpt_total_meter_baseline",
|
||
"desc": "获取全表水表去重总量基线",
|
||
"type": "aggregate",
|
||
"title": "水表总量基线",
|
||
"examples": [
|
||
"当前有多少只水表",
|
||
"作为占比分析的分母基线"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT COUNT(DISTINCT service_point_id) AS meter_total\nFROM `data-ge`.`water_meter_info`;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"作为其他占比/分摊分母基线",
|
||
"忽略service_point_id为空的记录"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 300
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "水表总量=按service_point_id去重计数;基于当前表的全量记录。安全限制:无时间维度,无法反映存量随时间变化。"
|
||
}
|
||
] |