186 lines
7.8 KiB
JSON
186 lines
7.8 KiB
JSON
[
|
||
{
|
||
"id": "snpt_count-service-points-by-dimension",
|
||
"desc": "按指定维度(如区域、供水所)分组,统计各分类下的用水点数量。",
|
||
"type": "aggregate",
|
||
"title": "按维度统计用水点数",
|
||
"examples": [
|
||
"按区域统计用水点数量",
|
||
"各个供水所分别有多少个用水点"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "dimension_column",
|
||
"type": "column",
|
||
"default": "district"
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge`.`water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"适用于对水表档案信息进行分类汇总统计。",
|
||
"可将变量 ${dimension_column} 替换为任一维度列,如 district, supply_office, station, meter_type 等。"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "用水点数:对 `service_point_id` 进行去重计数,代表一个独立的服务点(通常对应一个水表)。统计粒度为“指定维度”。"
|
||
},
|
||
{
|
||
"id": "snpt_topn-service-points-by-dimension",
|
||
"desc": "按指定维度(如区域、站点)统计用水点数,并展示数量最多的前N个分类。",
|
||
"type": "topn",
|
||
"title": "Top-N 用水点数维度排名",
|
||
"examples": [
|
||
"哪个区域的用水点最多",
|
||
"用水点数排名前5的站点是哪些"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "dimension_column",
|
||
"type": "column",
|
||
"default": "station"
|
||
},
|
||
{
|
||
"name": "top_n",
|
||
"type": "int",
|
||
"default": 10
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count\nFROM\n `data-ge`.`water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC\nLIMIT ${top_n};"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"维度 `station` 基数较高 (36),建议 Top-N 查询时结合业务场景合理设置 N 值。"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": 36
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "用水点数:对 `service_point_id` 进行去重计数。排名依据为各维度分类下的用水点总数。统计粒度为“指定维度”。"
|
||
},
|
||
{
|
||
"id": "snpt_ratio-service-points-by-dimension",
|
||
"desc": "计算在指定维度下,各分类的用水点数占总用水点数的百分比,以分析其分布构成。",
|
||
"type": "ratio",
|
||
"title": "各维度用水点数占比",
|
||
"examples": [
|
||
"不同水表类型(meter_type)的分布情况",
|
||
"各个区域的用水点占比是多少"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "dimension_column",
|
||
"type": "column",
|
||
"default": "meter_type"
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n `${dimension_column}`,\n COUNT(DISTINCT service_point_id) AS service_point_count,\n COUNT(DISTINCT service_point_id) * 100.0 / SUM(COUNT(DISTINCT service_point_id)) OVER () AS percentage\nFROM\n `data-ge`.`water_meter_info`\nGROUP BY\n `${dimension_column}`\nORDER BY\n service_point_count DESC;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"SQL模板使用了窗口函数 SUM() OVER(),请确保MySQL版本支持(8.0+)。",
"占比的分母为各分类去重用水点数之和;若同一 `service_point_id` 同时出现在多个分类中,该分母会大于全表去重后的用水点总数,各分类占比之和仍为100%。"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "用水点数占比:某分类下的用水点数 / 总用水点数。用水点数以 `service_point_id` 去重计数。统计粒度为“指定维度”。"
|
||
},
|
||
{
|
||
"id": "snpt_quality-check-duplicate-spid",
|
||
"desc": "查找在用水点信息表中存在重复的 `service_point_id`,用于数据质量校验。",
|
||
"type": "quality",
|
||
"title": "检查重复的用水点ID",
|
||
"examples": [
|
||
"检查是否存在重复的水表档案",
|
||
"校验用水点ID的唯一性"
|
||
],
|
||
"variables": [],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n service_point_id,\n COUNT(*) AS occurrences\nFROM\n `data-ge`.`water_meter_info`\nGROUP BY\n service_point_id\nHAVING\n COUNT(*) > 1;"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
||
"notes": [
|
||
"预期返回结果为空。若有返回,则表示数据存在一致性问题,`service_point_id` 未能作为唯一主键。"
|
||
],
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id"
|
||
]
|
||
},
|
||
"business_caliber": "重复项:指 `service_point_id` 出现次数大于1的记录。此ID应为表的主键,理论上不应重复。"
|
||
},
|
||
{
|
||
"id": "snpt_sample-filter-service-points-by-dims",
|
||
"desc": "根据区域、水表类型、供水所等多个维度组合条件,筛选出符合条件的用水点明细。",
|
||
"type": "sample",
|
||
"title": "多维度筛选用水点列表",
|
||
"examples": [
|
||
"查询城区的机械表有哪些",
|
||
"拉取某个供水所下特定口径水表的列表"
|
||
],
|
||
"variables": [
|
||
{
|
||
"name": "district_name",
|
||
"type": "string",
|
||
"default": "城区"
|
||
},
|
||
{
|
||
"name": "meter_type_name",
|
||
"type": "string",
|
||
"default": "机械表"
|
||
},
|
||
{
|
||
"name": "limit_num",
|
||
"type": "int",
|
||
"default": 100
|
||
}
|
||
],
|
||
"dialect_sql": {
|
||
"mysql": "SELECT\n service_point_id,\n account_id,\n district,\n supply_office,\n meter_type,\n meter_subtype,\n meter_diameter\nFROM\n `data-ge`.`water_meter_info`\nWHERE\n district = '${district_name}'\n AND meter_type = '${meter_type_name}'\n -- AND meter_status = '有效' -- 可选:根据画像,该列为常量'有效',可不加\nLIMIT ${limit_num};"
|
||
},
|
||
"applicability": {
|
||
"constraints": {
|
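||
"notes": [
"SQL模板默认仅按 district 与 meter_type 两个条件过滤;如需按 supply_office、meter_diameter 等其他维度筛选,需在 WHERE 子句中追加相应条件。"
],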
|
||
"fk_join_available": false,
|
||
"dim_cardinality_hint": null
|
||
},
|
||
"time_column": null,
|
||
"required_columns": [
|
||
"service_point_id",
|
||
"account_id",
|
||
"district",
|
||
"supply_office",
|
||
"meter_type",
|
||
"meter_subtype",
|
||
"meter_diameter"
|
||
]
|
||
},
|
||
"business_caliber": "返回满足所有筛选条件的用水点明细信息。`meter_status` 列只有一个值 '有效',通常无需作为筛选条件。"
|
||
}
|
||
] |