demo数据

This commit is contained in:
zhaoawd
2025-11-14 00:58:00 +08:00
parent 7eb3c059a1
commit a72ca3593e
13 changed files with 2733 additions and 0 deletions

View File

@ -0,0 +1,227 @@
[
{
"id": "snpt_topn_supply_office_by_account",
"desc": "统计各供应办公室对应的账户数量,识别高占比管理所",
"type": "topn",
"title": "按供应办公室统计账户数",
"examples": [
"哪个供水管理所服务的用户最多?",
"列出前5个账户数最多的供应办公室"
],
"variables": [
{
"name": "top_n",
"type": "int",
"default": 11
}
],
"dialect_sql": {
"mysql": "SELECT supply_office, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY supply_office\nORDER BY account_count DESC\nLIMIT {{top_n}};"
},
"applicability": {
"constraints": {
"notes": [
"供应办公室仅11个唯一值可安全展示全部建议LIMIT 11避免冗余排序"
],
"fk_join_available": false,
"dim_cardinality_hint": 11
},
"time_column": "nullable",
"required_columns": [
"supply_office",
"account_id"
]
},
"business_caliber": "粒度=供应办公室,指标=去重账户数account_id仅统计水表信息表中有效账户不关联外部表"
},
{
"id": "snpt_topn_station_by_account",
"desc": "统计各站点服务的账户数量,识别高负载站点",
"type": "topn",
"title": "按站点统计账户分布",
"examples": [
"哪些站点服务的用户最多?",
"TOP10用户最多的站点是哪些"
],
"variables": [
{
"name": "top_n",
"type": "int",
"default": 20
}
],
"dialect_sql": {
"mysql": "SELECT station, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY station\nORDER BY account_count DESC\nLIMIT {{top_n}};"
},
"applicability": {
"constraints": {
"notes": [
"站点有36个唯一值建议LIMIT<=20以避免结果过长高基数维度可能影响查询性能"
],
"fk_join_available": false,
"dim_cardinality_hint": 36
},
"time_column": "nullable",
"required_columns": [
"station",
"account_id"
]
},
"business_caliber": "粒度=站点station指标=去重账户数account_id基于水表信息表直接聚合不涉及时间维度"
},
{
"id": "snpt_topn_district_by_account",
"desc": "统计各行政区的账户数量,辅助区域资源分配分析",
"type": "topn",
"title": "按行政区统计账户分布",
"examples": [
"哪个区的用水账户最多?",
"列出所有行政区的账户数量排名"
],
"variables": [
{
"name": "top_n",
"type": "int",
"default": 13
}
],
"dialect_sql": {
"mysql": "SELECT district, COUNT(DISTINCT account_id) AS account_count\nFROM water_meter_info\nGROUP BY district\nORDER BY account_count DESC\nLIMIT {{top_n}};"
},
"applicability": {
"constraints": {
"notes": [
"行政区共13个可完整展示适合用于区域对比分析"
],
"fk_join_available": false,
"dim_cardinality_hint": 13
},
"time_column": "nullable",
"required_columns": [
"district",
"account_id"
]
},
"business_caliber": "粒度=行政区district指标=去重账户数account_id基于水表信息表聚合反映各区域用户规模"
},
{
"id": "snpt_share_of_meter_type",
"desc": "计算各类水表类型在总账户中的占比,识别主流类型",
"type": "ratio",
"title": "水表类型占比分析",
"examples": [
"各类水表在用户中的占比是多少?",
"电磁式远传水表占总用户比例多少?"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT meter_type, \n COUNT(DISTINCT account_id) AS account_count,\n ROUND(COUNT(DISTINCT account_id) * 100.0 / SUM(COUNT(DISTINCT account_id)) OVER (), 2) AS percentage\nFROM water_meter_info\nGROUP BY meter_type\nORDER BY account_count DESC;"
},
"applicability": {
"constraints": {
"notes": [
"水表类型仅5种适合计算占比可直接展示全量分布"
],
"fk_join_available": false,
"dim_cardinality_hint": 5
},
"time_column": "nullable",
"required_columns": [
"meter_type",
"account_id"
]
},
"business_caliber": "粒度=水表类型meter_type指标=去重账户数占比,分母为全表去重账户总数,反映技术选型分布"
},
{
"id": "snpt_sample_account_service_point",
"desc": "随机抽取部分账户与服务点ID的原始记录用于数据质量核查",
"type": "sample",
"title": "抽样账户与服务点明细",
"examples": [
"随机查看10条账户与服务点的详细信息",
"抽样检查水表信息是否符合预期格式"
],
"variables": [
{
"name": "sample_size",
"type": "int",
"default": 10
}
],
"dialect_sql": {
"mysql": "SELECT account_id, service_point_id, supply_office, station, district, meter_diameter, meter_type, meter_subtype, installation_position\nFROM water_meter_info\nORDER BY RAND()\nLIMIT {{sample_size}};"
},
"applicability": {
"constraints": {
"notes": [
"主键组合为account_id+service_point_id适合抽样验证唯一性建议样本量≤100"
],
"fk_join_available": false,
"dim_cardinality_hint": null
},
"time_column": "nullable",
"required_columns": [
"account_id",
"service_point_id"
]
},
"business_caliber": "粒度=单条水表记录抽取样本用于验证account_id与service_point_id的组合唯一性及维度字段完整性"
},
{
"id": "snpt_filter_meter_status_valid",
"desc": "过滤出水表状态为'有效'的记录,用于后续分析",
"type": "quality",
"title": "筛选有效水表记录",
"examples": [
"只取状态为有效的水表记录",
"确认所有水表是否均为有效状态"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_status = '有效';"
},
"applicability": {
"constraints": {
"notes": [
"meter_status仅存在'有效'值,此条件恒成立;可用于数据清洗流程的显式过滤"
],
"fk_join_available": false,
"dim_cardinality_hint": 1
},
"time_column": "nullable",
"required_columns": [
"meter_status"
]
},
"business_caliber": "仅保留水表状态为'有效'的记录,因全表均为有效值,此过滤为冗余但可作为数据质量校验的显式断言"
},
{
"id": "snpt_filter_meter_diameter_20mm",
"desc": "筛选水表直径为20mm的记录用于特定口径设备分析",
"type": "quality",
"title": "筛选20mm水表记录",
"examples": [
"找出所有使用20mm水表的用户",
"20mm水表分布在哪些站点"
],
"variables": [],
"dialect_sql": {
"mysql": "SELECT *\nFROM water_meter_info\nWHERE meter_diameter = '20mm';"
},
"applicability": {
"constraints": {
"notes": [
"水表直径共8种枚举值20mm为常见规格可作为子集分析的起点"
],
"fk_join_available": false,
"dim_cardinality_hint": 8
},
"time_column": "nullable",
"required_columns": [
"meter_diameter"
]
},
"business_caliber": "粒度=单条水表记录筛选条件为meter_diameter='20mm',用于分析标准住宅用水表的分布特征"
}
]