Files
data-ge/demo/table-desc.json
2025-10-29 00:38:57 +08:00

278 lines
8.2 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"table": "ecommerce_orders",
"row_count": 10000,
"role": "fact",
"grain": ["order_id"],
"time": {
"column": "order_date",
"granularity": "day",
"range": ["2024-04-20", "2025-04-19"],
"has_gaps": false
},
"columns": [
{
"name": "order_id",
"dtype": "string",
"semantic_type": "id",
"null_rate": 0.0,
"distinct_count": 10000,
"distinct_ratio": 1.0,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 1.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "customer_id",
"dtype": "integer",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 2713,
"distinct_ratio": 0.2713,
"stats": {"min": 1, "max": 2999, "mean": 995.29, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.3,
"metric_candidate_score": 0.1,
"comment": ""
},
{
"name": "product_id",
"dtype": "integer",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 1000,
"distinct_ratio": 0.0999,
"stats": {"min": 1, "max": 1000, "mean": 504.87, "std": null, "skewness": null},
"enumish": true,
"top_values": [],
"pk_candidate_score": 0.1,
"metric_candidate_score": 0.1,
"comment": ""
},
{
"name": "category",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 6,
"distinct_ratio": 0.0006,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "Beauty", "pct": null},
{"value": "Books", "pct": null},
{"value": "Clothing", "pct": null},
{"value": "Electronics", "pct": null},
{"value": "Home", "pct": null},
{"value": "Toys", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "price",
"dtype": "float",
"semantic_type": "metric",
"null_rate": 0.0,
"distinct_count": 9013,
"distinct_ratio": 0.9013,
"stats": {"min": 5.06, "max": 499.93, "mean": 252.55, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.9,
"comment": ""
},
{
"name": "quantity",
"dtype": "integer",
"semantic_type": "metric",
"null_rate": 0.0,
"distinct_count": 9,
"distinct_ratio": 0.0009,
"stats": {"min": 1, "max": 9, "mean": 2.12, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": 1, "pct": null},
{"value": 2, "pct": null},
{"value": 3, "pct": null},
{"value": 4, "pct": null},
{"value": 5, "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.7,
"comment": ""
},
{
"name": "order_date",
"dtype": "string",
"semantic_type": "time",
"null_rate": 0.0,
"distinct_count": 365,
"distinct_ratio": 0.0365,
"stats": {"min": "2024-04-20", "max": "2025-04-19", "mean": null, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "shipping_date",
"dtype": "string",
"semantic_type": "time",
"null_rate": 0.0,
"distinct_count": 371,
"distinct_ratio": 0.0371,
"stats": {"min": "2024-04-21", "max": "2025-04-26", "mean": null, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "delivery_status",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 4,
"distinct_ratio": 0.0004,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "Delivered", "pct": null},
{"value": "Pending", "pct": null},
{"value": "Returned", "pct": null},
{"value": "Shipped", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "payment_method",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 5,
"distinct_ratio": 0.0005,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "Apple Pay", "pct": null},
{"value": "Credit Card", "pct": null},
{"value": "Debit Card", "pct": null},
{"value": "Google Pay", "pct": null},
{"value": "PayPal", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "device_type",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 3,
"distinct_ratio": 0.0003,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "Desktop", "pct": null},
{"value": "Mobile", "pct": null},
{"value": "Tablet", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "channel",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 4,
"distinct_ratio": 0.0004,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "Email", "pct": null},
{"value": "Organic", "pct": null},
{"value": "Paid Search", "pct": null},
{"value": "Social", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "shipping_address",
"dtype": "string",
"semantic_type": "text",
"null_rate": 0.0,
"distinct_count": 10000,
"distinct_ratio": 1.0,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.9,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "billing_address",
"dtype": "string",
"semantic_type": "text",
"null_rate": 0.0,
"distinct_count": 10000,
"distinct_ratio": 1.0,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": false,
"top_values": [],
"pk_candidate_score": 0.9,
"metric_candidate_score": 0.0,
"comment": ""
},
{
"name": "customer_segment",
"dtype": "string",
"semantic_type": "dimension",
"null_rate": 0.0,
"distinct_count": 3,
"distinct_ratio": 0.0003,
"stats": {"min": null, "max": null, "mean": null, "std": null, "skewness": null},
"enumish": true,
"top_values": [
{"value": "New", "pct": null},
{"value": "Returning", "pct": null},
{"value": "VIP", "pct": null}
],
"pk_candidate_score": 0.0,
"metric_candidate_score": 0.0,
"comment": ""
}
],
"primary_key_candidates": [["order_id"]],
"fk_candidates": [
{"from": "customer_id", "to": "dim_customer(customer_id)", "confidence": 0.9},
{"from": "product_id", "to": "dim_product(product_id)", "confidence": 0.9}
],
"quality": {
"failed_expectations": [],
"warning_hints": []
},
"confidence_notes": [
"表含时间列(order_date, shipping_date)且含度量列(price, quantity)推断为fact表。",
"order_id唯一性=1.0,确认主键。",
"order_date日期范围连续无缺口粒度为日级。",
"高基数数值字段(price, quantity)符合指标特征。",
"低熵字段(category, delivery_status, payment_method等)为枚举维度。"
]
}