Files
planet/database_schema.sql
2026-03-05 11:46:58 +08:00

446 lines
15 KiB
PL/PgSQL

-- Database Schema for Intelligent Planet Plan
-- PostgreSQL 15+ with TimescaleDB
-- Generated at: 2024-01-20
-- ============================================
-- EXTENSIONS
-- ============================================
-- uuid-ossp: the hyphen in the name means the identifier must stay quoted.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- timescaledb: supplies create_hypertable(), which this schema calls on its
-- time-series tables.
CREATE EXTENSION IF NOT EXISTS timescaledb;
-- ============================================
-- ENUMS
-- ============================================
-- Label order is significant for enum comparison/sorting, so it is kept as-is.

-- Values accepted by users.role.
CREATE TYPE user_role AS ENUM (
    'super_admin',
    'admin',
    'operator',
    'viewer'
);

-- Values accepted by data_sources.priority.
CREATE TYPE datasource_priority AS ENUM (
    'P0',
    'P1',
    'P2'
);

-- Values accepted by collection_tasks.status and data_sources.last_status.
CREATE TYPE task_status AS ENUM (
    'pending',
    'running',
    'success',
    'failed',
    'cancelled'
);

-- Values accepted by alert_rules.severity and alerts.severity.
CREATE TYPE alert_severity AS ENUM (
    'critical',
    'warning',
    'info'
);

-- Values accepted by alerts.status.
CREATE TYPE alert_status AS ENUM (
    'active',
    'acknowledged',
    'resolved'
);

-- Values accepted by data_sources.module.
CREATE TYPE data_module AS ENUM (
    'L1',
    'L2',
    'L3',
    'L4'
);
-- ============================================
-- USERS & AUTHENTICATION
-- ============================================
-- Application user accounts.
-- username and email are each enforced unique; role defaults to 'viewer'
-- and new accounts are active by default.
CREATE TABLE users (
    id            SERIAL PRIMARY KEY,
    username      VARCHAR(50) NOT NULL UNIQUE,
    email         VARCHAR(255) NOT NULL UNIQUE,
    password_hash VARCHAR(255) NOT NULL,
    role          user_role NOT NULL DEFAULT 'viewer',
    is_active     BOOLEAN NOT NULL DEFAULT TRUE,
    last_login_at TIMESTAMP WITH TIME ZONE,            -- NULL until set
    created_at    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at    TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
-- No explicit index on username: the UNIQUE constraint above already creates
-- and maintains a unique B-tree index on it, so a second one would only add
-- write and storage overhead.
CREATE INDEX idx_users_role ON users (role);
CREATE INDEX idx_users_is_active ON users (is_active);
-- Refresh tokens issued to users. Rows are removed automatically when the
-- owning user is deleted (ON DELETE CASCADE).
CREATE TABLE refresh_tokens (
    id         SERIAL PRIMARY KEY,
    user_id    INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    token      VARCHAR(500) NOT NULL UNIQUE,
    expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    revoked_at TIMESTAMP WITH TIME ZONE                -- NULL until set
);
CREATE INDEX idx_refresh_tokens_user ON refresh_tokens (user_id);
-- No explicit index on token: the UNIQUE constraint above already creates a
-- unique B-tree index that serves token lookups.
CREATE INDEX idx_refresh_tokens_expires ON refresh_tokens (expires_at);
-- ============================================
-- DATA SOURCES
-- ============================================
-- Registry of data sources: which module each belongs to, its priority,
-- its frequency in minutes, the collector class name and a JSONB config blob.
CREATE TABLE data_sources (
    id                SERIAL PRIMARY KEY,
    name              VARCHAR(100) NOT NULL,
    module            data_module NOT NULL,
    priority          datasource_priority NOT NULL DEFAULT 'P1',
    frequency_minutes INTEGER NOT NULL DEFAULT 60,
    collector_class   VARCHAR(100) NOT NULL,
    config            JSONB NOT NULL DEFAULT '{}'::jsonb,
    is_active         BOOLEAN NOT NULL DEFAULT TRUE,
    -- Run bookkeeping; all three are nullable.
    last_run_at       TIMESTAMPTZ,
    last_status       task_status,
    next_run_at       TIMESTAMPTZ,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_datasources_module    ON data_sources (module);
CREATE INDEX idx_datasources_priority  ON data_sources (priority);
CREATE INDEX idx_datasources_is_active ON data_sources (is_active);
CREATE INDEX idx_datasources_next_run  ON data_sources (next_run_at);
-- ============================================
-- COLLECTION TASKS
-- ============================================
-- Collection task records, each tied to a data source. Rows are removed when
-- the data source is deleted (ON DELETE CASCADE).
CREATE TABLE collection_tasks (
    id                BIGSERIAL NOT NULL,
    datasource_id     INTEGER NOT NULL REFERENCES data_sources(id) ON DELETE CASCADE,
    status            task_status NOT NULL DEFAULT 'pending',
    started_at        TIMESTAMP WITH TIME ZONE,
    completed_at      TIMESTAMP WITH TIME ZONE,
    records_processed INTEGER DEFAULT 0,
    error_message     TEXT,
    traceback         TEXT,
    created_at        TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    -- TimescaleDB rejects create_hypertable() when a unique index or primary
    -- key does not contain the partitioning column, so created_at is part of
    -- the key. id values still come from the BIGSERIAL sequence.
    PRIMARY KEY (id, created_at)
);
CREATE INDEX idx_tasks_datasource ON collection_tasks (datasource_id);
CREATE INDEX idx_tasks_status ON collection_tasks (status);
CREATE INDEX idx_tasks_created_at ON collection_tasks (created_at DESC);
-- Hypertable partitioned on created_at. The time index is declared explicitly
-- above, so the automatic default index is switched off to avoid a duplicate.
SELECT create_hypertable('collection_tasks', 'created_at', create_default_indexes => FALSE);
-- ============================================
-- GPU CLUSTERS (L1 - Hypertables)
-- ============================================
-- Time-stamped GPU cluster records. There is no primary key; the same
-- cluster_id can appear at many values of "time".
CREATE TABLE gpu_clusters (
    time         TIMESTAMP WITH TIME ZONE NOT NULL,
    cluster_id   VARCHAR(100) NOT NULL,
    name         VARCHAR(200) NOT NULL,
    country      VARCHAR(100),
    city         VARCHAR(100),
    latitude     DOUBLE PRECISION,
    longitude    DOUBLE PRECISION,
    organization VARCHAR(200),
    gpu_count    INTEGER,
    gpu_type     VARCHAR(100),
    total_flops  DOUBLE PRECISION,
    rank         INTEGER,
    source       VARCHAR(50) NOT NULL
);
-- create_hypertable() would otherwise add its own (time DESC) index, which
-- idx_gpu_clusters_time below would duplicate; keep only the named one.
SELECT create_hypertable('gpu_clusters', 'time', create_default_indexes => FALSE);
CREATE INDEX idx_gpu_clusters_cluster_id ON gpu_clusters (cluster_id);
CREATE INDEX idx_gpu_clusters_country ON gpu_clusters (country);
CREATE INDEX idx_gpu_clusters_source ON gpu_clusters (source);
CREATE INDEX idx_gpu_clusters_time ON gpu_clusters (time DESC);
-- Latest snapshot view: for every cluster_id, the single row with the
-- greatest "time". Columns are listed in table order.
CREATE VIEW gpu_clusters_latest AS
SELECT DISTINCT ON (gc.cluster_id)
    gc.time,
    gc.cluster_id,
    gc.name,
    gc.country,
    gc.city,
    gc.latitude,
    gc.longitude,
    gc.organization,
    gc.gpu_count,
    gc.gpu_type,
    gc.total_flops,
    gc.rank,
    gc.source
FROM gpu_clusters AS gc
ORDER BY
    gc.cluster_id,
    gc.time DESC;
-- ============================================
-- MODEL ECOSYSTEM (L1)
-- ============================================
-- Time-stamped per-model statistics (downloads, likes, stars, size).
-- There is no primary key; a model_id can appear at many values of "time".
CREATE TABLE model_ecosystem (
    time       TIMESTAMP WITH TIME ZONE NOT NULL,
    model_id   VARCHAR(200) NOT NULL,
    model_name VARCHAR(200) NOT NULL,
    publisher  VARCHAR(200),
    downloads  INTEGER,
    likes      INTEGER,
    stars      INTEGER,
    license    VARCHAR(100),
    size_bytes BIGINT,
    source     VARCHAR(50) NOT NULL
);
-- create_hypertable() would otherwise add its own (time DESC) index, which
-- idx_models_time below would duplicate; keep only the named one.
SELECT create_hypertable('model_ecosystem', 'time', create_default_indexes => FALSE);
CREATE INDEX idx_models_model_id ON model_ecosystem (model_id);
CREATE INDEX idx_models_source ON model_ecosystem (source);
CREATE INDEX idx_models_time ON model_ecosystem (time DESC);
-- ============================================
-- SUPERCOMPUTERS (L1)
-- ============================================
-- Time-stamped supercomputer records. rank and name are required; the
-- location, performance and hardware columns are nullable.
CREATE TABLE supercomputers (
    time              TIMESTAMP WITH TIME ZONE NOT NULL,
    rank              INTEGER NOT NULL,
    name              VARCHAR(200) NOT NULL,
    country           VARCHAR(100),
    city              VARCHAR(100),
    latitude          DOUBLE PRECISION,
    longitude         DOUBLE PRECISION,
    organization      VARCHAR(200),
    rmax_tflops       DOUBLE PRECISION,
    rpeak_tflops      DOUBLE PRECISION,
    power_mw          DOUBLE PRECISION,
    cpu_cores         INTEGER,
    accelerator_cores INTEGER,
    memory_tb         DOUBLE PRECISION,
    interconnect      VARCHAR(100),
    os                VARCHAR(100),
    source            VARCHAR(50) NOT NULL
);
-- create_hypertable() would otherwise add its own (time DESC) index, which
-- idx_supercomputers_time below would duplicate; keep only the named one.
SELECT create_hypertable('supercomputers', 'time', create_default_indexes => FALSE);
CREATE INDEX idx_supercomputers_rank ON supercomputers (rank);
CREATE INDEX idx_supercomputers_country ON supercomputers (country);
CREATE INDEX idx_supercomputers_time ON supercomputers (time DESC);
-- ============================================
-- SUBMARINE CABLES (L2)
-- ============================================
-- Cable master record. cable_id is the unique external key that landing
-- points reference; id is the surrogate primary key.
CREATE TABLE submarine_cables (
    id            SERIAL PRIMARY KEY,
    cable_id      VARCHAR(100) NOT NULL UNIQUE,
    name          VARCHAR(200) NOT NULL,
    length_km     INTEGER,
    owners        JSONB DEFAULT '[]'::jsonb,
    capacity_tbps DOUBLE PRECISION,
    status        VARCHAR(50) DEFAULT 'active',
    source        VARCHAR(50) NOT NULL,
    created_at    TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Landing points of a cable. sequence_number is unique within a cable, and
-- rows disappear together with their cable (ON DELETE CASCADE).
CREATE TABLE cable_landing_points (
    id              SERIAL PRIMARY KEY,
    cable_id        VARCHAR(100) NOT NULL
                        REFERENCES submarine_cables (cable_id) ON DELETE CASCADE,
    sequence_number INTEGER NOT NULL,
    country         VARCHAR(100) NOT NULL,
    city            VARCHAR(100),
    latitude        DOUBLE PRECISION NOT NULL,
    longitude       DOUBLE PRECISION NOT NULL,
    landing_type    VARCHAR(50),
    UNIQUE (cable_id, sequence_number)
);

CREATE INDEX idx_cables_name   ON submarine_cables (name);
CREATE INDEX idx_cables_status ON submarine_cables (status);
CREATE INDEX idx_landing_cable ON cable_landing_points (cable_id);
-- ============================================
-- IXP NODES (L2)
-- ============================================
-- Internet Exchange Point records. ixp_id is the unique external key;
-- id is the surrogate primary key.
CREATE TABLE ixp_nodes (
    id           SERIAL PRIMARY KEY,
    ixp_id       VARCHAR(100) NOT NULL UNIQUE,
    name         VARCHAR(200) NOT NULL,
    country      VARCHAR(100),
    city         VARCHAR(100),
    latitude     DOUBLE PRECISION,
    longitude    DOUBLE PRECISION,
    member_count INTEGER,
    traffic_tbps DOUBLE PRECISION,
    source       VARCHAR(50) NOT NULL,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_ixp_country ON ixp_nodes (country);
CREATE INDEX idx_ixp_source  ON ixp_nodes (source);
-- ============================================
-- CLOUD INFRASTRUCTURE (L1)
-- ============================================
-- Time-stamped cloud provider / region records. There is no primary key.
CREATE TABLE cloud_infrastructure (
    time              TIMESTAMP WITH TIME ZONE NOT NULL,
    provider          VARCHAR(100) NOT NULL,
    region            VARCHAR(100) NOT NULL,
    availability_zone VARCHAR(100),
    data_center_count INTEGER,
    total_capacity_mw DOUBLE PRECISION,
    source            VARCHAR(50) NOT NULL
);
-- create_hypertable() would otherwise add its own (time DESC) index, which
-- idx_cloud_time below would duplicate; keep only the named one.
SELECT create_hypertable('cloud_infrastructure', 'time', create_default_indexes => FALSE);
CREATE INDEX idx_cloud_provider ON cloud_infrastructure (provider);
CREATE INDEX idx_cloud_region ON cloud_infrastructure (region);
CREATE INDEX idx_cloud_time ON cloud_infrastructure (time DESC);
-- ============================================
-- ALERTS
-- ============================================
-- Alert rule definitions. A rule may be bound to a data source; if that data
-- source is deleted the rule is kept and datasource_id becomes NULL.
CREATE TABLE alert_rules (
    id                    SERIAL PRIMARY KEY,
    name                  VARCHAR(200) NOT NULL,
    description           TEXT,
    datasource_id         INTEGER REFERENCES data_sources (id) ON DELETE SET NULL,
    condition             JSONB NOT NULL,
    severity              alert_severity NOT NULL,
    notification_channels JSONB DEFAULT '[]'::jsonb,
    is_active             BOOLEAN NOT NULL DEFAULT TRUE,
    cooldown_minutes      INTEGER DEFAULT 30,
    created_at            TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at            TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX idx_alert_rules_active   ON alert_rules (is_active);
CREATE INDEX idx_alert_rules_severity ON alert_rules (severity);
-- Alert records, each produced by a rule. Acknowledgement and resolution
-- columns are nullable and reference users.
CREATE TABLE alerts (
    id              BIGSERIAL NOT NULL,
    rule_id         INTEGER NOT NULL REFERENCES alert_rules(id),
    datasource_id   INTEGER REFERENCES data_sources(id) ON DELETE SET NULL,
    severity        alert_severity NOT NULL,
    status          alert_status NOT NULL DEFAULT 'active',
    message         TEXT NOT NULL,
    detail          JSONB DEFAULT '{}',
    acknowledged_by INTEGER REFERENCES users(id),
    acknowledged_at TIMESTAMP WITH TIME ZONE,
    resolved_by     INTEGER REFERENCES users(id),
    resolved_at     TIMESTAMP WITH TIME ZONE,
    resolution_note TEXT,
    created_at      TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    -- TimescaleDB rejects create_hypertable() when a unique index or primary
    -- key does not contain the partitioning column, so created_at is part of
    -- the key. id values still come from the BIGSERIAL sequence.
    PRIMARY KEY (id, created_at)
);
-- The time index is declared explicitly below (idx_alerts_created), so the
-- automatic default index is switched off to avoid a duplicate.
SELECT create_hypertable('alerts', 'created_at', create_default_indexes => FALSE);
CREATE INDEX idx_alerts_status ON alerts (status);
CREATE INDEX idx_alerts_severity ON alerts (severity);
CREATE INDEX idx_alerts_datasource ON alerts (datasource_id);
CREATE INDEX idx_alerts_created ON alerts (created_at DESC);
-- ============================================
-- AUDIT LOGS
-- ============================================
-- Audit log entries. user_id becomes NULL if the user is deleted, so the
-- entry itself is kept.
CREATE TABLE audit_logs (
    id          BIGSERIAL NOT NULL,
    user_id     INTEGER REFERENCES users(id) ON DELETE SET NULL,
    action      VARCHAR(100) NOT NULL,
    resource    VARCHAR(100) NOT NULL,
    resource_id VARCHAR(100),
    detail      JSONB DEFAULT '{}',
    ip_address  INET,
    user_agent  TEXT,
    created_at  TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    -- TimescaleDB rejects create_hypertable() when a unique index or primary
    -- key does not contain the partitioning column, so created_at is part of
    -- the key. id values still come from the BIGSERIAL sequence.
    PRIMARY KEY (id, created_at)
);
-- The time index is declared explicitly below (idx_audit_created), so the
-- automatic default index is switched off to avoid a duplicate.
SELECT create_hypertable('audit_logs', 'created_at', create_default_indexes => FALSE);
CREATE INDEX idx_audit_user ON audit_logs (user_id);
CREATE INDEX idx_audit_action ON audit_logs (action);
CREATE INDEX idx_audit_resource ON audit_logs (resource);
CREATE INDEX idx_audit_created ON audit_logs (created_at DESC);
-- ============================================
-- SYSTEM CONFIGURATION
-- ============================================
-- Key/value configuration store. Each key is unique and its value is JSONB,
-- so every stored value must be valid JSON text.
CREATE TABLE system_config (
    id           SERIAL PRIMARY KEY,
    key          VARCHAR(100) NOT NULL UNIQUE,
    value        JSONB NOT NULL,
    description  TEXT,
    is_sensitive BOOLEAN NOT NULL DEFAULT FALSE,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Default configurations.
-- value is JSONB, so each literal must be valid JSON text: bare digits parse
-- as JSON numbers, while a string needs its own double quotes inside the SQL
-- literal. An unquoted Asia/Shanghai is not valid JSON and would make the
-- whole INSERT fail.
INSERT INTO system_config (key, value, description) VALUES
    ('data_retention_days',      '30',              'Number of days to retain time-series data'),
    ('refresh_interval',         '300',             'Default WebSocket refresh interval in seconds'),
    ('timezone',                 '"Asia/Shanghai"', 'System timezone'),
    ('max_concurrent_tasks',     '5',               'Maximum concurrent collection tasks'),
    ('api_rate_limit',           '100',             'API requests per minute per user'),
    ('jwt_access_token_expire',  '900',             'JWT access token expiration in seconds'),
    ('jwt_refresh_token_expire', '604800',          'JWT refresh token expiration in seconds');
-- ============================================
-- FUNCTIONS & TRIGGERS
-- ============================================
-- Row-level trigger function: overwrites NEW.updated_at with the current
-- transaction timestamp and returns the modified row.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
-- Apply the updated_at trigger to every table that has an updated_at column:
-- users, data_sources, submarine_cables, ixp_nodes, alert_rules, system_config.
-- Each trigger fires BEFORE UPDATE per row and calls update_updated_at_column().
CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON users
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_datasources_updated_at BEFORE UPDATE ON data_sources
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- submarine_cables and ixp_nodes also declare updated_at; without a trigger
-- their column would keep its insert-time value after every UPDATE.
CREATE TRIGGER update_submarine_cables_updated_at BEFORE UPDATE ON submarine_cables
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_ixp_nodes_updated_at BEFORE UPDATE ON ixp_nodes
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_alert_rules_updated_at BEFORE UPDATE ON alert_rules
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_system_config_updated_at BEFORE UPDATE ON system_config
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- ============================================
-- VIEWS
-- ============================================
-- Data source overview: one row per data source with counts of its
-- successful and failed collection tasks and the newest task timestamp.
-- The LEFT JOIN keeps sources that have no tasks (counts 0, last_task_at NULL).
CREATE VIEW datasource_overview AS
SELECT
    src.id,
    src.name,
    src.module,
    src.priority,
    src.frequency_minutes,
    src.is_active,
    src.last_run_at,
    src.last_status,
    COUNT(CASE WHEN task.status = 'success' THEN task.id END) AS total_success,
    COUNT(CASE WHEN task.status = 'failed'  THEN task.id END) AS total_failed,
    MAX(task.created_at) AS last_task_at
FROM data_sources AS src
LEFT JOIN collection_tasks AS task
    ON task.datasource_id = src.id
-- Grouping by the primary key alone is enough; the other src columns are
-- functionally dependent on it.
GROUP BY src.id;
-- System health view: a single row of current counters, each computed by an
-- independent scalar subquery, stamped with the time of evaluation.
CREATE VIEW system_health AS
SELECT
    CURRENT_TIMESTAMP AS check_time,
    (
        SELECT COUNT(*)
        FROM data_sources
        WHERE is_active
    ) AS active_datasources,
    (
        SELECT COUNT(*)
        FROM collection_tasks
        WHERE status = 'running'
    ) AS running_tasks,
    (
        SELECT COUNT(*)
        FROM alerts
        WHERE status = 'active'
    ) AS active_alerts,
    (
        SELECT COUNT(*)
        FROM users
        WHERE is_active
    ) AS active_users;
-- ============================================
-- DATA RETENTION POLICY
-- ============================================
-- Example: Add retention policy for hypertables
-- SELECT add_retention_policy('gpu_clusters', INTERVAL '30 days');
-- SELECT add_retention_policy('collection_tasks', INTERVAL '90 days');
-- SELECT add_retention_policy('audit_logs', INTERVAL '180 days');
-- ============================================
-- ROW LEVEL SECURITY (RLS)
-- ============================================
-- Enable RLS for sensitive tables.
ALTER TABLE users ENABLE ROW LEVEL SECURITY;
-- NOTE(review): no policy is defined for audit_logs in this file, so with RLS
-- enabled every role subject to RLS sees no rows. Confirm that default-deny
-- is intended.
ALTER TABLE audit_logs ENABLE ROW LEVEL SECURITY;

-- RLS policies. Both read per-session settings; the second argument `true`
-- makes current_setting() return NULL rather than raise when the setting is
-- undefined, so the policy simply does not match.
CREATE POLICY "Admins can view all users" ON users
    FOR SELECT
    USING (current_setting('app.current_role', true) IN ('super_admin', 'admin'));

-- The previous predicate called auth.uid(), which this schema never defines,
-- and compared its result to the INTEGER users.id. The caller's id is now
-- read from a session setting, mirroring app.current_role above.
-- NOTE(review): the application must set app.current_user_id per session or
-- transaction; confirm the setting name with the backend.
-- NULLIF covers a setting that has been reset to the empty string.
CREATE POLICY "Users can view own data" ON users
    FOR SELECT
    USING (id = NULLIF(current_setting('app.current_user_id', true), '')::INTEGER);
-- ============================================
-- COMMENTS
-- ============================================
-- Catalog descriptions for L1 tables.
COMMENT ON TABLE gpu_clusters
    IS 'L1: Global AI GPU cluster distribution data';
COMMENT ON TABLE model_ecosystem
    IS 'L1: AI model download statistics and ecosystem data';
COMMENT ON TABLE cloud_infrastructure
    IS 'L1: Cloud data center capacity and distribution';

-- Catalog descriptions for L2 tables.
COMMENT ON TABLE submarine_cables
    IS 'L2: Submarine cable infrastructure and landing points';
COMMENT ON TABLE ixp_nodes
    IS 'L2: Internet Exchange Point nodes and connectivity';