Compare commits

..

55 Commits

Author SHA1 Message Date
linkong
2015ab79bd feat: enrich earth bgp event visualization 2026-03-27 17:26:17 +08:00
linkong
755729ee5e fix: make earth satellite and cable toggles fully unload 2026-03-27 17:11:07 +08:00
linkong
7a3ca6e1b3 fix: refine treemap sizing and add earth bgp collectors 2026-03-27 16:35:40 +08:00
linkong
62f2d9f403 fix: polish earth legend and info panel interactions 2026-03-27 16:01:12 +08:00
linkong
b448a1e560 docs: refresh quick start commands 2026-03-27 15:30:08 +08:00
linkong
2cc0c9412c fix: narrow vite dep scan entries 2026-03-27 15:13:36 +08:00
linkong
3dd210a3e5 feat: refine collected data overview and admin navigation 2026-03-27 15:08:45 +08:00
linkong
a761dfc5fb style: refine earth legend item presentation 2026-03-27 14:30:28 +08:00
linkong
7ec9586f7a chore: add earth hud backup and icon assets 2026-03-27 14:30:12 +08:00
linkong
b0058edf17 feat: add bgp observability and admin ui improvements 2026-03-27 14:27:07 +08:00
linkong
bf2c4a172d fix: upgrade startup script controls 2026-03-27 11:13:01 +08:00
linkong
30a29a6e34 fix: redesign earth hud interactions and legend behavior 2026-03-26 17:58:03 +08:00
linkong
ab09f0ba78 fix: polish earth toolbar controls and loading copy 2026-03-26 14:04:57 +08:00
linkong
7b53cf9a06 Enhance Earth interaction and bump version to 0.21.0 2026-03-26 11:09:57 +08:00
linkong
a04f4f9e67 Bump version to 0.20.0 and add changelog 2026-03-26 10:41:46 +08:00
linkong
ce5feba3b9 Stabilize Earth module and fix satellite TLE handling 2026-03-26 10:29:50 +08:00
linkong
3fd6cbb6f7 Add version history and bump project version to 0.19.0 2026-03-25 17:36:18 +08:00
linkong
020c1d5051 Refine data management and collection workflows 2026-03-25 17:19:10 +08:00
linkong
cc5f16f8a7 Fix settings layout and frontend startup checks 2026-03-25 10:42:10 +08:00
rayd1o
ef0fefdfc7 feat: persist system settings and refine admin layouts 2026-03-25 02:57:58 +08:00
linkong
81a0ca5e7a fix(satellites): fix back-facing detection with proper coordinate transform 2026-03-24 12:10:52 +08:00
linkong
b57d69c98b fix(satellites): remove debug console.log for ring create/update
Also ensures back-facing satellite selection prevention is in place
2026-03-24 11:40:28 +08:00
linkong
b9fbacade7 fix(satellites): prevent selecting satellites on far side of earth
- Add isSatelliteFrontFacing() to detect if satellite is on visible side
- Filter satellites in hover and click handlers by front-facing check
- Apply same logic as cables for consistent back-face culling
2026-03-24 10:44:06 +08:00
linkong
543fe35fbb fix(satellites): fix ring size attenuation and breathing animation
- Add sizeAttenuation: false to sprite materials for fixed ring size
- Move breathing animation parameters to SATELLITE_CONFIG constants
- Export updateBreathingPhase function to avoid ES module binding issues
- Adjust breathing speed and amplitude for better visual effect
2026-03-23 17:41:27 +08:00
rayd1o
1784c057e5 feat(earth): add predicted orbit display for locked satellites
- Calculate orbital period from meanMotion
- Generate predicted orbit points with 10s sampling
- Show complete orbit line when satellite is locked
- Hide orbit when satellite is unlocked
- Color gradient: bright (current) to dark (end)
- Fix TLE epoch format issue with fallback circle orbit
- Add visibility change handler to clear trails on page hide
- Fix satellite count display after loading
- Merge predicted-orbit plan into single file
2026-03-23 05:41:44 +08:00
rayd1o
465129eec7 fix(satellites): use timestamp-based trail filtering to prevent flash
- Changed trail data structure to {pos, time} with Date.now() timestamp
- Replaced length-based filtering with time-based filtering (5 second window)
- Trail now naturally clears when page returns to foreground
- No more ugly frame-skipping or visibilitychange workarounds

Build: passes
2026-03-23 03:56:45 +08:00
rayd1o
0c950262d3 fix(earth): fix satellite trail origin line and sync button state
- Fill unfilled trail points with satellite position instead of (0,0,0)
- Update toggle-satellites button state after auto-show on init
- Remove trailsReady flag since it's no longer needed
2026-03-21 05:10:59 +08:00
rayd1o
eabdbdc85a fix(earth): clear lock state when hiding satellites or cables 2026-03-21 04:50:05 +08:00
rayd1o
af29e90cb0 fix(earth): prevent cable hover/click interaction when cables are hidden 2026-03-21 04:41:20 +08:00
rayd1o
d9a64f7768 fix(frontend): fix iframe scrollbar issue by using 100% instead of 100vw/vh and setting html/body/root to 100% height 2026-03-21 04:10:33 +08:00
rayd1o
78bb639a83 feat(earth): toolbar zoom improvements and toggle-cables
- Remove zoom slider, implement click/hold zoom behavior (+/- buttons)
- Add 10% step on click, 1% continuous on hold
- Add box-sizing/padding normalization to toolbar buttons
- Add toggle-cables functionality with visibility state
- Fix breathing effect: faster pulse (0.008), wider opacity range (0.2-1.0)
- Fix slider null error in updateZoomDisplay
- Set satellite default to hidden
2026-03-21 02:26:41 +08:00
linkong
96222b9e4c feat(earth): refactor toolbar layout, improve cable breathing effect
- Restructure right-toolbar-group with zoom-toolbar and control-toolbar
- Add reset button to zoom-toolbar
- Change collapse toggle to arrow icon
- Improve cable breathing effect opacity range
- Adjust toolbar sizing and spacing
2026-03-20 16:34:00 +08:00
linkong
3fcbae55dc feat(earth): add cable-landing point relation via city_id
Backend:
- Fix arcgis_landing collector to extract city_id
- Fix arcgis_relation collector to extract city_id
- Fix convert_landing_point_to_geojson to use city_id mapping

Frontend:
- Update landing point cableNames to use array
- Add applyLandingPointVisualState for cable lock highlight
- Dim all landing points when satellite is locked
2026-03-20 15:45:02 +08:00
linkong
3e3090d72a docs: add architecture refactor and webgl instancing plans 2026-03-20 13:53:36 +08:00
rayd1o
4f922f13d1 refactor(earth): extract satellite config to SATELLITE_CONFIG constants 2026-03-19 18:00:22 +08:00
rayd1o
bb6b18fe3b feat(earth): satellite dot rendering with hover/lock rings, dim cables when satellite locked
- Change satellite points from squares to circular dots
- Add hover ring (white) and lock ring (yellow) for satellites
- Fix satellite hover/lock ring state management
- Dim all cables when satellite is locked
- Increase MAX_SATELLITES to 2000
- Fix satIntersects scoping bug
2026-03-19 17:41:53 +08:00
rayd1o
0ecc1bc537 feat(earth): cable state management, hover/lock visual separation, fix isSameCable undefined bug 2026-03-19 16:46:40 +08:00
rayd1o
869d661a94 refactor(earth): abstract cable highlight logic with applyCableVisualState() 2026-03-19 15:55:32 +08:00
rayd1o
d18e400fcb refactor(earth): remove dead code - setupMouseControls, getSelectedSatellite, updateCableDetails 2026-03-19 14:22:03 +08:00
rayd1o
6fabbcfe5c feat(earth): request geolocation on resetView, fallback to China 2026-03-19 12:49:38 +08:00
rayd1o
1189fec014 feat(earth): init view to China coordinates 2026-03-19 12:48:25 +08:00
rayd1o
82f7aa29a6 refactor: 提取地球坐标常量到EARTH_CONFIG
- 添加tilt、chinaLat、chinaLon、latCoefficient等常量
- earth.js和controls.js使用常量替代硬编码
- 离开地球时隐藏tooltip
2026-03-19 12:42:08 +08:00
rayd1o
777891f865 fix: 修复resetView视角和离开地球隐藏tooltip 2026-03-19 12:13:55 +08:00
rayd1o
c2eba54da0 refactor: 整理资源文件,添加legacy路由
- 将原版文件移到frontend/legacy/3dearthmult/
- 纹理文件移到frontend/public/earth/assets/
- vite.config添加/legacy/earth路由支持
- earth.js纹理路径改为assets/
2026-03-19 11:10:33 +08:00
rayd1o
f50830712c feat: 自动旋转按钮改为播放/暂停图标状态 2026-03-19 09:49:37 +08:00
rayd1o
e21b783bef fix: 修复arcgis_landing解析GeoJSON坐标格式错误
- geometry.x/y 改为 geometry.coordinates[0]/[1]
- 修复后912个登陆点正确存储
2026-03-19 09:31:38 +08:00
rayd1o
11a9dda942 refactor: 统一启动脚本到planet.sh,修复resetView调用
- 新增planet.sh统一管理start/stop/restart/health/log命令
- docker-compose.yml只保留postgres和redis
- controls.js点击事件调用resetView函数
2026-03-18 18:09:12 +08:00
rayd1o
3b0e9dec5a feat: 统一卫星和线缆锁定逻辑,使用lockedObject系统
- 添加lockedObject和lockedObjectType统一管理锁定状态
- 点击任一对象自动清除之前的锁定
- 修复悬停/锁定优先级逻辑
- 修复坐标映射worldToLocal问题
- 添加bun.lock用于bun包管理
2026-03-18 10:20:23 +08:00
rayd1o
c82e1d5a04 fix: 修复3D地球坐标映射多个严重bug
## Bug修复详情

### 1. 致命错误:球面距离计算 (calculateDistance)
- 问题:使用勾股定理计算经纬度距离,在球体表面完全错误
- 修复:改用Haversine公式计算球面大圆距离
- 影响:赤道1度=111km,极地1度=19km,原计算误差巨大

### 2. 经度范围规范化 (vector3ToLatLon)
- 问题:Math.atan2返回[-180°,180°],转换后可能超出标准范围
- 修复:添加while循环规范化到[-180, 180]区间
- 影响:避免本初子午线附近返回360°的异常值

### 3. 屏幕坐标转换支持非全屏 (screenToEarthCoords)
- 问题:假设Canvas永远全屏,非全屏时点击偏移严重
- 修复:新增domElement参数,使用getBoundingClientRect()计算相对坐标
- 影响:嵌入式3D地球组件也能精准拾取

### 4. 地球旋转时经纬度映射错误
- 问题:Raycaster返回世界坐标,未考虑地球自转
- 修复:使用earth.worldToLocal()转换到本地坐标空间
- 影响:地球旋转时经纬度显示正确跟随

## 新增功能

- CelesTrak卫星数据采集器
- Space-Track卫星数据采集器
- 卫星可视化模块(500颗,实时SGP4轨道计算)
- 海底光缆悬停显示info-card
- 统一info-card组件
- 工具栏按钮(Stellarium风格)
- 缩放控制(百分比显示)
- Docker volume映射(代码热更新)

## 文件变更

- utils.js: 坐标转换核心逻辑修复
- satellites.js: 新增卫星可视化
- cables.js: 悬停交互支持
- main.js: 悬停/锁定逻辑
- controls.js: 工具栏UI
- info-card.js: 统一卡片组件
- docker-compose.yml: volume映射
- restart.sh: 简化重启脚本
2026-03-17 04:10:24 +08:00
rayd1o
02991730e5 fix: add cable_id to API response for cable highlighting 2026-03-13 16:23:45 +08:00
rayd1o
4e487b315a upload new geo json 2026-03-13 16:09:44 +08:00
rayd1o
948af2c88f Fix: coordinates-display position 2026-03-13 13:52:25 +08:00
rayd1o
b06cb4606f Fix: remove ignored files from tracking 2026-03-13 10:55:00 +08:00
rayd1o
de32552159 feat: add data sources config system and Earth API integration
- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
2026-03-13 10:54:02 +08:00
rayd1o
99771a88c5 feat(config): make ArcGIS data source URLs configurable
- Add ARCGIS_CABLE_URL, ARCGIS_LANDING_POINT_URL, ARCGIS_CABLE_LANDING_RELATION_URL to config
- Use @property to read URL from settings in collectors
- URLs can now be configured via environment variables
2026-03-12 17:08:18 +08:00
186 changed files with 18060 additions and 5263 deletions

25
.env
View File

@@ -1,25 +0,0 @@
# Database
POSTGRES_SERVER=localhost
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=planet_db
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db
# Redis
REDIS_SERVER=localhost
REDIS_PORT=6379
REDIS_URL=redis://localhost:6379/0
# Security
SECRET_KEY=your-secret-key-change-in-production
ALGORITHM=HS256
ACCESS_TOKEN_EXPIRE_MINUTES=15
REFRESH_TOKEN_EXPIRE_DAYS=7
# API
API_V1_STR=/api/v1
PROJECT_NAME="Intelligent Planet Plan"
VERSION=1.0.0
# CORS
CORS_ORIGINS=["http://localhost:3000", "http://localhost:8000"]

2
.gitignore vendored
View File

@@ -41,6 +41,8 @@ MANIFEST
venv/ venv/
ENV/ ENV/
env/ env/
.uv/
.uv-cache/
.ruff_cache/ .ruff_cache/
*.db *.db
*.sqlite *.sqlite

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.14

View File

@@ -0,0 +1,165 @@
# 地球3D可视化架构重构计划
## 背景
当前 `frontend/public/earth` 3D地球可视化系统基于 Three.js 构建,未来需要迁移到 Unreal Engine (Cesium)。为降低迁移成本,需要提前做好**逻辑与渲染分离**的架构设计。
## 目标
- 将线缆高亮逻辑与渲染实现分离
- 保持交互逻辑可复用,只需重写渲染层
- 为后续迁移到 UE/Cesium 做好准备
## 已完成
### 1. 状态枚举定义 (constants.js)
```javascript
export const CABLE_STATE = {
NORMAL: 'normal',
HOVERED: 'hovered',
LOCKED: 'locked'
};
```
### 2. 线缆状态管理 (cables.js - 数据层)
```javascript
const cableStates = new Map();
export function getCableState(cableId) { ... }
export function setCableState(cableId, state) { ... }
export function clearAllCableStates() { ... }
export function getCableStateInfo() { ... }
```
### 3. 逻辑层调用 (main.js)
```javascript
// 悬停
setCableState(cable.userData.cableId, CABLE_STATE.HOVERED);
// 锁定
setCableState(cableId, CABLE_STATE.LOCKED);
// 恢复
setCableState(cableId, CABLE_STATE.NORMAL);
clearAllCableStates();
// 清除锁定时
clearLockedObject() {
hoveredCable = null;
clearAllCableStates();
...
}
```
### 4. 渲染层 (main.js - applyCableVisualState)
```javascript
function applyCableVisualState() {
const allCables = getCableLines();
const pulse = (Math.sin(Date.now() * CABLE_CONFIG.pulseSpeed) + 1) * 0.5;
allCables.forEach(c => {
const cableId = c.userData.cableId;
const state = getCableState(cableId);
switch (state) {
case CABLE_STATE.LOCKED:
// 呼吸效果 + 白色
c.material.opacity = CABLE_CONFIG.lockedOpacityMin + pulse * CABLE_CONFIG.pulseCoefficient;
c.material.color.setRGB(1, 1, 1);
break;
case CABLE_STATE.HOVERED:
// 白色高亮
c.material.opacity = 1;
c.material.color.setRGB(1, 1, 1);
break;
case CABLE_STATE.NORMAL:
default:
if (lockedObjectType === 'cable' && lockedObject) {
// 其他线缆变暗
c.material.opacity = CABLE_CONFIG.otherOpacity;
...
} else {
// 恢复原始
c.material.opacity = 1;
c.material.color.setHex(c.userData.originalColor);
}
}
});
}
```
## 待完成
### Phase 1: 完善状态配置 (constants.js)
```javascript
export const CABLE_CONFIG = {
lockedOpacityMin: 0.6,
lockedOpacityMax: 1.0,
otherOpacity: 0.5,
otherBrightness: 0.6,
pulseSpeed: 0.003,
pulseCoefficient: 0.4,
// 未来可扩展
// lockedLineWidth: 3,
// normalLineWidth: 1,
};
```
### Phase 2: 卫星状态管理 (satellites.js)
参考线缆状态管理,为卫星添加类似的状态枚举和状态管理函数:
```javascript
export const SATELLITE_STATE = {
NORMAL: 'normal',
HOVERED: 'hovered',
LOCKED: 'locked'
};
```
#### 卫星数据源说明
- **当前使用**: CelesTrak (https://celestrak.org) - 免费,无需认证
- **后续计划**: Space-Track.org (https://space-track.org) - 需要认证,数据更权威
- 迁移时只需修改 `satellites.js` 中的数据获取逻辑,状态管理和渲染逻辑不变
### Phase 3: 统一渲染接口
将所有对象的渲染逻辑抽象为一个统一的渲染函数:
```javascript
function applyObjectVisualState() {
applyCableVisualState();
applySatelliteVisualState();
applyLandingPointVisualState();
}
```
### Phase 4: UE 迁移准备
迁移到 Unreal Engine 时:
1. 保留 `constants.js` 中的枚举和配置
2. 保留 `cables.js` 中的数据层和状态管理
3. 保留 `main.js` 中的交互逻辑
4. **仅重写** `applyCableVisualState()` 等渲染函数
---
## 架构原则
1. **状态与渲染分离** - 对象状态由数据层管理,渲染层只负责根据状态更新视觉效果
2. **逻辑可复用** - 交互逻辑(点击、悬停、锁定)在迁移时应直接复用
3. **渲染可替换** - 渲染实现可以针对不同引擎重写,不影响逻辑层
## 文件变更记录
| 日期 | 文件 | 变更 |
|------|------|------|
| 2026-03-19 | constants.js | 新增 CABLE_STATE 枚举 |
| 2026-03-19 | cables.js | 新增状态管理函数 |
| 2026-03-19 | main.js | 使用状态管理,抽象 applyCableVisualState() |

View File

@@ -0,0 +1,136 @@
# 卫星预测轨道显示功能
## TL;DR
> 锁定卫星时显示绕地球完整一圈的预测轨道轨迹,从当前位置向外渐变消失
## Context
### 目标
点击锁定卫星 → 显示该卫星绕地球一周的完整预测轨道(而非当前的历史轨迹)
### 当前实现
- `TRAIL_LENGTH = 30` - 历史轨迹点数,每帧 push 当前位置
- 显示最近30帧历史轨迹类似彗星尾巴
### 参考: SatelliteMap.space
- 锁定时显示预测轨道
- 颜色从当前位置向外渐变消失
- 使用 satellite.js(与本项目相同)
## 实现状态
### ✅ 已完成
- [x] 计算卫星轨道周期(基于 `meanMotion`)
- [x] 生成预测轨道点(10秒采样间隔)
- [x] 创建独立预测轨道渲染对象
- [x] 锁定卫星时显示预测轨道
- [x] 解除锁定时隐藏预测轨道
- [x] 颜色渐变:当前位置(亮) → 轨道终点(暗)
- [x] 页面隐藏时清除轨迹(防止切回时闪现)
### 🚧 进行中
- [ ] 完整圆环轨道(部分卫星因 SGP4 计算问题使用 fallback 圆形轨道)
- [ ] 每颗卫星只显示一条轨道
## 技术细节
### 轨道周期计算
```javascript
function calculateOrbitalPeriod(meanMotion) {
return 86400 / meanMotion;
}
```
### 预测轨道计算
```javascript
function calculatePredictedOrbit(satellite, periodSeconds, sampleInterval = 10) {
const points = [];
const samples = Math.ceil(periodSeconds / sampleInterval);
const now = new Date();
// Full orbit: from now to now+period
for (let i = 0; i <= samples; i++) {
const time = new Date(now.getTime() + i * sampleInterval * 1000);
const pos = computeSatellitePosition(satellite, time);
if (pos) points.push(pos);
}
// Fallback: 如果真实位置计算点太少,使用圆形 fallback
if (points.length < samples * 0.5) {
points.length = 0;
// ... 圆形轨道生成
}
return points;
}
```
### 渲染对象
```javascript
let predictedOrbitLine = null;
export function showPredictedOrbit(satellite) {
hidePredictedOrbit();
// ... 计算并渲染轨道
}
export function hidePredictedOrbit() {
if (predictedOrbitLine) {
earthObjRef.remove(predictedOrbitLine);
predictedOrbitLine.geometry.dispose();
predictedOrbitLine.material.dispose();
predictedOrbitLine = null;
}
}
```
## 已知问题
### 1. TLE 格式问题
`computeSatellitePosition` 使用自行构建的 TLE 格式,对某些卫星返回 null。当前使用 fallback 圆形轨道作为补偿。
### 2. 多条轨道
部分情况下锁定时会显示多条轨道。需要确保 `hidePredictedOrbit()` 被正确调用。
## 性能考虑
### 点数估算
| 卫星类型 | 周期 | 10秒采样 | 点数 |
|---------|------|---------|------|
| LEO | 90分钟 | 540秒 | ~54点 |
| MEO | 12小时 | 4320秒 | ~432点 |
| GEO | 24小时 | 8640秒 | ~864点 |
### 优化策略
- 当前方案(GEO 约 ~900 点)性能可接受
- 如遇性能问题,GEO 可将采样间隔降低到 30 秒
## 验证方案
### QA Scenarios
**Scenario: 锁定 Starlink 卫星显示预测轨道**
1. 打开浏览器,进入 Earth 页面
2. 显示卫星(点击按钮)
3. 点击一颗 Starlink 卫星(低轨道 LEO
4. 验证:出现黄色预测轨道线,从卫星向外绕行
5. 验证:颜色从亮黄渐变到暗蓝
6. 验证:轨道完整闭环
**Scenario: 锁定 GEO 卫星显示预测轨道**
1. 筛选一颗 GEO 卫星(倾斜角 0-10° 或高轨道)
2. 点击锁定
3. 验证:显示完整 24 小时轨道(或 fallback 圆形轨道)
4. 验证:点数合理(~864点或 fallback
**Scenario: 解除锁定隐藏预测轨道**
1. 锁定一颗卫星,显示预测轨道
2. 点击地球空白处解除锁定
3. 验证:预测轨道消失
**Scenario: 切换页面后轨迹不闪现**
1. 锁定一颗卫星
2. 切换到其他标签页
3. 等待几秒
4. 切回页面
5. 验证:轨迹不突然闪现累积

View File

@@ -0,0 +1,293 @@
# WebGL Instancing 卫星渲染优化计划
## 背景
当前 `satellites.js` 使用 `THREE.Points` 渲染卫星,受限于 WebGL 点渲染性能,只能显示 ~500-1000 颗卫星。
需要迁移到真正的 WebGL Instancing 以支持 5000+ 卫星流畅渲染。
## 技术选型
| 方案 | 性能 | 改动量 | 维护性 | 推荐 |
|------|------|--------|--------|------|
| THREE.Points (现状) | ★★☆ | - | - | 基准 |
| THREE.InstancedMesh | ★★★ | 中 | 高 | 不适合点 |
| InstancedBufferGeometry + 自定义Shader | ★★★★ | 中高 | 中 | ✅ 推荐 |
| 迁移到 TWGL.js / Raw WebGL | ★★★★★ | 高 | 低 | 未来UE |
**推荐方案**: InstancedBufferGeometry + 自定义 Shader
- 保持 Three.js 架构
- 复用 satellite.js 数据层
- 性能接近原生 WebGL
---
## Phase 1: 调研与原型
### 1.1 分析现有架构
**现状 (satellites.js)**:
```javascript
// 创建点云
const pointsGeometry = new THREE.BufferGeometry();
pointsGeometry.setAttribute('position', new THREE.BufferAttribute(positions, 3));
pointsGeometry.setAttribute('color', new THREE.BufferAttribute(colors, 3));
const pointsMaterial = new THREE.PointsMaterial({
size: 2,
vertexColors: true,
transparent: true,
opacity: 0.8,
sizeAttenuation: true
});
satellitePoints = new THREE.Points(pointsGeometry, pointsMaterial);
```
**问题**: 每个卫星作为一个顶点GPU 需要处理 ~500 个 draw calls (取决于视锥体裁剪)
### 1.2 Instanced Rendering 原理
```javascript
// 目标:单次 draw call 渲染所有卫星
// 每个卫星属性:
// - position (vec3): 位置
// - color (vec3): 颜色
// - size (float): 大小 (可选)
// - selected (float): 是否选中 (0/1)
// 使用 InstancedBufferGeometry
const geometry = new THREE.InstancedBufferGeometry();
geometry.index = originalGeometry.index;
geometry.attributes.position = originalGeometry.attributes.position;
geometry.attributes.uv = originalGeometry.attributes.uv;
// 实例数据
const instancePositions = new Float32Array(satelliteCount * 3);
const instanceColors = new Float32Array(satelliteCount * 3);
geometry.setAttribute('instancePosition',
new THREE.InstancedBufferAttribute(instancePositions, 3));
geometry.setAttribute('instanceColor',
new THREE.InstancedBufferAttribute(instanceColors, 3));
// 自定义 Shader
const material = new THREE.ShaderMaterial({
vertexShader: `
attribute vec3 instancePosition;
attribute vec3 instanceColor;
varying vec3 vColor;
void main() {
vColor = instanceColor;
vec3 transformed = position + instancePosition;
gl_Position = projectionMatrix * modelViewMatrix * vec4(transformed, 1.0);
}
`,
fragmentShader: `
varying vec3 vColor;
void main() {
gl_FragColor = vec4(vColor, 0.8);
}
`
});
```
---
## Phase 2: 实现
### 2.1 创建 instanced-satellites.js
```javascript
// instanced-satellites.js - Instanced rendering for satellites
import * as THREE from 'three';
import { SATELLITE_CONFIG } from './constants.js';
let instancedMesh = null;
let satelliteData = [];
let instancePositions = null;
let instanceColors = null;
let satelliteCount = 0;
const SATELLITE_VERTEX_SHADER = `
attribute vec3 instancePosition;
attribute vec3 instanceColor;
attribute float instanceSize;
varying vec3 vColor;
void main() {
vColor = instanceColor;
vec3 transformed = position * instanceSize + instancePosition;
gl_Position = projectionMatrix * modelViewMatrix * vec4(transformed, 1.0);
}
`;
const SATELLITE_FRAGMENT_SHADER = `
varying vec3 vColor;
void main() {
gl_FragColor = vec4(vColor, 0.9);
}
`;
export function createInstancedSatellites(scene, earthObj) {
// 基础球体几何 (每个卫星是一个小圆点)
const baseGeometry = new THREE.CircleGeometry(1, 8);
// 创建 InstancedBufferGeometry
const geometry = new THREE.InstancedBufferGeometry();
geometry.index = baseGeometry.index;
geometry.attributes.position = baseGeometry.attributes.position;
geometry.attributes.uv = baseGeometry.attributes.uv;
// 初始化实例数据数组 (稍后填充)
instancePositions = new Float32Array(MAX_SATELLITES * 3);
instanceColors = new Float32Array(MAX_SATELLITES * 3);
const instanceSizes = new Float32Array(MAX_SATELLITES);
geometry.setAttribute('instancePosition',
new THREE.InstancedBufferAttribute(instancePositions, 3));
geometry.setAttribute('instanceColor',
new THREE.InstancedBufferAttribute(instanceColors, 3));
geometry.setAttribute('instanceSize',
new THREE.InstancedBufferAttribute(instanceSizes, 1));
const material = new THREE.ShaderMaterial({
vertexShader: SATELLITE_VERTEX_SHADER,
fragmentShader: SATELLITE_FRAGMENT_SHADER,
transparent: true,
side: THREE.DoubleSide
});
instancedMesh = new THREE.Mesh(geometry, material);
instancedMesh.frustumCulled = false; // 我们自己处理裁剪
scene.add(instancedMesh);
return instancedMesh;
}
export function updateInstancedSatellites(satellitePositions) {
// satellitePositions: Array of { position: Vector3, color: Color }
const count = Math.min(satellitePositions.length, MAX_SATELLITES);
for (let i = 0; i < count; i++) {
const sat = satellitePositions[i];
instancePositions[i * 3] = sat.position.x;
instancePositions[i * 3 + 1] = sat.position.y;
instancePositions[i * 3 + 2] = sat.position.z;
instanceColors[i * 3] = sat.color.r;
instanceColors[i * 3 + 1] = sat.color.g;
instanceColors[i * 3 + 2] = sat.color.b;
}
instancedMesh.geometry.attributes.instancePosition.needsUpdate = true;
instancedMesh.geometry.attributes.instanceColor.needsUpdate = true;
instancedMesh.geometry.setDrawRange(0, count);
}
```
### 2.2 修改现有 satellites.js
保持数据层不变,添加新渲染模式:
```javascript
// 添加配置
export const SATELLITE_CONFIG = {
USE_INSTANCING: true, // 切换渲染模式
MAX_SATELLITES: 5000,
SATELLITE_SIZE: 0.5,
// ...
};
```
### 2.3 性能优化点
1. **GPU 实例化**: 单次 draw call 渲染所有卫星
2. **批量更新**: 所有位置/颜色一次更新
3. **视锥体裁剪**: 自定义裁剪逻辑,避免 CPU 端逐卫星检测
4. **LOD (可选)**: 远处卫星简化显示
---
## Phase 3: 与现有系统集成
### 3.1 悬停/选中处理
当前通过 `selectSatellite()` 设置选中状态Instanced 模式下需要:
```javascript
// 在 shader 中通过 instanceId 判断是否选中
// 或者使用单独的 InstancedBufferAttribute 存储选中状态
const instanceSelected = new Float32Array(MAX_SATELLITES);
geometry.setAttribute('instanceSelected',
new THREE.InstancedBufferAttribute(instanceSelected, 1));
```
### 3.2 轨迹线
轨迹线仍然使用 `THREE.Line``THREE.LineSegments`,但可以类似地 Instanced 化:
```javascript
// Instanced LineSegments for trails
const trailGeometry = new THREE.InstancedBufferGeometry();
trailGeometry.setAttribute('position', trailPositions);
trailGeometry.setAttribute('instanceStart', ...);
trailGeometry.setAttribute('instanceEnd', ...);
```
---
## Phase 4: 验证与调优
### 4.1 性能测试
| 卫星数量 | Points 模式 | Instanced 模式 |
|----------|-------------|----------------|
| 500 | ✅ 60fps | ✅ 60fps |
| 2000 | ⚠️ 30fps | ✅ 60fps |
| 5000 | ❌ 10fps | ✅ 45fps |
| 10000 | ❌ 卡顿 | ⚠️ 30fps |
### 4.2 可能遇到的问题
1. **Shader 编译错误**: 需要调试 GLSL
2. **实例数量限制**: GPU 最大实例数 (通常 65535)
3. **大小不一**: 需要 per-instance size 属性
4. **透明度排序**: Instanced 渲染透明度处理复杂
---
## 文件变更清单
| 文件 | 变更 |
|------|------|
| `constants.js` | 新增 `SATELLITE_CONFIG` |
| `satellites.js` | 添加 Instanced 模式支持 |
| `instanced-satellites.js` | 新文件 - Instanced 渲染核心 |
| `main.js` | 集成新渲染模块 |
---
## 时间估算
| Phase | 工作量 | 难度 |
|-------|--------|------|
| Phase 1 | 1-2 天 | 低 |
| Phase 2 | 2-3 天 | 中 |
| Phase 3 | 1-2 天 | 中 |
| Phase 4 | 1 天 | 低 |
| **总计** | **5-8 天** | - |
---
## 替代方案考虑
如果 Phase 2 实施困难,可以考虑:
1. **使用 Three.js InstancedMesh**: 适合渲染小型 3D 模型替代点
2. **使用 pointcloud2 格式**: 类似 LiDAR 点云渲染
3. **Web Workers**: 将轨道计算移到 Worker 线程
4. **迁移到 Cesium**: Cesium 原生支持 Instancing且是 UE 迁移的中间步骤

View File

@@ -184,14 +184,20 @@
## 快速启动 ## 快速启动
```bash ```bash
# 启动全部服务 # 启动前后端服务
docker-compose up -d ./planet.sh start
# 仅启后端 # 仅启后端
cd backend && python -m uvicorn app.main:app --reload ./planet.sh restart -b
# 仅启前端 # 仅启前端
cd frontend && npm run dev ./planet.sh restart -f
# 交互创建用户
./planet.sh createuser
# 查看服务状态
./planet.sh health
``` ```
## API 文档 ## API 文档

4
TODO.md Normal file
View File

@@ -0,0 +1,4 @@
# TODO
- [ ] 把 BGP 观测站和异常点的 `hover/click` 手感再磨细一点
- [ ] 开始做 BGP 异常和海缆/区域的关联展示

1
VERSION Normal file
View File

@@ -0,0 +1 @@
0.21.8

View File

@@ -16,4 +16,4 @@ COPY . .
EXPOSE 8000 EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]

View File

@@ -11,6 +11,7 @@ from app.api.v1 import (
settings, settings,
collected_data, collected_data,
visualization, visualization,
bgp,
) )
api_router = APIRouter() api_router = APIRouter()
@@ -27,3 +28,4 @@ api_router.include_router(dashboard.router, prefix="/dashboard", tags=["dashboar
api_router.include_router(alerts.router, prefix="/alerts", tags=["alerts"]) api_router.include_router(alerts.router, prefix="/alerts", tags=["alerts"])
api_router.include_router(settings.router, prefix="/settings", tags=["settings"]) api_router.include_router(settings.router, prefix="/settings", tags=["settings"])
api_router.include_router(visualization.router, prefix="/visualization", tags=["visualization"]) api_router.include_router(visualization.router, prefix="/visualization", tags=["visualization"])
api_router.include_router(bgp.router, prefix="/bgp", tags=["bgp"])

View File

@@ -1,4 +1,4 @@
from datetime import datetime from datetime import UTC, datetime
from typing import Optional from typing import Optional
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
@@ -10,6 +10,7 @@ from app.models.user import User
from app.core.security import get_current_user from app.core.security import get_current_user
from app.models.alert import Alert, AlertSeverity, AlertStatus from app.models.alert import Alert, AlertSeverity, AlertStatus
router = APIRouter() router = APIRouter()
@@ -67,7 +68,7 @@ async def acknowledge_alert(
alert.status = AlertStatus.ACKNOWLEDGED alert.status = AlertStatus.ACKNOWLEDGED
alert.acknowledged_by = current_user.id alert.acknowledged_by = current_user.id
alert.acknowledged_at = datetime.utcnow() alert.acknowledged_at = datetime.now(UTC)
await db.commit() await db.commit()
return {"message": "Alert acknowledged", "alert": alert.to_dict()} return {"message": "Alert acknowledged", "alert": alert.to_dict()}
@@ -88,7 +89,7 @@ async def resolve_alert(
alert.status = AlertStatus.RESOLVED alert.status = AlertStatus.RESOLVED
alert.resolved_by = current_user.id alert.resolved_by = current_user.id
alert.resolved_at = datetime.utcnow() alert.resolved_at = datetime.now(UTC)
alert.resolution_notes = resolution alert.resolution_notes = resolution
await db.commit() await db.commit()

182
backend/app/api/v1/bgp.py Normal file
View File

@@ -0,0 +1,182 @@
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.security import get_current_user
from app.db.session import get_db
from app.models.bgp_anomaly import BGPAnomaly
from app.models.collected_data import CollectedData
from app.models.user import User
router = APIRouter()
BGP_SOURCES = ("ris_live_bgp", "bgpstream_bgp")
def _parse_dt(value: Optional[str]) -> Optional[datetime]:
if not value:
return None
return datetime.fromisoformat(value.replace("Z", "+00:00"))
def _matches_time(value: Optional[datetime], time_from: Optional[datetime], time_to: Optional[datetime]) -> bool:
if value is None:
return False
if time_from and value < time_from:
return False
if time_to and value > time_to:
return False
return True
@router.get("/events")
async def list_bgp_events(
    prefix: Optional[str] = Query(None),
    origin_asn: Optional[int] = Query(None),
    peer_asn: Optional[int] = Query(None),
    collector: Optional[str] = Query(None),
    event_type: Optional[str] = Query(None),
    source: Optional[str] = Query(None),
    time_from: Optional[str] = Query(None),
    time_to: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List collected BGP events with optional filters and pagination.

    Events are CollectedData rows whose ``source`` is one of BGP_SOURCES.
    Only the ``source`` filter is applied in SQL; prefix/ASN/collector/
    event_type filters are evaluated in Python over the JSON ``extra_data``
    column, so every candidate row is loaded into memory first.
    NOTE(review): this full-table load will not scale with event volume —
    consider pushing the metadata filters into JSON operators in SQL.

    Returns a dict with ``total`` (post-filter count), ``page``,
    ``page_size`` and the ``data`` slice for the requested page.
    Malformed ``time_from``/``time_to`` values raise ValueError (HTTP 500).
    """
    # Newest first; rows without a reference_date sort last, id breaks ties.
    stmt = (
        select(CollectedData)
        .where(CollectedData.source.in_(BGP_SOURCES))
        .order_by(CollectedData.reference_date.desc().nullslast(), CollectedData.id.desc())
    )
    if source:
        stmt = stmt.where(CollectedData.source == source)
    result = await db.execute(stmt)
    records = result.scalars().all()
    dt_from = _parse_dt(time_from)
    dt_to = _parse_dt(time_to)
    filtered = []
    for record in records:
        # extra_data may be NULL in the DB; treat it as an empty mapping.
        metadata = record.extra_data or {}
        if prefix and metadata.get("prefix") != prefix:
            continue
        # ASN filters use `is not None` so 0 would still be a valid filter value.
        if origin_asn is not None and metadata.get("origin_asn") != origin_asn:
            continue
        if peer_asn is not None and metadata.get("peer_asn") != peer_asn:
            continue
        if collector and metadata.get("collector") != collector:
            continue
        if event_type and metadata.get("event_type") != event_type:
            continue
        # Time-window filter is applied against reference_date only when a
        # bound was actually supplied.
        if (dt_from or dt_to) and not _matches_time(record.reference_date, dt_from, dt_to):
            continue
        filtered.append(record)
    # Paginate after filtering so `total` reflects the filtered count.
    offset = (page - 1) * page_size
    return {
        "total": len(filtered),
        "page": page,
        "page_size": page_size,
        "data": [record.to_dict() for record in filtered[offset : offset + page_size]],
    }
@router.get("/events/{event_id}")
async def get_bgp_event(
    event_id: int,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Fetch one BGP event by primary key; 404 if absent or not a BGP row."""
    record = await db.get(CollectedData, event_id)
    is_bgp_event = record is not None and record.source in BGP_SOURCES
    if not is_bgp_event:
        raise HTTPException(status_code=404, detail="BGP event not found")
    return record.to_dict()
@router.get("/anomalies")
async def list_bgp_anomalies(
    severity: Optional[str] = Query(None),
    anomaly_type: Optional[str] = Query(None),
    status: Optional[str] = Query(None),
    prefix: Optional[str] = Query(None),
    origin_asn: Optional[int] = Query(None),
    time_from: Optional[str] = Query(None),
    time_to: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List detected BGP anomalies with optional filters and pagination.

    String filters go into the SQL query; the created_at time window is
    applied in Python after the rows are fetched.
    """
    stmt = select(BGPAnomaly).order_by(BGPAnomaly.created_at.desc(), BGPAnomaly.id.desc())
    # Equality filters: applied only when a non-empty value was supplied.
    string_filters = (
        (severity, BGPAnomaly.severity),
        (anomaly_type, BGPAnomaly.anomaly_type),
        (status, BGPAnomaly.status),
        (prefix, BGPAnomaly.prefix),
    )
    for value, column in string_filters:
        if value:
            stmt = stmt.where(column == value)
    # `is not None` so ASN 0 would still count as a filter.
    if origin_asn is not None:
        stmt = stmt.where(BGPAnomaly.origin_asn == origin_asn)
    result = await db.execute(stmt)
    records = result.scalars().all()
    dt_from = _parse_dt(time_from)
    dt_to = _parse_dt(time_to)
    if dt_from or dt_to:
        records = [record for record in records if _matches_time(record.created_at, dt_from, dt_to)]
    # Paginate after time filtering so `total` reflects the filtered count.
    offset = (page - 1) * page_size
    return {
        "total": len(records),
        "page": page,
        "page_size": page_size,
        "data": [record.to_dict() for record in records[offset : offset + page_size]],
    }
@router.get("/anomalies/summary")
async def get_bgp_anomaly_summary(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Aggregate anomaly counts: total plus breakdowns by type/severity/status."""

    async def _grouped_counts(column):
        # One value -> count mapping, largest groups first.
        rows = await db.execute(
            select(column, func.count(BGPAnomaly.id))
            .group_by(column)
            .order_by(func.count(BGPAnomaly.id).desc())
        )
        return {key: count for key, count in rows.fetchall()}

    total_result = await db.execute(select(func.count(BGPAnomaly.id)))
    return {
        "total": total_result.scalar() or 0,
        "by_type": await _grouped_counts(BGPAnomaly.anomaly_type),
        "by_severity": await _grouped_counts(BGPAnomaly.severity),
        "by_status": await _grouped_counts(BGPAnomaly.status),
    }
@router.get("/anomalies/{anomaly_id}")
async def get_bgp_anomaly(
    anomaly_id: int,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Fetch one BGP anomaly by primary key; 404 when it does not exist."""
    record = await db.get(BGPAnomaly, anomaly_id)
    if record is None:
        raise HTTPException(status_code=404, detail="BGP anomaly not found")
    return record.to_dict()

View File

@@ -7,16 +7,138 @@ import json
import csv import csv
import io import io
from app.core.collected_data_fields import get_metadata_field
from app.core.countries import COUNTRY_OPTIONS, get_country_search_variants, normalize_country
from app.core.time import to_iso8601_utc
from app.db.session import get_db from app.db.session import get_db
from app.models.user import User from app.models.user import User
from app.core.security import get_current_user from app.core.security import get_current_user
from app.models.collected_data import CollectedData from app.models.collected_data import CollectedData
from app.models.datasource import DataSource
router = APIRouter() router = APIRouter()
COUNTRY_SQL = "metadata->>'country'"
SEARCHABLE_SQL = [
"name",
"title",
"description",
"source",
"data_type",
"source_id",
"metadata::text",
]
def parse_multi_values(value: Optional[str]) -> list[str]:
if not value:
return []
return [item.strip() for item in value.split(",") if item.strip()]
def build_in_condition(field_sql: str, values: list[str], param_prefix: str, params: dict) -> str:
placeholders = []
for index, value in enumerate(values):
key = f"{param_prefix}_{index}"
params[key] = value
placeholders.append(f":{key}")
return f"{field_sql} IN ({', '.join(placeholders)})"
def build_search_condition(search: Optional[str], params: dict) -> Optional[str]:
if not search:
return None
normalized = search.strip()
if not normalized:
return None
search_terms = [normalized]
for variant in get_country_search_variants(normalized):
if variant.casefold() not in {term.casefold() for term in search_terms}:
search_terms.append(variant)
conditions = []
for index, term in enumerate(search_terms):
params[f"search_{index}"] = f"%{term}%"
conditions.extend(f"{field} ILIKE :search_{index}" for field in SEARCHABLE_SQL)
params["search_exact"] = normalized
params["search_prefix"] = f"{normalized}%"
canonical_variants = get_country_search_variants(normalized)
canonical = canonical_variants[0] if canonical_variants else None
params["country_search_exact"] = canonical or normalized
params["country_search_prefix"] = f"{(canonical or normalized)}%"
return "(" + " OR ".join(conditions) + ")"
def build_search_rank_sql(search: Optional[str]) -> str:
if not search or not search.strip():
return "0"
return """
CASE
WHEN name ILIKE :search_exact THEN 700
WHEN name ILIKE :search_prefix THEN 600
WHEN title ILIKE :search_exact THEN 500
WHEN title ILIKE :search_prefix THEN 400
WHEN metadata->>'country' ILIKE :country_search_exact THEN 380
WHEN metadata->>'country' ILIKE :country_search_prefix THEN 340
WHEN source_id ILIKE :search_exact THEN 350
WHEN source ILIKE :search_exact THEN 300
WHEN data_type ILIKE :search_exact THEN 250
WHEN description ILIKE :search_0 THEN 150
WHEN metadata::text ILIKE :search_0 THEN 100
WHEN title ILIKE :search_0 THEN 80
WHEN name ILIKE :search_0 THEN 60
WHEN source ILIKE :search_0 THEN 40
WHEN data_type ILIKE :search_0 THEN 30
WHEN source_id ILIKE :search_0 THEN 20
ELSE 0
END
"""
def serialize_collected_row(row, source_name_map: dict[str, str] | None = None) -> dict:
metadata = row[7]
source = row[1]
return {
"id": row[0],
"source": source,
"source_name": source_name_map.get(source, source) if source_name_map else source,
"source_id": row[2],
"data_type": row[3],
"name": row[4],
"title": row[5],
"description": row[6],
"country": get_metadata_field(metadata, "country"),
"city": get_metadata_field(metadata, "city"),
"latitude": get_metadata_field(metadata, "latitude"),
"longitude": get_metadata_field(metadata, "longitude"),
"value": get_metadata_field(metadata, "value"),
"unit": get_metadata_field(metadata, "unit"),
"metadata": metadata,
"cores": get_metadata_field(metadata, "cores"),
"rmax": get_metadata_field(metadata, "rmax"),
"rpeak": get_metadata_field(metadata, "rpeak"),
"power": get_metadata_field(metadata, "power"),
"collected_at": to_iso8601_utc(row[8]),
"reference_date": to_iso8601_utc(row[9]),
"is_valid": row[10],
}
async def get_source_name_map(db: AsyncSession) -> dict[str, str]:
result = await db.execute(select(DataSource.source, DataSource.name))
return {row[0]: row[1] for row in result.fetchall()}
@router.get("") @router.get("")
async def list_collected_data( async def list_collected_data(
mode: str = Query("current", description="查询模式: current/history"),
source: Optional[str] = Query(None, description="数据源过滤"), source: Optional[str] = Query(None, description="数据源过滤"),
data_type: Optional[str] = Query(None, description="数据类型过滤"), data_type: Optional[str] = Query(None, description="数据类型过滤"),
country: Optional[str] = Query(None, description="国家过滤"), country: Optional[str] = Query(None, description="国家过滤"),
@@ -27,25 +149,30 @@ async def list_collected_data(
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
"""查询采集的数据列表""" """查询采集的数据列表"""
normalized_country = normalize_country(country) if country else None
source_values = parse_multi_values(source)
data_type_values = parse_multi_values(data_type)
# Build WHERE clause # Build WHERE clause
conditions = [] conditions = []
params = {} params = {}
if source: if mode != "history":
conditions.append("source = :source") conditions.append("COALESCE(is_current, TRUE) = TRUE")
params["source"] = source
if data_type: if source_values:
conditions.append("data_type = :data_type") conditions.append(build_in_condition("source", source_values, "source", params))
params["data_type"] = data_type if data_type_values:
if country: conditions.append(build_in_condition("data_type", data_type_values, "data_type", params))
conditions.append("country = :country") if normalized_country:
params["country"] = country conditions.append(f"{COUNTRY_SQL} = :country")
if search: params["country"] = normalized_country
conditions.append("(name ILIKE :search OR title ILIKE :search)") search_condition = build_search_condition(search, params)
params["search"] = f"%{search}%" if search_condition:
conditions.append(search_condition)
where_sql = " AND ".join(conditions) if conditions else "1=1" where_sql = " AND ".join(conditions) if conditions else "1=1"
search_rank_sql = build_search_rank_sql(search)
# Calculate offset # Calculate offset
offset = (page - 1) * page_size offset = (page - 1) * page_size
@@ -58,11 +185,11 @@ async def list_collected_data(
# Query data # Query data
query = text(f""" query = text(f"""
SELECT id, source, source_id, data_type, name, title, description, SELECT id, source, source_id, data_type, name, title, description,
country, city, latitude, longitude, value, unit, metadata, collected_at, reference_date, is_valid,
metadata, collected_at, reference_date, is_valid {search_rank_sql} AS search_rank
FROM collected_data FROM collected_data
WHERE {where_sql} WHERE {where_sql}
ORDER BY collected_at DESC ORDER BY search_rank DESC, collected_at DESC
LIMIT :limit OFFSET :offset LIMIT :limit OFFSET :offset
""") """)
params["limit"] = page_size params["limit"] = page_size
@@ -70,30 +197,11 @@ async def list_collected_data(
result = await db.execute(query, params) result = await db.execute(query, params)
rows = result.fetchall() rows = result.fetchall()
source_name_map = await get_source_name_map(db)
data = [] data = []
for row in rows: for row in rows:
data.append( data.append(serialize_collected_row(row[:11], source_name_map))
{
"id": row[0],
"source": row[1],
"source_id": row[2],
"data_type": row[3],
"name": row[4],
"title": row[5],
"description": row[6],
"country": row[7],
"city": row[8],
"latitude": row[9],
"longitude": row[10],
"value": row[11],
"unit": row[12],
"metadata": row[13],
"collected_at": row[14].isoformat() if row[14] else None,
"reference_date": row[15].isoformat() if row[15] else None,
"is_valid": row[16],
}
)
return { return {
"total": total, "total": total,
@@ -105,21 +213,39 @@ async def list_collected_data(
@router.get("/summary") @router.get("/summary")
async def get_data_summary( async def get_data_summary(
mode: str = Query("current", description="查询模式: current/history"),
source: Optional[str] = Query(None, description="数据源过滤"),
data_type: Optional[str] = Query(None, description="数据类型过滤"),
country: Optional[str] = Query(None, description="国家过滤"),
search: Optional[str] = Query(None, description="搜索名称"),
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
"""获取数据汇总统计""" """获取数据汇总统计"""
where_sql, params = build_where_clause(source, data_type, country, search)
if mode != "history":
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
overall_where_sql = "COALESCE(is_current, TRUE) = TRUE" if mode != "history" else "1=1"
overall_total_result = await db.execute(
text(f"SELECT COUNT(*) FROM collected_data WHERE {overall_where_sql}")
)
overall_total = overall_total_result.scalar() or 0
# By source and data_type # By source and data_type
result = await db.execute( result = await db.execute(
text(""" text(f"""
SELECT source, data_type, COUNT(*) as count SELECT source, data_type, COUNT(*) as count
FROM collected_data FROM collected_data
WHERE {where_sql}
GROUP BY source, data_type GROUP BY source, data_type
ORDER BY source, data_type ORDER BY source, data_type
""") """),
params,
) )
rows = result.fetchall() rows = result.fetchall()
source_name_map = await get_source_name_map(db)
by_source = {} by_source = {}
total = 0 total = 0
@@ -128,31 +254,62 @@ async def get_data_summary(
data_type = row[1] data_type = row[1]
count = row[2] count = row[2]
if source not in by_source: source_key = source_name_map.get(source, source)
by_source[source] = {} if source_key not in by_source:
by_source[source][data_type] = count by_source[source_key] = {}
by_source[source_key][data_type] = count
total += count total += count
# Total by source # Total by source
source_totals = await db.execute( source_totals = await db.execute(
text(""" text(f"""
SELECT source, COUNT(*) as count SELECT source, COUNT(*) as count
FROM collected_data FROM collected_data
WHERE {where_sql}
GROUP BY source GROUP BY source
ORDER BY count DESC ORDER BY count DESC
""") """),
params,
) )
source_rows = source_totals.fetchall() source_rows = source_totals.fetchall()
type_totals = await db.execute(
text(f"""
SELECT data_type, COUNT(*) as count
FROM collected_data
WHERE {where_sql}
GROUP BY data_type
ORDER BY count DESC, data_type
"""),
params,
)
type_rows = type_totals.fetchall()
return { return {
"total_records": total, "total_records": total,
"overall_total_records": overall_total,
"by_source": by_source, "by_source": by_source,
"source_totals": [{"source": row[0], "count": row[1]} for row in source_rows], "source_totals": [
{
"source": row[0],
"source_name": source_name_map.get(row[0], row[0]),
"count": row[1],
}
for row in source_rows
],
"type_totals": [
{
"data_type": row[0],
"count": row[1],
}
for row in type_rows
],
} }
@router.get("/sources") @router.get("/sources")
async def get_data_sources( async def get_data_sources(
mode: str = Query("current", description="查询模式: current/history"),
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
@@ -160,18 +317,25 @@ async def get_data_sources(
result = await db.execute( result = await db.execute(
text(""" text("""
SELECT DISTINCT source FROM collected_data ORDER BY source SELECT DISTINCT source FROM collected_data
""" + ("WHERE COALESCE(is_current, TRUE) = TRUE " if mode != "history" else "") + """
ORDER BY source
""") """)
) )
rows = result.fetchall() rows = result.fetchall()
source_name_map = await get_source_name_map(db)
return { return {
"sources": [row[0] for row in rows], "sources": [
{"source": row[0], "source_name": source_name_map.get(row[0], row[0])}
for row in rows
],
} }
@router.get("/types") @router.get("/types")
async def get_data_types( async def get_data_types(
mode: str = Query("current", description="查询模式: current/history"),
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
@@ -179,7 +343,9 @@ async def get_data_types(
result = await db.execute( result = await db.execute(
text(""" text("""
SELECT DISTINCT data_type FROM collected_data ORDER BY data_type SELECT DISTINCT data_type FROM collected_data
""" + ("WHERE COALESCE(is_current, TRUE) = TRUE " if mode != "history" else "") + """
ORDER BY data_type
""") """)
) )
rows = result.fetchall() rows = result.fetchall()
@@ -196,17 +362,8 @@ async def get_countries(
): ):
"""获取所有国家列表""" """获取所有国家列表"""
result = await db.execute(
text("""
SELECT DISTINCT country FROM collected_data
WHERE country IS NOT NULL AND country != ''
ORDER BY country
""")
)
rows = result.fetchall()
return { return {
"countries": [row[0] for row in rows], "countries": COUNTRY_OPTIONS,
} }
@@ -221,7 +378,6 @@ async def get_collected_data(
result = await db.execute( result = await db.execute(
text(""" text("""
SELECT id, source, source_id, data_type, name, title, description, SELECT id, source, source_id, data_type, name, title, description,
country, city, latitude, longitude, value, unit,
metadata, collected_at, reference_date, is_valid metadata, collected_at, reference_date, is_valid
FROM collected_data FROM collected_data
WHERE id = :id WHERE id = :id
@@ -236,25 +392,8 @@ async def get_collected_data(
detail="数据不存在", detail="数据不存在",
) )
return { source_name_map = await get_source_name_map(db)
"id": row[0], return serialize_collected_row(row, source_name_map)
"source": row[1],
"source_id": row[2],
"data_type": row[3],
"name": row[4],
"title": row[5],
"description": row[6],
"country": row[7],
"city": row[8],
"latitude": row[9],
"longitude": row[10],
"value": row[11],
"unit": row[12],
"metadata": row[13],
"collected_at": row[14].isoformat() if row[14] else None,
"reference_date": row[15].isoformat() if row[15] else None,
"is_valid": row[16],
}
def build_where_clause( def build_where_clause(
@@ -263,19 +402,21 @@ def build_where_clause(
"""Build WHERE clause and params for queries""" """Build WHERE clause and params for queries"""
conditions = [] conditions = []
params = {} params = {}
source_values = parse_multi_values(source)
data_type_values = parse_multi_values(data_type)
if source: if source_values:
conditions.append("source = :source") conditions.append(build_in_condition("source", source_values, "source", params))
params["source"] = source if data_type_values:
if data_type: conditions.append(build_in_condition("data_type", data_type_values, "data_type", params))
conditions.append("data_type = :data_type") normalized_country = normalize_country(country) if country else None
params["data_type"] = data_type
if country: if normalized_country:
conditions.append("country = :country") conditions.append(f"{COUNTRY_SQL} = :country")
params["country"] = country params["country"] = normalized_country
if search: search_condition = build_search_condition(search, params)
conditions.append("(name ILIKE :search OR title ILIKE :search)") if search_condition:
params["search"] = f"%{search}%" conditions.append(search_condition)
where_sql = " AND ".join(conditions) if conditions else "1=1" where_sql = " AND ".join(conditions) if conditions else "1=1"
return where_sql, params return where_sql, params
@@ -283,6 +424,7 @@ def build_where_clause(
@router.get("/export/json") @router.get("/export/json")
async def export_json( async def export_json(
mode: str = Query("current", description="查询模式: current/history"),
source: Optional[str] = Query(None, description="数据源过滤"), source: Optional[str] = Query(None, description="数据源过滤"),
data_type: Optional[str] = Query(None, description="数据类型过滤"), data_type: Optional[str] = Query(None, description="数据类型过滤"),
country: Optional[str] = Query(None, description="国家过滤"), country: Optional[str] = Query(None, description="国家过滤"),
@@ -294,11 +436,12 @@ async def export_json(
"""导出数据为 JSON 格式""" """导出数据为 JSON 格式"""
where_sql, params = build_where_clause(source, data_type, country, search) where_sql, params = build_where_clause(source, data_type, country, search)
if mode != "history":
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
params["limit"] = limit params["limit"] = limit
query = text(f""" query = text(f"""
SELECT id, source, source_id, data_type, name, title, description, SELECT id, source, source_id, data_type, name, title, description,
country, city, latitude, longitude, value, unit,
metadata, collected_at, reference_date, is_valid metadata, collected_at, reference_date, is_valid
FROM collected_data FROM collected_data
WHERE {where_sql} WHERE {where_sql}
@@ -311,27 +454,7 @@ async def export_json(
data = [] data = []
for row in rows: for row in rows:
data.append( data.append(serialize_collected_row(row))
{
"id": row[0],
"source": row[1],
"source_id": row[2],
"data_type": row[3],
"name": row[4],
"title": row[5],
"description": row[6],
"country": row[7],
"city": row[8],
"latitude": row[9],
"longitude": row[10],
"value": row[11],
"unit": row[12],
"metadata": row[13],
"collected_at": row[14].isoformat() if row[14] else None,
"reference_date": row[15].isoformat() if row[15] else None,
"is_valid": row[16],
}
)
json_str = json.dumps({"data": data, "total": len(data)}, ensure_ascii=False, indent=2) json_str = json.dumps({"data": data, "total": len(data)}, ensure_ascii=False, indent=2)
@@ -346,6 +469,7 @@ async def export_json(
@router.get("/export/csv") @router.get("/export/csv")
async def export_csv( async def export_csv(
mode: str = Query("current", description="查询模式: current/history"),
source: Optional[str] = Query(None, description="数据源过滤"), source: Optional[str] = Query(None, description="数据源过滤"),
data_type: Optional[str] = Query(None, description="数据类型过滤"), data_type: Optional[str] = Query(None, description="数据类型过滤"),
country: Optional[str] = Query(None, description="国家过滤"), country: Optional[str] = Query(None, description="国家过滤"),
@@ -357,11 +481,12 @@ async def export_csv(
"""导出数据为 CSV 格式""" """导出数据为 CSV 格式"""
where_sql, params = build_where_clause(source, data_type, country, search) where_sql, params = build_where_clause(source, data_type, country, search)
if mode != "history":
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
params["limit"] = limit params["limit"] = limit
query = text(f""" query = text(f"""
SELECT id, source, source_id, data_type, name, title, description, SELECT id, source, source_id, data_type, name, title, description,
country, city, latitude, longitude, value, unit,
metadata, collected_at, reference_date, is_valid metadata, collected_at, reference_date, is_valid
FROM collected_data FROM collected_data
WHERE {where_sql} WHERE {where_sql}
@@ -409,16 +534,16 @@ async def export_csv(
row[4], row[4],
row[5], row[5],
row[6], row[6],
row[7], get_metadata_field(row[7], "country"),
row[8], get_metadata_field(row[7], "city"),
row[9], get_metadata_field(row[7], "latitude"),
get_metadata_field(row[7], "longitude"),
get_metadata_field(row[7], "value"),
get_metadata_field(row[7], "unit"),
json.dumps(row[7]) if row[7] else "",
to_iso8601_utc(row[8]) or "",
to_iso8601_utc(row[9]) or "",
row[10], row[10],
row[11],
row[12],
json.dumps(row[13]) if row[13] else "",
row[14].isoformat() if row[14] else "",
row[15].isoformat() if row[15] else "",
row[16],
] ]
) )

View File

@@ -1,6 +1,6 @@
"""Dashboard API with caching and optimizations""" """Dashboard API with caching and optimizations"""
from datetime import datetime, timedelta from datetime import UTC, datetime, timedelta
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from sqlalchemy import select, func, text from sqlalchemy import select, func, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
@@ -13,6 +13,8 @@ from app.models.alert import Alert, AlertSeverity
from app.models.task import CollectionTask from app.models.task import CollectionTask
from app.core.security import get_current_user from app.core.security import get_current_user
from app.core.cache import cache from app.core.cache import cache
from app.core.time import to_iso8601_utc
# Built-in collectors info (mirrored from datasources.py) # Built-in collectors info (mirrored from datasources.py)
COLLECTOR_INFO = { COLLECTOR_INFO = {
@@ -110,7 +112,7 @@ async def get_stats(
if cached_result: if cached_result:
return cached_result return cached_result
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) today_start = datetime.now(UTC).replace(hour=0, minute=0, second=0, microsecond=0)
# Count built-in collectors # Count built-in collectors
built_in_count = len(COLLECTOR_INFO) built_in_count = len(COLLECTOR_INFO)
@@ -174,7 +176,7 @@ async def get_stats(
"active_datasources": active_datasources, "active_datasources": active_datasources,
"tasks_today": tasks_today, "tasks_today": tasks_today,
"success_rate": round(success_rate, 1), "success_rate": round(success_rate, 1),
"last_updated": datetime.utcnow().isoformat(), "last_updated": to_iso8601_utc(datetime.now(UTC)),
"alerts": { "alerts": {
"critical": critical_alerts, "critical": critical_alerts,
"warning": warning_alerts, "warning": warning_alerts,
@@ -229,10 +231,10 @@ async def get_summary(
summary[module] = { summary[module] = {
"datasources": data["datasources"], "datasources": data["datasources"],
"total_records": 0, # Built-in don't track this in dashboard stats "total_records": 0, # Built-in don't track this in dashboard stats
"last_updated": datetime.utcnow().isoformat(), "last_updated": to_iso8601_utc(datetime.now(UTC)),
} }
response = {"modules": summary, "last_updated": datetime.utcnow().isoformat()} response = {"modules": summary, "last_updated": to_iso8601_utc(datetime.now(UTC))}
cache.set(cache_key, response, expire_seconds=300) cache.set(cache_key, response, expire_seconds=300)

View File

@@ -14,6 +14,7 @@ from app.models.user import User
from app.models.datasource_config import DataSourceConfig from app.models.datasource_config import DataSourceConfig
from app.core.security import get_current_user from app.core.security import get_current_user
from app.core.cache import cache from app.core.cache import cache
from app.core.time import to_iso8601_utc
router = APIRouter() router = APIRouter()
@@ -123,8 +124,8 @@ async def list_configs(
"headers": c.headers, "headers": c.headers,
"config": c.config, "config": c.config,
"is_active": c.is_active, "is_active": c.is_active,
"created_at": c.created_at.isoformat() if c.created_at else None, "created_at": to_iso8601_utc(c.created_at),
"updated_at": c.updated_at.isoformat() if c.updated_at else None, "updated_at": to_iso8601_utc(c.updated_at),
} }
for c in configs for c in configs
], ],
@@ -155,8 +156,8 @@ async def get_config(
"headers": config.headers, "headers": config.headers,
"config": config.config, "config": config.config,
"is_active": config.is_active, "is_active": config.is_active,
"created_at": config.created_at.isoformat() if config.created_at else None, "created_at": to_iso8601_utc(config.created_at),
"updated_at": config.updated_at.isoformat() if config.updated_at else None, "updated_at": to_iso8601_utc(config.updated_at),
} }
@@ -307,3 +308,40 @@ async def test_new_config(
"error": "Connection failed", "error": "Connection failed",
"message": str(e), "message": str(e),
} }
@router.get("/configs/all")
async def list_all_datasources(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
"""List all data sources: YAML defaults + DB overrides"""
from app.core.data_sources import COLLECTOR_URL_KEYS, get_data_sources_config
config = get_data_sources_config()
db_query = await db.execute(select(DataSourceConfig))
db_configs = {c.name: c for c in db_query.scalars().all()}
result = []
for name, yaml_key in COLLECTOR_URL_KEYS.items():
yaml_url = config.get_yaml_url(name)
db_config = db_configs.get(name)
result.append(
{
"name": name,
"default_url": yaml_url,
"endpoint": db_config.endpoint if db_config else yaml_url,
"is_overridden": db_config is not None and db_config.endpoint != yaml_url
if yaml_url
else db_config is not None,
"is_active": db_config.is_active if db_config else True,
"source_type": db_config.source_type if db_config else "http",
"description": db_config.description
if db_config
else f"Data source from YAML: {yaml_key}",
}
)
return {"total": len(result), "data": result}

View File

@@ -1,127 +1,77 @@
from typing import List, Optional import asyncio
from datetime import datetime from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Depends, HTTPException, status from typing import Optional
from sqlalchemy import select, func
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.core.time import to_iso8601_utc
from app.core.security import get_current_user
from app.core.data_sources import get_data_sources_config
from app.db.session import get_db from app.db.session import get_db
from app.models.user import User from app.models.collected_data import CollectedData
from app.models.datasource import DataSource from app.models.datasource import DataSource
from app.models.task import CollectionTask from app.models.task import CollectionTask
from app.models.collected_data import CollectedData from app.models.user import User
from app.core.security import get_current_user from app.services.scheduler import get_latest_task_id_for_datasource, run_collector_now, sync_datasource_job
from app.services.collectors.registry import collector_registry
router = APIRouter() router = APIRouter()
COLLECTOR_INFO = {
"top500": {
"id": 1,
"name": "TOP500 Supercomputers",
"module": "L1",
"priority": "P0",
"frequency_hours": 4,
},
"epoch_ai_gpu": {
"id": 2,
"name": "Epoch AI GPU Clusters",
"module": "L1",
"priority": "P0",
"frequency_hours": 6,
},
"huggingface_models": {
"id": 3,
"name": "HuggingFace Models",
"module": "L2",
"priority": "P1",
"frequency_hours": 12,
},
"huggingface_datasets": {
"id": 4,
"name": "HuggingFace Datasets",
"module": "L2",
"priority": "P1",
"frequency_hours": 12,
},
"huggingface_spaces": {
"id": 5,
"name": "HuggingFace Spaces",
"module": "L2",
"priority": "P2",
"frequency_hours": 24,
},
"peeringdb_ixp": {
"id": 6,
"name": "PeeringDB IXP",
"module": "L2",
"priority": "P1",
"frequency_hours": 24,
},
"peeringdb_network": {
"id": 7,
"name": "PeeringDB Networks",
"module": "L2",
"priority": "P2",
"frequency_hours": 48,
},
"peeringdb_facility": {
"id": 8,
"name": "PeeringDB Facilities",
"module": "L2",
"priority": "P2",
"frequency_hours": 48,
},
"telegeography_cables": {
"id": 9,
"name": "Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"telegeography_landing": {
"id": 10,
"name": "Cable Landing Points",
"module": "L2",
"priority": "P2",
"frequency_hours": 168,
},
"telegeography_systems": {
"id": 11,
"name": "Cable Systems",
"module": "L2",
"priority": "P2",
"frequency_hours": 168,
},
"arcgis_cables": {
"id": 15,
"name": "ArcGIS Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"fao_landing_points": {
"id": 16,
"name": "FAO Landing Points",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
}
ID_TO_COLLECTOR = {info["id"]: name for name, info in COLLECTOR_INFO.items()} def format_frequency_label(minutes: int) -> str:
COLLECTOR_TO_ID = {name: info["id"] for name, info in COLLECTOR_INFO.items()} if minutes % 1440 == 0:
return f"{minutes // 1440}d"
if minutes % 60 == 0:
return f"{minutes // 60}h"
return f"{minutes}m"
def get_collector_name(source_id: str) -> Optional[str]: def is_due_for_collection(datasource: DataSource, now: datetime) -> bool:
if datasource.last_run_at is None:
return True
return datasource.last_run_at + timedelta(minutes=datasource.frequency_minutes) <= now
async def get_datasource_record(db: AsyncSession, source_id: str) -> Optional[DataSource]:
datasource = None
try: try:
numeric_id = int(source_id) datasource = await db.get(DataSource, int(source_id))
if numeric_id in ID_TO_COLLECTOR:
return ID_TO_COLLECTOR[numeric_id]
except ValueError: except ValueError:
pass pass
if source_id in COLLECTOR_INFO:
return source_id if datasource is not None:
return None return datasource
result = await db.execute(
select(DataSource).where(
(DataSource.source == source_id) | (DataSource.collector_class == source_id)
)
)
return result.scalar_one_or_none()
async def get_last_completed_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]:
result = await db.execute(
select(CollectionTask)
.where(CollectionTask.datasource_id == datasource_id)
.where(CollectionTask.completed_at.isnot(None))
.where(CollectionTask.status.in_(("success", "failed", "cancelled")))
.order_by(CollectionTask.completed_at.desc())
.limit(1)
)
return result.scalar_one_or_none()
async def get_running_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]:
result = await db.execute(
select(CollectionTask)
.where(CollectionTask.datasource_id == datasource_id)
.where(CollectionTask.status == "running")
.order_by(CollectionTask.started_at.desc())
.limit(1)
)
return result.scalar_one_or_none()
@router.get("") @router.get("")
@@ -132,80 +82,156 @@ async def list_datasources(
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
query = select(DataSource) query = select(DataSource).order_by(DataSource.module, DataSource.id)
filters = []
if module: if module:
filters.append(DataSource.module == module) query = query.where(DataSource.module == module)
if is_active is not None: if is_active is not None:
filters.append(DataSource.is_active == is_active) query = query.where(DataSource.is_active == is_active)
if priority: if priority:
filters.append(DataSource.priority == priority) query = query.where(DataSource.priority == priority)
if filters:
query = query.where(*filters)
result = await db.execute(query) result = await db.execute(query)
datasources = result.scalars().all() datasources = result.scalars().all()
collector_list = [] collector_list = []
for name, info in COLLECTOR_INFO.items(): config = get_data_sources_config()
is_active_status = collector_registry.is_active(name) for datasource in datasources:
running_task = await get_running_task(db, datasource.id)
running_task_query = ( last_task = await get_last_completed_task(db, datasource.id)
select(CollectionTask) endpoint = await config.get_url(datasource.source, db)
.where(CollectionTask.datasource_id == info["id"]) data_count_result = await db.execute(
.where(CollectionTask.status == "running") select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
.order_by(CollectionTask.started_at.desc())
.limit(1)
) )
running_result = await db.execute(running_task_query)
running_task = running_result.scalar_one_or_none()
last_run_query = (
select(CollectionTask)
.where(CollectionTask.datasource_id == info["id"])
.where(CollectionTask.completed_at.isnot(None))
.order_by(CollectionTask.completed_at.desc())
.limit(1)
)
last_run_result = await db.execute(last_run_query)
last_task = last_run_result.scalar_one_or_none()
data_count_query = select(func.count(CollectedData.id)).where(CollectedData.source == name)
data_count_result = await db.execute(data_count_query)
data_count = data_count_result.scalar() or 0 data_count = data_count_result.scalar() or 0
last_run = None last_run_at = datasource.last_run_at or (last_task.completed_at if last_task else None)
if last_task and last_task.completed_at and data_count > 0: last_run = to_iso8601_utc(last_run_at)
last_run = last_task.completed_at.strftime("%Y-%m-%d %H:%M") last_status = datasource.last_status or (last_task.status if last_task else None)
collector_list.append( collector_list.append(
{ {
"id": info["id"], "id": datasource.id,
"name": info["name"], "name": datasource.name,
"module": info["module"], "module": datasource.module,
"priority": info["priority"], "priority": datasource.priority,
"frequency": f"{info['frequency_hours']}h", "frequency": format_frequency_label(datasource.frequency_minutes),
"is_active": is_active_status, "frequency_minutes": datasource.frequency_minutes,
"collector_class": name, "is_active": datasource.is_active,
"collector_class": datasource.collector_class,
"endpoint": endpoint,
"last_run": last_run, "last_run": last_run,
"last_run_at": to_iso8601_utc(last_run_at),
"last_status": last_status,
"last_records_processed": last_task.records_processed if last_task else None,
"data_count": data_count,
"is_running": running_task is not None, "is_running": running_task is not None,
"task_id": running_task.id if running_task else None, "task_id": running_task.id if running_task else None,
"progress": running_task.progress if running_task else None, "progress": running_task.progress if running_task else None,
"phase": running_task.phase if running_task else None,
"records_processed": running_task.records_processed if running_task else None, "records_processed": running_task.records_processed if running_task else None,
"total_records": running_task.total_records if running_task else None, "total_records": running_task.total_records if running_task else None,
} }
) )
if module: return {"total": len(collector_list), "data": collector_list}
collector_list = [c for c in collector_list if c["module"] == module]
if priority:
collector_list = [c for c in collector_list if c["priority"] == priority] @router.post("/trigger-all")
async def trigger_all_datasources(
force: bool = Query(False),
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
result = await db.execute(
select(DataSource)
.where(DataSource.is_active == True)
.order_by(DataSource.module, DataSource.id)
)
datasources = result.scalars().all()
if not datasources:
return {
"status": "noop",
"message": "No active data sources to trigger",
"triggered": [],
"skipped": [],
"failed": [],
}
previous_task_ids: dict[int, Optional[int]] = {}
triggered_sources: list[dict] = []
skipped_sources: list[dict] = []
failed_sources: list[dict] = []
now = datetime.now(timezone.utc)
for datasource in datasources:
running_task = await get_running_task(db, datasource.id)
if running_task is not None:
skipped_sources.append(
{
"id": datasource.id,
"source": datasource.source,
"name": datasource.name,
"reason": "already_running",
"task_id": running_task.id,
}
)
continue
if not force and not is_due_for_collection(datasource, now):
skipped_sources.append(
{
"id": datasource.id,
"source": datasource.source,
"name": datasource.name,
"reason": "within_frequency_window",
"last_run_at": to_iso8601_utc(datasource.last_run_at),
"next_run_at": to_iso8601_utc(
datasource.last_run_at + timedelta(minutes=datasource.frequency_minutes)
),
}
)
continue
previous_task_ids[datasource.id] = await get_latest_task_id_for_datasource(datasource.id)
success = run_collector_now(datasource.source)
if not success:
failed_sources.append(
{
"id": datasource.id,
"source": datasource.source,
"name": datasource.name,
"reason": "trigger_failed",
}
)
continue
triggered_sources.append(
{
"id": datasource.id,
"source": datasource.source,
"name": datasource.name,
"task_id": None,
}
)
for _ in range(20):
await asyncio.sleep(0.1)
pending = [item for item in triggered_sources if item["task_id"] is None]
if not pending:
break
for item in pending:
task_id = await get_latest_task_id_for_datasource(item["id"])
if task_id is not None and task_id != previous_task_ids.get(item["id"]):
item["task_id"] = task_id
return { return {
"total": len(collector_list), "status": "triggered" if triggered_sources else "partial",
"data": collector_list, "message": f"Triggered {len(triggered_sources)} data sources",
"force": force,
"triggered": triggered_sources,
"skipped": skipped_sources,
"failed": failed_sources,
} }
@@ -215,19 +241,24 @@ async def get_datasource(
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name] config = get_data_sources_config()
endpoint = await config.get_url(datasource.source, db)
return { return {
"id": info["id"], "id": datasource.id,
"name": info["name"], "name": datasource.name,
"module": info["module"], "module": datasource.module,
"priority": info["priority"], "priority": datasource.priority,
"frequency": f"{info['frequency_hours']}h", "frequency": format_frequency_label(datasource.frequency_minutes),
"collector_class": collector_name, "frequency_minutes": datasource.frequency_minutes,
"is_active": collector_registry.is_active(collector_name), "collector_class": datasource.collector_class,
"source": datasource.source,
"endpoint": endpoint,
"is_active": datasource.is_active,
} }
@@ -235,24 +266,32 @@ async def get_datasource(
async def enable_datasource( async def enable_datasource(
source_id: str, source_id: str,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
collector_registry.set_active(collector_name, True)
return {"status": "enabled", "source_id": source_id} datasource.is_active = True
await db.commit()
await sync_datasource_job(datasource.id)
return {"status": "enabled", "source_id": datasource.id}
@router.post("/{source_id}/disable") @router.post("/{source_id}/disable")
async def disable_datasource( async def disable_datasource(
source_id: str, source_id: str,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
collector_registry.set_active(collector_name, False)
return {"status": "disabled", "source_id": source_id} datasource.is_active = False
await db.commit()
await sync_datasource_job(datasource.id)
return {"status": "disabled", "source_id": datasource.id}
@router.get("/{source_id}/stats") @router.get("/{source_id}/stats")
@@ -261,26 +300,19 @@ async def get_datasource_stats(
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name] result = await db.execute(
source_name = info["name"] select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
)
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
result = await db.execute(query)
total = result.scalar() or 0 total = result.scalar() or 0
if total == 0:
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name)
result = await db.execute(query)
total = result.scalar() or 0
return { return {
"source_id": source_id, "source_id": datasource.id,
"collector_name": collector_name, "collector_name": datasource.collector_class,
"name": info["name"], "name": datasource.name,
"total_records": total, "total_records": total,
} }
@@ -289,30 +321,36 @@ async def get_datasource_stats(
async def trigger_datasource( async def trigger_datasource(
source_id: str, source_id: str,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
from app.services.scheduler import run_collector_now if not datasource.is_active:
if not collector_registry.is_active(collector_name):
raise HTTPException(status_code=400, detail="Data source is disabled") raise HTTPException(status_code=400, detail="Data source is disabled")
success = run_collector_now(collector_name) previous_task_id = await get_latest_task_id_for_datasource(datasource.id)
success = run_collector_now(datasource.source)
if not success:
raise HTTPException(status_code=500, detail=f"Failed to trigger collector '{datasource.source}'")
if success: task_id = None
return { for _ in range(20):
"status": "triggered", await asyncio.sleep(0.1)
"source_id": source_id, task_id = await get_latest_task_id_for_datasource(datasource.id)
"collector_name": collector_name, if task_id is not None and task_id != previous_task_id:
"message": f"Collector '{collector_name}' has been triggered", break
} if task_id == previous_task_id:
else: task_id = None
raise HTTPException(
status_code=500, return {
detail=f"Failed to trigger collector '{collector_name}'", "status": "triggered",
) "source_id": datasource.id,
"task_id": task_id,
"collector_name": datasource.source,
"message": f"Collector '{datasource.source}' has been triggered",
}
@router.delete("/{source_id}/data") @router.delete("/{source_id}/data")
@@ -321,39 +359,25 @@ async def clear_datasource_data(
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name] result = await db.execute(
source_name = info["name"] select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
)
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
result = await db.execute(query)
count = result.scalar() or 0 count = result.scalar() or 0
if count == 0: if count == 0:
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name) return {"status": "success", "message": "No data to clear", "deleted_count": 0}
result = await db.execute(query)
count = result.scalar() or 0
delete_source = source_name
else:
delete_source = collector_name
if count == 0: delete_query = CollectedData.__table__.delete().where(CollectedData.source == datasource.source)
return {
"status": "success",
"message": "No data to clear",
"deleted_count": 0,
}
delete_query = CollectedData.__table__.delete().where(CollectedData.source == delete_source)
await db.execute(delete_query) await db.execute(delete_query)
await db.commit() await db.commit()
return { return {
"status": "success", "status": "success",
"message": f"Cleared {count} records for data source '{info['name']}'", "message": f"Cleared {count} records for data source '{datasource.name}'",
"deleted_count": count, "deleted_count": count,
} }
@@ -361,32 +385,29 @@ async def clear_datasource_data(
@router.get("/{source_id}/task-status") @router.get("/{source_id}/task-status")
async def get_task_status( async def get_task_status(
source_id: str, source_id: str,
task_id: Optional[int] = None,
db: AsyncSession = Depends(get_db), db: AsyncSession = Depends(get_db),
): ):
collector_name = get_collector_name(source_id) datasource = await get_datasource_record(db, source_id)
if not collector_name: if not datasource:
raise HTTPException(status_code=404, detail="Data source not found") raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name] if task_id is not None:
task = await db.get(CollectionTask, task_id)
if not task or task.datasource_id != datasource.id:
raise HTTPException(status_code=404, detail="Task not found")
else:
task = await get_running_task(db, datasource.id)
running_task_query = ( if not task:
select(CollectionTask) return {"is_running": False, "task_id": None, "progress": None, "phase": None, "status": "idle"}
.where(CollectionTask.datasource_id == info["id"])
.where(CollectionTask.status == "running")
.order_by(CollectionTask.started_at.desc())
.limit(1)
)
running_result = await db.execute(running_task_query)
running_task = running_result.scalar_one_or_none()
if not running_task:
return {"is_running": False, "task_id": None, "progress": None}
return { return {
"is_running": True, "is_running": task.status == "running",
"task_id": running_task.id, "task_id": task.id,
"progress": running_task.progress, "progress": task.progress,
"records_processed": running_task.records_processed, "phase": task.phase,
"total_records": running_task.total_records, "records_processed": task.records_processed,
"status": running_task.status, "total_records": task.total_records,
"status": task.status,
} }

View File

@@ -1,13 +1,22 @@
from datetime import UTC, datetime
from typing import Optional from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, EmailStr
from app.models.user import User from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, EmailStr, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.security import get_current_user from app.core.security import get_current_user
from app.core.time import to_iso8601_utc
from app.db.session import get_db
from app.models.datasource import DataSource
from app.models.system_setting import SystemSetting
from app.models.user import User
from app.services.scheduler import sync_datasource_job
router = APIRouter() router = APIRouter()
default_settings = { DEFAULT_SETTINGS = {
"system": { "system": {
"system_name": "智能星球", "system_name": "智能星球",
"refresh_interval": 60, "refresh_interval": 60,
@@ -29,17 +38,13 @@ default_settings = {
}, },
} }
system_settings = default_settings["system"].copy()
notification_settings = default_settings["notifications"].copy()
security_settings = default_settings["security"].copy()
class SystemSettingsUpdate(BaseModel): class SystemSettingsUpdate(BaseModel):
system_name: str = "智能星球" system_name: str = "智能星球"
refresh_interval: int = 60 refresh_interval: int = Field(default=60, ge=10, le=3600)
auto_refresh: bool = True auto_refresh: bool = True
data_retention_days: int = 30 data_retention_days: int = Field(default=30, ge=1, le=3650)
max_concurrent_tasks: int = 5 max_concurrent_tasks: int = Field(default=5, ge=1, le=50)
class NotificationSettingsUpdate(BaseModel): class NotificationSettingsUpdate(BaseModel):
@@ -51,60 +56,166 @@ class NotificationSettingsUpdate(BaseModel):
class SecuritySettingsUpdate(BaseModel): class SecuritySettingsUpdate(BaseModel):
session_timeout: int = 60 session_timeout: int = Field(default=60, ge=5, le=1440)
max_login_attempts: int = 5 max_login_attempts: int = Field(default=5, ge=1, le=20)
password_policy: str = "medium" password_policy: str = Field(default="medium")
class CollectorSettingsUpdate(BaseModel):
is_active: bool
priority: str = Field(default="P1")
frequency_minutes: int = Field(default=60, ge=1, le=10080)
def merge_with_defaults(category: str, payload: Optional[dict]) -> dict:
merged = DEFAULT_SETTINGS[category].copy()
if payload:
merged.update(payload)
return merged
async def get_setting_record(db: AsyncSession, category: str) -> Optional[SystemSetting]:
result = await db.execute(select(SystemSetting).where(SystemSetting.category == category))
return result.scalar_one_or_none()
async def get_setting_payload(db: AsyncSession, category: str) -> dict:
record = await get_setting_record(db, category)
return merge_with_defaults(category, record.payload if record else None)
async def save_setting_payload(db: AsyncSession, category: str, payload: dict) -> dict:
record = await get_setting_record(db, category)
if record is None:
record = SystemSetting(category=category, payload=payload)
db.add(record)
else:
record.payload = payload
await db.commit()
await db.refresh(record)
return merge_with_defaults(category, record.payload)
def format_frequency_label(minutes: int) -> str:
if minutes % 1440 == 0:
return f"{minutes // 1440}d"
if minutes % 60 == 0:
return f"{minutes // 60}h"
return f"{minutes}m"
def serialize_collector(datasource: DataSource) -> dict:
return {
"id": datasource.id,
"name": datasource.name,
"source": datasource.source,
"module": datasource.module,
"priority": datasource.priority,
"frequency_minutes": datasource.frequency_minutes,
"frequency": format_frequency_label(datasource.frequency_minutes),
"is_active": datasource.is_active,
"last_run_at": to_iso8601_utc(datasource.last_run_at),
"last_status": datasource.last_status,
"next_run_at": to_iso8601_utc(datasource.next_run_at),
}
@router.get("/system") @router.get("/system")
async def get_system_settings(current_user: User = Depends(get_current_user)): async def get_system_settings(
return {"system": system_settings} current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
return {"system": await get_setting_payload(db, "system")}
@router.put("/system") @router.put("/system")
async def update_system_settings( async def update_system_settings(
settings: SystemSettingsUpdate, settings: SystemSettingsUpdate,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
global system_settings payload = await save_setting_payload(db, "system", settings.model_dump())
system_settings = settings.model_dump() return {"status": "updated", "system": payload}
return {"status": "updated", "system": system_settings}
@router.get("/notifications") @router.get("/notifications")
async def get_notification_settings(current_user: User = Depends(get_current_user)): async def get_notification_settings(
return {"notifications": notification_settings} current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
return {"notifications": await get_setting_payload(db, "notifications")}
@router.put("/notifications") @router.put("/notifications")
async def update_notification_settings( async def update_notification_settings(
settings: NotificationSettingsUpdate, settings: NotificationSettingsUpdate,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
global notification_settings payload = await save_setting_payload(db, "notifications", settings.model_dump())
notification_settings = settings.model_dump() return {"status": "updated", "notifications": payload}
return {"status": "updated", "notifications": notification_settings}
@router.get("/security") @router.get("/security")
async def get_security_settings(current_user: User = Depends(get_current_user)): async def get_security_settings(
return {"security": security_settings} current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
return {"security": await get_setting_payload(db, "security")}
@router.put("/security") @router.put("/security")
async def update_security_settings( async def update_security_settings(
settings: SecuritySettingsUpdate, settings: SecuritySettingsUpdate,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
): ):
global security_settings payload = await save_setting_payload(db, "security", settings.model_dump())
security_settings = settings.model_dump() return {"status": "updated", "security": payload}
return {"status": "updated", "security": security_settings}
@router.get("/collectors")
async def get_collector_settings(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
result = await db.execute(select(DataSource).order_by(DataSource.module, DataSource.id))
datasources = result.scalars().all()
return {"collectors": [serialize_collector(datasource) for datasource in datasources]}
@router.put("/collectors/{datasource_id}")
async def update_collector_settings(
datasource_id: int,
settings: CollectorSettingsUpdate,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
datasource = await db.get(DataSource, datasource_id)
if not datasource:
raise HTTPException(status_code=404, detail="Data source not found")
datasource.is_active = settings.is_active
datasource.priority = settings.priority
datasource.frequency_minutes = settings.frequency_minutes
await db.commit()
await db.refresh(datasource)
await sync_datasource_job(datasource.id)
return {"status": "updated", "collector": serialize_collector(datasource)}
@router.get("") @router.get("")
async def get_all_settings(current_user: User = Depends(get_current_user)): async def get_all_settings(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
result = await db.execute(select(DataSource).order_by(DataSource.module, DataSource.id))
datasources = result.scalars().all()
return { return {
"system": system_settings, "system": await get_setting_payload(db, "system"),
"notifications": notification_settings, "notifications": await get_setting_payload(db, "notifications"),
"security": security_settings, "security": await get_setting_payload(db, "security"),
"collectors": [serialize_collector(datasource) for datasource in datasources],
"generated_at": to_iso8601_utc(datetime.now(UTC)),
} }

View File

@@ -1,4 +1,4 @@
from datetime import datetime from datetime import UTC, datetime
from typing import Optional from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, status from fastapi import APIRouter, Depends, HTTPException, status
@@ -8,8 +8,10 @@ from sqlalchemy import text
from app.db.session import get_db from app.db.session import get_db
from app.models.user import User from app.models.user import User
from app.core.security import get_current_user from app.core.security import get_current_user
from app.core.time import to_iso8601_utc
from app.services.collectors.registry import collector_registry from app.services.collectors.registry import collector_registry
router = APIRouter() router = APIRouter()
@@ -60,8 +62,8 @@ async def list_tasks(
"datasource_id": t[1], "datasource_id": t[1],
"datasource_name": t[2], "datasource_name": t[2],
"status": t[3], "status": t[3],
"started_at": t[4].isoformat() if t[4] else None, "started_at": to_iso8601_utc(t[4]),
"completed_at": t[5].isoformat() if t[5] else None, "completed_at": to_iso8601_utc(t[5]),
"records_processed": t[6], "records_processed": t[6],
"error_message": t[7], "error_message": t[7],
} }
@@ -99,8 +101,8 @@ async def get_task(
"datasource_id": task[1], "datasource_id": task[1],
"datasource_name": task[2], "datasource_name": task[2],
"status": task[3], "status": task[3],
"started_at": task[4].isoformat() if task[4] else None, "started_at": to_iso8601_utc(task[4]),
"completed_at": task[5].isoformat() if task[5] else None, "completed_at": to_iso8601_utc(task[5]),
"records_processed": task[6], "records_processed": task[6],
"error_message": task[7], "error_message": task[7],
} }
@@ -146,8 +148,8 @@ async def trigger_collection(
"status": result.get("status", "unknown"), "status": result.get("status", "unknown"),
"records_processed": result.get("records_processed", 0), "records_processed": result.get("records_processed", 0),
"error_message": result.get("error"), "error_message": result.get("error"),
"started_at": datetime.utcnow(), "started_at": datetime.now(UTC),
"completed_at": datetime.utcnow(), "completed_at": datetime.now(UTC),
}, },
) )

View File

@@ -1,17 +1,30 @@
"""Visualization API - GeoJSON endpoints for 3D Earth display""" """Visualization API - GeoJSON endpoints for 3D Earth display
from fastapi import APIRouter, HTTPException, Depends Unified API for all visualization data sources.
Returns GeoJSON format compatible with Three.js, CesiumJS, and Unreal Cesium.
"""
from datetime import UTC, datetime
from fastapi import APIRouter, HTTPException, Depends, Query
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select from sqlalchemy import select, func
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from app.core.collected_data_fields import get_record_field
from app.core.satellite_tle import build_tle_lines_from_elements
from app.core.time import to_iso8601_utc
from app.db.session import get_db from app.db.session import get_db
from app.models.bgp_anomaly import BGPAnomaly
from app.models.collected_data import CollectedData from app.models.collected_data import CollectedData
from app.services.cable_graph import build_graph_from_data, CableGraph from app.services.cable_graph import build_graph_from_data, CableGraph
from app.services.collectors.bgp_common import RIPE_RIS_COLLECTOR_COORDS
router = APIRouter() router = APIRouter()
# ============== Converter Functions ==============
def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]: def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"""Convert cable records to GeoJSON FeatureCollection""" """Convert cable records to GeoJSON FeatureCollection"""
features = [] features = []
@@ -66,6 +79,7 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"geometry": {"type": "MultiLineString", "coordinates": all_lines}, "geometry": {"type": "MultiLineString", "coordinates": all_lines},
"properties": { "properties": {
"id": record.id, "id": record.id,
"cable_id": record.name,
"source_id": record.source_id, "source_id": record.source_id,
"Name": record.name, "Name": record.name,
"name": record.name, "name": record.name,
@@ -74,9 +88,9 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"rfs": metadata.get("rfs"), "rfs": metadata.get("rfs"),
"RFS": metadata.get("rfs"), "RFS": metadata.get("rfs"),
"status": metadata.get("status", "active"), "status": metadata.get("status", "active"),
"length": record.value, "length": get_record_field(record, "value"),
"length_km": record.value, "length_km": get_record_field(record, "value"),
"SHAPE__Length": record.value, "SHAPE__Length": get_record_field(record, "value"),
"url": metadata.get("url"), "url": metadata.get("url"),
"color": metadata.get("color"), "color": metadata.get("color"),
"year": metadata.get("year"), "year": metadata.get("year"),
@@ -87,33 +101,140 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
return {"type": "FeatureCollection", "features": features} return {"type": "FeatureCollection", "features": features}
def convert_landing_point_to_geojson(records: List[CollectedData]) -> Dict[str, Any]: def convert_landing_point_to_geojson(records: List[CollectedData], city_to_cable_ids_map: Dict[int, List[int]] = None, cable_id_to_name_map: Dict[int, str] = None) -> Dict[str, Any]:
"""Convert landing point records to GeoJSON FeatureCollection""" features = []
for record in records:
try:
latitude = get_record_field(record, "latitude")
longitude = get_record_field(record, "longitude")
lat = float(latitude) if latitude else None
lon = float(longitude) if longitude else None
except (ValueError, TypeError):
continue
if lat is None or lon is None:
continue
metadata = record.extra_data or {}
city_id = metadata.get("city_id")
props = {
"id": record.id,
"source_id": record.source_id,
"name": record.name,
"country": get_record_field(record, "country"),
"city": get_record_field(record, "city"),
"is_tbd": metadata.get("is_tbd", False),
}
cable_names = []
if city_to_cable_ids_map and city_id in city_to_cable_ids_map:
for cable_id in city_to_cable_ids_map[city_id]:
if cable_id_to_name_map and cable_id in cable_id_to_name_map:
cable_names.append(cable_id_to_name_map[cable_id])
if cable_names:
props["cable_names"] = cable_names
features.append(
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [lon, lat]},
"properties": props,
}
)
return {"type": "FeatureCollection", "features": features}
def convert_satellite_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"""Convert satellite TLE records to GeoJSON"""
features = []
for record in records:
metadata = record.extra_data or {}
norad_id = metadata.get("norad_cat_id")
if not norad_id:
continue
tle_line1 = metadata.get("tle_line1")
tle_line2 = metadata.get("tle_line2")
if not tle_line1 or not tle_line2:
tle_line1, tle_line2 = build_tle_lines_from_elements(
norad_cat_id=norad_id,
epoch=metadata.get("epoch"),
inclination=metadata.get("inclination"),
raan=metadata.get("raan"),
eccentricity=metadata.get("eccentricity"),
arg_of_perigee=metadata.get("arg_of_perigee"),
mean_anomaly=metadata.get("mean_anomaly"),
mean_motion=metadata.get("mean_motion"),
)
features.append(
{
"type": "Feature",
"id": norad_id,
"geometry": {"type": "Point", "coordinates": [0, 0, 0]},
"properties": {
"id": record.id,
"norad_cat_id": norad_id,
"name": record.name,
"international_designator": metadata.get("international_designator"),
"epoch": metadata.get("epoch"),
"inclination": metadata.get("inclination"),
"raan": metadata.get("raan"),
"eccentricity": metadata.get("eccentricity"),
"arg_of_perigee": metadata.get("arg_of_perigee"),
"mean_anomaly": metadata.get("mean_anomaly"),
"mean_motion": metadata.get("mean_motion"),
"bstar": metadata.get("bstar"),
"classification_type": metadata.get("classification_type"),
"tle_line1": tle_line1,
"tle_line2": tle_line2,
"data_type": "satellite_tle",
},
}
)
return {"type": "FeatureCollection", "features": features}
def convert_supercomputer_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"""Convert TOP500 supercomputer records to GeoJSON"""
features = [] features = []
for record in records: for record in records:
try: try:
lat = float(record.latitude) if record.latitude else None latitude = get_record_field(record, "latitude")
lon = float(record.longitude) if record.longitude else None longitude = get_record_field(record, "longitude")
lat = float(latitude) if latitude and latitude != "0.0" else None
lon = (
float(longitude) if longitude and longitude != "0.0" else None
)
except (ValueError, TypeError): except (ValueError, TypeError):
continue lat, lon = None, None
if lat is None or lon is None:
continue
metadata = record.extra_data or {} metadata = record.extra_data or {}
features.append( features.append(
{ {
"type": "Feature", "type": "Feature",
"geometry": {"type": "Point", "coordinates": [lon, lat]}, "id": record.id,
"geometry": {"type": "Point", "coordinates": [lon or 0, lat or 0]},
"properties": { "properties": {
"id": record.id, "id": record.id,
"source_id": record.source_id,
"name": record.name, "name": record.name,
"country": record.country, "rank": metadata.get("rank"),
"city": record.city, "r_max": get_record_field(record, "rmax"),
"is_tbd": metadata.get("is_tbd", False), "r_peak": get_record_field(record, "rpeak"),
"cores": get_record_field(record, "cores"),
"power": get_record_field(record, "power"),
"country": get_record_field(record, "country"),
"city": get_record_field(record, "city"),
"data_type": "supercomputer",
}, },
} }
) )
@@ -121,6 +242,168 @@ def convert_landing_point_to_geojson(records: List[CollectedData]) -> Dict[str,
return {"type": "FeatureCollection", "features": features} return {"type": "FeatureCollection", "features": features}
def convert_gpu_cluster_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
"""Convert GPU cluster records to GeoJSON"""
features = []
for record in records:
try:
latitude = get_record_field(record, "latitude")
longitude = get_record_field(record, "longitude")
lat = float(latitude) if latitude else None
lon = float(longitude) if longitude else None
except (ValueError, TypeError):
lat, lon = None, None
metadata = record.extra_data or {}
features.append(
{
"type": "Feature",
"id": record.id,
"geometry": {"type": "Point", "coordinates": [lon or 0, lat or 0]},
"properties": {
"id": record.id,
"name": record.name,
"country": get_record_field(record, "country"),
"city": get_record_field(record, "city"),
"metadata": metadata,
"data_type": "gpu_cluster",
},
}
)
return {"type": "FeatureCollection", "features": features}
def convert_bgp_anomalies_to_geojson(records: List[BGPAnomaly]) -> Dict[str, Any]:
    """Convert BGP anomaly records into a GeoJSON FeatureCollection.

    Each anomaly is anchored at the first RIS route collector that can be
    located in ``RIPE_RIS_COLLECTOR_COORDS``; records with no locatable
    collector are skipped entirely.
    """
    features = []
    for record in records:
        evidence = record.evidence or {}
        # Hoisted once: several fallback paths below scan the nested events.
        nested_events = evidence.get("events") or []

        # Collector list: explicit evidence list, then the record's peer
        # scope, then collectors named inside nested event payloads.
        collectors = evidence.get("collectors") or record.peer_scope or []
        if not collectors:
            collectors = [
                str((item or {}).get("collector") or "").strip()
                for item in nested_events
                if (item or {}).get("collector")
            ]
        collectors = [collector for collector in collectors if collector]

        collector = collectors[0] if collectors else None
        location = RIPE_RIS_COLLECTOR_COORDS.get(str(collector)) if collector else None
        if location is None:
            # Fall back to the first nested event whose collector we can place.
            for item in nested_events:
                collector_name = (item or {}).get("collector")
                if collector_name and collector_name in RIPE_RIS_COLLECTOR_COORDS:
                    location = RIPE_RIS_COLLECTOR_COORDS[collector_name]
                    collector = collector_name
                    break
        if location is None:
            continue

        # AS path: prefer the top-level evidence value, otherwise the first
        # non-empty path found in a nested event.
        as_path = evidence.get("as_path")
        as_path = as_path if isinstance(as_path, list) else []
        if not as_path:
            for item in nested_events:
                candidate_path = (item or {}).get("as_path")
                if isinstance(candidate_path, list) and candidate_path:
                    as_path = candidate_path
                    break

        # Unique (country, city) regions across every locatable collector.
        impacted_regions = []
        seen_regions = set()
        for collector_name in collectors:
            collector_location = RIPE_RIS_COLLECTOR_COORDS.get(str(collector_name))
            if not collector_location:
                continue
            region_key = (
                collector_location.get("country"),
                collector_location.get("city"),
            )
            if region_key in seen_regions:
                continue
            seen_regions.add(region_key)
            impacted_regions.append(
                {
                    "collector": collector_name,
                    "country": collector_location.get("country"),
                    "city": collector_location.get("city"),
                    "latitude": collector_location.get("latitude"),
                    "longitude": collector_location.get("longitude"),
                }
            )

        features.append(
            {
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": [location["longitude"], location["latitude"]],
                },
                "properties": {
                    "id": record.id,
                    "collector": collector,
                    "city": location.get("city"),
                    "country": location.get("country"),
                    "source": record.source,
                    "anomaly_type": record.anomaly_type,
                    "severity": record.severity,
                    "status": record.status,
                    "prefix": record.prefix,
                    "origin_asn": record.origin_asn,
                    "new_origin_asn": record.new_origin_asn,
                    "collectors": collectors,
                    "collector_count": len(collectors) or 1,
                    "as_path": as_path,
                    "impacted_regions": impacted_regions,
                    "confidence": record.confidence,
                    "summary": record.summary,
                    "created_at": to_iso8601_utc(record.created_at),
                },
            }
        )
    return {"type": "FeatureCollection", "features": features}
def convert_bgp_collectors_to_geojson() -> Dict[str, Any]:
    """Build a GeoJSON FeatureCollection for every known RIPE RIS collector."""
    feature_list = [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [info["longitude"], info["latitude"]],
            },
            "properties": {
                "collector": name,
                "city": info.get("city"),
                "country": info.get("country"),
                "status": "online",
            },
        }
        for name, info in sorted(RIPE_RIS_COLLECTOR_COORDS.items())
    ]
    return {"type": "FeatureCollection", "features": feature_list}
# ============== API Endpoints ==============
@router.get("/geo/cables") @router.get("/geo/cables")
async def get_cables_geojson(db: AsyncSession = Depends(get_db)): async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
"""获取海底电缆 GeoJSON 数据 (LineString)""" """获取海底电缆 GeoJSON 数据 (LineString)"""
@@ -144,19 +427,45 @@ async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
@router.get("/geo/landing-points") @router.get("/geo/landing-points")
async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)): async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
"""获取登陆点 GeoJSON 数据 (Point)"""
try: try:
stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points") landing_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
result = await db.execute(stmt) landing_result = await db.execute(landing_stmt)
records = result.scalars().all() records = landing_result.scalars().all()
relation_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cable_landing_relation")
relation_result = await db.execute(relation_stmt)
relation_records = relation_result.scalars().all()
cable_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
cable_result = await db.execute(cable_stmt)
cable_records = cable_result.scalars().all()
city_to_cable_ids_map = {}
for rel in relation_records:
if rel.extra_data:
city_id = rel.extra_data.get("city_id")
cable_id = rel.extra_data.get("cable_id")
if city_id is not None and cable_id is not None:
if city_id not in city_to_cable_ids_map:
city_to_cable_ids_map[city_id] = []
if cable_id not in city_to_cable_ids_map[city_id]:
city_to_cable_ids_map[city_id].append(cable_id)
cable_id_to_name_map = {}
for cable in cable_records:
if cable.extra_data:
cable_id = cable.extra_data.get("cable_id")
cable_name = cable.name
if cable_id and cable_name:
cable_id_to_name_map[cable_id] = cable_name
if not records: if not records:
raise HTTPException( raise HTTPException(
status_code=404, status_code=404,
detail="No landing point data found. Please run the fao_landing_points collector first.", detail="No landing point data found. Please run the arcgis_landing_points collector first.",
) )
return convert_landing_point_to_geojson(records) return convert_landing_point_to_geojson(records, city_to_cable_ids_map, cable_id_to_name_map)
except HTTPException: except HTTPException:
raise raise
except Exception as e: except Exception as e:
@@ -165,14 +474,36 @@ async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
@router.get("/geo/all") @router.get("/geo/all")
async def get_all_geojson(db: AsyncSession = Depends(get_db)): async def get_all_geojson(db: AsyncSession = Depends(get_db)):
"""获取所有可视化数据 (电缆 + 登陆点)"""
cables_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables") cables_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
cables_result = await db.execute(cables_stmt) cables_result = await db.execute(cables_stmt)
cables_records = cables_result.scalars().all() cables_records = cables_result.scalars().all()
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points") points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
points_result = await db.execute(points_stmt) points_result = await db.execute(points_stmt)
points_records = points_result.scalars().all() points_records = points_result.scalars().all()
relation_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cable_landing_relation")
relation_result = await db.execute(relation_stmt)
relation_records = relation_result.scalars().all()
city_to_cable_ids_map = {}
for rel in relation_records:
if rel.extra_data:
city_id = rel.extra_data.get("city_id")
cable_id = rel.extra_data.get("cable_id")
if city_id is not None and cable_id is not None:
if city_id not in city_to_cable_ids_map:
city_to_cable_ids_map[city_id] = []
if cable_id not in city_to_cable_ids_map[city_id]:
city_to_cable_ids_map[city_id].append(cable_id)
cable_id_to_name_map = {}
for cable in cables_records:
if cable.extra_data:
cable_id = cable.extra_data.get("cable_id")
cable_name = cable.name
if cable_id and cable_name:
cable_id_to_name_map[cable_id] = cable_name
cables = ( cables = (
convert_cable_to_geojson(cables_records) convert_cable_to_geojson(cables_records)
@@ -180,7 +511,7 @@ async def get_all_geojson(db: AsyncSession = Depends(get_db)):
else {"type": "FeatureCollection", "features": []} else {"type": "FeatureCollection", "features": []}
) )
points = ( points = (
convert_landing_point_to_geojson(points_records) convert_landing_point_to_geojson(points_records, city_to_cable_ids_map, cable_id_to_name_map)
if points_records if points_records
else {"type": "FeatureCollection", "features": []} else {"type": "FeatureCollection", "features": []}
) )
@@ -195,6 +526,208 @@ async def get_all_geojson(db: AsyncSession = Depends(get_db)):
} }
@router.get("/geo/satellites")
async def get_satellites_geojson(
    limit: Optional[int] = Query(
        None,
        ge=1,
        description="Maximum number of satellites to return. Omit for no limit.",
    ),
    db: AsyncSession = Depends(get_db),
):
    """Return satellite TLE records as a GeoJSON FeatureCollection with a count."""
    query = (
        select(CollectedData)
        .where(CollectedData.source == "celestrak_tle")
        .where(CollectedData.name != "Unknown")
        .order_by(CollectedData.id.desc())
    )
    if limit is not None:
        query = query.limit(limit)
    rows = (await db.execute(query)).scalars().all()
    if not rows:
        return {"type": "FeatureCollection", "features": [], "count": 0}
    collection = convert_satellite_to_geojson(list(rows))
    return {**collection, "count": len(collection.get("features", []))}
@router.get("/geo/supercomputers")
async def get_supercomputers_geojson(
    limit: int = 500,
    db: AsyncSession = Depends(get_db),
):
    """Return TOP500 supercomputer sites as a GeoJSON FeatureCollection with a count."""
    query = (
        select(CollectedData)
        .where(CollectedData.source == "top500")
        .where(CollectedData.name != "Unknown")
        .limit(limit)
    )
    rows = (await db.execute(query)).scalars().all()
    if not rows:
        return {"type": "FeatureCollection", "features": [], "count": 0}
    collection = convert_supercomputer_to_geojson(list(rows))
    return {**collection, "count": len(collection.get("features", []))}
@router.get("/geo/gpu-clusters")
async def get_gpu_clusters_geojson(
    limit: int = 100,
    db: AsyncSession = Depends(get_db),
):
    """Return GPU cluster sites as a GeoJSON FeatureCollection with a count."""
    query = (
        select(CollectedData)
        .where(CollectedData.source == "epoch_ai_gpu")
        .where(CollectedData.name != "Unknown")
        .limit(limit)
    )
    rows = (await db.execute(query)).scalars().all()
    if not rows:
        return {"type": "FeatureCollection", "features": [], "count": 0}
    collection = convert_gpu_cluster_to_geojson(list(rows))
    return {**collection, "count": len(collection.get("features", []))}
@router.get("/geo/bgp-anomalies")
async def get_bgp_anomalies_geojson(
    severity: Optional[str] = Query(None),
    status: Optional[str] = Query("active"),
    limit: int = Query(200, ge=1, le=1000),
    db: AsyncSession = Depends(get_db),
):
    """Return recent BGP anomalies as GeoJSON, optionally filtered by severity/status."""
    query = select(BGPAnomaly)
    if severity:
        query = query.where(BGPAnomaly.severity == severity)
    if status:
        query = query.where(BGPAnomaly.status == status)
    query = query.order_by(BGPAnomaly.created_at.desc()).limit(limit)
    result = await db.execute(query)
    geojson = convert_bgp_anomalies_to_geojson(list(result.scalars().all()))
    return {**geojson, "count": len(geojson.get("features", []))}
@router.get("/geo/bgp-collectors")
async def get_bgp_collectors_geojson():
    """Return every known RIPE RIS collector location as GeoJSON with a count."""
    collection = convert_bgp_collectors_to_geojson()
    return {**collection, "count": len(collection.get("features", []))}
@router.get("/all")
async def get_all_visualization_data(db: AsyncSession = Depends(get_db)):
    """Unified endpoint returning every visualization dataset.

    Returns GeoJSON FeatureCollections for all data types:
    - satellites: satellite TLE data
    - cables: submarine cables
    - landing_points: cable landing points (with connected-cable info)
    - supercomputers: TOP500 sites
    - gpu_clusters: GPU clusters
    """
    cables_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
    cables_result = await db.execute(cables_stmt)
    cables_records = list(cables_result.scalars().all())

    points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
    points_result = await db.execute(points_stmt)
    points_records = list(points_result.scalars().all())

    # Cable <-> landing-point relations so each landing point can list its
    # connected cables, mirroring the /geo/all and /geo/landing-points endpoints.
    relation_stmt = select(CollectedData).where(
        CollectedData.source == "arcgis_cable_landing_relation"
    )
    relation_result = await db.execute(relation_stmt)
    relation_records = list(relation_result.scalars().all())

    city_to_cable_ids_map = {}
    for rel in relation_records:
        extra = rel.extra_data or {}
        city_id = extra.get("city_id")
        cable_id = extra.get("cable_id")
        if city_id is None or cable_id is None:
            continue
        cable_ids = city_to_cable_ids_map.setdefault(city_id, [])
        if cable_id not in cable_ids:
            cable_ids.append(cable_id)

    cable_id_to_name_map = {}
    for cable in cables_records:
        extra = cable.extra_data or {}
        cable_id = extra.get("cable_id")
        if cable_id and cable.name:
            cable_id_to_name_map[cable_id] = cable.name

    satellites_stmt = (
        select(CollectedData)
        .where(CollectedData.source == "celestrak_tle")
        .where(CollectedData.name != "Unknown")
    )
    satellites_result = await db.execute(satellites_stmt)
    satellites_records = list(satellites_result.scalars().all())

    supercomputers_stmt = (
        select(CollectedData)
        .where(CollectedData.source == "top500")
        .where(CollectedData.name != "Unknown")
    )
    supercomputers_result = await db.execute(supercomputers_stmt)
    supercomputers_records = list(supercomputers_result.scalars().all())

    gpu_stmt = (
        select(CollectedData)
        .where(CollectedData.source == "epoch_ai_gpu")
        .where(CollectedData.name != "Unknown")
    )
    gpu_result = await db.execute(gpu_stmt)
    gpu_records = list(gpu_result.scalars().all())

    cables = (
        convert_cable_to_geojson(cables_records)
        if cables_records
        else {"type": "FeatureCollection", "features": []}
    )
    # Pass the relation maps, consistent with the /geo/all endpoint, so the
    # landing-point features include their connected cables.
    landing_points = (
        convert_landing_point_to_geojson(points_records, city_to_cable_ids_map, cable_id_to_name_map)
        if points_records
        else {"type": "FeatureCollection", "features": []}
    )
    satellites = (
        convert_satellite_to_geojson(satellites_records)
        if satellites_records
        else {"type": "FeatureCollection", "features": []}
    )
    supercomputers = (
        convert_supercomputer_to_geojson(supercomputers_records)
        if supercomputers_records
        else {"type": "FeatureCollection", "features": []}
    )
    gpu_clusters = (
        convert_gpu_cluster_to_geojson(gpu_records)
        if gpu_records
        else {"type": "FeatureCollection", "features": []}
    )
    return {
        "generated_at": to_iso8601_utc(datetime.now(UTC)),
        "version": "1.0",
        "data": {
            "satellites": satellites,
            "cables": cables,
            "landing_points": landing_points,
            "supercomputers": supercomputers,
            "gpu_clusters": gpu_clusters,
        },
        "stats": {
            "total_features": (
                len(satellites.get("features", []))
                + len(cables.get("features", []))
                + len(landing_points.get("features", []))
                + len(supercomputers.get("features", []))
                + len(gpu_clusters.get("features", []))
            ),
            "satellites": len(satellites.get("features", [])),
            "cables": len(cables.get("features", [])),
            "landing_points": len(landing_points.get("features", [])),
            "supercomputers": len(supercomputers.get("features", [])),
            "gpu_clusters": len(gpu_clusters.get("features", [])),
        },
    }
# Cache for cable graph # Cache for cable graph
_cable_graph: Optional[CableGraph] = None _cable_graph: Optional[CableGraph] = None
@@ -208,7 +741,7 @@ async def get_cable_graph(db: AsyncSession) -> CableGraph:
cables_result = await db.execute(cables_stmt) cables_result = await db.execute(cables_stmt)
cables_records = list(cables_result.scalars().all()) cables_records = list(cables_result.scalars().all())
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points") points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
points_result = await db.execute(points_stmt) points_result = await db.execute(points_stmt)
points_records = list(points_result.scalars().all()) points_records = list(points_result.scalars().all())

View File

@@ -3,13 +3,14 @@
import asyncio import asyncio
import json import json
import logging import logging
from datetime import datetime from datetime import UTC, datetime
from typing import Optional from typing import Optional
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query
from jose import jwt, JWTError from jose import jwt, JWTError
from app.core.config import settings from app.core.config import settings
from app.core.time import to_iso8601_utc
from app.core.websocket.manager import manager from app.core.websocket.manager import manager
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -59,6 +60,7 @@ async def websocket_endpoint(
"ixp_nodes", "ixp_nodes",
"alerts", "alerts",
"dashboard", "dashboard",
"datasource_tasks",
], ],
}, },
} }
@@ -72,7 +74,7 @@ async def websocket_endpoint(
await websocket.send_json( await websocket.send_json(
{ {
"type": "heartbeat", "type": "heartbeat",
"data": {"action": "pong", "timestamp": datetime.utcnow().isoformat()}, "data": {"action": "pong", "timestamp": to_iso8601_utc(datetime.now(UTC))},
} }
) )
elif data.get("type") == "subscribe": elif data.get("type") == "subscribe":

View File

@@ -0,0 +1,62 @@
from typing import Any, Dict, Optional
# Maps each logical field name to the metadata keys that may carry its value,
# checked in priority order (e.g. both "rmax" and "r_max" spellings are accepted).
FIELD_ALIASES = {
    "country": ("country",),
    "city": ("city",),
    "latitude": ("latitude",),
    "longitude": ("longitude",),
    "value": ("value",),
    "unit": ("unit",),
    "cores": ("cores",),
    "rmax": ("rmax", "r_max"),
    "rpeak": ("rpeak", "r_peak"),
    "power": ("power",),
}
def get_metadata_field(metadata: Optional[Dict[str, Any]], field: str, fallback: Any = None) -> Any:
    """Look up *field* in *metadata*, honoring key aliases.

    Returns the first non-None, non-empty-string value among the aliased
    keys, or *fallback* when metadata is not a dict or no key matches.
    """
    if not isinstance(metadata, dict):
        return fallback
    for key in FIELD_ALIASES.get(field, (field,)):
        candidate = metadata.get(key)
        if candidate not in (None, ""):
            return candidate
    return fallback
def build_dynamic_metadata(
    metadata: Optional[Dict[str, Any]],
    *,
    country: Any = None,
    city: Any = None,
    latitude: Any = None,
    longitude: Any = None,
    value: Any = None,
    unit: Any = None,
) -> Dict[str, Any]:
    """Return a copy of *metadata* with blank fields filled from the keyword fallbacks.

    A fallback is applied only when it is itself non-blank and the (aliased)
    field is currently missing or blank in the metadata.
    """
    combined = dict(metadata) if isinstance(metadata, dict) else {}
    candidates = (
        ("country", country),
        ("city", city),
        ("latitude", latitude),
        ("longitude", longitude),
        ("value", value),
        ("unit", unit),
    )
    for name, candidate in candidates:
        if candidate in (None, ""):
            continue
        if get_metadata_field(combined, name) in (None, ""):
            combined[name] = candidate
    return combined
def get_record_field(record: Any, field: str) -> Any:
    """Fetch a logical field for *record*, preferring extra_data over attributes.

    The TOP500-style numeric fields (cores/rmax/rpeak/power) deliberately get
    no attribute fallback — they are only ever read from the metadata dict.
    """
    metadata = getattr(record, "extra_data", None) or {}
    if field in {"cores", "rmax", "rpeak", "power"}:
        attr_fallback = None
    else:
        attr_fallback = getattr(record, field, None)
    return get_metadata_field(metadata, field, fallback=attr_fallback)

View File

@@ -6,9 +6,16 @@ import os
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
ROOT_DIR = Path(__file__).parent.parent.parent.parent
VERSION_FILE = ROOT_DIR / "VERSION"
class Settings(BaseSettings): class Settings(BaseSettings):
PROJECT_NAME: str = "Intelligent Planet Plan" PROJECT_NAME: str = "Intelligent Planet Plan"
VERSION: str = "1.0.0" VERSION: str = (
os.getenv("APP_VERSION")
or (VERSION_FILE.read_text(encoding="utf-8").strip() if VERSION_FILE.exists() else "0.19.0")
)
API_V1_STR: str = "/api/v1" API_V1_STR: str = "/api/v1"
SECRET_KEY: str = "your-secret-key-change-in-production" SECRET_KEY: str = "your-secret-key-change-in-production"
ALGORITHM: str = "HS256" ALGORITHM: str = "HS256"
@@ -27,6 +34,9 @@ class Settings(BaseSettings):
CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"] CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"]
SPACETRACK_USERNAME: str = ""
SPACETRACK_PASSWORD: str = ""
@property @property
def REDIS_URL(self) -> str: def REDIS_URL(self) -> str:
return os.getenv( return os.getenv(
@@ -34,7 +44,7 @@ class Settings(BaseSettings):
) )
class Config: class Config:
env_file = ".env" env_file = Path(__file__).parent.parent.parent / ".env"
case_sensitive = True case_sensitive = True

View File

@@ -0,0 +1,280 @@
import re
from typing import Any, Optional
# (canonical Chinese name, [English names / ISO alpha-2 / alpha-3 / common variants])
COUNTRY_ENTRIES = [
    ("阿富汗", ["Afghanistan", "AF", "AFG"]),
    ("阿尔巴尼亚", ["Albania", "AL", "ALB"]),
    ("阿尔及利亚", ["Algeria", "DZ", "DZA"]),
    ("安道尔", ["Andorra", "AD", "AND"]),
    ("安哥拉", ["Angola", "AO", "AGO"]),
    ("安提瓜和巴布达", ["Antigua and Barbuda", "AG", "ATG"]),
    ("阿根廷", ["Argentina", "AR", "ARG"]),
    ("亚美尼亚", ["Armenia", "AM", "ARM"]),
    ("澳大利亚", ["Australia", "AU", "AUS"]),
    ("奥地利", ["Austria", "AT", "AUT"]),
    ("阿塞拜疆", ["Azerbaijan", "AZ", "AZE"]),
    ("巴哈马", ["Bahamas", "BS", "BHS"]),
    ("巴林", ["Bahrain", "BH", "BHR"]),
    ("孟加拉国", ["Bangladesh", "BD", "BGD"]),
    ("巴巴多斯", ["Barbados", "BB", "BRB"]),
    ("白俄罗斯", ["Belarus", "BY", "BLR"]),
    ("比利时", ["Belgium", "BE", "BEL"]),
    ("伯利兹", ["Belize", "BZ", "BLZ"]),
    ("贝宁", ["Benin", "BJ", "BEN"]),
    ("不丹", ["Bhutan", "BT", "BTN"]),
    ("玻利维亚", ["Bolivia", "BO", "BOL", "Bolivia (Plurinational State of)"]),
    ("波斯尼亚和黑塞哥维那", ["Bosnia and Herzegovina", "BA", "BIH"]),
    ("博茨瓦纳", ["Botswana", "BW", "BWA"]),
    ("巴西", ["Brazil", "BR", "BRA"]),
    ("文莱", ["Brunei", "BN", "BRN", "Brunei Darussalam"]),
    ("保加利亚", ["Bulgaria", "BG", "BGR"]),
    ("布基纳法索", ["Burkina Faso", "BF", "BFA"]),
    ("布隆迪", ["Burundi", "BI", "BDI"]),
    ("柬埔寨", ["Cambodia", "KH", "KHM"]),
    ("喀麦隆", ["Cameroon", "CM", "CMR"]),
    ("加拿大", ["Canada", "CA", "CAN"]),
    ("佛得角", ["Cape Verde", "CV", "CPV", "Cabo Verde"]),
    ("中非", ["Central African Republic", "CF", "CAF"]),
    ("乍得", ["Chad", "TD", "TCD"]),
    ("智利", ["Chile", "CL", "CHL"]),
    ("中国", ["China", "CN", "CHN", "Mainland China", "PRC", "People's Republic of China"]),
    ("中国(香港)", ["Hong Kong", "HK", "HKG", "Hong Kong SAR", "China Hong Kong", "Hong Kong, China"]),
    ("中国(澳门)", ["Macao", "Macau", "MO", "MAC", "Macao SAR", "China Macao", "Macau, China"]),
    ("中国(台湾)", ["Taiwan", "TW", "TWN", "Chinese Taipei", "Taiwan, China"]),
    ("哥伦比亚", ["Colombia", "CO", "COL"]),
    ("科摩罗", ["Comoros", "KM", "COM"]),
    ("刚果(布)", ["Republic of the Congo", "Congo", "Congo-Brazzaville", "CG", "COG"]),
    ("刚果(金)", ["Democratic Republic of the Congo", "DR Congo", "Congo-Kinshasa", "CD", "COD"]),
    ("哥斯达黎加", ["Costa Rica", "CR", "CRI"]),
    ("科特迪瓦", ["Cote d'Ivoire", "Côte d'Ivoire", "Ivory Coast", "CI", "CIV"]),
    ("克罗地亚", ["Croatia", "HR", "HRV"]),
    ("古巴", ["Cuba", "CU", "CUB"]),
    ("塞浦路斯", ["Cyprus", "CY", "CYP"]),
    ("捷克", ["Czech Republic", "Czechia", "CZ", "CZE"]),
    ("丹麦", ["Denmark", "DK", "DNK"]),
    ("吉布提", ["Djibouti", "DJ", "DJI"]),
    ("多米尼克", ["Dominica", "DM", "DMA"]),
    ("多米尼加", ["Dominican Republic", "DO", "DOM"]),
    ("厄瓜多尔", ["Ecuador", "EC", "ECU"]),
    ("埃及", ["Egypt", "EG", "EGY"]),
    ("萨尔瓦多", ["El Salvador", "SV", "SLV"]),
    ("赤道几内亚", ["Equatorial Guinea", "GQ", "GNQ"]),
    ("厄立特里亚", ["Eritrea", "ER", "ERI"]),
    ("爱沙尼亚", ["Estonia", "EE", "EST"]),
    ("埃斯瓦蒂尼", ["Eswatini", "SZ", "SWZ", "Swaziland"]),
    ("埃塞俄比亚", ["Ethiopia", "ET", "ETH"]),
    ("斐济", ["Fiji", "FJ", "FJI"]),
    ("芬兰", ["Finland", "FI", "FIN"]),
    ("法国", ["France", "FR", "FRA"]),
    ("加蓬", ["Gabon", "GA", "GAB"]),
    ("冈比亚", ["Gambia", "GM", "GMB"]),
    ("格鲁吉亚", ["Georgia", "GE", "GEO"]),
    ("德国", ["Germany", "DE", "DEU"]),
    ("加纳", ["Ghana", "GH", "GHA"]),
    ("希腊", ["Greece", "GR", "GRC"]),
    ("格林纳达", ["Grenada", "GD", "GRD"]),
    ("危地马拉", ["Guatemala", "GT", "GTM"]),
    ("几内亚", ["Guinea", "GN", "GIN"]),
    ("几内亚比绍", ["Guinea-Bissau", "GW", "GNB"]),
    ("圭亚那", ["Guyana", "GY", "GUY"]),
    ("海地", ["Haiti", "HT", "HTI"]),
    ("洪都拉斯", ["Honduras", "HN", "HND"]),
    ("匈牙利", ["Hungary", "HU", "HUN"]),
    ("冰岛", ["Iceland", "IS", "ISL"]),
    ("印度", ["India", "IN", "IND"]),
    ("印度尼西亚", ["Indonesia", "ID", "IDN"]),
    ("伊朗", ["Iran", "IR", "IRN", "Iran (Islamic Republic of)"]),
    ("伊拉克", ["Iraq", "IQ", "IRQ"]),
    ("爱尔兰", ["Ireland", "IE", "IRL"]),
    ("以色列", ["Israel", "IL", "ISR"]),
    ("意大利", ["Italy", "IT", "ITA"]),
    ("牙买加", ["Jamaica", "JM", "JAM"]),
    ("日本", ["Japan", "JP", "JPN"]),
    ("约旦", ["Jordan", "JO", "JOR"]),
    ("哈萨克斯坦", ["Kazakhstan", "KZ", "KAZ"]),
    ("肯尼亚", ["Kenya", "KE", "KEN"]),
    ("基里巴斯", ["Kiribati", "KI", "KIR"]),
    ("朝鲜", ["North Korea", "Korea, DPRK", "Democratic People's Republic of Korea", "KP", "PRK"]),
    ("韩国", ["South Korea", "Republic of Korea", "Korea", "KR", "KOR"]),
    ("科威特", ["Kuwait", "KW", "KWT"]),
    ("吉尔吉斯斯坦", ["Kyrgyzstan", "KG", "KGZ"]),
    ("老挝", ["Laos", "Lao PDR", "Lao People's Democratic Republic", "LA", "LAO"]),
    ("拉脱维亚", ["Latvia", "LV", "LVA"]),
    ("黎巴嫩", ["Lebanon", "LB", "LBN"]),
    ("莱索托", ["Lesotho", "LS", "LSO"]),
    ("利比里亚", ["Liberia", "LR", "LBR"]),
    ("利比亚", ["Libya", "LY", "LBY"]),
    ("列支敦士登", ["Liechtenstein", "LI", "LIE"]),
    ("立陶宛", ["Lithuania", "LT", "LTU"]),
    ("卢森堡", ["Luxembourg", "LU", "LUX"]),
    ("马达加斯加", ["Madagascar", "MG", "MDG"]),
    ("马拉维", ["Malawi", "MW", "MWI"]),
    ("马来西亚", ["Malaysia", "MY", "MYS"]),
    ("马尔代夫", ["Maldives", "MV", "MDV"]),
    ("马里", ["Mali", "ML", "MLI"]),
    ("马耳他", ["Malta", "MT", "MLT"]),
    ("马绍尔群岛", ["Marshall Islands", "MH", "MHL"]),
    ("毛里塔尼亚", ["Mauritania", "MR", "MRT"]),
    ("毛里求斯", ["Mauritius", "MU", "MUS"]),
    ("墨西哥", ["Mexico", "MX", "MEX"]),
    ("密克罗尼西亚", ["Micronesia", "FM", "FSM", "Federated States of Micronesia"]),
    ("摩尔多瓦", ["Moldova", "MD", "MDA", "Republic of Moldova"]),
    ("摩纳哥", ["Monaco", "MC", "MCO"]),
    ("蒙古", ["Mongolia", "MN", "MNG"]),
    ("黑山", ["Montenegro", "ME", "MNE"]),
    ("摩洛哥", ["Morocco", "MA", "MAR"]),
    ("莫桑比克", ["Mozambique", "MZ", "MOZ"]),
    ("缅甸", ["Myanmar", "MM", "MMR", "Burma"]),
    ("纳米比亚", ["Namibia", "NA", "NAM"]),
    ("瑙鲁", ["Nauru", "NR", "NRU"]),
    ("尼泊尔", ["Nepal", "NP", "NPL"]),
    ("荷兰", ["Netherlands", "NL", "NLD"]),
    ("新西兰", ["New Zealand", "NZ", "NZL"]),
    ("尼加拉瓜", ["Nicaragua", "NI", "NIC"]),
    ("尼日尔", ["Niger", "NE", "NER"]),
    ("尼日利亚", ["Nigeria", "NG", "NGA"]),
    ("北马其顿", ["North Macedonia", "MK", "MKD", "Macedonia"]),
    ("挪威", ["Norway", "NO", "NOR"]),
    ("阿曼", ["Oman", "OM", "OMN"]),
    ("巴基斯坦", ["Pakistan", "PK", "PAK"]),
    ("帕劳", ["Palau", "PW", "PLW"]),
    ("巴勒斯坦", ["Palestine", "PS", "PSE", "State of Palestine"]),
    ("巴拿马", ["Panama", "PA", "PAN"]),
    ("巴布亚新几内亚", ["Papua New Guinea", "PG", "PNG"]),
    ("巴拉圭", ["Paraguay", "PY", "PRY"]),
    ("秘鲁", ["Peru", "PE", "PER"]),
    ("菲律宾", ["Philippines", "PH", "PHL"]),
    ("波兰", ["Poland", "PL", "POL"]),
    ("葡萄牙", ["Portugal", "PT", "PRT"]),
    ("卡塔尔", ["Qatar", "QA", "QAT"]),
    ("罗马尼亚", ["Romania", "RO", "ROU"]),
    ("俄罗斯", ["Russia", "Russian Federation", "RU", "RUS"]),
    ("卢旺达", ["Rwanda", "RW", "RWA"]),
    ("圣基茨和尼维斯", ["Saint Kitts and Nevis", "KN", "KNA"]),
    ("圣卢西亚", ["Saint Lucia", "LC", "LCA"]),
    ("圣文森特和格林纳丁斯", ["Saint Vincent and the Grenadines", "VC", "VCT"]),
    ("萨摩亚", ["Samoa", "WS", "WSM"]),
    ("圣马力诺", ["San Marino", "SM", "SMR"]),
    ("圣多美和普林西比", ["Sao Tome and Principe", "ST", "STP", "São Tomé and Príncipe"]),
    ("沙特阿拉伯", ["Saudi Arabia", "SA", "SAU"]),
    ("塞内加尔", ["Senegal", "SN", "SEN"]),
    ("塞尔维亚", ["Serbia", "RS", "SRB", "Kosovo", "XK", "XKS", "Republic of Kosovo"]),
    ("塞舌尔", ["Seychelles", "SC", "SYC"]),
    ("塞拉利昂", ["Sierra Leone", "SL", "SLE"]),
    ("新加坡", ["Singapore", "SG", "SGP"]),
    ("斯洛伐克", ["Slovakia", "SK", "SVK"]),
    ("斯洛文尼亚", ["Slovenia", "SI", "SVN"]),
    ("所罗门群岛", ["Solomon Islands", "SB", "SLB"]),
    ("索马里", ["Somalia", "SO", "SOM"]),
    ("南非", ["South Africa", "ZA", "ZAF"]),
    ("南苏丹", ["South Sudan", "SS", "SSD"]),
    ("西班牙", ["Spain", "ES", "ESP"]),
    ("斯里兰卡", ["Sri Lanka", "LK", "LKA"]),
    ("苏丹", ["Sudan", "SD", "SDN"]),
    ("苏里南", ["Suriname", "SR", "SUR"]),
    ("瑞典", ["Sweden", "SE", "SWE"]),
    ("瑞士", ["Switzerland", "CH", "CHE"]),
    ("叙利亚", ["Syria", "SY", "SYR", "Syrian Arab Republic"]),
    ("塔吉克斯坦", ["Tajikistan", "TJ", "TJK"]),
    ("坦桑尼亚", ["Tanzania", "TZ", "TZA", "United Republic of Tanzania"]),
    ("泰国", ["Thailand", "TH", "THA"]),
    ("东帝汶", ["Timor-Leste", "East Timor", "TL", "TLS"]),
    ("多哥", ["Togo", "TG", "TGO"]),
    ("汤加", ["Tonga", "TO", "TON"]),
    ("特立尼达和多巴哥", ["Trinidad and Tobago", "TT", "TTO"]),
    ("突尼斯", ["Tunisia", "TN", "TUN"]),
    ("土耳其", ["Turkey", "TR", "TUR", "Türkiye"]),
    ("土库曼斯坦", ["Turkmenistan", "TM", "TKM"]),
    ("图瓦卢", ["Tuvalu", "TV", "TUV"]),
    ("乌干达", ["Uganda", "UG", "UGA"]),
    ("乌克兰", ["Ukraine", "UA", "UKR"]),
    ("阿联酋", ["United Arab Emirates", "AE", "ARE", "UAE"]),
    ("英国", ["United Kingdom", "UK", "GB", "GBR", "Great Britain", "Britain", "England"]),
    ("美国", ["United States", "United States of America", "US", "USA", "U.S.", "U.S.A."]),
    ("乌拉圭", ["Uruguay", "UY", "URY"]),
    ("乌兹别克斯坦", ["Uzbekistan", "UZ", "UZB"]),
    ("瓦努阿图", ["Vanuatu", "VU", "VUT"]),
    ("梵蒂冈", ["Vatican City", "Holy See", "VA", "VAT"]),
    ("委内瑞拉", ["Venezuela", "VE", "VEN", "Venezuela (Bolivarian Republic of)"]),
    ("越南", ["Vietnam", "Viet Nam", "VN", "VNM"]),
    ("也门", ["Yemen", "YE", "YEM"]),
    ("赞比亚", ["Zambia", "ZM", "ZMB"]),
    ("津巴布韦", ["Zimbabwe", "ZW", "ZWE"]),
]
COUNTRY_OPTIONS = [entry[0] for entry in COUNTRY_ENTRIES]
CANONICAL_COUNTRY_SET = set(COUNTRY_OPTIONS)
# Placeholder values that should never resolve to a country.
INVALID_COUNTRY_VALUES = {
    "",
    "-",
    "--",
    "unknown",
    "n/a",
    "na",
    "none",
    "null",
    "global",
    "world",
    "worldwide",
    "xx",
}
# Strings made purely of digits/punctuation are data artifacts, not countries.
NUMERIC_LIKE_PATTERN = re.compile(r"^[\d\s,._%+\-]+$")
# casefolded alias -> canonical name; canonical -> [canonical, *aliases]
COUNTRY_ALIAS_MAP = {}
COUNTRY_VARIANTS_MAP = {}
for canonical, aliases in COUNTRY_ENTRIES:
    COUNTRY_ALIAS_MAP[canonical.casefold()] = canonical
    variants = [canonical, *aliases]
    COUNTRY_VARIANTS_MAP[canonical] = variants
    for alias in aliases:
        COUNTRY_ALIAS_MAP[alias.casefold()] = canonical
def normalize_country(value: Any) -> Optional[str]:
    """Normalize a free-form country value to its canonical Chinese name.

    Accepts canonical names, English names, and ISO-style alpha-2/alpha-3
    codes. Returns None for non-strings, blank/placeholder values,
    numeric-looking strings, and anything unrecognized.
    """
    if value is None:
        return None
    if not isinstance(value, str):
        return None
    normalized = re.sub(r"\s+", " ", value.strip())
    if not normalized:
        return None
    lowered = normalized.casefold()
    if lowered in INVALID_COUNTRY_VALUES:
        return None
    if NUMERIC_LIKE_PATTERN.fullmatch(normalized):
        return None
    if normalized in CANONICAL_COUNTRY_SET:
        return normalized
    match = COUNTRY_ALIAS_MAP.get(lowered)
    if match is not None:
        return match
    # Fix: look up the un-stripped form first so inputs like
    # "Iran (Islamic Republic of)" match their parenthesized alias keys,
    # then retry with ASCII parentheses removed (e.g. "(US)").
    stripped = re.sub(r"\s+", " ", normalized.replace("(", "").replace(")", "")).strip()
    if not stripped:
        return None
    if stripped in CANONICAL_COUNTRY_SET:
        return stripped
    return COUNTRY_ALIAS_MAP.get(stripped.casefold())
def get_country_search_variants(value: Any) -> list[str]:
    """Return all known spellings for a country value, deduped case-insensitively.

    Returns an empty list when the value cannot be normalized to a known country.
    """
    canonical = normalize_country(value)
    if canonical is None:
        return []
    results: list[str] = []
    seen_keys: set[str] = set()
    for variant in COUNTRY_VARIANTS_MAP.get(canonical, [canonical]):
        if not isinstance(variant, str):
            continue
        cleaned = re.sub(r"\s+", " ", variant.strip())
        if not cleaned:
            continue
        folded = cleaned.casefold()
        if folded not in seen_keys:
            seen_keys.add(folded)
            results.append(cleaned)
    return results

View File

@@ -0,0 +1,81 @@
import os
import yaml
from functools import lru_cache
from typing import Optional
# Maps each collector name to its dotted key path inside data_sources.yaml.
COLLECTOR_URL_KEYS = {
    "arcgis_cables": "arcgis.cable_url",
    "arcgis_landing_points": "arcgis.landing_point_url",
    "arcgis_cable_landing_relation": "arcgis.cable_landing_relation_url",
    "fao_landing_points": "fao.landing_point_url",
    "telegeography_cables": "telegeography.cable_url",
    "telegeography_landing": "telegeography.landing_point_url",
    "huggingface_models": "huggingface.models_url",
    "huggingface_datasets": "huggingface.datasets_url",
    "huggingface_spaces": "huggingface.spaces_url",
    "cloudflare_radar_device": "cloudflare.radar_device_url",
    "cloudflare_radar_traffic": "cloudflare.radar_traffic_url",
    "cloudflare_radar_top_locations": "cloudflare.radar_top_locations_url",
    "peeringdb_ixp": "peeringdb.ixp_url",
    "peeringdb_network": "peeringdb.network_url",
    "peeringdb_facility": "peeringdb.facility_url",
    "top500": "top500.url",
    "epoch_ai_gpu": "epoch_ai.gpu_clusters_url",
    "spacetrack_tle": "spacetrack.tle_query_url",
    "ris_live_bgp": "ris_live.url",
    "bgpstream_bgp": "bgpstream.url",
}
class DataSourcesConfig:
    """Resolves collector URLs from data_sources.yaml, with optional DB overrides."""

    def __init__(self, config_path: Optional[str] = None):
        """Load the YAML config; a missing file yields an empty configuration."""
        if config_path is None:
            config_path = os.path.join(os.path.dirname(__file__), "data_sources.yaml")
        self._yaml_config = {}
        if os.path.exists(config_path):
            # Explicit encoding: the YAML may contain non-ASCII text and the
            # platform default encoding is not guaranteed to be UTF-8.
            with open(config_path, "r", encoding="utf-8") as f:
                self._yaml_config = yaml.safe_load(f) or {}

    def get_yaml_url(self, collector_name: str) -> str:
        """Return the YAML-configured URL for *collector_name*, or "" if unknown."""
        key = COLLECTOR_URL_KEYS.get(collector_name, "")
        if not key:
            return ""
        value = self._yaml_config
        for part in key.split("."):
            if isinstance(value, dict):
                value = value.get(part, "")
            else:
                return ""
        return value if isinstance(value, str) else ""

    async def get_url(self, collector_name: str, db) -> str:
        """Return the active DB-configured endpoint if present, else the YAML URL."""
        yaml_url = self.get_yaml_url(collector_name)
        if not db:
            return yaml_url
        try:
            from sqlalchemy import select
            from app.models.datasource_config import DataSourceConfig

            query = select(DataSourceConfig).where(
                DataSourceConfig.name == collector_name, DataSourceConfig.is_active == True
            )
            result = await db.execute(query)
            db_config = result.scalar_one_or_none()
            if db_config and db_config.endpoint:
                return db_config.endpoint
        except Exception:
            # Best-effort override: any DB failure falls back to the YAML URL.
            pass
        return yaml_url
@lru_cache()
def get_data_sources_config() -> DataSourcesConfig:
    """Return the lazily-created, process-wide DataSourcesConfig instance."""
    return DataSourcesConfig()

View File

@@ -0,0 +1,45 @@
# Data Sources Configuration
# All external data source URLs should be configured here
arcgis:
cable_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
landing_point_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
cable_landing_relation_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
fao:
landing_point_url: "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"
telegeography:
cable_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/cable.json"
landing_point_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/landing_point.json"
huggingface:
models_url: "https://huggingface.co/api/models"
datasets_url: "https://huggingface.co/api/datasets"
spaces_url: "https://huggingface.co/api/spaces"
cloudflare:
radar_device_url: "https://api.cloudflare.com/client/v4/radar/http/summary/device_type"
radar_traffic_url: "https://api.cloudflare.com/client/v4/radar/http/timeseries/requests"
radar_top_locations_url: "https://api.cloudflare.com/client/v4/radar/http/top/locations"
peeringdb:
ixp_url: "https://www.peeringdb.com/api/ix"
network_url: "https://www.peeringdb.com/api/net"
facility_url: "https://www.peeringdb.com/api/fac"
top500:
url: "https://top500.org/lists/top500/list/2025/11/"
epoch_ai:
gpu_clusters_url: "https://epoch.ai/data/gpu-clusters"
spacetrack:
base_url: "https://www.space-track.org"
tle_query_url: "https://www.space-track.org/basicspacedata/query/class/gp/orderby/EPOCH%20desc/limit/1000/format/json"
ris_live:
url: "https://ris-live.ripe.net/v1/stream/?format=json&client=planet-ris-live"
bgpstream:
url: "https://broker.bgpstream.caida.org/v2"

View File

@@ -0,0 +1,140 @@
"""Default built-in datasource definitions."""
DEFAULT_DATASOURCES = {
"top500": {
"id": 1,
"name": "TOP500 Supercomputers",
"module": "L1",
"priority": "P0",
"frequency_minutes": 240,
},
"epoch_ai_gpu": {
"id": 2,
"name": "Epoch AI GPU Clusters",
"module": "L1",
"priority": "P0",
"frequency_minutes": 360,
},
"huggingface_models": {
"id": 3,
"name": "HuggingFace Models",
"module": "L2",
"priority": "P1",
"frequency_minutes": 720,
},
"huggingface_datasets": {
"id": 4,
"name": "HuggingFace Datasets",
"module": "L2",
"priority": "P1",
"frequency_minutes": 720,
},
"huggingface_spaces": {
"id": 5,
"name": "HuggingFace Spaces",
"module": "L2",
"priority": "P2",
"frequency_minutes": 1440,
},
"peeringdb_ixp": {
"id": 6,
"name": "PeeringDB IXP",
"module": "L2",
"priority": "P1",
"frequency_minutes": 1440,
},
"peeringdb_network": {
"id": 7,
"name": "PeeringDB Networks",
"module": "L2",
"priority": "P2",
"frequency_minutes": 2880,
},
"peeringdb_facility": {
"id": 8,
"name": "PeeringDB Facilities",
"module": "L2",
"priority": "P2",
"frequency_minutes": 2880,
},
"telegeography_cables": {
"id": 9,
"name": "Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_minutes": 10080,
},
"telegeography_landing": {
"id": 10,
"name": "Cable Landing Points",
"module": "L2",
"priority": "P2",
"frequency_minutes": 10080,
},
"telegeography_systems": {
"id": 11,
"name": "Cable Systems",
"module": "L2",
"priority": "P2",
"frequency_minutes": 10080,
},
"arcgis_cables": {
"id": 15,
"name": "ArcGIS Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_minutes": 10080,
},
"arcgis_landing_points": {
"id": 16,
"name": "ArcGIS Landing Points",
"module": "L2",
"priority": "P1",
"frequency_minutes": 10080,
},
"arcgis_cable_landing_relation": {
"id": 17,
"name": "ArcGIS Cable-Landing Relations",
"module": "L2",
"priority": "P1",
"frequency_minutes": 10080,
},
"fao_landing_points": {
"id": 18,
"name": "FAO Landing Points",
"module": "L2",
"priority": "P1",
"frequency_minutes": 10080,
},
"spacetrack_tle": {
"id": 19,
"name": "Space-Track TLE",
"module": "L3",
"priority": "P2",
"frequency_minutes": 1440,
},
"celestrak_tle": {
"id": 20,
"name": "CelesTrak TLE",
"module": "L3",
"priority": "P2",
"frequency_minutes": 1440,
},
"ris_live_bgp": {
"id": 21,
"name": "RIPE RIS Live BGP",
"module": "L3",
"priority": "P1",
"frequency_minutes": 15,
},
"bgpstream_bgp": {
"id": 22,
"name": "CAIDA BGPStream Backfill",
"module": "L3",
"priority": "P1",
"frequency_minutes": 360,
},
}
ID_TO_COLLECTOR = {info["id"]: name for name, info in DEFAULT_DATASOURCES.items()}
COLLECTOR_TO_ID = {name: info["id"] for name, info in DEFAULT_DATASOURCES.items()}

View File

@@ -0,0 +1,116 @@
"""Helpers for building stable TLE lines from orbital elements."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Optional
def compute_tle_checksum(line: str) -> str:
    """Return the standard TLE modulo-10 checksum digit for *line*.

    Only the first 68 characters participate: every decimal digit adds its
    value, every minus sign counts as 1, and all other characters (letters,
    spaces, periods, plus signs) are ignored.
    """
    weight = sum(
        int(ch) if ch.isdigit() else 1
        for ch in line[:68]
        if ch.isdigit() or ch == "-"
    )
    return str(weight % 10)
def _parse_epoch(value: Any) -> Optional[datetime]:
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
return datetime.fromisoformat(value.replace("Z", "+00:00"))
return None
def build_tle_line1(norad_cat_id: Any, epoch: Any) -> Optional[str]:
    """Build a valid TLE line 1 from the NORAD id and epoch.

    The drag/BSTAR fields are zero-filled placeholders and the international
    designator is fixed to "00001A"; only the catalog number and epoch are
    real. Returns None when either input is missing/unparseable.
    """
    parsed = _parse_epoch(epoch)
    if not norad_cat_id or parsed is None:
        return None

    # TLE epoch: two-digit year, zero-padded day-of-year, fractional day.
    two_digit_year = parsed.year % 100
    year_start = datetime(parsed.year, 1, 1, tzinfo=parsed.tzinfo)
    day_of_year = (parsed - year_start).days + 1
    millis_of_day = (
        (parsed.hour * 3600 + parsed.minute * 60 + parsed.second) * 1000
        + int(parsed.microsecond / 1000)
    )
    fraction = millis_of_day / 86400000
    # f"{...:.8f}" yields "0.XXXXXXXX"; strip the leading zero to keep ".XXXXXXXX".
    epoch_field = f"{two_digit_year:02d}{day_of_year:03d}" + f"{fraction:.8f}"[1:]

    base = (
        f"1 {int(norad_cat_id):05d}U 00001A {epoch_field}"
        " .00000000 00000-0 00000-0 0 999"
    )
    return base + compute_tle_checksum(base)
def build_tle_line2(
    norad_cat_id: Any,
    inclination: Any,
    raan: Any,
    eccentricity: Any,
    arg_of_perigee: Any,
    mean_anomaly: Any,
    mean_motion: Any,
) -> Optional[str]:
    """Build a valid TLE line 2 from the standard orbital elements.

    Returns None when any element is missing. The revolution-number field
    is left zeroed since it is not part of the supplied elements.
    """
    elements = (
        norad_cat_id,
        inclination,
        raan,
        eccentricity,
        arg_of_perigee,
        mean_anomaly,
        mean_motion,
    )
    if any(element is None for element in elements):
        return None

    # Eccentricity is encoded with an implied leading decimal point,
    # seven digits wide (e.g. 0.0006703 -> "0006703").
    ecc_field = str(round(float(eccentricity) * 10_000_000)).zfill(7)
    body = (
        f"2 {int(norad_cat_id):05d}"
        f" {float(inclination):8.4f}"
        f" {float(raan):8.4f}"
        f" {ecc_field}"
        f" {float(arg_of_perigee):8.4f}"
        f" {float(mean_anomaly):8.4f}"
        f" {float(mean_motion):11.8f}"
        "00000"
    )
    return body + compute_tle_checksum(body)
def build_tle_lines_from_elements(
    *,
    norad_cat_id: Any,
    epoch: Any,
    inclination: Any,
    raan: Any,
    eccentricity: Any,
    arg_of_perigee: Any,
    mean_anomaly: Any,
    mean_motion: Any,
) -> tuple[Optional[str], Optional[str]]:
    """Build both TLE lines from a metadata payload.

    Each element of the returned pair is independently None when its own
    inputs are incomplete, so callers may get one usable line without
    the other.
    """
    return (
        build_tle_line1(norad_cat_id, epoch),
        build_tle_line2(
            norad_cat_id,
            inclination,
            raan,
            eccentricity,
            arg_of_perigee,
            mean_anomaly,
            mean_motion,
        ),
    )

View File

@@ -1,4 +1,4 @@
from datetime import datetime, timedelta from datetime import UTC, datetime, timedelta
from typing import Optional from typing import Optional
import bcrypt import bcrypt
@@ -49,9 +49,9 @@ def get_password_hash(password: str) -> str:
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str: def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
to_encode = data.copy() to_encode = data.copy()
if expires_delta: if expires_delta:
expire = datetime.utcnow() + expires_delta expire = datetime.now(UTC) + expires_delta
elif settings.ACCESS_TOKEN_EXPIRE_MINUTES > 0: elif settings.ACCESS_TOKEN_EXPIRE_MINUTES > 0:
expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES) expire = datetime.now(UTC) + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
else: else:
expire = None expire = None
if expire: if expire:
@@ -65,7 +65,7 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -
def create_refresh_token(data: dict) -> str: def create_refresh_token(data: dict) -> str:
to_encode = data.copy() to_encode = data.copy()
if settings.REFRESH_TOKEN_EXPIRE_DAYS > 0: if settings.REFRESH_TOKEN_EXPIRE_DAYS > 0:
expire = datetime.utcnow() + timedelta(days=settings.REFRESH_TOKEN_EXPIRE_DAYS) expire = datetime.now(UTC) + timedelta(days=settings.REFRESH_TOKEN_EXPIRE_DAYS)
to_encode.update({"exp": expire}) to_encode.update({"exp": expire})
to_encode.update({"type": "refresh"}) to_encode.update({"type": "refresh"})
if "sub" in to_encode: if "sub" in to_encode:

20
backend/app/core/time.py Normal file
View File

@@ -0,0 +1,20 @@
"""Time helpers for API serialization."""
from __future__ import annotations
from datetime import UTC, datetime
def ensure_utc(value: datetime | None) -> datetime | None:
if value is None:
return None
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value.astimezone(UTC)
def to_iso8601_utc(value: datetime | None) -> str | None:
    """Serialize *value* as an ISO-8601 UTC string with a trailing "Z"."""
    utc_value = ensure_utc(value)
    if utc_value is None:
        return None
    # isoformat() renders UTC as "+00:00"; swap for the compact "Z" suffix.
    return utc_value.isoformat().replace("+00:00", "Z")

View File

@@ -1,12 +1,14 @@
"""Data broadcaster for WebSocket connections""" """Data broadcaster for WebSocket connections"""
import asyncio import asyncio
from datetime import datetime from datetime import UTC, datetime
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
from app.core.time import to_iso8601_utc
from app.core.websocket.manager import manager from app.core.websocket.manager import manager
class DataBroadcaster: class DataBroadcaster:
"""Periodically broadcasts data to connected WebSocket clients""" """Periodically broadcasts data to connected WebSocket clients"""
@@ -21,7 +23,7 @@ class DataBroadcaster:
"active_datasources": 8, "active_datasources": 8,
"tasks_today": 45, "tasks_today": 45,
"success_rate": 97.8, "success_rate": 97.8,
"last_updated": datetime.utcnow().isoformat(), "last_updated": to_iso8601_utc(datetime.now(UTC)),
"alerts": {"critical": 0, "warning": 2, "info": 5}, "alerts": {"critical": 0, "warning": 2, "info": 5},
} }
@@ -34,7 +36,7 @@ class DataBroadcaster:
{ {
"type": "data_frame", "type": "data_frame",
"channel": "dashboard", "channel": "dashboard",
"timestamp": datetime.utcnow().isoformat(), "timestamp": to_iso8601_utc(datetime.now(UTC)),
"payload": {"stats": stats}, "payload": {"stats": stats},
}, },
channel="dashboard", channel="dashboard",
@@ -48,7 +50,7 @@ class DataBroadcaster:
await manager.broadcast( await manager.broadcast(
{ {
"type": "alert_notification", "type": "alert_notification",
"timestamp": datetime.utcnow().isoformat(), "timestamp": to_iso8601_utc(datetime.now(UTC)),
"data": {"alert": alert}, "data": {"alert": alert},
} }
) )
@@ -59,7 +61,7 @@ class DataBroadcaster:
{ {
"type": "data_frame", "type": "data_frame",
"channel": "gpu_clusters", "channel": "gpu_clusters",
"timestamp": datetime.utcnow().isoformat(), "timestamp": to_iso8601_utc(datetime.now(UTC)),
"payload": data, "payload": data,
} }
) )
@@ -70,12 +72,24 @@ class DataBroadcaster:
{ {
"type": "data_frame", "type": "data_frame",
"channel": channel, "channel": channel,
"timestamp": datetime.utcnow().isoformat(), "timestamp": to_iso8601_utc(datetime.now(UTC)),
"payload": data, "payload": data,
}, },
channel=channel if channel in manager.active_connections else "all", channel=channel if channel in manager.active_connections else "all",
) )
async def broadcast_datasource_task_update(self, data: Dict[str, Any]):
    """Broadcast a datasource task progress update to all connected clients."""
    message = {
        "type": "data_frame",
        "channel": "datasource_tasks",
        "timestamp": to_iso8601_utc(datetime.now(UTC)),
        "payload": data,
    }
    await manager.broadcast(message, channel="all")
def start(self): def start(self):
"""Start all broadcasters""" """Start all broadcasters"""
if not self.running: if not self.running:

View File

@@ -1,5 +1,6 @@
from typing import AsyncGenerator from typing import AsyncGenerator
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base from sqlalchemy.orm import declarative_base
@@ -25,11 +26,126 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
raise raise
async def seed_default_datasources(session: AsyncSession):
    """Insert or refresh the built-in datasource rows.

    Rows are keyed by the fixed ids from DEFAULT_DATASOURCES, so repeated
    startups update names/schedules in place rather than creating
    duplicates. User-managed fields (is_active, config contents) are left
    untouched on existing rows, except that a NULL config is normalized
    to an empty JSON object.
    """
    # Imported lazily to avoid circular imports at module load time.
    from app.core.datasource_defaults import DEFAULT_DATASOURCES
    from app.models.datasource import DataSource

    for collector_name, meta in DEFAULT_DATASOURCES.items():
        row = await session.get(DataSource, meta["id"])
        if row is None:
            session.add(
                DataSource(
                    id=meta["id"],
                    name=meta["name"],
                    source=collector_name,
                    module=meta["module"],
                    priority=meta["priority"],
                    frequency_minutes=meta["frequency_minutes"],
                    collector_class=collector_name,
                    config="{}",
                    is_active=True,
                )
            )
            continue
        row.name = meta["name"]
        row.source = collector_name
        row.module = meta["module"]
        row.priority = meta["priority"]
        row.frequency_minutes = meta["frequency_minutes"]
        row.collector_class = collector_name
        if row.config is None:
            row.config = "{}"
    await session.commit()
async def ensure_default_admin_user(session: AsyncSession):
    """Create the bootstrap 'admin' account if it does not already exist."""
    # Imported lazily to avoid circular imports at module load time.
    from app.core.security import get_password_hash
    from app.models.user import User

    result = await session.execute(
        text("SELECT id FROM users WHERE username = 'admin'")
    )
    if result.fetchone():
        # An admin row already exists — never overwrite it.
        return
    # SECURITY NOTE(review): hardcoded default credentials (admin / admin123).
    # Acceptable for local bootstrap, but confirm deployments force a password
    # change or override this before exposure.
    session.add(
        User(
            username="admin",
            email="admin@planet.local",
            password_hash=get_password_hash("admin123"),
            role="super_admin",
            is_active=True,
        )
    )
    await session.commit()
async def init_db(): async def init_db():
import app.models.user # noqa: F401 import app.models.user # noqa: F401
import app.models.gpu_cluster # noqa: F401 import app.models.gpu_cluster # noqa: F401
import app.models.task # noqa: F401 import app.models.task # noqa: F401
import app.models.data_snapshot # noqa: F401
import app.models.datasource # noqa: F401 import app.models.datasource # noqa: F401
import app.models.datasource_config # noqa: F401
import app.models.alert # noqa: F401
import app.models.bgp_anomaly # noqa: F401
import app.models.collected_data # noqa: F401
import app.models.system_setting # noqa: F401
async with engine.begin() as conn: async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all) await conn.run_sync(Base.metadata.create_all)
await conn.execute(
text(
"""
ALTER TABLE collected_data
ADD COLUMN IF NOT EXISTS snapshot_id INTEGER,
ADD COLUMN IF NOT EXISTS task_id INTEGER,
ADD COLUMN IF NOT EXISTS entity_key VARCHAR(255),
ADD COLUMN IF NOT EXISTS is_current BOOLEAN DEFAULT TRUE,
ADD COLUMN IF NOT EXISTS previous_record_id INTEGER,
ADD COLUMN IF NOT EXISTS change_type VARCHAR(20),
ADD COLUMN IF NOT EXISTS change_summary JSONB DEFAULT '{}'::jsonb,
ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ
"""
)
)
await conn.execute(
text(
"""
ALTER TABLE collection_tasks
ADD COLUMN IF NOT EXISTS phase VARCHAR(30) DEFAULT 'queued'
"""
)
)
await conn.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_collected_data_source_source_id
ON collected_data (source, source_id)
"""
)
)
await conn.execute(
text(
"""
UPDATE collected_data
SET entity_key = source || ':' || COALESCE(source_id, id::text)
WHERE entity_key IS NULL
"""
)
)
await conn.execute(
text(
"""
UPDATE collected_data
SET is_current = TRUE
WHERE is_current IS NULL
"""
)
)
async with async_session_factory() as session:
await seed_default_datasources(session)
await ensure_default_admin_user(session)

View File

@@ -2,15 +2,19 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import BaseHTTPMiddleware from starlette.middleware.base import BaseHTTPMiddleware
from app.core.config import settings
from app.core.websocket.broadcaster import broadcaster
from app.db.session import init_db, async_session_factory
from app.api.main import api_router from app.api.main import api_router
from app.api.v1 import websocket from app.api.v1 import websocket
from app.services.scheduler import start_scheduler, stop_scheduler from app.core.config import settings
from app.core.websocket.broadcaster import broadcaster
from app.db.session import init_db
from app.services.scheduler import (
cleanup_stale_running_tasks,
start_scheduler,
stop_scheduler,
sync_scheduler_with_datasources,
)
class WebSocketCORSMiddleware(BaseHTTPMiddleware): class WebSocketCORSMiddleware(BaseHTTPMiddleware):
@@ -27,7 +31,9 @@ class WebSocketCORSMiddleware(BaseHTTPMiddleware):
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
await init_db() await init_db()
await cleanup_stale_running_tasks()
start_scheduler() start_scheduler()
await sync_scheduler_with_datasources()
broadcaster.start() broadcaster.start()
yield yield
broadcaster.stop() broadcaster.stop()
@@ -60,16 +66,11 @@ app.include_router(websocket.router)
@app.get("/health") @app.get("/health")
async def health_check(): async def health_check():
"""健康检查端点""" return {"status": "healthy", "version": settings.VERSION}
return {
"status": "healthy",
"version": settings.VERSION,
}
@app.get("/") @app.get("/")
async def root(): async def root():
"""API根目录"""
return { return {
"name": settings.PROJECT_NAME, "name": settings.PROJECT_NAME,
"version": settings.VERSION, "version": settings.VERSION,
@@ -80,7 +81,6 @@ async def root():
@app.get("/api/v1/scheduler/jobs") @app.get("/api/v1/scheduler/jobs")
async def get_scheduler_jobs(): async def get_scheduler_jobs():
"""获取调度任务列表"""
from app.services.scheduler import get_scheduler_jobs from app.services.scheduler import get_scheduler_jobs
return {"jobs": get_scheduler_jobs()} return {"jobs": get_scheduler_jobs()}

View File

@@ -1,15 +1,23 @@
from app.models.user import User from app.models.user import User
from app.models.gpu_cluster import GPUCluster from app.models.gpu_cluster import GPUCluster
from app.models.task import CollectionTask from app.models.task import CollectionTask
from app.models.data_snapshot import DataSnapshot
from app.models.datasource import DataSource from app.models.datasource import DataSource
from app.models.datasource_config import DataSourceConfig
from app.models.alert import Alert, AlertSeverity, AlertStatus from app.models.alert import Alert, AlertSeverity, AlertStatus
from app.models.bgp_anomaly import BGPAnomaly
from app.models.system_setting import SystemSetting
__all__ = [ __all__ = [
"User", "User",
"GPUCluster", "GPUCluster",
"CollectionTask", "CollectionTask",
"DataSnapshot",
"DataSource", "DataSource",
"DataSourceConfig",
"SystemSetting",
"Alert", "Alert",
"AlertSeverity", "AlertSeverity",
"AlertStatus", "AlertStatus",
"BGPAnomaly",
] ]

View File

@@ -5,6 +5,7 @@ from typing import Optional
from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Enum as SQLEnum from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Enum as SQLEnum
from sqlalchemy.orm import relationship from sqlalchemy.orm import relationship
from app.core.time import to_iso8601_utc
from app.db.session import Base from app.db.session import Base
@@ -50,8 +51,8 @@ class Alert(Base):
"acknowledged_by": self.acknowledged_by, "acknowledged_by": self.acknowledged_by,
"resolved_by": self.resolved_by, "resolved_by": self.resolved_by,
"resolution_notes": self.resolution_notes, "resolution_notes": self.resolution_notes,
"created_at": self.created_at.isoformat() if self.created_at else None, "created_at": to_iso8601_utc(self.created_at),
"updated_at": self.updated_at.isoformat() if self.updated_at else None, "updated_at": to_iso8601_utc(self.updated_at),
"acknowledged_at": self.acknowledged_at.isoformat() if self.acknowledged_at else None, "acknowledged_at": to_iso8601_utc(self.acknowledged_at),
"resolved_at": self.resolved_at.isoformat() if self.resolved_at else None, "resolved_at": to_iso8601_utc(self.resolved_at),
} }

View File

@@ -0,0 +1,58 @@
"""BGP anomaly model for derived routing intelligence."""
from datetime import UTC, datetime

from sqlalchemy import Column, DateTime, Float, ForeignKey, Index, Integer, JSON, String, Text

from app.core.time import to_iso8601_utc
from app.db.session import Base
class BGPAnomaly(Base):
    """Derived BGP routing anomaly (e.g. possible hijack or route leak).

    ``entity_key`` identifies the affected prefix/origin combination for
    deduplication; ``evidence`` holds the raw supporting observations.
    """

    __tablename__ = "bgp_anomalies"

    id = Column(Integer, primary_key=True, index=True)
    snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
    task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
    source = Column(String(100), nullable=False, index=True)
    anomaly_type = Column(String(50), nullable=False, index=True)
    severity = Column(String(20), nullable=False, index=True)
    status = Column(String(20), nullable=False, default="active", index=True)
    entity_key = Column(String(255), nullable=False, index=True)
    prefix = Column(String(64), nullable=True, index=True)
    origin_asn = Column(Integer, nullable=True, index=True)
    new_origin_asn = Column(Integer, nullable=True, index=True)
    # Callable default so each row gets its own list instance.
    peer_scope = Column(JSON, default=list)
    # FIX: datetime.utcnow returns a *naive* datetime while these columns are
    # timezone-aware (and utcnow is deprecated since Python 3.12); use an
    # aware UTC "now" instead, matching app.core.time conventions.
    started_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(UTC), index=True)
    ended_at = Column(DateTime(timezone=True), nullable=True)
    confidence = Column(Float, nullable=False, default=0.5)
    summary = Column(Text, nullable=False)
    evidence = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.now(UTC), index=True)

    __table_args__ = (
        Index("idx_bgp_anomalies_source_created", "source", "created_at"),
        Index("idx_bgp_anomalies_type_status", "anomaly_type", "status"),
    )

    def to_dict(self) -> dict:
        """Serialize to a JSON-friendly dict (datetimes as ISO-8601 Z strings)."""
        return {
            "id": self.id,
            "snapshot_id": self.snapshot_id,
            "task_id": self.task_id,
            "source": self.source,
            "anomaly_type": self.anomaly_type,
            "severity": self.severity,
            "status": self.status,
            "entity_key": self.entity_key,
            "prefix": self.prefix,
            "origin_asn": self.origin_asn,
            "new_origin_asn": self.new_origin_asn,
            "peer_scope": self.peer_scope or [],
            "started_at": to_iso8601_utc(self.started_at),
            "ended_at": to_iso8601_utc(self.ended_at),
            "confidence": self.confidence,
            "summary": self.summary,
            "evidence": self.evidence or {},
            "created_at": to_iso8601_utc(self.created_at),
        }

View File

@@ -1,8 +1,10 @@
"""Collected Data model for storing data from all collectors""" """Collected Data model for storing data from all collectors"""
from sqlalchemy import Column, DateTime, Integer, String, Text, JSON, Index from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, String, Text, JSON, Index
from sqlalchemy.sql import func from sqlalchemy.sql import func
from app.core.collected_data_fields import get_record_field
from app.core.time import to_iso8601_utc
from app.db.session import Base from app.db.session import Base
@@ -12,8 +14,11 @@ class CollectedData(Base):
__tablename__ = "collected_data" __tablename__ = "collected_data"
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models" source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models"
source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1" source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1"
entity_key = Column(String(255), index=True)
data_type = Column( data_type = Column(
String(50), nullable=False, index=True String(50), nullable=False, index=True
) # e.g., "supercomputer", "model", "dataset" ) # e.g., "supercomputer", "model", "dataset"
@@ -23,16 +28,6 @@ class CollectedData(Base):
title = Column(String(500)) title = Column(String(500))
description = Column(Text) description = Column(Text)
# Location data (for geo visualization)
country = Column(String(100))
city = Column(String(100))
latitude = Column(String(50))
longitude = Column(String(50))
# Performance metrics
value = Column(String(100)) # Generic value field (Rmax, Rpeak, etc.)
unit = Column(String(20))
# Additional metadata as JSON # Additional metadata as JSON
extra_data = Column( extra_data = Column(
"metadata", JSON, default={} "metadata", JSON, default={}
@@ -44,11 +39,17 @@ class CollectedData(Base):
# Status # Status
is_valid = Column(Integer, default=1) # 1=valid, 0=invalid is_valid = Column(Integer, default=1) # 1=valid, 0=invalid
is_current = Column(Boolean, default=True, index=True)
previous_record_id = Column(Integer, ForeignKey("collected_data.id"), nullable=True, index=True)
change_type = Column(String(20), nullable=True)
change_summary = Column(JSON, default={})
deleted_at = Column(DateTime(timezone=True), nullable=True)
# Indexes for common queries # Indexes for common queries
__table_args__ = ( __table_args__ = (
Index("idx_collected_data_source_collected", "source", "collected_at"), Index("idx_collected_data_source_collected", "source", "collected_at"),
Index("idx_collected_data_source_type", "source", "data_type"), Index("idx_collected_data_source_type", "source", "data_type"),
Index("idx_collected_data_source_source_id", "source", "source_id"),
) )
def __repr__(self): def __repr__(self):
@@ -58,23 +59,27 @@ class CollectedData(Base):
"""Convert to dictionary""" """Convert to dictionary"""
return { return {
"id": self.id, "id": self.id,
"snapshot_id": self.snapshot_id,
"task_id": self.task_id,
"source": self.source, "source": self.source,
"source_id": self.source_id, "source_id": self.source_id,
"entity_key": self.entity_key,
"data_type": self.data_type, "data_type": self.data_type,
"name": self.name, "name": self.name,
"title": self.title, "title": self.title,
"description": self.description, "description": self.description,
"country": self.country, "country": get_record_field(self, "country"),
"city": self.city, "city": get_record_field(self, "city"),
"latitude": self.latitude, "latitude": get_record_field(self, "latitude"),
"longitude": self.longitude, "longitude": get_record_field(self, "longitude"),
"value": self.value, "value": get_record_field(self, "value"),
"unit": self.unit, "unit": get_record_field(self, "unit"),
"metadata": self.extra_data, "metadata": self.extra_data,
"collected_at": self.collected_at.isoformat() "collected_at": to_iso8601_utc(self.collected_at),
if self.collected_at is not None "reference_date": to_iso8601_utc(self.reference_date),
else None, "is_current": self.is_current,
"reference_date": self.reference_date.isoformat() "previous_record_id": self.previous_record_id,
if self.reference_date is not None "change_type": self.change_type,
else None, "change_summary": self.change_summary,
"deleted_at": to_iso8601_utc(self.deleted_at),
} }

View File

@@ -0,0 +1,26 @@
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, JSON, String
from sqlalchemy.sql import func
from app.db.session import Base
class DataSnapshot(Base):
    """One collection run's worth of records for a single datasource."""

    __tablename__ = "data_snapshots"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): unlike task_id, no ForeignKey("datasources.id") here —
    # confirm the looser coupling is intentional.
    datasource_id = Column(Integer, nullable=False, index=True)
    task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
    source = Column(String(100), nullable=False, index=True)
    snapshot_key = Column(String(100), nullable=True, index=True)
    reference_date = Column(DateTime(timezone=True), nullable=True)
    started_at = Column(DateTime(timezone=True), server_default=func.now())
    completed_at = Column(DateTime(timezone=True), nullable=True)
    record_count = Column(Integer, default=0)
    status = Column(String(20), nullable=False, default="running")
    is_current = Column(Boolean, default=True, index=True)
    parent_snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
    # FIX: callable default so each row gets its own dict — a literal {} is a
    # single shared mutable object (matches the default=dict style used by
    # the BGPAnomaly model).
    summary = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    def __repr__(self):
        return f"<DataSnapshot {self.id}: {self.source}/{self.status}>"

View File

@@ -0,0 +1,19 @@
"""Persistent system settings model."""
from sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint
from sqlalchemy.sql import func
from app.db.session import Base
class SystemSetting(Base):
    """Persistent key/value settings, one JSON payload per category."""

    __tablename__ = "system_settings"
    # One row per settings category (e.g. "general", "notifications").
    __table_args__ = (UniqueConstraint("category", name="uq_system_settings_category"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    category = Column(String(50), nullable=False)
    # FIX: callable default so each row gets its own dict — a literal {} is a
    # single shared mutable object.
    payload = Column(JSON, nullable=False, default=dict)
    updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())

    def __repr__(self):
        return f"<SystemSetting {self.category}>"

View File

@@ -12,6 +12,7 @@ class CollectionTask(Base):
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
datasource_id = Column(Integer, nullable=False, index=True) datasource_id = Column(Integer, nullable=False, index=True)
status = Column(String(20), nullable=False) # pending, running, success, failed, cancelled status = Column(String(20), nullable=False) # pending, running, success, failed, cancelled
phase = Column(String(30), default="queued")
started_at = Column(DateTime(timezone=True)) started_at = Column(DateTime(timezone=True))
completed_at = Column(DateTime(timezone=True)) completed_at = Column(DateTime(timezone=True))
records_processed = Column(Integer, default=0) records_processed = Column(Integer, default=0)

View File

@@ -28,6 +28,10 @@ from app.services.collectors.arcgis_cables import ArcGISCableCollector
from app.services.collectors.fao_landing import FAOLandingPointCollector from app.services.collectors.fao_landing import FAOLandingPointCollector
from app.services.collectors.arcgis_landing import ArcGISLandingPointCollector from app.services.collectors.arcgis_landing import ArcGISLandingPointCollector
from app.services.collectors.arcgis_relation import ArcGISCableLandingRelationCollector from app.services.collectors.arcgis_relation import ArcGISCableLandingRelationCollector
from app.services.collectors.spacetrack import SpaceTrackTLECollector
from app.services.collectors.celestrak import CelesTrakTLECollector
from app.services.collectors.ris_live import RISLiveCollector
from app.services.collectors.bgpstream import BGPStreamBackfillCollector
collector_registry.register(TOP500Collector()) collector_registry.register(TOP500Collector())
collector_registry.register(EpochAIGPUCollector()) collector_registry.register(EpochAIGPUCollector())
@@ -47,3 +51,7 @@ collector_registry.register(ArcGISCableCollector())
collector_registry.register(FAOLandingPointCollector()) collector_registry.register(FAOLandingPointCollector())
collector_registry.register(ArcGISLandingPointCollector()) collector_registry.register(ArcGISLandingPointCollector())
collector_registry.register(ArcGISCableLandingRelationCollector()) collector_registry.register(ArcGISCableLandingRelationCollector())
collector_registry.register(SpaceTrackTLECollector())
collector_registry.register(CelesTrakTLECollector())
collector_registry.register(RISLiveCollector())
collector_registry.register(BGPStreamBackfillCollector())

View File

@@ -5,10 +5,12 @@ Collects submarine cable data from ArcGIS GeoJSON API.
import json import json
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
import httpx import httpx
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
from app.core.data_sources import get_data_sources_config
class ArcGISCableCollector(BaseCollector): class ArcGISCableCollector(BaseCollector):
@@ -18,7 +20,14 @@ class ArcGISCableCollector(BaseCollector):
frequency_hours = 168 frequency_hours = 168
data_type = "submarine_cable" data_type = "submarine_cable"
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query" @property
def base_url(self) -> str:
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cables")
async def fetch(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"} params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
@@ -75,7 +84,7 @@ class ArcGISCableCollector(BaseCollector):
"color": props.get("color"), "color": props.get("color"),
"route_coordinates": route_coordinates, "route_coordinates": route_coordinates,
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -1,12 +1,10 @@
"""ArcGIS Landing Points Collector
Collects landing point data from ArcGIS GeoJSON API.
"""
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
import httpx
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
from app.core.data_sources import get_data_sources_config
class ArcGISLandingPointCollector(BaseCollector): class ArcGISLandingPointCollector(BaseCollector):
@@ -16,21 +14,23 @@ class ArcGISLandingPointCollector(BaseCollector):
frequency_hours = 168 frequency_hours = 168
data_type = "landing_point" data_type = "landing_point"
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/1/query" @property
def base_url(self) -> str:
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_landing_points")
async def fetch(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"} params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
async with self._get_client() as client: async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.get(self.base_url, params=params) response = await client.get(self.base_url, params=params)
response.raise_for_status() response.raise_for_status()
return self.parse_response(response.json()) return self.parse_response(response.json())
def _get_client(self):
import httpx
return httpx.AsyncClient(timeout=60.0)
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
result = [] result = []
@@ -39,8 +39,13 @@ class ArcGISLandingPointCollector(BaseCollector):
props = feature.get("properties", {}) props = feature.get("properties", {})
geometry = feature.get("geometry", {}) geometry = feature.get("geometry", {})
lat = geometry.get("y") if geometry else None if geometry.get("type") == "Point":
lon = geometry.get("x") if geometry else None coords = geometry.get("coordinates", [])
lon = coords[0] if len(coords) > 0 else None
lat = coords[1] if len(coords) > 1 else None
else:
lat = geometry.get("y") if geometry else None
lon = geometry.get("x") if geometry else None
try: try:
entry = { entry = {
@@ -54,6 +59,7 @@ class ArcGISLandingPointCollector(BaseCollector):
"unit": "", "unit": "",
"metadata": { "metadata": {
"objectid": props.get("OBJECTID"), "objectid": props.get("OBJECTID"),
"city_id": props.get("city_id"),
"cable_id": props.get("cable_id"), "cable_id": props.get("cable_id"),
"cable_name": props.get("cable_name"), "cable_name": props.get("cable_name"),
"facility": props.get("facility"), "facility": props.get("facility"),
@@ -61,7 +67,7 @@ class ArcGISLandingPointCollector(BaseCollector):
"status": props.get("status"), "status": props.get("status"),
"landing_point_id": props.get("landing_point_id"), "landing_point_id": props.get("landing_point_id"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -1,6 +1,10 @@
from typing import Dict, Any, List import asyncio
from datetime import datetime from datetime import UTC, datetime
from typing import Any, Dict, List, Optional
import httpx
from app.core.data_sources import get_data_sources_config
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
@@ -11,45 +15,135 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
frequency_hours = 168 frequency_hours = 168
data_type = "cable_landing_relation" data_type = "cable_landing_relation"
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/3/query" @property
def base_url(self) -> str:
if self._resolved_url:
return self._resolved_url
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cable_landing_relation")
def _layer_url(self, layer_id: int) -> str:
if "/FeatureServer/" not in self.base_url:
return self.base_url
prefix = self.base_url.split("/FeatureServer/")[0]
return f"{prefix}/FeatureServer/{layer_id}/query"
async def _fetch_layer_attributes(
self, client: httpx.AsyncClient, layer_id: int
) -> List[Dict[str, Any]]:
response = await client.get(
self._layer_url(layer_id),
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "false",
"f": "json",
},
)
response.raise_for_status()
data = response.json()
return [feature.get("attributes", {}) for feature in data.get("features", [])]
async def _fetch_relation_features(self, client: httpx.AsyncClient) -> List[Dict[str, Any]]:
response = await client.get(
self.base_url,
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "true",
"f": "geojson",
},
)
response.raise_for_status()
data = response.json()
return data.get("features", [])
async def fetch(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
import httpx
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.get(self.base_url, params=params) relation_features, landing_rows, cable_rows = await asyncio.gather(
response.raise_for_status() self._fetch_relation_features(client),
return self.parse_response(response.json()) self._fetch_layer_attributes(client, 1),
self._fetch_layer_attributes(client, 2),
)
return self.parse_response(relation_features, landing_rows, cable_rows)
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: def _build_landing_lookup(self, landing_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
result = [] lookup: Dict[int, Dict[str, Any]] = {}
for row in landing_rows:
city_id = row.get("city_id")
if city_id is None:
continue
lookup[int(city_id)] = {
"landing_point_id": row.get("landing_point_id") or city_id,
"landing_point_name": row.get("Name") or row.get("name") or "",
"facility": row.get("facility") or "",
"status": row.get("status") or "",
"country": row.get("country") or "",
}
return lookup
features = data.get("features", []) def _build_cable_lookup(self, cable_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
for feature in features: lookup: Dict[int, Dict[str, Any]] = {}
for row in cable_rows:
cable_id = row.get("cable_id")
if cable_id is None:
continue
lookup[int(cable_id)] = {
"cable_name": row.get("Name") or "",
"status": row.get("status") or "active",
}
return lookup
def parse_response(
self,
relation_features: List[Dict[str, Any]],
landing_rows: List[Dict[str, Any]],
cable_rows: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
result: List[Dict[str, Any]] = []
landing_lookup = self._build_landing_lookup(landing_rows)
cable_lookup = self._build_cable_lookup(cable_rows)
for feature in relation_features:
props = feature.get("properties", {}) props = feature.get("properties", {})
try: try:
city_id = props.get("city_id")
cable_id = props.get("cable_id")
landing_info = landing_lookup.get(int(city_id), {}) if city_id is not None else {}
cable_info = cable_lookup.get(int(cable_id), {}) if cable_id is not None else {}
cable_name = cable_info.get("cable_name") or props.get("cable_name") or "Unknown"
landing_point_name = (
landing_info.get("landing_point_name")
or props.get("landing_point_name")
or "Unknown"
)
facility = landing_info.get("facility") or props.get("facility") or "-"
status = cable_info.get("status") or landing_info.get("status") or props.get("status") or "-"
country = landing_info.get("country") or props.get("country") or ""
landing_point_id = landing_info.get("landing_point_id") or props.get("landing_point_id") or city_id
entry = { entry = {
"source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}", "source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}",
"name": f"{props.get('cable_name', 'Unknown')} - {props.get('landing_point_name', 'Unknown')}", "name": f"{cable_name} - {landing_point_name}",
"country": props.get("country", ""), "country": country,
"city": props.get("landing_point_name", ""), "city": landing_point_name,
"latitude": str(props.get("latitude", "")) if props.get("latitude") else "", "latitude": str(props.get("latitude", "")) if props.get("latitude") else "",
"longitude": str(props.get("longitude", "")) if props.get("longitude") else "", "longitude": str(props.get("longitude", "")) if props.get("longitude") else "",
"value": "", "value": "",
"unit": "", "unit": "",
"metadata": { "metadata": {
"objectid": props.get("OBJECTID"), "objectid": props.get("OBJECTID"),
"cable_id": props.get("cable_id"), "city_id": city_id,
"cable_name": props.get("cable_name"), "cable_id": cable_id,
"landing_point_id": props.get("landing_point_id"), "cable_name": cable_name,
"landing_point_name": props.get("landing_point_name"), "landing_point_id": landing_point_id,
"facility": props.get("facility"), "landing_point_name": landing_point_name,
"status": props.get("status"), "facility": facility,
"status": status,
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -2,12 +2,16 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, List, Any, Optional from typing import Dict, List, Any, Optional
from datetime import datetime from datetime import UTC, datetime
import httpx import httpx
from sqlalchemy import text from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from app.core.collected_data_fields import build_dynamic_metadata, get_record_field
from app.core.config import settings from app.core.config import settings
from app.core.countries import normalize_country
from app.core.time import to_iso8601_utc
from app.core.websocket.broadcaster import broadcaster
class BaseCollector(ABC): class BaseCollector(ABC):
@@ -18,19 +22,68 @@ class BaseCollector(ABC):
module: str = "L1" module: str = "L1"
frequency_hours: int = 4 frequency_hours: int = 4
data_type: str = "generic" data_type: str = "generic"
fail_on_empty: bool = False
def __init__(self): def __init__(self):
self._current_task = None self._current_task = None
self._db_session = None self._db_session = None
self._datasource_id = 1 self._datasource_id = 1
self._resolved_url: Optional[str] = None
self._last_broadcast_progress: Optional[int] = None
def update_progress(self, records_processed: int): async def resolve_url(self, db: AsyncSession) -> None:
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
self._resolved_url = await config.get_url(self.name, db)
async def _publish_task_update(self, force: bool = False):
if not self._current_task:
return
progress = float(self._current_task.progress or 0.0)
rounded_progress = int(round(progress))
if not force and self._last_broadcast_progress == rounded_progress:
return
await broadcaster.broadcast_datasource_task_update(
{
"datasource_id": getattr(self, "_datasource_id", None),
"collector_name": self.name,
"task_id": self._current_task.id,
"status": self._current_task.status,
"phase": self._current_task.phase,
"progress": progress,
"records_processed": self._current_task.records_processed,
"total_records": self._current_task.total_records,
"started_at": to_iso8601_utc(self._current_task.started_at),
"completed_at": to_iso8601_utc(self._current_task.completed_at),
"error_message": self._current_task.error_message,
}
)
self._last_broadcast_progress = rounded_progress
async def update_progress(self, records_processed: int, *, commit: bool = False, force: bool = False):
"""Update task progress - call this during data processing""" """Update task progress - call this during data processing"""
if self._current_task and self._db_session and self._current_task.total_records > 0: if self._current_task and self._db_session:
self._current_task.records_processed = records_processed self._current_task.records_processed = records_processed
self._current_task.progress = ( if self._current_task.total_records and self._current_task.total_records > 0:
records_processed / self._current_task.total_records self._current_task.progress = (
) * 100 records_processed / self._current_task.total_records
) * 100
else:
self._current_task.progress = 0.0
if commit:
await self._db_session.commit()
await self._publish_task_update(force=force)
async def set_phase(self, phase: str):
if self._current_task and self._db_session:
self._current_task.phase = phase
await self._db_session.commit()
await self._publish_task_update(force=True)
@abstractmethod @abstractmethod
async def fetch(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
@@ -41,14 +94,87 @@ class BaseCollector(ABC):
"""Transform raw data to internal format (default: pass through)""" """Transform raw data to internal format (default: pass through)"""
return raw_data return raw_data
def _parse_reference_date(self, value: Any) -> Optional[datetime]:
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
return datetime.fromisoformat(value.replace("Z", "+00:00"))
return None
def _build_comparable_payload(self, record: Any) -> Dict[str, Any]:
return {
"name": getattr(record, "name", None),
"title": getattr(record, "title", None),
"description": getattr(record, "description", None),
"country": get_record_field(record, "country"),
"city": get_record_field(record, "city"),
"latitude": get_record_field(record, "latitude"),
"longitude": get_record_field(record, "longitude"),
"value": get_record_field(record, "value"),
"unit": get_record_field(record, "unit"),
"metadata": getattr(record, "extra_data", None) or {},
"reference_date": (
getattr(record, "reference_date", None).isoformat()
if getattr(record, "reference_date", None)
else None
),
}
async def _create_snapshot(
self,
db: AsyncSession,
task_id: int,
data: List[Dict[str, Any]],
started_at: datetime,
) -> int:
from app.models.data_snapshot import DataSnapshot
reference_dates = [
parsed
for parsed in (self._parse_reference_date(item.get("reference_date")) for item in data)
if parsed is not None
]
reference_date = max(reference_dates) if reference_dates else None
result = await db.execute(
select(DataSnapshot)
.where(DataSnapshot.source == self.name, DataSnapshot.is_current == True)
.order_by(DataSnapshot.completed_at.desc().nullslast(), DataSnapshot.id.desc())
.limit(1)
)
previous_snapshot = result.scalar_one_or_none()
snapshot = DataSnapshot(
datasource_id=getattr(self, "_datasource_id", 1),
task_id=task_id,
source=self.name,
snapshot_key=f"{self.name}:{task_id}",
reference_date=reference_date,
started_at=started_at,
status="running",
is_current=True,
parent_snapshot_id=previous_snapshot.id if previous_snapshot else None,
summary={},
)
db.add(snapshot)
if previous_snapshot:
previous_snapshot.is_current = False
await db.commit()
return snapshot.id
async def run(self, db: AsyncSession) -> Dict[str, Any]: async def run(self, db: AsyncSession) -> Dict[str, Any]:
"""Full pipeline: fetch -> transform -> save""" """Full pipeline: fetch -> transform -> save"""
from app.services.collectors.registry import collector_registry from app.services.collectors.registry import collector_registry
from app.models.task import CollectionTask from app.models.task import CollectionTask
from app.models.collected_data import CollectedData from app.models.data_snapshot import DataSnapshot
start_time = datetime.utcnow() start_time = datetime.now(UTC)
datasource_id = getattr(self, "_datasource_id", 1) datasource_id = getattr(self, "_datasource_id", 1)
snapshot_id: Optional[int] = None
if not collector_registry.is_active(self.name): if not collector_registry.is_active(self.name):
return {"status": "skipped", "reason": "Collector is disabled"} return {"status": "skipped", "reason": "Collector is disabled"}
@@ -56,6 +182,7 @@ class BaseCollector(ABC):
task = CollectionTask( task = CollectionTask(
datasource_id=datasource_id, datasource_id=datasource_id,
status="running", status="running",
phase="queued",
started_at=start_time, started_at=start_time,
) )
db.add(task) db.add(task)
@@ -64,88 +191,221 @@ class BaseCollector(ABC):
self._current_task = task self._current_task = task
self._db_session = db self._db_session = db
self._last_broadcast_progress = None
await self.resolve_url(db)
await self._publish_task_update(force=True)
try: try:
await self.set_phase("fetching")
raw_data = await self.fetch() raw_data = await self.fetch()
task.total_records = len(raw_data) task.total_records = len(raw_data)
await db.commit() await db.commit()
await self._publish_task_update(force=True)
if self.fail_on_empty and not raw_data:
raise RuntimeError(f"Collector {self.name} returned no data")
await self.set_phase("transforming")
data = self.transform(raw_data) data = self.transform(raw_data)
snapshot_id = await self._create_snapshot(db, task_id, data, start_time)
records_count = await self._save_data(db, data) await self.set_phase("saving")
records_count = await self._save_data(db, data, task_id=task_id, snapshot_id=snapshot_id)
task.status = "success" task.status = "success"
task.phase = "completed"
task.records_processed = records_count task.records_processed = records_count
task.progress = 100.0 task.progress = 100.0
task.completed_at = datetime.utcnow() task.completed_at = datetime.now(UTC)
await db.commit() await db.commit()
await self._publish_task_update(force=True)
return { return {
"status": "success", "status": "success",
"task_id": task_id, "task_id": task_id,
"records_processed": records_count, "records_processed": records_count,
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(), "execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
} }
except Exception as e: except Exception as e:
# Log task failure
task.status = "failed" task.status = "failed"
task.phase = "failed"
task.error_message = str(e) task.error_message = str(e)
task.completed_at = datetime.utcnow() task.completed_at = datetime.now(UTC)
if snapshot_id is not None:
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.status = "failed"
snapshot.completed_at = datetime.now(UTC)
snapshot.summary = {"error": str(e)}
await db.commit() await db.commit()
await self._publish_task_update(force=True)
return { return {
"status": "failed", "status": "failed",
"task_id": task_id, "task_id": task_id,
"error": str(e), "error": str(e),
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(), "execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
} }
async def _save_data(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int: async def _save_data(
self,
db: AsyncSession,
data: List[Dict[str, Any]],
task_id: Optional[int] = None,
snapshot_id: Optional[int] = None,
) -> int:
"""Save transformed data to database""" """Save transformed data to database"""
from app.models.collected_data import CollectedData from app.models.collected_data import CollectedData
from app.models.data_snapshot import DataSnapshot
if not data: if not data:
if snapshot_id is not None:
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.record_count = 0
snapshot.summary = {"created": 0, "updated": 0, "unchanged": 0}
snapshot.status = "success"
snapshot.completed_at = datetime.now(UTC)
await db.commit()
return 0 return 0
collected_at = datetime.utcnow() collected_at = datetime.now(UTC)
records_added = 0 records_added = 0
created_count = 0
updated_count = 0
unchanged_count = 0
seen_entity_keys: set[str] = set()
previous_current_keys: set[str] = set()
previous_current_result = await db.execute(
select(CollectedData.entity_key).where(
CollectedData.source == self.name,
CollectedData.is_current == True,
)
)
previous_current_keys = {row[0] for row in previous_current_result.fetchall() if row[0]}
for i, item in enumerate(data): for i, item in enumerate(data):
print(
f"DEBUG: Saving item {i}: name={item.get('name')}, metadata={item.get('metadata', 'NOT FOUND')}"
)
raw_metadata = item.get("metadata", {})
extra_data = build_dynamic_metadata(
raw_metadata,
country=item.get("country"),
city=item.get("city"),
latitude=item.get("latitude"),
longitude=item.get("longitude"),
value=item.get("value"),
unit=item.get("unit"),
)
normalized_country = normalize_country(item.get("country"))
if normalized_country is not None:
extra_data["country"] = normalized_country
if item.get("country") and normalized_country != item.get("country"):
extra_data["raw_country"] = item.get("country")
if normalized_country is None:
extra_data["country_validation"] = "invalid"
source_id = item.get("source_id") or item.get("id")
reference_date = (
self._parse_reference_date(item.get("reference_date"))
)
source_id_str = str(source_id) if source_id is not None else None
entity_key = f"{self.name}:{source_id_str}" if source_id_str else f"{self.name}:{i}"
previous_record = None
if entity_key and entity_key not in seen_entity_keys:
result = await db.execute(
select(CollectedData)
.where(
CollectedData.source == self.name,
CollectedData.entity_key == entity_key,
CollectedData.is_current == True,
)
.order_by(CollectedData.collected_at.desc().nullslast(), CollectedData.id.desc())
)
previous_records = result.scalars().all()
if previous_records:
previous_record = previous_records[0]
for old_record in previous_records:
old_record.is_current = False
record = CollectedData( record = CollectedData(
snapshot_id=snapshot_id,
task_id=task_id,
source=self.name, source=self.name,
source_id=item.get("source_id") or item.get("id"), source_id=source_id_str,
entity_key=entity_key,
data_type=self.data_type, data_type=self.data_type,
name=item.get("name"), name=item.get("name"),
title=item.get("title"), title=item.get("title"),
description=item.get("description"), description=item.get("description"),
country=item.get("country"), extra_data=extra_data,
city=item.get("city"),
latitude=str(item.get("latitude", ""))
if item.get("latitude") is not None
else None,
longitude=str(item.get("longitude", ""))
if item.get("longitude") is not None
else None,
value=item.get("value"),
unit=item.get("unit"),
extra_data=item.get("metadata", {}),
collected_at=collected_at, collected_at=collected_at,
reference_date=datetime.fromisoformat( reference_date=reference_date,
item.get("reference_date").replace("Z", "+00:00")
)
if item.get("reference_date")
else None,
is_valid=1, is_valid=1,
is_current=True,
previous_record_id=previous_record.id if previous_record else None,
deleted_at=None,
) )
if previous_record is None:
record.change_type = "created"
record.change_summary = {}
created_count += 1
else:
previous_payload = self._build_comparable_payload(previous_record)
current_payload = self._build_comparable_payload(record)
if current_payload == previous_payload:
record.change_type = "unchanged"
record.change_summary = {}
unchanged_count += 1
else:
changed_fields = [
key for key in current_payload.keys() if current_payload[key] != previous_payload.get(key)
]
record.change_type = "updated"
record.change_summary = {"changed_fields": changed_fields}
updated_count += 1
db.add(record) db.add(record)
seen_entity_keys.add(entity_key)
records_added += 1 records_added += 1
if i % 100 == 0: if i % 100 == 0:
self.update_progress(i + 1) await self.update_progress(i + 1, commit=True)
await db.commit()
if snapshot_id is not None:
deleted_keys = previous_current_keys - seen_entity_keys
await db.execute(
text(
"""
UPDATE collected_data
SET is_current = FALSE
WHERE source = :source
AND snapshot_id IS DISTINCT FROM :snapshot_id
AND COALESCE(is_current, TRUE) = TRUE
"""
),
{"source": self.name, "snapshot_id": snapshot_id},
)
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.record_count = records_added
snapshot.status = "success"
snapshot.completed_at = datetime.now(UTC)
snapshot.summary = {
"created": created_count,
"updated": updated_count,
"unchanged": unchanged_count,
"deleted": len(deleted_keys),
}
await db.commit() await db.commit()
self.update_progress(len(data)) await self.update_progress(len(data), force=True)
return records_added return records_added
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int: async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
@@ -192,8 +452,8 @@ async def log_task(
status=status, status=status,
records_processed=records_processed, records_processed=records_processed,
error_message=error_message, error_message=error_message,
started_at=datetime.utcnow(), started_at=datetime.now(UTC),
completed_at=datetime.utcnow(), completed_at=datetime.now(UTC),
) )
db.add(task) db.add(task)
await db.commit() await db.commit()

View File

@@ -0,0 +1,313 @@
"""Shared helpers for BGP collectors."""
from __future__ import annotations
import hashlib
import ipaddress
from collections import Counter, defaultdict
from datetime import UTC, datetime
from typing import Any
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.models.bgp_anomaly import BGPAnomaly
from app.models.collected_data import CollectedData
# City-level geographic locations for RIPE RIS route collectors (rrcNN),
# used below by normalize_bgp_event to geo-tag events by the collector
# that observed them. Lookups for unknown collectors fall back to {}.
# NOTE(review): rrc02/08/09/17 are absent — presumably decommissioned
# collectors; confirm against the current RIPE RIS collector list.
RIPE_RIS_COLLECTOR_COORDS: dict[str, dict[str, Any]] = {
    "rrc00": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc01": {"city": "London", "country": "United Kingdom", "latitude": 51.5072, "longitude": -0.1276},
    "rrc03": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc04": {"city": "Geneva", "country": "Switzerland", "latitude": 46.2044, "longitude": 6.1432},
    "rrc05": {"city": "Vienna", "country": "Austria", "latitude": 48.2082, "longitude": 16.3738},
    "rrc06": {"city": "Otemachi", "country": "Japan", "latitude": 35.686, "longitude": 139.7671},
    "rrc07": {"city": "Stockholm", "country": "Sweden", "latitude": 59.3293, "longitude": 18.0686},
    "rrc10": {"city": "Milan", "country": "Italy", "latitude": 45.4642, "longitude": 9.19},
    "rrc11": {"city": "New York", "country": "United States", "latitude": 40.7128, "longitude": -74.006},
    "rrc12": {"city": "Frankfurt", "country": "Germany", "latitude": 50.1109, "longitude": 8.6821},
    "rrc13": {"city": "Moscow", "country": "Russia", "latitude": 55.7558, "longitude": 37.6173},
    "rrc14": {"city": "Palo Alto", "country": "United States", "latitude": 37.4419, "longitude": -122.143},
    "rrc15": {"city": "Sao Paulo", "country": "Brazil", "latitude": -23.5558, "longitude": -46.6396},
    "rrc16": {"city": "Miami", "country": "United States", "latitude": 25.7617, "longitude": -80.1918},
    "rrc18": {"city": "Barcelona", "country": "Spain", "latitude": 41.3874, "longitude": 2.1686},
    "rrc19": {"city": "Johannesburg", "country": "South Africa", "latitude": -26.2041, "longitude": 28.0473},
    "rrc20": {"city": "Zurich", "country": "Switzerland", "latitude": 47.3769, "longitude": 8.5417},
    "rrc21": {"city": "Paris", "country": "France", "latitude": 48.8566, "longitude": 2.3522},
    "rrc22": {"city": "Bucharest", "country": "Romania", "latitude": 44.4268, "longitude": 26.1025},
    "rrc23": {"city": "Singapore", "country": "Singapore", "latitude": 1.3521, "longitude": 103.8198},
    "rrc24": {"city": "Montevideo", "country": "Uruguay", "latitude": -34.9011, "longitude": -56.1645},
    "rrc25": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc26": {"city": "Dubai", "country": "United Arab Emirates", "latitude": 25.2048, "longitude": 55.2708},
}
def _safe_int(value: Any) -> int | None:
try:
if value in (None, ""):
return None
return int(value)
except (TypeError, ValueError):
return None
def _parse_timestamp(value: Any) -> datetime:
if isinstance(value, datetime):
return value.astimezone(UTC) if value.tzinfo else value.replace(tzinfo=UTC)
if isinstance(value, (int, float)):
return datetime.fromtimestamp(value, tz=UTC)
if isinstance(value, str) and value:
normalized = value.replace("Z", "+00:00")
parsed = datetime.fromisoformat(normalized)
return parsed.astimezone(UTC) if parsed.tzinfo else parsed.replace(tzinfo=UTC)
return datetime.now(UTC)
def _normalize_as_path(raw_path: Any) -> list[int]:
if raw_path in (None, ""):
return []
if isinstance(raw_path, list):
return [asn for asn in (_safe_int(item) for item in raw_path) if asn is not None]
if isinstance(raw_path, str):
parts = raw_path.replace("{", "").replace("}", "").split()
return [asn for asn in (_safe_int(item) for item in parts) if asn is not None]
return []
def normalize_bgp_event(payload: dict[str, Any], *, project: str) -> dict[str, Any]:
    """Normalize a raw BGP update message into the shared collector record shape.

    Args:
        payload: Raw event dict. Upstream feeds name fields differently, so
            each value is probed under several aliases (``path``/``as_path``,
            ``type``/``msg_type``, ``time``/``ts``, ...). NOTE(review): the
            ``or``-chains treat any falsy value (0, "", []) as missing.
        project: Feed label (e.g. which BGP project produced the event);
            stored verbatim in the returned metadata.

    Returns:
        A record dict with ``source_id``, ``name``, ``title``, ``description``,
        ``reference_date`` (ISO UTC), collector-derived location fields, and a
        ``metadata`` dict of parsed BGP attributes plus the raw message.
    """
    # Preserve the original message for traceability; when no explicit
    # raw_message key exists, the whole payload stands in for it.
    raw_message = payload.get("raw_message", payload)
    raw_path = (
        payload.get("path")
        or payload.get("as_path")
        or payload.get("attrs", {}).get("path")
        or payload.get("attrs", {}).get("as_path")
        or []
    )
    as_path = _normalize_as_path(raw_path)

    # Map the many upstream spellings onto three canonical event types;
    # unknown non-empty types pass through, empty defaults to "announcement".
    raw_type = str(payload.get("event_type") or payload.get("type") or payload.get("msg_type") or "").lower()
    if raw_type in {"a", "announce", "announcement"}:
        event_type = "announcement"
    elif raw_type in {"w", "withdraw", "withdrawal"}:
        event_type = "withdrawal"
    elif raw_type in {"r", "rib"}:
        event_type = "rib"
    else:
        event_type = raw_type or "announcement"

    prefix = str(payload.get("prefix") or payload.get("prefixes") or payload.get("target_prefix") or "").strip()
    # Strip the outer brackets of a stringified list (e.g. "[1.2.3.0/24]").
    # NOTE(review): a multi-element or quoted list would survive only partly
    # cleaned — presumably upstream sends a single bare prefix; confirm.
    if prefix.startswith("[") and prefix.endswith("]"):
        prefix = prefix[1:-1]

    timestamp = _parse_timestamp(payload.get("timestamp") or payload.get("time") or payload.get("ts"))
    collector = str(payload.get("collector") or payload.get("host") or payload.get("router") or "unknown")
    peer_asn = _safe_int(payload.get("peer_asn") or payload.get("peer"))
    # Fall back to the AS-path origin (last hop) when no explicit origin ASN.
    origin_asn = _safe_int(payload.get("origin_asn")) or (as_path[-1] if as_path else None)

    # Deterministic event identity: same collector/peer/prefix/type/time/path
    # always hashes to the same 24-hex-char id (dedup key across batches).
    source_material = "|".join(
        [
            collector,
            str(peer_asn or ""),
            prefix,
            event_type,
            timestamp.isoformat(),
            ",".join(str(asn) for asn in as_path),
        ]
    )
    source_id = hashlib.sha1(source_material.encode("utf-8")).hexdigest()[:24]

    # A prefix is "more specific" when it is longer than the customary
    # maximum routable length (/24 for IPv4, /48 for IPv6) — a common
    # hijack/leak indicator. Unparseable prefixes leave both fields unset.
    prefix_length = None
    is_more_specific = False
    if prefix:
        try:
            network = ipaddress.ip_network(prefix, strict=False)
            prefix_length = int(network.prefixlen)
            is_more_specific = prefix_length > (24 if network.version == 4 else 48)
        except ValueError:
            prefix_length = None

    # Geo-tag by the observing collector; unknown collectors get {} and
    # therefore None location fields below.
    collector_location = RIPE_RIS_COLLECTOR_COORDS.get(collector, {})
    metadata = {
        "project": project,
        "collector": collector,
        "peer_asn": peer_asn,
        "peer_ip": payload.get("peer_ip") or payload.get("peer_address"),
        "event_type": event_type,
        "prefix": prefix,
        "origin_asn": origin_asn,
        "as_path": as_path,
        "communities": payload.get("communities") or payload.get("attrs", {}).get("communities") or [],
        "next_hop": payload.get("next_hop") or payload.get("attrs", {}).get("next_hop"),
        "med": payload.get("med") or payload.get("attrs", {}).get("med"),
        "local_pref": payload.get("local_pref") or payload.get("attrs", {}).get("local_pref"),
        "timestamp": timestamp.isoformat(),
        "as_path_length": len(as_path),
        "prefix_length": prefix_length,
        "is_more_specific": is_more_specific,
        # Constant for now; presumably a hook for peer-count weighting later.
        "visibility_weight": 1,
        "collector_location": collector_location,
        "raw_message": raw_message,
    }
    return {
        "source_id": source_id,
        "name": prefix or f"{collector}:{event_type}",
        "title": f"{event_type} {prefix}".strip(),
        "description": f"{collector} observed {event_type} for {prefix}".strip(),
        "reference_date": timestamp.isoformat(),
        "country": collector_location.get("country"),
        "city": collector_location.get("city"),
        "latitude": collector_location.get("latitude"),
        "longitude": collector_location.get("longitude"),
        "metadata": metadata,
    }
async def create_bgp_anomalies_for_batch(
    db: AsyncSession,
    *,
    source: str,
    snapshot_id: int | None,
    task_id: int | None,
    events: list[dict[str, Any]],
) -> int:
    """Derive and persist BGP anomalies from one batch of normalized events.

    Three heuristics are evaluated over the batch:

    * ``origin_change`` (critical) -- a prefix is now originated by an ASN
      not present in previously collected data for the same source.
    * ``more_specific_burst`` (high) -- two or more more-specific
      announcements cluster around the same base network within the batch.
    * ``mass_withdrawal`` (high/critical) -- three or more withdrawal events
      for the same (prefix, origin) pair within the batch.

    Anomalies whose ``entity_key`` already exists in the table are skipped,
    so re-ingesting an overlapping window does not create duplicate rows.

    Args:
        db: Async SQLAlchemy session used for both the baseline lookup and
            the inserts (committed here when anything is created).
        source: Collector name that produced the events.
        snapshot_id: Snapshot the events belong to; records from this
            snapshot are excluded when building the historical baseline.
        task_id: Collection task id recorded on each anomaly row.
        events: Normalized event dicts (each carrying a ``metadata`` dict
            as produced by ``normalize_bgp_event``).

    Returns:
        The number of new ``BGPAnomaly`` rows committed.
    """
    if not events:
        return 0
    pending_anomalies: list[BGPAnomaly] = []
    # prefix -> set of origin ASNs seen in THIS batch
    prefix_to_origins: defaultdict[str, set[int]] = defaultdict(set)
    # base network address -> more-specific announcement events in this batch
    prefix_to_more_specifics: defaultdict[str, list[dict[str, Any]]] = defaultdict(list)
    # (prefix, origin_asn) -> withdrawal count in this batch
    withdrawal_counter: Counter[tuple[str, int | None]] = Counter()
    prefixes = {event["metadata"].get("prefix") for event in events if event.get("metadata", {}).get("prefix")}
    previous_origin_map: dict[str, set[int]] = defaultdict(set)
    if prefixes:
        # Historical baseline: origins previously recorded for these prefixes
        # by the same source, excluding the current snapshot.
        previous_query = await db.execute(
            select(CollectedData).where(
                CollectedData.source == source,
                CollectedData.snapshot_id != snapshot_id,
                CollectedData.extra_data["prefix"].as_string().in_(sorted(prefixes)),
            )
        )
        for record in previous_query.scalars().all():
            metadata = record.extra_data or {}
            prefix = metadata.get("prefix")
            origin = _safe_int(metadata.get("origin_asn"))
            if prefix and origin is not None:
                previous_origin_map[prefix].add(origin)
    # Single pass over the batch to populate all three heuristic inputs.
    for event in events:
        metadata = event.get("metadata", {})
        prefix = metadata.get("prefix")
        origin_asn = _safe_int(metadata.get("origin_asn"))
        if not prefix:
            continue
        if origin_asn is not None:
            prefix_to_origins[prefix].add(origin_asn)
        if metadata.get("is_more_specific"):
            # Group by the network address only (mask stripped) so that
            # sibling more-specifics of one base network cluster together.
            prefix_to_more_specifics[prefix.split("/")[0]].append(event)
        if metadata.get("event_type") == "withdrawal":
            withdrawal_counter[(prefix, origin_asn)] += 1
    # Heuristic 1: origin ASN changed relative to the historical baseline.
    for prefix, origins in prefix_to_origins.items():
        historic = previous_origin_map.get(prefix, set())
        new_origins = sorted(origin for origin in origins if origin not in historic)
        # Only fires when a baseline exists; a brand-new prefix is not an anomaly.
        if historic and new_origins:
            for new_origin in new_origins:
                pending_anomalies.append(
                    BGPAnomaly(
                        snapshot_id=snapshot_id,
                        task_id=task_id,
                        source=source,
                        anomaly_type="origin_change",
                        severity="critical",
                        status="active",
                        entity_key=f"origin_change:{prefix}:{new_origin}",
                        prefix=prefix,
                        # Representative baseline origin (lowest ASN).
                        origin_asn=sorted(historic)[0],
                        new_origin_asn=new_origin,
                        peer_scope=[],
                        started_at=datetime.now(UTC),
                        confidence=0.86,
                        summary=f"Prefix {prefix} is now originated by AS{new_origin}, outside the current baseline.",
                        evidence={"previous_origins": sorted(historic), "current_origins": sorted(origins)},
                    )
                )
    # Heuristic 2: burst of more-specific announcements around one base network.
    for root_prefix, more_specifics in prefix_to_more_specifics.items():
        if len(more_specifics) >= 2:
            sample = more_specifics[0]["metadata"]
            pending_anomalies.append(
                BGPAnomaly(
                    snapshot_id=snapshot_id,
                    task_id=task_id,
                    source=source,
                    anomaly_type="more_specific_burst",
                    severity="high",
                    status="active",
                    entity_key=f"more_specific_burst:{root_prefix}:{len(more_specifics)}",
                    prefix=sample.get("prefix"),
                    origin_asn=_safe_int(sample.get("origin_asn")),
                    new_origin_asn=None,
                    # Collectors that observed the burst.
                    peer_scope=sorted(
                        {
                            str(item.get("metadata", {}).get("collector") or "")
                            for item in more_specifics
                            if item.get("metadata", {}).get("collector")
                        }
                    ),
                    started_at=datetime.now(UTC),
                    confidence=0.72,
                    summary=f"{len(more_specifics)} more-specific announcements clustered around {root_prefix}.",
                    # Cap the attached evidence to the first 10 events.
                    evidence={"events": [item.get("metadata") for item in more_specifics[:10]]},
                )
            )
    # Heuristic 3: repeated withdrawals of the same (prefix, origin) pair.
    for (prefix, origin_asn), count in withdrawal_counter.items():
        if count >= 3:
            pending_anomalies.append(
                BGPAnomaly(
                    snapshot_id=snapshot_id,
                    task_id=task_id,
                    source=source,
                    anomaly_type="mass_withdrawal",
                    # Escalate to critical from 8 withdrawals up.
                    severity="high" if count < 8 else "critical",
                    status="active",
                    entity_key=f"mass_withdrawal:{prefix}:{origin_asn}:{count}",
                    prefix=prefix,
                    origin_asn=origin_asn,
                    new_origin_asn=None,
                    peer_scope=[],
                    started_at=datetime.now(UTC),
                    # Confidence grows with the withdrawal count, capped at 0.95.
                    confidence=min(0.55 + (count * 0.05), 0.95),
                    summary=f"{count} withdrawal events observed for {prefix} in the current ingest window.",
                    evidence={"withdrawal_count": count},
                )
            )
    if not pending_anomalies:
        return 0
    # De-duplicate against anomalies already stored (by entity_key).
    existing_result = await db.execute(
        select(BGPAnomaly.entity_key).where(
            BGPAnomaly.entity_key.in_([item.entity_key for item in pending_anomalies])
        )
    )
    existing_keys = {row[0] for row in existing_result.fetchall()}
    created = 0
    for anomaly in pending_anomalies:
        if anomaly.entity_key in existing_keys:
            continue
        db.add(anomaly)
        created += 1
    if created:
        await db.commit()
    return created

View File

@@ -0,0 +1,120 @@
"""BGPStream backfill collector."""
from __future__ import annotations
import asyncio
import json
import time
import urllib.parse
import urllib.request
from typing import Any
from app.services.collectors.base import BaseCollector
from app.services.collectors.bgp_common import create_bgp_anomalies_for_batch, normalize_bgp_event
class BGPStreamBackfillCollector(BaseCollector):
    """Backfill collector that queries the BGPStream broker for historical windows."""

    name = "bgpstream_bgp"
    priority = "P1"
    module = "L3"
    frequency_hours = 6
    data_type = "bgp_rib"
    fail_on_empty = True

    async def fetch(self) -> list[dict[str, Any]]:
        """Fetch broker resource windows, running the blocking call on a worker thread."""
        if not self._resolved_url:
            raise RuntimeError("BGPStream URL is not configured")
        return await asyncio.to_thread(self._fetch_resource_windows)

    def _fetch_resource_windows(self) -> list[dict[str, Any]]:
        """Blocking broker query covering the last 24 hours (ending one hour ago)."""
        window_end = int(time.time()) - 3600
        window_start = window_end - 86400
        query = urllib.parse.urlencode(
            [
                ("projects[]", "routeviews"),
                ("collectors[]", "route-views2"),
                ("types[]", "updates"),
                ("intervals[]", f"{window_start},{window_end}"),
            ]
        )
        broker_request = urllib.request.Request(
            f"{self._resolved_url}/data?{query}",
            headers={"User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)"},
        )
        with urllib.request.urlopen(broker_request, timeout=30) as response:
            payload = json.loads(response.read().decode())
        if payload.get("error"):
            raise RuntimeError(f"BGPStream broker error: {payload['error']}")
        return payload.get("data", {}).get("resources", [])

    def transform(self, raw_data: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Normalize broker records (file windows or decoded events) into event dicts."""
        normalized_events: list[dict[str, Any]] = []
        window_keys = ("filename", "url", "startTime", "start_time")
        for record in raw_data:
            if not isinstance(record, dict):
                continue
            looks_like_window = any(key in record for key in window_keys)
            if not looks_like_window and {"collector", "prefix"} <= set(record.keys()):
                # Already a decoded BGP event: normalize it directly.
                normalized_events.append(normalize_bgp_event(record, project="bgpstream"))
                continue
            # Broker responses provide file windows rather than decoded events.
            collector = record.get("collector") or record.get("project") or "bgpstream"
            window_time = record.get("time") or record.get("startTime") or record.get("start_time")
            display_name = record.get("filename") or record.get("url") or f"{collector}-window"
            window_event = normalize_bgp_event(
                {
                    "collector": collector,
                    "event_type": "rib",
                    "prefix": record.get("prefix") or "historical-window",
                    "timestamp": window_time,
                    "origin_asn": record.get("origin_asn"),
                    "path": record.get("path") or [],
                    "raw_message": record,
                },
                project="bgpstream",
            )
            window_event.update(
                {
                    "name": display_name,
                    "title": f"BGPStream {collector}",
                    "description": "Historical BGPStream backfill window",
                    "metadata": {**window_event["metadata"], "broker_record": record},
                }
            )
            normalized_events.append(window_event)
        # Keep the batch around so run() can derive anomalies from it.
        self._latest_transformed_batch = normalized_events
        return normalized_events

    async def run(self, db):
        """Run the standard collection pipeline, then derive anomalies for the batch."""
        result = await super().run(db)
        if result.get("status") != "success":
            return result
        task_id = result.get("task_id")
        result["anomalies_created"] = await create_bgp_anomalies_for_batch(
            db,
            source=self.name,
            snapshot_id=await self._resolve_snapshot_id(db, task_id),
            task_id=task_id,
            events=getattr(self, "_latest_transformed_batch", []),
        )
        return result

    async def _resolve_snapshot_id(self, db, task_id: int | None) -> int | None:
        """Return the most recent snapshot id recorded for *task_id*, if any."""
        if task_id is None:
            return None
        from sqlalchemy import select

        from app.models.data_snapshot import DataSnapshot

        query = (
            select(DataSnapshot.id)
            .where(DataSnapshot.task_id == task_id)
            .order_by(DataSnapshot.id.desc())
        )
        return (await db.execute(query)).scalar_one_or_none()

View File

@@ -0,0 +1,115 @@
"""CelesTrak TLE Collector
Collects satellite TLE (Two-Line Element) data from CelesTrak.org.
Free, no authentication required.
"""
import json
from typing import Dict, Any, List
import httpx
from app.core.satellite_tle import build_tle_lines_from_elements
from app.services.collectors.base import BaseCollector
class CelesTrakTLECollector(BaseCollector):
    """Collects satellite TLE/GP data from CelesTrak's GP API (free, no auth)."""

    name = "celestrak_tle"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "satellite_tle"

    @property
    def base_url(self) -> str:
        """GP query endpoint on CelesTrak."""
        return "https://celestrak.org/NORAD/elements/gp.php"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Fetch raw GP records for a fixed set of satellite groups.

        Each group is fetched independently and best-effort; a failure in
        one group does not abort the others. Falls back to sample data when
        nothing at all was fetched.

        Returns:
            Raw CelesTrak GP records (uppercase-key JSON objects).
        """
        satellite_groups = [
            "starlink",
            "gps-ops",
            "galileo",
            "glonass",
            "beidou",
            "leo",
            "geo",
            "iridium-next",
        ]
        all_satellites: List[Dict[str, Any]] = []
        async with httpx.AsyncClient(timeout=120.0) as client:
            for group in satellite_groups:
                try:
                    url = f"{self.base_url}?GROUP={group}&FORMAT=json"
                    response = await client.get(url)
                    if response.status_code == 200:
                        data = response.json()
                        if isinstance(data, list):
                            all_satellites.extend(data)
                            print(f"CelesTrak: Fetched {len(data)} satellites from group '{group}'")
                except Exception as e:
                    # Best-effort per group: log and continue with the rest.
                    print(f"CelesTrak: Error fetching group '{group}': {e}")
        if not all_satellites:
            return self._get_sample_data()
        print(f"CelesTrak: Total satellites fetched: {len(all_satellites)}")
        # Return raw data - base.run() will call transform()
        return all_satellites

    def transform(self, raw_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Transform raw GP records (uppercase CelesTrak keys) to internal format."""
        transformed = []
        for item in raw_data:
            # Build a normalized TLE pair from the orbital elements once, on
            # the backend, so downstream consumers always have usable lines.
            tle_line1, tle_line2 = build_tle_lines_from_elements(
                norad_cat_id=item.get("NORAD_CAT_ID"),
                epoch=item.get("EPOCH"),
                inclination=item.get("INCLINATION"),
                raan=item.get("RA_OF_ASC_NODE"),
                eccentricity=item.get("ECCENTRICITY"),
                arg_of_perigee=item.get("ARG_OF_PERICENTER"),
                mean_anomaly=item.get("MEAN_ANOMALY"),
                mean_motion=item.get("MEAN_MOTION"),
            )
            transformed.append(
                {
                    "name": item.get("OBJECT_NAME", "Unknown"),
                    "reference_date": item.get("EPOCH", ""),
                    "metadata": {
                        "norad_cat_id": item.get("NORAD_CAT_ID"),
                        "international_designator": item.get("OBJECT_ID"),
                        "epoch": item.get("EPOCH"),
                        "mean_motion": item.get("MEAN_MOTION"),
                        "eccentricity": item.get("ECCENTRICITY"),
                        "inclination": item.get("INCLINATION"),
                        "raan": item.get("RA_OF_ASC_NODE"),
                        "arg_of_perigee": item.get("ARG_OF_PERICENTER"),
                        "mean_anomaly": item.get("MEAN_ANOMALY"),
                        "classification_type": item.get("CLASSIFICATION_TYPE"),
                        "bstar": item.get("BSTAR"),
                        "mean_motion_dot": item.get("MEAN_MOTION_DOT"),
                        "mean_motion_ddot": item.get("MEAN_MOTION_DDOT"),
                        "ephemeris_type": item.get("EPHEMERIS_TYPE"),
                        # Prefer the original TLE lines when the source provides them.
                        # If they are missing, store a normalized TLE pair built once on the backend.
                        "tle_line1": item.get("TLE_LINE1") or tle_line1,
                        "tle_line2": item.get("TLE_LINE2") or tle_line2,
                    },
                }
            )
        return transformed

    def _get_sample_data(self) -> List[Dict[str, Any]]:
        """Return one sample record in CelesTrak GP schema.

        Keys are uppercase to match what ``transform()`` reads; the previous
        lowercase keys made the fallback row transform into a nameless entry
        with empty metadata.
        """
        return [
            {
                "OBJECT_NAME": "STARLINK-1000",
                "NORAD_CAT_ID": 44720,
                "OBJECT_ID": "2019-029AZ",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.79234567,
                "ECCENTRICITY": 0.0001234,
                "INCLINATION": 53.0,
                "RA_OF_ASC_NODE": 120.0,
                "ARG_OF_PERICENTER": 90.0,
                "MEAN_ANOMALY": 270.0,
            },
        ]

View File

@@ -10,11 +10,12 @@ Some endpoints require authentication for higher rate limits.
import asyncio import asyncio
import os import os
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
import httpx import httpx
from app.services.collectors.base import HTTPCollector from app.services.collectors.base import HTTPCollector
# Cloudflare API token (optional - for higher rate limits) # Cloudflare API token (optional - for higher rate limits)
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "") CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")
@@ -58,7 +59,7 @@ class CloudflareRadarDeviceCollector(HTTPCollector):
"other_percent": float(summary.get("other", 0)), "other_percent": float(summary.get("other", 0)),
"date_range": result.get("meta", {}).get("dateRange", {}), "date_range": result.get("meta", {}).get("dateRange", {}),
}, },
"reference_date": datetime.utcnow().isoformat(), "reference_date": datetime.now(UTC).isoformat(),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -106,7 +107,7 @@ class CloudflareRadarTrafficCollector(HTTPCollector):
"requests": item.get("requests"), "requests": item.get("requests"),
"visit_duration": item.get("visitDuration"), "visit_duration": item.get("visitDuration"),
}, },
"reference_date": item.get("datetime", datetime.utcnow().isoformat()), "reference_date": item.get("datetime", datetime.now(UTC).isoformat()),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -154,7 +155,7 @@ class CloudflareRadarTopASCollector(HTTPCollector):
"traffic_share": item.get("trafficShare"), "traffic_share": item.get("trafficShare"),
"country_code": item.get("location", {}).get("countryCode"), "country_code": item.get("location", {}).get("countryCode"),
}, },
"reference_date": datetime.utcnow().isoformat(), "reference_date": datetime.now(UTC).isoformat(),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -6,13 +6,14 @@ https://epoch.ai/data/gpu-clusters
import re import re
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import httpx import httpx
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
class EpochAIGPUCollector(BaseCollector): class EpochAIGPUCollector(BaseCollector):
name = "epoch_ai_gpu" name = "epoch_ai_gpu"
priority = "P0" priority = "P0"
@@ -63,7 +64,7 @@ class EpochAIGPUCollector(BaseCollector):
"metadata": { "metadata": {
"raw_data": perf_cell, "raw_data": perf_cell,
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
data.append(entry) data.append(entry)
except (ValueError, IndexError, AttributeError): except (ValueError, IndexError, AttributeError):
@@ -113,6 +114,6 @@ class EpochAIGPUCollector(BaseCollector):
"metadata": { "metadata": {
"note": "Sample data - Epoch AI page structure may vary", "note": "Sample data - Epoch AI page structure may vary",
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
}, },
] ]

View File

@@ -4,12 +4,13 @@ Collects landing point data from FAO CSV API.
""" """
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
import httpx import httpx
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
class FAOLandingPointCollector(BaseCollector): class FAOLandingPointCollector(BaseCollector):
name = "fao_landing_points" name = "fao_landing_points"
priority = "P1" priority = "P1"
@@ -57,7 +58,7 @@ class FAOLandingPointCollector(BaseCollector):
"is_tbd": is_tbd, "is_tbd": is_tbd,
"original_id": feature_id, "original_id": feature_id,
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, IndexError): except (ValueError, IndexError):

View File

@@ -7,11 +7,12 @@ https://huggingface.co/spaces
""" """
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
from app.services.collectors.base import HTTPCollector from app.services.collectors.base import HTTPCollector
class HuggingFaceModelCollector(HTTPCollector): class HuggingFaceModelCollector(HTTPCollector):
name = "huggingface_models" name = "huggingface_models"
priority = "P1" priority = "P1"
@@ -45,7 +46,7 @@ class HuggingFaceModelCollector(HTTPCollector):
"library_name": item.get("library_name"), "library_name": item.get("library_name"),
"created_at": item.get("createdAt"), "created_at": item.get("createdAt"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -86,7 +87,7 @@ class HuggingFaceDatasetCollector(HTTPCollector):
"tags": (item.get("tags", []) or [])[:10], "tags": (item.get("tags", []) or [])[:10],
"created_at": item.get("createdAt"), "created_at": item.get("createdAt"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -127,7 +128,7 @@ class HuggingFaceSpacesCollector(HTTPCollector):
"tags": (item.get("tags", []) or [])[:10], "tags": (item.get("tags", []) or [])[:10],
"created_at": item.get("createdAt"), "created_at": item.get("createdAt"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -13,11 +13,12 @@ To get higher limits, set PEERINGDB_API_KEY environment variable.
import asyncio import asyncio
import os import os
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
import httpx import httpx
from app.services.collectors.base import HTTPCollector from app.services.collectors.base import HTTPCollector
# PeeringDB API key - read from environment variable # PeeringDB API key - read from environment variable
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "") PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")
@@ -75,7 +76,7 @@ class PeeringDBIXPCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}") print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {} return {}
async def collect(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
"""Collect IXP data from PeeringDB with rate limit handling""" """Collect IXP data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry() response_data = await self.fetch_with_retry()
if not response_data: if not response_data:
@@ -105,7 +106,7 @@ class PeeringDBIXPCollector(HTTPCollector):
"created": item.get("created"), "created": item.get("created"),
"updated": item.get("updated"), "updated": item.get("updated"),
}, },
"reference_date": datetime.utcnow().isoformat(), "reference_date": datetime.now(UTC).isoformat(),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -176,7 +177,7 @@ class PeeringDBNetworkCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}") print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {} return {}
async def collect(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
"""Collect Network data from PeeringDB with rate limit handling""" """Collect Network data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry() response_data = await self.fetch_with_retry()
if not response_data: if not response_data:
@@ -208,7 +209,7 @@ class PeeringDBNetworkCollector(HTTPCollector):
"created": item.get("created"), "created": item.get("created"),
"updated": item.get("updated"), "updated": item.get("updated"),
}, },
"reference_date": datetime.utcnow().isoformat(), "reference_date": datetime.now(UTC).isoformat(),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -279,7 +280,7 @@ class PeeringDBFacilityCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}") print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {} return {}
async def collect(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
"""Collect Facility data from PeeringDB with rate limit handling""" """Collect Facility data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry() response_data = await self.fetch_with_retry()
if not response_data: if not response_data:
@@ -310,7 +311,7 @@ class PeeringDBFacilityCollector(HTTPCollector):
"created": item.get("created"), "created": item.get("created"),
"updated": item.get("updated"), "updated": item.get("updated"),
}, },
"reference_date": datetime.utcnow().isoformat(), "reference_date": datetime.now(UTC).isoformat(),
} }
data.append(entry) data.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):

View File

@@ -0,0 +1,131 @@
"""RIPE RIS Live collector."""
from __future__ import annotations
import asyncio
import json
import urllib.request
from typing import Any
from app.services.collectors.base import BaseCollector
from app.services.collectors.bgp_common import create_bgp_anomalies_for_batch, normalize_bgp_event
class RISLiveCollector(BaseCollector):
    """Collects live BGP UPDATE messages from the RIPE RIS Live stream."""

    name = "ris_live_bgp"
    priority = "P1"
    module = "L3"
    frequency_hours = 1
    data_type = "bgp_update"
    fail_on_empty = True
    # Hard cap on messages read per fetch.
    max_messages = 100
    idle_timeout_seconds = 15

    async def fetch(self) -> list[dict[str, Any]]:
        """Read a bounded batch of RIS messages on a worker thread."""
        # NOTE(review): _resolved_url is only validated here; the stream URL
        # itself is hard-coded in _fetch_via_stream — confirm this is intended.
        if not self._resolved_url:
            raise RuntimeError("RIS Live URL is not configured")
        return await asyncio.to_thread(self._fetch_via_stream)

    def _fetch_via_stream(self) -> list[dict[str, Any]]:
        """Blocking read of up to ``max_messages`` ris_message payloads."""
        stream_url = "https://ris-live.ripe.net/v1/stream/?format=json&client=planet-ris-live"
        subscription = json.dumps(
            {
                "host": "rrc00",
                "type": "UPDATE",
                "require": "announcements",
            }
        )
        stream_request = urllib.request.Request(
            stream_url,
            headers={"X-RIS-Subscribe": subscription},
        )
        collected: list[dict[str, Any]] = []
        with urllib.request.urlopen(stream_request, timeout=20) as response:
            while len(collected) < self.max_messages:
                raw_line = response.readline().decode().strip()
                if not raw_line:
                    # Empty line: treat as end of the batch.
                    break
                message = json.loads(raw_line)
                if message.get("type") != "ris_message":
                    continue
                body = message.get("data", {})
                if isinstance(body, dict):
                    collected.append(body)
        return collected

    def transform(self, raw_data: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Expand each RIS message into one normalized event per prefix."""
        events: list[dict[str, Any]] = []

        def _normalize(message: dict[str, Any], **overrides: Any) -> dict[str, Any]:
            # Derive the collector name from the RIS host, then apply
            # per-event overrides (event_type / prefix / next_hop).
            payload = {
                **message,
                "collector": message.get("host", "").replace(".ripe.net", ""),
                **overrides,
            }
            return normalize_bgp_event(payload, project="ris-live")

        for message in raw_data:
            announcements = message.get("announcements") or []
            withdrawals = message.get("withdrawals") or []
            for announcement in announcements:
                hop = announcement.get("next_hop")
                for prefix in announcement.get("prefixes") or []:
                    events.append(
                        _normalize(
                            message,
                            event_type="announcement",
                            prefix=prefix,
                            next_hop=hop,
                        )
                    )
            for prefix in withdrawals:
                events.append(_normalize(message, event_type="withdrawal", prefix=prefix))
            if not announcements and not withdrawals:
                # Messages with neither list still produce one event.
                events.append(_normalize(message))
        # Keep the batch around so run() can derive anomalies from it.
        self._latest_transformed_batch = events
        return events

    async def run(self, db):
        """Run the standard collection pipeline, then derive anomalies for the batch."""
        result = await super().run(db)
        if result.get("status") != "success":
            return result
        task_id = result.get("task_id")
        result["anomalies_created"] = await create_bgp_anomalies_for_batch(
            db,
            source=self.name,
            snapshot_id=await self._resolve_snapshot_id(db, task_id),
            task_id=task_id,
            events=getattr(self, "_latest_transformed_batch", []),
        )
        return result

    async def _resolve_snapshot_id(self, db, task_id: int | None) -> int | None:
        """Return the most recent snapshot id recorded for *task_id*, if any."""
        if task_id is None:
            return None
        from sqlalchemy import select

        from app.models.data_snapshot import DataSnapshot

        query = (
            select(DataSnapshot.id)
            .where(DataSnapshot.task_id == task_id)
            .order_by(DataSnapshot.id.desc())
        )
        return (await db.execute(query)).scalar_one_or_none()

View File

@@ -0,0 +1,239 @@
"""Space-Track TLE Collector
Collects satellite TLE (Two-Line Element) data from Space-Track.org.
API documentation: https://www.space-track.org/documentation
"""
import json
from typing import Dict, Any, List
import httpx
from app.services.collectors.base import BaseCollector
from app.core.data_sources import get_data_sources_config
from app.core.satellite_tle import build_tle_lines_from_elements
class SpaceTrackTLECollector(BaseCollector):
    """Collects satellite TLE/GP data from Space-Track.org.

    Uses the ajaxauth login flow, with one alternate-login retry when the
    first attempt is rejected with 403. Falls back to bundled sample data
    when credentials are missing or any step of the flow fails.
    """

    name = "spacetrack_tle"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "satellite_tle"

    @property
    def base_url(self) -> str:
        """Resolved URL if configured, otherwise the YAML data-source entry."""
        config = get_data_sources_config()
        if self._resolved_url:
            return self._resolved_url
        return config.get_yaml_url("spacetrack_tle")

    async def fetch(self) -> List[Dict[str, Any]]:
        """Log in to Space-Track and fetch GP records for the ISS (NORAD 25544).

        Returns:
            Raw Space-Track GP JSON records, or sample data when credentials
            are absent or any request fails.
        """
        from app.core.config import settings

        username = settings.SPACETRACK_USERNAME
        password = settings.SPACETRACK_PASSWORD
        if not username or not password:
            print("SPACETRACK: No credentials configured, using sample data")
            return self._get_sample_data()
        print(f"SPACETRACK: Attempting to fetch TLE data with username: {username}")
        try:
            async with httpx.AsyncClient(
                timeout=120.0,
                follow_redirects=True,
                headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    "Accept": "application/json, text/html, */*",
                    "Accept-Language": "en-US,en;q=0.9",
                    "Referer": "https://www.space-track.org/",
                },
            ) as client:
                # Visit the landing page first so any bootstrap cookies are set.
                await client.get("https://www.space-track.org/")
                login_response = await client.post(
                    "https://www.space-track.org/ajaxauth/login",
                    data={
                        "identity": username,
                        "password": password,
                    },
                )
                print(f"SPACETRACK: Login response status: {login_response.status_code}")
                print(f"SPACETRACK: Login response URL: {login_response.url}")
                if login_response.status_code == 403:
                    # Retry once with a fresh client/session before giving up.
                    print("SPACETRACK: Trying alternate login method...")
                    async with httpx.AsyncClient(
                        timeout=120.0,
                        follow_redirects=True,
                    ) as alt_client:
                        await alt_client.get("https://www.space-track.org/")
                        alt_login = await alt_client.post(
                            "https://www.space-track.org/ajaxauth/login",
                            data={
                                "identity": username,
                                "password": password,
                            },
                        )
                        print(f"SPACETRACK: Alt login status: {alt_login.status_code}")
                        if alt_login.status_code == 200:
                            tle_response = await alt_client.get(
                                "https://www.space-track.org/basicspacedata/query/class/gp/NORAD_CAT_ID/25544/format/json"
                            )
                            if tle_response.status_code == 200:
                                data = tle_response.json()
                                print(f"SPACETRACK: Received {len(data)} records via alt method")
                                return data
                if login_response.status_code != 200:
                    print("SPACETRACK: Login failed, using sample data")
                    return self._get_sample_data()
                tle_response = await client.get(
                    "https://www.space-track.org/basicspacedata/query/class/gp/NORAD_CAT_ID/25544/format/json"
                )
                print(f"SPACETRACK: TLE query status: {tle_response.status_code}")
                if tle_response.status_code != 200:
                    print("SPACETRACK: Query failed, using sample data")
                    return self._get_sample_data()
                data = tle_response.json()
                print(f"SPACETRACK: Received {len(data)} records")
                return data
        except Exception as e:
            print(f"SPACETRACK: Error - {e}, using sample data")
            return self._get_sample_data()

    def transform(self, raw_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Transform raw GP records (uppercase Space-Track keys) to internal format."""
        transformed = []
        for item in raw_data:
            # Build a normalized TLE pair from the orbital elements so a
            # usable pair exists even when the source omits TLE lines.
            # NOTE(review): the live Space-Track GP class names these fields
            # RA_OF_ASC_NODE / ARG_OF_PERICENTER — confirm the RAAN /
            # ARG_OF_PERIGEE keys used here against an actual response.
            tle_line1, tle_line2 = build_tle_lines_from_elements(
                norad_cat_id=item.get("NORAD_CAT_ID"),
                epoch=item.get("EPOCH"),
                inclination=item.get("INCLINATION"),
                raan=item.get("RAAN"),
                eccentricity=item.get("ECCENTRICITY"),
                arg_of_perigee=item.get("ARG_OF_PERIGEE"),
                mean_anomaly=item.get("MEAN_ANOMALY"),
                mean_motion=item.get("MEAN_MOTION"),
            )
            transformed.append(
                {
                    "name": item.get("OBJECT_NAME", "Unknown"),
                    "reference_date": item.get("EPOCH", ""),
                    "metadata": {
                        "norad_cat_id": item.get("NORAD_CAT_ID"),
                        "international_designator": item.get("INTL_DESIGNATOR"),
                        "epoch": item.get("EPOCH"),
                        "mean_motion": item.get("MEAN_MOTION"),
                        "eccentricity": item.get("ECCENTRICITY"),
                        "inclination": item.get("INCLINATION"),
                        "raan": item.get("RAAN"),
                        "arg_of_perigee": item.get("ARG_OF_PERIGEE"),
                        "mean_anomaly": item.get("MEAN_ANOMALY"),
                        "ephemeris_type": item.get("EPHEMERIS_TYPE"),
                        "classification_type": item.get("CLASSIFICATION_TYPE"),
                        "element_set_no": item.get("ELEMENT_SET_NO"),
                        "rev_at_epoch": item.get("REV_AT_EPOCH"),
                        "bstar": item.get("BSTAR"),
                        "mean_motion_dot": item.get("MEAN_MOTION_DOT"),
                        "mean_motion_ddot": item.get("MEAN_MOTION_DDOT"),
                        # Prefer original lines from the source, but keep a backend-built pair as a stable fallback.
                        "tle_line1": item.get("TLE_LINE1") or item.get("TLE1") or tle_line1,
                        "tle_line2": item.get("TLE_LINE2") or item.get("TLE2") or tle_line2,
                    },
                }
            )
        return transformed

    def _get_sample_data(self) -> List[Dict[str, Any]]:
        """Return sample records in Space-Track GP schema.

        Keys are uppercase to match what ``transform()`` reads; the previous
        lowercase keys made the fallback rows transform into nameless entries
        with empty metadata.
        """
        return [
            {
                "OBJECT_NAME": "ISS (ZARYA)",
                "NORAD_CAT_ID": 25544,
                "INTL_DESIGNATOR": "1998-067A",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.49872723,
                "ECCENTRICITY": 0.0006292,
                "INCLINATION": 51.6400,
                "RAAN": 315.0000,
                "ARG_OF_PERIGEE": 100.0000,
                "MEAN_ANOMALY": 260.0000,
            },
            {
                "OBJECT_NAME": "STARLINK-1000",
                "NORAD_CAT_ID": 44720,
                "INTL_DESIGNATOR": "2019-029AZ",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.79234567,
                "ECCENTRICITY": 0.0001234,
                "INCLINATION": 53.0000,
                "RAAN": 120.0000,
                "ARG_OF_PERIGEE": 90.0000,
                "MEAN_ANOMALY": 270.0000,
            },
        ]

View File

@@ -7,13 +7,14 @@ Uses Wayback Machine as backup data source since live data requires JavaScript r
import json import json
import re import re
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime from datetime import UTC, datetime
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import httpx import httpx
from app.services.collectors.base import BaseCollector from app.services.collectors.base import BaseCollector
class TeleGeographyCableCollector(BaseCollector): class TeleGeographyCableCollector(BaseCollector):
name = "telegeography_cables" name = "telegeography_cables"
priority = "P1" priority = "P1"
@@ -102,7 +103,7 @@ class TeleGeographyCableCollector(BaseCollector):
"capacity_tbps": item.get("capacity"), "capacity_tbps": item.get("capacity"),
"url": item.get("url"), "url": item.get("url"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -130,7 +131,7 @@ class TeleGeographyCableCollector(BaseCollector):
"owner": "Meta, Orange, Vodafone, etc.", "owner": "Meta, Orange, Vodafone, etc.",
"status": "active", "status": "active",
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
}, },
{ {
"source_id": "telegeo_sample_2", "source_id": "telegeo_sample_2",
@@ -146,7 +147,7 @@ class TeleGeographyCableCollector(BaseCollector):
"owner": "Alibaba, NEC", "owner": "Alibaba, NEC",
"status": "planned", "status": "planned",
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
}, },
] ]
@@ -186,7 +187,7 @@ class TeleGeographyLandingPointCollector(BaseCollector):
"cable_count": len(item.get("cables", [])), "cable_count": len(item.get("cables", [])),
"url": item.get("url"), "url": item.get("url"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -210,7 +211,7 @@ class TeleGeographyLandingPointCollector(BaseCollector):
"value": "", "value": "",
"unit": "", "unit": "",
"metadata": {"note": "Sample data"}, "metadata": {"note": "Sample data"},
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
}, },
] ]
@@ -257,7 +258,7 @@ class TeleGeographyCableSystemCollector(BaseCollector):
"investment": item.get("investment"), "investment": item.get("investment"),
"url": item.get("url"), "url": item.get("url"),
}, },
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
} }
result.append(entry) result.append(entry)
except (ValueError, TypeError, KeyError): except (ValueError, TypeError, KeyError):
@@ -281,6 +282,6 @@ class TeleGeographyCableSystemCollector(BaseCollector):
"value": "5000", "value": "5000",
"unit": "km", "unit": "km",
"metadata": {"note": "Sample data"}, "metadata": {"note": "Sample data"},
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"), "reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
}, },
] ]

View File

@@ -4,9 +4,9 @@ Collects data from TOP500 supercomputer rankings.
https://top500.org/lists/top500/ https://top500.org/lists/top500/
""" """
import asyncio
import re import re
from typing import Dict, Any, List from typing import Dict, Any, List
from datetime import datetime
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import httpx import httpx
@@ -21,14 +21,108 @@ class TOP500Collector(BaseCollector):
data_type = "supercomputer" data_type = "supercomputer"
async def fetch(self) -> List[Dict[str, Any]]: async def fetch(self) -> List[Dict[str, Any]]:
"""Fetch TOP500 data from website (scraping)""" """Fetch TOP500 list data and enrich each row with detail-page metadata."""
# Get the latest list page
url = "https://top500.org/lists/top500/list/2025/11/" url = "https://top500.org/lists/top500/list/2025/11/"
async with httpx.AsyncClient(timeout=60.0) as client: async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
response = await client.get(url) response = await client.get(url)
response.raise_for_status() response.raise_for_status()
return self.parse_response(response.text) entries = self.parse_response(response.text)
semaphore = asyncio.Semaphore(8)
async def enrich(entry: Dict[str, Any]) -> Dict[str, Any]:
detail_url = entry.pop("_detail_url", "")
if not detail_url:
return entry
async with semaphore:
try:
detail_response = await client.get(detail_url)
detail_response.raise_for_status()
entry["metadata"].update(self.parse_detail_response(detail_response.text))
except Exception:
entry["metadata"]["detail_fetch_failed"] = True
return entry
return await asyncio.gather(*(enrich(entry) for entry in entries))
def _extract_system_fields(self, system_cell) -> Dict[str, str]:
link = system_cell.find("a")
system_name = link.get_text(" ", strip=True) if link else system_cell.get_text(" ", strip=True)
detail_url = ""
if link and link.get("href"):
detail_url = f"https://top500.org{link.get('href')}"
manufacturer = ""
if link and link.next_sibling:
manufacturer = str(link.next_sibling).strip(" ,\n\t")
cell_text = system_cell.get_text("\n", strip=True)
lines = [line.strip(" ,") for line in cell_text.splitlines() if line.strip()]
site = ""
country = ""
if lines:
system_name = lines[0]
if len(lines) >= 3:
site = lines[-2]
country = lines[-1]
elif len(lines) == 2:
country = lines[-1]
if not manufacturer and len(lines) >= 2:
manufacturer = lines[1]
return {
"name": system_name,
"manufacturer": manufacturer,
"site": site,
"country": country,
"detail_url": detail_url,
}
def parse_detail_response(self, html: str) -> Dict[str, Any]:
soup = BeautifulSoup(html, "html.parser")
detail_table = soup.find("table", {"class": "table table-condensed"})
if not detail_table:
return {}
detail_map: Dict[str, Any] = {}
label_aliases = {
"Site": "site",
"Manufacturer": "manufacturer",
"Cores": "cores",
"Processor": "processor",
"Interconnect": "interconnect",
"Installation Year": "installation_year",
"Linpack Performance (Rmax)": "rmax",
"Theoretical Peak (Rpeak)": "rpeak",
"Nmax": "nmax",
"HPCG": "hpcg",
"Power": "power",
"Power Measurement Level": "power_measurement_level",
"Operating System": "operating_system",
"Compiler": "compiler",
"Math Library": "math_library",
"MPI": "mpi",
}
for row in detail_table.find_all("tr"):
header = row.find("th")
value_cell = row.find("td")
if not header or not value_cell:
continue
label = header.get_text(" ", strip=True).rstrip(":")
key = label_aliases.get(label)
if not key:
continue
value = value_cell.get_text(" ", strip=True)
detail_map[key] = value
return detail_map
def parse_response(self, html: str) -> List[Dict[str, Any]]: def parse_response(self, html: str) -> List[Dict[str, Any]]:
"""Parse TOP500 HTML response""" """Parse TOP500 HTML response"""
@@ -36,27 +130,26 @@ class TOP500Collector(BaseCollector):
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
# Find the table with TOP500 data # Find the table with TOP500 data
table = soup.find("table", {"class": "top500-table"}) table = None
if not table: for candidate in soup.find_all("table"):
# Try alternative table selector header_cells = [
table = soup.find("table", {"id": "top500"}) cell.get_text(" ", strip=True) for cell in candidate.select("thead th")
]
normalized_headers = [header.lower() for header in header_cells]
if (
"rank" in normalized_headers
and "system" in normalized_headers
and any("cores" in header for header in normalized_headers)
and any("rmax" in header for header in normalized_headers)
):
table = candidate
break
if not table: if not table:
# Try to find any table with rank data table = soup.find("table", {"class": "top500-table"}) or soup.find("table", {"id": "top500"})
tables = soup.find_all("table")
for t in tables:
if t.find(string=re.compile(r"Rank.*System.*Cores.*Rmax", re.I)):
table = t
break
if not table:
# Fallback: try to extract data from any table
tables = soup.find_all("table")
if tables:
table = tables[0]
if table: if table:
rows = table.find_all("tr") rows = table.select("tr")
for row in rows[1:]: # Skip header row for row in rows[1:]: # Skip header row
cells = row.find_all(["td", "th"]) cells = row.find_all(["td", "th"])
if len(cells) >= 6: if len(cells) >= 6:
@@ -68,43 +161,26 @@ class TOP500Collector(BaseCollector):
rank = int(rank_text) rank = int(rank_text)
# System name (may contain link)
system_cell = cells[1] system_cell = cells[1]
system_name = system_cell.get_text(strip=True) system_fields = self._extract_system_fields(system_cell)
# Try to get full name from link title or data attribute system_name = system_fields["name"]
link = system_cell.find("a") manufacturer = system_fields["manufacturer"]
if link and link.get("title"): site = system_fields["site"]
system_name = link.get("title") country = system_fields["country"]
detail_url = system_fields["detail_url"]
# Country
country_cell = cells[2]
country = country_cell.get_text(strip=True)
# Try to get country from data attribute or image alt
img = country_cell.find("img")
if img and img.get("alt"):
country = img.get("alt")
# Extract location (city)
city = "" city = ""
location_text = country_cell.get_text(strip=True) cores = cells[2].get_text(strip=True).replace(",", "")
if "(" in location_text and ")" in location_text:
city = location_text.split("(")[0].strip()
# Cores rmax_text = cells[3].get_text(strip=True)
cores = cells[3].get_text(strip=True).replace(",", "")
# Rmax
rmax_text = cells[4].get_text(strip=True)
rmax = self._parse_performance(rmax_text) rmax = self._parse_performance(rmax_text)
# Rpeak rpeak_text = cells[4].get_text(strip=True)
rpeak_text = cells[5].get_text(strip=True)
rpeak = self._parse_performance(rpeak_text) rpeak = self._parse_performance(rpeak_text)
# Power (optional)
power = "" power = ""
if len(cells) >= 7: if len(cells) >= 6:
power = cells[6].get_text(strip=True) power = cells[5].get_text(strip=True).replace(",", "")
entry = { entry = {
"source_id": f"top500_{rank}", "source_id": f"top500_{rank}",
@@ -117,10 +193,14 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s", "unit": "PFlop/s",
"metadata": { "metadata": {
"rank": rank, "rank": rank,
"r_peak": rpeak,
"power": power,
"cores": cores, "cores": cores,
"rmax": rmax_text,
"rpeak": rpeak_text,
"power": power,
"manufacturer": manufacturer,
"site": site,
}, },
"_detail_url": detail_url,
"reference_date": "2025-11-01", "reference_date": "2025-11-01",
} }
data.append(entry) data.append(entry)
@@ -184,10 +264,15 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s", "unit": "PFlop/s",
"metadata": { "metadata": {
"rank": 1, "rank": 1,
"r_peak": 2746.38, "cores": "11039616",
"power": 29581, "rmax": "1742.00",
"cores": 11039616, "rpeak": "2746.38",
"power": "29581",
"manufacturer": "HPE", "manufacturer": "HPE",
"site": "DOE/NNSA/LLNL",
"processor": "AMD 4th Gen EPYC 24C 1.8GHz",
"interconnect": "Slingshot-11",
"installation_year": "2025",
}, },
"reference_date": "2025-11-01", "reference_date": "2025-11-01",
}, },
@@ -202,10 +287,12 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s", "unit": "PFlop/s",
"metadata": { "metadata": {
"rank": 2, "rank": 2,
"r_peak": 2055.72, "cores": "9066176",
"power": 24607, "rmax": "1353.00",
"cores": 9066176, "rpeak": "2055.72",
"power": "24607",
"manufacturer": "HPE", "manufacturer": "HPE",
"site": "DOE/SC/Oak Ridge National Laboratory",
}, },
"reference_date": "2025-11-01", "reference_date": "2025-11-01",
}, },
@@ -220,9 +307,10 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s", "unit": "PFlop/s",
"metadata": { "metadata": {
"rank": 3, "rank": 3,
"r_peak": 1980.01, "cores": "9264128",
"power": 38698, "rmax": "1012.00",
"cores": 9264128, "rpeak": "1980.01",
"power": "38698",
"manufacturer": "Intel", "manufacturer": "Intel",
}, },
"reference_date": "2025-11-01", "reference_date": "2025-11-01",

View File

@@ -1,15 +1,18 @@
"""Task Scheduler for running collection jobs""" """Task Scheduler for running collection jobs."""
import asyncio import asyncio
import logging import logging
from datetime import datetime from datetime import UTC, datetime, timedelta
from typing import Dict, Any from typing import Any, Dict, Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select
from app.db.session import async_session_factory from app.db.session import async_session_factory
from app.core.time import to_iso8601_utc
from app.models.datasource import DataSource
from app.models.task import CollectionTask
from app.services.collectors.registry import collector_registry from app.services.collectors.registry import collector_registry
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -17,132 +20,185 @@ logger = logging.getLogger(__name__)
scheduler = AsyncIOScheduler() scheduler = AsyncIOScheduler()
COLLECTOR_TO_ID = { async def _update_next_run_at(datasource: DataSource, session) -> None:
"top500": 1, job = scheduler.get_job(datasource.source)
"epoch_ai_gpu": 2, datasource.next_run_at = job.next_run_time if job else None
"huggingface_models": 3, await session.commit()
"huggingface_datasets": 4,
"huggingface_spaces": 5,
"peeringdb_ixp": 6, async def _apply_datasource_schedule(datasource: DataSource, session) -> None:
"peeringdb_network": 7, collector = collector_registry.get(datasource.source)
"peeringdb_facility": 8, if not collector:
"telegeography_cables": 9, logger.warning("Collector not found for datasource %s", datasource.source)
"telegeography_landing": 10, return
"telegeography_systems": 11,
"arcgis_cables": 15, collector_registry.set_active(datasource.source, datasource.is_active)
"fao_landing_points": 16,
} existing_job = scheduler.get_job(datasource.source)
if existing_job:
scheduler.remove_job(datasource.source)
if datasource.is_active:
scheduler.add_job(
run_collector_task,
trigger=IntervalTrigger(minutes=max(1, datasource.frequency_minutes)),
id=datasource.source,
name=datasource.name,
replace_existing=True,
kwargs={"collector_name": datasource.source},
)
logger.info(
"Scheduled collector: %s (every %sm)",
datasource.source,
datasource.frequency_minutes,
)
else:
logger.info("Collector disabled: %s", datasource.source)
await _update_next_run_at(datasource, session)
async def run_collector_task(collector_name: str): async def run_collector_task(collector_name: str):
"""Run a single collector task""" """Run a single collector task."""
collector = collector_registry.get(collector_name) collector = collector_registry.get(collector_name)
if not collector: if not collector:
logger.error(f"Collector not found: {collector_name}") logger.error("Collector not found: %s", collector_name)
return return
# Get the correct datasource_id async with async_session_factory() as db:
datasource_id = COLLECTOR_TO_ID.get(collector_name, 1) result = await db.execute(select(DataSource).where(DataSource.source == collector_name))
datasource = result.scalar_one_or_none()
if not datasource:
logger.error("Datasource not found for collector: %s", collector_name)
return
if not datasource.is_active:
logger.info("Skipping disabled collector: %s", collector_name)
return
try:
collector._datasource_id = datasource.id
logger.info("Running collector: %s (datasource_id=%s)", collector_name, datasource.id)
task_result = await collector.run(db)
datasource.last_run_at = datetime.now(UTC)
datasource.last_status = task_result.get("status")
await _update_next_run_at(datasource, db)
logger.info("Collector %s completed: %s", collector_name, task_result)
except Exception as exc:
datasource.last_run_at = datetime.now(UTC)
datasource.last_status = "failed"
await db.commit()
logger.exception("Collector %s failed: %s", collector_name, exc)
async def cleanup_stale_running_tasks(max_age_hours: int = 2) -> int:
"""Mark stale running tasks as failed after restarts or collector hangs."""
cutoff = datetime.now(UTC) - timedelta(hours=max_age_hours)
async with async_session_factory() as db: async with async_session_factory() as db:
try: result = await db.execute(
# Set the datasource_id on the collector instance select(CollectionTask).where(
collector._datasource_id = datasource_id CollectionTask.status == "running",
CollectionTask.started_at.is_not(None),
logger.info(f"Running collector: {collector_name} (datasource_id={datasource_id})") CollectionTask.started_at < cutoff,
result = await collector.run(db)
logger.info(f"Collector {collector_name} completed: {result}")
except Exception as e:
logger.error(f"Collector {collector_name} failed: {e}")
def start_scheduler():
"""Start the scheduler with all registered collectors"""
collectors = collector_registry.all()
for name, collector in collectors.items():
if collector_registry.is_active(name):
scheduler.add_job(
run_collector_task,
trigger=IntervalTrigger(hours=collector.frequency_hours),
id=name,
name=name,
replace_existing=True,
kwargs={"collector_name": name},
) )
logger.info(f"Scheduled collector: {name} (every {collector.frequency_hours}h)") )
stale_tasks = result.scalars().all()
scheduler.start() for task in stale_tasks:
logger.info("Scheduler started") task.status = "failed"
task.phase = "failed"
task.completed_at = datetime.now(UTC)
existing_error = (task.error_message or "").strip()
cleanup_error = "Marked failed automatically after stale running task cleanup"
task.error_message = f"{existing_error}\n{cleanup_error}".strip() if existing_error else cleanup_error
if stale_tasks:
await db.commit()
logger.warning("Cleaned up %s stale running collection task(s)", len(stale_tasks))
return len(stale_tasks)
def stop_scheduler(): def start_scheduler() -> None:
"""Stop the scheduler""" """Start the scheduler."""
scheduler.shutdown() if not scheduler.running:
logger.info("Scheduler stopped") scheduler.start()
logger.info("Scheduler started")
def stop_scheduler() -> None:
"""Stop the scheduler."""
if scheduler.running:
scheduler.shutdown(wait=False)
logger.info("Scheduler stopped")
async def sync_scheduler_with_datasources() -> None:
"""Synchronize scheduler jobs with datasource table."""
async with async_session_factory() as db:
result = await db.execute(select(DataSource).order_by(DataSource.id))
datasources = result.scalars().all()
configured_sources = {datasource.source for datasource in datasources}
for job in list(scheduler.get_jobs()):
if job.id not in configured_sources:
scheduler.remove_job(job.id)
for datasource in datasources:
await _apply_datasource_schedule(datasource, db)
async def sync_datasource_job(datasource_id: int) -> bool:
"""Synchronize a single datasource job after settings changes."""
async with async_session_factory() as db:
datasource = await db.get(DataSource, datasource_id)
if not datasource:
return False
await _apply_datasource_schedule(datasource, db)
return True
def get_scheduler_jobs() -> list[Dict[str, Any]]: def get_scheduler_jobs() -> list[Dict[str, Any]]:
"""Get all scheduled jobs""" """Get all scheduled jobs."""
jobs = [] jobs = []
for job in scheduler.get_jobs(): for job in scheduler.get_jobs():
jobs.append( jobs.append(
{ {
"id": job.id, "id": job.id,
"name": job.name, "name": job.name,
"next_run_time": job.next_run_time.isoformat() if job.next_run_time else None, "next_run_time": to_iso8601_utc(job.next_run_time),
"trigger": str(job.trigger), "trigger": str(job.trigger),
} }
) )
return jobs return jobs
def add_job(collector_name: str, hours: int = 4): async def get_latest_task_id_for_datasource(datasource_id: int) -> Optional[int]:
"""Add a new scheduled job""" from app.models.task import CollectionTask
collector = collector_registry.get(collector_name)
if not collector:
raise ValueError(f"Collector not found: {collector_name}")
scheduler.add_job( async with async_session_factory() as db:
run_collector_task, result = await db.execute(
trigger=IntervalTrigger(hours=hours), select(CollectionTask.id)
id=collector_name, .where(CollectionTask.datasource_id == datasource_id)
name=collector_name, .order_by(CollectionTask.created_at.desc(), CollectionTask.id.desc())
replace_existing=True, .limit(1)
kwargs={"collector_name": collector_name}, )
) return result.scalar_one_or_none()
logger.info(f"Added scheduled job: {collector_name} (every {hours}h)")
def remove_job(collector_name: str):
"""Remove a scheduled job"""
scheduler.remove_job(collector_name)
logger.info(f"Removed scheduled job: {collector_name}")
def pause_job(collector_name: str):
"""Pause a scheduled job"""
scheduler.pause_job(collector_name)
logger.info(f"Paused job: {collector_name}")
def resume_job(collector_name: str):
"""Resume a scheduled job"""
scheduler.resume_job(collector_name)
logger.info(f"Resumed job: {collector_name}")
def run_collector_now(collector_name: str) -> bool: def run_collector_now(collector_name: str) -> bool:
"""Run a collector immediately (not scheduled)""" """Run a collector immediately (not scheduled)."""
collector = collector_registry.get(collector_name) collector = collector_registry.get(collector_name)
if not collector: if not collector:
logger.error(f"Collector not found: {collector_name}") logger.error("Collector not found: %s", collector_name)
return False return False
try: try:
asyncio.create_task(run_collector_task(collector_name)) asyncio.create_task(run_collector_task(collector_name))
logger.info(f"Triggered collector: {collector_name}") logger.info("Triggered collector: %s", collector_name)
return True return True
except Exception as e: except Exception as exc:
logger.error(f"Failed to trigger collector {collector_name}: {e}") logger.error("Failed to trigger collector %s: %s", collector_name, exc)
return False return False

View File

@@ -16,3 +16,4 @@ email-validator
apscheduler>=3.10.4 apscheduler>=3.10.4
pytest>=7.4.0 pytest>=7.4.0
pytest-asyncio>=0.23.0 pytest-asyncio>=0.23.0
networkx>=3.0

Some files were not shown because too many files have changed in this diff Show More