Compare commits
53 Commits
14d11cd99d
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7a3ca6e1b3 | ||
|
|
62f2d9f403 | ||
|
|
b448a1e560 | ||
|
|
2cc0c9412c | ||
|
|
3dd210a3e5 | ||
|
|
a761dfc5fb | ||
|
|
7ec9586f7a | ||
|
|
b0058edf17 | ||
|
|
bf2c4a172d | ||
|
|
30a29a6e34 | ||
|
|
ab09f0ba78 | ||
|
|
7b53cf9a06 | ||
|
|
a04f4f9e67 | ||
|
|
ce5feba3b9 | ||
|
|
3fd6cbb6f7 | ||
|
|
020c1d5051 | ||
|
|
cc5f16f8a7 | ||
|
|
ef0fefdfc7 | ||
|
|
81a0ca5e7a | ||
|
|
b57d69c98b | ||
|
|
b9fbacade7 | ||
|
|
543fe35fbb | ||
|
|
1784c057e5 | ||
|
|
465129eec7 | ||
|
|
0c950262d3 | ||
|
|
eabdbdc85a | ||
|
|
af29e90cb0 | ||
|
|
d9a64f7768 | ||
|
|
78bb639a83 | ||
|
|
96222b9e4c | ||
|
|
3fcbae55dc | ||
|
|
3e3090d72a | ||
|
|
4f922f13d1 | ||
|
|
bb6b18fe3b | ||
|
|
0ecc1bc537 | ||
|
|
869d661a94 | ||
|
|
d18e400fcb | ||
|
|
6fabbcfe5c | ||
|
|
1189fec014 | ||
|
|
82f7aa29a6 | ||
|
|
777891f865 | ||
|
|
c2eba54da0 | ||
|
|
f50830712c | ||
|
|
e21b783bef | ||
|
|
11a9dda942 | ||
|
|
3b0e9dec5a | ||
|
|
c82e1d5a04 | ||
|
|
02991730e5 | ||
|
|
4e487b315a | ||
|
|
948af2c88f | ||
|
|
b06cb4606f | ||
|
|
de32552159 | ||
|
|
99771a88c5 |
25
.env
25
.env
@@ -1,25 +0,0 @@
|
|||||||
# Database
|
|
||||||
POSTGRES_SERVER=localhost
|
|
||||||
POSTGRES_USER=postgres
|
|
||||||
POSTGRES_PASSWORD=postgres
|
|
||||||
POSTGRES_DB=planet_db
|
|
||||||
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db
|
|
||||||
|
|
||||||
# Redis
|
|
||||||
REDIS_SERVER=localhost
|
|
||||||
REDIS_PORT=6379
|
|
||||||
REDIS_URL=redis://localhost:6379/0
|
|
||||||
|
|
||||||
# Security
|
|
||||||
SECRET_KEY=your-secret-key-change-in-production
|
|
||||||
ALGORITHM=HS256
|
|
||||||
ACCESS_TOKEN_EXPIRE_MINUTES=15
|
|
||||||
REFRESH_TOKEN_EXPIRE_DAYS=7
|
|
||||||
|
|
||||||
# API
|
|
||||||
API_V1_STR=/api/v1
|
|
||||||
PROJECT_NAME="Intelligent Planet Plan"
|
|
||||||
VERSION=1.0.0
|
|
||||||
|
|
||||||
# CORS
|
|
||||||
CORS_ORIGINS=["http://localhost:3000", "http://localhost:8000"]
|
|
||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -41,6 +41,8 @@ MANIFEST
|
|||||||
venv/
|
venv/
|
||||||
ENV/
|
ENV/
|
||||||
env/
|
env/
|
||||||
|
.uv/
|
||||||
|
.uv-cache/
|
||||||
.ruff_cache/
|
.ruff_cache/
|
||||||
*.db
|
*.db
|
||||||
*.sqlite
|
*.sqlite
|
||||||
|
|||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.14
|
||||||
165
.sisyphus/plans/earth-architecture-refactor.md
Normal file
165
.sisyphus/plans/earth-architecture-refactor.md
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
# 地球3D可视化架构重构计划
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
当前 `frontend/public/earth` 3D地球可视化系统基于 Three.js 构建,未来需要迁移到 Unreal Engine (Cesium)。为降低迁移成本,需要提前做好**逻辑与渲染分离**的架构设计。
|
||||||
|
|
||||||
|
## 目标
|
||||||
|
|
||||||
|
- 将线缆高亮逻辑与渲染实现分离
|
||||||
|
- 保持交互逻辑可复用,只需重写渲染层
|
||||||
|
- 为后续迁移到 UE/Cesium 做好准备
|
||||||
|
|
||||||
|
## 已完成
|
||||||
|
|
||||||
|
### 1. 状态枚举定义 (constants.js)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
export const CABLE_STATE = {
|
||||||
|
NORMAL: 'normal',
|
||||||
|
HOVERED: 'hovered',
|
||||||
|
LOCKED: 'locked'
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 线缆状态管理 (cables.js - 数据层)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
const cableStates = new Map();
|
||||||
|
|
||||||
|
export function getCableState(cableId) { ... }
|
||||||
|
export function setCableState(cableId, state) { ... }
|
||||||
|
export function clearAllCableStates() { ... }
|
||||||
|
export function getCableStateInfo() { ... }
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. 逻辑层调用 (main.js)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 悬停
|
||||||
|
setCableState(cable.userData.cableId, CABLE_STATE.HOVERED);
|
||||||
|
|
||||||
|
// 锁定
|
||||||
|
setCableState(cableId, CABLE_STATE.LOCKED);
|
||||||
|
|
||||||
|
// 恢复
|
||||||
|
setCableState(cableId, CABLE_STATE.NORMAL);
|
||||||
|
clearAllCableStates();
|
||||||
|
|
||||||
|
// 清除锁定时
|
||||||
|
clearLockedObject() {
|
||||||
|
hoveredCable = null;
|
||||||
|
clearAllCableStates();
|
||||||
|
...
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. 渲染层 (main.js - applyCableVisualState)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
function applyCableVisualState() {
|
||||||
|
const allCables = getCableLines();
|
||||||
|
const pulse = (Math.sin(Date.now() * CABLE_CONFIG.pulseSpeed) + 1) * 0.5;
|
||||||
|
|
||||||
|
allCables.forEach(c => {
|
||||||
|
const cableId = c.userData.cableId;
|
||||||
|
const state = getCableState(cableId);
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case CABLE_STATE.LOCKED:
|
||||||
|
// 呼吸效果 + 白色
|
||||||
|
c.material.opacity = CABLE_CONFIG.lockedOpacityMin + pulse * CABLE_CONFIG.pulseCoefficient;
|
||||||
|
c.material.color.setRGB(1, 1, 1);
|
||||||
|
break;
|
||||||
|
case CABLE_STATE.HOVERED:
|
||||||
|
// 白色高亮
|
||||||
|
c.material.opacity = 1;
|
||||||
|
c.material.color.setRGB(1, 1, 1);
|
||||||
|
break;
|
||||||
|
case CABLE_STATE.NORMAL:
|
||||||
|
default:
|
||||||
|
if (lockedObjectType === 'cable' && lockedObject) {
|
||||||
|
// 其他线缆变暗
|
||||||
|
c.material.opacity = CABLE_CONFIG.otherOpacity;
|
||||||
|
...
|
||||||
|
} else {
|
||||||
|
// 恢复原始
|
||||||
|
c.material.opacity = 1;
|
||||||
|
c.material.color.setHex(c.userData.originalColor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 待完成
|
||||||
|
|
||||||
|
### Phase 1: 完善状态配置 (constants.js)
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
export const CABLE_CONFIG = {
|
||||||
|
lockedOpacityMin: 0.6,
|
||||||
|
lockedOpacityMax: 1.0,
|
||||||
|
otherOpacity: 0.5,
|
||||||
|
otherBrightness: 0.6,
|
||||||
|
pulseSpeed: 0.003,
|
||||||
|
pulseCoefficient: 0.4,
|
||||||
|
// 未来可扩展
|
||||||
|
// lockedLineWidth: 3,
|
||||||
|
// normalLineWidth: 1,
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Phase 2: 卫星状态管理 (satellites.js)
|
||||||
|
|
||||||
|
参考线缆状态管理,为卫星添加类似的状态枚举和状态管理函数:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
export const SATELLITE_STATE = {
|
||||||
|
NORMAL: 'normal',
|
||||||
|
HOVERED: 'hovered',
|
||||||
|
LOCKED: 'locked'
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 卫星数据源说明
|
||||||
|
|
||||||
|
- **当前使用**: CelesTrak (https://celestrak.org) - 免费,无需认证
|
||||||
|
- **后续计划**: Space-Track.org (https://space-track.org) - 需要认证,数据更权威
|
||||||
|
- 迁移时只需修改 `satellites.js` 中的数据获取逻辑,状态管理和渲染逻辑不变
|
||||||
|
|
||||||
|
### Phase 3: 统一渲染接口
|
||||||
|
|
||||||
|
将所有对象的渲染逻辑抽象为一个统一的渲染函数:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
function applyObjectVisualState() {
|
||||||
|
applyCableVisualState();
|
||||||
|
applySatelliteVisualState();
|
||||||
|
applyLandingPointVisualState();
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Phase 4: UE 迁移准备
|
||||||
|
|
||||||
|
迁移到 Unreal Engine 时:
|
||||||
|
1. 保留 `constants.js` 中的枚举和配置
|
||||||
|
2. 保留 `cables.js` 中的数据层和状态管理
|
||||||
|
3. 保留 `main.js` 中的交互逻辑
|
||||||
|
4. **仅重写** `applyCableVisualState()` 等渲染函数
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 架构原则
|
||||||
|
|
||||||
|
1. **状态与渲染分离** - 对象状态由数据层管理,渲染层只负责根据状态更新视觉效果
|
||||||
|
2. **逻辑可复用** - 交互逻辑(点击、悬停、锁定)在迁移时应直接复用
|
||||||
|
3. **渲染可替换** - 渲染实现可以针对不同引擎重写,不影响逻辑层
|
||||||
|
|
||||||
|
## 文件变更记录
|
||||||
|
|
||||||
|
| 日期 | 文件 | 变更 |
|
||||||
|
|------|------|------|
|
||||||
|
| 2026-03-19 | constants.js | 新增 CABLE_STATE 枚举 |
|
||||||
|
| 2026-03-19 | cables.js | 新增状态管理函数 |
|
||||||
|
| 2026-03-19 | main.js | 使用状态管理,抽象 applyCableVisualState() |
|
||||||
136
.sisyphus/plans/predicted-orbit.md
Normal file
136
.sisyphus/plans/predicted-orbit.md
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
# 卫星预测轨道显示功能
|
||||||
|
|
||||||
|
## TL;DR
|
||||||
|
> 锁定卫星时显示绕地球完整一圈的预测轨道轨迹,从当前位置向外渐变消失
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
### 目标
|
||||||
|
点击锁定卫星 → 显示该卫星绕地球一周的完整预测轨道(而非当前的历史轨迹)
|
||||||
|
|
||||||
|
### 当前实现
|
||||||
|
- `TRAIL_LENGTH = 30` - 历史轨迹点数,每帧 push 当前位置
|
||||||
|
- 显示最近30帧历史轨迹(类似彗星尾巴)
|
||||||
|
|
||||||
|
### 参考: SatelliteMap.space
|
||||||
|
- 锁定时显示预测轨道
|
||||||
|
- 颜色从当前位置向外渐变消失
|
||||||
|
- 使用 satellite.js(与本项目相同)
|
||||||
|
|
||||||
|
## 实现状态
|
||||||
|
|
||||||
|
### ✅ 已完成
|
||||||
|
- [x] 计算卫星轨道周期(基于 `meanMotion`)
|
||||||
|
- [x] 生成预测轨道点(10秒采样间隔)
|
||||||
|
- [x] 创建独立预测轨道渲染对象
|
||||||
|
- [x] 锁定卫星时显示预测轨道
|
||||||
|
- [x] 解除锁定时隐藏预测轨道
|
||||||
|
- [x] 颜色渐变:当前位置(亮) → 轨道终点(暗)
|
||||||
|
- [x] 页面隐藏时清除轨迹(防止切回时闪现)
|
||||||
|
|
||||||
|
### 🚧 进行中
|
||||||
|
- [ ] 完整圆环轨道(部分卫星因 SGP4 计算问题使用 fallback 圆形轨道)
|
||||||
|
- [ ] 每颗卫星只显示一条轨道
|
||||||
|
|
||||||
|
## 技术细节
|
||||||
|
|
||||||
|
### 轨道周期计算
|
||||||
|
```javascript
|
||||||
|
function calculateOrbitalPeriod(meanMotion) {
|
||||||
|
return 86400 / meanMotion;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 预测轨道计算
|
||||||
|
```javascript
|
||||||
|
function calculatePredictedOrbit(satellite, periodSeconds, sampleInterval = 10) {
|
||||||
|
const points = [];
|
||||||
|
const samples = Math.ceil(periodSeconds / sampleInterval);
|
||||||
|
const now = new Date();
|
||||||
|
|
||||||
|
// Full orbit: from now to now+period
|
||||||
|
for (let i = 0; i <= samples; i++) {
|
||||||
|
const time = new Date(now.getTime() + i * sampleInterval * 1000);
|
||||||
|
const pos = computeSatellitePosition(satellite, time);
|
||||||
|
if (pos) points.push(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: 如果真实位置计算点太少,使用圆形 fallback
|
||||||
|
if (points.length < samples * 0.5) {
|
||||||
|
points.length = 0;
|
||||||
|
// ... 圆形轨道生成
|
||||||
|
}
|
||||||
|
|
||||||
|
return points;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 渲染对象
|
||||||
|
```javascript
|
||||||
|
let predictedOrbitLine = null;
|
||||||
|
|
||||||
|
export function showPredictedOrbit(satellite) {
|
||||||
|
hidePredictedOrbit();
|
||||||
|
// ... 计算并渲染轨道
|
||||||
|
}
|
||||||
|
|
||||||
|
export function hidePredictedOrbit() {
|
||||||
|
if (predictedOrbitLine) {
|
||||||
|
earthObjRef.remove(predictedOrbitLine);
|
||||||
|
predictedOrbitLine.geometry.dispose();
|
||||||
|
predictedOrbitLine.material.dispose();
|
||||||
|
predictedOrbitLine = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 已知问题
|
||||||
|
|
||||||
|
### 1. TLE 格式问题
|
||||||
|
`computeSatellitePosition` 使用自行构建的 TLE 格式,对某些卫星返回 null。当前使用 fallback 圆形轨道作为补偿。
|
||||||
|
|
||||||
|
### 2. 多条轨道
|
||||||
|
部分情况下锁定时会显示多条轨道。需要确保 `hidePredictedOrbit()` 被正确调用。
|
||||||
|
|
||||||
|
## 性能考虑
|
||||||
|
|
||||||
|
### 点数估算
|
||||||
|
| 卫星类型 | 周期 | 周期(秒) | 点数(10秒采样) |
|
||||||
|
|---------|------|---------|------|
|
||||||
|
| LEO | 90分钟 | 5400秒 | ~540点 |
|
||||||
|
| MEO | 12小时 | 43200秒 | ~4320点 |
|
||||||
|
| GEO | 24小时 | 86400秒 | ~8640点 |
|
||||||
|
|
||||||
|
### 优化策略
|
||||||
|
- 当前方案(GEO 约 8640 点,10秒采样)性能可接受
|
||||||
|
- 如遇性能问题:GEO 降低采样率到 30秒
|
||||||
|
|
||||||
|
## 验证方案
|
||||||
|
|
||||||
|
### QA Scenarios
|
||||||
|
|
||||||
|
**Scenario: 锁定 Starlink 卫星显示预测轨道**
|
||||||
|
1. 打开浏览器,进入 Earth 页面
|
||||||
|
2. 显示卫星(点击按钮)
|
||||||
|
3. 点击一颗 Starlink 卫星(低轨道 LEO)
|
||||||
|
4. 验证:出现黄色预测轨道线,从卫星向外绕行
|
||||||
|
5. 验证:颜色从亮黄渐变到暗蓝
|
||||||
|
6. 验证:轨道完整闭环
|
||||||
|
|
||||||
|
**Scenario: 锁定 GEO 卫星显示预测轨道**
|
||||||
|
1. 筛选一颗 GEO 卫星(轨道倾角 0-10° 或高轨道)
|
||||||
|
2. 点击锁定
|
||||||
|
3. 验证:显示完整 24 小时轨道(或 fallback 圆形轨道)
|
||||||
|
4. 验证:点数合理(24 小时周期 ÷ 10 秒采样 ≈ 8640 点,或 fallback)
|
||||||
|
|
||||||
|
**Scenario: 解除锁定隐藏预测轨道**
|
||||||
|
1. 锁定一颗卫星,显示预测轨道
|
||||||
|
2. 点击地球空白处解除锁定
|
||||||
|
3. 验证:预测轨道消失
|
||||||
|
|
||||||
|
**Scenario: 切换页面后轨迹不闪现**
|
||||||
|
1. 锁定一颗卫星
|
||||||
|
2. 切换到其他标签页
|
||||||
|
3. 等待几秒
|
||||||
|
4. 切回页面
|
||||||
|
5. 验证:轨迹不突然闪现累积
|
||||||
293
.sisyphus/plans/webgl-instancing-satellites.md
Normal file
293
.sisyphus/plans/webgl-instancing-satellites.md
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
# WebGL Instancing 卫星渲染优化计划
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
当前 `satellites.js` 使用 `THREE.Points` 渲染卫星,受限于 WebGL 点渲染性能,只能显示 ~500-1000 颗卫星。
|
||||||
|
需要迁移到真正的 WebGL Instancing 以支持 5000+ 卫星流畅渲染。
|
||||||
|
|
||||||
|
## 技术选型
|
||||||
|
|
||||||
|
| 方案 | 性能 | 改动量 | 维护性 | 推荐 |
|
||||||
|
|------|------|--------|--------|------|
|
||||||
|
| THREE.Points (现状) | ★★☆ | - | - | 基准 |
|
||||||
|
| THREE.InstancedMesh | ★★★ | 中 | 高 | 不适合点 |
|
||||||
|
| InstancedBufferGeometry + 自定义Shader | ★★★★ | 中高 | 中 | ✅ 推荐 |
|
||||||
|
| 迁移到 TWGL.js / Raw WebGL | ★★★★★ | 高 | 低 | 未来UE |
|
||||||
|
|
||||||
|
**推荐方案**: InstancedBufferGeometry + 自定义 Shader
|
||||||
|
- 保持 Three.js 架构
|
||||||
|
- 复用 satellite.js 数据层
|
||||||
|
- 性能接近原生 WebGL
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1: 调研与原型
|
||||||
|
|
||||||
|
### 1.1 分析现有架构
|
||||||
|
|
||||||
|
**现状 (satellites.js)**:
|
||||||
|
```javascript
|
||||||
|
// 创建点云
|
||||||
|
const pointsGeometry = new THREE.BufferGeometry();
|
||||||
|
pointsGeometry.setAttribute('position', new THREE.BufferAttribute(positions, 3));
|
||||||
|
pointsGeometry.setAttribute('color', new THREE.BufferAttribute(colors, 3));
|
||||||
|
|
||||||
|
const pointsMaterial = new THREE.PointsMaterial({
|
||||||
|
size: 2,
|
||||||
|
vertexColors: true,
|
||||||
|
transparent: true,
|
||||||
|
opacity: 0.8,
|
||||||
|
sizeAttenuation: true
|
||||||
|
});
|
||||||
|
|
||||||
|
satellitePoints = new THREE.Points(pointsGeometry, pointsMaterial);
|
||||||
|
```
|
||||||
|
|
||||||
|
**问题**: 每个卫星作为一个顶点,GPU 需要处理 ~500 个 draw calls (取决于视锥体裁剪)
|
||||||
|
|
||||||
|
### 1.2 Instanced Rendering 原理
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 目标:单次 draw call 渲染所有卫星
|
||||||
|
// 每个卫星属性:
|
||||||
|
// - position (vec3): 位置
|
||||||
|
// - color (vec3): 颜色
|
||||||
|
// - size (float): 大小 (可选)
|
||||||
|
// - selected (float): 是否选中 (0/1)
|
||||||
|
|
||||||
|
// 使用 InstancedBufferGeometry
|
||||||
|
const geometry = new THREE.InstancedBufferGeometry();
|
||||||
|
geometry.index = originalGeometry.index;
|
||||||
|
geometry.attributes.position = originalGeometry.attributes.position;
|
||||||
|
geometry.attributes.uv = originalGeometry.attributes.uv;
|
||||||
|
|
||||||
|
// 实例数据
|
||||||
|
const instancePositions = new Float32Array(satelliteCount * 3);
|
||||||
|
const instanceColors = new Float32Array(satelliteCount * 3);
|
||||||
|
|
||||||
|
geometry.setAttribute('instancePosition',
|
||||||
|
new THREE.InstancedBufferAttribute(instancePositions, 3));
|
||||||
|
geometry.setAttribute('instanceColor',
|
||||||
|
new THREE.InstancedBufferAttribute(instanceColors, 3));
|
||||||
|
|
||||||
|
// 自定义 Shader
|
||||||
|
const material = new THREE.ShaderMaterial({
|
||||||
|
vertexShader: `
|
||||||
|
attribute vec3 instancePosition;
|
||||||
|
attribute vec3 instanceColor;
|
||||||
|
varying vec3 vColor;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
vColor = instanceColor;
|
||||||
|
vec3 transformed = position + instancePosition;
|
||||||
|
gl_Position = projectionMatrix * modelViewMatrix * vec4(transformed, 1.0);
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
fragmentShader: `
|
||||||
|
varying vec3 vColor;
|
||||||
|
void main() {
|
||||||
|
gl_FragColor = vec4(vColor, 0.8);
|
||||||
|
}
|
||||||
|
`
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2: 实现
|
||||||
|
|
||||||
|
### 2.1 创建 instanced-satellites.js
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// instanced-satellites.js - Instanced rendering for satellites
|
||||||
|
|
||||||
|
import * as THREE from 'three';
|
||||||
|
import { SATELLITE_CONFIG } from './constants.js';
|
||||||
|
|
||||||
|
let instancedMesh = null;
|
||||||
|
let satelliteData = [];
|
||||||
|
let instancePositions = null;
|
||||||
|
let instanceColors = null;
|
||||||
|
let satelliteCount = 0;
|
||||||
|
|
||||||
|
const SATELLITE_VERTEX_SHADER = `
|
||||||
|
attribute vec3 instancePosition;
|
||||||
|
attribute vec3 instanceColor;
|
||||||
|
attribute float instanceSize;
|
||||||
|
|
||||||
|
varying vec3 vColor;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
vColor = instanceColor;
|
||||||
|
vec3 transformed = position * instanceSize + instancePosition;
|
||||||
|
gl_Position = projectionMatrix * modelViewMatrix * vec4(transformed, 1.0);
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
const SATELLITE_FRAGMENT_SHADER = `
|
||||||
|
varying vec3 vColor;
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
gl_FragColor = vec4(vColor, 0.9);
|
||||||
|
}
|
||||||
|
`;
|
||||||
|
|
||||||
|
export function createInstancedSatellites(scene, earthObj) {
|
||||||
|
// 基础圆形几何 (每个卫星是一个小圆点)
|
||||||
|
const baseGeometry = new THREE.CircleGeometry(1, 8);
|
||||||
|
|
||||||
|
// 创建 InstancedBufferGeometry
|
||||||
|
const geometry = new THREE.InstancedBufferGeometry();
|
||||||
|
geometry.index = baseGeometry.index;
|
||||||
|
geometry.attributes.position = baseGeometry.attributes.position;
|
||||||
|
geometry.attributes.uv = baseGeometry.attributes.uv;
|
||||||
|
|
||||||
|
// 初始化实例数据数组 (稍后填充)
|
||||||
|
instancePositions = new Float32Array(MAX_SATELLITES * 3);
|
||||||
|
instanceColors = new Float32Array(MAX_SATELLITES * 3);
|
||||||
|
const instanceSizes = new Float32Array(MAX_SATELLITES);
|
||||||
|
|
||||||
|
geometry.setAttribute('instancePosition',
|
||||||
|
new THREE.InstancedBufferAttribute(instancePositions, 3));
|
||||||
|
geometry.setAttribute('instanceColor',
|
||||||
|
new THREE.InstancedBufferAttribute(instanceColors, 3));
|
||||||
|
geometry.setAttribute('instanceSize',
|
||||||
|
new THREE.InstancedBufferAttribute(instanceSizes, 1));
|
||||||
|
|
||||||
|
const material = new THREE.ShaderMaterial({
|
||||||
|
vertexShader: SATELLITE_VERTEX_SHADER,
|
||||||
|
fragmentShader: SATELLITE_FRAGMENT_SHADER,
|
||||||
|
transparent: true,
|
||||||
|
side: THREE.DoubleSide
|
||||||
|
});
|
||||||
|
|
||||||
|
instancedMesh = new THREE.Mesh(geometry, material);
|
||||||
|
instancedMesh.frustumCulled = false; // 我们自己处理裁剪
|
||||||
|
scene.add(instancedMesh);
|
||||||
|
|
||||||
|
return instancedMesh;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function updateInstancedSatellites(satellitePositions) {
|
||||||
|
// satellitePositions: Array of { position: Vector3, color: Color }
|
||||||
|
const count = Math.min(satellitePositions.length, MAX_SATELLITES);
|
||||||
|
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const sat = satellitePositions[i];
|
||||||
|
instancePositions[i * 3] = sat.position.x;
|
||||||
|
instancePositions[i * 3 + 1] = sat.position.y;
|
||||||
|
instancePositions[i * 3 + 2] = sat.position.z;
|
||||||
|
|
||||||
|
instanceColors[i * 3] = sat.color.r;
|
||||||
|
instanceColors[i * 3 + 1] = sat.color.g;
|
||||||
|
instanceColors[i * 3 + 2] = sat.color.b;
|
||||||
|
}
|
||||||
|
|
||||||
|
instancedMesh.geometry.attributes.instancePosition.needsUpdate = true;
|
||||||
|
instancedMesh.geometry.attributes.instanceColor.needsUpdate = true;
|
||||||
|
instancedMesh.geometry.setDrawRange(0, count);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 修改现有 satellites.js
|
||||||
|
|
||||||
|
保持数据层不变,添加新渲染模式:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 添加配置
|
||||||
|
export const SATELLITE_CONFIG = {
|
||||||
|
USE_INSTANCING: true, // 切换渲染模式
|
||||||
|
MAX_SATELLITES: 5000,
|
||||||
|
SATELLITE_SIZE: 0.5,
|
||||||
|
// ...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.3 性能优化点
|
||||||
|
|
||||||
|
1. **GPU 实例化**: 单次 draw call 渲染所有卫星
|
||||||
|
2. **批量更新**: 所有位置/颜色一次更新
|
||||||
|
3. **视锥体裁剪**: 自定义裁剪逻辑,避免 CPU 端逐卫星检测
|
||||||
|
4. **LOD (可选)**: 远处卫星简化显示
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3: 与现有系统集成
|
||||||
|
|
||||||
|
### 3.1 悬停/选中处理
|
||||||
|
|
||||||
|
当前通过 `selectSatellite()` 设置选中状态,Instanced 模式下需要:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// 在 shader 中通过 instanceId 判断是否选中
|
||||||
|
// 或者使用单独的 InstancedBufferAttribute 存储选中状态
|
||||||
|
const instanceSelected = new Float32Array(MAX_SATELLITES);
|
||||||
|
geometry.setAttribute('instanceSelected',
|
||||||
|
new THREE.InstancedBufferAttribute(instanceSelected, 1));
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.2 轨迹线
|
||||||
|
|
||||||
|
轨迹线仍然使用 `THREE.Line` 或 `THREE.LineSegments`,但可以类似地 Instanced 化:
|
||||||
|
|
||||||
|
```javascript
|
||||||
|
// Instanced LineSegments for trails
|
||||||
|
const trailGeometry = new THREE.InstancedBufferGeometry();
|
||||||
|
trailGeometry.setAttribute('position', trailPositions);
|
||||||
|
trailGeometry.setAttribute('instanceStart', ...);
|
||||||
|
trailGeometry.setAttribute('instanceEnd', ...);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4: 验证与调优
|
||||||
|
|
||||||
|
### 4.1 性能测试
|
||||||
|
|
||||||
|
| 卫星数量 | Points 模式 | Instanced 模式 |
|
||||||
|
|----------|-------------|----------------|
|
||||||
|
| 500 | ✅ 60fps | ✅ 60fps |
|
||||||
|
| 2000 | ⚠️ 30fps | ✅ 60fps |
|
||||||
|
| 5000 | ❌ 10fps | ✅ 45fps |
|
||||||
|
| 10000 | ❌ 卡顿 | ⚠️ 30fps |
|
||||||
|
|
||||||
|
### 4.2 可能遇到的问题
|
||||||
|
|
||||||
|
1. **Shader 编译错误**: 需要调试 GLSL
|
||||||
|
2. **实例数量限制**: GPU 最大实例数 (通常 65535)
|
||||||
|
3. **大小不一**: 需要 per-instance size 属性
|
||||||
|
4. **透明度排序**: Instanced 渲染透明度处理复杂
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 文件变更清单
|
||||||
|
|
||||||
|
| 文件 | 变更 |
|
||||||
|
|------|------|
|
||||||
|
| `constants.js` | 新增 `SATELLITE_CONFIG` |
|
||||||
|
| `satellites.js` | 添加 Instanced 模式支持 |
|
||||||
|
| `instanced-satellites.js` | 新文件 - Instanced 渲染核心 |
|
||||||
|
| `main.js` | 集成新渲染模块 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 时间估算
|
||||||
|
|
||||||
|
| Phase | 工作量 | 难度 |
|
||||||
|
|-------|--------|------|
|
||||||
|
| Phase 1 | 1-2 天 | 低 |
|
||||||
|
| Phase 2 | 2-3 天 | 中 |
|
||||||
|
| Phase 3 | 1-2 天 | 中 |
|
||||||
|
| Phase 4 | 1 天 | 低 |
|
||||||
|
| **总计** | **5-8 天** | - |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 替代方案考虑
|
||||||
|
|
||||||
|
如果 Phase 2 实施困难,可以考虑:
|
||||||
|
|
||||||
|
1. **使用 Three.js InstancedMesh**: 适合渲染小型 3D 模型替代点
|
||||||
|
2. **使用 pointcloud2 格式**: 类似 LiDAR 点云渲染
|
||||||
|
3. **Web Workers**: 将轨道计算移到 Worker 线程
|
||||||
|
4. **迁移到 Cesium**: Cesium 原生支持 Instancing,且是 UE 迁移的中间步骤
|
||||||
18
README.md
18
README.md
@@ -184,14 +184,20 @@
|
|||||||
## 快速启动
|
## 快速启动
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 启动全部服务
|
# 启动前后端服务
|
||||||
docker-compose up -d
|
./planet.sh start
|
||||||
|
|
||||||
# 仅启动后端
|
# 仅重启后端
|
||||||
cd backend && python -m uvicorn app.main:app --reload
|
./planet.sh restart -b
|
||||||
|
|
||||||
# 仅启动前端
|
# 仅重启前端
|
||||||
cd frontend && npm run dev
|
./planet.sh restart -f
|
||||||
|
|
||||||
|
# 交互创建用户
|
||||||
|
./planet.sh createuser
|
||||||
|
|
||||||
|
# 查看服务状态
|
||||||
|
./planet.sh health
|
||||||
```
|
```
|
||||||
|
|
||||||
## API 文档
|
## API 文档
|
||||||
|
|||||||
@@ -16,4 +16,4 @@ COPY . .
|
|||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|
||||||
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -11,6 +11,7 @@ from app.api.v1 import (
|
|||||||
settings,
|
settings,
|
||||||
collected_data,
|
collected_data,
|
||||||
visualization,
|
visualization,
|
||||||
|
bgp,
|
||||||
)
|
)
|
||||||
|
|
||||||
api_router = APIRouter()
|
api_router = APIRouter()
|
||||||
@@ -27,3 +28,4 @@ api_router.include_router(dashboard.router, prefix="/dashboard", tags=["dashboar
|
|||||||
api_router.include_router(alerts.router, prefix="/alerts", tags=["alerts"])
|
api_router.include_router(alerts.router, prefix="/alerts", tags=["alerts"])
|
||||||
api_router.include_router(settings.router, prefix="/settings", tags=["settings"])
|
api_router.include_router(settings.router, prefix="/settings", tags=["settings"])
|
||||||
api_router.include_router(visualization.router, prefix="/visualization", tags=["visualization"])
|
api_router.include_router(visualization.router, prefix="/visualization", tags=["visualization"])
|
||||||
|
api_router.include_router(bgp.router, prefix="/bgp", tags=["bgp"])
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,4 +1,4 @@
|
|||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends
|
from fastapi import APIRouter, Depends
|
||||||
@@ -10,6 +10,7 @@ from app.models.user import User
|
|||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
from app.models.alert import Alert, AlertSeverity, AlertStatus
|
from app.models.alert import Alert, AlertSeverity, AlertStatus
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
@@ -67,7 +68,7 @@ async def acknowledge_alert(
|
|||||||
|
|
||||||
alert.status = AlertStatus.ACKNOWLEDGED
|
alert.status = AlertStatus.ACKNOWLEDGED
|
||||||
alert.acknowledged_by = current_user.id
|
alert.acknowledged_by = current_user.id
|
||||||
alert.acknowledged_at = datetime.utcnow()
|
alert.acknowledged_at = datetime.now(UTC)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
return {"message": "Alert acknowledged", "alert": alert.to_dict()}
|
return {"message": "Alert acknowledged", "alert": alert.to_dict()}
|
||||||
@@ -88,7 +89,7 @@ async def resolve_alert(
|
|||||||
|
|
||||||
alert.status = AlertStatus.RESOLVED
|
alert.status = AlertStatus.RESOLVED
|
||||||
alert.resolved_by = current_user.id
|
alert.resolved_by = current_user.id
|
||||||
alert.resolved_at = datetime.utcnow()
|
alert.resolved_at = datetime.now(UTC)
|
||||||
alert.resolution_notes = resolution
|
alert.resolution_notes = resolution
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
|
|||||||
182
backend/app/api/v1/bgp.py
Normal file
182
backend/app/api/v1/bgp.py
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from sqlalchemy import func, select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.security import get_current_user
|
||||||
|
from app.db.session import get_db
|
||||||
|
from app.models.bgp_anomaly import BGPAnomaly
|
||||||
|
from app.models.collected_data import CollectedData
|
||||||
|
from app.models.user import User
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
BGP_SOURCES = ("ris_live_bgp", "bgpstream_bgp")
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_dt(value: Optional[str]) -> Optional[datetime]:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
return datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_time(value: Optional[datetime], time_from: Optional[datetime], time_to: Optional[datetime]) -> bool:
|
||||||
|
if value is None:
|
||||||
|
return False
|
||||||
|
if time_from and value < time_from:
|
||||||
|
return False
|
||||||
|
if time_to and value > time_to:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/events")
|
||||||
|
async def list_bgp_events(
|
||||||
|
prefix: Optional[str] = Query(None),
|
||||||
|
origin_asn: Optional[int] = Query(None),
|
||||||
|
peer_asn: Optional[int] = Query(None),
|
||||||
|
collector: Optional[str] = Query(None),
|
||||||
|
event_type: Optional[str] = Query(None),
|
||||||
|
source: Optional[str] = Query(None),
|
||||||
|
time_from: Optional[str] = Query(None),
|
||||||
|
time_to: Optional[str] = Query(None),
|
||||||
|
page: int = Query(1, ge=1),
|
||||||
|
page_size: int = Query(50, ge=1, le=200),
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
stmt = (
|
||||||
|
select(CollectedData)
|
||||||
|
.where(CollectedData.source.in_(BGP_SOURCES))
|
||||||
|
.order_by(CollectedData.reference_date.desc().nullslast(), CollectedData.id.desc())
|
||||||
|
)
|
||||||
|
if source:
|
||||||
|
stmt = stmt.where(CollectedData.source == source)
|
||||||
|
|
||||||
|
result = await db.execute(stmt)
|
||||||
|
records = result.scalars().all()
|
||||||
|
dt_from = _parse_dt(time_from)
|
||||||
|
dt_to = _parse_dt(time_to)
|
||||||
|
|
||||||
|
filtered = []
|
||||||
|
for record in records:
|
||||||
|
metadata = record.extra_data or {}
|
||||||
|
if prefix and metadata.get("prefix") != prefix:
|
||||||
|
continue
|
||||||
|
if origin_asn is not None and metadata.get("origin_asn") != origin_asn:
|
||||||
|
continue
|
||||||
|
if peer_asn is not None and metadata.get("peer_asn") != peer_asn:
|
||||||
|
continue
|
||||||
|
if collector and metadata.get("collector") != collector:
|
||||||
|
continue
|
||||||
|
if event_type and metadata.get("event_type") != event_type:
|
||||||
|
continue
|
||||||
|
if (dt_from or dt_to) and not _matches_time(record.reference_date, dt_from, dt_to):
|
||||||
|
continue
|
||||||
|
filtered.append(record)
|
||||||
|
|
||||||
|
offset = (page - 1) * page_size
|
||||||
|
return {
|
||||||
|
"total": len(filtered),
|
||||||
|
"page": page,
|
||||||
|
"page_size": page_size,
|
||||||
|
"data": [record.to_dict() for record in filtered[offset : offset + page_size]],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/events/{event_id}")
|
||||||
|
async def get_bgp_event(
|
||||||
|
event_id: int,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
record = await db.get(CollectedData, event_id)
|
||||||
|
if not record or record.source not in BGP_SOURCES:
|
||||||
|
raise HTTPException(status_code=404, detail="BGP event not found")
|
||||||
|
return record.to_dict()
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/anomalies")
|
||||||
|
async def list_bgp_anomalies(
|
||||||
|
severity: Optional[str] = Query(None),
|
||||||
|
anomaly_type: Optional[str] = Query(None),
|
||||||
|
status: Optional[str] = Query(None),
|
||||||
|
prefix: Optional[str] = Query(None),
|
||||||
|
origin_asn: Optional[int] = Query(None),
|
||||||
|
time_from: Optional[str] = Query(None),
|
||||||
|
time_to: Optional[str] = Query(None),
|
||||||
|
page: int = Query(1, ge=1),
|
||||||
|
page_size: int = Query(50, ge=1, le=200),
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
stmt = select(BGPAnomaly).order_by(BGPAnomaly.created_at.desc(), BGPAnomaly.id.desc())
|
||||||
|
if severity:
|
||||||
|
stmt = stmt.where(BGPAnomaly.severity == severity)
|
||||||
|
if anomaly_type:
|
||||||
|
stmt = stmt.where(BGPAnomaly.anomaly_type == anomaly_type)
|
||||||
|
if status:
|
||||||
|
stmt = stmt.where(BGPAnomaly.status == status)
|
||||||
|
if prefix:
|
||||||
|
stmt = stmt.where(BGPAnomaly.prefix == prefix)
|
||||||
|
if origin_asn is not None:
|
||||||
|
stmt = stmt.where(BGPAnomaly.origin_asn == origin_asn)
|
||||||
|
|
||||||
|
result = await db.execute(stmt)
|
||||||
|
records = result.scalars().all()
|
||||||
|
dt_from = _parse_dt(time_from)
|
||||||
|
dt_to = _parse_dt(time_to)
|
||||||
|
if dt_from or dt_to:
|
||||||
|
records = [record for record in records if _matches_time(record.created_at, dt_from, dt_to)]
|
||||||
|
|
||||||
|
offset = (page - 1) * page_size
|
||||||
|
return {
|
||||||
|
"total": len(records),
|
||||||
|
"page": page,
|
||||||
|
"page_size": page_size,
|
||||||
|
"data": [record.to_dict() for record in records[offset : offset + page_size]],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/anomalies/summary")
async def get_bgp_anomaly_summary(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the total anomaly count plus counts grouped by type, severity and status."""
    anomaly_count = func.count(BGPAnomaly.id)

    async def _counts_by(column) -> dict:
        # One GROUP BY query per dimension, largest bucket first.
        grouped = await db.execute(
            select(column, anomaly_count)
            .group_by(column)
            .order_by(anomaly_count.desc())
        )
        return {bucket[0]: bucket[1] for bucket in grouped.fetchall()}

    overall = await db.execute(select(anomaly_count))
    by_type = await _counts_by(BGPAnomaly.anomaly_type)
    by_severity = await _counts_by(BGPAnomaly.severity)
    by_status = await _counts_by(BGPAnomaly.status)

    return {
        "total": overall.scalar() or 0,
        "by_type": by_type,
        "by_severity": by_severity,
        "by_status": by_status,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/anomalies/{anomaly_id}")
async def get_bgp_anomaly(
    anomaly_id: int,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Fetch one BGP anomaly by primary key; respond with 404 when it is absent."""
    anomaly = await db.get(BGPAnomaly, anomaly_id)
    if anomaly:
        return anomaly.to_dict()
    raise HTTPException(status_code=404, detail="BGP anomaly not found")
|
||||||
@@ -7,16 +7,138 @@ import json
|
|||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
|
|
||||||
|
from app.core.collected_data_fields import get_metadata_field
|
||||||
|
from app.core.countries import COUNTRY_OPTIONS, get_country_search_variants, normalize_country
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.db.session import get_db
|
from app.db.session import get_db
|
||||||
from app.models.user import User
|
from app.models.user import User
|
||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
from app.models.collected_data import CollectedData
|
from app.models.collected_data import CollectedData
|
||||||
|
from app.models.datasource import DataSource
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
# SQL expression that reads the country out of the JSON ``metadata`` column.
COUNTRY_SQL = "metadata->>'country'"

# Column expressions that free-text search is matched against (ILIKE).
SEARCHABLE_SQL = [
    "name", "title", "description",
    "source", "data_type", "source_id",
    "metadata::text",
]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_multi_values(value: Optional[str]) -> list[str]:
    """Split a comma-separated query value into trimmed, non-empty items.

    ``None`` and the empty string both yield an empty list.
    """
    if not value:
        return []

    items: list[str] = []
    for chunk in value.split(","):
        cleaned = chunk.strip()
        if cleaned:
            items.append(cleaned)
    return items
|
||||||
|
|
||||||
|
|
||||||
|
def build_in_condition(field_sql: str, values: list[str], param_prefix: str, params: dict) -> str:
    """Build a parameterised ``<field> IN (...)`` SQL fragment.

    Each value is stored in ``params`` under ``"<param_prefix>_<index>"`` and
    referenced by a matching ``:<key>`` placeholder, so the values themselves
    never appear in the SQL text.

    Args:
        field_sql: Column or SQL expression on the left of ``IN``. It is
            interpolated verbatim and must come from trusted code.
        values: Values to match.
        param_prefix: Prefix for the generated bind-parameter names
            (interpolated verbatim; must be trusted).
        params: Bind-parameter dict, mutated in place.

    Returns:
        The SQL condition. For an empty ``values`` list the always-false
        predicate ``"1=0"`` is returned, because ``IN ()`` is not valid SQL.
    """
    if not values:
        return "1=0"

    placeholders = []
    for index, value in enumerate(values):
        key = f"{param_prefix}_{index}"
        params[key] = value
        placeholders.append(f":{key}")
    return f"{field_sql} IN ({', '.join(placeholders)})"
|
||||||
|
|
||||||
|
|
||||||
|
def _escape_like(term: str) -> str:
    """Escape LIKE/ILIKE metacharacters so ``term`` is matched literally.

    Backslash is PostgreSQL's default LIKE escape character, so no explicit
    ``ESCAPE`` clause is needed in the generated SQL.
    """
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def build_search_condition(search: Optional[str], params: dict) -> Optional[str]:
    """Build the free-text search predicate and register its bind parameters.

    The trimmed search text, plus any country-name variants of it that differ
    case-insensitively, are each matched with ``ILIKE '%term%'`` against every
    expression in ``SEARCHABLE_SQL``; all comparisons are OR-ed together.

    Besides ``search_<n>``, this also sets ``search_exact``, ``search_prefix``,
    ``country_search_exact`` and ``country_search_prefix`` in ``params``.
    User-supplied ``%``, ``_`` and ``\\`` are escaped so they match literally.

    Args:
        search: Raw search text from the request, may be ``None`` or blank.
        params: Bind-parameter dict, mutated in place.

    Returns:
        A parenthesised SQL condition, or ``None`` when there is nothing to
        search for (in which case ``params`` is left untouched).
    """
    if not search:
        return None

    normalized = search.strip()
    if not normalized:
        return None

    # Look the variants up once; they feed both the term list and the
    # canonical country name below.
    variants = list(get_country_search_variants(normalized))

    search_terms = [normalized]
    seen = {normalized.casefold()}
    for variant in variants:
        folded = variant.casefold()
        if folded not in seen:
            seen.add(folded)
            search_terms.append(variant)

    conditions = []
    for index, term in enumerate(search_terms):
        params[f"search_{index}"] = f"%{_escape_like(term)}%"
        conditions.extend(f"{field} ILIKE :search_{index}" for field in SEARCHABLE_SQL)

    escaped = _escape_like(normalized)
    params["search_exact"] = escaped
    params["search_prefix"] = f"{escaped}%"

    # The first variant is treated as the canonical country name; fall back to
    # the raw text when there is none (or it is empty).
    canonical = _escape_like((variants[0] if variants else None) or normalized)
    params["country_search_exact"] = canonical
    params["country_search_prefix"] = f"{canonical}%"

    return "(" + " OR ".join(conditions) + ")"
|
||||||
|
|
||||||
|
|
||||||
|
def build_search_rank_sql(search: Optional[str]) -> str:
    """Return the SQL expression used to score a row against the search text.

    With no (or blank) search text the constant ``"0"`` is returned so the
    expression can always be selected and ordered by.

    Otherwise a ``CASE`` expression is returned. ``CASE`` yields the first
    matching branch, so branches are listed from highest to lowest score;
    a row therefore always receives the best score it qualifies for. The
    expression references the ``:search_exact``, ``:search_prefix``,
    ``:country_search_exact``, ``:country_search_prefix`` and ``:search_0``
    bind parameters, which ``build_search_condition`` registers.
    """
    if not search or not search.strip():
        return "0"

    return """
        CASE
            WHEN name ILIKE :search_exact THEN 700
            WHEN name ILIKE :search_prefix THEN 600
            WHEN title ILIKE :search_exact THEN 500
            WHEN title ILIKE :search_prefix THEN 400
            WHEN metadata->>'country' ILIKE :country_search_exact THEN 380
            WHEN source_id ILIKE :search_exact THEN 350
            WHEN metadata->>'country' ILIKE :country_search_prefix THEN 340
            WHEN source ILIKE :search_exact THEN 300
            WHEN data_type ILIKE :search_exact THEN 250
            WHEN description ILIKE :search_0 THEN 150
            WHEN metadata::text ILIKE :search_0 THEN 100
            WHEN title ILIKE :search_0 THEN 80
            WHEN name ILIKE :search_0 THEN 60
            WHEN source ILIKE :search_0 THEN 40
            WHEN data_type ILIKE :search_0 THEN 30
            WHEN source_id ILIKE :search_0 THEN 20
            ELSE 0
        END
    """
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_collected_row(row, source_name_map: dict[str, str] | None = None) -> dict:
|
||||||
|
metadata = row[7]
|
||||||
|
source = row[1]
|
||||||
|
return {
|
||||||
|
"id": row[0],
|
||||||
|
"source": source,
|
||||||
|
"source_name": source_name_map.get(source, source) if source_name_map else source,
|
||||||
|
"source_id": row[2],
|
||||||
|
"data_type": row[3],
|
||||||
|
"name": row[4],
|
||||||
|
"title": row[5],
|
||||||
|
"description": row[6],
|
||||||
|
"country": get_metadata_field(metadata, "country"),
|
||||||
|
"city": get_metadata_field(metadata, "city"),
|
||||||
|
"latitude": get_metadata_field(metadata, "latitude"),
|
||||||
|
"longitude": get_metadata_field(metadata, "longitude"),
|
||||||
|
"value": get_metadata_field(metadata, "value"),
|
||||||
|
"unit": get_metadata_field(metadata, "unit"),
|
||||||
|
"metadata": metadata,
|
||||||
|
"cores": get_metadata_field(metadata, "cores"),
|
||||||
|
"rmax": get_metadata_field(metadata, "rmax"),
|
||||||
|
"rpeak": get_metadata_field(metadata, "rpeak"),
|
||||||
|
"power": get_metadata_field(metadata, "power"),
|
||||||
|
"collected_at": to_iso8601_utc(row[8]),
|
||||||
|
"reference_date": to_iso8601_utc(row[9]),
|
||||||
|
"is_valid": row[10],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def get_source_name_map(db: AsyncSession) -> dict[str, str]:
    """Load a ``{source key: display name}`` mapping from the DataSource table."""
    lookup = await db.execute(select(DataSource.source, DataSource.name))
    name_by_source: dict[str, str] = {}
    for entry in lookup.fetchall():
        name_by_source[entry[0]] = entry[1]
    return name_by_source
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
async def list_collected_data(
|
async def list_collected_data(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
source: Optional[str] = Query(None, description="数据源过滤"),
|
source: Optional[str] = Query(None, description="数据源过滤"),
|
||||||
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
||||||
country: Optional[str] = Query(None, description="国家过滤"),
|
country: Optional[str] = Query(None, description="国家过滤"),
|
||||||
@@ -27,25 +149,30 @@ async def list_collected_data(
|
|||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""查询采集的数据列表"""
|
"""查询采集的数据列表"""
|
||||||
|
normalized_country = normalize_country(country) if country else None
|
||||||
|
source_values = parse_multi_values(source)
|
||||||
|
data_type_values = parse_multi_values(data_type)
|
||||||
|
|
||||||
# Build WHERE clause
|
# Build WHERE clause
|
||||||
conditions = []
|
conditions = []
|
||||||
params = {}
|
params = {}
|
||||||
|
|
||||||
if source:
|
if mode != "history":
|
||||||
conditions.append("source = :source")
|
conditions.append("COALESCE(is_current, TRUE) = TRUE")
|
||||||
params["source"] = source
|
|
||||||
if data_type:
|
if source_values:
|
||||||
conditions.append("data_type = :data_type")
|
conditions.append(build_in_condition("source", source_values, "source", params))
|
||||||
params["data_type"] = data_type
|
if data_type_values:
|
||||||
if country:
|
conditions.append(build_in_condition("data_type", data_type_values, "data_type", params))
|
||||||
conditions.append("country = :country")
|
if normalized_country:
|
||||||
params["country"] = country
|
conditions.append(f"{COUNTRY_SQL} = :country")
|
||||||
if search:
|
params["country"] = normalized_country
|
||||||
conditions.append("(name ILIKE :search OR title ILIKE :search)")
|
search_condition = build_search_condition(search, params)
|
||||||
params["search"] = f"%{search}%"
|
if search_condition:
|
||||||
|
conditions.append(search_condition)
|
||||||
|
|
||||||
where_sql = " AND ".join(conditions) if conditions else "1=1"
|
where_sql = " AND ".join(conditions) if conditions else "1=1"
|
||||||
|
search_rank_sql = build_search_rank_sql(search)
|
||||||
|
|
||||||
# Calculate offset
|
# Calculate offset
|
||||||
offset = (page - 1) * page_size
|
offset = (page - 1) * page_size
|
||||||
@@ -58,11 +185,11 @@ async def list_collected_data(
|
|||||||
# Query data
|
# Query data
|
||||||
query = text(f"""
|
query = text(f"""
|
||||||
SELECT id, source, source_id, data_type, name, title, description,
|
SELECT id, source, source_id, data_type, name, title, description,
|
||||||
country, city, latitude, longitude, value, unit,
|
metadata, collected_at, reference_date, is_valid,
|
||||||
metadata, collected_at, reference_date, is_valid
|
{search_rank_sql} AS search_rank
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
WHERE {where_sql}
|
WHERE {where_sql}
|
||||||
ORDER BY collected_at DESC
|
ORDER BY search_rank DESC, collected_at DESC
|
||||||
LIMIT :limit OFFSET :offset
|
LIMIT :limit OFFSET :offset
|
||||||
""")
|
""")
|
||||||
params["limit"] = page_size
|
params["limit"] = page_size
|
||||||
@@ -70,30 +197,11 @@ async def list_collected_data(
|
|||||||
|
|
||||||
result = await db.execute(query, params)
|
result = await db.execute(query, params)
|
||||||
rows = result.fetchall()
|
rows = result.fetchall()
|
||||||
|
source_name_map = await get_source_name_map(db)
|
||||||
|
|
||||||
data = []
|
data = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
data.append(
|
data.append(serialize_collected_row(row[:11], source_name_map))
|
||||||
{
|
|
||||||
"id": row[0],
|
|
||||||
"source": row[1],
|
|
||||||
"source_id": row[2],
|
|
||||||
"data_type": row[3],
|
|
||||||
"name": row[4],
|
|
||||||
"title": row[5],
|
|
||||||
"description": row[6],
|
|
||||||
"country": row[7],
|
|
||||||
"city": row[8],
|
|
||||||
"latitude": row[9],
|
|
||||||
"longitude": row[10],
|
|
||||||
"value": row[11],
|
|
||||||
"unit": row[12],
|
|
||||||
"metadata": row[13],
|
|
||||||
"collected_at": row[14].isoformat() if row[14] else None,
|
|
||||||
"reference_date": row[15].isoformat() if row[15] else None,
|
|
||||||
"is_valid": row[16],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total": total,
|
"total": total,
|
||||||
@@ -105,21 +213,39 @@ async def list_collected_data(
|
|||||||
|
|
||||||
@router.get("/summary")
|
@router.get("/summary")
|
||||||
async def get_data_summary(
|
async def get_data_summary(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
|
source: Optional[str] = Query(None, description="数据源过滤"),
|
||||||
|
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
||||||
|
country: Optional[str] = Query(None, description="国家过滤"),
|
||||||
|
search: Optional[str] = Query(None, description="搜索名称"),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""获取数据汇总统计"""
|
"""获取数据汇总统计"""
|
||||||
|
where_sql, params = build_where_clause(source, data_type, country, search)
|
||||||
|
if mode != "history":
|
||||||
|
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
|
||||||
|
|
||||||
|
overall_where_sql = "COALESCE(is_current, TRUE) = TRUE" if mode != "history" else "1=1"
|
||||||
|
|
||||||
|
overall_total_result = await db.execute(
|
||||||
|
text(f"SELECT COUNT(*) FROM collected_data WHERE {overall_where_sql}")
|
||||||
|
)
|
||||||
|
overall_total = overall_total_result.scalar() or 0
|
||||||
|
|
||||||
# By source and data_type
|
# By source and data_type
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
text("""
|
text(f"""
|
||||||
SELECT source, data_type, COUNT(*) as count
|
SELECT source, data_type, COUNT(*) as count
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
|
WHERE {where_sql}
|
||||||
GROUP BY source, data_type
|
GROUP BY source, data_type
|
||||||
ORDER BY source, data_type
|
ORDER BY source, data_type
|
||||||
""")
|
"""),
|
||||||
|
params,
|
||||||
)
|
)
|
||||||
rows = result.fetchall()
|
rows = result.fetchall()
|
||||||
|
source_name_map = await get_source_name_map(db)
|
||||||
|
|
||||||
by_source = {}
|
by_source = {}
|
||||||
total = 0
|
total = 0
|
||||||
@@ -128,31 +254,62 @@ async def get_data_summary(
|
|||||||
data_type = row[1]
|
data_type = row[1]
|
||||||
count = row[2]
|
count = row[2]
|
||||||
|
|
||||||
if source not in by_source:
|
source_key = source_name_map.get(source, source)
|
||||||
by_source[source] = {}
|
if source_key not in by_source:
|
||||||
by_source[source][data_type] = count
|
by_source[source_key] = {}
|
||||||
|
by_source[source_key][data_type] = count
|
||||||
total += count
|
total += count
|
||||||
|
|
||||||
# Total by source
|
# Total by source
|
||||||
source_totals = await db.execute(
|
source_totals = await db.execute(
|
||||||
text("""
|
text(f"""
|
||||||
SELECT source, COUNT(*) as count
|
SELECT source, COUNT(*) as count
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
|
WHERE {where_sql}
|
||||||
GROUP BY source
|
GROUP BY source
|
||||||
ORDER BY count DESC
|
ORDER BY count DESC
|
||||||
""")
|
"""),
|
||||||
|
params,
|
||||||
)
|
)
|
||||||
source_rows = source_totals.fetchall()
|
source_rows = source_totals.fetchall()
|
||||||
|
|
||||||
|
type_totals = await db.execute(
|
||||||
|
text(f"""
|
||||||
|
SELECT data_type, COUNT(*) as count
|
||||||
|
FROM collected_data
|
||||||
|
WHERE {where_sql}
|
||||||
|
GROUP BY data_type
|
||||||
|
ORDER BY count DESC, data_type
|
||||||
|
"""),
|
||||||
|
params,
|
||||||
|
)
|
||||||
|
type_rows = type_totals.fetchall()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total_records": total,
|
"total_records": total,
|
||||||
|
"overall_total_records": overall_total,
|
||||||
"by_source": by_source,
|
"by_source": by_source,
|
||||||
"source_totals": [{"source": row[0], "count": row[1]} for row in source_rows],
|
"source_totals": [
|
||||||
|
{
|
||||||
|
"source": row[0],
|
||||||
|
"source_name": source_name_map.get(row[0], row[0]),
|
||||||
|
"count": row[1],
|
||||||
|
}
|
||||||
|
for row in source_rows
|
||||||
|
],
|
||||||
|
"type_totals": [
|
||||||
|
{
|
||||||
|
"data_type": row[0],
|
||||||
|
"count": row[1],
|
||||||
|
}
|
||||||
|
for row in type_rows
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/sources")
|
@router.get("/sources")
|
||||||
async def get_data_sources(
|
async def get_data_sources(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
@@ -160,18 +317,25 @@ async def get_data_sources(
|
|||||||
|
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
text("""
|
text("""
|
||||||
SELECT DISTINCT source FROM collected_data ORDER BY source
|
SELECT DISTINCT source FROM collected_data
|
||||||
|
""" + ("WHERE COALESCE(is_current, TRUE) = TRUE " if mode != "history" else "") + """
|
||||||
|
ORDER BY source
|
||||||
""")
|
""")
|
||||||
)
|
)
|
||||||
rows = result.fetchall()
|
rows = result.fetchall()
|
||||||
|
source_name_map = await get_source_name_map(db)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"sources": [row[0] for row in rows],
|
"sources": [
|
||||||
|
{"source": row[0], "source_name": source_name_map.get(row[0], row[0])}
|
||||||
|
for row in rows
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/types")
|
@router.get("/types")
|
||||||
async def get_data_types(
|
async def get_data_types(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
@@ -179,7 +343,9 @@ async def get_data_types(
|
|||||||
|
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
text("""
|
text("""
|
||||||
SELECT DISTINCT data_type FROM collected_data ORDER BY data_type
|
SELECT DISTINCT data_type FROM collected_data
|
||||||
|
""" + ("WHERE COALESCE(is_current, TRUE) = TRUE " if mode != "history" else "") + """
|
||||||
|
ORDER BY data_type
|
||||||
""")
|
""")
|
||||||
)
|
)
|
||||||
rows = result.fetchall()
|
rows = result.fetchall()
|
||||||
@@ -196,17 +362,8 @@ async def get_countries(
|
|||||||
):
|
):
|
||||||
"""获取所有国家列表"""
|
"""获取所有国家列表"""
|
||||||
|
|
||||||
result = await db.execute(
|
|
||||||
text("""
|
|
||||||
SELECT DISTINCT country FROM collected_data
|
|
||||||
WHERE country IS NOT NULL AND country != ''
|
|
||||||
ORDER BY country
|
|
||||||
""")
|
|
||||||
)
|
|
||||||
rows = result.fetchall()
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"countries": [row[0] for row in rows],
|
"countries": COUNTRY_OPTIONS,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -221,7 +378,6 @@ async def get_collected_data(
|
|||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
text("""
|
text("""
|
||||||
SELECT id, source, source_id, data_type, name, title, description,
|
SELECT id, source, source_id, data_type, name, title, description,
|
||||||
country, city, latitude, longitude, value, unit,
|
|
||||||
metadata, collected_at, reference_date, is_valid
|
metadata, collected_at, reference_date, is_valid
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
WHERE id = :id
|
WHERE id = :id
|
||||||
@@ -236,25 +392,8 @@ async def get_collected_data(
|
|||||||
detail="数据不存在",
|
detail="数据不存在",
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
source_name_map = await get_source_name_map(db)
|
||||||
"id": row[0],
|
return serialize_collected_row(row, source_name_map)
|
||||||
"source": row[1],
|
|
||||||
"source_id": row[2],
|
|
||||||
"data_type": row[3],
|
|
||||||
"name": row[4],
|
|
||||||
"title": row[5],
|
|
||||||
"description": row[6],
|
|
||||||
"country": row[7],
|
|
||||||
"city": row[8],
|
|
||||||
"latitude": row[9],
|
|
||||||
"longitude": row[10],
|
|
||||||
"value": row[11],
|
|
||||||
"unit": row[12],
|
|
||||||
"metadata": row[13],
|
|
||||||
"collected_at": row[14].isoformat() if row[14] else None,
|
|
||||||
"reference_date": row[15].isoformat() if row[15] else None,
|
|
||||||
"is_valid": row[16],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def build_where_clause(
|
def build_where_clause(
|
||||||
@@ -263,19 +402,21 @@ def build_where_clause(
|
|||||||
"""Build WHERE clause and params for queries"""
|
"""Build WHERE clause and params for queries"""
|
||||||
conditions = []
|
conditions = []
|
||||||
params = {}
|
params = {}
|
||||||
|
source_values = parse_multi_values(source)
|
||||||
|
data_type_values = parse_multi_values(data_type)
|
||||||
|
|
||||||
if source:
|
if source_values:
|
||||||
conditions.append("source = :source")
|
conditions.append(build_in_condition("source", source_values, "source", params))
|
||||||
params["source"] = source
|
if data_type_values:
|
||||||
if data_type:
|
conditions.append(build_in_condition("data_type", data_type_values, "data_type", params))
|
||||||
conditions.append("data_type = :data_type")
|
normalized_country = normalize_country(country) if country else None
|
||||||
params["data_type"] = data_type
|
|
||||||
if country:
|
if normalized_country:
|
||||||
conditions.append("country = :country")
|
conditions.append(f"{COUNTRY_SQL} = :country")
|
||||||
params["country"] = country
|
params["country"] = normalized_country
|
||||||
if search:
|
search_condition = build_search_condition(search, params)
|
||||||
conditions.append("(name ILIKE :search OR title ILIKE :search)")
|
if search_condition:
|
||||||
params["search"] = f"%{search}%"
|
conditions.append(search_condition)
|
||||||
|
|
||||||
where_sql = " AND ".join(conditions) if conditions else "1=1"
|
where_sql = " AND ".join(conditions) if conditions else "1=1"
|
||||||
return where_sql, params
|
return where_sql, params
|
||||||
@@ -283,6 +424,7 @@ def build_where_clause(
|
|||||||
|
|
||||||
@router.get("/export/json")
|
@router.get("/export/json")
|
||||||
async def export_json(
|
async def export_json(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
source: Optional[str] = Query(None, description="数据源过滤"),
|
source: Optional[str] = Query(None, description="数据源过滤"),
|
||||||
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
||||||
country: Optional[str] = Query(None, description="国家过滤"),
|
country: Optional[str] = Query(None, description="国家过滤"),
|
||||||
@@ -294,11 +436,12 @@ async def export_json(
|
|||||||
"""导出数据为 JSON 格式"""
|
"""导出数据为 JSON 格式"""
|
||||||
|
|
||||||
where_sql, params = build_where_clause(source, data_type, country, search)
|
where_sql, params = build_where_clause(source, data_type, country, search)
|
||||||
|
if mode != "history":
|
||||||
|
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
|
||||||
params["limit"] = limit
|
params["limit"] = limit
|
||||||
|
|
||||||
query = text(f"""
|
query = text(f"""
|
||||||
SELECT id, source, source_id, data_type, name, title, description,
|
SELECT id, source, source_id, data_type, name, title, description,
|
||||||
country, city, latitude, longitude, value, unit,
|
|
||||||
metadata, collected_at, reference_date, is_valid
|
metadata, collected_at, reference_date, is_valid
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
WHERE {where_sql}
|
WHERE {where_sql}
|
||||||
@@ -311,27 +454,7 @@ async def export_json(
|
|||||||
|
|
||||||
data = []
|
data = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
data.append(
|
data.append(serialize_collected_row(row))
|
||||||
{
|
|
||||||
"id": row[0],
|
|
||||||
"source": row[1],
|
|
||||||
"source_id": row[2],
|
|
||||||
"data_type": row[3],
|
|
||||||
"name": row[4],
|
|
||||||
"title": row[5],
|
|
||||||
"description": row[6],
|
|
||||||
"country": row[7],
|
|
||||||
"city": row[8],
|
|
||||||
"latitude": row[9],
|
|
||||||
"longitude": row[10],
|
|
||||||
"value": row[11],
|
|
||||||
"unit": row[12],
|
|
||||||
"metadata": row[13],
|
|
||||||
"collected_at": row[14].isoformat() if row[14] else None,
|
|
||||||
"reference_date": row[15].isoformat() if row[15] else None,
|
|
||||||
"is_valid": row[16],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
json_str = json.dumps({"data": data, "total": len(data)}, ensure_ascii=False, indent=2)
|
json_str = json.dumps({"data": data, "total": len(data)}, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
@@ -346,6 +469,7 @@ async def export_json(
|
|||||||
|
|
||||||
@router.get("/export/csv")
|
@router.get("/export/csv")
|
||||||
async def export_csv(
|
async def export_csv(
|
||||||
|
mode: str = Query("current", description="查询模式: current/history"),
|
||||||
source: Optional[str] = Query(None, description="数据源过滤"),
|
source: Optional[str] = Query(None, description="数据源过滤"),
|
||||||
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
data_type: Optional[str] = Query(None, description="数据类型过滤"),
|
||||||
country: Optional[str] = Query(None, description="国家过滤"),
|
country: Optional[str] = Query(None, description="国家过滤"),
|
||||||
@@ -357,11 +481,12 @@ async def export_csv(
|
|||||||
"""导出数据为 CSV 格式"""
|
"""导出数据为 CSV 格式"""
|
||||||
|
|
||||||
where_sql, params = build_where_clause(source, data_type, country, search)
|
where_sql, params = build_where_clause(source, data_type, country, search)
|
||||||
|
if mode != "history":
|
||||||
|
where_sql = f"({where_sql}) AND COALESCE(is_current, TRUE) = TRUE"
|
||||||
params["limit"] = limit
|
params["limit"] = limit
|
||||||
|
|
||||||
query = text(f"""
|
query = text(f"""
|
||||||
SELECT id, source, source_id, data_type, name, title, description,
|
SELECT id, source, source_id, data_type, name, title, description,
|
||||||
country, city, latitude, longitude, value, unit,
|
|
||||||
metadata, collected_at, reference_date, is_valid
|
metadata, collected_at, reference_date, is_valid
|
||||||
FROM collected_data
|
FROM collected_data
|
||||||
WHERE {where_sql}
|
WHERE {where_sql}
|
||||||
@@ -409,16 +534,16 @@ async def export_csv(
|
|||||||
row[4],
|
row[4],
|
||||||
row[5],
|
row[5],
|
||||||
row[6],
|
row[6],
|
||||||
row[7],
|
get_metadata_field(row[7], "country"),
|
||||||
row[8],
|
get_metadata_field(row[7], "city"),
|
||||||
row[9],
|
get_metadata_field(row[7], "latitude"),
|
||||||
|
get_metadata_field(row[7], "longitude"),
|
||||||
|
get_metadata_field(row[7], "value"),
|
||||||
|
get_metadata_field(row[7], "unit"),
|
||||||
|
json.dumps(row[7]) if row[7] else "",
|
||||||
|
to_iso8601_utc(row[8]) or "",
|
||||||
|
to_iso8601_utc(row[9]) or "",
|
||||||
row[10],
|
row[10],
|
||||||
row[11],
|
|
||||||
row[12],
|
|
||||||
json.dumps(row[13]) if row[13] else "",
|
|
||||||
row[14].isoformat() if row[14] else "",
|
|
||||||
row[15].isoformat() if row[15] else "",
|
|
||||||
row[16],
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""Dashboard API with caching and optimizations"""
|
"""Dashboard API with caching and optimizations"""
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import UTC, datetime, timedelta
|
||||||
from fastapi import APIRouter, Depends
|
from fastapi import APIRouter, Depends
|
||||||
from sqlalchemy import select, func, text
|
from sqlalchemy import select, func, text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
@@ -13,6 +13,8 @@ from app.models.alert import Alert, AlertSeverity
|
|||||||
from app.models.task import CollectionTask
|
from app.models.task import CollectionTask
|
||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
from app.core.cache import cache
|
from app.core.cache import cache
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
|
||||||
|
|
||||||
# Built-in collectors info (mirrored from datasources.py)
|
# Built-in collectors info (mirrored from datasources.py)
|
||||||
COLLECTOR_INFO = {
|
COLLECTOR_INFO = {
|
||||||
@@ -110,7 +112,7 @@ async def get_stats(
|
|||||||
if cached_result:
|
if cached_result:
|
||||||
return cached_result
|
return cached_result
|
||||||
|
|
||||||
today_start = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
|
today_start = datetime.now(UTC).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
# Count built-in collectors
|
# Count built-in collectors
|
||||||
built_in_count = len(COLLECTOR_INFO)
|
built_in_count = len(COLLECTOR_INFO)
|
||||||
@@ -174,7 +176,7 @@ async def get_stats(
|
|||||||
"active_datasources": active_datasources,
|
"active_datasources": active_datasources,
|
||||||
"tasks_today": tasks_today,
|
"tasks_today": tasks_today,
|
||||||
"success_rate": round(success_rate, 1),
|
"success_rate": round(success_rate, 1),
|
||||||
"last_updated": datetime.utcnow().isoformat(),
|
"last_updated": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"alerts": {
|
"alerts": {
|
||||||
"critical": critical_alerts,
|
"critical": critical_alerts,
|
||||||
"warning": warning_alerts,
|
"warning": warning_alerts,
|
||||||
@@ -229,10 +231,10 @@ async def get_summary(
|
|||||||
summary[module] = {
|
summary[module] = {
|
||||||
"datasources": data["datasources"],
|
"datasources": data["datasources"],
|
||||||
"total_records": 0, # Built-in don't track this in dashboard stats
|
"total_records": 0, # Built-in don't track this in dashboard stats
|
||||||
"last_updated": datetime.utcnow().isoformat(),
|
"last_updated": to_iso8601_utc(datetime.now(UTC)),
|
||||||
}
|
}
|
||||||
|
|
||||||
response = {"modules": summary, "last_updated": datetime.utcnow().isoformat()}
|
response = {"modules": summary, "last_updated": to_iso8601_utc(datetime.now(UTC))}
|
||||||
|
|
||||||
cache.set(cache_key, response, expire_seconds=300)
|
cache.set(cache_key, response, expire_seconds=300)
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ from app.models.user import User
|
|||||||
from app.models.datasource_config import DataSourceConfig
|
from app.models.datasource_config import DataSourceConfig
|
||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
from app.core.cache import cache
|
from app.core.cache import cache
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@@ -123,8 +124,8 @@ async def list_configs(
|
|||||||
"headers": c.headers,
|
"headers": c.headers,
|
||||||
"config": c.config,
|
"config": c.config,
|
||||||
"is_active": c.is_active,
|
"is_active": c.is_active,
|
||||||
"created_at": c.created_at.isoformat() if c.created_at else None,
|
"created_at": to_iso8601_utc(c.created_at),
|
||||||
"updated_at": c.updated_at.isoformat() if c.updated_at else None,
|
"updated_at": to_iso8601_utc(c.updated_at),
|
||||||
}
|
}
|
||||||
for c in configs
|
for c in configs
|
||||||
],
|
],
|
||||||
@@ -155,8 +156,8 @@ async def get_config(
|
|||||||
"headers": config.headers,
|
"headers": config.headers,
|
||||||
"config": config.config,
|
"config": config.config,
|
||||||
"is_active": config.is_active,
|
"is_active": config.is_active,
|
||||||
"created_at": config.created_at.isoformat() if config.created_at else None,
|
"created_at": to_iso8601_utc(config.created_at),
|
||||||
"updated_at": config.updated_at.isoformat() if config.updated_at else None,
|
"updated_at": to_iso8601_utc(config.updated_at),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -307,3 +308,40 @@ async def test_new_config(
|
|||||||
"error": "Connection failed",
|
"error": "Connection failed",
|
||||||
"message": str(e),
|
"message": str(e),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/configs/all")
async def list_all_datasources(
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List all data sources: YAML defaults + DB overrides"""
    from app.core.data_sources import COLLECTOR_URL_KEYS, get_data_sources_config

    config = get_data_sources_config()

    # Index the stored configs by name so each collector needs one lookup.
    stored = await db.execute(select(DataSourceConfig))
    db_configs = {}
    for item in stored.scalars().all():
        db_configs[item.name] = item

    entries = []
    for name, yaml_key in COLLECTOR_URL_KEYS.items():
        yaml_url = config.get_yaml_url(name)
        db_config = db_configs.get(name)
        has_row = db_config is not None

        # With a YAML default, only a differing endpoint counts as an
        # override; without one, the mere presence of a DB row does.
        if yaml_url:
            is_overridden = has_row and db_config.endpoint != yaml_url
        else:
            is_overridden = has_row

        if db_config:
            endpoint = db_config.endpoint
            is_active = db_config.is_active
            source_type = db_config.source_type
            description = db_config.description
        else:
            endpoint = yaml_url
            is_active = True
            source_type = "http"
            description = f"Data source from YAML: {yaml_key}"

        entries.append(
            {
                "name": name,
                "default_url": yaml_url,
                "endpoint": endpoint,
                "is_overridden": is_overridden,
                "is_active": is_active,
                "source_type": source_type,
                "description": description,
            }
        )

    return {"total": len(entries), "data": entries}
|
||||||
|
|||||||
@@ -1,127 +1,77 @@
|
|||||||
from typing import List, Optional
|
import asyncio
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta, timezone
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status
|
from typing import Optional
|
||||||
from sqlalchemy import select, func
|
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||||
|
from sqlalchemy import func, select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
from app.core.security import get_current_user
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
from app.db.session import get_db
|
from app.db.session import get_db
|
||||||
from app.models.user import User
|
from app.models.collected_data import CollectedData
|
||||||
from app.models.datasource import DataSource
|
from app.models.datasource import DataSource
|
||||||
from app.models.task import CollectionTask
|
from app.models.task import CollectionTask
|
||||||
from app.models.collected_data import CollectedData
|
from app.models.user import User
|
||||||
from app.core.security import get_current_user
|
from app.services.scheduler import get_latest_task_id_for_datasource, run_collector_now, sync_datasource_job
|
||||||
from app.services.collectors.registry import collector_registry
|
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
COLLECTOR_INFO = {
|
|
||||||
"top500": {
|
|
||||||
"id": 1,
|
|
||||||
"name": "TOP500 Supercomputers",
|
|
||||||
"module": "L1",
|
|
||||||
"priority": "P0",
|
|
||||||
"frequency_hours": 4,
|
|
||||||
},
|
|
||||||
"epoch_ai_gpu": {
|
|
||||||
"id": 2,
|
|
||||||
"name": "Epoch AI GPU Clusters",
|
|
||||||
"module": "L1",
|
|
||||||
"priority": "P0",
|
|
||||||
"frequency_hours": 6,
|
|
||||||
},
|
|
||||||
"huggingface_models": {
|
|
||||||
"id": 3,
|
|
||||||
"name": "HuggingFace Models",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 12,
|
|
||||||
},
|
|
||||||
"huggingface_datasets": {
|
|
||||||
"id": 4,
|
|
||||||
"name": "HuggingFace Datasets",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 12,
|
|
||||||
},
|
|
||||||
"huggingface_spaces": {
|
|
||||||
"id": 5,
|
|
||||||
"name": "HuggingFace Spaces",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P2",
|
|
||||||
"frequency_hours": 24,
|
|
||||||
},
|
|
||||||
"peeringdb_ixp": {
|
|
||||||
"id": 6,
|
|
||||||
"name": "PeeringDB IXP",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 24,
|
|
||||||
},
|
|
||||||
"peeringdb_network": {
|
|
||||||
"id": 7,
|
|
||||||
"name": "PeeringDB Networks",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P2",
|
|
||||||
"frequency_hours": 48,
|
|
||||||
},
|
|
||||||
"peeringdb_facility": {
|
|
||||||
"id": 8,
|
|
||||||
"name": "PeeringDB Facilities",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P2",
|
|
||||||
"frequency_hours": 48,
|
|
||||||
},
|
|
||||||
"telegeography_cables": {
|
|
||||||
"id": 9,
|
|
||||||
"name": "Submarine Cables",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 168,
|
|
||||||
},
|
|
||||||
"telegeography_landing": {
|
|
||||||
"id": 10,
|
|
||||||
"name": "Cable Landing Points",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P2",
|
|
||||||
"frequency_hours": 168,
|
|
||||||
},
|
|
||||||
"telegeography_systems": {
|
|
||||||
"id": 11,
|
|
||||||
"name": "Cable Systems",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P2",
|
|
||||||
"frequency_hours": 168,
|
|
||||||
},
|
|
||||||
"arcgis_cables": {
|
|
||||||
"id": 15,
|
|
||||||
"name": "ArcGIS Submarine Cables",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 168,
|
|
||||||
},
|
|
||||||
"fao_landing_points": {
|
|
||||||
"id": 16,
|
|
||||||
"name": "FAO Landing Points",
|
|
||||||
"module": "L2",
|
|
||||||
"priority": "P1",
|
|
||||||
"frequency_hours": 168,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
ID_TO_COLLECTOR = {info["id"]: name for name, info in COLLECTOR_INFO.items()}
|
def format_frequency_label(minutes: int) -> str:
|
||||||
COLLECTOR_TO_ID = {name: info["id"] for name, info in COLLECTOR_INFO.items()}
|
if minutes % 1440 == 0:
|
||||||
|
return f"{minutes // 1440}d"
|
||||||
|
if minutes % 60 == 0:
|
||||||
|
return f"{minutes // 60}h"
|
||||||
|
return f"{minutes}m"
|
||||||
|
|
||||||
|
|
||||||
def get_collector_name(source_id: str) -> Optional[str]:
|
def is_due_for_collection(datasource: DataSource, now: datetime) -> bool:
|
||||||
|
if datasource.last_run_at is None:
|
||||||
|
return True
|
||||||
|
return datasource.last_run_at + timedelta(minutes=datasource.frequency_minutes) <= now
|
||||||
|
|
||||||
|
|
||||||
|
async def get_datasource_record(db: AsyncSession, source_id: str) -> Optional[DataSource]:
|
||||||
|
datasource = None
|
||||||
try:
|
try:
|
||||||
numeric_id = int(source_id)
|
datasource = await db.get(DataSource, int(source_id))
|
||||||
if numeric_id in ID_TO_COLLECTOR:
|
|
||||||
return ID_TO_COLLECTOR[numeric_id]
|
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
if source_id in COLLECTOR_INFO:
|
|
||||||
return source_id
|
if datasource is not None:
|
||||||
return None
|
return datasource
|
||||||
|
|
||||||
|
result = await db.execute(
|
||||||
|
select(DataSource).where(
|
||||||
|
(DataSource.source == source_id) | (DataSource.collector_class == source_id)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return result.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_last_completed_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]:
|
||||||
|
result = await db.execute(
|
||||||
|
select(CollectionTask)
|
||||||
|
.where(CollectionTask.datasource_id == datasource_id)
|
||||||
|
.where(CollectionTask.completed_at.isnot(None))
|
||||||
|
.where(CollectionTask.status.in_(("success", "failed", "cancelled")))
|
||||||
|
.order_by(CollectionTask.completed_at.desc())
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
|
return result.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_running_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]:
|
||||||
|
result = await db.execute(
|
||||||
|
select(CollectionTask)
|
||||||
|
.where(CollectionTask.datasource_id == datasource_id)
|
||||||
|
.where(CollectionTask.status == "running")
|
||||||
|
.order_by(CollectionTask.started_at.desc())
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
|
return result.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
@@ -132,80 +82,156 @@ async def list_datasources(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
query = select(DataSource)
|
query = select(DataSource).order_by(DataSource.module, DataSource.id)
|
||||||
|
|
||||||
filters = []
|
|
||||||
if module:
|
if module:
|
||||||
filters.append(DataSource.module == module)
|
query = query.where(DataSource.module == module)
|
||||||
if is_active is not None:
|
if is_active is not None:
|
||||||
filters.append(DataSource.is_active == is_active)
|
query = query.where(DataSource.is_active == is_active)
|
||||||
if priority:
|
if priority:
|
||||||
filters.append(DataSource.priority == priority)
|
query = query.where(DataSource.priority == priority)
|
||||||
|
|
||||||
if filters:
|
|
||||||
query = query.where(*filters)
|
|
||||||
|
|
||||||
result = await db.execute(query)
|
result = await db.execute(query)
|
||||||
datasources = result.scalars().all()
|
datasources = result.scalars().all()
|
||||||
|
|
||||||
collector_list = []
|
collector_list = []
|
||||||
for name, info in COLLECTOR_INFO.items():
|
config = get_data_sources_config()
|
||||||
is_active_status = collector_registry.is_active(name)
|
for datasource in datasources:
|
||||||
|
running_task = await get_running_task(db, datasource.id)
|
||||||
running_task_query = (
|
last_task = await get_last_completed_task(db, datasource.id)
|
||||||
select(CollectionTask)
|
endpoint = await config.get_url(datasource.source, db)
|
||||||
.where(CollectionTask.datasource_id == info["id"])
|
data_count_result = await db.execute(
|
||||||
.where(CollectionTask.status == "running")
|
select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
|
||||||
.order_by(CollectionTask.started_at.desc())
|
|
||||||
.limit(1)
|
|
||||||
)
|
)
|
||||||
running_result = await db.execute(running_task_query)
|
|
||||||
running_task = running_result.scalar_one_or_none()
|
|
||||||
|
|
||||||
last_run_query = (
|
|
||||||
select(CollectionTask)
|
|
||||||
.where(CollectionTask.datasource_id == info["id"])
|
|
||||||
.where(CollectionTask.completed_at.isnot(None))
|
|
||||||
.order_by(CollectionTask.completed_at.desc())
|
|
||||||
.limit(1)
|
|
||||||
)
|
|
||||||
last_run_result = await db.execute(last_run_query)
|
|
||||||
last_task = last_run_result.scalar_one_or_none()
|
|
||||||
|
|
||||||
data_count_query = select(func.count(CollectedData.id)).where(CollectedData.source == name)
|
|
||||||
data_count_result = await db.execute(data_count_query)
|
|
||||||
data_count = data_count_result.scalar() or 0
|
data_count = data_count_result.scalar() or 0
|
||||||
|
|
||||||
last_run = None
|
last_run_at = datasource.last_run_at or (last_task.completed_at if last_task else None)
|
||||||
if last_task and last_task.completed_at and data_count > 0:
|
last_run = to_iso8601_utc(last_run_at)
|
||||||
last_run = last_task.completed_at.strftime("%Y-%m-%d %H:%M")
|
last_status = datasource.last_status or (last_task.status if last_task else None)
|
||||||
|
|
||||||
collector_list.append(
|
collector_list.append(
|
||||||
{
|
{
|
||||||
"id": info["id"],
|
"id": datasource.id,
|
||||||
"name": info["name"],
|
"name": datasource.name,
|
||||||
"module": info["module"],
|
"module": datasource.module,
|
||||||
"priority": info["priority"],
|
"priority": datasource.priority,
|
||||||
"frequency": f"{info['frequency_hours']}h",
|
"frequency": format_frequency_label(datasource.frequency_minutes),
|
||||||
"is_active": is_active_status,
|
"frequency_minutes": datasource.frequency_minutes,
|
||||||
"collector_class": name,
|
"is_active": datasource.is_active,
|
||||||
|
"collector_class": datasource.collector_class,
|
||||||
|
"endpoint": endpoint,
|
||||||
"last_run": last_run,
|
"last_run": last_run,
|
||||||
|
"last_run_at": to_iso8601_utc(last_run_at),
|
||||||
|
"last_status": last_status,
|
||||||
|
"last_records_processed": last_task.records_processed if last_task else None,
|
||||||
|
"data_count": data_count,
|
||||||
"is_running": running_task is not None,
|
"is_running": running_task is not None,
|
||||||
"task_id": running_task.id if running_task else None,
|
"task_id": running_task.id if running_task else None,
|
||||||
"progress": running_task.progress if running_task else None,
|
"progress": running_task.progress if running_task else None,
|
||||||
|
"phase": running_task.phase if running_task else None,
|
||||||
"records_processed": running_task.records_processed if running_task else None,
|
"records_processed": running_task.records_processed if running_task else None,
|
||||||
"total_records": running_task.total_records if running_task else None,
|
"total_records": running_task.total_records if running_task else None,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
if module:
|
return {"total": len(collector_list), "data": collector_list}
|
||||||
collector_list = [c for c in collector_list if c["module"] == module]
|
|
||||||
if priority:
|
|
||||||
collector_list = [c for c in collector_list if c["priority"] == priority]
|
@router.post("/trigger-all")
|
||||||
|
async def trigger_all_datasources(
|
||||||
|
force: bool = Query(False),
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
result = await db.execute(
|
||||||
|
select(DataSource)
|
||||||
|
.where(DataSource.is_active == True)
|
||||||
|
.order_by(DataSource.module, DataSource.id)
|
||||||
|
)
|
||||||
|
datasources = result.scalars().all()
|
||||||
|
|
||||||
|
if not datasources:
|
||||||
|
return {
|
||||||
|
"status": "noop",
|
||||||
|
"message": "No active data sources to trigger",
|
||||||
|
"triggered": [],
|
||||||
|
"skipped": [],
|
||||||
|
"failed": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
previous_task_ids: dict[int, Optional[int]] = {}
|
||||||
|
triggered_sources: list[dict] = []
|
||||||
|
skipped_sources: list[dict] = []
|
||||||
|
failed_sources: list[dict] = []
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
for datasource in datasources:
|
||||||
|
running_task = await get_running_task(db, datasource.id)
|
||||||
|
if running_task is not None:
|
||||||
|
skipped_sources.append(
|
||||||
|
{
|
||||||
|
"id": datasource.id,
|
||||||
|
"source": datasource.source,
|
||||||
|
"name": datasource.name,
|
||||||
|
"reason": "already_running",
|
||||||
|
"task_id": running_task.id,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not force and not is_due_for_collection(datasource, now):
|
||||||
|
skipped_sources.append(
|
||||||
|
{
|
||||||
|
"id": datasource.id,
|
||||||
|
"source": datasource.source,
|
||||||
|
"name": datasource.name,
|
||||||
|
"reason": "within_frequency_window",
|
||||||
|
"last_run_at": to_iso8601_utc(datasource.last_run_at),
|
||||||
|
"next_run_at": to_iso8601_utc(
|
||||||
|
datasource.last_run_at + timedelta(minutes=datasource.frequency_minutes)
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
previous_task_ids[datasource.id] = await get_latest_task_id_for_datasource(datasource.id)
|
||||||
|
success = run_collector_now(datasource.source)
|
||||||
|
if not success:
|
||||||
|
failed_sources.append(
|
||||||
|
{
|
||||||
|
"id": datasource.id,
|
||||||
|
"source": datasource.source,
|
||||||
|
"name": datasource.name,
|
||||||
|
"reason": "trigger_failed",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
triggered_sources.append(
|
||||||
|
{
|
||||||
|
"id": datasource.id,
|
||||||
|
"source": datasource.source,
|
||||||
|
"name": datasource.name,
|
||||||
|
"task_id": None,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
for _ in range(20):
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
pending = [item for item in triggered_sources if item["task_id"] is None]
|
||||||
|
if not pending:
|
||||||
|
break
|
||||||
|
for item in pending:
|
||||||
|
task_id = await get_latest_task_id_for_datasource(item["id"])
|
||||||
|
if task_id is not None and task_id != previous_task_ids.get(item["id"]):
|
||||||
|
item["task_id"] = task_id
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total": len(collector_list),
|
"status": "triggered" if triggered_sources else "partial",
|
||||||
"data": collector_list,
|
"message": f"Triggered {len(triggered_sources)} data sources",
|
||||||
|
"force": force,
|
||||||
|
"triggered": triggered_sources,
|
||||||
|
"skipped": skipped_sources,
|
||||||
|
"failed": failed_sources,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -215,19 +241,24 @@ async def get_datasource(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
info = COLLECTOR_INFO[collector_name]
|
config = get_data_sources_config()
|
||||||
|
endpoint = await config.get_url(datasource.source, db)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"id": info["id"],
|
"id": datasource.id,
|
||||||
"name": info["name"],
|
"name": datasource.name,
|
||||||
"module": info["module"],
|
"module": datasource.module,
|
||||||
"priority": info["priority"],
|
"priority": datasource.priority,
|
||||||
"frequency": f"{info['frequency_hours']}h",
|
"frequency": format_frequency_label(datasource.frequency_minutes),
|
||||||
"collector_class": collector_name,
|
"frequency_minutes": datasource.frequency_minutes,
|
||||||
"is_active": collector_registry.is_active(collector_name),
|
"collector_class": datasource.collector_class,
|
||||||
|
"source": datasource.source,
|
||||||
|
"endpoint": endpoint,
|
||||||
|
"is_active": datasource.is_active,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -235,24 +266,32 @@ async def get_datasource(
|
|||||||
async def enable_datasource(
|
async def enable_datasource(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
collector_registry.set_active(collector_name, True)
|
|
||||||
return {"status": "enabled", "source_id": source_id}
|
datasource.is_active = True
|
||||||
|
await db.commit()
|
||||||
|
await sync_datasource_job(datasource.id)
|
||||||
|
return {"status": "enabled", "source_id": datasource.id}
|
||||||
|
|
||||||
|
|
||||||
@router.post("/{source_id}/disable")
|
@router.post("/{source_id}/disable")
|
||||||
async def disable_datasource(
|
async def disable_datasource(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
collector_registry.set_active(collector_name, False)
|
|
||||||
return {"status": "disabled", "source_id": source_id}
|
datasource.is_active = False
|
||||||
|
await db.commit()
|
||||||
|
await sync_datasource_job(datasource.id)
|
||||||
|
return {"status": "disabled", "source_id": datasource.id}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/{source_id}/stats")
|
@router.get("/{source_id}/stats")
|
||||||
@@ -261,26 +300,19 @@ async def get_datasource_stats(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
info = COLLECTOR_INFO[collector_name]
|
result = await db.execute(
|
||||||
source_name = info["name"]
|
select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
|
||||||
|
)
|
||||||
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
|
|
||||||
result = await db.execute(query)
|
|
||||||
total = result.scalar() or 0
|
|
||||||
|
|
||||||
if total == 0:
|
|
||||||
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name)
|
|
||||||
result = await db.execute(query)
|
|
||||||
total = result.scalar() or 0
|
total = result.scalar() or 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"source_id": source_id,
|
"source_id": datasource.id,
|
||||||
"collector_name": collector_name,
|
"collector_name": datasource.collector_class,
|
||||||
"name": info["name"],
|
"name": datasource.name,
|
||||||
"total_records": total,
|
"total_records": total,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -289,30 +321,36 @@ async def get_datasource_stats(
|
|||||||
async def trigger_datasource(
|
async def trigger_datasource(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
from app.services.scheduler import run_collector_now
|
if not datasource.is_active:
|
||||||
|
|
||||||
if not collector_registry.is_active(collector_name):
|
|
||||||
raise HTTPException(status_code=400, detail="Data source is disabled")
|
raise HTTPException(status_code=400, detail="Data source is disabled")
|
||||||
|
|
||||||
success = run_collector_now(collector_name)
|
previous_task_id = await get_latest_task_id_for_datasource(datasource.id)
|
||||||
|
success = run_collector_now(datasource.source)
|
||||||
|
if not success:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to trigger collector '{datasource.source}'")
|
||||||
|
|
||||||
|
task_id = None
|
||||||
|
for _ in range(20):
|
||||||
|
await asyncio.sleep(0.1)
|
||||||
|
task_id = await get_latest_task_id_for_datasource(datasource.id)
|
||||||
|
if task_id is not None and task_id != previous_task_id:
|
||||||
|
break
|
||||||
|
if task_id == previous_task_id:
|
||||||
|
task_id = None
|
||||||
|
|
||||||
if success:
|
|
||||||
return {
|
return {
|
||||||
"status": "triggered",
|
"status": "triggered",
|
||||||
"source_id": source_id,
|
"source_id": datasource.id,
|
||||||
"collector_name": collector_name,
|
"task_id": task_id,
|
||||||
"message": f"Collector '{collector_name}' has been triggered",
|
"collector_name": datasource.source,
|
||||||
|
"message": f"Collector '{datasource.source}' has been triggered",
|
||||||
}
|
}
|
||||||
else:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=500,
|
|
||||||
detail=f"Failed to trigger collector '{collector_name}'",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@router.delete("/{source_id}/data")
|
@router.delete("/{source_id}/data")
|
||||||
@@ -321,39 +359,25 @@ async def clear_datasource_data(
|
|||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
info = COLLECTOR_INFO[collector_name]
|
result = await db.execute(
|
||||||
source_name = info["name"]
|
select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
|
||||||
|
)
|
||||||
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
|
|
||||||
result = await db.execute(query)
|
|
||||||
count = result.scalar() or 0
|
count = result.scalar() or 0
|
||||||
|
|
||||||
if count == 0:
|
if count == 0:
|
||||||
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name)
|
return {"status": "success", "message": "No data to clear", "deleted_count": 0}
|
||||||
result = await db.execute(query)
|
|
||||||
count = result.scalar() or 0
|
|
||||||
delete_source = source_name
|
|
||||||
else:
|
|
||||||
delete_source = collector_name
|
|
||||||
|
|
||||||
if count == 0:
|
delete_query = CollectedData.__table__.delete().where(CollectedData.source == datasource.source)
|
||||||
return {
|
|
||||||
"status": "success",
|
|
||||||
"message": "No data to clear",
|
|
||||||
"deleted_count": 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
delete_query = CollectedData.__table__.delete().where(CollectedData.source == delete_source)
|
|
||||||
await db.execute(delete_query)
|
await db.execute(delete_query)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"message": f"Cleared {count} records for data source '{info['name']}'",
|
"message": f"Cleared {count} records for data source '{datasource.name}'",
|
||||||
"deleted_count": count,
|
"deleted_count": count,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,32 +385,29 @@ async def clear_datasource_data(
|
|||||||
@router.get("/{source_id}/task-status")
|
@router.get("/{source_id}/task-status")
|
||||||
async def get_task_status(
|
async def get_task_status(
|
||||||
source_id: str,
|
source_id: str,
|
||||||
|
task_id: Optional[int] = None,
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
collector_name = get_collector_name(source_id)
|
datasource = await get_datasource_record(db, source_id)
|
||||||
if not collector_name:
|
if not datasource:
|
||||||
raise HTTPException(status_code=404, detail="Data source not found")
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
info = COLLECTOR_INFO[collector_name]
|
if task_id is not None:
|
||||||
|
task = await db.get(CollectionTask, task_id)
|
||||||
|
if not task or task.datasource_id != datasource.id:
|
||||||
|
raise HTTPException(status_code=404, detail="Task not found")
|
||||||
|
else:
|
||||||
|
task = await get_running_task(db, datasource.id)
|
||||||
|
|
||||||
running_task_query = (
|
if not task:
|
||||||
select(CollectionTask)
|
return {"is_running": False, "task_id": None, "progress": None, "phase": None, "status": "idle"}
|
||||||
.where(CollectionTask.datasource_id == info["id"])
|
|
||||||
.where(CollectionTask.status == "running")
|
|
||||||
.order_by(CollectionTask.started_at.desc())
|
|
||||||
.limit(1)
|
|
||||||
)
|
|
||||||
running_result = await db.execute(running_task_query)
|
|
||||||
running_task = running_result.scalar_one_or_none()
|
|
||||||
|
|
||||||
if not running_task:
|
|
||||||
return {"is_running": False, "task_id": None, "progress": None}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"is_running": True,
|
"is_running": task.status == "running",
|
||||||
"task_id": running_task.id,
|
"task_id": task.id,
|
||||||
"progress": running_task.progress,
|
"progress": task.progress,
|
||||||
"records_processed": running_task.records_processed,
|
"phase": task.phase,
|
||||||
"total_records": running_task.total_records,
|
"records_processed": task.records_processed,
|
||||||
"status": running_task.status,
|
"total_records": task.total_records,
|
||||||
|
"status": task.status,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,22 @@
|
|||||||
|
from datetime import UTC, datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
|
||||||
from pydantic import BaseModel, EmailStr
|
|
||||||
|
|
||||||
from app.models.user import User
|
from fastapi import APIRouter, Depends, HTTPException
|
||||||
|
from pydantic import BaseModel, EmailStr, Field
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
from app.db.session import get_db
|
||||||
|
from app.models.datasource import DataSource
|
||||||
|
from app.models.system_setting import SystemSetting
|
||||||
|
from app.models.user import User
|
||||||
|
from app.services.scheduler import sync_datasource_job
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
default_settings = {
|
DEFAULT_SETTINGS = {
|
||||||
"system": {
|
"system": {
|
||||||
"system_name": "智能星球",
|
"system_name": "智能星球",
|
||||||
"refresh_interval": 60,
|
"refresh_interval": 60,
|
||||||
@@ -29,17 +38,13 @@ default_settings = {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
system_settings = default_settings["system"].copy()
|
|
||||||
notification_settings = default_settings["notifications"].copy()
|
|
||||||
security_settings = default_settings["security"].copy()
|
|
||||||
|
|
||||||
|
|
||||||
class SystemSettingsUpdate(BaseModel):
|
class SystemSettingsUpdate(BaseModel):
|
||||||
system_name: str = "智能星球"
|
system_name: str = "智能星球"
|
||||||
refresh_interval: int = 60
|
refresh_interval: int = Field(default=60, ge=10, le=3600)
|
||||||
auto_refresh: bool = True
|
auto_refresh: bool = True
|
||||||
data_retention_days: int = 30
|
data_retention_days: int = Field(default=30, ge=1, le=3650)
|
||||||
max_concurrent_tasks: int = 5
|
max_concurrent_tasks: int = Field(default=5, ge=1, le=50)
|
||||||
|
|
||||||
|
|
||||||
class NotificationSettingsUpdate(BaseModel):
|
class NotificationSettingsUpdate(BaseModel):
|
||||||
@@ -51,60 +56,166 @@ class NotificationSettingsUpdate(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class SecuritySettingsUpdate(BaseModel):
|
class SecuritySettingsUpdate(BaseModel):
|
||||||
session_timeout: int = 60
|
session_timeout: int = Field(default=60, ge=5, le=1440)
|
||||||
max_login_attempts: int = 5
|
max_login_attempts: int = Field(default=5, ge=1, le=20)
|
||||||
password_policy: str = "medium"
|
password_policy: str = Field(default="medium")
|
||||||
|
|
||||||
|
|
||||||
|
class CollectorSettingsUpdate(BaseModel):
|
||||||
|
is_active: bool
|
||||||
|
priority: str = Field(default="P1")
|
||||||
|
frequency_minutes: int = Field(default=60, ge=1, le=10080)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_with_defaults(category: str, payload: Optional[dict]) -> dict:
|
||||||
|
merged = DEFAULT_SETTINGS[category].copy()
|
||||||
|
if payload:
|
||||||
|
merged.update(payload)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
|
||||||
|
async def get_setting_record(db: AsyncSession, category: str) -> Optional[SystemSetting]:
|
||||||
|
result = await db.execute(select(SystemSetting).where(SystemSetting.category == category))
|
||||||
|
return result.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_setting_payload(db: AsyncSession, category: str) -> dict:
|
||||||
|
record = await get_setting_record(db, category)
|
||||||
|
return merge_with_defaults(category, record.payload if record else None)
|
||||||
|
|
||||||
|
|
||||||
|
async def save_setting_payload(db: AsyncSession, category: str, payload: dict) -> dict:
|
||||||
|
record = await get_setting_record(db, category)
|
||||||
|
if record is None:
|
||||||
|
record = SystemSetting(category=category, payload=payload)
|
||||||
|
db.add(record)
|
||||||
|
else:
|
||||||
|
record.payload = payload
|
||||||
|
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(record)
|
||||||
|
return merge_with_defaults(category, record.payload)
|
||||||
|
|
||||||
|
|
||||||
|
def format_frequency_label(minutes: int) -> str:
|
||||||
|
if minutes % 1440 == 0:
|
||||||
|
return f"{minutes // 1440}d"
|
||||||
|
if minutes % 60 == 0:
|
||||||
|
return f"{minutes // 60}h"
|
||||||
|
return f"{minutes}m"
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_collector(datasource: DataSource) -> dict:
|
||||||
|
return {
|
||||||
|
"id": datasource.id,
|
||||||
|
"name": datasource.name,
|
||||||
|
"source": datasource.source,
|
||||||
|
"module": datasource.module,
|
||||||
|
"priority": datasource.priority,
|
||||||
|
"frequency_minutes": datasource.frequency_minutes,
|
||||||
|
"frequency": format_frequency_label(datasource.frequency_minutes),
|
||||||
|
"is_active": datasource.is_active,
|
||||||
|
"last_run_at": to_iso8601_utc(datasource.last_run_at),
|
||||||
|
"last_status": datasource.last_status,
|
||||||
|
"next_run_at": to_iso8601_utc(datasource.next_run_at),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@router.get("/system")
|
@router.get("/system")
|
||||||
async def get_system_settings(current_user: User = Depends(get_current_user)):
|
async def get_system_settings(
|
||||||
return {"system": system_settings}
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
return {"system": await get_setting_payload(db, "system")}
|
||||||
|
|
||||||
|
|
||||||
@router.put("/system")
|
@router.put("/system")
|
||||||
async def update_system_settings(
|
async def update_system_settings(
|
||||||
settings: SystemSettingsUpdate,
|
settings: SystemSettingsUpdate,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
global system_settings
|
payload = await save_setting_payload(db, "system", settings.model_dump())
|
||||||
system_settings = settings.model_dump()
|
return {"status": "updated", "system": payload}
|
||||||
return {"status": "updated", "system": system_settings}
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/notifications")
|
@router.get("/notifications")
|
||||||
async def get_notification_settings(current_user: User = Depends(get_current_user)):
|
async def get_notification_settings(
|
||||||
return {"notifications": notification_settings}
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
return {"notifications": await get_setting_payload(db, "notifications")}
|
||||||
|
|
||||||
|
|
||||||
@router.put("/notifications")
|
@router.put("/notifications")
|
||||||
async def update_notification_settings(
|
async def update_notification_settings(
|
||||||
settings: NotificationSettingsUpdate,
|
settings: NotificationSettingsUpdate,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
global notification_settings
|
payload = await save_setting_payload(db, "notifications", settings.model_dump())
|
||||||
notification_settings = settings.model_dump()
|
return {"status": "updated", "notifications": payload}
|
||||||
return {"status": "updated", "notifications": notification_settings}
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/security")
|
@router.get("/security")
|
||||||
async def get_security_settings(current_user: User = Depends(get_current_user)):
|
async def get_security_settings(
|
||||||
return {"security": security_settings}
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
return {"security": await get_setting_payload(db, "security")}
|
||||||
|
|
||||||
|
|
||||||
@router.put("/security")
|
@router.put("/security")
|
||||||
async def update_security_settings(
|
async def update_security_settings(
|
||||||
settings: SecuritySettingsUpdate,
|
settings: SecuritySettingsUpdate,
|
||||||
current_user: User = Depends(get_current_user),
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
):
|
):
|
||||||
global security_settings
|
payload = await save_setting_payload(db, "security", settings.model_dump())
|
||||||
security_settings = settings.model_dump()
|
return {"status": "updated", "security": payload}
|
||||||
return {"status": "updated", "security": security_settings}
|
|
||||||
|
|
||||||
|
@router.get("/collectors")
|
||||||
|
async def get_collector_settings(
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
result = await db.execute(select(DataSource).order_by(DataSource.module, DataSource.id))
|
||||||
|
datasources = result.scalars().all()
|
||||||
|
return {"collectors": [serialize_collector(datasource) for datasource in datasources]}
|
||||||
|
|
||||||
|
|
||||||
|
@router.put("/collectors/{datasource_id}")
|
||||||
|
async def update_collector_settings(
|
||||||
|
datasource_id: int,
|
||||||
|
settings: CollectorSettingsUpdate,
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
datasource = await db.get(DataSource, datasource_id)
|
||||||
|
if not datasource:
|
||||||
|
raise HTTPException(status_code=404, detail="Data source not found")
|
||||||
|
|
||||||
|
datasource.is_active = settings.is_active
|
||||||
|
datasource.priority = settings.priority
|
||||||
|
datasource.frequency_minutes = settings.frequency_minutes
|
||||||
|
await db.commit()
|
||||||
|
await db.refresh(datasource)
|
||||||
|
await sync_datasource_job(datasource.id)
|
||||||
|
return {"status": "updated", "collector": serialize_collector(datasource)}
|
||||||
|
|
||||||
|
|
||||||
@router.get("")
|
@router.get("")
|
||||||
async def get_all_settings(current_user: User = Depends(get_current_user)):
|
async def get_all_settings(
|
||||||
|
current_user: User = Depends(get_current_user),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
result = await db.execute(select(DataSource).order_by(DataSource.module, DataSource.id))
|
||||||
|
datasources = result.scalars().all()
|
||||||
return {
|
return {
|
||||||
"system": system_settings,
|
"system": await get_setting_payload(db, "system"),
|
||||||
"notifications": notification_settings,
|
"notifications": await get_setting_payload(db, "notifications"),
|
||||||
"security": security_settings,
|
"security": await get_setting_payload(db, "security"),
|
||||||
|
"collectors": [serialize_collector(datasource) for datasource in datasources],
|
||||||
|
"generated_at": to_iso8601_utc(datetime.now(UTC)),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, status
|
from fastapi import APIRouter, Depends, HTTPException, status
|
||||||
@@ -8,8 +8,10 @@ from sqlalchemy import text
|
|||||||
from app.db.session import get_db
|
from app.db.session import get_db
|
||||||
from app.models.user import User
|
from app.models.user import User
|
||||||
from app.core.security import get_current_user
|
from app.core.security import get_current_user
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.services.collectors.registry import collector_registry
|
from app.services.collectors.registry import collector_registry
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
@@ -60,8 +62,8 @@ async def list_tasks(
|
|||||||
"datasource_id": t[1],
|
"datasource_id": t[1],
|
||||||
"datasource_name": t[2],
|
"datasource_name": t[2],
|
||||||
"status": t[3],
|
"status": t[3],
|
||||||
"started_at": t[4].isoformat() if t[4] else None,
|
"started_at": to_iso8601_utc(t[4]),
|
||||||
"completed_at": t[5].isoformat() if t[5] else None,
|
"completed_at": to_iso8601_utc(t[5]),
|
||||||
"records_processed": t[6],
|
"records_processed": t[6],
|
||||||
"error_message": t[7],
|
"error_message": t[7],
|
||||||
}
|
}
|
||||||
@@ -99,8 +101,8 @@ async def get_task(
|
|||||||
"datasource_id": task[1],
|
"datasource_id": task[1],
|
||||||
"datasource_name": task[2],
|
"datasource_name": task[2],
|
||||||
"status": task[3],
|
"status": task[3],
|
||||||
"started_at": task[4].isoformat() if task[4] else None,
|
"started_at": to_iso8601_utc(task[4]),
|
||||||
"completed_at": task[5].isoformat() if task[5] else None,
|
"completed_at": to_iso8601_utc(task[5]),
|
||||||
"records_processed": task[6],
|
"records_processed": task[6],
|
||||||
"error_message": task[7],
|
"error_message": task[7],
|
||||||
}
|
}
|
||||||
@@ -146,8 +148,8 @@ async def trigger_collection(
|
|||||||
"status": result.get("status", "unknown"),
|
"status": result.get("status", "unknown"),
|
||||||
"records_processed": result.get("records_processed", 0),
|
"records_processed": result.get("records_processed", 0),
|
||||||
"error_message": result.get("error"),
|
"error_message": result.get("error"),
|
||||||
"started_at": datetime.utcnow(),
|
"started_at": datetime.now(UTC),
|
||||||
"completed_at": datetime.utcnow(),
|
"completed_at": datetime.now(UTC),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,17 +1,30 @@
|
|||||||
"""Visualization API - GeoJSON endpoints for 3D Earth display"""
|
"""Visualization API - GeoJSON endpoints for 3D Earth display
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException, Depends
|
Unified API for all visualization data sources.
|
||||||
|
Returns GeoJSON format compatible with Three.js, CesiumJS, and Unreal Cesium.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from fastapi import APIRouter, HTTPException, Depends, Query
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select, func
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
from app.core.collected_data_fields import get_record_field
|
||||||
|
from app.core.satellite_tle import build_tle_lines_from_elements
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.db.session import get_db
|
from app.db.session import get_db
|
||||||
|
from app.models.bgp_anomaly import BGPAnomaly
|
||||||
from app.models.collected_data import CollectedData
|
from app.models.collected_data import CollectedData
|
||||||
from app.services.cable_graph import build_graph_from_data, CableGraph
|
from app.services.cable_graph import build_graph_from_data, CableGraph
|
||||||
|
from app.services.collectors.bgp_common import RIPE_RIS_COLLECTOR_COORDS
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
# ============== Converter Functions ==============
|
||||||
|
|
||||||
|
|
||||||
def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
||||||
"""Convert cable records to GeoJSON FeatureCollection"""
|
"""Convert cable records to GeoJSON FeatureCollection"""
|
||||||
features = []
|
features = []
|
||||||
@@ -66,6 +79,7 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
|||||||
"geometry": {"type": "MultiLineString", "coordinates": all_lines},
|
"geometry": {"type": "MultiLineString", "coordinates": all_lines},
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": record.id,
|
"id": record.id,
|
||||||
|
"cable_id": record.name,
|
||||||
"source_id": record.source_id,
|
"source_id": record.source_id,
|
||||||
"Name": record.name,
|
"Name": record.name,
|
||||||
"name": record.name,
|
"name": record.name,
|
||||||
@@ -74,9 +88,9 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
|||||||
"rfs": metadata.get("rfs"),
|
"rfs": metadata.get("rfs"),
|
||||||
"RFS": metadata.get("rfs"),
|
"RFS": metadata.get("rfs"),
|
||||||
"status": metadata.get("status", "active"),
|
"status": metadata.get("status", "active"),
|
||||||
"length": record.value,
|
"length": get_record_field(record, "value"),
|
||||||
"length_km": record.value,
|
"length_km": get_record_field(record, "value"),
|
||||||
"SHAPE__Length": record.value,
|
"SHAPE__Length": get_record_field(record, "value"),
|
||||||
"url": metadata.get("url"),
|
"url": metadata.get("url"),
|
||||||
"color": metadata.get("color"),
|
"color": metadata.get("color"),
|
||||||
"year": metadata.get("year"),
|
"year": metadata.get("year"),
|
||||||
@@ -87,14 +101,15 @@ def convert_cable_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
|||||||
return {"type": "FeatureCollection", "features": features}
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
def convert_landing_point_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
def convert_landing_point_to_geojson(records: List[CollectedData], city_to_cable_ids_map: Dict[int, List[int]] = None, cable_id_to_name_map: Dict[int, str] = None) -> Dict[str, Any]:
|
||||||
"""Convert landing point records to GeoJSON FeatureCollection"""
|
|
||||||
features = []
|
features = []
|
||||||
|
|
||||||
for record in records:
|
for record in records:
|
||||||
try:
|
try:
|
||||||
lat = float(record.latitude) if record.latitude else None
|
latitude = get_record_field(record, "latitude")
|
||||||
lon = float(record.longitude) if record.longitude else None
|
longitude = get_record_field(record, "longitude")
|
||||||
|
lat = float(latitude) if latitude else None
|
||||||
|
lon = float(longitude) if longitude else None
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -102,18 +117,84 @@ def convert_landing_point_to_geojson(records: List[CollectedData]) -> Dict[str,
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
metadata = record.extra_data or {}
|
metadata = record.extra_data or {}
|
||||||
|
city_id = metadata.get("city_id")
|
||||||
|
|
||||||
|
props = {
|
||||||
|
"id": record.id,
|
||||||
|
"source_id": record.source_id,
|
||||||
|
"name": record.name,
|
||||||
|
"country": get_record_field(record, "country"),
|
||||||
|
"city": get_record_field(record, "city"),
|
||||||
|
"is_tbd": metadata.get("is_tbd", False),
|
||||||
|
}
|
||||||
|
|
||||||
|
cable_names = []
|
||||||
|
if city_to_cable_ids_map and city_id in city_to_cable_ids_map:
|
||||||
|
for cable_id in city_to_cable_ids_map[city_id]:
|
||||||
|
if cable_id_to_name_map and cable_id in cable_id_to_name_map:
|
||||||
|
cable_names.append(cable_id_to_name_map[cable_id])
|
||||||
|
|
||||||
|
if cable_names:
|
||||||
|
props["cable_names"] = cable_names
|
||||||
|
|
||||||
features.append(
|
features.append(
|
||||||
{
|
{
|
||||||
"type": "Feature",
|
"type": "Feature",
|
||||||
"geometry": {"type": "Point", "coordinates": [lon, lat]},
|
"geometry": {"type": "Point", "coordinates": [lon, lat]},
|
||||||
|
"properties": props,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_satellite_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
||||||
|
"""Convert satellite TLE records to GeoJSON"""
|
||||||
|
features = []
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
metadata = record.extra_data or {}
|
||||||
|
norad_id = metadata.get("norad_cat_id")
|
||||||
|
|
||||||
|
if not norad_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
tle_line1 = metadata.get("tle_line1")
|
||||||
|
tle_line2 = metadata.get("tle_line2")
|
||||||
|
if not tle_line1 or not tle_line2:
|
||||||
|
tle_line1, tle_line2 = build_tle_lines_from_elements(
|
||||||
|
norad_cat_id=norad_id,
|
||||||
|
epoch=metadata.get("epoch"),
|
||||||
|
inclination=metadata.get("inclination"),
|
||||||
|
raan=metadata.get("raan"),
|
||||||
|
eccentricity=metadata.get("eccentricity"),
|
||||||
|
arg_of_perigee=metadata.get("arg_of_perigee"),
|
||||||
|
mean_anomaly=metadata.get("mean_anomaly"),
|
||||||
|
mean_motion=metadata.get("mean_motion"),
|
||||||
|
)
|
||||||
|
|
||||||
|
features.append(
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"id": norad_id,
|
||||||
|
"geometry": {"type": "Point", "coordinates": [0, 0, 0]},
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": record.id,
|
"id": record.id,
|
||||||
"source_id": record.source_id,
|
"norad_cat_id": norad_id,
|
||||||
"name": record.name,
|
"name": record.name,
|
||||||
"country": record.country,
|
"international_designator": metadata.get("international_designator"),
|
||||||
"city": record.city,
|
"epoch": metadata.get("epoch"),
|
||||||
"is_tbd": metadata.get("is_tbd", False),
|
"inclination": metadata.get("inclination"),
|
||||||
|
"raan": metadata.get("raan"),
|
||||||
|
"eccentricity": metadata.get("eccentricity"),
|
||||||
|
"arg_of_perigee": metadata.get("arg_of_perigee"),
|
||||||
|
"mean_anomaly": metadata.get("mean_anomaly"),
|
||||||
|
"mean_motion": metadata.get("mean_motion"),
|
||||||
|
"bstar": metadata.get("bstar"),
|
||||||
|
"classification_type": metadata.get("classification_type"),
|
||||||
|
"tle_line1": tle_line1,
|
||||||
|
"tle_line2": tle_line2,
|
||||||
|
"data_type": "satellite_tle",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -121,6 +202,158 @@ def convert_landing_point_to_geojson(records: List[CollectedData]) -> Dict[str,
|
|||||||
return {"type": "FeatureCollection", "features": features}
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_supercomputer_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
||||||
|
"""Convert TOP500 supercomputer records to GeoJSON"""
|
||||||
|
features = []
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
try:
|
||||||
|
latitude = get_record_field(record, "latitude")
|
||||||
|
longitude = get_record_field(record, "longitude")
|
||||||
|
lat = float(latitude) if latitude and latitude != "0.0" else None
|
||||||
|
lon = (
|
||||||
|
float(longitude) if longitude and longitude != "0.0" else None
|
||||||
|
)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
lat, lon = None, None
|
||||||
|
|
||||||
|
metadata = record.extra_data or {}
|
||||||
|
|
||||||
|
features.append(
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"id": record.id,
|
||||||
|
"geometry": {"type": "Point", "coordinates": [lon or 0, lat or 0]},
|
||||||
|
"properties": {
|
||||||
|
"id": record.id,
|
||||||
|
"name": record.name,
|
||||||
|
"rank": metadata.get("rank"),
|
||||||
|
"r_max": get_record_field(record, "rmax"),
|
||||||
|
"r_peak": get_record_field(record, "rpeak"),
|
||||||
|
"cores": get_record_field(record, "cores"),
|
||||||
|
"power": get_record_field(record, "power"),
|
||||||
|
"country": get_record_field(record, "country"),
|
||||||
|
"city": get_record_field(record, "city"),
|
||||||
|
"data_type": "supercomputer",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_gpu_cluster_to_geojson(records: List[CollectedData]) -> Dict[str, Any]:
|
||||||
|
"""Convert GPU cluster records to GeoJSON"""
|
||||||
|
features = []
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
try:
|
||||||
|
latitude = get_record_field(record, "latitude")
|
||||||
|
longitude = get_record_field(record, "longitude")
|
||||||
|
lat = float(latitude) if latitude else None
|
||||||
|
lon = float(longitude) if longitude else None
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
lat, lon = None, None
|
||||||
|
|
||||||
|
metadata = record.extra_data or {}
|
||||||
|
|
||||||
|
features.append(
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"id": record.id,
|
||||||
|
"geometry": {"type": "Point", "coordinates": [lon or 0, lat or 0]},
|
||||||
|
"properties": {
|
||||||
|
"id": record.id,
|
||||||
|
"name": record.name,
|
||||||
|
"country": get_record_field(record, "country"),
|
||||||
|
"city": get_record_field(record, "city"),
|
||||||
|
"metadata": metadata,
|
||||||
|
"data_type": "gpu_cluster",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_bgp_anomalies_to_geojson(records: List[BGPAnomaly]) -> Dict[str, Any]:
|
||||||
|
features = []
|
||||||
|
|
||||||
|
for record in records:
|
||||||
|
evidence = record.evidence or {}
|
||||||
|
collectors = evidence.get("collectors") or record.peer_scope or []
|
||||||
|
collector = collectors[0] if collectors else None
|
||||||
|
location = None
|
||||||
|
if collector:
|
||||||
|
location = RIPE_RIS_COLLECTOR_COORDS.get(str(collector))
|
||||||
|
|
||||||
|
if location is None:
|
||||||
|
nested = evidence.get("events") or []
|
||||||
|
for item in nested:
|
||||||
|
collector_name = (item or {}).get("collector")
|
||||||
|
if collector_name and collector_name in RIPE_RIS_COLLECTOR_COORDS:
|
||||||
|
location = RIPE_RIS_COLLECTOR_COORDS[collector_name]
|
||||||
|
collector = collector_name
|
||||||
|
break
|
||||||
|
|
||||||
|
if location is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
features.append(
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"geometry": {
|
||||||
|
"type": "Point",
|
||||||
|
"coordinates": [location["longitude"], location["latitude"]],
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"id": record.id,
|
||||||
|
"collector": collector,
|
||||||
|
"city": location.get("city"),
|
||||||
|
"country": location.get("country"),
|
||||||
|
"source": record.source,
|
||||||
|
"anomaly_type": record.anomaly_type,
|
||||||
|
"severity": record.severity,
|
||||||
|
"status": record.status,
|
||||||
|
"prefix": record.prefix,
|
||||||
|
"origin_asn": record.origin_asn,
|
||||||
|
"new_origin_asn": record.new_origin_asn,
|
||||||
|
"confidence": record.confidence,
|
||||||
|
"summary": record.summary,
|
||||||
|
"created_at": to_iso8601_utc(record.created_at),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_bgp_collectors_to_geojson() -> Dict[str, Any]:
|
||||||
|
features = []
|
||||||
|
|
||||||
|
for collector, location in sorted(RIPE_RIS_COLLECTOR_COORDS.items()):
|
||||||
|
features.append(
|
||||||
|
{
|
||||||
|
"type": "Feature",
|
||||||
|
"geometry": {
|
||||||
|
"type": "Point",
|
||||||
|
"coordinates": [location["longitude"], location["latitude"]],
|
||||||
|
},
|
||||||
|
"properties": {
|
||||||
|
"collector": collector,
|
||||||
|
"city": location.get("city"),
|
||||||
|
"country": location.get("country"),
|
||||||
|
"status": "online",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return {"type": "FeatureCollection", "features": features}
|
||||||
|
|
||||||
|
|
||||||
|
# ============== API Endpoints ==============
|
||||||
|
|
||||||
|
|
||||||
@router.get("/geo/cables")
|
@router.get("/geo/cables")
|
||||||
async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
|
async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
|
||||||
"""获取海底电缆 GeoJSON 数据 (LineString)"""
|
"""获取海底电缆 GeoJSON 数据 (LineString)"""
|
||||||
@@ -144,19 +377,45 @@ async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
|
|||||||
|
|
||||||
@router.get("/geo/landing-points")
|
@router.get("/geo/landing-points")
|
||||||
async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
|
async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
|
||||||
"""获取登陆点 GeoJSON 数据 (Point)"""
|
|
||||||
try:
|
try:
|
||||||
stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
|
landing_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
|
||||||
result = await db.execute(stmt)
|
landing_result = await db.execute(landing_stmt)
|
||||||
records = result.scalars().all()
|
records = landing_result.scalars().all()
|
||||||
|
|
||||||
|
relation_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cable_landing_relation")
|
||||||
|
relation_result = await db.execute(relation_stmt)
|
||||||
|
relation_records = relation_result.scalars().all()
|
||||||
|
|
||||||
|
cable_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
|
||||||
|
cable_result = await db.execute(cable_stmt)
|
||||||
|
cable_records = cable_result.scalars().all()
|
||||||
|
|
||||||
|
city_to_cable_ids_map = {}
|
||||||
|
for rel in relation_records:
|
||||||
|
if rel.extra_data:
|
||||||
|
city_id = rel.extra_data.get("city_id")
|
||||||
|
cable_id = rel.extra_data.get("cable_id")
|
||||||
|
if city_id is not None and cable_id is not None:
|
||||||
|
if city_id not in city_to_cable_ids_map:
|
||||||
|
city_to_cable_ids_map[city_id] = []
|
||||||
|
if cable_id not in city_to_cable_ids_map[city_id]:
|
||||||
|
city_to_cable_ids_map[city_id].append(cable_id)
|
||||||
|
|
||||||
|
cable_id_to_name_map = {}
|
||||||
|
for cable in cable_records:
|
||||||
|
if cable.extra_data:
|
||||||
|
cable_id = cable.extra_data.get("cable_id")
|
||||||
|
cable_name = cable.name
|
||||||
|
if cable_id and cable_name:
|
||||||
|
cable_id_to_name_map[cable_id] = cable_name
|
||||||
|
|
||||||
if not records:
|
if not records:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=404,
|
status_code=404,
|
||||||
detail="No landing point data found. Please run the fao_landing_points collector first.",
|
detail="No landing point data found. Please run the arcgis_landing_points collector first.",
|
||||||
)
|
)
|
||||||
|
|
||||||
return convert_landing_point_to_geojson(records)
|
return convert_landing_point_to_geojson(records, city_to_cable_ids_map, cable_id_to_name_map)
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -165,22 +424,44 @@ async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
|
|||||||
|
|
||||||
@router.get("/geo/all")
|
@router.get("/geo/all")
|
||||||
async def get_all_geojson(db: AsyncSession = Depends(get_db)):
|
async def get_all_geojson(db: AsyncSession = Depends(get_db)):
|
||||||
"""获取所有可视化数据 (电缆 + 登陆点)"""
|
|
||||||
cables_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
|
cables_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cables")
|
||||||
cables_result = await db.execute(cables_stmt)
|
cables_result = await db.execute(cables_stmt)
|
||||||
cables_records = cables_result.scalars().all()
|
cables_records = cables_result.scalars().all()
|
||||||
|
|
||||||
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
|
points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
|
||||||
points_result = await db.execute(points_stmt)
|
points_result = await db.execute(points_stmt)
|
||||||
points_records = points_result.scalars().all()
|
points_records = points_result.scalars().all()
|
||||||
|
|
||||||
|
relation_stmt = select(CollectedData).where(CollectedData.source == "arcgis_cable_landing_relation")
|
||||||
|
relation_result = await db.execute(relation_stmt)
|
||||||
|
relation_records = relation_result.scalars().all()
|
||||||
|
|
||||||
|
city_to_cable_ids_map = {}
|
||||||
|
for rel in relation_records:
|
||||||
|
if rel.extra_data:
|
||||||
|
city_id = rel.extra_data.get("city_id")
|
||||||
|
cable_id = rel.extra_data.get("cable_id")
|
||||||
|
if city_id is not None and cable_id is not None:
|
||||||
|
if city_id not in city_to_cable_ids_map:
|
||||||
|
city_to_cable_ids_map[city_id] = []
|
||||||
|
if cable_id not in city_to_cable_ids_map[city_id]:
|
||||||
|
city_to_cable_ids_map[city_id].append(cable_id)
|
||||||
|
|
||||||
|
cable_id_to_name_map = {}
|
||||||
|
for cable in cables_records:
|
||||||
|
if cable.extra_data:
|
||||||
|
cable_id = cable.extra_data.get("cable_id")
|
||||||
|
cable_name = cable.name
|
||||||
|
if cable_id and cable_name:
|
||||||
|
cable_id_to_name_map[cable_id] = cable_name
|
||||||
|
|
||||||
cables = (
|
cables = (
|
||||||
convert_cable_to_geojson(cables_records)
|
convert_cable_to_geojson(cables_records)
|
||||||
if cables_records
|
if cables_records
|
||||||
else {"type": "FeatureCollection", "features": []}
|
else {"type": "FeatureCollection", "features": []}
|
||||||
)
|
)
|
||||||
points = (
|
points = (
|
||||||
convert_landing_point_to_geojson(points_records)
|
convert_landing_point_to_geojson(points_records, city_to_cable_ids_map, cable_id_to_name_map)
|
||||||
if points_records
|
if points_records
|
||||||
else {"type": "FeatureCollection", "features": []}
|
else {"type": "FeatureCollection", "features": []}
|
||||||
)
|
)
|
||||||
@@ -195,6 +476,208 @@ async def get_all_geojson(db: AsyncSession = Depends(get_db)):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geo/satellites")
|
||||||
|
async def get_satellites_geojson(
|
||||||
|
limit: Optional[int] = Query(
|
||||||
|
None,
|
||||||
|
ge=1,
|
||||||
|
description="Maximum number of satellites to return. Omit for no limit.",
|
||||||
|
),
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""获取卫星 TLE GeoJSON 数据"""
|
||||||
|
stmt = (
|
||||||
|
select(CollectedData)
|
||||||
|
.where(CollectedData.source == "celestrak_tle")
|
||||||
|
.where(CollectedData.name != "Unknown")
|
||||||
|
.order_by(CollectedData.id.desc())
|
||||||
|
)
|
||||||
|
if limit is not None:
|
||||||
|
stmt = stmt.limit(limit)
|
||||||
|
result = await db.execute(stmt)
|
||||||
|
records = result.scalars().all()
|
||||||
|
|
||||||
|
if not records:
|
||||||
|
return {"type": "FeatureCollection", "features": [], "count": 0}
|
||||||
|
|
||||||
|
geojson = convert_satellite_to_geojson(list(records))
|
||||||
|
return {
|
||||||
|
**geojson,
|
||||||
|
"count": len(geojson.get("features", [])),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geo/supercomputers")
|
||||||
|
async def get_supercomputers_geojson(
|
||||||
|
limit: int = 500,
|
||||||
|
db: AsyncSession = Depends(get_db),
|
||||||
|
):
|
||||||
|
"""获取 TOP500 超算中心 GeoJSON 数据"""
|
||||||
|
stmt = (
|
||||||
|
select(CollectedData)
|
||||||
|
.where(CollectedData.source == "top500")
|
||||||
|
.where(CollectedData.name != "Unknown")
|
||||||
|
.limit(limit)
|
||||||
|
)
|
||||||
|
result = await db.execute(stmt)
|
||||||
|
records = result.scalars().all()
|
||||||
|
|
||||||
|
if not records:
|
||||||
|
return {"type": "FeatureCollection", "features": [], "count": 0}
|
||||||
|
|
||||||
|
geojson = convert_supercomputer_to_geojson(list(records))
|
||||||
|
return {
|
||||||
|
**geojson,
|
||||||
|
"count": len(geojson.get("features", [])),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geo/gpu-clusters")
async def get_gpu_clusters_geojson(
    limit: int = 100,
    db: AsyncSession = Depends(get_db),
):
    """Return GPU cluster records as a GeoJSON FeatureCollection.

    Selects up to ``limit`` CollectedData rows whose source is "epoch_ai_gpu"
    and whose name is not "Unknown". The converter output is returned with an
    extra "count" key holding the number of features; an empty collection is
    returned when nothing matches.
    """
    query = select(CollectedData).where(
        CollectedData.source == "epoch_ai_gpu",
        CollectedData.name != "Unknown",
    )
    rows = (await db.execute(query.limit(limit))).scalars().all()
    if not rows:
        return {"type": "FeatureCollection", "features": [], "count": 0}

    collection = convert_gpu_cluster_to_geojson(list(rows))
    feature_total = len(collection.get("features", []))
    return {**collection, "count": feature_total}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geo/bgp-anomalies")
async def get_bgp_anomalies_geojson(
    severity: Optional[str] = Query(None),
    status: Optional[str] = Query("active"),
    limit: int = Query(200, ge=1, le=1000),
    db: AsyncSession = Depends(get_db),
):
    """Return BGP anomaly records as a GeoJSON FeatureCollection.

    Rows are ordered by ``created_at`` descending and capped at ``limit``.
    ``severity`` and ``status`` are applied as equality filters only when they
    are non-empty. The converter output is returned with an extra "count" key
    holding the number of features.
    """
    query = select(BGPAnomaly)
    if severity:
        query = query.where(BGPAnomaly.severity == severity)
    if status:
        query = query.where(BGPAnomaly.status == status)
    query = query.order_by(BGPAnomaly.created_at.desc()).limit(limit)

    fetched = await db.execute(query)
    anomalies = list(fetched.scalars().all())
    collection = convert_bgp_anomalies_to_geojson(anomalies)
    feature_total = len(collection.get("features", []))
    return {**collection, "count": feature_total}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/geo/bgp-collectors")
async def get_bgp_collectors_geojson():
    """Return the BGP collectors GeoJSON with an added "count" of its features."""
    collection = convert_bgp_collectors_to_geojson()
    feature_total = len(collection.get("features", []))
    return {**collection, "count": feature_total}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/all")
async def get_all_visualization_data(db: AsyncSession = Depends(get_db)):
    """Unified endpoint that returns every visualization layer in one payload.

    Returns GeoJSON FeatureCollections for all data types:
    - satellites: satellite TLE data
    - cables: submarine cables
    - landing_points: cable landing points
    - supercomputers: TOP500 supercomputers
    - gpu_clusters: GPU clusters

    The response also carries a generation timestamp, a version string and
    per-layer feature counts under "stats".
    """

    async def _load(source: str, skip_unknown: bool) -> list:
        # One SELECT per source; optionally drop rows named "Unknown".
        query = select(CollectedData).where(CollectedData.source == source)
        if skip_unknown:
            query = query.where(CollectedData.name != "Unknown")
        fetched = await db.execute(query)
        return list(fetched.scalars().all())

    def _as_collection(converter, rows):
        # Empty inputs bypass the converter and yield an empty collection.
        if not rows:
            return {"type": "FeatureCollection", "features": []}
        return converter(rows)

    # Queries are awaited one after another on the same session.
    cable_rows = await _load("arcgis_cables", False)
    point_rows = await _load("arcgis_landing_points", False)
    satellite_rows = await _load("celestrak_tle", True)
    supercomputer_rows = await _load("top500", True)
    gpu_rows = await _load("epoch_ai_gpu", True)

    cables = _as_collection(convert_cable_to_geojson, cable_rows)
    landing_points = _as_collection(convert_landing_point_to_geojson, point_rows)
    satellites = _as_collection(convert_satellite_to_geojson, satellite_rows)
    supercomputers = _as_collection(convert_supercomputer_to_geojson, supercomputer_rows)
    gpu_clusters = _as_collection(convert_gpu_cluster_to_geojson, gpu_rows)

    layers = {
        "satellites": satellites,
        "cables": cables,
        "landing_points": landing_points,
        "supercomputers": supercomputers,
        "gpu_clusters": gpu_clusters,
    }
    counts = {name: len(layer.get("features", [])) for name, layer in layers.items()}

    return {
        "generated_at": to_iso8601_utc(datetime.now(UTC)),
        "version": "1.0",
        "data": layers,
        "stats": {"total_features": sum(counts.values()), **counts},
    }
|
||||||
|
|
||||||
|
|
||||||
# Cache for cable graph
|
# Cache for cable graph
|
||||||
_cable_graph: Optional[CableGraph] = None
|
_cable_graph: Optional[CableGraph] = None
|
||||||
|
|
||||||
@@ -208,7 +691,7 @@ async def get_cable_graph(db: AsyncSession) -> CableGraph:
|
|||||||
cables_result = await db.execute(cables_stmt)
|
cables_result = await db.execute(cables_stmt)
|
||||||
cables_records = list(cables_result.scalars().all())
|
cables_records = list(cables_result.scalars().all())
|
||||||
|
|
||||||
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
|
points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
|
||||||
points_result = await db.execute(points_stmt)
|
points_result = await db.execute(points_stmt)
|
||||||
points_records = list(points_result.scalars().all())
|
points_records = list(points_result.scalars().all())
|
||||||
|
|
||||||
|
|||||||
@@ -3,13 +3,14 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query
|
from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query
|
||||||
from jose import jwt, JWTError
|
from jose import jwt, JWTError
|
||||||
|
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.core.websocket.manager import manager
|
from app.core.websocket.manager import manager
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -59,6 +60,7 @@ async def websocket_endpoint(
|
|||||||
"ixp_nodes",
|
"ixp_nodes",
|
||||||
"alerts",
|
"alerts",
|
||||||
"dashboard",
|
"dashboard",
|
||||||
|
"datasource_tasks",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -72,7 +74,7 @@ async def websocket_endpoint(
|
|||||||
await websocket.send_json(
|
await websocket.send_json(
|
||||||
{
|
{
|
||||||
"type": "heartbeat",
|
"type": "heartbeat",
|
||||||
"data": {"action": "pong", "timestamp": datetime.utcnow().isoformat()},
|
"data": {"action": "pong", "timestamp": to_iso8601_utc(datetime.now(UTC))},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
elif data.get("type") == "subscribe":
|
elif data.get("type") == "subscribe":
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
62
backend/app/core/collected_data_fields.py
Normal file
62
backend/app/core/collected_data_fields.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
|
||||||
|
# Accepted metadata key spellings for each logical field, in lookup order.
FIELD_ALIASES = {
    "country": ("country",),
    "city": ("city",),
    "latitude": ("latitude",),
    "longitude": ("longitude",),
    "value": ("value",),
    "unit": ("unit",),
    "cores": ("cores",),
    "rmax": ("rmax", "r_max"),
    "rpeak": ("rpeak", "r_peak"),
    "power": ("power",),
}


def get_metadata_field(metadata: Optional[Dict[str, Any]], field: str, fallback: Any = None) -> Any:
    """Look up ``field`` in a metadata dict, honouring the aliases in FIELD_ALIASES.

    Args:
        metadata: Metadata mapping; anything that is not a dict is treated as empty.
        field: Logical field name. Fields without an alias entry are looked up
            under their own name only.
        fallback: Value returned when no alias holds a usable value.

    Returns:
        The first alias value that is neither ``None`` nor ``""``, else ``fallback``.
    """
    if not isinstance(metadata, dict):
        return fallback

    candidate_keys = FIELD_ALIASES.get(field, (field,))
    for alias in candidate_keys:
        found = metadata.get(alias)
        if found in (None, ""):
            # Blank values do not count; try the next spelling.
            continue
        return found
    return fallback
|
||||||
|
|
||||||
|
|
||||||
|
def build_dynamic_metadata(
    metadata: Optional[Dict[str, Any]],
    *,
    country: Any = None,
    city: Any = None,
    latitude: Any = None,
    longitude: Any = None,
    value: Any = None,
    unit: Any = None,
) -> Dict[str, Any]:
    """Return a copy of ``metadata`` with missing core fields filled in.

    Args:
        metadata: Source metadata; a non-dict input starts from an empty dict.
        country, city, latitude, longitude, value, unit: Values used only when
            the copy has no usable value (``None`` or ``""``) for that field,
            as reported by ``get_metadata_field``.

    Returns:
        A new dict; the input mapping is never mutated.
    """
    if isinstance(metadata, dict):
        result = dict(metadata)
    else:
        result = {}

    defaults = (
        ("country", country),
        ("city", city),
        ("latitude", latitude),
        ("longitude", longitude),
        ("value", value),
        ("unit", unit),
    )
    for name, default in defaults:
        if default in (None, ""):
            # Nothing useful to fill in for this field.
            continue
        if get_metadata_field(result, name) in (None, ""):
            result[name] = default

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_record_field(record: Any, field: str) -> Any:
    """Read ``field`` for a record, preferring its ``extra_data`` metadata.

    The record's ``extra_data`` attribute (or an empty dict when it is missing
    or falsy) is searched via ``get_metadata_field``. For every field except
    "cores", "rmax", "rpeak" and "power", the record attribute of the same
    name is used as the fallback; those four fields fall back to ``None``.
    """
    extra = getattr(record, "extra_data", None) or {}
    metadata_only = field in {"cores", "rmax", "rpeak", "power"}
    column_value = None if metadata_only else getattr(record, field, None)
    return get_metadata_field(extra, field, fallback=column_value)
|
||||||
@@ -6,9 +6,16 @@ import os
|
|||||||
from pydantic_settings import BaseSettings
|
from pydantic_settings import BaseSettings
|
||||||
|
|
||||||
|
|
||||||
|
ROOT_DIR = Path(__file__).parent.parent.parent.parent
|
||||||
|
VERSION_FILE = ROOT_DIR / "VERSION"
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
PROJECT_NAME: str = "Intelligent Planet Plan"
|
PROJECT_NAME: str = "Intelligent Planet Plan"
|
||||||
VERSION: str = "1.0.0"
|
VERSION: str = (
|
||||||
|
os.getenv("APP_VERSION")
|
||||||
|
or (VERSION_FILE.read_text(encoding="utf-8").strip() if VERSION_FILE.exists() else "0.19.0")
|
||||||
|
)
|
||||||
API_V1_STR: str = "/api/v1"
|
API_V1_STR: str = "/api/v1"
|
||||||
SECRET_KEY: str = "your-secret-key-change-in-production"
|
SECRET_KEY: str = "your-secret-key-change-in-production"
|
||||||
ALGORITHM: str = "HS256"
|
ALGORITHM: str = "HS256"
|
||||||
@@ -27,6 +34,9 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"]
|
CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"]
|
||||||
|
|
||||||
|
SPACETRACK_USERNAME: str = ""
|
||||||
|
SPACETRACK_PASSWORD: str = ""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def REDIS_URL(self) -> str:
|
def REDIS_URL(self) -> str:
|
||||||
return os.getenv(
|
return os.getenv(
|
||||||
@@ -34,7 +44,7 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = ".env"
|
env_file = Path(__file__).parent.parent.parent / ".env"
|
||||||
case_sensitive = True
|
case_sensitive = True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
280
backend/app/core/countries.py
Normal file
280
backend/app/core/countries.py
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
import re
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
|
||||||
|
COUNTRY_ENTRIES = [
|
||||||
|
("阿富汗", ["Afghanistan", "AF", "AFG"]),
|
||||||
|
("阿尔巴尼亚", ["Albania", "AL", "ALB"]),
|
||||||
|
("阿尔及利亚", ["Algeria", "DZ", "DZA"]),
|
||||||
|
("安道尔", ["Andorra", "AD", "AND"]),
|
||||||
|
("安哥拉", ["Angola", "AO", "AGO"]),
|
||||||
|
("安提瓜和巴布达", ["Antigua and Barbuda", "AG", "ATG"]),
|
||||||
|
("阿根廷", ["Argentina", "AR", "ARG"]),
|
||||||
|
("亚美尼亚", ["Armenia", "AM", "ARM"]),
|
||||||
|
("澳大利亚", ["Australia", "AU", "AUS"]),
|
||||||
|
("奥地利", ["Austria", "AT", "AUT"]),
|
||||||
|
("阿塞拜疆", ["Azerbaijan", "AZ", "AZE"]),
|
||||||
|
("巴哈马", ["Bahamas", "BS", "BHS"]),
|
||||||
|
("巴林", ["Bahrain", "BH", "BHR"]),
|
||||||
|
("孟加拉国", ["Bangladesh", "BD", "BGD"]),
|
||||||
|
("巴巴多斯", ["Barbados", "BB", "BRB"]),
|
||||||
|
("白俄罗斯", ["Belarus", "BY", "BLR"]),
|
||||||
|
("比利时", ["Belgium", "BE", "BEL"]),
|
||||||
|
("伯利兹", ["Belize", "BZ", "BLZ"]),
|
||||||
|
("贝宁", ["Benin", "BJ", "BEN"]),
|
||||||
|
("不丹", ["Bhutan", "BT", "BTN"]),
|
||||||
|
("玻利维亚", ["Bolivia", "BO", "BOL", "Bolivia (Plurinational State of)"]),
|
||||||
|
("波斯尼亚和黑塞哥维那", ["Bosnia and Herzegovina", "BA", "BIH"]),
|
||||||
|
("博茨瓦纳", ["Botswana", "BW", "BWA"]),
|
||||||
|
("巴西", ["Brazil", "BR", "BRA"]),
|
||||||
|
("文莱", ["Brunei", "BN", "BRN", "Brunei Darussalam"]),
|
||||||
|
("保加利亚", ["Bulgaria", "BG", "BGR"]),
|
||||||
|
("布基纳法索", ["Burkina Faso", "BF", "BFA"]),
|
||||||
|
("布隆迪", ["Burundi", "BI", "BDI"]),
|
||||||
|
("柬埔寨", ["Cambodia", "KH", "KHM"]),
|
||||||
|
("喀麦隆", ["Cameroon", "CM", "CMR"]),
|
||||||
|
("加拿大", ["Canada", "CA", "CAN"]),
|
||||||
|
("佛得角", ["Cape Verde", "CV", "CPV", "Cabo Verde"]),
|
||||||
|
("中非", ["Central African Republic", "CF", "CAF"]),
|
||||||
|
("乍得", ["Chad", "TD", "TCD"]),
|
||||||
|
("智利", ["Chile", "CL", "CHL"]),
|
||||||
|
("中国", ["China", "CN", "CHN", "Mainland China", "PRC", "People's Republic of China"]),
|
||||||
|
("中国(香港)", ["Hong Kong", "HK", "HKG", "Hong Kong SAR", "China Hong Kong", "Hong Kong, China"]),
|
||||||
|
("中国(澳门)", ["Macao", "Macau", "MO", "MAC", "Macao SAR", "China Macao", "Macau, China"]),
|
||||||
|
("中国(台湾)", ["Taiwan", "TW", "TWN", "Chinese Taipei", "Taiwan, China"]),
|
||||||
|
("哥伦比亚", ["Colombia", "CO", "COL"]),
|
||||||
|
("科摩罗", ["Comoros", "KM", "COM"]),
|
||||||
|
("刚果(布)", ["Republic of the Congo", "Congo", "Congo-Brazzaville", "CG", "COG"]),
|
||||||
|
("刚果(金)", ["Democratic Republic of the Congo", "DR Congo", "Congo-Kinshasa", "CD", "COD"]),
|
||||||
|
("哥斯达黎加", ["Costa Rica", "CR", "CRI"]),
|
||||||
|
("科特迪瓦", ["Cote d'Ivoire", "Côte d'Ivoire", "Ivory Coast", "CI", "CIV"]),
|
||||||
|
("克罗地亚", ["Croatia", "HR", "HRV"]),
|
||||||
|
("古巴", ["Cuba", "CU", "CUB"]),
|
||||||
|
("塞浦路斯", ["Cyprus", "CY", "CYP"]),
|
||||||
|
("捷克", ["Czech Republic", "Czechia", "CZ", "CZE"]),
|
||||||
|
("丹麦", ["Denmark", "DK", "DNK"]),
|
||||||
|
("吉布提", ["Djibouti", "DJ", "DJI"]),
|
||||||
|
("多米尼克", ["Dominica", "DM", "DMA"]),
|
||||||
|
("多米尼加", ["Dominican Republic", "DO", "DOM"]),
|
||||||
|
("厄瓜多尔", ["Ecuador", "EC", "ECU"]),
|
||||||
|
("埃及", ["Egypt", "EG", "EGY"]),
|
||||||
|
("萨尔瓦多", ["El Salvador", "SV", "SLV"]),
|
||||||
|
("赤道几内亚", ["Equatorial Guinea", "GQ", "GNQ"]),
|
||||||
|
("厄立特里亚", ["Eritrea", "ER", "ERI"]),
|
||||||
|
("爱沙尼亚", ["Estonia", "EE", "EST"]),
|
||||||
|
("埃斯瓦蒂尼", ["Eswatini", "SZ", "SWZ", "Swaziland"]),
|
||||||
|
("埃塞俄比亚", ["Ethiopia", "ET", "ETH"]),
|
||||||
|
("斐济", ["Fiji", "FJ", "FJI"]),
|
||||||
|
("芬兰", ["Finland", "FI", "FIN"]),
|
||||||
|
("法国", ["France", "FR", "FRA"]),
|
||||||
|
("加蓬", ["Gabon", "GA", "GAB"]),
|
||||||
|
("冈比亚", ["Gambia", "GM", "GMB"]),
|
||||||
|
("格鲁吉亚", ["Georgia", "GE", "GEO"]),
|
||||||
|
("德国", ["Germany", "DE", "DEU"]),
|
||||||
|
("加纳", ["Ghana", "GH", "GHA"]),
|
||||||
|
("希腊", ["Greece", "GR", "GRC"]),
|
||||||
|
("格林纳达", ["Grenada", "GD", "GRD"]),
|
||||||
|
("危地马拉", ["Guatemala", "GT", "GTM"]),
|
||||||
|
("几内亚", ["Guinea", "GN", "GIN"]),
|
||||||
|
("几内亚比绍", ["Guinea-Bissau", "GW", "GNB"]),
|
||||||
|
("圭亚那", ["Guyana", "GY", "GUY"]),
|
||||||
|
("海地", ["Haiti", "HT", "HTI"]),
|
||||||
|
("洪都拉斯", ["Honduras", "HN", "HND"]),
|
||||||
|
("匈牙利", ["Hungary", "HU", "HUN"]),
|
||||||
|
("冰岛", ["Iceland", "IS", "ISL"]),
|
||||||
|
("印度", ["India", "IN", "IND"]),
|
||||||
|
("印度尼西亚", ["Indonesia", "ID", "IDN"]),
|
||||||
|
("伊朗", ["Iran", "IR", "IRN", "Iran (Islamic Republic of)"]),
|
||||||
|
("伊拉克", ["Iraq", "IQ", "IRQ"]),
|
||||||
|
("爱尔兰", ["Ireland", "IE", "IRL"]),
|
||||||
|
("以色列", ["Israel", "IL", "ISR"]),
|
||||||
|
("意大利", ["Italy", "IT", "ITA"]),
|
||||||
|
("牙买加", ["Jamaica", "JM", "JAM"]),
|
||||||
|
("日本", ["Japan", "JP", "JPN"]),
|
||||||
|
("约旦", ["Jordan", "JO", "JOR"]),
|
||||||
|
("哈萨克斯坦", ["Kazakhstan", "KZ", "KAZ"]),
|
||||||
|
("肯尼亚", ["Kenya", "KE", "KEN"]),
|
||||||
|
("基里巴斯", ["Kiribati", "KI", "KIR"]),
|
||||||
|
("朝鲜", ["North Korea", "Korea, DPRK", "Democratic People's Republic of Korea", "KP", "PRK"]),
|
||||||
|
("韩国", ["South Korea", "Republic of Korea", "Korea", "KR", "KOR"]),
|
||||||
|
("科威特", ["Kuwait", "KW", "KWT"]),
|
||||||
|
("吉尔吉斯斯坦", ["Kyrgyzstan", "KG", "KGZ"]),
|
||||||
|
("老挝", ["Laos", "Lao PDR", "Lao People's Democratic Republic", "LA", "LAO"]),
|
||||||
|
("拉脱维亚", ["Latvia", "LV", "LVA"]),
|
||||||
|
("黎巴嫩", ["Lebanon", "LB", "LBN"]),
|
||||||
|
("莱索托", ["Lesotho", "LS", "LSO"]),
|
||||||
|
("利比里亚", ["Liberia", "LR", "LBR"]),
|
||||||
|
("利比亚", ["Libya", "LY", "LBY"]),
|
||||||
|
("列支敦士登", ["Liechtenstein", "LI", "LIE"]),
|
||||||
|
("立陶宛", ["Lithuania", "LT", "LTU"]),
|
||||||
|
("卢森堡", ["Luxembourg", "LU", "LUX"]),
|
||||||
|
("马达加斯加", ["Madagascar", "MG", "MDG"]),
|
||||||
|
("马拉维", ["Malawi", "MW", "MWI"]),
|
||||||
|
("马来西亚", ["Malaysia", "MY", "MYS"]),
|
||||||
|
("马尔代夫", ["Maldives", "MV", "MDV"]),
|
||||||
|
("马里", ["Mali", "ML", "MLI"]),
|
||||||
|
("马耳他", ["Malta", "MT", "MLT"]),
|
||||||
|
("马绍尔群岛", ["Marshall Islands", "MH", "MHL"]),
|
||||||
|
("毛里塔尼亚", ["Mauritania", "MR", "MRT"]),
|
||||||
|
("毛里求斯", ["Mauritius", "MU", "MUS"]),
|
||||||
|
("墨西哥", ["Mexico", "MX", "MEX"]),
|
||||||
|
("密克罗尼西亚", ["Micronesia", "FM", "FSM", "Federated States of Micronesia"]),
|
||||||
|
("摩尔多瓦", ["Moldova", "MD", "MDA", "Republic of Moldova"]),
|
||||||
|
("摩纳哥", ["Monaco", "MC", "MCO"]),
|
||||||
|
("蒙古", ["Mongolia", "MN", "MNG"]),
|
||||||
|
("黑山", ["Montenegro", "ME", "MNE"]),
|
||||||
|
("摩洛哥", ["Morocco", "MA", "MAR"]),
|
||||||
|
("莫桑比克", ["Mozambique", "MZ", "MOZ"]),
|
||||||
|
("缅甸", ["Myanmar", "MM", "MMR", "Burma"]),
|
||||||
|
("纳米比亚", ["Namibia", "NA", "NAM"]),
|
||||||
|
("瑙鲁", ["Nauru", "NR", "NRU"]),
|
||||||
|
("尼泊尔", ["Nepal", "NP", "NPL"]),
|
||||||
|
("荷兰", ["Netherlands", "NL", "NLD"]),
|
||||||
|
("新西兰", ["New Zealand", "NZ", "NZL"]),
|
||||||
|
("尼加拉瓜", ["Nicaragua", "NI", "NIC"]),
|
||||||
|
("尼日尔", ["Niger", "NE", "NER"]),
|
||||||
|
("尼日利亚", ["Nigeria", "NG", "NGA"]),
|
||||||
|
("北马其顿", ["North Macedonia", "MK", "MKD", "Macedonia"]),
|
||||||
|
("挪威", ["Norway", "NO", "NOR"]),
|
||||||
|
("阿曼", ["Oman", "OM", "OMN"]),
|
||||||
|
("巴基斯坦", ["Pakistan", "PK", "PAK"]),
|
||||||
|
("帕劳", ["Palau", "PW", "PLW"]),
|
||||||
|
("巴勒斯坦", ["Palestine", "PS", "PSE", "State of Palestine"]),
|
||||||
|
("巴拿马", ["Panama", "PA", "PAN"]),
|
||||||
|
("巴布亚新几内亚", ["Papua New Guinea", "PG", "PNG"]),
|
||||||
|
("巴拉圭", ["Paraguay", "PY", "PRY"]),
|
||||||
|
("秘鲁", ["Peru", "PE", "PER"]),
|
||||||
|
("菲律宾", ["Philippines", "PH", "PHL"]),
|
||||||
|
("波兰", ["Poland", "PL", "POL"]),
|
||||||
|
("葡萄牙", ["Portugal", "PT", "PRT"]),
|
||||||
|
("卡塔尔", ["Qatar", "QA", "QAT"]),
|
||||||
|
("罗马尼亚", ["Romania", "RO", "ROU"]),
|
||||||
|
("俄罗斯", ["Russia", "Russian Federation", "RU", "RUS"]),
|
||||||
|
("卢旺达", ["Rwanda", "RW", "RWA"]),
|
||||||
|
("圣基茨和尼维斯", ["Saint Kitts and Nevis", "KN", "KNA"]),
|
||||||
|
("圣卢西亚", ["Saint Lucia", "LC", "LCA"]),
|
||||||
|
("圣文森特和格林纳丁斯", ["Saint Vincent and the Grenadines", "VC", "VCT"]),
|
||||||
|
("萨摩亚", ["Samoa", "WS", "WSM"]),
|
||||||
|
("圣马力诺", ["San Marino", "SM", "SMR"]),
|
||||||
|
("圣多美和普林西比", ["Sao Tome and Principe", "ST", "STP", "São Tomé and Príncipe"]),
|
||||||
|
("沙特阿拉伯", ["Saudi Arabia", "SA", "SAU"]),
|
||||||
|
("塞内加尔", ["Senegal", "SN", "SEN"]),
|
||||||
|
("塞尔维亚", ["Serbia", "RS", "SRB", "Kosovo", "XK", "XKS", "Republic of Kosovo"]),
|
||||||
|
("塞舌尔", ["Seychelles", "SC", "SYC"]),
|
||||||
|
("塞拉利昂", ["Sierra Leone", "SL", "SLE"]),
|
||||||
|
("新加坡", ["Singapore", "SG", "SGP"]),
|
||||||
|
("斯洛伐克", ["Slovakia", "SK", "SVK"]),
|
||||||
|
("斯洛文尼亚", ["Slovenia", "SI", "SVN"]),
|
||||||
|
("所罗门群岛", ["Solomon Islands", "SB", "SLB"]),
|
||||||
|
("索马里", ["Somalia", "SO", "SOM"]),
|
||||||
|
("南非", ["South Africa", "ZA", "ZAF"]),
|
||||||
|
("南苏丹", ["South Sudan", "SS", "SSD"]),
|
||||||
|
("西班牙", ["Spain", "ES", "ESP"]),
|
||||||
|
("斯里兰卡", ["Sri Lanka", "LK", "LKA"]),
|
||||||
|
("苏丹", ["Sudan", "SD", "SDN"]),
|
||||||
|
("苏里南", ["Suriname", "SR", "SUR"]),
|
||||||
|
("瑞典", ["Sweden", "SE", "SWE"]),
|
||||||
|
("瑞士", ["Switzerland", "CH", "CHE"]),
|
||||||
|
("叙利亚", ["Syria", "SY", "SYR", "Syrian Arab Republic"]),
|
||||||
|
("塔吉克斯坦", ["Tajikistan", "TJ", "TJK"]),
|
||||||
|
("坦桑尼亚", ["Tanzania", "TZ", "TZA", "United Republic of Tanzania"]),
|
||||||
|
("泰国", ["Thailand", "TH", "THA"]),
|
||||||
|
("东帝汶", ["Timor-Leste", "East Timor", "TL", "TLS"]),
|
||||||
|
("多哥", ["Togo", "TG", "TGO"]),
|
||||||
|
("汤加", ["Tonga", "TO", "TON"]),
|
||||||
|
("特立尼达和多巴哥", ["Trinidad and Tobago", "TT", "TTO"]),
|
||||||
|
("突尼斯", ["Tunisia", "TN", "TUN"]),
|
||||||
|
("土耳其", ["Turkey", "TR", "TUR", "Türkiye"]),
|
||||||
|
("土库曼斯坦", ["Turkmenistan", "TM", "TKM"]),
|
||||||
|
("图瓦卢", ["Tuvalu", "TV", "TUV"]),
|
||||||
|
("乌干达", ["Uganda", "UG", "UGA"]),
|
||||||
|
("乌克兰", ["Ukraine", "UA", "UKR"]),
|
||||||
|
("阿联酋", ["United Arab Emirates", "AE", "ARE", "UAE"]),
|
||||||
|
("英国", ["United Kingdom", "UK", "GB", "GBR", "Great Britain", "Britain", "England"]),
|
||||||
|
("美国", ["United States", "United States of America", "US", "USA", "U.S.", "U.S.A."]),
|
||||||
|
("乌拉圭", ["Uruguay", "UY", "URY"]),
|
||||||
|
("乌兹别克斯坦", ["Uzbekistan", "UZ", "UZB"]),
|
||||||
|
("瓦努阿图", ["Vanuatu", "VU", "VUT"]),
|
||||||
|
("梵蒂冈", ["Vatican City", "Holy See", "VA", "VAT"]),
|
||||||
|
("委内瑞拉", ["Venezuela", "VE", "VEN", "Venezuela (Bolivarian Republic of)"]),
|
||||||
|
("越南", ["Vietnam", "Viet Nam", "VN", "VNM"]),
|
||||||
|
("也门", ["Yemen", "YE", "YEM"]),
|
||||||
|
("赞比亚", ["Zambia", "ZM", "ZMB"]),
|
||||||
|
("津巴布韦", ["Zimbabwe", "ZW", "ZWE"]),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# Canonical country names, in the order they are declared in COUNTRY_ENTRIES.
COUNTRY_OPTIONS = [canonical_name for canonical_name, _ in COUNTRY_ENTRIES]
CANONICAL_COUNTRY_SET = set(COUNTRY_OPTIONS)

# Case-folded placeholder strings that must never be treated as a country.
INVALID_COUNTRY_VALUES = {
    "",
    "-",
    "--",
    "unknown",
    "n/a",
    "na",
    "none",
    "null",
    "global",
    "world",
    "worldwide",
    "xx",
}

# Strings made only of digits, whitespace and numeric punctuation.
NUMERIC_LIKE_PATTERN = re.compile(r"^[\d\s,._%+\-]+$")


def _build_country_maps(entries):
    """Build (case-folded spelling -> canonical, canonical -> all spellings)."""
    alias_map = {}
    variants_map = {}
    for canonical_name, alias_list in entries:
        variants_map[canonical_name] = [canonical_name, *alias_list]
        # The canonical name is registered first, then its aliases; later
        # registrations overwrite earlier ones that share a case-folded key.
        for spelling in (canonical_name, *alias_list):
            alias_map[spelling.casefold()] = canonical_name
    return alias_map, variants_map


COUNTRY_ALIAS_MAP, COUNTRY_VARIANTS_MAP = _build_country_maps(COUNTRY_ENTRIES)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_country(value: Any) -> Optional[str]:
    """Map a free-form country string to its canonical name.

    Args:
        value: Raw country value. Anything that is not a ``str`` (including
            ``None``) is rejected.

    Returns:
        The canonical name from COUNTRY_ENTRIES, or ``None`` when the value is
        blank, a known placeholder, numeric-looking, or not a known
        name/alias. Matching against aliases is case-insensitive.
    """
    if not isinstance(value, str):
        return None

    # Trim and collapse internal whitespace runs to single spaces.
    normalized = re.sub(r"\s+", " ", value.strip())
    if not normalized:
        return None

    lowered = normalized.casefold()
    # NOTE(review): "na" is in INVALID_COUNTRY_VALUES, so Namibia's alias "NA"
    # is rejected here before the alias lookup is reached - confirm intended.
    if lowered in INVALID_COUNTRY_VALUES:
        return None

    if NUMERIC_LIKE_PATTERN.fullmatch(normalized):
        return None

    # Names may be written with half-width "()" or full-width parentheses
    # (U+FF08 / U+FF09). Try the value as given, then both parenthesis forms,
    # so the lookup succeeds whichever form the tables use.
    half_width = normalized.replace("\uff08", "(").replace("\uff09", ")")
    full_width = normalized.replace("(", "\uff08").replace(")", "\uff09")
    for candidate in dict.fromkeys((normalized, half_width, full_width)):
        if candidate in CANONICAL_COUNTRY_SET:
            return candidate
        matched = COUNTRY_ALIAS_MAP.get(candidate.casefold())
        if matched is not None:
            return matched

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_country_search_variants(value: Any) -> list[str]:
    """List every known spelling of the country that ``value`` resolves to.

    Returns an empty list when ``normalize_country`` cannot resolve the value.
    Otherwise returns the canonical name followed by its aliases, whitespace
    normalised, with case-insensitive duplicates removed (first one wins).
    """
    canonical = normalize_country(value)
    if canonical is None:
        return []

    # Keyed by case-folded spelling; insertion order preserves first-seen order.
    unique_spellings = {}
    for spelling in COUNTRY_VARIANTS_MAP.get(canonical, [canonical]):
        if not isinstance(spelling, str):
            continue
        cleaned = re.sub(r"\s+", " ", spelling.strip())
        if cleaned:
            unique_spellings.setdefault(cleaned.casefold(), cleaned)

    return list(unique_spellings.values())
|
||||||
81
backend/app/core/data_sources.py
Normal file
81
backend/app/core/data_sources.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
from functools import lru_cache
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
# Collector name -> dotted path of its URL inside the YAML configuration.
COLLECTOR_URL_KEYS = {
    "arcgis_cables": "arcgis.cable_url",
    "arcgis_landing_points": "arcgis.landing_point_url",
    "arcgis_cable_landing_relation": "arcgis.cable_landing_relation_url",
    "fao_landing_points": "fao.landing_point_url",
    "telegeography_cables": "telegeography.cable_url",
    "telegeography_landing": "telegeography.landing_point_url",
    "huggingface_models": "huggingface.models_url",
    "huggingface_datasets": "huggingface.datasets_url",
    "huggingface_spaces": "huggingface.spaces_url",
    "cloudflare_radar_device": "cloudflare.radar_device_url",
    "cloudflare_radar_traffic": "cloudflare.radar_traffic_url",
    "cloudflare_radar_top_locations": "cloudflare.radar_top_locations_url",
    "peeringdb_ixp": "peeringdb.ixp_url",
    "peeringdb_network": "peeringdb.network_url",
    "peeringdb_facility": "peeringdb.facility_url",
    "top500": "top500.url",
    "epoch_ai_gpu": "epoch_ai.gpu_clusters_url",
    "spacetrack_tle": "spacetrack.tle_query_url",
    "ris_live_bgp": "ris_live.url",
    "bgpstream_bgp": "bgpstream.url",
}


class DataSourcesConfig:
    """Resolve collector endpoint URLs from a YAML file, with optional DB overrides."""

    def __init__(self, config_path: Optional[str] = None):
        """Load the YAML configuration.

        Args:
            config_path: Path to the YAML file. Defaults to ``data_sources.yaml``
                in this module's directory. A missing or empty file results in
                an empty configuration.
        """
        if config_path is None:
            config_path = os.path.join(os.path.dirname(__file__), "data_sources.yaml")

        self._yaml_config = {}
        if os.path.exists(config_path):
            # Explicit encoding so parsing does not depend on the platform locale.
            with open(config_path, "r", encoding="utf-8") as f:
                self._yaml_config = yaml.safe_load(f) or {}

    def get_yaml_url(self, collector_name: str) -> str:
        """Return the YAML-configured URL for ``collector_name``.

        Returns ``""`` when the collector is not in COLLECTOR_URL_KEYS, when any
        segment of its dotted path is missing, or when the final value is not a
        string.
        """
        key = COLLECTOR_URL_KEYS.get(collector_name, "")
        if not key:
            return ""

        value = self._yaml_config
        for part in key.split("."):
            if not isinstance(value, dict):
                # Cannot descend further into a non-mapping value.
                return ""
            value = value.get(part, "")
        return value if isinstance(value, str) else ""

    async def get_url(self, collector_name: str, db) -> str:
        """Return the effective URL for a collector.

        An active ``DataSourceConfig`` row with a non-empty ``endpoint`` takes
        precedence over the YAML value. The database lookup is best-effort: any
        failure is logged and the YAML value is returned instead.

        Args:
            collector_name: Collector identifier.
            db: Session-like object exposing an awaitable ``execute``; a falsy
                value skips the database lookup entirely.
        """
        yaml_url = self.get_yaml_url(collector_name)

        if not db:
            return yaml_url

        try:
            from sqlalchemy import select
            from app.models.datasource_config import DataSourceConfig

            query = select(DataSourceConfig).where(
                DataSourceConfig.name == collector_name,
                DataSourceConfig.is_active == True,  # noqa: E712 - SQL expression, not a Python comparison
            )
            result = await db.execute(query)
            # Raises when several rows match; that also falls back to YAML below.
            db_config = result.scalar_one_or_none()

            if db_config and db_config.endpoint:
                return db_config.endpoint
        except Exception:
            # Keep the fallback behaviour, but make the failure visible.
            import logging

            logging.getLogger(__name__).warning(
                "Failed to load datasource override for %r; falling back to YAML URL",
                collector_name,
                exc_info=True,
            )

        return yaml_url
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache()
def get_data_sources_config() -> DataSourcesConfig:
    """Return the DataSourcesConfig instance, built on first call and then cached."""
    config = DataSourcesConfig()
    return config
|
||||||
45
backend/app/core/data_sources.yaml
Normal file
45
backend/app/core/data_sources.yaml
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Data Sources Configuration
|
||||||
|
# All external data source URLs should be configured here
|
||||||
|
|
||||||
|
arcgis:
|
||||||
|
cable_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
|
||||||
|
landing_point_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
|
||||||
|
cable_landing_relation_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
|
||||||
|
|
||||||
|
fao:
|
||||||
|
landing_point_url: "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"
|
||||||
|
|
||||||
|
telegeography:
|
||||||
|
cable_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/cable.json"
|
||||||
|
landing_point_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/landing_point.json"
|
||||||
|
|
||||||
|
huggingface:
|
||||||
|
models_url: "https://huggingface.co/api/models"
|
||||||
|
datasets_url: "https://huggingface.co/api/datasets"
|
||||||
|
spaces_url: "https://huggingface.co/api/spaces"
|
||||||
|
|
||||||
|
cloudflare:
|
||||||
|
radar_device_url: "https://api.cloudflare.com/client/v4/radar/http/summary/device_type"
|
||||||
|
radar_traffic_url: "https://api.cloudflare.com/client/v4/radar/http/timeseries/requests"
|
||||||
|
radar_top_locations_url: "https://api.cloudflare.com/client/v4/radar/http/top/locations"
|
||||||
|
|
||||||
|
peeringdb:
|
||||||
|
ixp_url: "https://www.peeringdb.com/api/ix"
|
||||||
|
network_url: "https://www.peeringdb.com/api/net"
|
||||||
|
facility_url: "https://www.peeringdb.com/api/fac"
|
||||||
|
|
||||||
|
top500:
|
||||||
|
url: "https://top500.org/lists/top500/list/2025/11/"
|
||||||
|
|
||||||
|
epoch_ai:
|
||||||
|
gpu_clusters_url: "https://epoch.ai/data/gpu-clusters"
|
||||||
|
|
||||||
|
spacetrack:
|
||||||
|
base_url: "https://www.space-track.org"
|
||||||
|
tle_query_url: "https://www.space-track.org/basicspacedata/query/class/gp/orderby/EPOCH%20desc/limit/1000/format/json"
|
||||||
|
|
||||||
|
ris_live:
|
||||||
|
url: "https://ris-live.ripe.net/v1/stream/?format=json&client=planet-ris-live"
|
||||||
|
|
||||||
|
bgpstream:
|
||||||
|
url: "https://broker.bgpstream.caida.org/v2"
|
||||||
140
backend/app/core/datasource_defaults.py
Normal file
140
backend/app/core/datasource_defaults.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
"""Default built-in datasource definitions."""
|
||||||
|
|
||||||
|
# Key order of every per-datasource dict; matches the column order of the rows below.
_DATASOURCE_FIELDS = ("id", "name", "module", "priority", "frequency_minutes")

# One row per built-in datasource: (collector key, id, name, module, priority, frequency_minutes).
# Ids 12-14 are not assigned to any built-in datasource.
_DATASOURCE_ROWS = (
    ("top500", 1, "TOP500 Supercomputers", "L1", "P0", 240),
    ("epoch_ai_gpu", 2, "Epoch AI GPU Clusters", "L1", "P0", 360),
    ("huggingface_models", 3, "HuggingFace Models", "L2", "P1", 720),
    ("huggingface_datasets", 4, "HuggingFace Datasets", "L2", "P1", 720),
    ("huggingface_spaces", 5, "HuggingFace Spaces", "L2", "P2", 1440),
    ("peeringdb_ixp", 6, "PeeringDB IXP", "L2", "P1", 1440),
    ("peeringdb_network", 7, "PeeringDB Networks", "L2", "P2", 2880),
    ("peeringdb_facility", 8, "PeeringDB Facilities", "L2", "P2", 2880),
    ("telegeography_cables", 9, "Submarine Cables", "L2", "P1", 10080),
    ("telegeography_landing", 10, "Cable Landing Points", "L2", "P2", 10080),
    ("telegeography_systems", 11, "Cable Systems", "L2", "P2", 10080),
    ("arcgis_cables", 15, "ArcGIS Submarine Cables", "L2", "P1", 10080),
    ("arcgis_landing_points", 16, "ArcGIS Landing Points", "L2", "P1", 10080),
    ("arcgis_cable_landing_relation", 17, "ArcGIS Cable-Landing Relations", "L2", "P1", 10080),
    ("fao_landing_points", 18, "FAO Landing Points", "L2", "P1", 10080),
    ("spacetrack_tle", 19, "Space-Track TLE", "L3", "P2", 1440),
    ("celestrak_tle", 20, "CelesTrak TLE", "L3", "P2", 1440),
    ("ris_live_bgp", 21, "RIPE RIS Live BGP", "L3", "P1", 15),
    ("bgpstream_bgp", 22, "CAIDA BGPStream Backfill", "L3", "P1", 360),
)

# Collector key -> {"id", "name", "module", "priority", "frequency_minutes"}.
DEFAULT_DATASOURCES = {
    collector: dict(zip(_DATASOURCE_FIELDS, values))
    for collector, *values in _DATASOURCE_ROWS
}

# Lookup tables between numeric datasource ids and collector keys.
COLLECTOR_TO_ID = {collector: spec["id"] for collector, spec in DEFAULT_DATASOURCES.items()}
ID_TO_COLLECTOR = {ds_id: collector for collector, ds_id in COLLECTOR_TO_ID.items()}
|
||||||
116
backend/app/core/satellite_tle.py
Normal file
116
backend/app/core/satellite_tle.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
"""Helpers for building stable TLE lines from orbital elements."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def compute_tle_checksum(line: str) -> str:
    """Return the modulo-10 checksum digit for a TLE line.

    Only the first 68 characters are considered. Each digit contributes its
    numeric value, each ``-`` contributes 1, and every other character
    contributes nothing.
    """
    contributions = (
        int(symbol) if symbol.isdigit() else 1 if symbol == "-" else 0
        for symbol in line[:68]
    )
    return str(sum(contributions) % 10)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_epoch(value: Any) -> Optional[datetime]:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value
|
||||||
|
if isinstance(value, str):
|
||||||
|
return datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def build_tle_line1(norad_cat_id: Any, epoch: Any) -> Optional[str]:
    """Build a fixed-width TLE line 1 from the NORAD id and epoch.

    Only the catalog number and the epoch carry real information; every other
    field is a constant placeholder (classification ``U``, international
    designator ``00001A``, zero drag terms, ephemeris type ``0``, element set
    number ``999``).

    Args:
        norad_cat_id: NORAD catalog number; anything accepted by ``int()``.
        epoch: A ``datetime`` or ISO-8601 string. Naive values are used as-is
            (NOTE(review): presumably already UTC - confirm against callers);
            timezone-aware values are converted to UTC first.

    Returns:
        The 69-character line (68 columns plus checksum digit), or ``None``
        when the catalog number is falsy or the epoch cannot be interpreted.
    """
    # Local import: this module only imports ``datetime`` at file level.
    from datetime import timezone

    epoch_date = _parse_epoch(epoch)
    if not norad_cat_id or epoch_date is None:
        return None

    # TLE epochs are expressed in UTC, so normalise aware datetimes before
    # extracting year / day-of-year / time-of-day.
    if epoch_date.tzinfo is not None:
        epoch_date = epoch_date.astimezone(timezone.utc)

    epoch_year = epoch_date.year % 100
    day_of_year = epoch_date.timetuple().tm_yday
    ms_of_day = (
        epoch_date.hour * 3_600_000
        + epoch_date.minute * 60_000
        + epoch_date.second * 1_000
        + epoch_date.microsecond // 1_000
    )
    # ms_of_day is at most 86_399_999, so the fraction always formats as
    # "0.xxxxxxxx"; dropping the leading "0" leaves ".xxxxxxxx".
    day_fraction = ms_of_day / 86_400_000
    decimal_fraction = f"{day_fraction:.8f}"[1:]
    epoch_str = f"{epoch_year:02d}{day_of_year:03d}{decimal_fraction}"

    # Column layout (1-based): 1 line no, 3-7 catalog no, 8 classification,
    # 10-17 international designator, 19-32 epoch, 34-43 first derivative,
    # 45-52 second derivative, 54-61 BSTAR, 63 ephemeris type,
    # 65-68 element set number, 69 checksum. The padding below keeps every
    # field in its column so the line is exactly 68 characters before the
    # checksum is appended.
    core = (
        f"1 {int(norad_cat_id):05d}U 00001A   {epoch_str}"
        "  .00000000  00000-0  00000-0 0  999"
    )
    return core + compute_tle_checksum(core)
|
||||||
|
|
||||||
|
|
||||||
|
def build_tle_line2(
    norad_cat_id: Any,
    inclination: Any,
    raan: Any,
    eccentricity: Any,
    arg_of_perigee: Any,
    mean_anomaly: Any,
    mean_motion: Any,
) -> Optional[str]:
    """Assemble TLE line 2 from the classical orbital elements.

    Returns ``None`` as soon as any argument is ``None``. Otherwise the fields
    are formatted, joined with single spaces, suffixed with a constant
    ``00000`` and terminated by the checksum digit.
    """
    elements = (
        norad_cat_id,
        inclination,
        raan,
        eccentricity,
        arg_of_perigee,
        mean_anomaly,
        mean_motion,
    )
    for element in elements:
        if element is None:
            return None

    # Eccentricity is written without its decimal point: seven digits of the
    # value scaled by 1e7, left-padded with zeros.
    ecc_field = f"{round(float(eccentricity) * 10_000_000)}".zfill(7)

    fields = [
        "2",
        f"{int(norad_cat_id):05d}",
        f"{float(inclination):8.4f}",
        f"{float(raan):8.4f}",
        ecc_field,
        f"{float(arg_of_perigee):8.4f}",
        f"{float(mean_anomaly):8.4f}",
        f"{float(mean_motion):11.8f}",
    ]
    # The trailing "00000" follows the mean motion directly, with no separator.
    body = " ".join(fields) + "00000"
    return body + compute_tle_checksum(body)
|
||||||
|
|
||||||
|
|
||||||
|
def build_tle_lines_from_elements(
    *,
    norad_cat_id: Any,
    epoch: Any,
    inclination: Any,
    raan: Any,
    eccentricity: Any,
    arg_of_perigee: Any,
    mean_anomaly: Any,
    mean_motion: Any,
) -> tuple[Optional[str], Optional[str]]:
    """Return ``(line1, line2)`` built from keyword-only orbital elements.

    Each entry is produced independently by ``build_tle_line1`` and
    ``build_tle_line2`` and may be ``None`` when its inputs are missing.
    """
    # Tuple elements evaluate left to right, so line 1 is still built first.
    return (
        build_tle_line1(norad_cat_id, epoch),
        build_tle_line2(
            norad_cat_id=norad_cat_id,
            inclination=inclination,
            raan=raan,
            eccentricity=eccentricity,
            arg_of_perigee=arg_of_perigee,
            mean_anomaly=mean_anomaly,
            mean_motion=mean_motion,
        ),
    )
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
from datetime import datetime, timedelta
|
from datetime import UTC, datetime, timedelta
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import bcrypt
|
import bcrypt
|
||||||
@@ -49,9 +49,9 @@ def get_password_hash(password: str) -> str:
|
|||||||
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
|
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
|
||||||
to_encode = data.copy()
|
to_encode = data.copy()
|
||||||
if expires_delta:
|
if expires_delta:
|
||||||
expire = datetime.utcnow() + expires_delta
|
expire = datetime.now(UTC) + expires_delta
|
||||||
elif settings.ACCESS_TOKEN_EXPIRE_MINUTES > 0:
|
elif settings.ACCESS_TOKEN_EXPIRE_MINUTES > 0:
|
||||||
expire = datetime.utcnow() + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
expire = datetime.now(UTC) + timedelta(minutes=settings.ACCESS_TOKEN_EXPIRE_MINUTES)
|
||||||
else:
|
else:
|
||||||
expire = None
|
expire = None
|
||||||
if expire:
|
if expire:
|
||||||
@@ -65,7 +65,7 @@ def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -
|
|||||||
def create_refresh_token(data: dict) -> str:
|
def create_refresh_token(data: dict) -> str:
|
||||||
to_encode = data.copy()
|
to_encode = data.copy()
|
||||||
if settings.REFRESH_TOKEN_EXPIRE_DAYS > 0:
|
if settings.REFRESH_TOKEN_EXPIRE_DAYS > 0:
|
||||||
expire = datetime.utcnow() + timedelta(days=settings.REFRESH_TOKEN_EXPIRE_DAYS)
|
expire = datetime.now(UTC) + timedelta(days=settings.REFRESH_TOKEN_EXPIRE_DAYS)
|
||||||
to_encode.update({"exp": expire})
|
to_encode.update({"exp": expire})
|
||||||
to_encode.update({"type": "refresh"})
|
to_encode.update({"type": "refresh"})
|
||||||
if "sub" in to_encode:
|
if "sub" in to_encode:
|
||||||
|
|||||||
20
backend/app/core/time.py
Normal file
20
backend/app/core/time.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
"""Time helpers for API serialization."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_utc(value: datetime | None) -> datetime | None:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
if value.tzinfo is None:
|
||||||
|
return value.replace(tzinfo=UTC)
|
||||||
|
return value.astimezone(UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def to_iso8601_utc(value: datetime | None) -> str | None:
    """Serialize ``value`` as an ISO-8601 UTC string, or ``None`` for ``None``.

    The value is normalised with ``ensure_utc`` and the ``+00:00`` offset in
    its ``isoformat()`` output is replaced by ``Z``.
    """
    utc_value = ensure_utc(value)
    return None if utc_value is None else utc_value.isoformat().replace("+00:00", "Z")
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,12 +1,14 @@
|
|||||||
"""Data broadcaster for WebSocket connections"""
|
"""Data broadcaster for WebSocket connections"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from typing import Dict, Any, Optional
|
from typing import Dict, Any, Optional
|
||||||
|
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.core.websocket.manager import manager
|
from app.core.websocket.manager import manager
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DataBroadcaster:
|
class DataBroadcaster:
|
||||||
"""Periodically broadcasts data to connected WebSocket clients"""
|
"""Periodically broadcasts data to connected WebSocket clients"""
|
||||||
|
|
||||||
@@ -21,7 +23,7 @@ class DataBroadcaster:
|
|||||||
"active_datasources": 8,
|
"active_datasources": 8,
|
||||||
"tasks_today": 45,
|
"tasks_today": 45,
|
||||||
"success_rate": 97.8,
|
"success_rate": 97.8,
|
||||||
"last_updated": datetime.utcnow().isoformat(),
|
"last_updated": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"alerts": {"critical": 0, "warning": 2, "info": 5},
|
"alerts": {"critical": 0, "warning": 2, "info": 5},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,7 +36,7 @@ class DataBroadcaster:
|
|||||||
{
|
{
|
||||||
"type": "data_frame",
|
"type": "data_frame",
|
||||||
"channel": "dashboard",
|
"channel": "dashboard",
|
||||||
"timestamp": datetime.utcnow().isoformat(),
|
"timestamp": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"payload": {"stats": stats},
|
"payload": {"stats": stats},
|
||||||
},
|
},
|
||||||
channel="dashboard",
|
channel="dashboard",
|
||||||
@@ -48,7 +50,7 @@ class DataBroadcaster:
|
|||||||
await manager.broadcast(
|
await manager.broadcast(
|
||||||
{
|
{
|
||||||
"type": "alert_notification",
|
"type": "alert_notification",
|
||||||
"timestamp": datetime.utcnow().isoformat(),
|
"timestamp": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"data": {"alert": alert},
|
"data": {"alert": alert},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -59,7 +61,7 @@ class DataBroadcaster:
|
|||||||
{
|
{
|
||||||
"type": "data_frame",
|
"type": "data_frame",
|
||||||
"channel": "gpu_clusters",
|
"channel": "gpu_clusters",
|
||||||
"timestamp": datetime.utcnow().isoformat(),
|
"timestamp": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"payload": data,
|
"payload": data,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@@ -70,12 +72,24 @@ class DataBroadcaster:
|
|||||||
{
|
{
|
||||||
"type": "data_frame",
|
"type": "data_frame",
|
||||||
"channel": channel,
|
"channel": channel,
|
||||||
"timestamp": datetime.utcnow().isoformat(),
|
"timestamp": to_iso8601_utc(datetime.now(UTC)),
|
||||||
"payload": data,
|
"payload": data,
|
||||||
},
|
},
|
||||||
channel=channel if channel in manager.active_connections else "all",
|
channel=channel if channel in manager.active_connections else "all",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def broadcast_datasource_task_update(self, data: Dict[str, Any]):
    """Publish a datasource task progress frame through the WebSocket manager.

    The frame is labelled with the ``datasource_tasks`` channel and a UTC
    timestamp, and is handed to ``manager.broadcast`` with ``channel="all"``.
    """
    frame = {
        "type": "data_frame",
        "channel": "datasource_tasks",
        "timestamp": to_iso8601_utc(datetime.now(UTC)),
        "payload": data,
    }
    await manager.broadcast(frame, channel="all")
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
"""Start all broadcasters"""
|
"""Start all broadcasters"""
|
||||||
if not self.running:
|
if not self.running:
|
||||||
|
|||||||
Binary file not shown.
@@ -1,5 +1,6 @@
|
|||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
||||||
from sqlalchemy.orm import declarative_base
|
from sqlalchemy.orm import declarative_base
|
||||||
|
|
||||||
@@ -25,11 +26,126 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
|||||||
raise
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
async def seed_default_datasources(session: AsyncSession):
    """Create or refresh the built-in datasource rows, then commit.

    For every entry in ``DEFAULT_DATASOURCES`` the row with the matching id is
    looked up. A missing row is inserted as active with an empty ``"{}"``
    config. An existing row has its name, source, module, priority, frequency
    and collector class overwritten with the defaults; its config is only set
    when it is currently ``None``.
    """
    from app.core.datasource_defaults import DEFAULT_DATASOURCES
    from app.models.datasource import DataSource

    for collector_key, defaults in DEFAULT_DATASOURCES.items():
        row = await session.get(DataSource, defaults["id"])

        if not row:
            session.add(
                DataSource(
                    id=defaults["id"],
                    name=defaults["name"],
                    source=collector_key,
                    module=defaults["module"],
                    priority=defaults["priority"],
                    frequency_minutes=defaults["frequency_minutes"],
                    collector_class=collector_key,
                    config="{}",
                    is_active=True,
                )
            )
            continue

        # Existing row: re-sync the built-in attributes but leave any stored
        # config and the is_active flag untouched.
        row.name = defaults["name"]
        row.source = collector_key
        row.module = defaults["module"]
        row.priority = defaults["priority"]
        row.frequency_minutes = defaults["frequency_minutes"]
        row.collector_class = collector_key
        if row.config is None:
            row.config = "{}"

    await session.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def ensure_default_admin_user(session: AsyncSession):
    """Create the bootstrap ``admin`` account when no such user exists.

    Does nothing if a row with username ``admin`` is already present;
    otherwise inserts an active ``super_admin`` user and commits.
    """
    from app.core.security import get_password_hash
    from app.models.user import User

    lookup = text("SELECT id FROM users WHERE username = 'admin'")
    admin_row = (await session.execute(lookup)).fetchone()
    if admin_row:
        return

    # NOTE(review): the bootstrap password is a hard-coded, well-known value;
    # it should be changed after the first login - confirm deployment policy.
    bootstrap_admin = User(
        username="admin",
        email="admin@planet.local",
        password_hash=get_password_hash("admin123"),
        role="super_admin",
        is_active=True,
    )
    session.add(bootstrap_admin)
    await session.commit()
|
||||||
|
|
||||||
|
|
||||||
async def init_db():
|
async def init_db():
|
||||||
import app.models.user # noqa: F401
|
import app.models.user # noqa: F401
|
||||||
import app.models.gpu_cluster # noqa: F401
|
import app.models.gpu_cluster # noqa: F401
|
||||||
import app.models.task # noqa: F401
|
import app.models.task # noqa: F401
|
||||||
|
import app.models.data_snapshot # noqa: F401
|
||||||
import app.models.datasource # noqa: F401
|
import app.models.datasource # noqa: F401
|
||||||
|
import app.models.datasource_config # noqa: F401
|
||||||
|
import app.models.alert # noqa: F401
|
||||||
|
import app.models.bgp_anomaly # noqa: F401
|
||||||
|
import app.models.collected_data # noqa: F401
|
||||||
|
import app.models.system_setting # noqa: F401
|
||||||
|
|
||||||
async with engine.begin() as conn:
|
async with engine.begin() as conn:
|
||||||
await conn.run_sync(Base.metadata.create_all)
|
await conn.run_sync(Base.metadata.create_all)
|
||||||
|
await conn.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
ALTER TABLE collected_data
|
||||||
|
ADD COLUMN IF NOT EXISTS snapshot_id INTEGER,
|
||||||
|
ADD COLUMN IF NOT EXISTS task_id INTEGER,
|
||||||
|
ADD COLUMN IF NOT EXISTS entity_key VARCHAR(255),
|
||||||
|
ADD COLUMN IF NOT EXISTS is_current BOOLEAN DEFAULT TRUE,
|
||||||
|
ADD COLUMN IF NOT EXISTS previous_record_id INTEGER,
|
||||||
|
ADD COLUMN IF NOT EXISTS change_type VARCHAR(20),
|
||||||
|
ADD COLUMN IF NOT EXISTS change_summary JSONB DEFAULT '{}'::jsonb,
|
||||||
|
ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
ALTER TABLE collection_tasks
|
||||||
|
ADD COLUMN IF NOT EXISTS phase VARCHAR(30) DEFAULT 'queued'
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_collected_data_source_source_id
|
||||||
|
ON collected_data (source, source_id)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
UPDATE collected_data
|
||||||
|
SET entity_key = source || ':' || COALESCE(source_id, id::text)
|
||||||
|
WHERE entity_key IS NULL
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
UPDATE collected_data
|
||||||
|
SET is_current = TRUE
|
||||||
|
WHERE is_current IS NULL
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
async with async_session_factory() as session:
|
||||||
|
await seed_default_datasources(session)
|
||||||
|
await ensure_default_admin_user(session)
|
||||||
|
|||||||
@@ -2,15 +2,19 @@ from contextlib import asynccontextmanager
|
|||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from fastapi.staticfiles import StaticFiles
|
|
||||||
from starlette.middleware.base import BaseHTTPMiddleware
|
from starlette.middleware.base import BaseHTTPMiddleware
|
||||||
|
|
||||||
from app.core.config import settings
|
|
||||||
from app.core.websocket.broadcaster import broadcaster
|
|
||||||
from app.db.session import init_db, async_session_factory
|
|
||||||
from app.api.main import api_router
|
from app.api.main import api_router
|
||||||
from app.api.v1 import websocket
|
from app.api.v1 import websocket
|
||||||
from app.services.scheduler import start_scheduler, stop_scheduler
|
from app.core.config import settings
|
||||||
|
from app.core.websocket.broadcaster import broadcaster
|
||||||
|
from app.db.session import init_db
|
||||||
|
from app.services.scheduler import (
|
||||||
|
cleanup_stale_running_tasks,
|
||||||
|
start_scheduler,
|
||||||
|
stop_scheduler,
|
||||||
|
sync_scheduler_with_datasources,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class WebSocketCORSMiddleware(BaseHTTPMiddleware):
|
class WebSocketCORSMiddleware(BaseHTTPMiddleware):
|
||||||
@@ -27,7 +31,9 @@ class WebSocketCORSMiddleware(BaseHTTPMiddleware):
|
|||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
await init_db()
|
await init_db()
|
||||||
|
await cleanup_stale_running_tasks()
|
||||||
start_scheduler()
|
start_scheduler()
|
||||||
|
await sync_scheduler_with_datasources()
|
||||||
broadcaster.start()
|
broadcaster.start()
|
||||||
yield
|
yield
|
||||||
broadcaster.stop()
|
broadcaster.stop()
|
||||||
@@ -60,16 +66,11 @@ app.include_router(websocket.router)
|
|||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
async def health_check():
|
async def health_check():
|
||||||
"""健康检查端点"""
|
return {"status": "healthy", "version": settings.VERSION}
|
||||||
return {
|
|
||||||
"status": "healthy",
|
|
||||||
"version": settings.VERSION,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
async def root():
|
async def root():
|
||||||
"""API根目录"""
|
|
||||||
return {
|
return {
|
||||||
"name": settings.PROJECT_NAME,
|
"name": settings.PROJECT_NAME,
|
||||||
"version": settings.VERSION,
|
"version": settings.VERSION,
|
||||||
@@ -80,7 +81,6 @@ async def root():
|
|||||||
|
|
||||||
@app.get("/api/v1/scheduler/jobs")
|
@app.get("/api/v1/scheduler/jobs")
|
||||||
async def get_scheduler_jobs():
|
async def get_scheduler_jobs():
|
||||||
"""获取调度任务列表"""
|
|
||||||
from app.services.scheduler import get_scheduler_jobs
|
from app.services.scheduler import get_scheduler_jobs
|
||||||
|
|
||||||
return {"jobs": get_scheduler_jobs()}
|
return {"jobs": get_scheduler_jobs()}
|
||||||
|
|||||||
@@ -1,15 +1,23 @@
|
|||||||
from app.models.user import User
|
from app.models.user import User
|
||||||
from app.models.gpu_cluster import GPUCluster
|
from app.models.gpu_cluster import GPUCluster
|
||||||
from app.models.task import CollectionTask
|
from app.models.task import CollectionTask
|
||||||
|
from app.models.data_snapshot import DataSnapshot
|
||||||
from app.models.datasource import DataSource
|
from app.models.datasource import DataSource
|
||||||
|
from app.models.datasource_config import DataSourceConfig
|
||||||
from app.models.alert import Alert, AlertSeverity, AlertStatus
|
from app.models.alert import Alert, AlertSeverity, AlertStatus
|
||||||
|
from app.models.bgp_anomaly import BGPAnomaly
|
||||||
|
from app.models.system_setting import SystemSetting
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"User",
|
"User",
|
||||||
"GPUCluster",
|
"GPUCluster",
|
||||||
"CollectionTask",
|
"CollectionTask",
|
||||||
|
"DataSnapshot",
|
||||||
"DataSource",
|
"DataSource",
|
||||||
|
"DataSourceConfig",
|
||||||
|
"SystemSetting",
|
||||||
"Alert",
|
"Alert",
|
||||||
"AlertSeverity",
|
"AlertSeverity",
|
||||||
"AlertStatus",
|
"AlertStatus",
|
||||||
|
"BGPAnomaly",
|
||||||
]
|
]
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -5,6 +5,7 @@ from typing import Optional
|
|||||||
from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Enum as SQLEnum
|
from sqlalchemy import Column, Integer, String, DateTime, Text, ForeignKey, Enum as SQLEnum
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import relationship
|
||||||
|
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.db.session import Base
|
from app.db.session import Base
|
||||||
|
|
||||||
|
|
||||||
@@ -50,8 +51,8 @@ class Alert(Base):
|
|||||||
"acknowledged_by": self.acknowledged_by,
|
"acknowledged_by": self.acknowledged_by,
|
||||||
"resolved_by": self.resolved_by,
|
"resolved_by": self.resolved_by,
|
||||||
"resolution_notes": self.resolution_notes,
|
"resolution_notes": self.resolution_notes,
|
||||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
"created_at": to_iso8601_utc(self.created_at),
|
||||||
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
|
"updated_at": to_iso8601_utc(self.updated_at),
|
||||||
"acknowledged_at": self.acknowledged_at.isoformat() if self.acknowledged_at else None,
|
"acknowledged_at": to_iso8601_utc(self.acknowledged_at),
|
||||||
"resolved_at": self.resolved_at.isoformat() if self.resolved_at else None,
|
"resolved_at": to_iso8601_utc(self.resolved_at),
|
||||||
}
|
}
|
||||||
|
|||||||
58
backend/app/models/bgp_anomaly.py
Normal file
58
backend/app/models/bgp_anomaly.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
"""BGP anomaly model for derived routing intelligence."""
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from sqlalchemy import Column, DateTime, Float, ForeignKey, Index, Integer, JSON, String, Text
|
||||||
|
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
|
|
||||||
|
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    # Local import: this module only imports ``datetime`` at file level.
    from datetime import timezone

    return datetime.now(timezone.utc)


class BGPAnomaly(Base):
    """ORM model for one derived BGP anomaly stored in ``bgp_anomalies``.

    Timestamp columns are declared ``DateTime(timezone=True)``, so their
    Python-side defaults produce timezone-aware UTC values. The previous
    ``datetime.utcnow`` default produced naive values instead.
    """

    __tablename__ = "bgp_anomalies"

    id = Column(Integer, primary_key=True, index=True)
    # Optional links to the snapshot and collection task the anomaly belongs to.
    snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
    task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
    source = Column(String(100), nullable=False, index=True)
    anomaly_type = Column(String(50), nullable=False, index=True)
    severity = Column(String(20), nullable=False, index=True)
    status = Column(String(20), nullable=False, default="active", index=True)
    entity_key = Column(String(255), nullable=False, index=True)
    prefix = Column(String(64), nullable=True, index=True)
    origin_asn = Column(Integer, nullable=True, index=True)
    new_origin_asn = Column(Integer, nullable=True, index=True)
    # Callable defaults (``list`` / ``dict``) give every row its own container.
    peer_scope = Column(JSON, default=list)
    started_at = Column(DateTime(timezone=True), nullable=False, default=_utc_now, index=True)
    ended_at = Column(DateTime(timezone=True), nullable=True)
    confidence = Column(Float, nullable=False, default=0.5)
    summary = Column(Text, nullable=False)
    evidence = Column(JSON, default=dict)
    created_at = Column(DateTime(timezone=True), nullable=False, default=_utc_now, index=True)

    __table_args__ = (
        Index("idx_bgp_anomalies_source_created", "source", "created_at"),
        Index("idx_bgp_anomalies_type_status", "anomaly_type", "status"),
    )

    def to_dict(self) -> dict:
        """Return a plain-dict view of the row.

        Timestamps are rendered through ``to_iso8601_utc``; ``peer_scope`` and
        ``evidence`` fall back to an empty list / dict when unset.
        """
        return {
            "id": self.id,
            "snapshot_id": self.snapshot_id,
            "task_id": self.task_id,
            "source": self.source,
            "anomaly_type": self.anomaly_type,
            "severity": self.severity,
            "status": self.status,
            "entity_key": self.entity_key,
            "prefix": self.prefix,
            "origin_asn": self.origin_asn,
            "new_origin_asn": self.new_origin_asn,
            "peer_scope": self.peer_scope or [],
            "started_at": to_iso8601_utc(self.started_at),
            "ended_at": to_iso8601_utc(self.ended_at),
            "confidence": self.confidence,
            "summary": self.summary,
            "evidence": self.evidence or {},
            "created_at": to_iso8601_utc(self.created_at),
        }
|
||||||
@@ -1,8 +1,10 @@
|
|||||||
"""Collected Data model for storing data from all collectors"""
|
"""Collected Data model for storing data from all collectors"""
|
||||||
|
|
||||||
from sqlalchemy import Column, DateTime, Integer, String, Text, JSON, Index
|
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, String, Text, JSON, Index
|
||||||
from sqlalchemy.sql import func
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
from app.core.collected_data_fields import get_record_field
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
from app.db.session import Base
|
from app.db.session import Base
|
||||||
|
|
||||||
|
|
||||||
@@ -12,8 +14,11 @@ class CollectedData(Base):
|
|||||||
__tablename__ = "collected_data"
|
__tablename__ = "collected_data"
|
||||||
|
|
||||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
|
||||||
|
task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
|
||||||
source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models"
|
source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models"
|
||||||
source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1"
|
source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1"
|
||||||
|
entity_key = Column(String(255), index=True)
|
||||||
data_type = Column(
|
data_type = Column(
|
||||||
String(50), nullable=False, index=True
|
String(50), nullable=False, index=True
|
||||||
) # e.g., "supercomputer", "model", "dataset"
|
) # e.g., "supercomputer", "model", "dataset"
|
||||||
@@ -23,16 +28,6 @@ class CollectedData(Base):
|
|||||||
title = Column(String(500))
|
title = Column(String(500))
|
||||||
description = Column(Text)
|
description = Column(Text)
|
||||||
|
|
||||||
# Location data (for geo visualization)
|
|
||||||
country = Column(String(100))
|
|
||||||
city = Column(String(100))
|
|
||||||
latitude = Column(String(50))
|
|
||||||
longitude = Column(String(50))
|
|
||||||
|
|
||||||
# Performance metrics
|
|
||||||
value = Column(String(100)) # Generic value field (Rmax, Rpeak, etc.)
|
|
||||||
unit = Column(String(20))
|
|
||||||
|
|
||||||
# Additional metadata as JSON
|
# Additional metadata as JSON
|
||||||
extra_data = Column(
|
extra_data = Column(
|
||||||
"metadata", JSON, default={}
|
"metadata", JSON, default={}
|
||||||
@@ -44,11 +39,17 @@ class CollectedData(Base):
|
|||||||
|
|
||||||
# Status
|
# Status
|
||||||
is_valid = Column(Integer, default=1) # 1=valid, 0=invalid
|
is_valid = Column(Integer, default=1) # 1=valid, 0=invalid
|
||||||
|
is_current = Column(Boolean, default=True, index=True)
|
||||||
|
previous_record_id = Column(Integer, ForeignKey("collected_data.id"), nullable=True, index=True)
|
||||||
|
change_type = Column(String(20), nullable=True)
|
||||||
|
change_summary = Column(JSON, default={})
|
||||||
|
deleted_at = Column(DateTime(timezone=True), nullable=True)
|
||||||
|
|
||||||
# Indexes for common queries
|
# Indexes for common queries
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
Index("idx_collected_data_source_collected", "source", "collected_at"),
|
Index("idx_collected_data_source_collected", "source", "collected_at"),
|
||||||
Index("idx_collected_data_source_type", "source", "data_type"),
|
Index("idx_collected_data_source_type", "source", "data_type"),
|
||||||
|
Index("idx_collected_data_source_source_id", "source", "source_id"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@@ -58,23 +59,27 @@ class CollectedData(Base):
|
|||||||
"""Convert to dictionary"""
|
"""Convert to dictionary"""
|
||||||
return {
|
return {
|
||||||
"id": self.id,
|
"id": self.id,
|
||||||
|
"snapshot_id": self.snapshot_id,
|
||||||
|
"task_id": self.task_id,
|
||||||
"source": self.source,
|
"source": self.source,
|
||||||
"source_id": self.source_id,
|
"source_id": self.source_id,
|
||||||
|
"entity_key": self.entity_key,
|
||||||
"data_type": self.data_type,
|
"data_type": self.data_type,
|
||||||
"name": self.name,
|
"name": self.name,
|
||||||
"title": self.title,
|
"title": self.title,
|
||||||
"description": self.description,
|
"description": self.description,
|
||||||
"country": self.country,
|
"country": get_record_field(self, "country"),
|
||||||
"city": self.city,
|
"city": get_record_field(self, "city"),
|
||||||
"latitude": self.latitude,
|
"latitude": get_record_field(self, "latitude"),
|
||||||
"longitude": self.longitude,
|
"longitude": get_record_field(self, "longitude"),
|
||||||
"value": self.value,
|
"value": get_record_field(self, "value"),
|
||||||
"unit": self.unit,
|
"unit": get_record_field(self, "unit"),
|
||||||
"metadata": self.extra_data,
|
"metadata": self.extra_data,
|
||||||
"collected_at": self.collected_at.isoformat()
|
"collected_at": to_iso8601_utc(self.collected_at),
|
||||||
if self.collected_at is not None
|
"reference_date": to_iso8601_utc(self.reference_date),
|
||||||
else None,
|
"is_current": self.is_current,
|
||||||
"reference_date": self.reference_date.isoformat()
|
"previous_record_id": self.previous_record_id,
|
||||||
if self.reference_date is not None
|
"change_type": self.change_type,
|
||||||
else None,
|
"change_summary": self.change_summary,
|
||||||
|
"deleted_at": to_iso8601_utc(self.deleted_at),
|
||||||
}
|
}
|
||||||
|
|||||||
26
backend/app/models/data_snapshot.py
Normal file
26
backend/app/models/data_snapshot.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, JSON, String
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
|
|
||||||
|
class DataSnapshot(Base):
|
||||||
|
__tablename__ = "data_snapshots"
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
datasource_id = Column(Integer, nullable=False, index=True)
|
||||||
|
task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
|
||||||
|
source = Column(String(100), nullable=False, index=True)
|
||||||
|
snapshot_key = Column(String(100), nullable=True, index=True)
|
||||||
|
reference_date = Column(DateTime(timezone=True), nullable=True)
|
||||||
|
started_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||||
|
completed_at = Column(DateTime(timezone=True), nullable=True)
|
||||||
|
record_count = Column(Integer, default=0)
|
||||||
|
status = Column(String(20), nullable=False, default="running")
|
||||||
|
is_current = Column(Boolean, default=True, index=True)
|
||||||
|
parent_snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
|
||||||
|
summary = Column(JSON, default={})
|
||||||
|
created_at = Column(DateTime(timezone=True), server_default=func.now())
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<DataSnapshot {self.id}: {self.source}/{self.status}>"
|
||||||
19
backend/app/models/system_setting.py
Normal file
19
backend/app/models/system_setting.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
"""Persistent system settings model."""
|
||||||
|
|
||||||
|
from sqlalchemy import JSON, Column, DateTime, Integer, String, UniqueConstraint
|
||||||
|
from sqlalchemy.sql import func
|
||||||
|
|
||||||
|
from app.db.session import Base
|
||||||
|
|
||||||
|
|
||||||
|
class SystemSetting(Base):
|
||||||
|
__tablename__ = "system_settings"
|
||||||
|
__table_args__ = (UniqueConstraint("category", name="uq_system_settings_category"),)
|
||||||
|
|
||||||
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
category = Column(String(50), nullable=False)
|
||||||
|
payload = Column(JSON, nullable=False, default={})
|
||||||
|
updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return f"<SystemSetting {self.category}>"
|
||||||
@@ -12,6 +12,7 @@ class CollectionTask(Base):
|
|||||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||||
datasource_id = Column(Integer, nullable=False, index=True)
|
datasource_id = Column(Integer, nullable=False, index=True)
|
||||||
status = Column(String(20), nullable=False) # pending, running, success, failed, cancelled
|
status = Column(String(20), nullable=False) # pending, running, success, failed, cancelled
|
||||||
|
phase = Column(String(30), default="queued")
|
||||||
started_at = Column(DateTime(timezone=True))
|
started_at = Column(DateTime(timezone=True))
|
||||||
completed_at = Column(DateTime(timezone=True))
|
completed_at = Column(DateTime(timezone=True))
|
||||||
records_processed = Column(Integer, default=0)
|
records_processed = Column(Integer, default=0)
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -28,6 +28,10 @@ from app.services.collectors.arcgis_cables import ArcGISCableCollector
|
|||||||
from app.services.collectors.fao_landing import FAOLandingPointCollector
|
from app.services.collectors.fao_landing import FAOLandingPointCollector
|
||||||
from app.services.collectors.arcgis_landing import ArcGISLandingPointCollector
|
from app.services.collectors.arcgis_landing import ArcGISLandingPointCollector
|
||||||
from app.services.collectors.arcgis_relation import ArcGISCableLandingRelationCollector
|
from app.services.collectors.arcgis_relation import ArcGISCableLandingRelationCollector
|
||||||
|
from app.services.collectors.spacetrack import SpaceTrackTLECollector
|
||||||
|
from app.services.collectors.celestrak import CelesTrakTLECollector
|
||||||
|
from app.services.collectors.ris_live import RISLiveCollector
|
||||||
|
from app.services.collectors.bgpstream import BGPStreamBackfillCollector
|
||||||
|
|
||||||
collector_registry.register(TOP500Collector())
|
collector_registry.register(TOP500Collector())
|
||||||
collector_registry.register(EpochAIGPUCollector())
|
collector_registry.register(EpochAIGPUCollector())
|
||||||
@@ -47,3 +51,7 @@ collector_registry.register(ArcGISCableCollector())
|
|||||||
collector_registry.register(FAOLandingPointCollector())
|
collector_registry.register(FAOLandingPointCollector())
|
||||||
collector_registry.register(ArcGISLandingPointCollector())
|
collector_registry.register(ArcGISLandingPointCollector())
|
||||||
collector_registry.register(ArcGISCableLandingRelationCollector())
|
collector_registry.register(ArcGISCableLandingRelationCollector())
|
||||||
|
collector_registry.register(SpaceTrackTLECollector())
|
||||||
|
collector_registry.register(CelesTrakTLECollector())
|
||||||
|
collector_registry.register(RISLiveCollector())
|
||||||
|
collector_registry.register(BGPStreamBackfillCollector())
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -5,10 +5,12 @@ Collects submarine cable data from ArcGIS GeoJSON API.
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ArcGISCableCollector(BaseCollector):
|
class ArcGISCableCollector(BaseCollector):
|
||||||
@@ -18,7 +20,14 @@ class ArcGISCableCollector(BaseCollector):
|
|||||||
frequency_hours = 168
|
frequency_hours = 168
|
||||||
data_type = "submarine_cable"
|
data_type = "submarine_cable"
|
||||||
|
|
||||||
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query"
|
@property
|
||||||
|
def base_url(self) -> str:
|
||||||
|
if self._resolved_url:
|
||||||
|
return self._resolved_url
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
|
||||||
|
config = get_data_sources_config()
|
||||||
|
return config.get_yaml_url("arcgis_cables")
|
||||||
|
|
||||||
async def fetch(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
||||||
@@ -75,7 +84,7 @@ class ArcGISCableCollector(BaseCollector):
|
|||||||
"color": props.get("color"),
|
"color": props.get("color"),
|
||||||
"route_coordinates": route_coordinates,
|
"route_coordinates": route_coordinates,
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
@@ -1,12 +1,10 @@
|
|||||||
"""ArcGIS Landing Points Collector
|
|
||||||
|
|
||||||
Collects landing point data from ArcGIS GeoJSON API.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
|
import httpx
|
||||||
|
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ArcGISLandingPointCollector(BaseCollector):
|
class ArcGISLandingPointCollector(BaseCollector):
|
||||||
@@ -16,21 +14,23 @@ class ArcGISLandingPointCollector(BaseCollector):
|
|||||||
frequency_hours = 168
|
frequency_hours = 168
|
||||||
data_type = "landing_point"
|
data_type = "landing_point"
|
||||||
|
|
||||||
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/1/query"
|
@property
|
||||||
|
def base_url(self) -> str:
|
||||||
|
if self._resolved_url:
|
||||||
|
return self._resolved_url
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
|
||||||
|
config = get_data_sources_config()
|
||||||
|
return config.get_yaml_url("arcgis_landing_points")
|
||||||
|
|
||||||
async def fetch(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
||||||
|
|
||||||
async with self._get_client() as client:
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||||
response = await client.get(self.base_url, params=params)
|
response = await client.get(self.base_url, params=params)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return self.parse_response(response.json())
|
return self.parse_response(response.json())
|
||||||
|
|
||||||
def _get_client(self):
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
return httpx.AsyncClient(timeout=60.0)
|
|
||||||
|
|
||||||
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
result = []
|
result = []
|
||||||
|
|
||||||
@@ -39,6 +39,11 @@ class ArcGISLandingPointCollector(BaseCollector):
|
|||||||
props = feature.get("properties", {})
|
props = feature.get("properties", {})
|
||||||
geometry = feature.get("geometry", {})
|
geometry = feature.get("geometry", {})
|
||||||
|
|
||||||
|
if geometry.get("type") == "Point":
|
||||||
|
coords = geometry.get("coordinates", [])
|
||||||
|
lon = coords[0] if len(coords) > 0 else None
|
||||||
|
lat = coords[1] if len(coords) > 1 else None
|
||||||
|
else:
|
||||||
lat = geometry.get("y") if geometry else None
|
lat = geometry.get("y") if geometry else None
|
||||||
lon = geometry.get("x") if geometry else None
|
lon = geometry.get("x") if geometry else None
|
||||||
|
|
||||||
@@ -54,6 +59,7 @@ class ArcGISLandingPointCollector(BaseCollector):
|
|||||||
"unit": "",
|
"unit": "",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"objectid": props.get("OBJECTID"),
|
"objectid": props.get("OBJECTID"),
|
||||||
|
"city_id": props.get("city_id"),
|
||||||
"cable_id": props.get("cable_id"),
|
"cable_id": props.get("cable_id"),
|
||||||
"cable_name": props.get("cable_name"),
|
"cable_name": props.get("cable_name"),
|
||||||
"facility": props.get("facility"),
|
"facility": props.get("facility"),
|
||||||
@@ -61,7 +67,7 @@ class ArcGISLandingPointCollector(BaseCollector):
|
|||||||
"status": props.get("status"),
|
"status": props.get("status"),
|
||||||
"landing_point_id": props.get("landing_point_id"),
|
"landing_point_id": props.get("landing_point_id"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
from typing import Dict, Any, List
|
import asyncio
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
|
||||||
|
|
||||||
@@ -11,45 +15,135 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
|
|||||||
frequency_hours = 168
|
frequency_hours = 168
|
||||||
data_type = "cable_landing_relation"
|
data_type = "cable_landing_relation"
|
||||||
|
|
||||||
base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/3/query"
|
@property
|
||||||
|
def base_url(self) -> str:
|
||||||
|
if self._resolved_url:
|
||||||
|
return self._resolved_url
|
||||||
|
config = get_data_sources_config()
|
||||||
|
return config.get_yaml_url("arcgis_cable_landing_relation")
|
||||||
|
|
||||||
|
def _layer_url(self, layer_id: int) -> str:
|
||||||
|
if "/FeatureServer/" not in self.base_url:
|
||||||
|
return self.base_url
|
||||||
|
prefix = self.base_url.split("/FeatureServer/")[0]
|
||||||
|
return f"{prefix}/FeatureServer/{layer_id}/query"
|
||||||
|
|
||||||
|
async def _fetch_layer_attributes(
|
||||||
|
self, client: httpx.AsyncClient, layer_id: int
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
response = await client.get(
|
||||||
|
self._layer_url(layer_id),
|
||||||
|
params={
|
||||||
|
"where": "1=1",
|
||||||
|
"outFields": "*",
|
||||||
|
"returnGeometry": "false",
|
||||||
|
"f": "json",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
return [feature.get("attributes", {}) for feature in data.get("features", [])]
|
||||||
|
|
||||||
|
async def _fetch_relation_features(self, client: httpx.AsyncClient) -> List[Dict[str, Any]]:
|
||||||
|
response = await client.get(
|
||||||
|
self.base_url,
|
||||||
|
params={
|
||||||
|
"where": "1=1",
|
||||||
|
"outFields": "*",
|
||||||
|
"returnGeometry": "true",
|
||||||
|
"f": "geojson",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
data = response.json()
|
||||||
|
return data.get("features", [])
|
||||||
|
|
||||||
async def fetch(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
import httpx
|
|
||||||
|
|
||||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
||||||
response = await client.get(self.base_url, params=params)
|
relation_features, landing_rows, cable_rows = await asyncio.gather(
|
||||||
response.raise_for_status()
|
self._fetch_relation_features(client),
|
||||||
return self.parse_response(response.json())
|
self._fetch_layer_attributes(client, 1),
|
||||||
|
self._fetch_layer_attributes(client, 2),
|
||||||
|
)
|
||||||
|
return self.parse_response(relation_features, landing_rows, cable_rows)
|
||||||
|
|
||||||
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def _build_landing_lookup(self, landing_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
|
||||||
result = []
|
lookup: Dict[int, Dict[str, Any]] = {}
|
||||||
|
for row in landing_rows:
|
||||||
|
city_id = row.get("city_id")
|
||||||
|
if city_id is None:
|
||||||
|
continue
|
||||||
|
lookup[int(city_id)] = {
|
||||||
|
"landing_point_id": row.get("landing_point_id") or city_id,
|
||||||
|
"landing_point_name": row.get("Name") or row.get("name") or "",
|
||||||
|
"facility": row.get("facility") or "",
|
||||||
|
"status": row.get("status") or "",
|
||||||
|
"country": row.get("country") or "",
|
||||||
|
}
|
||||||
|
return lookup
|
||||||
|
|
||||||
features = data.get("features", [])
|
def _build_cable_lookup(self, cable_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
|
||||||
for feature in features:
|
lookup: Dict[int, Dict[str, Any]] = {}
|
||||||
|
for row in cable_rows:
|
||||||
|
cable_id = row.get("cable_id")
|
||||||
|
if cable_id is None:
|
||||||
|
continue
|
||||||
|
lookup[int(cable_id)] = {
|
||||||
|
"cable_name": row.get("Name") or "",
|
||||||
|
"status": row.get("status") or "active",
|
||||||
|
}
|
||||||
|
return lookup
|
||||||
|
|
||||||
|
def parse_response(
|
||||||
|
self,
|
||||||
|
relation_features: List[Dict[str, Any]],
|
||||||
|
landing_rows: List[Dict[str, Any]],
|
||||||
|
cable_rows: List[Dict[str, Any]],
|
||||||
|
) -> List[Dict[str, Any]]:
|
||||||
|
result: List[Dict[str, Any]] = []
|
||||||
|
landing_lookup = self._build_landing_lookup(landing_rows)
|
||||||
|
cable_lookup = self._build_cable_lookup(cable_rows)
|
||||||
|
|
||||||
|
for feature in relation_features:
|
||||||
props = feature.get("properties", {})
|
props = feature.get("properties", {})
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
city_id = props.get("city_id")
|
||||||
|
cable_id = props.get("cable_id")
|
||||||
|
landing_info = landing_lookup.get(int(city_id), {}) if city_id is not None else {}
|
||||||
|
cable_info = cable_lookup.get(int(cable_id), {}) if cable_id is not None else {}
|
||||||
|
|
||||||
|
cable_name = cable_info.get("cable_name") or props.get("cable_name") or "Unknown"
|
||||||
|
landing_point_name = (
|
||||||
|
landing_info.get("landing_point_name")
|
||||||
|
or props.get("landing_point_name")
|
||||||
|
or "Unknown"
|
||||||
|
)
|
||||||
|
facility = landing_info.get("facility") or props.get("facility") or "-"
|
||||||
|
status = cable_info.get("status") or landing_info.get("status") or props.get("status") or "-"
|
||||||
|
country = landing_info.get("country") or props.get("country") or ""
|
||||||
|
landing_point_id = landing_info.get("landing_point_id") or props.get("landing_point_id") or city_id
|
||||||
|
|
||||||
entry = {
|
entry = {
|
||||||
"source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}",
|
"source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}",
|
||||||
"name": f"{props.get('cable_name', 'Unknown')} - {props.get('landing_point_name', 'Unknown')}",
|
"name": f"{cable_name} - {landing_point_name}",
|
||||||
"country": props.get("country", ""),
|
"country": country,
|
||||||
"city": props.get("landing_point_name", ""),
|
"city": landing_point_name,
|
||||||
"latitude": str(props.get("latitude", "")) if props.get("latitude") else "",
|
"latitude": str(props.get("latitude", "")) if props.get("latitude") else "",
|
||||||
"longitude": str(props.get("longitude", "")) if props.get("longitude") else "",
|
"longitude": str(props.get("longitude", "")) if props.get("longitude") else "",
|
||||||
"value": "",
|
"value": "",
|
||||||
"unit": "",
|
"unit": "",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"objectid": props.get("OBJECTID"),
|
"objectid": props.get("OBJECTID"),
|
||||||
"cable_id": props.get("cable_id"),
|
"city_id": city_id,
|
||||||
"cable_name": props.get("cable_name"),
|
"cable_id": cable_id,
|
||||||
"landing_point_id": props.get("landing_point_id"),
|
"cable_name": cable_name,
|
||||||
"landing_point_name": props.get("landing_point_name"),
|
"landing_point_id": landing_point_id,
|
||||||
"facility": props.get("facility"),
|
"landing_point_name": landing_point_name,
|
||||||
"status": props.get("status"),
|
"facility": facility,
|
||||||
|
"status": status,
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
@@ -2,12 +2,16 @@
|
|||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, List, Any, Optional
|
from typing import Dict, List, Any, Optional
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
import httpx
|
import httpx
|
||||||
from sqlalchemy import text
|
from sqlalchemy import select, text
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.core.collected_data_fields import build_dynamic_metadata, get_record_field
|
||||||
from app.core.config import settings
|
from app.core.config import settings
|
||||||
|
from app.core.countries import normalize_country
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
from app.core.websocket.broadcaster import broadcaster
|
||||||
|
|
||||||
|
|
||||||
class BaseCollector(ABC):
|
class BaseCollector(ABC):
|
||||||
@@ -18,19 +22,68 @@ class BaseCollector(ABC):
|
|||||||
module: str = "L1"
|
module: str = "L1"
|
||||||
frequency_hours: int = 4
|
frequency_hours: int = 4
|
||||||
data_type: str = "generic"
|
data_type: str = "generic"
|
||||||
|
fail_on_empty: bool = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._current_task = None
|
self._current_task = None
|
||||||
self._db_session = None
|
self._db_session = None
|
||||||
self._datasource_id = 1
|
self._datasource_id = 1
|
||||||
|
self._resolved_url: Optional[str] = None
|
||||||
|
self._last_broadcast_progress: Optional[int] = None
|
||||||
|
|
||||||
def update_progress(self, records_processed: int):
|
async def resolve_url(self, db: AsyncSession) -> None:
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
|
||||||
|
config = get_data_sources_config()
|
||||||
|
self._resolved_url = await config.get_url(self.name, db)
|
||||||
|
|
||||||
|
async def _publish_task_update(self, force: bool = False):
|
||||||
|
if not self._current_task:
|
||||||
|
return
|
||||||
|
|
||||||
|
progress = float(self._current_task.progress or 0.0)
|
||||||
|
rounded_progress = int(round(progress))
|
||||||
|
if not force and self._last_broadcast_progress == rounded_progress:
|
||||||
|
return
|
||||||
|
|
||||||
|
await broadcaster.broadcast_datasource_task_update(
|
||||||
|
{
|
||||||
|
"datasource_id": getattr(self, "_datasource_id", None),
|
||||||
|
"collector_name": self.name,
|
||||||
|
"task_id": self._current_task.id,
|
||||||
|
"status": self._current_task.status,
|
||||||
|
"phase": self._current_task.phase,
|
||||||
|
"progress": progress,
|
||||||
|
"records_processed": self._current_task.records_processed,
|
||||||
|
"total_records": self._current_task.total_records,
|
||||||
|
"started_at": to_iso8601_utc(self._current_task.started_at),
|
||||||
|
"completed_at": to_iso8601_utc(self._current_task.completed_at),
|
||||||
|
"error_message": self._current_task.error_message,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self._last_broadcast_progress = rounded_progress
|
||||||
|
|
||||||
|
async def update_progress(self, records_processed: int, *, commit: bool = False, force: bool = False):
|
||||||
"""Update task progress - call this during data processing"""
|
"""Update task progress - call this during data processing"""
|
||||||
if self._current_task and self._db_session and self._current_task.total_records > 0:
|
if self._current_task and self._db_session:
|
||||||
self._current_task.records_processed = records_processed
|
self._current_task.records_processed = records_processed
|
||||||
|
if self._current_task.total_records and self._current_task.total_records > 0:
|
||||||
self._current_task.progress = (
|
self._current_task.progress = (
|
||||||
records_processed / self._current_task.total_records
|
records_processed / self._current_task.total_records
|
||||||
) * 100
|
) * 100
|
||||||
|
else:
|
||||||
|
self._current_task.progress = 0.0
|
||||||
|
|
||||||
|
if commit:
|
||||||
|
await self._db_session.commit()
|
||||||
|
|
||||||
|
await self._publish_task_update(force=force)
|
||||||
|
|
||||||
|
async def set_phase(self, phase: str):
|
||||||
|
if self._current_task and self._db_session:
|
||||||
|
self._current_task.phase = phase
|
||||||
|
await self._db_session.commit()
|
||||||
|
await self._publish_task_update(force=True)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def fetch(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
@@ -41,14 +94,87 @@ class BaseCollector(ABC):
|
|||||||
"""Transform raw data to internal format (default: pass through)"""
|
"""Transform raw data to internal format (default: pass through)"""
|
||||||
return raw_data
|
return raw_data
|
||||||
|
|
||||||
|
def _parse_reference_date(self, value: Any) -> Optional[datetime]:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value
|
||||||
|
if isinstance(value, str):
|
||||||
|
return datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _build_comparable_payload(self, record: Any) -> Dict[str, Any]:
|
||||||
|
return {
|
||||||
|
"name": getattr(record, "name", None),
|
||||||
|
"title": getattr(record, "title", None),
|
||||||
|
"description": getattr(record, "description", None),
|
||||||
|
"country": get_record_field(record, "country"),
|
||||||
|
"city": get_record_field(record, "city"),
|
||||||
|
"latitude": get_record_field(record, "latitude"),
|
||||||
|
"longitude": get_record_field(record, "longitude"),
|
||||||
|
"value": get_record_field(record, "value"),
|
||||||
|
"unit": get_record_field(record, "unit"),
|
||||||
|
"metadata": getattr(record, "extra_data", None) or {},
|
||||||
|
"reference_date": (
|
||||||
|
getattr(record, "reference_date", None).isoformat()
|
||||||
|
if getattr(record, "reference_date", None)
|
||||||
|
else None
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _create_snapshot(
|
||||||
|
self,
|
||||||
|
db: AsyncSession,
|
||||||
|
task_id: int,
|
||||||
|
data: List[Dict[str, Any]],
|
||||||
|
started_at: datetime,
|
||||||
|
) -> int:
|
||||||
|
from app.models.data_snapshot import DataSnapshot
|
||||||
|
|
||||||
|
reference_dates = [
|
||||||
|
parsed
|
||||||
|
for parsed in (self._parse_reference_date(item.get("reference_date")) for item in data)
|
||||||
|
if parsed is not None
|
||||||
|
]
|
||||||
|
reference_date = max(reference_dates) if reference_dates else None
|
||||||
|
|
||||||
|
result = await db.execute(
|
||||||
|
select(DataSnapshot)
|
||||||
|
.where(DataSnapshot.source == self.name, DataSnapshot.is_current == True)
|
||||||
|
.order_by(DataSnapshot.completed_at.desc().nullslast(), DataSnapshot.id.desc())
|
||||||
|
.limit(1)
|
||||||
|
)
|
||||||
|
previous_snapshot = result.scalar_one_or_none()
|
||||||
|
|
||||||
|
snapshot = DataSnapshot(
|
||||||
|
datasource_id=getattr(self, "_datasource_id", 1),
|
||||||
|
task_id=task_id,
|
||||||
|
source=self.name,
|
||||||
|
snapshot_key=f"{self.name}:{task_id}",
|
||||||
|
reference_date=reference_date,
|
||||||
|
started_at=started_at,
|
||||||
|
status="running",
|
||||||
|
is_current=True,
|
||||||
|
parent_snapshot_id=previous_snapshot.id if previous_snapshot else None,
|
||||||
|
summary={},
|
||||||
|
)
|
||||||
|
db.add(snapshot)
|
||||||
|
|
||||||
|
if previous_snapshot:
|
||||||
|
previous_snapshot.is_current = False
|
||||||
|
|
||||||
|
await db.commit()
|
||||||
|
return snapshot.id
|
||||||
|
|
||||||
async def run(self, db: AsyncSession) -> Dict[str, Any]:
|
async def run(self, db: AsyncSession) -> Dict[str, Any]:
|
||||||
"""Full pipeline: fetch -> transform -> save"""
|
"""Full pipeline: fetch -> transform -> save"""
|
||||||
from app.services.collectors.registry import collector_registry
|
from app.services.collectors.registry import collector_registry
|
||||||
from app.models.task import CollectionTask
|
from app.models.task import CollectionTask
|
||||||
from app.models.collected_data import CollectedData
|
from app.models.data_snapshot import DataSnapshot
|
||||||
|
|
||||||
start_time = datetime.utcnow()
|
start_time = datetime.now(UTC)
|
||||||
datasource_id = getattr(self, "_datasource_id", 1)
|
datasource_id = getattr(self, "_datasource_id", 1)
|
||||||
|
snapshot_id: Optional[int] = None
|
||||||
|
|
||||||
if not collector_registry.is_active(self.name):
|
if not collector_registry.is_active(self.name):
|
||||||
return {"status": "skipped", "reason": "Collector is disabled"}
|
return {"status": "skipped", "reason": "Collector is disabled"}
|
||||||
@@ -56,6 +182,7 @@ class BaseCollector(ABC):
|
|||||||
task = CollectionTask(
|
task = CollectionTask(
|
||||||
datasource_id=datasource_id,
|
datasource_id=datasource_id,
|
||||||
status="running",
|
status="running",
|
||||||
|
phase="queued",
|
||||||
started_at=start_time,
|
started_at=start_time,
|
||||||
)
|
)
|
||||||
db.add(task)
|
db.add(task)
|
||||||
@@ -64,88 +191,221 @@ class BaseCollector(ABC):
|
|||||||
|
|
||||||
self._current_task = task
|
self._current_task = task
|
||||||
self._db_session = db
|
self._db_session = db
|
||||||
|
self._last_broadcast_progress = None
|
||||||
|
|
||||||
|
await self.resolve_url(db)
|
||||||
|
await self._publish_task_update(force=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
await self.set_phase("fetching")
|
||||||
raw_data = await self.fetch()
|
raw_data = await self.fetch()
|
||||||
task.total_records = len(raw_data)
|
task.total_records = len(raw_data)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
await self._publish_task_update(force=True)
|
||||||
|
|
||||||
|
if self.fail_on_empty and not raw_data:
|
||||||
|
raise RuntimeError(f"Collector {self.name} returned no data")
|
||||||
|
|
||||||
|
await self.set_phase("transforming")
|
||||||
data = self.transform(raw_data)
|
data = self.transform(raw_data)
|
||||||
|
snapshot_id = await self._create_snapshot(db, task_id, data, start_time)
|
||||||
|
|
||||||
records_count = await self._save_data(db, data)
|
await self.set_phase("saving")
|
||||||
|
records_count = await self._save_data(db, data, task_id=task_id, snapshot_id=snapshot_id)
|
||||||
|
|
||||||
task.status = "success"
|
task.status = "success"
|
||||||
|
task.phase = "completed"
|
||||||
task.records_processed = records_count
|
task.records_processed = records_count
|
||||||
task.progress = 100.0
|
task.progress = 100.0
|
||||||
task.completed_at = datetime.utcnow()
|
task.completed_at = datetime.now(UTC)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
await self._publish_task_update(force=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"records_processed": records_count,
|
"records_processed": records_count,
|
||||||
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
|
"execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Log task failure
|
|
||||||
task.status = "failed"
|
task.status = "failed"
|
||||||
|
task.phase = "failed"
|
||||||
task.error_message = str(e)
|
task.error_message = str(e)
|
||||||
task.completed_at = datetime.utcnow()
|
task.completed_at = datetime.now(UTC)
|
||||||
|
if snapshot_id is not None:
|
||||||
|
snapshot = await db.get(DataSnapshot, snapshot_id)
|
||||||
|
if snapshot:
|
||||||
|
snapshot.status = "failed"
|
||||||
|
snapshot.completed_at = datetime.now(UTC)
|
||||||
|
snapshot.summary = {"error": str(e)}
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
await self._publish_task_update(force=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
|
"execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _save_data(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
|
async def _save_data(
|
||||||
|
self,
|
||||||
|
db: AsyncSession,
|
||||||
|
data: List[Dict[str, Any]],
|
||||||
|
task_id: Optional[int] = None,
|
||||||
|
snapshot_id: Optional[int] = None,
|
||||||
|
) -> int:
|
||||||
"""Save transformed data to database"""
|
"""Save transformed data to database"""
|
||||||
from app.models.collected_data import CollectedData
|
from app.models.collected_data import CollectedData
|
||||||
|
from app.models.data_snapshot import DataSnapshot
|
||||||
|
|
||||||
if not data:
|
if not data:
|
||||||
|
if snapshot_id is not None:
|
||||||
|
snapshot = await db.get(DataSnapshot, snapshot_id)
|
||||||
|
if snapshot:
|
||||||
|
snapshot.record_count = 0
|
||||||
|
snapshot.summary = {"created": 0, "updated": 0, "unchanged": 0}
|
||||||
|
snapshot.status = "success"
|
||||||
|
snapshot.completed_at = datetime.now(UTC)
|
||||||
|
await db.commit()
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
collected_at = datetime.utcnow()
|
collected_at = datetime.now(UTC)
|
||||||
records_added = 0
|
records_added = 0
|
||||||
|
created_count = 0
|
||||||
|
updated_count = 0
|
||||||
|
unchanged_count = 0
|
||||||
|
seen_entity_keys: set[str] = set()
|
||||||
|
previous_current_keys: set[str] = set()
|
||||||
|
|
||||||
|
previous_current_result = await db.execute(
|
||||||
|
select(CollectedData.entity_key).where(
|
||||||
|
CollectedData.source == self.name,
|
||||||
|
CollectedData.is_current == True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
previous_current_keys = {row[0] for row in previous_current_result.fetchall() if row[0]}
|
||||||
|
|
||||||
for i, item in enumerate(data):
|
for i, item in enumerate(data):
|
||||||
|
print(
|
||||||
|
f"DEBUG: Saving item {i}: name={item.get('name')}, metadata={item.get('metadata', 'NOT FOUND')}"
|
||||||
|
)
|
||||||
|
raw_metadata = item.get("metadata", {})
|
||||||
|
extra_data = build_dynamic_metadata(
|
||||||
|
raw_metadata,
|
||||||
|
country=item.get("country"),
|
||||||
|
city=item.get("city"),
|
||||||
|
latitude=item.get("latitude"),
|
||||||
|
longitude=item.get("longitude"),
|
||||||
|
value=item.get("value"),
|
||||||
|
unit=item.get("unit"),
|
||||||
|
)
|
||||||
|
normalized_country = normalize_country(item.get("country"))
|
||||||
|
if normalized_country is not None:
|
||||||
|
extra_data["country"] = normalized_country
|
||||||
|
|
||||||
|
if item.get("country") and normalized_country != item.get("country"):
|
||||||
|
extra_data["raw_country"] = item.get("country")
|
||||||
|
if normalized_country is None:
|
||||||
|
extra_data["country_validation"] = "invalid"
|
||||||
|
|
||||||
|
source_id = item.get("source_id") or item.get("id")
|
||||||
|
reference_date = (
|
||||||
|
self._parse_reference_date(item.get("reference_date"))
|
||||||
|
)
|
||||||
|
source_id_str = str(source_id) if source_id is not None else None
|
||||||
|
entity_key = f"{self.name}:{source_id_str}" if source_id_str else f"{self.name}:{i}"
|
||||||
|
previous_record = None
|
||||||
|
|
||||||
|
if entity_key and entity_key not in seen_entity_keys:
|
||||||
|
result = await db.execute(
|
||||||
|
select(CollectedData)
|
||||||
|
.where(
|
||||||
|
CollectedData.source == self.name,
|
||||||
|
CollectedData.entity_key == entity_key,
|
||||||
|
CollectedData.is_current == True,
|
||||||
|
)
|
||||||
|
.order_by(CollectedData.collected_at.desc().nullslast(), CollectedData.id.desc())
|
||||||
|
)
|
||||||
|
previous_records = result.scalars().all()
|
||||||
|
if previous_records:
|
||||||
|
previous_record = previous_records[0]
|
||||||
|
for old_record in previous_records:
|
||||||
|
old_record.is_current = False
|
||||||
|
|
||||||
record = CollectedData(
|
record = CollectedData(
|
||||||
|
snapshot_id=snapshot_id,
|
||||||
|
task_id=task_id,
|
||||||
source=self.name,
|
source=self.name,
|
||||||
source_id=item.get("source_id") or item.get("id"),
|
source_id=source_id_str,
|
||||||
|
entity_key=entity_key,
|
||||||
data_type=self.data_type,
|
data_type=self.data_type,
|
||||||
name=item.get("name"),
|
name=item.get("name"),
|
||||||
title=item.get("title"),
|
title=item.get("title"),
|
||||||
description=item.get("description"),
|
description=item.get("description"),
|
||||||
country=item.get("country"),
|
extra_data=extra_data,
|
||||||
city=item.get("city"),
|
|
||||||
latitude=str(item.get("latitude", ""))
|
|
||||||
if item.get("latitude") is not None
|
|
||||||
else None,
|
|
||||||
longitude=str(item.get("longitude", ""))
|
|
||||||
if item.get("longitude") is not None
|
|
||||||
else None,
|
|
||||||
value=item.get("value"),
|
|
||||||
unit=item.get("unit"),
|
|
||||||
extra_data=item.get("metadata", {}),
|
|
||||||
collected_at=collected_at,
|
collected_at=collected_at,
|
||||||
reference_date=datetime.fromisoformat(
|
reference_date=reference_date,
|
||||||
item.get("reference_date").replace("Z", "+00:00")
|
|
||||||
)
|
|
||||||
if item.get("reference_date")
|
|
||||||
else None,
|
|
||||||
is_valid=1,
|
is_valid=1,
|
||||||
|
is_current=True,
|
||||||
|
previous_record_id=previous_record.id if previous_record else None,
|
||||||
|
deleted_at=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if previous_record is None:
|
||||||
|
record.change_type = "created"
|
||||||
|
record.change_summary = {}
|
||||||
|
created_count += 1
|
||||||
|
else:
|
||||||
|
previous_payload = self._build_comparable_payload(previous_record)
|
||||||
|
current_payload = self._build_comparable_payload(record)
|
||||||
|
if current_payload == previous_payload:
|
||||||
|
record.change_type = "unchanged"
|
||||||
|
record.change_summary = {}
|
||||||
|
unchanged_count += 1
|
||||||
|
else:
|
||||||
|
changed_fields = [
|
||||||
|
key for key in current_payload.keys() if current_payload[key] != previous_payload.get(key)
|
||||||
|
]
|
||||||
|
record.change_type = "updated"
|
||||||
|
record.change_summary = {"changed_fields": changed_fields}
|
||||||
|
updated_count += 1
|
||||||
|
|
||||||
db.add(record)
|
db.add(record)
|
||||||
|
seen_entity_keys.add(entity_key)
|
||||||
records_added += 1
|
records_added += 1
|
||||||
|
|
||||||
if i % 100 == 0:
|
if i % 100 == 0:
|
||||||
self.update_progress(i + 1)
|
await self.update_progress(i + 1, commit=True)
|
||||||
await db.commit()
|
|
||||||
|
if snapshot_id is not None:
|
||||||
|
deleted_keys = previous_current_keys - seen_entity_keys
|
||||||
|
await db.execute(
|
||||||
|
text(
|
||||||
|
"""
|
||||||
|
UPDATE collected_data
|
||||||
|
SET is_current = FALSE
|
||||||
|
WHERE source = :source
|
||||||
|
AND snapshot_id IS DISTINCT FROM :snapshot_id
|
||||||
|
AND COALESCE(is_current, TRUE) = TRUE
|
||||||
|
"""
|
||||||
|
),
|
||||||
|
{"source": self.name, "snapshot_id": snapshot_id},
|
||||||
|
)
|
||||||
|
snapshot = await db.get(DataSnapshot, snapshot_id)
|
||||||
|
if snapshot:
|
||||||
|
snapshot.record_count = records_added
|
||||||
|
snapshot.status = "success"
|
||||||
|
snapshot.completed_at = datetime.now(UTC)
|
||||||
|
snapshot.summary = {
|
||||||
|
"created": created_count,
|
||||||
|
"updated": updated_count,
|
||||||
|
"unchanged": unchanged_count,
|
||||||
|
"deleted": len(deleted_keys),
|
||||||
|
}
|
||||||
|
|
||||||
await db.commit()
|
await db.commit()
|
||||||
self.update_progress(len(data))
|
await self.update_progress(len(data), force=True)
|
||||||
return records_added
|
return records_added
|
||||||
|
|
||||||
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
|
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
|
||||||
@@ -192,8 +452,8 @@ async def log_task(
|
|||||||
status=status,
|
status=status,
|
||||||
records_processed=records_processed,
|
records_processed=records_processed,
|
||||||
error_message=error_message,
|
error_message=error_message,
|
||||||
started_at=datetime.utcnow(),
|
started_at=datetime.now(UTC),
|
||||||
completed_at=datetime.utcnow(),
|
completed_at=datetime.now(UTC),
|
||||||
)
|
)
|
||||||
db.add(task)
|
db.add(task)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
|||||||
313
backend/app/services/collectors/bgp_common.py
Normal file
313
backend/app/services/collectors/bgp_common.py
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
"""Shared helpers for BGP collectors."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import ipaddress
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.models.bgp_anomaly import BGPAnomaly
|
||||||
|
from app.models.collected_data import CollectedData
|
||||||
|
|
||||||
|
|
||||||
|
# Static lookup table keyed by collector id ("rrcNN"). Each value holds the
# "city", "country", "latitude" and "longitude" attached to events seen by
# that collector. NOTE(review): coordinates look like decimal degrees for the
# hosting city -- confirm against the upstream collector list.
RIPE_RIS_COLLECTOR_COORDS: dict[str, dict[str, Any]] = {
    "rrc00": dict(city="Amsterdam", country="Netherlands", latitude=52.3676, longitude=4.9041),
    "rrc01": dict(city="London", country="United Kingdom", latitude=51.5072, longitude=-0.1276),
    "rrc03": dict(city="Amsterdam", country="Netherlands", latitude=52.3676, longitude=4.9041),
    "rrc04": dict(city="Geneva", country="Switzerland", latitude=46.2044, longitude=6.1432),
    "rrc05": dict(city="Vienna", country="Austria", latitude=48.2082, longitude=16.3738),
    "rrc06": dict(city="Otemachi", country="Japan", latitude=35.686, longitude=139.7671),
    "rrc07": dict(city="Stockholm", country="Sweden", latitude=59.3293, longitude=18.0686),
    "rrc10": dict(city="Milan", country="Italy", latitude=45.4642, longitude=9.19),
    "rrc11": dict(city="New York", country="United States", latitude=40.7128, longitude=-74.006),
    "rrc12": dict(city="Frankfurt", country="Germany", latitude=50.1109, longitude=8.6821),
    "rrc13": dict(city="Moscow", country="Russia", latitude=55.7558, longitude=37.6173),
    "rrc14": dict(city="Palo Alto", country="United States", latitude=37.4419, longitude=-122.143),
    "rrc15": dict(city="Sao Paulo", country="Brazil", latitude=-23.5558, longitude=-46.6396),
    "rrc16": dict(city="Miami", country="United States", latitude=25.7617, longitude=-80.1918),
    "rrc18": dict(city="Barcelona", country="Spain", latitude=41.3874, longitude=2.1686),
    "rrc19": dict(city="Johannesburg", country="South Africa", latitude=-26.2041, longitude=28.0473),
    "rrc20": dict(city="Zurich", country="Switzerland", latitude=47.3769, longitude=8.5417),
    "rrc21": dict(city="Paris", country="France", latitude=48.8566, longitude=2.3522),
    "rrc22": dict(city="Bucharest", country="Romania", latitude=44.4268, longitude=26.1025),
    "rrc23": dict(city="Singapore", country="Singapore", latitude=1.3521, longitude=103.8198),
    "rrc24": dict(city="Montevideo", country="Uruguay", latitude=-34.9011, longitude=-56.1645),
    "rrc25": dict(city="Amsterdam", country="Netherlands", latitude=52.3676, longitude=4.9041),
    "rrc26": dict(city="Dubai", country="United Arab Emirates", latitude=25.2048, longitude=55.2708),
}
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_int(value: Any) -> int | None:
|
||||||
|
try:
|
||||||
|
if value in (None, ""):
|
||||||
|
return None
|
||||||
|
return int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_timestamp(value: Any) -> datetime:
|
||||||
|
if isinstance(value, datetime):
|
||||||
|
return value.astimezone(UTC) if value.tzinfo else value.replace(tzinfo=UTC)
|
||||||
|
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
return datetime.fromtimestamp(value, tz=UTC)
|
||||||
|
|
||||||
|
if isinstance(value, str) and value:
|
||||||
|
normalized = value.replace("Z", "+00:00")
|
||||||
|
parsed = datetime.fromisoformat(normalized)
|
||||||
|
return parsed.astimezone(UTC) if parsed.tzinfo else parsed.replace(tzinfo=UTC)
|
||||||
|
|
||||||
|
return datetime.now(UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_as_path(raw_path: Any) -> list[int]:
    """Flatten an AS path given as text or as a sequence into a list of ASNs.

    Args:
        raw_path: Either a whitespace-separated string such as
            ``"64500 64501 {64502,64503}"`` or a list/tuple of hops. A hop that
            is itself a list/tuple is treated as an AS_SET and its members are
            appended in order.

    Returns:
        The ASNs in path order. Tokens that ``_safe_int`` cannot convert are
        dropped; unsupported input types yield an empty list.
    """
    if raw_path in (None, ""):
        return []

    if isinstance(raw_path, str):
        # Braces and commas only delimit AS_SET members, so turn them into
        # separators; deleting them would fuse or drop the member ASNs.
        cleaned = raw_path
        for delimiter in ("{", "}", ","):
            cleaned = cleaned.replace(delimiter, " ")
        tokens: list[Any] = cleaned.split()
    elif isinstance(raw_path, (list, tuple)):
        tokens = []
        for hop in raw_path:
            if isinstance(hop, (list, tuple)):
                tokens.extend(hop)  # nested sequence == AS_SET
            else:
                tokens.append(hop)
    else:
        return []

    return [asn for asn in (_safe_int(token) for token in tokens) if asn is not None]
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_bgp_event(payload: dict[str, Any], *, project: str) -> dict[str, Any]:
    """Normalise one raw BGP message into the flat record shape used by collectors.

    Several field aliases are accepted so different feeds can share this code:
    ``path``/``as_path``, ``event_type``/``type``/``msg_type``,
    ``prefix``/``prefixes``/``target_prefix``, ``timestamp``/``time``/``ts``,
    ``collector``/``host``/``router`` and ``peer_asn``/``peer``. BGP attributes
    may also be nested under ``attrs``.

    Args:
        payload: The raw message.
        project: Stored verbatim as ``metadata["project"]``.

    Returns:
        A dict with ``source_id`` (first 24 hex chars of a SHA-1 over collector,
        peer ASN, prefix, event type, timestamp and AS path), ``name``,
        ``title``, ``description``, ``reference_date``, the collector's
        ``country``/``city``/``latitude``/``longitude`` (``None`` for collectors
        missing from ``RIPE_RIS_COLLECTOR_COORDS``) and a ``metadata`` dict.

    Notes:
        When ``prefixes`` is a list, only its first non-empty entry is used;
        the full list remains available through ``metadata["raw_message"]``.
    """
    # ``attrs`` may be missing, ``None`` or not a mapping; treat all as empty
    # instead of failing on ``None.get``.
    nested_attrs = payload.get("attrs")
    attrs: dict[str, Any] = nested_attrs if isinstance(nested_attrs, dict) else {}

    def pick(key: str) -> Any:
        """Top-level value if set, else the ``attrs`` value; keeps a legitimate 0."""
        top_level = payload.get(key)
        return attrs.get(key) if top_level in (None, "") else top_level

    raw_message = payload.get("raw_message", payload)
    raw_path = (
        payload.get("path")
        or payload.get("as_path")
        or attrs.get("path")
        or attrs.get("as_path")
        or []
    )
    as_path = _normalize_as_path(raw_path)

    raw_type = str(payload.get("event_type") or payload.get("type") or payload.get("msg_type") or "").lower()
    if raw_type in {"a", "announce", "announcement"}:
        event_type = "announcement"
    elif raw_type in {"w", "withdraw", "withdrawal"}:
        event_type = "withdrawal"
    elif raw_type in {"r", "rib"}:
        event_type = "rib"
    else:
        event_type = raw_type or "announcement"

    raw_prefix = payload.get("prefix") or payload.get("prefixes") or payload.get("target_prefix") or ""
    if isinstance(raw_prefix, (list, tuple)):
        # str() of a list would leave quotes inside the value once the
        # brackets are stripped, so pick the first usable entry instead.
        raw_prefix = next((candidate for candidate in raw_prefix if candidate), "")
    prefix = str(raw_prefix).strip()
    if prefix.startswith("[") and prefix.endswith("]"):
        prefix = prefix[1:-1]

    timestamp = _parse_timestamp(payload.get("timestamp") or payload.get("time") or payload.get("ts"))
    collector = str(payload.get("collector") or payload.get("host") or payload.get("router") or "unknown")
    peer_asn = _safe_int(payload.get("peer_asn") or payload.get("peer"))
    # Fall back to the last hop of the AS path when no explicit origin is given.
    origin_asn = _safe_int(payload.get("origin_asn")) or (as_path[-1] if as_path else None)

    source_material = "|".join(
        [
            collector,
            str(peer_asn or ""),
            prefix,
            event_type,
            timestamp.isoformat(),
            ",".join(str(asn) for asn in as_path),
        ]
    )
    source_id = hashlib.sha1(source_material.encode("utf-8")).hexdigest()[:24]

    prefix_length = None
    is_more_specific = False
    if prefix:
        try:
            network = ipaddress.ip_network(prefix, strict=False)
            prefix_length = int(network.prefixlen)
            # Longer than /24 (IPv4) or /48 (IPv6) is flagged as more-specific.
            is_more_specific = prefix_length > (24 if network.version == 4 else 48)
        except ValueError:
            # Placeholder or malformed prefixes simply carry no length.
            prefix_length = None

    collector_location = RIPE_RIS_COLLECTOR_COORDS.get(collector, {})
    metadata = {
        "project": project,
        "collector": collector,
        "peer_asn": peer_asn,
        "peer_ip": payload.get("peer_ip") or payload.get("peer_address"),
        "event_type": event_type,
        "prefix": prefix,
        "origin_asn": origin_asn,
        "as_path": as_path,
        "communities": payload.get("communities") or attrs.get("communities") or [],
        "next_hop": payload.get("next_hop") or attrs.get("next_hop"),
        "med": pick("med"),
        "local_pref": pick("local_pref"),
        "timestamp": timestamp.isoformat(),
        "as_path_length": len(as_path),
        "prefix_length": prefix_length,
        "is_more_specific": is_more_specific,
        "visibility_weight": 1,
        "collector_location": collector_location,
        "raw_message": raw_message,
    }

    return {
        "source_id": source_id,
        "name": prefix or f"{collector}:{event_type}",
        "title": f"{event_type} {prefix}".strip(),
        "description": f"{collector} observed {event_type} for {prefix}".strip(),
        "reference_date": timestamp.isoformat(),
        "country": collector_location.get("country"),
        "city": collector_location.get("city"),
        "latitude": collector_location.get("latitude"),
        "longitude": collector_location.get("longitude"),
        "metadata": metadata,
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def create_bgp_anomalies_for_batch(
    db: AsyncSession,
    *,
    source: str,
    snapshot_id: int | None,
    task_id: int | None,
    events: list[dict[str, Any]],
) -> int:
    """Derive BGP anomalies from one batch of normalised events and persist new ones.

    Three heuristics are applied to ``events`` (dicts carrying a ``metadata``
    mapping as produced by ``normalize_bgp_event``):

    * ``origin_change`` -- a prefix is originated by an ASN that does not
      appear among the origins stored for that prefix by other snapshots of
      the same ``source``. Only raised when such a baseline exists.
    * ``more_specific_burst`` -- two or more events flagged
      ``is_more_specific`` share the same network address (the part of the
      prefix before ``/``).
    * ``mass_withdrawal`` -- three or more withdrawals for the same
      ``(prefix, origin_asn)`` pair; ``critical`` from eight upwards.

    Anomalies whose ``entity_key`` already exists in the table are skipped.

    Args:
        db: Session used for the baseline lookup and for inserting anomalies.
        source: Collector name stored on each anomaly and used to scope the baseline.
        snapshot_id: Snapshot of the current batch; its own rows are excluded
            from the baseline.
        task_id: Collection task stored on each anomaly.
        events: Normalised events of the current batch.

    Returns:
        Number of anomaly rows added. The session is committed only when at
        least one row was added.
    """
    if not events:
        return 0

    pending_anomalies: list[BGPAnomaly] = []
    prefix_to_origins: defaultdict[str, set[int]] = defaultdict(set)
    prefix_to_more_specifics: defaultdict[str, list[dict[str, Any]]] = defaultdict(list)
    withdrawal_counter: Counter[tuple[str, int | None]] = Counter()

    prefixes = {event["metadata"].get("prefix") for event in events if event.get("metadata", {}).get("prefix")}
    previous_origin_map: dict[str, set[int]] = defaultdict(set)

    if prefixes:
        # Only the JSON metadata is needed for the baseline, so avoid loading
        # full ORM rows. IS DISTINCT FROM (rather than !=) keeps rows whose
        # snapshot_id is NULL in the baseline.
        previous_query = await db.execute(
            select(CollectedData.extra_data).where(
                CollectedData.source == source,
                CollectedData.snapshot_id.is_distinct_from(snapshot_id),
                CollectedData.extra_data["prefix"].as_string().in_(sorted(prefixes)),
            )
        )
        for stored_metadata in previous_query.scalars().all():
            metadata = stored_metadata or {}
            prefix = metadata.get("prefix")
            origin = _safe_int(metadata.get("origin_asn"))
            if prefix and origin is not None:
                previous_origin_map[prefix].add(origin)

    # Single pass over the batch to collect the inputs of all three heuristics.
    for event in events:
        metadata = event.get("metadata", {})
        prefix = metadata.get("prefix")
        origin_asn = _safe_int(metadata.get("origin_asn"))
        if not prefix:
            continue

        if origin_asn is not None:
            prefix_to_origins[prefix].add(origin_asn)

        if metadata.get("is_more_specific"):
            prefix_to_more_specifics[prefix.split("/")[0]].append(event)

        if metadata.get("event_type") == "withdrawal":
            withdrawal_counter[(prefix, origin_asn)] += 1

    # One detection timestamp for every anomaly raised from this batch.
    detected_at = datetime.now(UTC)

    for prefix, origins in prefix_to_origins.items():
        historic = previous_origin_map.get(prefix, set())
        new_origins = sorted(origin for origin in origins if origin not in historic)
        # Without a baseline a "new" origin is just the first sighting.
        if historic and new_origins:
            for new_origin in new_origins:
                pending_anomalies.append(
                    BGPAnomaly(
                        snapshot_id=snapshot_id,
                        task_id=task_id,
                        source=source,
                        anomaly_type="origin_change",
                        severity="critical",
                        status="active",
                        entity_key=f"origin_change:{prefix}:{new_origin}",
                        prefix=prefix,
                        origin_asn=sorted(historic)[0],
                        new_origin_asn=new_origin,
                        peer_scope=[],
                        started_at=detected_at,
                        confidence=0.86,
                        summary=f"Prefix {prefix} is now originated by AS{new_origin}, outside the current baseline.",
                        evidence={"previous_origins": sorted(historic), "current_origins": sorted(origins)},
                    )
                )

    for root_prefix, more_specifics in prefix_to_more_specifics.items():
        if len(more_specifics) >= 2:
            sample = more_specifics[0]["metadata"]
            pending_anomalies.append(
                BGPAnomaly(
                    snapshot_id=snapshot_id,
                    task_id=task_id,
                    source=source,
                    anomaly_type="more_specific_burst",
                    severity="high",
                    status="active",
                    entity_key=f"more_specific_burst:{root_prefix}:{len(more_specifics)}",
                    prefix=sample.get("prefix"),
                    origin_asn=_safe_int(sample.get("origin_asn")),
                    new_origin_asn=None,
                    peer_scope=sorted(
                        {
                            str(item.get("metadata", {}).get("collector") or "")
                            for item in more_specifics
                            if item.get("metadata", {}).get("collector")
                        }
                    ),
                    started_at=detected_at,
                    confidence=0.72,
                    summary=f"{len(more_specifics)} more-specific announcements clustered around {root_prefix}.",
                    # Cap the stored evidence at the first ten events.
                    evidence={"events": [item.get("metadata") for item in more_specifics[:10]]},
                )
            )

    for (prefix, origin_asn), count in withdrawal_counter.items():
        if count >= 3:
            pending_anomalies.append(
                BGPAnomaly(
                    snapshot_id=snapshot_id,
                    task_id=task_id,
                    source=source,
                    anomaly_type="mass_withdrawal",
                    severity="high" if count < 8 else "critical",
                    status="active",
                    entity_key=f"mass_withdrawal:{prefix}:{origin_asn}:{count}",
                    prefix=prefix,
                    origin_asn=origin_asn,
                    new_origin_asn=None,
                    peer_scope=[],
                    started_at=detected_at,
                    # Confidence grows with the number of withdrawals, capped at 0.95.
                    confidence=min(0.55 + (count * 0.05), 0.95),
                    summary=f"{count} withdrawal events observed for {prefix} in the current ingest window.",
                    evidence={"withdrawal_count": count},
                )
            )

    if not pending_anomalies:
        return 0

    # Skip anomalies that were already recorded under the same entity key.
    existing_result = await db.execute(
        select(BGPAnomaly.entity_key).where(
            BGPAnomaly.entity_key.in_([item.entity_key for item in pending_anomalies])
        )
    )
    existing_keys = {row[0] for row in existing_result.fetchall()}

    created = 0
    for anomaly in pending_anomalies:
        if anomaly.entity_key in existing_keys:
            continue
        db.add(anomaly)
        created += 1

    if created:
        await db.commit()
    return created
|
||||||
120
backend/app/services/collectors/bgpstream.py
Normal file
120
backend/app/services/collectors/bgpstream.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
"""BGPStream backfill collector."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.services.collectors.bgp_common import create_bgp_anomalies_for_batch, normalize_bgp_event
|
||||||
|
|
||||||
|
|
||||||
|
class BGPStreamBackfillCollector(BaseCollector):
    """Backfill collector that queries a BGPStream broker for recent update windows.

    ``fetch`` asks the broker for the resources of a 24-hour interval ending one
    hour ago, ``transform`` maps each item to the normalised BGP event shape,
    and ``run`` additionally derives anomalies from the transformed batch once
    the base pipeline has succeeded.
    """

    name = "bgpstream_bgp"
    priority = "P1"
    module = "L3"
    frequency_hours = 6
    data_type = "bgp_rib"
    fail_on_empty = True

    async def fetch(self) -> list[dict[str, Any]]:
        """Fetch broker resources without blocking the event loop.

        Raises:
            RuntimeError: If no broker URL has been resolved for this collector.
        """
        if not self._resolved_url:
            raise RuntimeError("BGPStream URL is not configured")

        # urllib is blocking, so the request runs in a worker thread.
        return await asyncio.to_thread(self._fetch_resource_windows)

    def _fetch_resource_windows(self) -> list[dict[str, Any]]:
        """Query the broker's ``/data`` endpoint and return its ``resources`` list.

        Raises:
            RuntimeError: If the broker reports an error or returns a body that
                is not a JSON object.
        """
        # 24-hour window that ends one hour before "now".
        end = int(time.time()) - 3600
        start = end - 86400
        params = [
            ("projects[]", "routeviews"),
            ("collectors[]", "route-views2"),
            ("types[]", "updates"),
            ("intervals[]", f"{start},{end}"),
        ]
        # Tolerate a configured base URL with a trailing slash.
        base_url = str(self._resolved_url).rstrip("/")
        url = f"{base_url}/data?{urllib.parse.urlencode(params)}"
        request = urllib.request.Request(
            url,
            headers={"User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)"},
        )
        with urllib.request.urlopen(request, timeout=30) as response:
            body = json.loads(response.read().decode())

        if not isinstance(body, dict):
            raise RuntimeError("BGPStream broker returned an unexpected payload")

        if body.get("error"):
            raise RuntimeError(f"BGPStream broker error: {body['error']}")

        # ``data`` / ``resources`` may be present but null.
        data = body.get("data") or {}
        return data.get("resources") or []

    def transform(self, raw_data: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Convert broker items into normalised BGP records.

        Items that carry both ``collector`` and ``prefix`` and none of the
        file-window keys are treated as decoded events. Everything else is
        treated as a broker file window and recorded as a ``rib`` event with a
        placeholder prefix, keeping the original item under
        ``metadata["broker_record"]``. Non-dict items are skipped.

        The result is also kept on the instance so ``run`` can feed it to the
        anomaly detection.
        """
        transformed: list[dict[str, Any]] = []
        for item in raw_data:
            if not isinstance(item, dict):
                continue

            is_broker_window = any(key in item for key in ("filename", "url", "startTime", "start_time"))

            if {"collector", "prefix"} <= set(item.keys()) and not is_broker_window:
                transformed.append(normalize_bgp_event(item, project="bgpstream"))
                continue

            # Broker responses provide file windows rather than decoded events.
            collector = item.get("collector") or item.get("project") or "bgpstream"
            timestamp = item.get("time") or item.get("startTime") or item.get("start_time")
            name = item.get("filename") or item.get("url") or f"{collector}-window"
            normalized = normalize_bgp_event(
                {
                    "collector": collector,
                    "event_type": "rib",
                    "prefix": item.get("prefix") or "historical-window",
                    "timestamp": timestamp,
                    "origin_asn": item.get("origin_asn"),
                    "path": item.get("path") or [],
                    "raw_message": item,
                },
                project="bgpstream",
            )
            transformed.append(
                normalized
                | {
                    "name": name,
                    "title": f"BGPStream {collector}",
                    "description": "Historical BGPStream backfill window",
                    "metadata": {
                        **normalized["metadata"],
                        "broker_record": item,
                    },
                }
            )

        self._latest_transformed_batch = transformed
        return transformed

    async def run(self, db):
        """Run the base pipeline, then create anomalies for the saved batch.

        Returns:
            The base result dict; on success it additionally carries
            ``anomalies_created``.
        """
        result = await super().run(db)
        if result.get("status") != "success":
            return result

        snapshot_id = await self._resolve_snapshot_id(db, result.get("task_id"))
        anomaly_count = await create_bgp_anomalies_for_batch(
            db,
            source=self.name,
            snapshot_id=snapshot_id,
            task_id=result.get("task_id"),
            events=getattr(self, "_latest_transformed_batch", []),
        )
        result["anomalies_created"] = anomaly_count
        return result

    async def _resolve_snapshot_id(self, db, task_id: int | None) -> int | None:
        """Return the id of the newest snapshot recorded for ``task_id``, if any."""
        if task_id is None:
            return None

        from sqlalchemy import select

        from app.models.data_snapshot import DataSnapshot

        # LIMIT 1 keeps scalar_one_or_none() from raising when a task has more
        # than one snapshot row; the ordering makes the newest one win.
        result = await db.execute(
            select(DataSnapshot.id)
            .where(DataSnapshot.task_id == task_id)
            .order_by(DataSnapshot.id.desc())
            .limit(1)
        )
        return result.scalar_one_or_none()
|
||||||
115
backend/app/services/collectors/celestrak.py
Normal file
115
backend/app/services/collectors/celestrak.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
"""CelesTrak TLE Collector
|
||||||
|
|
||||||
|
Collects satellite TLE (Two-Line Element) data from CelesTrak.org.
|
||||||
|
Free, no authentication required.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.core.satellite_tle import build_tle_lines_from_elements
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
|
||||||
|
|
||||||
|
class CelesTrakTLECollector(BaseCollector):
    """Collector for satellite orbital elements published by CelesTrak.

    ``fetch`` downloads the JSON element sets for every entry in
    ``satellite_groups`` and returns the concatenated raw records.
    ``transform`` maps each raw record to a ``name`` / ``reference_date`` /
    ``metadata`` entry and attaches a TLE line pair.
    """

    name = "celestrak_tle"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "satellite_tle"

    # GROUP values requested on every run, in request order.
    satellite_groups = (
        "starlink",
        "gps-ops",
        "galileo",
        "glonass",
        "beidou",
        "leo",
        "geo",
        "iridium-next",
    )

    @property
    def base_url(self) -> str:
        """Endpoint that serves the per-group element sets."""
        return "https://celestrak.org/NORAD/elements/gp.php"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Download the raw element sets of all configured groups.

        A failure of one group is logged and does not abort the others.

        Returns:
            The raw records of all groups in request order. When no group
            yields any record, the output of ``_get_sample_data()`` is
            returned instead.
        """
        all_satellites: List[Dict[str, Any]] = []

        async with httpx.AsyncClient(timeout=120.0) as client:
            for group in self.satellite_groups:
                try:
                    # Same request as before, but built from base_url so the
                    # endpoint is defined in exactly one place.
                    response = await client.get(
                        self.base_url,
                        params={"GROUP": group, "FORMAT": "json"},
                    )

                    if response.status_code != 200:
                        # Previously ignored without any trace.
                        print(
                            f"CelesTrak: Group '{group}' returned HTTP {response.status_code}"
                        )
                        continue

                    data = response.json()
                    if isinstance(data, list):
                        all_satellites.extend(data)
                        print(f"CelesTrak: Fetched {len(data)} satellites from group '{group}'")
                except Exception as e:
                    print(f"CelesTrak: Error fetching group '{group}': {e}")

        if not all_satellites:
            return self._get_sample_data()

        print(f"CelesTrak: Total satellites fetched: {len(all_satellites)}")

        # Return raw data - base.run() will call transform()
        return all_satellites

    def transform(self, raw_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert raw element-set records into the internal entry format.

        Args:
            raw_data: Records keyed by upper-case field names such as
                ``OBJECT_NAME``, ``NORAD_CAT_ID`` and ``EPOCH``.

        Returns:
            One dict per input record with ``name``, ``reference_date`` and a
            ``metadata`` dict holding the orbital elements and TLE lines.
        """
        transformed = []
        for item in raw_data:
            tle_line1, tle_line2 = build_tle_lines_from_elements(
                norad_cat_id=item.get("NORAD_CAT_ID"),
                epoch=item.get("EPOCH"),
                inclination=item.get("INCLINATION"),
                raan=item.get("RA_OF_ASC_NODE"),
                eccentricity=item.get("ECCENTRICITY"),
                arg_of_perigee=item.get("ARG_OF_PERICENTER"),
                mean_anomaly=item.get("MEAN_ANOMALY"),
                mean_motion=item.get("MEAN_MOTION"),
            )

            transformed.append(
                {
                    "name": item.get("OBJECT_NAME", "Unknown"),
                    "reference_date": item.get("EPOCH", ""),
                    "metadata": {
                        "norad_cat_id": item.get("NORAD_CAT_ID"),
                        "international_designator": item.get("OBJECT_ID"),
                        "epoch": item.get("EPOCH"),
                        "mean_motion": item.get("MEAN_MOTION"),
                        "eccentricity": item.get("ECCENTRICITY"),
                        "inclination": item.get("INCLINATION"),
                        "raan": item.get("RA_OF_ASC_NODE"),
                        "arg_of_perigee": item.get("ARG_OF_PERICENTER"),
                        "mean_anomaly": item.get("MEAN_ANOMALY"),
                        "classification_type": item.get("CLASSIFICATION_TYPE"),
                        "bstar": item.get("BSTAR"),
                        "mean_motion_dot": item.get("MEAN_MOTION_DOT"),
                        "mean_motion_ddot": item.get("MEAN_MOTION_DDOT"),
                        "ephemeris_type": item.get("EPHEMERIS_TYPE"),
                        # Prefer the original TLE lines when the source provides them.
                        # If they are missing, store a normalized TLE pair built once on the backend.
                        "tle_line1": item.get("TLE_LINE1") or tle_line1,
                        "tle_line2": item.get("TLE_LINE2") or tle_line2,
                    },
                }
            )
        return transformed

    def _get_sample_data(self) -> List[Dict[str, Any]]:
        """Return one placeholder record in the raw shape ``transform`` reads.

        ``fetch`` returns this list as raw data, so the keys must be the
        upper-case names ``transform`` looks up. The earlier lowercase keys
        were all missed and produced an "Unknown" entry with empty metadata.
        """
        return [
            {
                "OBJECT_NAME": "STARLINK-1000",
                "NORAD_CAT_ID": 44720,
                "OBJECT_ID": "2019-029AZ",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.79234567,
                "ECCENTRICITY": 0.0001234,
                "INCLINATION": 53.0,
            },
        ]
|
||||||
@@ -10,11 +10,12 @@ Some endpoints require authentication for higher rate limits.
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from app.services.collectors.base import HTTPCollector
|
from app.services.collectors.base import HTTPCollector
|
||||||
|
|
||||||
|
|
||||||
# Cloudflare API token (optional - for higher rate limits)
|
# Cloudflare API token (optional - for higher rate limits)
|
||||||
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")
|
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")
|
||||||
|
|
||||||
@@ -58,7 +59,7 @@ class CloudflareRadarDeviceCollector(HTTPCollector):
|
|||||||
"other_percent": float(summary.get("other", 0)),
|
"other_percent": float(summary.get("other", 0)),
|
||||||
"date_range": result.get("meta", {}).get("dateRange", {}),
|
"date_range": result.get("meta", {}).get("dateRange", {}),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().isoformat(),
|
"reference_date": datetime.now(UTC).isoformat(),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -106,7 +107,7 @@ class CloudflareRadarTrafficCollector(HTTPCollector):
|
|||||||
"requests": item.get("requests"),
|
"requests": item.get("requests"),
|
||||||
"visit_duration": item.get("visitDuration"),
|
"visit_duration": item.get("visitDuration"),
|
||||||
},
|
},
|
||||||
"reference_date": item.get("datetime", datetime.utcnow().isoformat()),
|
"reference_date": item.get("datetime", datetime.now(UTC).isoformat()),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -154,7 +155,7 @@ class CloudflareRadarTopASCollector(HTTPCollector):
|
|||||||
"traffic_share": item.get("trafficShare"),
|
"traffic_share": item.get("trafficShare"),
|
||||||
"country_code": item.get("location", {}).get("countryCode"),
|
"country_code": item.get("location", {}).get("countryCode"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().isoformat(),
|
"reference_date": datetime.now(UTC).isoformat(),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
@@ -6,13 +6,14 @@ https://epoch.ai/data/gpu-clusters
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class EpochAIGPUCollector(BaseCollector):
|
class EpochAIGPUCollector(BaseCollector):
|
||||||
name = "epoch_ai_gpu"
|
name = "epoch_ai_gpu"
|
||||||
priority = "P0"
|
priority = "P0"
|
||||||
@@ -63,7 +64,7 @@ class EpochAIGPUCollector(BaseCollector):
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"raw_data": perf_cell,
|
"raw_data": perf_cell,
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, IndexError, AttributeError):
|
except (ValueError, IndexError, AttributeError):
|
||||||
@@ -113,6 +114,6 @@ class EpochAIGPUCollector(BaseCollector):
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"note": "Sample data - Epoch AI page structure may vary",
|
"note": "Sample data - Epoch AI page structure may vary",
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -4,12 +4,13 @@ Collects landing point data from FAO CSV API.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class FAOLandingPointCollector(BaseCollector):
|
class FAOLandingPointCollector(BaseCollector):
|
||||||
name = "fao_landing_points"
|
name = "fao_landing_points"
|
||||||
priority = "P1"
|
priority = "P1"
|
||||||
@@ -57,7 +58,7 @@ class FAOLandingPointCollector(BaseCollector):
|
|||||||
"is_tbd": is_tbd,
|
"is_tbd": is_tbd,
|
||||||
"original_id": feature_id,
|
"original_id": feature_id,
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, IndexError):
|
except (ValueError, IndexError):
|
||||||
|
|||||||
@@ -7,11 +7,12 @@ https://huggingface.co/spaces
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
from app.services.collectors.base import HTTPCollector
|
from app.services.collectors.base import HTTPCollector
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class HuggingFaceModelCollector(HTTPCollector):
|
class HuggingFaceModelCollector(HTTPCollector):
|
||||||
name = "huggingface_models"
|
name = "huggingface_models"
|
||||||
priority = "P1"
|
priority = "P1"
|
||||||
@@ -45,7 +46,7 @@ class HuggingFaceModelCollector(HTTPCollector):
|
|||||||
"library_name": item.get("library_name"),
|
"library_name": item.get("library_name"),
|
||||||
"created_at": item.get("createdAt"),
|
"created_at": item.get("createdAt"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -86,7 +87,7 @@ class HuggingFaceDatasetCollector(HTTPCollector):
|
|||||||
"tags": (item.get("tags", []) or [])[:10],
|
"tags": (item.get("tags", []) or [])[:10],
|
||||||
"created_at": item.get("createdAt"),
|
"created_at": item.get("createdAt"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -127,7 +128,7 @@ class HuggingFaceSpacesCollector(HTTPCollector):
|
|||||||
"tags": (item.get("tags", []) or [])[:10],
|
"tags": (item.get("tags", []) or [])[:10],
|
||||||
"created_at": item.get("createdAt"),
|
"created_at": item.get("createdAt"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
@@ -13,11 +13,12 @@ To get higher limits, set PEERINGDB_API_KEY environment variable.
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from app.services.collectors.base import HTTPCollector
|
from app.services.collectors.base import HTTPCollector
|
||||||
|
|
||||||
|
|
||||||
# PeeringDB API key - read from environment variable
|
# PeeringDB API key - read from environment variable
|
||||||
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")
|
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")
|
||||||
|
|
||||||
@@ -75,7 +76,7 @@ class PeeringDBIXPCollector(HTTPCollector):
|
|||||||
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
async def collect(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
"""Collect IXP data from PeeringDB with rate limit handling"""
|
"""Collect IXP data from PeeringDB with rate limit handling"""
|
||||||
response_data = await self.fetch_with_retry()
|
response_data = await self.fetch_with_retry()
|
||||||
if not response_data:
|
if not response_data:
|
||||||
@@ -105,7 +106,7 @@ class PeeringDBIXPCollector(HTTPCollector):
|
|||||||
"created": item.get("created"),
|
"created": item.get("created"),
|
||||||
"updated": item.get("updated"),
|
"updated": item.get("updated"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().isoformat(),
|
"reference_date": datetime.now(UTC).isoformat(),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -176,7 +177,7 @@ class PeeringDBNetworkCollector(HTTPCollector):
|
|||||||
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
async def collect(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
"""Collect Network data from PeeringDB with rate limit handling"""
|
"""Collect Network data from PeeringDB with rate limit handling"""
|
||||||
response_data = await self.fetch_with_retry()
|
response_data = await self.fetch_with_retry()
|
||||||
if not response_data:
|
if not response_data:
|
||||||
@@ -208,7 +209,7 @@ class PeeringDBNetworkCollector(HTTPCollector):
|
|||||||
"created": item.get("created"),
|
"created": item.get("created"),
|
||||||
"updated": item.get("updated"),
|
"updated": item.get("updated"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().isoformat(),
|
"reference_date": datetime.now(UTC).isoformat(),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -279,7 +280,7 @@ class PeeringDBFacilityCollector(HTTPCollector):
|
|||||||
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
async def collect(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
"""Collect Facility data from PeeringDB with rate limit handling"""
|
"""Collect Facility data from PeeringDB with rate limit handling"""
|
||||||
response_data = await self.fetch_with_retry()
|
response_data = await self.fetch_with_retry()
|
||||||
if not response_data:
|
if not response_data:
|
||||||
@@ -310,7 +311,7 @@ class PeeringDBFacilityCollector(HTTPCollector):
|
|||||||
"created": item.get("created"),
|
"created": item.get("created"),
|
||||||
"updated": item.get("updated"),
|
"updated": item.get("updated"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().isoformat(),
|
"reference_date": datetime.now(UTC).isoformat(),
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
|
|||||||
131
backend/app/services/collectors/ris_live.py
Normal file
131
backend/app/services/collectors/ris_live.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
"""RIPE RIS Live collector."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import urllib.request
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.services.collectors.bgp_common import create_bgp_anomalies_for_batch, normalize_bgp_event
|
||||||
|
|
||||||
|
|
||||||
|
class RISLiveCollector(BaseCollector):
    """Collector that samples BGP UPDATE messages from the RIPE RIS Live stream.

    ``fetch`` reads up to ``max_messages`` messages from the HTTP stream in a
    worker thread. ``transform`` expands each message into one normalized event
    per announced or withdrawn prefix. ``run`` then passes the transformed
    batch to ``create_bgp_anomalies_for_batch``.
    """

    name = "ris_live_bgp"
    priority = "P1"
    module = "L3"
    frequency_hours = 1
    data_type = "bgp_update"
    fail_on_empty = True
    # Upper bound on messages collected per run.
    max_messages = 100
    # Socket timeout (seconds) applied to the stream connection and reads.
    idle_timeout_seconds = 15

    async def fetch(self) -> list[dict[str, Any]]:
        """Collect one batch of raw RIS messages without blocking the event loop.

        Raises:
            RuntimeError: If ``self._resolved_url`` is empty.
        """
        # NOTE(review): the resolved URL only gates the run; the stream itself
        # is opened against the fixed endpoint in _fetch_via_stream - confirm.
        if not self._resolved_url:
            raise RuntimeError("RIS Live URL is not configured")

        return await asyncio.to_thread(self._fetch_via_stream)

    def _fetch_via_stream(self) -> list[dict[str, Any]]:
        """Read newline-delimited JSON from the stream until the batch is full.

        Returns:
            The ``data`` objects of the ``ris_message`` lines read, at most
            ``max_messages`` of them.

        Raises:
            TimeoutError: If the stream times out before any message arrived.
        """
        events: list[dict[str, Any]] = []
        stream_url = "https://ris-live.ripe.net/v1/stream/?format=json&client=planet-ris-live"
        subscribe = json.dumps(
            {
                "host": "rrc00",
                "type": "UPDATE",
                "require": "announcements",
            }
        )
        request = urllib.request.Request(
            stream_url,
            headers={"X-RIS-Subscribe": subscribe},
        )

        try:
            # idle_timeout_seconds was declared but never used; it now replaces
            # the hard-coded 20 second timeout.
            with urllib.request.urlopen(request, timeout=self.idle_timeout_seconds) as response:
                while len(events) < self.max_messages:
                    raw_line = response.readline()
                    if not raw_line:
                        # Empty bytes means the server closed the stream.
                        break

                    line = raw_line.decode().strip()
                    if not line:
                        # A blank line is not end-of-stream; keep reading.
                        continue

                    try:
                        payload = json.loads(line)
                    except json.JSONDecodeError:
                        # One malformed line must not discard the whole batch.
                        continue

                    if not isinstance(payload, dict) or payload.get("type") != "ris_message":
                        continue

                    data = payload.get("data", {})
                    if isinstance(data, dict):
                        events.append(data)
        except TimeoutError:
            # Keep what was already received when the stream goes quiet.
            # With nothing received, fail exactly as before.
            if not events:
                raise

        return events

    def transform(self, raw_data: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Expand raw RIS messages into normalized per-prefix BGP events.

        Each announced prefix yields an ``announcement`` event carrying its
        ``next_hop``, and each withdrawn prefix yields a ``withdrawal`` event.
        A message with neither is normalized once as-is. The result is also
        stored on ``self._latest_transformed_batch`` for ``run``.
        """
        transformed: list[dict[str, Any]] = []
        for item in raw_data:
            # "or" guards against an explicit null host, which would otherwise
            # raise AttributeError on .replace().
            collector = (item.get("host") or "").replace(".ripe.net", "")
            base_event = {**item, "collector": collector}
            announcements = item.get("announcements") or []
            withdrawals = item.get("withdrawals") or []

            for announcement in announcements:
                next_hop = announcement.get("next_hop")
                for prefix in announcement.get("prefixes") or []:
                    transformed.append(
                        normalize_bgp_event(
                            {
                                **base_event,
                                "event_type": "announcement",
                                "prefix": prefix,
                                "next_hop": next_hop,
                            },
                            project="ris-live",
                        )
                    )

            for prefix in withdrawals:
                transformed.append(
                    normalize_bgp_event(
                        {
                            **base_event,
                            "event_type": "withdrawal",
                            "prefix": prefix,
                        },
                        project="ris-live",
                    )
                )

            if not announcements and not withdrawals:
                transformed.append(normalize_bgp_event(base_event, project="ris-live"))

        self._latest_transformed_batch = transformed
        return transformed

    async def run(self, db):
        """Run the base collection, then create anomalies for the new batch.

        Returns:
            The result dict of ``BaseCollector.run``. When its ``status`` is
            ``"success"``, ``anomalies_created`` is added.
        """
        result = await super().run(db)
        if result.get("status") != "success":
            return result

        task_id = result.get("task_id")
        snapshot_id = await self._resolve_snapshot_id(db, task_id)
        anomaly_count = await create_bgp_anomalies_for_batch(
            db,
            source=self.name,
            snapshot_id=snapshot_id,
            task_id=task_id,
            events=getattr(self, "_latest_transformed_batch", []),
        )
        result["anomalies_created"] = anomaly_count
        return result

    async def _resolve_snapshot_id(self, db, task_id: int | None) -> int | None:
        """Return the id of the newest ``DataSnapshot`` row for ``task_id``.

        Returns ``None`` when ``task_id`` is ``None`` or no row matches.
        """
        if task_id is None:
            return None

        from sqlalchemy import select

        from app.models.data_snapshot import DataSnapshot

        result = await db.execute(
            select(DataSnapshot.id)
            .where(DataSnapshot.task_id == task_id)
            .order_by(DataSnapshot.id.desc())
            # scalar_one_or_none() raises when more than one row comes back,
            # so restrict the query to the newest snapshot.
            .limit(1)
        )
        return result.scalar_one_or_none()
|
||||||
239
backend/app/services/collectors/spacetrack.py
Normal file
239
backend/app/services/collectors/spacetrack.py
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
"""Space-Track TLE Collector
|
||||||
|
|
||||||
|
Collects satellite TLE (Two-Line Element) data from Space-Track.org.
|
||||||
|
API documentation: https://www.space-track.org/documentation
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.services.collectors.base import BaseCollector
|
||||||
|
from app.core.data_sources import get_data_sources_config
|
||||||
|
from app.core.satellite_tle import build_tle_lines_from_elements
|
||||||
|
|
||||||
|
|
||||||
|
class SpaceTrackTLECollector(BaseCollector):
    """Collector for satellite orbital elements from Space-Track.org.

    ``fetch`` logs in with the configured credentials and runs one query.
    ``transform`` maps each raw record to a ``name`` / ``reference_date`` /
    ``metadata`` entry. Whenever credentials are missing or a request fails,
    ``fetch`` returns ``_get_sample_data()`` instead.
    """

    name = "spacetrack_tle"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "satellite_tle"

    _SITE_URL = "https://www.space-track.org/"
    _LOGIN_URL = "https://www.space-track.org/ajaxauth/login"
    # NOTE(review): this query returns a single catalogue object (25544). The
    # unreachable duplicate block removed from fetch() queried
    # ".../class/gp/orderby/EPOCH%20desc/limit/1000/format/json" - confirm
    # which one is intended.
    _TLE_QUERY_URL = (
        "https://www.space-track.org/basicspacedata/query/class/gp/NORAD_CAT_ID/25544/format/json"
    )

    @property
    def base_url(self) -> str:
        """Resolved URL if one is set, otherwise the YAML-configured URL."""
        config = get_data_sources_config()
        if self._resolved_url:
            return self._resolved_url
        return config.get_yaml_url("spacetrack_tle")

    async def fetch(self) -> List[Dict[str, Any]]:
        """Log in to Space-Track and download the raw element records.

        On HTTP 403 at login, the login and query are retried once with a
        client that sends no custom headers.

        Returns:
            The decoded JSON list of records. ``_get_sample_data()`` is
            returned when credentials are missing, login or query fails, the
            payload is not a list, or any exception is raised.
        """
        from app.core.config import settings

        username = settings.SPACETRACK_USERNAME
        password = settings.SPACETRACK_PASSWORD

        if not username or not password:
            print("SPACETRACK: No credentials configured, using sample data")
            return self._get_sample_data()

        print(f"SPACETRACK: Attempting to fetch TLE data with username: {username}")
        credentials = {"identity": username, "password": password}

        try:
            async with httpx.AsyncClient(
                timeout=120.0,
                follow_redirects=True,
                headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    "Accept": "application/json, text/html, */*",
                    "Accept-Language": "en-US,en;q=0.9",
                    "Referer": "https://www.space-track.org/",
                },
            ) as client:
                # Visit the landing page first so session cookies are set.
                await client.get(self._SITE_URL)

                login_response = await client.post(self._LOGIN_URL, data=credentials)
                print(f"SPACETRACK: Login response status: {login_response.status_code}")
                print(f"SPACETRACK: Login response URL: {login_response.url}")

                if login_response.status_code == 403:
                    print("SPACETRACK: Trying alternate login method...")
                    alt_records = await self._fetch_with_plain_client(credentials)
                    if alt_records is not None:
                        return alt_records

                if login_response.status_code != 200:
                    print("SPACETRACK: Login failed, using sample data")
                    return self._get_sample_data()

                tle_response = await client.get(self._TLE_QUERY_URL)
                print(f"SPACETRACK: TLE query status: {tle_response.status_code}")

                if tle_response.status_code != 200:
                    print("SPACETRACK: Query failed, using sample data")
                    return self._get_sample_data()

                data = tle_response.json()
                if not isinstance(data, list):
                    # transform() iterates over record dicts; any other JSON
                    # shape would crash it, so treat it as a failed query.
                    print("SPACETRACK: Query failed, using sample data")
                    return self._get_sample_data()

                print(f"SPACETRACK: Received {len(data)} records")
                return data
        except Exception as e:
            print(f"SPACETRACK: Error - {e}, using sample data")
            return self._get_sample_data()

    async def _fetch_with_plain_client(self, credentials: Dict[str, Any]):
        """Retry login and query with a header-less client.

        Returns the record list on success, or ``None`` on any failure.
        """
        async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as alt_client:
            await alt_client.get(self._SITE_URL)

            alt_login = await alt_client.post(self._LOGIN_URL, data=credentials)
            print(f"SPACETRACK: Alt login status: {alt_login.status_code}")
            if alt_login.status_code != 200:
                return None

            tle_response = await alt_client.get(self._TLE_QUERY_URL)
            if tle_response.status_code != 200:
                return None

            data = tle_response.json()
            if not isinstance(data, list):
                return None

            print(f"SPACETRACK: Received {len(data)} records via alt method")
            return data

    @staticmethod
    def _first_present(item: Dict[str, Any], *keys: str) -> Any:
        """Return the first non-``None`` value found under ``keys``, else ``None``."""
        for key in keys:
            value = item.get(key)
            if value is not None:
                return value
        return None

    def transform(self, raw_data: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Transform TLE data to internal format.

        The keys this method always read (``RAAN``, ``ARG_OF_PERIGEE``,
        ``INTL_DESIGNATOR``) are still honoured first. The alternatives
        ``RA_OF_ASC_NODE``, ``ARG_OF_PERICENTER`` and ``OBJECT_ID`` are
        accepted as fallbacks - presumably the names the queried ``gp`` class
        returns; verify against a live response.
        """
        transformed = []
        for item in raw_data:
            raan = self._first_present(item, "RAAN", "RA_OF_ASC_NODE")
            arg_of_perigee = self._first_present(item, "ARG_OF_PERIGEE", "ARG_OF_PERICENTER")
            designator = self._first_present(item, "INTL_DESIGNATOR", "OBJECT_ID")

            tle_line1, tle_line2 = build_tle_lines_from_elements(
                norad_cat_id=item.get("NORAD_CAT_ID"),
                epoch=item.get("EPOCH"),
                inclination=item.get("INCLINATION"),
                raan=raan,
                eccentricity=item.get("ECCENTRICITY"),
                arg_of_perigee=arg_of_perigee,
                mean_anomaly=item.get("MEAN_ANOMALY"),
                mean_motion=item.get("MEAN_MOTION"),
            )
            transformed.append(
                {
                    "name": item.get("OBJECT_NAME", "Unknown"),
                    "reference_date": item.get("EPOCH", ""),
                    "metadata": {
                        "norad_cat_id": item.get("NORAD_CAT_ID"),
                        "international_designator": designator,
                        "epoch": item.get("EPOCH"),
                        "mean_motion": item.get("MEAN_MOTION"),
                        "eccentricity": item.get("ECCENTRICITY"),
                        "inclination": item.get("INCLINATION"),
                        "raan": raan,
                        "arg_of_perigee": arg_of_perigee,
                        "mean_anomaly": item.get("MEAN_ANOMALY"),
                        "ephemeris_type": item.get("EPHEMERIS_TYPE"),
                        "classification_type": item.get("CLASSIFICATION_TYPE"),
                        "element_set_no": item.get("ELEMENT_SET_NO"),
                        "rev_at_epoch": item.get("REV_AT_EPOCH"),
                        "bstar": item.get("BSTAR"),
                        "mean_motion_dot": item.get("MEAN_MOTION_DOT"),
                        "mean_motion_ddot": item.get("MEAN_MOTION_DDOT"),
                        # Prefer original lines from the source, but keep a backend-built pair as a stable fallback.
                        "tle_line1": item.get("TLE_LINE1") or item.get("TLE1") or tle_line1,
                        "tle_line2": item.get("TLE_LINE2") or item.get("TLE2") or tle_line2,
                    },
                }
            )
        return transformed

    def _get_sample_data(self) -> List[Dict[str, Any]]:
        """Return sample TLE data for testing, in the raw shape ``transform`` reads.

        ``fetch`` returns this list as raw data, so the keys must be the
        upper-case names ``transform`` looks up. The earlier lowercase keys
        were all missed and produced "Unknown" entries with empty metadata.
        """
        return [
            {
                "OBJECT_NAME": "ISS (ZARYA)",
                "NORAD_CAT_ID": 25544,
                "OBJECT_ID": "1998-067A",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.49872723,
                "ECCENTRICITY": 0.0006292,
                "INCLINATION": 51.6400,
                "RA_OF_ASC_NODE": 315.0000,
                "ARG_OF_PERICENTER": 100.0000,
                "MEAN_ANOMALY": 260.0000,
            },
            {
                "OBJECT_NAME": "STARLINK-1000",
                "NORAD_CAT_ID": 44720,
                "OBJECT_ID": "2019-029AZ",
                "EPOCH": "2026-03-13T00:00:00Z",
                "MEAN_MOTION": 15.79234567,
                "ECCENTRICITY": 0.0001234,
                "INCLINATION": 53.0000,
                "RA_OF_ASC_NODE": 120.0000,
                "ARG_OF_PERICENTER": 90.0000,
                "MEAN_ANOMALY": 270.0000,
            },
        ]
|
||||||
@@ -7,13 +7,14 @@ Uses Wayback Machine as backup data source since live data requires JavaScript r
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.services.collectors.base import BaseCollector
|
from app.services.collectors.base import BaseCollector
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TeleGeographyCableCollector(BaseCollector):
|
class TeleGeographyCableCollector(BaseCollector):
|
||||||
name = "telegeography_cables"
|
name = "telegeography_cables"
|
||||||
priority = "P1"
|
priority = "P1"
|
||||||
@@ -102,7 +103,7 @@ class TeleGeographyCableCollector(BaseCollector):
|
|||||||
"capacity_tbps": item.get("capacity"),
|
"capacity_tbps": item.get("capacity"),
|
||||||
"url": item.get("url"),
|
"url": item.get("url"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -130,7 +131,7 @@ class TeleGeographyCableCollector(BaseCollector):
|
|||||||
"owner": "Meta, Orange, Vodafone, etc.",
|
"owner": "Meta, Orange, Vodafone, etc.",
|
||||||
"status": "active",
|
"status": "active",
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"source_id": "telegeo_sample_2",
|
"source_id": "telegeo_sample_2",
|
||||||
@@ -146,7 +147,7 @@ class TeleGeographyCableCollector(BaseCollector):
|
|||||||
"owner": "Alibaba, NEC",
|
"owner": "Alibaba, NEC",
|
||||||
"status": "planned",
|
"status": "planned",
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -186,7 +187,7 @@ class TeleGeographyLandingPointCollector(BaseCollector):
|
|||||||
"cable_count": len(item.get("cables", [])),
|
"cable_count": len(item.get("cables", [])),
|
||||||
"url": item.get("url"),
|
"url": item.get("url"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -210,7 +211,7 @@ class TeleGeographyLandingPointCollector(BaseCollector):
|
|||||||
"value": "",
|
"value": "",
|
||||||
"unit": "",
|
"unit": "",
|
||||||
"metadata": {"note": "Sample data"},
|
"metadata": {"note": "Sample data"},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -257,7 +258,7 @@ class TeleGeographyCableSystemCollector(BaseCollector):
|
|||||||
"investment": item.get("investment"),
|
"investment": item.get("investment"),
|
||||||
"url": item.get("url"),
|
"url": item.get("url"),
|
||||||
},
|
},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
}
|
}
|
||||||
result.append(entry)
|
result.append(entry)
|
||||||
except (ValueError, TypeError, KeyError):
|
except (ValueError, TypeError, KeyError):
|
||||||
@@ -281,6 +282,6 @@ class TeleGeographyCableSystemCollector(BaseCollector):
|
|||||||
"value": "5000",
|
"value": "5000",
|
||||||
"unit": "km",
|
"unit": "km",
|
||||||
"metadata": {"note": "Sample data"},
|
"metadata": {"note": "Sample data"},
|
||||||
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
"reference_date": datetime.now(UTC).strftime("%Y-%m-%d"),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ Collects data from TOP500 supercomputer rankings.
|
|||||||
https://top500.org/lists/top500/
|
https://top500.org/lists/top500/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import re
|
import re
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from datetime import datetime
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
@@ -21,14 +21,108 @@ class TOP500Collector(BaseCollector):
|
|||||||
data_type = "supercomputer"
|
data_type = "supercomputer"
|
||||||
|
|
||||||
async def fetch(self) -> List[Dict[str, Any]]:
|
async def fetch(self) -> List[Dict[str, Any]]:
|
||||||
"""Fetch TOP500 data from website (scraping)"""
|
"""Fetch TOP500 list data and enrich each row with detail-page metadata."""
|
||||||
# Get the latest list page
|
|
||||||
url = "https://top500.org/lists/top500/list/2025/11/"
|
url = "https://top500.org/lists/top500/list/2025/11/"
|
||||||
|
|
||||||
async with httpx.AsyncClient(timeout=60.0) as client:
|
async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
|
||||||
response = await client.get(url)
|
response = await client.get(url)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return self.parse_response(response.text)
|
entries = self.parse_response(response.text)
|
||||||
|
|
||||||
|
semaphore = asyncio.Semaphore(8)
|
||||||
|
|
||||||
|
async def enrich(entry: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
detail_url = entry.pop("_detail_url", "")
|
||||||
|
if not detail_url:
|
||||||
|
return entry
|
||||||
|
|
||||||
|
async with semaphore:
|
||||||
|
try:
|
||||||
|
detail_response = await client.get(detail_url)
|
||||||
|
detail_response.raise_for_status()
|
||||||
|
entry["metadata"].update(self.parse_detail_response(detail_response.text))
|
||||||
|
except Exception:
|
||||||
|
entry["metadata"]["detail_fetch_failed"] = True
|
||||||
|
return entry
|
||||||
|
|
||||||
|
return await asyncio.gather(*(enrich(entry) for entry in entries))
|
||||||
|
|
||||||
|
def _extract_system_fields(self, system_cell) -> Dict[str, str]:
|
||||||
|
link = system_cell.find("a")
|
||||||
|
system_name = link.get_text(" ", strip=True) if link else system_cell.get_text(" ", strip=True)
|
||||||
|
detail_url = ""
|
||||||
|
if link and link.get("href"):
|
||||||
|
detail_url = f"https://top500.org{link.get('href')}"
|
||||||
|
|
||||||
|
manufacturer = ""
|
||||||
|
if link and link.next_sibling:
|
||||||
|
manufacturer = str(link.next_sibling).strip(" ,\n\t")
|
||||||
|
|
||||||
|
cell_text = system_cell.get_text("\n", strip=True)
|
||||||
|
lines = [line.strip(" ,") for line in cell_text.splitlines() if line.strip()]
|
||||||
|
|
||||||
|
site = ""
|
||||||
|
country = ""
|
||||||
|
if lines:
|
||||||
|
system_name = lines[0]
|
||||||
|
if len(lines) >= 3:
|
||||||
|
site = lines[-2]
|
||||||
|
country = lines[-1]
|
||||||
|
elif len(lines) == 2:
|
||||||
|
country = lines[-1]
|
||||||
|
|
||||||
|
if not manufacturer and len(lines) >= 2:
|
||||||
|
manufacturer = lines[1]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"name": system_name,
|
||||||
|
"manufacturer": manufacturer,
|
||||||
|
"site": site,
|
||||||
|
"country": country,
|
||||||
|
"detail_url": detail_url,
|
||||||
|
}
|
||||||
|
|
||||||
|
def parse_detail_response(self, html: str) -> Dict[str, Any]:
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
detail_table = soup.find("table", {"class": "table table-condensed"})
|
||||||
|
if not detail_table:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
detail_map: Dict[str, Any] = {}
|
||||||
|
label_aliases = {
|
||||||
|
"Site": "site",
|
||||||
|
"Manufacturer": "manufacturer",
|
||||||
|
"Cores": "cores",
|
||||||
|
"Processor": "processor",
|
||||||
|
"Interconnect": "interconnect",
|
||||||
|
"Installation Year": "installation_year",
|
||||||
|
"Linpack Performance (Rmax)": "rmax",
|
||||||
|
"Theoretical Peak (Rpeak)": "rpeak",
|
||||||
|
"Nmax": "nmax",
|
||||||
|
"HPCG": "hpcg",
|
||||||
|
"Power": "power",
|
||||||
|
"Power Measurement Level": "power_measurement_level",
|
||||||
|
"Operating System": "operating_system",
|
||||||
|
"Compiler": "compiler",
|
||||||
|
"Math Library": "math_library",
|
||||||
|
"MPI": "mpi",
|
||||||
|
}
|
||||||
|
|
||||||
|
for row in detail_table.find_all("tr"):
|
||||||
|
header = row.find("th")
|
||||||
|
value_cell = row.find("td")
|
||||||
|
if not header or not value_cell:
|
||||||
|
continue
|
||||||
|
|
||||||
|
label = header.get_text(" ", strip=True).rstrip(":")
|
||||||
|
key = label_aliases.get(label)
|
||||||
|
if not key:
|
||||||
|
continue
|
||||||
|
|
||||||
|
value = value_cell.get_text(" ", strip=True)
|
||||||
|
detail_map[key] = value
|
||||||
|
|
||||||
|
return detail_map
|
||||||
|
|
||||||
def parse_response(self, html: str) -> List[Dict[str, Any]]:
|
def parse_response(self, html: str) -> List[Dict[str, Any]]:
|
||||||
"""Parse TOP500 HTML response"""
|
"""Parse TOP500 HTML response"""
|
||||||
@@ -36,27 +130,26 @@ class TOP500Collector(BaseCollector):
|
|||||||
soup = BeautifulSoup(html, "html.parser")
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
|
||||||
# Find the table with TOP500 data
|
# Find the table with TOP500 data
|
||||||
table = soup.find("table", {"class": "top500-table"})
|
table = None
|
||||||
if not table:
|
for candidate in soup.find_all("table"):
|
||||||
# Try alternative table selector
|
header_cells = [
|
||||||
table = soup.find("table", {"id": "top500"})
|
cell.get_text(" ", strip=True) for cell in candidate.select("thead th")
|
||||||
|
]
|
||||||
if not table:
|
normalized_headers = [header.lower() for header in header_cells]
|
||||||
# Try to find any table with rank data
|
if (
|
||||||
tables = soup.find_all("table")
|
"rank" in normalized_headers
|
||||||
for t in tables:
|
and "system" in normalized_headers
|
||||||
if t.find(string=re.compile(r"Rank.*System.*Cores.*Rmax", re.I)):
|
and any("cores" in header for header in normalized_headers)
|
||||||
table = t
|
and any("rmax" in header for header in normalized_headers)
|
||||||
|
):
|
||||||
|
table = candidate
|
||||||
break
|
break
|
||||||
|
|
||||||
if not table:
|
if not table:
|
||||||
# Fallback: try to extract data from any table
|
table = soup.find("table", {"class": "top500-table"}) or soup.find("table", {"id": "top500"})
|
||||||
tables = soup.find_all("table")
|
|
||||||
if tables:
|
|
||||||
table = tables[0]
|
|
||||||
|
|
||||||
if table:
|
if table:
|
||||||
rows = table.find_all("tr")
|
rows = table.select("tr")
|
||||||
for row in rows[1:]: # Skip header row
|
for row in rows[1:]: # Skip header row
|
||||||
cells = row.find_all(["td", "th"])
|
cells = row.find_all(["td", "th"])
|
||||||
if len(cells) >= 6:
|
if len(cells) >= 6:
|
||||||
@@ -68,43 +161,26 @@ class TOP500Collector(BaseCollector):
|
|||||||
|
|
||||||
rank = int(rank_text)
|
rank = int(rank_text)
|
||||||
|
|
||||||
# System name (may contain link)
|
|
||||||
system_cell = cells[1]
|
system_cell = cells[1]
|
||||||
system_name = system_cell.get_text(strip=True)
|
system_fields = self._extract_system_fields(system_cell)
|
||||||
# Try to get full name from link title or data attribute
|
system_name = system_fields["name"]
|
||||||
link = system_cell.find("a")
|
manufacturer = system_fields["manufacturer"]
|
||||||
if link and link.get("title"):
|
site = system_fields["site"]
|
||||||
system_name = link.get("title")
|
country = system_fields["country"]
|
||||||
|
detail_url = system_fields["detail_url"]
|
||||||
|
|
||||||
# Country
|
|
||||||
country_cell = cells[2]
|
|
||||||
country = country_cell.get_text(strip=True)
|
|
||||||
# Try to get country from data attribute or image alt
|
|
||||||
img = country_cell.find("img")
|
|
||||||
if img and img.get("alt"):
|
|
||||||
country = img.get("alt")
|
|
||||||
|
|
||||||
# Extract location (city)
|
|
||||||
city = ""
|
city = ""
|
||||||
location_text = country_cell.get_text(strip=True)
|
cores = cells[2].get_text(strip=True).replace(",", "")
|
||||||
if "(" in location_text and ")" in location_text:
|
|
||||||
city = location_text.split("(")[0].strip()
|
|
||||||
|
|
||||||
# Cores
|
rmax_text = cells[3].get_text(strip=True)
|
||||||
cores = cells[3].get_text(strip=True).replace(",", "")
|
|
||||||
|
|
||||||
# Rmax
|
|
||||||
rmax_text = cells[4].get_text(strip=True)
|
|
||||||
rmax = self._parse_performance(rmax_text)
|
rmax = self._parse_performance(rmax_text)
|
||||||
|
|
||||||
# Rpeak
|
rpeak_text = cells[4].get_text(strip=True)
|
||||||
rpeak_text = cells[5].get_text(strip=True)
|
|
||||||
rpeak = self._parse_performance(rpeak_text)
|
rpeak = self._parse_performance(rpeak_text)
|
||||||
|
|
||||||
# Power (optional)
|
|
||||||
power = ""
|
power = ""
|
||||||
if len(cells) >= 7:
|
if len(cells) >= 6:
|
||||||
power = cells[6].get_text(strip=True)
|
power = cells[5].get_text(strip=True).replace(",", "")
|
||||||
|
|
||||||
entry = {
|
entry = {
|
||||||
"source_id": f"top500_{rank}",
|
"source_id": f"top500_{rank}",
|
||||||
@@ -117,10 +193,14 @@ class TOP500Collector(BaseCollector):
|
|||||||
"unit": "PFlop/s",
|
"unit": "PFlop/s",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"rank": rank,
|
"rank": rank,
|
||||||
"r_peak": rpeak,
|
|
||||||
"power": power,
|
|
||||||
"cores": cores,
|
"cores": cores,
|
||||||
|
"rmax": rmax_text,
|
||||||
|
"rpeak": rpeak_text,
|
||||||
|
"power": power,
|
||||||
|
"manufacturer": manufacturer,
|
||||||
|
"site": site,
|
||||||
},
|
},
|
||||||
|
"_detail_url": detail_url,
|
||||||
"reference_date": "2025-11-01",
|
"reference_date": "2025-11-01",
|
||||||
}
|
}
|
||||||
data.append(entry)
|
data.append(entry)
|
||||||
@@ -184,10 +264,15 @@ class TOP500Collector(BaseCollector):
|
|||||||
"unit": "PFlop/s",
|
"unit": "PFlop/s",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"rank": 1,
|
"rank": 1,
|
||||||
"r_peak": 2746.38,
|
"cores": "11039616",
|
||||||
"power": 29581,
|
"rmax": "1742.00",
|
||||||
"cores": 11039616,
|
"rpeak": "2746.38",
|
||||||
|
"power": "29581",
|
||||||
"manufacturer": "HPE",
|
"manufacturer": "HPE",
|
||||||
|
"site": "DOE/NNSA/LLNL",
|
||||||
|
"processor": "AMD 4th Gen EPYC 24C 1.8GHz",
|
||||||
|
"interconnect": "Slingshot-11",
|
||||||
|
"installation_year": "2025",
|
||||||
},
|
},
|
||||||
"reference_date": "2025-11-01",
|
"reference_date": "2025-11-01",
|
||||||
},
|
},
|
||||||
@@ -202,10 +287,12 @@ class TOP500Collector(BaseCollector):
|
|||||||
"unit": "PFlop/s",
|
"unit": "PFlop/s",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"rank": 2,
|
"rank": 2,
|
||||||
"r_peak": 2055.72,
|
"cores": "9066176",
|
||||||
"power": 24607,
|
"rmax": "1353.00",
|
||||||
"cores": 9066176,
|
"rpeak": "2055.72",
|
||||||
|
"power": "24607",
|
||||||
"manufacturer": "HPE",
|
"manufacturer": "HPE",
|
||||||
|
"site": "DOE/SC/Oak Ridge National Laboratory",
|
||||||
},
|
},
|
||||||
"reference_date": "2025-11-01",
|
"reference_date": "2025-11-01",
|
||||||
},
|
},
|
||||||
@@ -220,9 +307,10 @@ class TOP500Collector(BaseCollector):
|
|||||||
"unit": "PFlop/s",
|
"unit": "PFlop/s",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"rank": 3,
|
"rank": 3,
|
||||||
"r_peak": 1980.01,
|
"cores": "9264128",
|
||||||
"power": 38698,
|
"rmax": "1012.00",
|
||||||
"cores": 9264128,
|
"rpeak": "1980.01",
|
||||||
|
"power": "38698",
|
||||||
"manufacturer": "Intel",
|
"manufacturer": "Intel",
|
||||||
},
|
},
|
||||||
"reference_date": "2025-11-01",
|
"reference_date": "2025-11-01",
|
||||||
|
|||||||
@@ -1,15 +1,18 @@
|
|||||||
"""Task Scheduler for running collection jobs"""
|
"""Task Scheduler for running collection jobs."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import UTC, datetime, timedelta
|
||||||
from typing import Dict, Any
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
from apscheduler.triggers.interval import IntervalTrigger
|
from apscheduler.triggers.interval import IntervalTrigger
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy import select
|
||||||
|
|
||||||
from app.db.session import async_session_factory
|
from app.db.session import async_session_factory
|
||||||
|
from app.core.time import to_iso8601_utc
|
||||||
|
from app.models.datasource import DataSource
|
||||||
|
from app.models.task import CollectionTask
|
||||||
from app.services.collectors.registry import collector_registry
|
from app.services.collectors.registry import collector_registry
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -17,132 +20,185 @@ logger = logging.getLogger(__name__)
|
|||||||
scheduler = AsyncIOScheduler()
|
scheduler = AsyncIOScheduler()
|
||||||
|
|
||||||
|
|
||||||
COLLECTOR_TO_ID = {
|
async def _update_next_run_at(datasource: DataSource, session) -> None:
|
||||||
"top500": 1,
|
job = scheduler.get_job(datasource.source)
|
||||||
"epoch_ai_gpu": 2,
|
datasource.next_run_at = job.next_run_time if job else None
|
||||||
"huggingface_models": 3,
|
await session.commit()
|
||||||
"huggingface_datasets": 4,
|
|
||||||
"huggingface_spaces": 5,
|
|
||||||
"peeringdb_ixp": 6,
|
async def _apply_datasource_schedule(datasource: DataSource, session) -> None:
|
||||||
"peeringdb_network": 7,
|
collector = collector_registry.get(datasource.source)
|
||||||
"peeringdb_facility": 8,
|
if not collector:
|
||||||
"telegeography_cables": 9,
|
logger.warning("Collector not found for datasource %s", datasource.source)
|
||||||
"telegeography_landing": 10,
|
return
|
||||||
"telegeography_systems": 11,
|
|
||||||
"arcgis_cables": 15,
|
collector_registry.set_active(datasource.source, datasource.is_active)
|
||||||
"fao_landing_points": 16,
|
|
||||||
}
|
existing_job = scheduler.get_job(datasource.source)
|
||||||
|
if existing_job:
|
||||||
|
scheduler.remove_job(datasource.source)
|
||||||
|
|
||||||
|
if datasource.is_active:
|
||||||
|
scheduler.add_job(
|
||||||
|
run_collector_task,
|
||||||
|
trigger=IntervalTrigger(minutes=max(1, datasource.frequency_minutes)),
|
||||||
|
id=datasource.source,
|
||||||
|
name=datasource.name,
|
||||||
|
replace_existing=True,
|
||||||
|
kwargs={"collector_name": datasource.source},
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Scheduled collector: %s (every %sm)",
|
||||||
|
datasource.source,
|
||||||
|
datasource.frequency_minutes,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.info("Collector disabled: %s", datasource.source)
|
||||||
|
|
||||||
|
await _update_next_run_at(datasource, session)
|
||||||
|
|
||||||
|
|
||||||
async def run_collector_task(collector_name: str):
|
async def run_collector_task(collector_name: str):
|
||||||
"""Run a single collector task"""
|
"""Run a single collector task."""
|
||||||
collector = collector_registry.get(collector_name)
|
collector = collector_registry.get(collector_name)
|
||||||
if not collector:
|
if not collector:
|
||||||
logger.error(f"Collector not found: {collector_name}")
|
logger.error("Collector not found: %s", collector_name)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get the correct datasource_id
|
async with async_session_factory() as db:
|
||||||
datasource_id = COLLECTOR_TO_ID.get(collector_name, 1)
|
result = await db.execute(select(DataSource).where(DataSource.source == collector_name))
|
||||||
|
datasource = result.scalar_one_or_none()
|
||||||
|
if not datasource:
|
||||||
|
logger.error("Datasource not found for collector: %s", collector_name)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not datasource.is_active:
|
||||||
|
logger.info("Skipping disabled collector: %s", collector_name)
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
collector._datasource_id = datasource.id
|
||||||
|
logger.info("Running collector: %s (datasource_id=%s)", collector_name, datasource.id)
|
||||||
|
task_result = await collector.run(db)
|
||||||
|
datasource.last_run_at = datetime.now(UTC)
|
||||||
|
datasource.last_status = task_result.get("status")
|
||||||
|
await _update_next_run_at(datasource, db)
|
||||||
|
logger.info("Collector %s completed: %s", collector_name, task_result)
|
||||||
|
except Exception as exc:
|
||||||
|
datasource.last_run_at = datetime.now(UTC)
|
||||||
|
datasource.last_status = "failed"
|
||||||
|
await db.commit()
|
||||||
|
logger.exception("Collector %s failed: %s", collector_name, exc)
|
||||||
|
|
||||||
|
|
||||||
|
async def cleanup_stale_running_tasks(max_age_hours: int = 2) -> int:
|
||||||
|
"""Mark stale running tasks as failed after restarts or collector hangs."""
|
||||||
|
cutoff = datetime.now(UTC) - timedelta(hours=max_age_hours)
|
||||||
|
|
||||||
async with async_session_factory() as db:
|
async with async_session_factory() as db:
|
||||||
try:
|
result = await db.execute(
|
||||||
# Set the datasource_id on the collector instance
|
select(CollectionTask).where(
|
||||||
collector._datasource_id = datasource_id
|
CollectionTask.status == "running",
|
||||||
|
CollectionTask.started_at.is_not(None),
|
||||||
logger.info(f"Running collector: {collector_name} (datasource_id={datasource_id})")
|
CollectionTask.started_at < cutoff,
|
||||||
result = await collector.run(db)
|
|
||||||
logger.info(f"Collector {collector_name} completed: {result}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Collector {collector_name} failed: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def start_scheduler():
|
|
||||||
"""Start the scheduler with all registered collectors"""
|
|
||||||
collectors = collector_registry.all()
|
|
||||||
|
|
||||||
for name, collector in collectors.items():
|
|
||||||
if collector_registry.is_active(name):
|
|
||||||
scheduler.add_job(
|
|
||||||
run_collector_task,
|
|
||||||
trigger=IntervalTrigger(hours=collector.frequency_hours),
|
|
||||||
id=name,
|
|
||||||
name=name,
|
|
||||||
replace_existing=True,
|
|
||||||
kwargs={"collector_name": name},
|
|
||||||
)
|
)
|
||||||
logger.info(f"Scheduled collector: {name} (every {collector.frequency_hours}h)")
|
)
|
||||||
|
stale_tasks = result.scalars().all()
|
||||||
|
|
||||||
|
for task in stale_tasks:
|
||||||
|
task.status = "failed"
|
||||||
|
task.phase = "failed"
|
||||||
|
task.completed_at = datetime.now(UTC)
|
||||||
|
existing_error = (task.error_message or "").strip()
|
||||||
|
cleanup_error = "Marked failed automatically after stale running task cleanup"
|
||||||
|
task.error_message = f"{existing_error}\n{cleanup_error}".strip() if existing_error else cleanup_error
|
||||||
|
|
||||||
|
if stale_tasks:
|
||||||
|
await db.commit()
|
||||||
|
logger.warning("Cleaned up %s stale running collection task(s)", len(stale_tasks))
|
||||||
|
|
||||||
|
return len(stale_tasks)
|
||||||
|
|
||||||
|
|
||||||
|
def start_scheduler() -> None:
|
||||||
|
"""Start the scheduler."""
|
||||||
|
if not scheduler.running:
|
||||||
scheduler.start()
|
scheduler.start()
|
||||||
logger.info("Scheduler started")
|
logger.info("Scheduler started")
|
||||||
|
|
||||||
|
|
||||||
def stop_scheduler():
|
def stop_scheduler() -> None:
|
||||||
"""Stop the scheduler"""
|
"""Stop the scheduler."""
|
||||||
scheduler.shutdown()
|
if scheduler.running:
|
||||||
|
scheduler.shutdown(wait=False)
|
||||||
logger.info("Scheduler stopped")
|
logger.info("Scheduler stopped")
|
||||||
|
|
||||||
|
|
||||||
|
async def sync_scheduler_with_datasources() -> None:
|
||||||
|
"""Synchronize scheduler jobs with datasource table."""
|
||||||
|
async with async_session_factory() as db:
|
||||||
|
result = await db.execute(select(DataSource).order_by(DataSource.id))
|
||||||
|
datasources = result.scalars().all()
|
||||||
|
|
||||||
|
configured_sources = {datasource.source for datasource in datasources}
|
||||||
|
for job in list(scheduler.get_jobs()):
|
||||||
|
if job.id not in configured_sources:
|
||||||
|
scheduler.remove_job(job.id)
|
||||||
|
|
||||||
|
for datasource in datasources:
|
||||||
|
await _apply_datasource_schedule(datasource, db)
|
||||||
|
|
||||||
|
|
||||||
|
async def sync_datasource_job(datasource_id: int) -> bool:
|
||||||
|
"""Synchronize a single datasource job after settings changes."""
|
||||||
|
async with async_session_factory() as db:
|
||||||
|
datasource = await db.get(DataSource, datasource_id)
|
||||||
|
if not datasource:
|
||||||
|
return False
|
||||||
|
|
||||||
|
await _apply_datasource_schedule(datasource, db)
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
def get_scheduler_jobs() -> list[Dict[str, Any]]:
|
def get_scheduler_jobs() -> list[Dict[str, Any]]:
|
||||||
"""Get all scheduled jobs"""
|
"""Get all scheduled jobs."""
|
||||||
jobs = []
|
jobs = []
|
||||||
for job in scheduler.get_jobs():
|
for job in scheduler.get_jobs():
|
||||||
jobs.append(
|
jobs.append(
|
||||||
{
|
{
|
||||||
"id": job.id,
|
"id": job.id,
|
||||||
"name": job.name,
|
"name": job.name,
|
||||||
"next_run_time": job.next_run_time.isoformat() if job.next_run_time else None,
|
"next_run_time": to_iso8601_utc(job.next_run_time),
|
||||||
"trigger": str(job.trigger),
|
"trigger": str(job.trigger),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return jobs
|
return jobs
|
||||||
|
|
||||||
|
|
||||||
def add_job(collector_name: str, hours: int = 4):
|
async def get_latest_task_id_for_datasource(datasource_id: int) -> Optional[int]:
|
||||||
"""Add a new scheduled job"""
|
from app.models.task import CollectionTask
|
||||||
collector = collector_registry.get(collector_name)
|
|
||||||
if not collector:
|
|
||||||
raise ValueError(f"Collector not found: {collector_name}")
|
|
||||||
|
|
||||||
scheduler.add_job(
|
async with async_session_factory() as db:
|
||||||
run_collector_task,
|
result = await db.execute(
|
||||||
trigger=IntervalTrigger(hours=hours),
|
select(CollectionTask.id)
|
||||||
id=collector_name,
|
.where(CollectionTask.datasource_id == datasource_id)
|
||||||
name=collector_name,
|
.order_by(CollectionTask.created_at.desc(), CollectionTask.id.desc())
|
||||||
replace_existing=True,
|
.limit(1)
|
||||||
kwargs={"collector_name": collector_name},
|
|
||||||
)
|
)
|
||||||
logger.info(f"Added scheduled job: {collector_name} (every {hours}h)")
|
return result.scalar_one_or_none()
|
||||||
|
|
||||||
|
|
||||||
def remove_job(collector_name: str):
|
|
||||||
"""Remove a scheduled job"""
|
|
||||||
scheduler.remove_job(collector_name)
|
|
||||||
logger.info(f"Removed scheduled job: {collector_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def pause_job(collector_name: str):
|
|
||||||
"""Pause a scheduled job"""
|
|
||||||
scheduler.pause_job(collector_name)
|
|
||||||
logger.info(f"Paused job: {collector_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def resume_job(collector_name: str):
|
|
||||||
"""Resume a scheduled job"""
|
|
||||||
scheduler.resume_job(collector_name)
|
|
||||||
logger.info(f"Resumed job: {collector_name}")
|
|
||||||
|
|
||||||
|
|
||||||
def run_collector_now(collector_name: str) -> bool:
|
def run_collector_now(collector_name: str) -> bool:
|
||||||
"""Run a collector immediately (not scheduled)"""
|
"""Run a collector immediately (not scheduled)."""
|
||||||
collector = collector_registry.get(collector_name)
|
collector = collector_registry.get(collector_name)
|
||||||
if not collector:
|
if not collector:
|
||||||
logger.error(f"Collector not found: {collector_name}")
|
logger.error("Collector not found: %s", collector_name)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
asyncio.create_task(run_collector_task(collector_name))
|
asyncio.create_task(run_collector_task(collector_name))
|
||||||
logger.info(f"Triggered collector: {collector_name}")
|
logger.info("Triggered collector: %s", collector_name)
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as exc:
|
||||||
logger.error(f"Failed to trigger collector {collector_name}: {e}")
|
logger.error("Failed to trigger collector %s: %s", collector_name, exc)
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -16,3 +16,4 @@ email-validator
|
|||||||
apscheduler>=3.10.4
|
apscheduler>=3.10.4
|
||||||
pytest>=7.4.0
|
pytest>=7.4.0
|
||||||
pytest-asyncio>=0.23.0
|
pytest-asyncio>=0.23.0
|
||||||
|
networkx>=3.0
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user