feat: add data sources config system and Earth API integration

- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
This commit is contained in:
rayd1o
2026-03-13 10:54:02 +08:00
parent 99771a88c5
commit de32552159
25 changed files with 222 additions and 23 deletions

View File

@@ -10,6 +10,7 @@ from app.models.user import User
from app.core.security import get_current_user
from app.models.alert import Alert, AlertSeverity, AlertStatus
router = APIRouter()

View File

@@ -14,6 +14,7 @@ from app.models.task import CollectionTask
from app.core.security import get_current_user
from app.core.cache import cache
# Built-in collectors info (mirrored from datasources.py)
COLLECTOR_INFO = {
"top500": {

View File

@@ -307,3 +307,40 @@ async def test_new_config(
"error": "Connection failed",
"message": str(e),
}
@router.get("/configs/all")
async def list_all_datasources(
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
"""List all data sources: YAML defaults + DB overrides"""
from app.core.data_sources import COLLECTOR_URL_KEYS, get_data_sources_config
config = get_data_sources_config()
db_query = await db.execute(select(DataSourceConfig))
db_configs = {c.name: c for c in db_query.scalars().all()}
result = []
for name, yaml_key in COLLECTOR_URL_KEYS.items():
yaml_url = config.get_yaml_url(name)
db_config = db_configs.get(name)
result.append(
{
"name": name,
"default_url": yaml_url,
"endpoint": db_config.endpoint if db_config else yaml_url,
"is_overridden": db_config is not None and db_config.endpoint != yaml_url
if yaml_url
else db_config is not None,
"is_active": db_config.is_active if db_config else True,
"source_type": db_config.source_type if db_config else "http",
"description": db_config.description
if db_config
else f"Data source from YAML: {yaml_key}",
}
)
return {"total": len(result), "data": result}

View File

@@ -99,8 +99,22 @@ COLLECTOR_INFO = {
"priority": "P1",
"frequency_hours": 168,
},
"fao_landing_points": {
"arcgis_landing_points": {
"id": 16,
"name": "ArcGIS Landing Points",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"arcgis_cable_landing_relation": {
"id": 17,
"name": "ArcGIS Cable-Landing Relations",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"fao_landing_points": {
"id": 18,
"name": "FAO Landing Points",
"module": "L2",
"priority": "P1",

View File

@@ -10,6 +10,7 @@ from app.models.user import User
from app.core.security import get_current_user
from app.services.collectors.registry import collector_registry
router = APIRouter()

View File

@@ -146,14 +146,14 @@ async def get_cables_geojson(db: AsyncSession = Depends(get_db)):
async def get_landing_points_geojson(db: AsyncSession = Depends(get_db)):
"""获取登陆点 GeoJSON 数据 (Point)"""
try:
stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
result = await db.execute(stmt)
records = result.scalars().all()
if not records:
raise HTTPException(
status_code=404,
detail="No landing point data found. Please run the fao_landing_points collector first.",
detail="No landing point data found. Please run the arcgis_landing_points collector first.",
)
return convert_landing_point_to_geojson(records)
@@ -170,7 +170,7 @@ async def get_all_geojson(db: AsyncSession = Depends(get_db)):
cables_result = await db.execute(cables_stmt)
cables_records = cables_result.scalars().all()
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
points_result = await db.execute(points_stmt)
points_records = points_result.scalars().all()
@@ -208,7 +208,7 @@ async def get_cable_graph(db: AsyncSession) -> CableGraph:
cables_result = await db.execute(cables_stmt)
cables_records = list(cables_result.scalars().all())
points_stmt = select(CollectedData).where(CollectedData.source == "fao_landing_points")
points_stmt = select(CollectedData).where(CollectedData.source == "arcgis_landing_points")
points_result = await db.execute(points_stmt)
points_records = list(points_result.scalars().all())

View File

@@ -27,10 +27,6 @@ class Settings(BaseSettings):
CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"]
ARCGIS_CABLE_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
ARCGIS_LANDING_POINT_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
ARCGIS_CABLE_LANDING_RELATION_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
@property
def REDIS_URL(self) -> str:
return os.getenv(

View File

@@ -0,0 +1,78 @@
import os
import yaml
from functools import lru_cache
from typing import Optional
COLLECTOR_URL_KEYS = {
"arcgis_cables": "arcgis.cable_url",
"arcgis_landing_points": "arcgis.landing_point_url",
"arcgis_cable_landing_relation": "arcgis.cable_landing_relation_url",
"fao_landing_points": "fao.landing_point_url",
"telegeography_cables": "telegeography.cable_url",
"telegeography_landing": "telegeography.landing_point_url",
"huggingface_models": "huggingface.models_url",
"huggingface_datasets": "huggingface.datasets_url",
"huggingface_spaces": "huggingface.spaces_url",
"cloudflare_radar_device": "cloudflare.radar_device_url",
"cloudflare_radar_traffic": "cloudflare.radar_traffic_url",
"cloudflare_radar_top_locations": "cloudflare.radar_top_locations_url",
"peeringdb_ixp": "peeringdb.ixp_url",
"peeringdb_network": "peeringdb.network_url",
"peeringdb_facility": "peeringdb.facility_url",
"top500": "top500.url",
"epoch_ai_gpu": "epoch_ai.gpu_clusters_url",
}
class DataSourcesConfig:
def __init__(self, config_path: str = None):
if config_path is None:
config_path = os.path.join(os.path.dirname(__file__), "data_sources.yaml")
self._yaml_config = {}
if os.path.exists(config_path):
with open(config_path, "r") as f:
self._yaml_config = yaml.safe_load(f) or {}
def get_yaml_url(self, collector_name: str) -> str:
key = COLLECTOR_URL_KEYS.get(collector_name, "")
if not key:
return ""
parts = key.split(".")
value = self._yaml_config
for part in parts:
if isinstance(value, dict):
value = value.get(part, "")
else:
return ""
return value if isinstance(value, str) else ""
async def get_url(self, collector_name: str, db) -> str:
yaml_url = self.get_yaml_url(collector_name)
if not db:
return yaml_url
try:
from sqlalchemy import select
from app.models.datasource_config import DataSourceConfig
query = select(DataSourceConfig).where(
DataSourceConfig.name == collector_name, DataSourceConfig.is_active == True
)
result = await db.execute(query)
db_config = result.scalar_one_or_none()
if db_config and db_config.endpoint:
return db_config.endpoint
except Exception:
pass
return yaml_url
@lru_cache()
def get_data_sources_config() -> DataSourcesConfig:
return DataSourcesConfig()

View File

@@ -0,0 +1,35 @@
# Data Sources Configuration
# All external data source URLs should be configured here
arcgis:
cable_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
landing_point_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
cable_landing_relation_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
fao:
landing_point_url: "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"
telegeography:
cable_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/cable.json"
landing_point_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/landing_point.json"
huggingface:
models_url: "https://huggingface.co/api/models"
datasets_url: "https://huggingface.co/api/datasets"
spaces_url: "https://huggingface.co/api/spaces"
cloudflare:
radar_device_url: "https://api.cloudflare.com/client/v4/radar/http/summary/device_type"
radar_traffic_url: "https://api.cloudflare.com/client/v4/radar/http/timeseries/requests"
radar_top_locations_url: "https://api.cloudflare.com/client/v4/radar/http/top/locations"
peeringdb:
ixp_url: "https://www.peeringdb.com/api/ix"
network_url: "https://www.peeringdb.com/api/net"
facility_url: "https://www.peeringdb.com/api/fac"
top500:
url: "https://top500.org/lists/top500/list/2025/11/"
epoch_ai:
gpu_clusters_url: "https://epoch.ai/data/gpu-clusters"

View File

@@ -7,6 +7,7 @@ from typing import Dict, Any, Optional
from app.core.websocket.manager import manager
class DataBroadcaster:
"""Periodically broadcasts data to connected WebSocket clients"""

View File

@@ -9,7 +9,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISCableCollector(BaseCollector):
@@ -21,7 +22,12 @@ class ArcGISCableCollector(BaseCollector):
@property
def base_url(self) -> str:
return settings.ARCGIS_CABLE_URL
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cables")
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -3,7 +3,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISLandingPointCollector(BaseCollector):
@@ -15,7 +16,12 @@ class ArcGISLandingPointCollector(BaseCollector):
@property
def base_url(self) -> str:
return settings.ARCGIS_LANDING_POINT_URL
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_landing_points")
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -3,7 +3,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISCableLandingRelationCollector(BaseCollector):
@@ -15,7 +16,12 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
@property
def base_url(self) -> str:
return settings.ARCGIS_CABLE_LANDING_RELATION_URL
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cable_landing_relation")
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -23,6 +23,13 @@ class BaseCollector(ABC):
self._current_task = None
self._db_session = None
self._datasource_id = 1
self._resolved_url: Optional[str] = None
async def resolve_url(self, db: AsyncSession) -> None:
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
self._resolved_url = await config.get_url(self.name, db)
def update_progress(self, records_processed: int):
"""Update task progress - call this during data processing"""
@@ -65,6 +72,8 @@ class BaseCollector(ABC):
self._current_task = task
self._db_session = db
await self.resolve_url(db)
try:
raw_data = await self.fetch()
task.total_records = len(raw_data)
@@ -87,7 +96,6 @@ class BaseCollector(ABC):
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
}
except Exception as e:
# Log task failure
task.status = "failed"
task.error_message = str(e)
task.completed_at = datetime.utcnow()

View File

@@ -15,6 +15,7 @@ from datetime import datetime
import httpx
from app.services.collectors.base import HTTPCollector
# Cloudflare API token (optional - for higher rate limits)
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")

View File

@@ -13,6 +13,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class EpochAIGPUCollector(BaseCollector):
name = "epoch_ai_gpu"
priority = "P0"

View File

@@ -10,6 +10,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class FAOLandingPointCollector(BaseCollector):
name = "fao_landing_points"
priority = "P1"

View File

@@ -12,6 +12,7 @@ from datetime import datetime
from app.services.collectors.base import HTTPCollector
class HuggingFaceModelCollector(HTTPCollector):
name = "huggingface_models"
priority = "P1"

View File

@@ -18,6 +18,7 @@ from datetime import datetime
import httpx
from app.services.collectors.base import HTTPCollector
# PeeringDB API key - read from environment variable
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")

View File

@@ -14,6 +14,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class TeleGeographyCableCollector(BaseCollector):
name = "telegeography_cables"
priority = "P1"

View File

@@ -30,7 +30,9 @@ COLLECTOR_TO_ID = {
"telegeography_landing": 10,
"telegeography_systems": 11,
"arcgis_cables": 15,
"fao_landing_points": 16,
"arcgis_landing_points": 16,
"arcgis_cable_landing_relation": 17,
"fao_landing_points": 18,
}

View File

@@ -128,7 +128,7 @@ export async function loadGeoJSONFromPath(scene, earthObj) {
console.log('正在加载电缆数据...');
showStatusMessage('正在加载电缆数据...', 'warning');
const response = await fetch(PATHS.geoJSON);
const response = await fetch(PATHS.cablesApi);
if (!response.ok) {
throw new Error(`HTTP错误: ${response.status}`);
}
@@ -161,7 +161,7 @@ export async function loadGeoJSONFromPath(scene, earthObj) {
if (!geometry || !geometry.coordinates) continue;
const color = getCableColor(properties);
console.log('电缆:', properties.Name, '颜色:', color);
console.log('电缆 properties:', JSON.stringify(properties));
if (geometry.type === 'MultiLineString') {
for (const lineCoords of geometry.coordinates) {
@@ -239,7 +239,7 @@ export async function loadLandingPoints(scene, earthObj) {
try {
console.log('正在加载登陆点数据...');
const response = await fetch('./landing-point-geo.geojson');
const response = await fetch(PATHS.landingPointsApi);
if (!response.ok) {
console.error('HTTP错误:', response.status);
return;

View File

@@ -9,9 +9,11 @@ export const CONFIG = {
rotationSpeed: 0.002,
};
// Paths
export const PATHS = {
cablesApi: '/api/v1/visualization/geo/cables',
landingPointsApi: '/api/v1/visualization/geo/landing-points',
geoJSON: './geo.json',
landingPointsStatic: './landing-point-geo.geojson',
};
// Cable colors mapping

View File

@@ -171,7 +171,6 @@ function onMouseMove(event, camera) {
c.material.opacity = 1;
});
hoveredCable = cable;
hoveredCable = cable;
}
const userData = cable.userData;

View File

@@ -1,7 +1,7 @@
function Earth() {
return (
<iframe
src="/earth/3dearthmult.html"
src="/earth/index.html"
style={{
width: '100vw',
height: '100vh',