feat: add data sources config system and Earth API integration

- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
This commit is contained in:
rayd1o
2026-03-13 10:54:02 +08:00
parent 99771a88c5
commit de32552159
25 changed files with 222 additions and 23 deletions

View File

@@ -27,10 +27,6 @@ class Settings(BaseSettings):
CORS_ORIGINS: List[str] = ["http://localhost:3000", "http://localhost:8000"]
ARCGIS_CABLE_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
ARCGIS_LANDING_POINT_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
ARCGIS_CABLE_LANDING_RELATION_URL: str = "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
@property
def REDIS_URL(self) -> str:
return os.getenv(

View File

@@ -0,0 +1,78 @@
import logging
import os
from functools import lru_cache
from typing import Optional

import yaml
# Maps each collector name to the dotted "<section>.<key>" path of its URL in
# the YAML configuration, e.g. "arcgis.cable_url" is key "cable_url" inside
# the top-level "arcgis" section.
COLLECTOR_URL_KEYS = {
    "arcgis_cables": "arcgis.cable_url",
    "arcgis_landing_points": "arcgis.landing_point_url",
    "arcgis_cable_landing_relation": "arcgis.cable_landing_relation_url",
    "fao_landing_points": "fao.landing_point_url",
    "telegeography_cables": "telegeography.cable_url",
    "telegeography_landing": "telegeography.landing_point_url",
    "huggingface_models": "huggingface.models_url",
    "huggingface_datasets": "huggingface.datasets_url",
    "huggingface_spaces": "huggingface.spaces_url",
    "cloudflare_radar_device": "cloudflare.radar_device_url",
    "cloudflare_radar_traffic": "cloudflare.radar_traffic_url",
    "cloudflare_radar_top_locations": "cloudflare.radar_top_locations_url",
    "peeringdb_ixp": "peeringdb.ixp_url",
    "peeringdb_network": "peeringdb.network_url",
    "peeringdb_facility": "peeringdb.facility_url",
    "top500": "top500.url",
    "epoch_ai_gpu": "epoch_ai.gpu_clusters_url",
}

_logger = logging.getLogger(__name__)


class DataSourcesConfig:
    """Resolve collector URLs from a YAML file, with an optional DB override.

    The YAML document is read once at construction time. ``get_yaml_url``
    answers from that in-memory copy; ``get_url`` additionally consults the
    database and prefers an active record's endpoint when one exists.
    """

    def __init__(self, config_path: Optional[str] = None):
        """Load the YAML configuration.

        Args:
            config_path: Path of the YAML file. When ``None``, the file
                ``data_sources.yaml`` located next to this module is used.
                A path that does not exist yields an empty configuration.
        """
        if config_path is None:
            config_path = os.path.join(os.path.dirname(__file__), "data_sources.yaml")
        self._yaml_config = {}
        if os.path.exists(config_path):
            # Explicit encoding: the platform default is locale-dependent.
            with open(config_path, "r", encoding="utf-8") as f:
                # An empty document parses to None; normalise it to {}.
                self._yaml_config = yaml.safe_load(f) or {}

    def get_yaml_url(self, collector_name: str) -> str:
        """Return the URL configured in YAML for *collector_name*.

        Args:
            collector_name: A key of ``COLLECTOR_URL_KEYS``.

        Returns:
            The configured URL, or ``""`` when the collector is unknown, any
            segment of its dotted path is missing, or the value found is not
            a string.
        """
        key = COLLECTOR_URL_KEYS.get(collector_name, "")
        if not key:
            return ""
        value = self._yaml_config
        for part in key.split("."):
            # Stop as soon as the path runs into something that is not a mapping.
            if not isinstance(value, dict):
                return ""
            value = value.get(part, "")
        return value if isinstance(value, str) else ""

    async def get_url(self, collector_name: str, db) -> str:
        """Return the effective URL for *collector_name*.

        An active ``DataSourceConfig`` row whose ``name`` equals
        *collector_name* and whose ``endpoint`` is non-empty takes precedence
        over the YAML value.

        Args:
            collector_name: A key of ``COLLECTOR_URL_KEYS``.
            db: Object exposing an awaitable ``execute(query)``; a falsy value
                skips the database lookup entirely.

        Returns:
            The database endpoint when available, otherwise the YAML URL
            (possibly ``""``). Lookup failures never propagate: they are
            logged and the YAML URL is returned.
        """
        yaml_url = self.get_yaml_url(collector_name)
        if not db:
            return yaml_url
        try:
            # Imports stay inside the try block, as in the original, so a
            # missing ORM layer also degrades to the YAML value.
            from sqlalchemy import select
            from app.models.datasource_config import DataSourceConfig

            query = select(DataSourceConfig).where(
                DataSourceConfig.name == collector_name,
                # "==" is required here: it builds a SQL expression.
                DataSourceConfig.is_active == True,  # noqa: E712
            )
            result = await db.execute(query)
            db_config = result.scalar_one_or_none()
            if db_config and db_config.endpoint:
                return db_config.endpoint
        except Exception:
            # Best-effort override: keep serving the YAML URL, but leave a
            # trace so a broken database lookup does not go unnoticed.
            _logger.warning(
                "Database URL override lookup failed for collector %r; "
                "falling back to YAML configuration",
                collector_name,
                exc_info=True,
            )
        return yaml_url
@lru_cache()
def get_data_sources_config() -> DataSourcesConfig:
    """Return the process-wide DataSourcesConfig instance.

    The first call constructs the configuration with its default path;
    ``lru_cache`` then returns that same object on every later call.
    """
    shared_config = DataSourcesConfig()
    return shared_config

View File

@@ -0,0 +1,35 @@
# Data Sources Configuration
# All external data source URLs should be configured here
#
# Layout: one top-level section per provider, each holding named URL entries.
# The loader's COLLECTOR_URL_KEYS table addresses an entry by dotted path,
# e.g. "arcgis.cable_url" means key "cable_url" in section "arcgis".
# The loader prefers the endpoint of a matching active database record, when
# one exists, over the URL given here.

# ArcGIS FeatureServer query endpoints (SubmarineCables service, layers 2, 1, 3).
arcgis:
cable_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/2/query"
landing_point_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/1/query"
cable_landing_relation_url: "https://services.arcgis.com/6DIQcwlPy8knb6sg/ArcGIS/rest/services/SubmarineCables/FeatureServer/3/query"
# FAO catalog download of landing-point-geo.csv.
fao:
landing_point_url: "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"
# NOTE(review): both files are served from a GitHub repository via
# raw.githubusercontent.com, not from a telegeography host - confirm this is
# the intended upstream.
telegeography:
cable_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/cable.json"
landing_point_url: "https://raw.githubusercontent.com/lintaojlu/submarine_cable_information/main/landing_point.json"
# Hugging Face API listing endpoints for models, datasets and spaces.
huggingface:
models_url: "https://huggingface.co/api/models"
datasets_url: "https://huggingface.co/api/datasets"
spaces_url: "https://huggingface.co/api/spaces"
# Cloudflare Radar HTTP endpoints under /client/v4/radar/http.
cloudflare:
radar_device_url: "https://api.cloudflare.com/client/v4/radar/http/summary/device_type"
radar_traffic_url: "https://api.cloudflare.com/client/v4/radar/http/timeseries/requests"
radar_top_locations_url: "https://api.cloudflare.com/client/v4/radar/http/top/locations"
# PeeringDB API endpoints: ix, net and fac.
peeringdb:
ixp_url: "https://www.peeringdb.com/api/ix"
network_url: "https://www.peeringdb.com/api/net"
facility_url: "https://www.peeringdb.com/api/fac"
# NOTE(review): this URL pins the 2025/11 list edition; presumably it must be
# updated when a newer list is published - confirm.
top500:
url: "https://top500.org/lists/top500/list/2025/11/"
# Epoch AI GPU clusters data page.
epoch_ai:
gpu_clusters_url: "https://epoch.ai/data/gpu-clusters"

View File

@@ -7,6 +7,7 @@ from typing import Dict, Any, Optional
from app.core.websocket.manager import manager
class DataBroadcaster:
"""Periodically broadcasts data to connected WebSocket clients"""