feat(backend): Add cable graph service and data collectors

## Changelog

### New Features

#### Cable Graph Service
- Add cable_graph.py for finding shortest path between landing points
- Implement haversine distance calculation for great circle distances
- Support for dateline crossing (longitude normalization)
- NetworkX-based graph for optimal path finding

#### Data Collectors
- Add ArcGISCableCollector for fetching submarine cable data from ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from FAO CSV API

### Backend Changes

#### API Updates
- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas
- task.py: Update task model with new fields
- token.py: Update token schema

#### Services
- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes
- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for earth module
This commit is contained in:
rayd1o
2026-03-11 16:38:49 +08:00
parent 6cb4398f3a
commit aaae6a53c3
18 changed files with 990 additions and 146 deletions

View File

@@ -24,6 +24,8 @@ from app.services.collectors.cloudflare import (
CloudflareRadarTrafficCollector,
CloudflareRadarTopASCollector,
)
from app.services.collectors.arcgis_cables import ArcGISCableCollector
from app.services.collectors.fao_landing import FAOLandingPointCollector
collector_registry.register(TOP500Collector())
collector_registry.register(EpochAIGPUCollector())
@@ -39,3 +41,5 @@ collector_registry.register(TeleGeographyCableSystemCollector())
collector_registry.register(CloudflareRadarDeviceCollector())
collector_registry.register(CloudflareRadarTrafficCollector())
collector_registry.register(CloudflareRadarTopASCollector())
collector_registry.register(ArcGISCableCollector())
collector_registry.register(FAOLandingPointCollector())

View File

@@ -0,0 +1,84 @@
"""ArcGIS Submarine Cables Collector
Collects submarine cable data from ArcGIS GeoJSON API.
"""
import json
from typing import Dict, Any, List
from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
class ArcGISCableCollector(BaseCollector):
    """Collects submarine-cable features (attributes + route geometry) from the
    ArcGIS GeoJSON API and normalizes them into collector entries."""

    name = "arcgis_cables"
    priority = "P1"
    module = "L2"
    frequency_hours = 168  # refresh weekly; cable layouts change rarely
    data_type = "submarine_cable"
    base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Fetch every cable feature (where=1=1, all fields, with geometry) as
        GeoJSON and return the parsed entries.

        Raises:
            httpx.HTTPStatusError: if the ArcGIS endpoint returns an error status.
        """
        params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.base_url, params=params)
            response.raise_for_status()
            return self.parse_response(response.json())

    @staticmethod
    def _extract_route(geometry: Dict[str, Any]) -> List[List[Any]]:
        """Normalize a GeoJSON geometry into a list of coordinate lines.

        Accepts ``LineString`` (wrapped as a single line) and
        ``MultiLineString``; any other type yields an empty route. Points with
        fewer than two components (missing lon/lat) are dropped, and lines that
        end up empty are omitted.
        """
        geom_type = geometry.get("type")
        if geom_type == "MultiLineString":
            lines = geometry.get("coordinates", [])
        elif geom_type == "LineString":
            lines = [geometry.get("coordinates", [])]
        else:
            return []

        route_coordinates = []
        for line in lines:
            cleaned = [point for point in line if len(point) >= 2]
            if cleaned:
                route_coordinates.append(cleaned)
        return route_coordinates

    def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Transform a GeoJSON FeatureCollection into collector entries.

        Each entry carries the cable's name and length (value/unit) plus the
        cleaned route geometry in ``metadata``. A feature that raises while
        being assembled is skipped rather than aborting the whole batch.
        """
        result = []
        for feature in data.get("features", []):
            props = feature.get("properties", {})
            # Shared helper replaces the previously duplicated
            # MultiLineString/LineString cleaning loops.
            route_coordinates = self._extract_route(feature.get("geometry", {}))
            try:
                entry = {
                    # Prefer the stable cable_id; fall back to the layer OBJECTID.
                    "source_id": f"arcgis_cable_{props.get('cable_id', props.get('OBJECTID', ''))}",
                    "name": props.get("Name", "Unknown"),
                    "country": "",
                    "city": "",
                    "latitude": "",
                    "longitude": "",
                    # Length may arrive with thousands separators, e.g. "6,000".
                    "value": str(props.get("length", "")).replace(",", ""),
                    "unit": "km",
                    "metadata": {
                        "cable_id": props.get("cable_id"),
                        "owners": props.get("owners"),
                        "rfs": props.get("rfs"),
                        "status": "active",
                        "year": props.get("year"),
                        "url": props.get("url"),
                        "color": props.get("color"),
                        "route_coordinates": route_coordinates,
                    },
                    "reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
                }
                result.append(entry)
            except (ValueError, TypeError, KeyError):
                # Malformed feature: skip it, keep collecting the rest.
                continue
        return result

View File

@@ -17,7 +17,20 @@ class BaseCollector(ABC):
priority: str = "P1"
module: str = "L1"
frequency_hours: int = 4
data_type: str = "generic" # Override in subclass: "supercomputer", "model", "dataset", etc.
data_type: str = "generic"
def __init__(self):
    # Per-run state: run() attaches the active CollectionTask and DB session
    # here so update_progress() can report progress against them.
    self._current_task = None  # active CollectionTask row, set by run()
    self._db_session = None  # async session used while a run is in flight
    # Datasource id defaults to 1 for built-in collectors (see run()).
    self._datasource_id = 1
def update_progress(self, records_processed: int):
    """Record how many records this run has processed so far.

    Intended to be called periodically during data processing. Silently does
    nothing unless run() has attached a task and DB session and the task has a
    positive total_records — the latter also guards the division below.
    Updates the in-memory task row only; committing is left to the caller.
    """
    if self._current_task and self._db_session and self._current_task.total_records > 0:
        self._current_task.records_processed = records_processed
        # Progress as a percentage of the known total.
        self._current_task.progress = (
            records_processed / self._current_task.total_records
        ) * 100
@abstractmethod
async def fetch(self) -> List[Dict[str, Any]]:
@@ -35,13 +48,11 @@ class BaseCollector(ABC):
from app.models.collected_data import CollectedData
start_time = datetime.utcnow()
datasource_id = getattr(self, "_datasource_id", 1) # Default to 1 for built-in collectors
datasource_id = getattr(self, "_datasource_id", 1)
# Check if collector is active
if not collector_registry.is_active(self.name):
return {"status": "skipped", "reason": "Collector is disabled"}
# Log task start
task = CollectionTask(
datasource_id=datasource_id,
status="running",
@@ -51,16 +62,21 @@ class BaseCollector(ABC):
await db.commit()
task_id = task.id
self._current_task = task
self._db_session = db
try:
raw_data = await self.fetch()
task.total_records = len(raw_data)
await db.commit()
data = self.transform(raw_data)
# Save data to database
records_count = await self._save_data(db, data)
# Log task success
task.status = "success"
task.records_processed = records_count
task.progress = 100.0
task.completed_at = datetime.utcnow()
await db.commit()
@@ -94,8 +110,7 @@ class BaseCollector(ABC):
collected_at = datetime.utcnow()
records_added = 0
for item in data:
# Create CollectedData entry
for i, item in enumerate(data):
record = CollectedData(
source=self.name,
source_id=item.get("source_id") or item.get("id"),
@@ -125,7 +140,12 @@ class BaseCollector(ABC):
db.add(record)
records_added += 1
if i % 100 == 0:
self.update_progress(i + 1)
await db.commit()
await db.commit()
self.update_progress(len(data))
return records_added
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:

View File

@@ -0,0 +1,66 @@
"""FAO Landing Points Collector
Collects landing point data from FAO CSV API.
"""
from typing import Dict, Any, List
from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
class FAOLandingPointCollector(BaseCollector):
    """Collects submarine-cable landing points from the FAO CSV dataset."""

    name = "fao_landing_points"
    priority = "P1"
    module = "L2"
    frequency_hours = 168  # refresh weekly; landing points change rarely
    data_type = "landing_point"
    csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Download the landing-point CSV and return the parsed entries.

        Raises:
            httpx.HTTPStatusError: if the FAO endpoint returns an error status.
        """
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.csv_url)
            response.raise_for_status()
            return self.parse_csv(response.text)

    def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        """Parse the FAO landing-point CSV into collector entries.

        Expected columns: longitude, latitude, feature id, name, optional
        is_tbd flag. Uses csv.reader so quoted names containing commas (e.g.
        "City, Country") parse correctly — a naive split(",") would shift the
        columns for such rows. Rows that are blank, too short, or have
        non-numeric coordinates are skipped.
        """
        import csv
        import io

        result: List[Dict[str, Any]] = []
        reader = csv.reader(io.StringIO(csv_text))
        next(reader, None)  # drop header row
        # Compute once; every row in a batch shares the same reference date.
        reference_date = datetime.utcnow().strftime("%Y-%m-%d")
        for parts in reader:
            if len(parts) < 4:
                continue
            try:
                lon = float(parts[0])
                lat = float(parts[1])
            except ValueError:
                # Non-numeric coordinates (or stray text): skip the row.
                continue
            feature_id = parts[2]
            name = parts[3]  # csv.reader already removes surrounding quotes
            is_tbd = parts[4].strip() == "true" if len(parts) > 4 else False
            result.append(
                {
                    "source_id": f"fao_lp_{feature_id}",
                    "name": name,
                    "country": "",
                    "city": "",
                    "latitude": str(lat),
                    "longitude": str(lon),
                    "value": "",
                    "unit": "",
                    "metadata": {
                        "is_tbd": is_tbd,
                        "original_id": feature_id,
                    },
                    "reference_date": reference_date,
                }
            )
        return result