feat(backend): Add cable graph service and data collectors

## Changelog

### New Features

#### Cable Graph Service
- Add cable_graph.py for finding the shortest path between landing points
- Implement haversine distance calculation for great circle distances
- Support dateline crossing via longitude normalization
- NetworkX-based graph for optimal path finding

#### Data Collectors
- Add ArcGISCableCollector for fetching submarine cable data from ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from FAO CSV API

### Backend Changes

#### API Updates
- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas
- task.py: Update task model with new fields
- token.py: Update token schema

#### Services
- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes
- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for earth module
This commit is contained in:
rayd1o
2026-03-11 16:38:49 +08:00
parent 6cb4398f3a
commit aaae6a53c3
18 changed files with 990 additions and 146 deletions

View File

@@ -0,0 +1,239 @@
"""Cable graph service for finding shortest path between landing points"""
import math
from typing import List, Dict, Any, Optional, Tuple
import networkx as nx
def normalize_longitude(lon: float) -> float:
    """Normalize a longitude in degrees to the -180 to 180 range.

    Values already inside the closed range [-180, 180] are returned
    unchanged, so both +180 and -180 are preserved as given. Values above
    180 wrap into (-180, 180]; values below -180 wrap into [-180, 180).

    The wrap is computed in constant time. Non-finite inputs (NaN, +/-inf)
    are returned unchanged instead of being repeatedly shifted by 360, which
    can never bring an infinite value into range.

    Args:
        lon: Longitude in degrees.

    Returns:
        The equivalent longitude in the -180 to 180 range.
    """
    if -180 <= lon <= 180 or not math.isfinite(lon):
        return lon
    if lon > 180:
        wrapped = lon - 360 * math.ceil((lon - 180) / 360)
    else:
        wrapped = lon + 360 * math.ceil((-180 - lon) / 360)
    # The division can round to an exact integer for values a hair outside a
    # boundary; one corrective step keeps the result inside the range.
    if wrapped > 180:
        wrapped -= 360
    elif wrapped < -180:
        wrapped += 360
    return wrapped
def haversine_distance(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """Calculate great circle distance between two points in km, handling dateline crossing.

    Args:
        coord1: First point as (longitude, latitude) in degrees.
        coord2: Second point as (longitude, latitude) in degrees.

    Returns:
        The haversine distance in kilometres, using a sphere of radius 6371 km.
    """
    lon1, lat1 = normalize_longitude(coord1[0]), coord1[1]
    lon2, lat2 = normalize_longitude(coord2[0]), coord2[1]
    R = 6371  # sphere radius in km used for the distance

    lat1_rad = math.radians(lat1)
    lat2_rad = math.radians(lat2)
    delta_lat = math.radians(lat2 - lat1)
    # sin^2(delta_lon / 2) is periodic over 360 degrees, so a longitude
    # difference that spans the dateline yields the short-way distance.
    delta_lon = math.radians(lon2 - lon1)

    a = (
        math.sin(delta_lat / 2) ** 2
        + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
    )
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c
class CableGraph:
    """Undirected graph of landing points connected by submarine cables.

    Nodes are landing point ids. Each edge keeps the shortest cable found
    between its two endpoints and stores "distance" (km along the cable
    route), "cable_name", "cable_id" and the cable "coordinates".
    """

    # A coordinate is only matched to a landing point closer than this (km).
    MAX_SNAP_DISTANCE_KM = 500

    def __init__(self, cables: List[Dict], landing_points: List[Dict]):
        """Index the landing points and build the graph from the cables.

        Args:
            cables: Dicts with a "coordinates" list of [lon, lat, ...] points
                and optional "name" / "id" keys.
            landing_points: Dicts with "id", "lon" and "lat" keys.
        """
        self.graph = nx.Graph()
        self.landing_points = {lp["id"]: lp for lp in landing_points}
        self.point_coords = {lp["id"]: (lp["lon"], lp["lat"]) for lp in landing_points}
        self._build_graph(cables)

    def _build_graph(self, cables: List[Dict]):
        """Build graph from cables and landing points"""
        for cable in cables:
            coords = cable.get("coordinates", [])
            if len(coords) < 2:
                continue

            # Cable ends rarely sit exactly on a landing point, so the first
            # and last three route points are all tried as anchors.
            start_point = self._find_nearest_landing_point_multi(coords[:3])
            end_point = self._find_nearest_landing_point_multi(coords[-3:])

            # Compare against None explicitly: a landing point id of 0 is valid.
            if start_point is None or end_point is None or start_point == end_point:
                continue

            distance = self._calculate_cable_distance(coords)
            edge_data = {
                "distance": distance,
                "cable_name": cable.get("name", "Unknown"),
                "cable_id": cable.get("id"),
                "coordinates": coords,
            }

            if not self.graph.has_edge(start_point, end_point):
                self.graph.add_edge(start_point, end_point, **edge_data)
            elif distance < self.graph[start_point][end_point]["distance"]:
                # Parallel cables between the same endpoints: keep the shorter.
                self.graph[start_point][end_point].update(edge_data)

    def _nearest_among(self, coord: List[float], candidate_ids) -> Tuple[Optional[int], float]:
        """Return (id, distance_km) of the closest candidate within the snap limit.

        Args:
            coord: Target position as [lon, lat, ...].
            candidate_ids: Iterable of landing point ids to consider; each
                must be a key of ``self.point_coords``.

        Returns:
            ``(None, inf)`` when there is no candidate closer than
            ``MAX_SNAP_DISTANCE_KM``. Ties go to the first candidate iterated.
        """
        target = (normalize_longitude(coord[0]), coord[1])
        best_id = None
        best_dist = float("inf")
        for lp_id in candidate_ids:
            dist = haversine_distance(target, self.point_coords[lp_id])
            if dist < best_dist:
                best_dist = dist
                best_id = lp_id
        if best_dist < self.MAX_SNAP_DISTANCE_KM:
            return best_id, best_dist
        return None, float("inf")

    def _find_nearest_landing_point_multi(self, coords_subset: List[List[float]]) -> Optional[int]:
        """Find nearest landing point from multiple coordinates (e.g., first/last N points)"""
        best_point = None
        best_dist = float("inf")
        for coord in coords_subset:
            point, dist = self._nearest_among(coord, self.point_coords)
            if point is not None and dist < best_dist:
                best_dist = dist
                best_point = point
        return best_point

    def _find_nearest_landing_point(self, coord: List[float]) -> Optional[int]:
        """Find nearest landing point to given coordinate"""
        if not self.point_coords:
            return None
        return self._nearest_among(coord, self.point_coords)[0]

    def _find_nearest_connected_landing_point(self, coord: List[float]) -> Optional[int]:
        """Find nearest landing point that's connected to the graph, handling dateline"""
        if not self.point_coords or self.graph.number_of_nodes() == 0:
            return None
        # Only graph nodes are candidates, so the result always has at least
        # one cable attached. Node order is deterministic, unlike a set.
        return self._nearest_among(coord, self.graph.nodes())[0]

    def _calculate_cable_distance(self, coordinates: List[List[float]]) -> float:
        """Calculate total distance along cable route"""
        return sum(
            haversine_distance((here[0], here[1]), (there[0], there[1]))
            for here, there in zip(coordinates, coordinates[1:])
        )

    def find_shortest_path(
        self, start_coords: List[float], end_coords: List[float]
    ) -> Optional[Dict[str, Any]]:
        """Find shortest path between two coordinates

        Args:
            start_coords: Origin as [lon, lat].
            end_coords: Destination as [lon, lat].

        Returns:
            A dict with "start", "end", "total_distance_km", "segments" and
            "segment_count", or None when either coordinate has no connected
            landing point within the snap limit or no cable route joins them.
        """
        start_point = self._find_nearest_connected_landing_point(start_coords)
        end_point = self._find_nearest_connected_landing_point(end_coords)

        # Explicit None checks so that a landing point id of 0 is accepted.
        if start_point is None or end_point is None:
            return None

        try:
            path = nx.shortest_path(self.graph, start_point, end_point, weight="distance")
        except nx.NetworkXNoPath:
            return None

        total_distance = 0
        path_segments = []
        for u, v in zip(path, path[1:]):
            edge_data = self.graph[u][v]
            total_distance += edge_data["distance"]
            path_segments.append(
                {
                    "from": self.landing_points[u],
                    "to": self.landing_points[v],
                    "cable_name": edge_data["cable_name"],
                    "cable_id": edge_data["cable_id"],
                    "distance_km": round(edge_data["distance"], 2),
                    "coordinates": edge_data["coordinates"],
                }
            )

        return {
            "start": {
                "id": start_point,
                "name": self.landing_points[start_point].get("name", "Unknown"),
                "coords": list(self.point_coords[start_point]),
            },
            "end": {
                "id": end_point,
                "name": self.landing_points[end_point].get("name", "Unknown"),
                "coords": list(self.point_coords[end_point]),
            },
            "total_distance_km": round(total_distance, 2),
            "segments": path_segments,
            "segment_count": len(path_segments),
        }
def _first_line_coordinates(coords: Any) -> List:
    """Return one line of positions from LineString or MultiLineString coordinates."""
    if not coords or not isinstance(coords[0], (list, tuple)):
        return []
    # In a MultiLineString the first element is itself a list of positions;
    # in a LineString it is a single [lon, lat, ...] position.
    if coords[0] and isinstance(coords[0][0], (list, tuple)):
        return coords[0]
    return coords


def build_graph_from_data(cables_data: Dict, points_data: Dict) -> CableGraph:
    """Build cable graph from GeoJSON data

    Args:
        cables_data: GeoJSON-style dict whose "features" carry cable line
            geometries. For a MultiLineString only the first line is used.
        points_data: GeoJSON-style dict whose "features" carry landing point
            geometries with at least [lon, lat] coordinates.

    Returns:
        A CableGraph built from the extracted cables and landing points.
        Features with a null geometry or null properties are tolerated.
    """
    cables = []
    for feature in cables_data.get("features", []):
        props = feature.get("properties") or {}
        geometry = feature.get("geometry") or {}
        cables.append(
            {
                "id": props.get("id"),
                "name": props.get("name", props.get("Name", "Unknown")),
                "coordinates": _first_line_coordinates(geometry.get("coordinates")),
            }
        )

    points = []
    for feature in points_data.get("features", []):
        geom = feature.get("geometry") or {}
        props = feature.get("properties") or {}
        coords = geom.get("coordinates") or []
        if len(coords) >= 2:
            points.append(
                {
                    "id": props.get("id"),
                    "name": props.get("name", "Unknown"),
                    "lon": coords[0],
                    "lat": coords[1],
                }
            )

    return CableGraph(cables, points)

View File

@@ -24,6 +24,8 @@ from app.services.collectors.cloudflare import (
CloudflareRadarTrafficCollector,
CloudflareRadarTopASCollector,
)
from app.services.collectors.arcgis_cables import ArcGISCableCollector
from app.services.collectors.fao_landing import FAOLandingPointCollector
# Instantiate each collector and add it to collector_registry.
collector_registry.register(TOP500Collector())
collector_registry.register(EpochAIGPUCollector())
@@ -39,3 +41,5 @@ collector_registry.register(TeleGeographyCableSystemCollector())
collector_registry.register(CloudflareRadarDeviceCollector())
collector_registry.register(CloudflareRadarTrafficCollector())
collector_registry.register(CloudflareRadarTopASCollector())
# Submarine cable routes (ArcGIS) and landing points (FAO).
collector_registry.register(ArcGISCableCollector())
collector_registry.register(FAOLandingPointCollector())

View File

@@ -0,0 +1,84 @@
"""ArcGIS Submarine Cables Collector
Collects submarine cable data from ArcGIS GeoJSON API.
"""
import json
from typing import Dict, Any, List
from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
class ArcGISCableCollector(BaseCollector):
    """Collect submarine cable records from an ArcGIS feature layer served as GeoJSON."""

    name = "arcgis_cables"
    priority = "P1"
    module = "L2"
    frequency_hours = 168
    data_type = "submarine_cable"
    base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Query every feature of the layer as GeoJSON and parse the response.

        Raises:
            httpx.HTTPStatusError: If the service answers with an error status.
        """
        params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.base_url, params=params)
            response.raise_for_status()
            return self.parse_response(response.json())

    @staticmethod
    def _extract_route(geometry: Dict[str, Any]) -> List[List[Any]]:
        """Return the geometry as a list of lines, each a list of positions.

        A LineString becomes a single-line route so both geometry types share
        one output shape. Positions with fewer than two values and lines left
        empty after filtering are dropped; other geometry types give [].
        """
        geom_type = geometry.get("type")
        coords = geometry.get("coordinates") or []
        if geom_type == "MultiLineString":
            lines = coords
        elif geom_type == "LineString":
            lines = [coords]
        else:
            lines = []

        route = []
        for line in lines:
            kept = [
                point
                for point in line
                if isinstance(point, (list, tuple)) and len(point) >= 2
            ]
            if kept:
                route.append(kept)
        return route

    def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Convert a GeoJSON FeatureCollection dict into collector entries.

        Args:
            data: Decoded GeoJSON response; only its "features" list is read.

        Returns:
            One entry per feature. Features whose geometry or properties are
            null still produce an entry (with an empty route / default values).
        """
        reference_date = datetime.utcnow().strftime("%Y-%m-%d")
        result = []
        for feature in data.get("features") or []:
            # GeoJSON allows null "properties" and "geometry" members.
            props = feature.get("properties") or {}
            geometry = feature.get("geometry") or {}

            # Fall back to OBJECTID when cable_id is missing or null, so
            # records do not all collapse onto "arcgis_cable_None".
            cable_id = props.get("cable_id")
            source_key = cable_id if cable_id is not None else props.get("OBJECTID", "")

            # A null length must stay empty, not become the string "None".
            length = props.get("length")
            value = "" if length is None else str(length).replace(",", "")

            result.append(
                {
                    "source_id": f"arcgis_cable_{source_key}",
                    "name": props.get("Name", "Unknown"),
                    "country": "",
                    "city": "",
                    "latitude": "",
                    "longitude": "",
                    "value": value,
                    "unit": "km",
                    "metadata": {
                        "cable_id": cable_id,
                        "owners": props.get("owners"),
                        "rfs": props.get("rfs"),
                        "status": "active",
                        "year": props.get("year"),
                        "url": props.get("url"),
                        "color": props.get("color"),
                        "route_coordinates": self._extract_route(geometry),
                    },
                    "reference_date": reference_date,
                }
            )
        return result

View File

@@ -17,7 +17,20 @@ class BaseCollector(ABC):
priority: str = "P1"
module: str = "L1"
frequency_hours: int = 4
data_type: str = "generic" # Override in subclass: "supercomputer", "model", "dataset", etc.
data_type: str = "generic"
def __init__(self):
    """Start with no task or DB session attached and datasource id 1."""
    # Task and session stay empty until a collection run assigns them.
    self._current_task = self._db_session = None
    self._datasource_id = 1
def update_progress(self, records_processed: int):
    """Update task progress - call this during data processing

    Does nothing unless a current task and a DB session are attached and the
    task has a positive ``total_records``. Otherwise it stores
    ``records_processed`` on the task and sets ``progress`` to the percentage
    of ``total_records``, kept within 0-100.

    Args:
        records_processed: Number of records handled so far.
    """
    task = self._current_task
    if not (task and self._db_session):
        return
    # total_records may still be unset (None) before the fetch has finished.
    total = task.total_records or 0
    if total <= 0:
        return
    task.records_processed = records_processed
    # The processed count can exceed total_records, so cap the percentage.
    percent = (records_processed / total) * 100
    task.progress = max(0.0, min(100.0, percent))
@abstractmethod
async def fetch(self) -> List[Dict[str, Any]]:
@@ -35,13 +48,11 @@ class BaseCollector(ABC):
from app.models.collected_data import CollectedData
start_time = datetime.utcnow()
datasource_id = getattr(self, "_datasource_id", 1) # Default to 1 for built-in collectors
datasource_id = getattr(self, "_datasource_id", 1)
# Check if collector is active
if not collector_registry.is_active(self.name):
return {"status": "skipped", "reason": "Collector is disabled"}
# Log task start
task = CollectionTask(
datasource_id=datasource_id,
status="running",
@@ -51,16 +62,21 @@ class BaseCollector(ABC):
await db.commit()
task_id = task.id
self._current_task = task
self._db_session = db
try:
raw_data = await self.fetch()
task.total_records = len(raw_data)
await db.commit()
data = self.transform(raw_data)
# Save data to database
records_count = await self._save_data(db, data)
# Log task success
task.status = "success"
task.records_processed = records_count
task.progress = 100.0
task.completed_at = datetime.utcnow()
await db.commit()
@@ -94,8 +110,7 @@ class BaseCollector(ABC):
collected_at = datetime.utcnow()
records_added = 0
for item in data:
# Create CollectedData entry
for i, item in enumerate(data):
record = CollectedData(
source=self.name,
source_id=item.get("source_id") or item.get("id"),
@@ -125,7 +140,12 @@ class BaseCollector(ABC):
db.add(record)
records_added += 1
if i % 100 == 0:
self.update_progress(i + 1)
await db.commit()
await db.commit()
self.update_progress(len(data))
return records_added
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:

View File

@@ -0,0 +1,66 @@
"""FAO Landing Points Collector
Collects landing point data from FAO CSV API.
"""
from typing import Dict, Any, List
from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
class FAOLandingPointCollector(BaseCollector):
    """Collect cable landing points from the FAO landing-point CSV download."""

    name = "fao_landing_points"
    priority = "P1"
    module = "L2"
    frequency_hours = 168
    data_type = "landing_point"
    csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Download the CSV file and parse it into collector entries.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.csv_url)
            response.raise_for_status()
            return self.parse_csv(response.text)

    def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        """Parse the landing point CSV into collector entries.

        Columns are read by position: longitude, latitude, feature id, name
        and an optional "true"/"false" TBD flag. The first row is treated as
        the header and skipped. Rows with fewer than four columns or with
        non-numeric coordinates are ignored.

        Args:
            csv_text: Full CSV document as text.

        Returns:
            One entry per valid data row.
        """
        # Local imports keep this method self-contained; the csv module
        # handles quoted fields that contain commas (e.g. "City, Country"),
        # which a plain split(",") would break apart.
        import csv
        import io

        reference_date = datetime.utcnow().strftime("%Y-%m-%d")
        result = []

        rows = csv.reader(io.StringIO(csv_text.strip(), newline=""))
        next(rows, None)  # skip the header row
        for parts in rows:
            if len(parts) < 4:
                continue
            try:
                lon = float(parts[0])
                lat = float(parts[1])
            except ValueError:
                continue

            feature_id = parts[2]
            name = parts[3]
            is_tbd = len(parts) > 4 and parts[4].strip().lower() == "true"

            result.append(
                {
                    "source_id": f"fao_lp_{feature_id}",
                    "name": name,
                    "country": "",
                    "city": "",
                    "latitude": str(lat),
                    "longitude": str(lon),
                    "value": "",
                    "unit": "",
                    "metadata": {
                        "is_tbd": is_tbd,
                        "original_id": feature_id,
                    },
                    "reference_date": reference_date,
                }
            )
        return result

View File

@@ -29,6 +29,8 @@ COLLECTOR_TO_ID = {
"telegeography_cables": 9,
"telegeography_landing": 10,
"telegeography_systems": 11,
"arcgis_cables": 15,
"fao_landing_points": 16,
}