feat(backend): Add cable graph service and data collectors
## Changelog

### New Features

#### Cable Graph Service
- Add cable_graph.py for finding shortest path between landing points
- Implement haversine distance calculation for great circle distances
- Support for dateline crossing (longitude normalization)
- NetworkX-based graph for optimal path finding

#### Data Collectors
- Add ArcGISCableCollector for fetching submarine cable data from ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from FAO CSV API

### Backend Changes

#### API Updates
- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas
- task.py: Update task model with new fields
- token.py: Update token schema

#### Services
- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes
- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for earth module
This commit is contained in:
239
backend/app/services/cable_graph.py
Normal file
239
backend/app/services/cable_graph.py
Normal file
@@ -0,0 +1,239 @@
|
||||
"""Cable graph service for finding shortest path between landing points"""
|
||||
|
||||
import math
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import networkx as nx
|
||||
|
||||
|
||||
def normalize_longitude(lon: float) -> float:
    """Wrap a longitude into the closed interval [-180, 180].

    Equivalent to repeatedly adding/subtracting 360 (the original
    while-loop version), but O(1) even for inputs that are many
    revolutions out of range.
    """
    # Fast path: already in range (includes the +/-180 boundary values).
    if -180 <= lon <= 180:
        return lon
    # fmod keeps the sign of lon and yields |result| < 360.
    lon = math.fmod(lon, 360)
    if lon > 180:
        lon -= 360
    elif lon < -180:
        lon += 360
    return lon
|
||||
|
||||
|
||||
def haversine_distance(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
|
||||
"""Calculate great circle distance between two points in km, handling dateline crossing"""
|
||||
lon1, lat1 = normalize_longitude(coord1[0]), coord1[1]
|
||||
lon2, lat2 = normalize_longitude(coord2[0]), coord2[1]
|
||||
|
||||
R = 6371
|
||||
|
||||
lat1_rad = math.radians(lat1)
|
||||
lat2_rad = math.radians(lat2)
|
||||
delta_lat = math.radians(lat2 - lat1)
|
||||
delta_lon = math.radians(lon2 - lon1)
|
||||
|
||||
a = (
|
||||
math.sin(delta_lat / 2) ** 2
|
||||
+ math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(delta_lon / 2) ** 2
|
||||
)
|
||||
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
||||
|
||||
return R * c
|
||||
|
||||
|
||||
class CableGraph:
    """Weighted graph of submarine-cable landing points.

    Nodes are landing-point ids; an edge connects the landing points
    nearest to a cable's two endpoints, weighted by the distance along
    the cable route, so shortest-path queries follow real cable mileage.
    """

    # A query coordinate must be within this many km of a landing point
    # to snap to it (was a magic 500 repeated in two methods).
    MAX_SNAP_KM = 500

    def __init__(self, cables: List[Dict], landing_points: List[Dict]):
        """Build the graph.

        Each landing point dict needs "id", "lon", "lat"; each cable dict
        needs a "coordinates" polyline (list of [lon, lat] pairs).
        """
        self.graph = nx.Graph()
        self.landing_points = {lp["id"]: lp for lp in landing_points}
        self.point_coords = {lp["id"]: (lp["lon"], lp["lat"]) for lp in landing_points}
        self._build_graph(cables)

    def _build_graph(self, cables: List[Dict]):
        """Add one edge per cable between its nearest start/end landing points."""
        for cable in cables:
            coords = cable.get("coordinates", [])
            if len(coords) < 2:
                continue

            # Search several points at each end: cable polylines do not
            # always terminate exactly at a landing point.
            start_point = self._find_nearest_landing_point_multi(coords[:3])
            end_point = self._find_nearest_landing_point_multi(coords[-3:])

            # Compare against None explicitly: a landing-point id of 0 is
            # valid but falsy, so truthiness tests would drop it.
            if start_point is None or end_point is None or start_point == end_point:
                continue

            distance = self._calculate_cable_distance(coords)
            edge_data = {
                "distance": distance,
                "cable_name": cable.get("name", "Unknown"),
                "cable_id": cable.get("id"),
                "coordinates": coords,
            }

            # Parallel cables between the same pair: keep the shorter route.
            if self.graph.has_edge(start_point, end_point):
                if distance < self.graph[start_point][end_point]["distance"]:
                    self.graph[start_point][end_point].update(edge_data)
            else:
                self.graph.add_edge(start_point, end_point, **edge_data)

    def _find_nearest_landing_point_multi(self, coords_subset: List[List[float]]) -> Optional[int]:
        """Return the landing point closest to any coordinate in the subset."""
        best_point = None
        best_dist = float("inf")

        for coord in coords_subset:
            point = self._find_nearest_landing_point(coord)
            if point is None:  # None check, not truthiness: id 0 is valid
                continue
            dist = haversine_distance(
                (normalize_longitude(coord[0]), coord[1]), self.point_coords[point]
            )
            if dist < best_dist:
                best_dist = dist
                best_point = point

        return best_point

    def _find_nearest_landing_point(self, coord: List[float]) -> Optional[int]:
        """Return the id of the landing point nearest coord, or None if
        none is within MAX_SNAP_KM."""
        if not self.point_coords:
            return None

        min_dist = float("inf")
        nearest_id = None
        target_lon = normalize_longitude(coord[0])
        target_lat = coord[1]

        for lp_id, (lon, lat) in self.point_coords.items():
            dist = haversine_distance((target_lon, target_lat), (lon, lat))
            if dist < min_dist:
                min_dist = dist
                nearest_id = lp_id

        return nearest_id if min_dist < self.MAX_SNAP_KM else None

    def _find_nearest_connected_landing_point(self, coord: List[float]) -> Optional[int]:
        """Like _find_nearest_landing_point, but restricted to nodes that
        have at least one cable edge in the graph."""
        if not self.point_coords or not self.graph.nodes():
            return None

        min_dist = float("inf")
        nearest_id = None
        target_lon, target_lat = normalize_longitude(coord[0]), coord[1]

        for lp_id in self.graph.nodes():
            lp_lon, lp_lat = self.point_coords[lp_id]
            dist = haversine_distance((target_lon, target_lat), (lp_lon, lp_lat))
            if dist < min_dist:
                min_dist = dist
                nearest_id = lp_id

        return nearest_id if min_dist < self.MAX_SNAP_KM else None

    def _calculate_cable_distance(self, coordinates: List[List[float]]) -> float:
        """Total great-circle length along the cable polyline, in km."""
        total = 0.0
        for i in range(len(coordinates) - 1):
            total += haversine_distance(
                (coordinates[i][0], coordinates[i][1]),
                (coordinates[i + 1][0], coordinates[i + 1][1]),
            )
        return total

    def find_shortest_path(
        self, start_coords: List[float], end_coords: List[float]
    ) -> Optional[Dict[str, Any]]:
        """Find the shortest cable path between two [lon, lat] coordinates.

        Returns a dict with start/end landing points, total distance, and
        per-segment details, or None when either coordinate cannot be
        snapped to a connected landing point or no path exists.
        """
        start_point = self._find_nearest_connected_landing_point(start_coords)
        end_point = self._find_nearest_connected_landing_point(end_coords)

        # id 0 is a valid node id, so test for None rather than truthiness.
        if start_point is None or end_point is None:
            return None

        # Both endpoints are known graph nodes; shortest_path raises
        # NetworkXNoPath when they are in different components.
        try:
            path = nx.shortest_path(self.graph, start_point, end_point, weight="distance")
        except nx.NetworkXNoPath:
            return None

        total_distance = 0.0
        path_segments = []

        for u, v in zip(path, path[1:]):
            edge_data = self.graph[u][v]
            total_distance += edge_data["distance"]
            path_segments.append(
                {
                    "from": self.landing_points[u],
                    "to": self.landing_points[v],
                    "cable_name": edge_data["cable_name"],
                    "cable_id": edge_data["cable_id"],
                    "distance_km": round(edge_data["distance"], 2),
                    "coordinates": edge_data["coordinates"],
                }
            )

        return {
            "start": {
                "id": start_point,
                "name": self.landing_points[start_point].get("name", "Unknown"),
                "coords": list(self.point_coords[start_point]),
            },
            "end": {
                "id": end_point,
                "name": self.landing_points[end_point].get("name", "Unknown"),
                "coords": list(self.point_coords[end_point]),
            },
            "total_distance_km": round(total_distance, 2),
            "segments": path_segments,
            "segment_count": len(path_segments),
        }
|
||||
|
||||
|
||||
def build_graph_from_data(cables_data: Dict, points_data: Dict) -> CableGraph:
    """Build a CableGraph from cable and landing-point GeoJSON FeatureCollections.

    For MultiLineString cables only the first line is used.

    Fix: the previous check `isinstance(coords[0], list)` could not tell a
    MultiLineString from a LineString — a LineString's coordinates[0] is a
    [lon, lat] pair, which is also a list, so LineStrings were wrongly
    unwrapped down to a single point. We now test the nesting depth via
    coords[0][0]: only a MultiLineString has a list there.
    """
    cables = []
    for feature in cables_data.get("features", []):
        props = feature.get("properties", {})
        coords = feature.get("geometry", {}).get("coordinates", [])
        # MultiLineString (depth 3): take the first line only.
        if coords and coords[0] and isinstance(coords[0][0], (list, tuple)):
            coords = coords[0]

        cables.append(
            {
                "id": props.get("id"),
                "name": props.get("name", props.get("Name", "Unknown")),
                "coordinates": coords,
            }
        )

    points = []
    for feature in points_data.get("features", []):
        geom = feature.get("geometry", {})
        props = feature.get("properties", {})
        coords = geom.get("coordinates", [])

        # Require at least [lon, lat] for a usable landing point.
        if coords and len(coords) >= 2:
            points.append(
                {
                    "id": props.get("id"),
                    "name": props.get("name", "Unknown"),
                    "lon": coords[0],
                    "lat": coords[1],
                }
            )

    return CableGraph(cables, points)
|
||||
@@ -24,6 +24,8 @@ from app.services.collectors.cloudflare import (
|
||||
CloudflareRadarTrafficCollector,
|
||||
CloudflareRadarTopASCollector,
|
||||
)
|
||||
from app.services.collectors.arcgis_cables import ArcGISCableCollector
|
||||
from app.services.collectors.fao_landing import FAOLandingPointCollector
|
||||
|
||||
collector_registry.register(TOP500Collector())
|
||||
collector_registry.register(EpochAIGPUCollector())
|
||||
@@ -39,3 +41,5 @@ collector_registry.register(TeleGeographyCableSystemCollector())
|
||||
collector_registry.register(CloudflareRadarDeviceCollector())
|
||||
collector_registry.register(CloudflareRadarTrafficCollector())
|
||||
collector_registry.register(CloudflareRadarTopASCollector())
|
||||
collector_registry.register(ArcGISCableCollector())
|
||||
collector_registry.register(FAOLandingPointCollector())
|
||||
|
||||
84
backend/app/services/collectors/arcgis_cables.py
Normal file
84
backend/app/services/collectors/arcgis_cables.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""ArcGIS Submarine Cables Collector
|
||||
|
||||
Collects submarine cable data from ArcGIS GeoJSON API.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
import httpx
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
|
||||
|
||||
class ArcGISCableCollector(BaseCollector):
    """Collector for submarine-cable geometries from the ArcGIS GeoJSON API."""

    name = "arcgis_cables"
    priority = "P1"
    module = "L2"
    frequency_hours = 168
    data_type = "submarine_cable"

    base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Query the feature service and return parsed cable entries."""
        query = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

        async with httpx.AsyncClient(timeout=60.0) as client:
            resp = await client.get(self.base_url, params=query)
            resp.raise_for_status()
            return self.parse_response(resp.json())

    def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn a GeoJSON FeatureCollection into normalized cable records."""
        entries: List[Dict[str, Any]] = []

        for feature in data.get("features", []):
            props = feature.get("properties", {})
            geometry = feature.get("geometry", {})
            geom_type = geometry.get("type")

            # Normalize both geometry kinds to a list of lines, then keep
            # only points with at least (lon, lat).
            if geom_type == "MultiLineString":
                raw_lines = geometry.get("coordinates", [])
            elif geom_type == "LineString":
                raw_lines = [geometry.get("coordinates", [])]
            else:
                raw_lines = []

            route_coordinates = []
            for raw_line in raw_lines:
                kept = [pt for pt in raw_line if len(pt) >= 2]
                if kept:
                    route_coordinates.append(kept)

            try:
                entries.append(
                    {
                        "source_id": f"arcgis_cable_{props.get('cable_id', props.get('OBJECTID', ''))}",
                        "name": props.get("Name", "Unknown"),
                        "country": "",
                        "city": "",
                        "latitude": "",
                        "longitude": "",
                        "value": str(props.get("length", "")).replace(",", ""),
                        "unit": "km",
                        "metadata": {
                            "cable_id": props.get("cable_id"),
                            "owners": props.get("owners"),
                            "rfs": props.get("rfs"),
                            "status": "active",
                            "year": props.get("year"),
                            "url": props.get("url"),
                            "color": props.get("color"),
                            "route_coordinates": route_coordinates,
                        },
                        "reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
                    }
                )
            except (ValueError, TypeError, KeyError):
                # Skip malformed features rather than failing the whole batch.
                continue

        return entries
|
||||
@@ -17,7 +17,20 @@ class BaseCollector(ABC):
|
||||
priority: str = "P1"
|
||||
module: str = "L1"
|
||||
frequency_hours: int = 4
|
||||
data_type: str = "generic" # Override in subclass: "supercomputer", "model", "dataset", etc.
|
||||
data_type: str = "generic"
|
||||
|
||||
def __init__(self):
    # Task/DB context attached while a collection run is in flight;
    # used by update_progress() for progress reporting.
    self._current_task = None  # the CollectionTask currently executing, if any
    self._db_session = None  # DB session used when a run is active
    self._datasource_id = 1  # default datasource id for built-in collectors
||||
|
||||
def update_progress(self, records_processed: int):
    """Record how many items have been processed on the current task.

    Silently does nothing unless a task and DB session are attached and
    the task has a positive total_records (guards division by zero).
    """
    task = self._current_task
    if not task or not self._db_session:
        return
    if not task.total_records > 0:
        return
    task.records_processed = records_processed
    task.progress = (records_processed / task.total_records) * 100
|
||||
|
||||
@abstractmethod
|
||||
async def fetch(self) -> List[Dict[str, Any]]:
|
||||
@@ -35,13 +48,11 @@ class BaseCollector(ABC):
|
||||
from app.models.collected_data import CollectedData
|
||||
|
||||
start_time = datetime.utcnow()
|
||||
datasource_id = getattr(self, "_datasource_id", 1) # Default to 1 for built-in collectors
|
||||
datasource_id = getattr(self, "_datasource_id", 1)
|
||||
|
||||
# Check if collector is active
|
||||
if not collector_registry.is_active(self.name):
|
||||
return {"status": "skipped", "reason": "Collector is disabled"}
|
||||
|
||||
# Log task start
|
||||
task = CollectionTask(
|
||||
datasource_id=datasource_id,
|
||||
status="running",
|
||||
@@ -51,16 +62,21 @@ class BaseCollector(ABC):
|
||||
await db.commit()
|
||||
task_id = task.id
|
||||
|
||||
self._current_task = task
|
||||
self._db_session = db
|
||||
|
||||
try:
|
||||
raw_data = await self.fetch()
|
||||
task.total_records = len(raw_data)
|
||||
await db.commit()
|
||||
|
||||
data = self.transform(raw_data)
|
||||
|
||||
# Save data to database
|
||||
records_count = await self._save_data(db, data)
|
||||
|
||||
# Log task success
|
||||
task.status = "success"
|
||||
task.records_processed = records_count
|
||||
task.progress = 100.0
|
||||
task.completed_at = datetime.utcnow()
|
||||
await db.commit()
|
||||
|
||||
@@ -94,8 +110,7 @@ class BaseCollector(ABC):
|
||||
collected_at = datetime.utcnow()
|
||||
records_added = 0
|
||||
|
||||
for item in data:
|
||||
# Create CollectedData entry
|
||||
for i, item in enumerate(data):
|
||||
record = CollectedData(
|
||||
source=self.name,
|
||||
source_id=item.get("source_id") or item.get("id"),
|
||||
@@ -125,7 +140,12 @@ class BaseCollector(ABC):
|
||||
db.add(record)
|
||||
records_added += 1
|
||||
|
||||
if i % 100 == 0:
|
||||
self.update_progress(i + 1)
|
||||
await db.commit()
|
||||
|
||||
await db.commit()
|
||||
self.update_progress(len(data))
|
||||
return records_added
|
||||
|
||||
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
|
||||
|
||||
66
backend/app/services/collectors/fao_landing.py
Normal file
66
backend/app/services/collectors/fao_landing.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""FAO Landing Points Collector
|
||||
|
||||
Collects landing point data from FAO CSV API.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
import httpx
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
|
||||
|
||||
class FAOLandingPointCollector(BaseCollector):
    """Collector for submarine-cable landing points from the FAO CSV API."""

    name = "fao_landing_points"
    priority = "P1"
    module = "L2"
    frequency_hours = 168
    data_type = "landing_point"

    csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Download the landing-point CSV and parse it into records."""
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.csv_url)
            response.raise_for_status()
            return self.parse_csv(response.text)

    def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        """Parse CSV rows of (lon, lat, id, name[, is_tbd]) into entries.

        Uses the csv module so that quoted fields containing commas
        (e.g. a name like "Sydney, Australia") are parsed correctly;
        the previous naive line.split(",") shifted columns on such rows.
        Malformed rows are skipped.
        """
        import csv  # local import: keeps module-level deps unchanged

        result: List[Dict[str, Any]] = []

        reader = csv.reader(csv_text.strip().splitlines())
        next(reader, None)  # skip header row

        for parts in reader:
            if not parts:  # blank line
                continue
            if len(parts) >= 4:
                try:
                    lon = float(parts[0])
                    lat = float(parts[1])
                    feature_id = parts[2]
                    # csv already unquotes; strip('"') kept as harmless belt-and-braces
                    name = parts[3].strip('"')
                    is_tbd = parts[4].strip() == "true" if len(parts) > 4 else False

                    result.append(
                        {
                            "source_id": f"fao_lp_{feature_id}",
                            "name": name,
                            "country": "",
                            "city": "",
                            "latitude": str(lat),
                            "longitude": str(lon),
                            "value": "",
                            "unit": "",
                            "metadata": {
                                "is_tbd": is_tbd,
                                "original_id": feature_id,
                            },
                            "reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
                        }
                    )
                except (ValueError, IndexError):
                    continue

        return result
|
||||
@@ -29,6 +29,8 @@ COLLECTOR_TO_ID = {
|
||||
"telegeography_cables": 9,
|
||||
"telegeography_landing": 10,
|
||||
"telegeography_systems": 11,
|
||||
"arcgis_cables": 15,
|
||||
"fao_landing_points": 16,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user