feat(backend): Add cable graph service and data collectors
## Changelog

### New Features

#### Cable Graph Service
- Add cable_graph.py for finding the shortest path between landing points (a sketch follows the changelog)
- Implement haversine distance calculation for great-circle distances
- Support dateline crossing (longitude normalization)
- NetworkX-based graph for optimal path finding

#### Data Collectors
- Add ArcGISCableCollector for fetching submarine cable data from the ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from the FAO CSV API

### Backend Changes

#### API Updates
- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas
- task.py: Update task model with new fields
- token.py: Update token schema

#### Services
- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes
- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for earth module
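cable_graph.py itself does not appear in this diff; a minimal sketch of what the changelog describes might look like the following (build_graph, shortest_route, and the edge format are assumptions; only the haversine formula and the networkx shortest-path call are standard):

```python
import math

import networkx as nx

EARTH_RADIUS_KM = 6371.0


def haversine_km(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two (lat, lon) points in kilometers."""
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    # Normalize the longitude delta into [-180, 180) so segments that cross
    # the dateline are measured the short way around.
    dlon = math.radians(((lon2 - lon1 + 180.0) % 360.0) - 180.0)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2) ** 2
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(a))


def build_graph(segments):
    """Build a weighted graph from ((lat, lon), (lat, lon)) cable segments."""
    g = nx.Graph()
    for a_pt, b_pt in segments:
        g.add_edge(a_pt, b_pt, weight=haversine_km(*a_pt, *b_pt))
    return g


def shortest_route(g, src, dst):
    """Shortest landing-point path by accumulated great-circle distance."""
    return nx.shortest_path(g, src, dst, weight="weight")
```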
backend/app/services/collectors/__init__.py
@@ -24,6 +24,8 @@ from app.services.collectors.cloudflare import (
     CloudflareRadarTrafficCollector,
     CloudflareRadarTopASCollector,
 )
+from app.services.collectors.arcgis_cables import ArcGISCableCollector
+from app.services.collectors.fao_landing import FAOLandingPointCollector

 collector_registry.register(TOP500Collector())
 collector_registry.register(EpochAIGPUCollector())
@@ -39,3 +41,5 @@ collector_registry.register(TeleGeographyCableSystemCollector())
 collector_registry.register(CloudflareRadarDeviceCollector())
 collector_registry.register(CloudflareRadarTrafficCollector())
 collector_registry.register(CloudflareRadarTopASCollector())
+collector_registry.register(ArcGISCableCollector())
+collector_registry.register(FAOLandingPointCollector())
backend/app/services/collectors/arcgis_cables.py (new file, 84 lines)
@@ -0,0 +1,84 @@
"""ArcGIS Submarine Cables Collector

Collects submarine cable data from the ArcGIS GeoJSON API.
"""

import json
from typing import Dict, Any, List
from datetime import datetime

import httpx

from app.services.collectors.base import BaseCollector


class ArcGISCableCollector(BaseCollector):
    name = "arcgis_cables"
    priority = "P1"
    module = "L2"
    frequency_hours = 168  # weekly
    data_type = "submarine_cable"

    base_url = "https://services.arcgis.com/6DIQcwlPy8knb6sg/arcgis/rest/services/SubmarineCables/FeatureServer/2/query"

    async def fetch(self) -> List[Dict[str, Any]]:
        params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.base_url, params=params)
            response.raise_for_status()
            return self.parse_response(response.json())

    def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        result = []

        features = data.get("features", [])
        for feature in features:
            props = feature.get("properties", {})
            geometry = feature.get("geometry", {})

            # Route geometry is kept as a list of lines, each a list of [lon, lat] points.
            route_coordinates = []
            if geometry.get("type") == "MultiLineString":
                coords = geometry.get("coordinates", [])
                for line in coords:
                    line_coords = []
                    for point in line:
                        if len(point) >= 2:
                            line_coords.append(point)
                    if line_coords:
                        route_coordinates.append(line_coords)
            elif geometry.get("type") == "LineString":
                coords = geometry.get("coordinates", [])
                line_coords = []
                for point in coords:
                    if len(point) >= 2:
                        line_coords.append(point)
                if line_coords:
                    route_coordinates.append(line_coords)

            try:
                entry = {
                    "source_id": f"arcgis_cable_{props.get('cable_id', props.get('OBJECTID', ''))}",
                    "name": props.get("Name", "Unknown"),
                    "country": "",
                    "city": "",
                    "latitude": "",
                    "longitude": "",
                    "value": str(props.get("length", "")).replace(",", ""),
                    "unit": "km",
                    "metadata": {
                        "cable_id": props.get("cable_id"),
                        "owners": props.get("owners"),
                        "rfs": props.get("rfs"),
                        "status": "active",
                        "year": props.get("year"),
                        "url": props.get("url"),
                        "color": props.get("color"),
                        "route_coordinates": route_coordinates,
                    },
                    "reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
                }
                result.append(entry)
            except (ValueError, TypeError, KeyError):
                continue

        return result
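As a quick offline check of the parsing above, a fabricated GeoJSON payload (not real ArcGIS data) can be fed straight to parse_response:

```python
# Fabricated feature: exercises the comma-stripping on "length" and the
# LineString branch of the geometry handling.
sample = {
    "features": [
        {
            "properties": {"cable_id": "demo-1", "Name": "Demo Cable", "length": "1,234"},
            "geometry": {"type": "LineString", "coordinates": [[-73.9, 40.7], [-9.1, 38.7]]},
        }
    ]
}

entries = ArcGISCableCollector().parse_response(sample)
assert entries[0]["source_id"] == "arcgis_cable_demo-1"
assert entries[0]["value"] == "1234"
assert entries[0]["metadata"]["route_coordinates"] == [[[-73.9, 40.7], [-9.1, 38.7]]]
```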
backend/app/services/collectors/base.py
@@ -17,7 +17,20 @@ class BaseCollector(ABC):
     priority: str = "P1"
     module: str = "L1"
     frequency_hours: int = 4
-    data_type: str = "generic"  # Override in subclass: "supercomputer", "model", "dataset", etc.
+    data_type: str = "generic"
+
+    def __init__(self):
+        self._current_task = None
+        self._db_session = None
+        self._datasource_id = 1
+
+    def update_progress(self, records_processed: int):
+        """Update task progress - call this during data processing"""
+        if self._current_task and self._db_session and self._current_task.total_records > 0:
+            self._current_task.records_processed = records_processed
+            self._current_task.progress = (
+                records_processed / self._current_task.total_records
+            ) * 100

     @abstractmethod
     async def fetch(self) -> List[Dict[str, Any]]:
@@ -35,13 +48,11 @@ class BaseCollector(ABC):
         from app.models.collected_data import CollectedData

         start_time = datetime.utcnow()
-        datasource_id = getattr(self, "_datasource_id", 1)  # Default to 1 for built-in collectors
+        datasource_id = getattr(self, "_datasource_id", 1)

-        # Check if collector is active
         if not collector_registry.is_active(self.name):
             return {"status": "skipped", "reason": "Collector is disabled"}

-        # Log task start
         task = CollectionTask(
             datasource_id=datasource_id,
             status="running",
@@ -51,16 +62,21 @@ class BaseCollector(ABC):
         await db.commit()
         task_id = task.id

+        self._current_task = task
+        self._db_session = db
+
         try:
             raw_data = await self.fetch()
+            task.total_records = len(raw_data)
+            await db.commit()

             data = self.transform(raw_data)

-            # Save data to database
             records_count = await self._save_data(db, data)

-            # Log task success
             task.status = "success"
+            task.records_processed = records_count
+            task.progress = 100.0
             task.completed_at = datetime.utcnow()
             await db.commit()
@@ -94,8 +110,7 @@ class BaseCollector(ABC):
         collected_at = datetime.utcnow()
         records_added = 0

-        for item in data:
-            # Create CollectedData entry
+        for i, item in enumerate(data):
             record = CollectedData(
                 source=self.name,
                 source_id=item.get("source_id") or item.get("id"),
@@ -125,7 +140,12 @@ class BaseCollector(ABC):
             db.add(record)
             records_added += 1

+            if i % 100 == 0:
+                self.update_progress(i + 1)
+                await db.commit()
+
         await db.commit()
+        self.update_progress(len(data))
         return records_added

     async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
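The hunks above keep the subclass contract small: a handful of class attributes plus an async fetch. A hypothetical minimal subclass (the class name and its payload are invented; fetch is assumed to be the only abstract hook, which the bare-instance registrations in collectors/__init__.py suggest):

```python
from typing import Any, Dict, List

from app.services.collectors.base import BaseCollector


class ExampleCollector(BaseCollector):
    """Invented collector; the attributes mirror those the real collectors set."""
    name = "example"
    priority = "P2"
    module = "L1"
    frequency_hours = 24
    data_type = "generic"

    async def fetch(self) -> List[Dict[str, Any]]:
        # A real collector would make an HTTP call here; hard-coded for illustration.
        return [{"source_id": "example_1", "name": "Example", "value": "42"}]
```

Registering it would mirror the lines above: `collector_registry.register(ExampleCollector())`.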
backend/app/services/collectors/fao_landing.py (new file, 66 lines)
@@ -0,0 +1,66 @@
"""FAO Landing Points Collector

Collects landing point data from the FAO CSV API.
"""

from typing import Dict, Any, List
from datetime import datetime

import httpx

from app.services.collectors.base import BaseCollector


class FAOLandingPointCollector(BaseCollector):
    name = "fao_landing_points"
    priority = "P1"
    module = "L2"
    frequency_hours = 168  # weekly
    data_type = "landing_point"

    csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"

    async def fetch(self) -> List[Dict[str, Any]]:
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.csv_url)
            response.raise_for_status()
            return self.parse_csv(response.text)

    def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        result = []

        lines = csv_text.strip().split("\n")
        if not lines:
            return result

        # Naive comma split: assumes no quoted field contains a comma.
        for line in lines[1:]:
            if not line.strip():
                continue
            parts = line.split(",")
            if len(parts) >= 4:
                try:
                    lon = float(parts[0])
                    lat = float(parts[1])
                    feature_id = parts[2]
                    name = parts[3].strip('"')
                    is_tbd = parts[4].strip() == "true" if len(parts) > 4 else False

                    entry = {
                        "source_id": f"fao_lp_{feature_id}",
                        "name": name,
                        "country": "",
                        "city": "",
                        "latitude": str(lat),
                        "longitude": str(lon),
                        "value": "",
                        "unit": "",
                        "metadata": {
                            "is_tbd": is_tbd,
                            "original_id": feature_id,
                        },
                        "reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
                    }
                    result.append(entry)
                except (ValueError, IndexError):
                    continue

        return result
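A fabricated row can exercise parse_csv offline (the column layout mirrors the indices used above: lon, lat, id, name, is_tbd):

```python
# Fabricated CSV (header + one row); not real FAO data.
sample_csv = 'lon,lat,id,name,is_tbd\n-9.1393,38.7223,123,"Lisbon",false\n'

entries = FAOLandingPointCollector().parse_csv(sample_csv)
assert entries[0]["source_id"] == "fao_lp_123"
assert entries[0]["latitude"] == "38.7223"
assert entries[0]["metadata"]["is_tbd"] is False
```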