feat(backend): Add cable graph service and data collectors

## Changelog

### New Features

#### Cable Graph Service
- Add cable_graph.py for finding shortest path between landing points
- Implement haversine distance calculation for great circle distances
- Support for dateline crossing (longitude normalization)
- NetworkX-based graph for optimal path finding

#### Data Collectors
- Add ArcGISCableCollector for fetching submarine cable data from ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from FAO CSV API

### Backend Changes

#### API Updates
- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas
- task.py: Update task model with new fields
- token.py: Update token schema

#### Services
- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes
- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for earth module
This commit is contained in:
rayd1o
2026-03-11 16:38:49 +08:00
parent 6cb4398f3a
commit aaae6a53c3
18 changed files with 990 additions and 146 deletions

View File

@@ -7,6 +7,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.models.user import User
from app.models.datasource import DataSource
from app.models.task import CollectionTask
from app.models.collected_data import CollectedData
from app.core.security import get_current_user
from app.services.collectors.registry import collector_registry
@@ -90,6 +92,20 @@ COLLECTOR_INFO = {
"priority": "P2",
"frequency_hours": 168,
},
"arcgis_cables": {
"id": 15,
"name": "ArcGIS Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"fao_landing_points": {
"id": 16,
"name": "FAO Landing Points",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
}
ID_TO_COLLECTOR = {info["id"]: name for name, info in COLLECTOR_INFO.items()}
@@ -135,6 +151,35 @@ async def list_datasources(
collector_list = []
for name, info in COLLECTOR_INFO.items():
is_active_status = collector_registry.is_active(name)
running_task_query = (
select(CollectionTask)
.where(CollectionTask.datasource_id == info["id"])
.where(CollectionTask.status == "running")
.order_by(CollectionTask.started_at.desc())
.limit(1)
)
running_result = await db.execute(running_task_query)
running_task = running_result.scalar_one_or_none()
last_run_query = (
select(CollectionTask)
.where(CollectionTask.datasource_id == info["id"])
.where(CollectionTask.completed_at.isnot(None))
.order_by(CollectionTask.completed_at.desc())
.limit(1)
)
last_run_result = await db.execute(last_run_query)
last_task = last_run_result.scalar_one_or_none()
data_count_query = select(func.count(CollectedData.id)).where(CollectedData.source == name)
data_count_result = await db.execute(data_count_query)
data_count = data_count_result.scalar() or 0
last_run = None
if last_task and last_task.completed_at and data_count > 0:
last_run = last_task.completed_at.strftime("%Y-%m-%d %H:%M")
collector_list.append(
{
"id": info["id"],
@@ -144,6 +189,12 @@ async def list_datasources(
"frequency": f"{info['frequency_hours']}h",
"is_active": is_active_status,
"collector_class": name,
"last_run": last_run,
"is_running": running_task is not None,
"task_id": running_task.id if running_task else None,
"progress": running_task.progress if running_task else None,
"records_processed": running_task.records_processed if running_task else None,
"total_records": running_task.total_records if running_task else None,
}
)
@@ -215,16 +266,22 @@ async def get_datasource_stats(
raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name]
total_query = select(func.count(DataSource.id)).where(DataSource.source == info["name"])
result = await db.execute(total_query)
source_name = info["name"]
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
result = await db.execute(query)
total = result.scalar() or 0
if total == 0:
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name)
result = await db.execute(query)
total = result.scalar() or 0
return {
"source_id": source_id,
"collector_name": collector_name,
"name": info["name"],
"total_records": total,
"last_updated": datetime.utcnow().isoformat(),
}
@@ -256,3 +313,80 @@ async def trigger_datasource(
status_code=500,
detail=f"Failed to trigger collector '{collector_name}'",
)
@router.delete("/{source_id}/data")
async def clear_datasource_data(
source_id: str,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name]
source_name = info["name"]
query = select(func.count(CollectedData.id)).where(CollectedData.source == collector_name)
result = await db.execute(query)
count = result.scalar() or 0
if count == 0:
query = select(func.count(CollectedData.id)).where(CollectedData.source == source_name)
result = await db.execute(query)
count = result.scalar() or 0
delete_source = source_name
else:
delete_source = collector_name
if count == 0:
return {
"status": "success",
"message": "No data to clear",
"deleted_count": 0,
}
delete_query = CollectedData.__table__.delete().where(CollectedData.source == delete_source)
await db.execute(delete_query)
await db.commit()
return {
"status": "success",
"message": f"Cleared {count} records for data source '{info['name']}'",
"deleted_count": count,
}
@router.get("/{source_id}/task-status")
async def get_task_status(
source_id: str,
db: AsyncSession = Depends(get_db),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name]
running_task_query = (
select(CollectionTask)
.where(CollectionTask.datasource_id == info["id"])
.where(CollectionTask.status == "running")
.order_by(CollectionTask.started_at.desc())
.limit(1)
)
running_result = await db.execute(running_task_query)
running_task = running_result.scalar_one_or_none()
if not running_task:
return {"is_running": False, "task_id": None, "progress": None}
return {
"is_running": True,
"task_id": running_task.id,
"progress": running_task.progress,
"records_processed": running_task.records_processed,
"total_records": running_task.total_records,
"status": running_task.status,
}