Files
planet/backend/app/api/v1/datasources.py
2026-03-05 11:46:58 +08:00

259 lines
7.3 KiB
Python

from typing import List, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.session import get_db
from app.models.user import User
from app.models.datasource import DataSource
from app.core.security import get_current_user
from app.services.collectors.registry import collector_registry
router = APIRouter()
COLLECTOR_INFO = {
"top500": {
"id": 1,
"name": "TOP500 Supercomputers",
"module": "L1",
"priority": "P0",
"frequency_hours": 4,
},
"epoch_ai_gpu": {
"id": 2,
"name": "Epoch AI GPU Clusters",
"module": "L1",
"priority": "P0",
"frequency_hours": 6,
},
"huggingface_models": {
"id": 3,
"name": "HuggingFace Models",
"module": "L2",
"priority": "P1",
"frequency_hours": 12,
},
"huggingface_datasets": {
"id": 4,
"name": "HuggingFace Datasets",
"module": "L2",
"priority": "P1",
"frequency_hours": 12,
},
"huggingface_spaces": {
"id": 5,
"name": "HuggingFace Spaces",
"module": "L2",
"priority": "P2",
"frequency_hours": 24,
},
"peeringdb_ixp": {
"id": 6,
"name": "PeeringDB IXP",
"module": "L2",
"priority": "P1",
"frequency_hours": 24,
},
"peeringdb_network": {
"id": 7,
"name": "PeeringDB Networks",
"module": "L2",
"priority": "P2",
"frequency_hours": 48,
},
"peeringdb_facility": {
"id": 8,
"name": "PeeringDB Facilities",
"module": "L2",
"priority": "P2",
"frequency_hours": 48,
},
"telegeography_cables": {
"id": 9,
"name": "Submarine Cables",
"module": "L2",
"priority": "P1",
"frequency_hours": 168,
},
"telegeography_landing": {
"id": 10,
"name": "Cable Landing Points",
"module": "L2",
"priority": "P2",
"frequency_hours": 168,
},
"telegeography_systems": {
"id": 11,
"name": "Cable Systems",
"module": "L2",
"priority": "P2",
"frequency_hours": 168,
},
}
ID_TO_COLLECTOR = {info["id"]: name for name, info in COLLECTOR_INFO.items()}
COLLECTOR_TO_ID = {name: info["id"] for name, info in COLLECTOR_INFO.items()}
def get_collector_name(source_id: str) -> Optional[str]:
try:
numeric_id = int(source_id)
if numeric_id in ID_TO_COLLECTOR:
return ID_TO_COLLECTOR[numeric_id]
except ValueError:
pass
if source_id in COLLECTOR_INFO:
return source_id
return None
@router.get("")
async def list_datasources(
module: Optional[str] = None,
is_active: Optional[bool] = None,
priority: Optional[str] = None,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
query = select(DataSource)
filters = []
if module:
filters.append(DataSource.module == module)
if is_active is not None:
filters.append(DataSource.is_active == is_active)
if priority:
filters.append(DataSource.priority == priority)
if filters:
query = query.where(*filters)
result = await db.execute(query)
datasources = result.scalars().all()
collector_list = []
for name, info in COLLECTOR_INFO.items():
is_active_status = collector_registry.is_active(name)
collector_list.append(
{
"id": info["id"],
"name": info["name"],
"module": info["module"],
"priority": info["priority"],
"frequency": f"{info['frequency_hours']}h",
"is_active": is_active_status,
"collector_class": name,
}
)
if module:
collector_list = [c for c in collector_list if c["module"] == module]
if priority:
collector_list = [c for c in collector_list if c["priority"] == priority]
return {
"total": len(collector_list),
"data": collector_list,
}
@router.get("/{source_id}")
async def get_datasource(
source_id: str,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name]
return {
"id": info["id"],
"name": info["name"],
"module": info["module"],
"priority": info["priority"],
"frequency": f"{info['frequency_hours']}h",
"collector_class": collector_name,
"is_active": collector_registry.is_active(collector_name),
}
@router.post("/{source_id}/enable")
async def enable_datasource(
source_id: str,
current_user: User = Depends(get_current_user),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
collector_registry.set_active(collector_name, True)
return {"status": "enabled", "source_id": source_id}
@router.post("/{source_id}/disable")
async def disable_datasource(
source_id: str,
current_user: User = Depends(get_current_user),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
collector_registry.set_active(collector_name, False)
return {"status": "disabled", "source_id": source_id}
@router.get("/{source_id}/stats")
async def get_datasource_stats(
source_id: str,
current_user: User = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
info = COLLECTOR_INFO[collector_name]
total_query = select(func.count(DataSource.id)).where(DataSource.source == info["name"])
result = await db.execute(total_query)
total = result.scalar() or 0
return {
"source_id": source_id,
"collector_name": collector_name,
"name": info["name"],
"total_records": total,
"last_updated": datetime.utcnow().isoformat(),
}
@router.post("/{source_id}/trigger")
async def trigger_datasource(
source_id: str,
current_user: User = Depends(get_current_user),
):
collector_name = get_collector_name(source_id)
if not collector_name:
raise HTTPException(status_code=404, detail="Data source not found")
from app.services.scheduler import run_collector_now
if not collector_registry.is_active(collector_name):
raise HTTPException(status_code=400, detail="Data source is disabled")
success = run_collector_now(collector_name)
if success:
return {
"status": "triggered",
"source_id": source_id,
"collector_name": collector_name,
"message": f"Collector '{collector_name}' has been triggered",
}
else:
raise HTTPException(
status_code=500,
detail=f"Failed to trigger collector '{collector_name}'",
)