259 lines
7.3 KiB
Python
259 lines
7.3 KiB
Python
from typing import List, Optional
|
|
from datetime import datetime
|
|
from fastapi import APIRouter, Depends, HTTPException, status
|
|
from sqlalchemy import select, func
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.db.session import get_db
|
|
from app.models.user import User
|
|
from app.models.datasource import DataSource
|
|
from app.core.security import get_current_user
|
|
from app.services.collectors.registry import collector_registry
|
|
|
|
# Router that the endpoint decorators in this module register their routes on.
# No prefix or tags are configured here.
router = APIRouter()
|
|
|
|
# Static collector catalogue, one row per collector.
# Column order: (collector key, id, name, module, priority, frequency_hours).
_COLLECTOR_ROWS = (
    ("top500", 1, "TOP500 Supercomputers", "L1", "P0", 4),
    ("epoch_ai_gpu", 2, "Epoch AI GPU Clusters", "L1", "P0", 6),
    ("huggingface_models", 3, "HuggingFace Models", "L2", "P1", 12),
    ("huggingface_datasets", 4, "HuggingFace Datasets", "L2", "P1", 12),
    ("huggingface_spaces", 5, "HuggingFace Spaces", "L2", "P2", 24),
    ("peeringdb_ixp", 6, "PeeringDB IXP", "L2", "P1", 24),
    ("peeringdb_network", 7, "PeeringDB Networks", "L2", "P2", 48),
    ("peeringdb_facility", 8, "PeeringDB Facilities", "L2", "P2", 48),
    ("telegeography_cables", 9, "Submarine Cables", "L2", "P1", 168),
    ("telegeography_landing", 10, "Cable Landing Points", "L2", "P2", 168),
    ("telegeography_systems", 11, "Cable Systems", "L2", "P2", 168),
)

# Collector key -> metadata dict with the keys "id", "name", "module",
# "priority" and "frequency_hours". Insertion order follows the table above.
COLLECTOR_INFO = {
    key: {
        "id": numeric_id,
        "name": label,
        "module": module_tag,
        "priority": priority_tag,
        "frequency_hours": hours,
    }
    for key, numeric_id, label, module_tag, priority_tag, hours in _COLLECTOR_ROWS
}

# Lookup tables in both directions between numeric ids and collector keys.
ID_TO_COLLECTOR = {entry["id"]: key for key, entry in COLLECTOR_INFO.items()}
COLLECTOR_TO_ID = {key: COLLECTOR_INFO[key]["id"] for key in COLLECTOR_INFO}
|
|
|
|
|
|
def get_collector_name(source_id: str) -> Optional[str]:
    """Translate a path identifier into a collector key.

    Args:
        source_id: Either the numeric id of a collector given as text, or the
            collector key itself.

    Returns:
        The matching key of ``COLLECTOR_INFO``, or ``None`` when the value
        matches neither a known id nor a known key.
    """
    try:
        by_id = ID_TO_COLLECTOR.get(int(source_id))
    except ValueError:
        # Not an integer literal; it may still be a collector key.
        by_id = None
    if by_id is not None:
        return by_id
    return source_id if source_id in COLLECTOR_INFO else None
|
|
|
|
|
|
@router.get("")
async def list_datasources(
    module: Optional[str] = None,
    is_active: Optional[bool] = None,
    priority: Optional[str] = None,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List the collectors declared in ``COLLECTOR_INFO``, optionally filtered.

    Args:
        module: When truthy, keep only collectors whose "module" equals it.
        is_active: When not ``None``, keep only collectors whose registry
            state (``collector_registry.is_active``) matches it.
        priority: When truthy, keep only collectors whose "priority" equals it.
        current_user: Resolved through the ``get_current_user`` dependency;
            not read in the body.
        db: Resolved through the ``get_db`` dependency; kept for interface
            compatibility and not read in the body.

    Returns:
        A dict with "total" (number of entries) and "data" (list of collector
        descriptions in ``COLLECTOR_INFO`` order).
    """
    # The previous version ran a SELECT on DataSource and never read the rows.
    # As a result `is_active` had no effect on the response. The listing comes
    # entirely from COLLECTOR_INFO and the registry, so all filters apply here.
    collector_list = []
    for name, info in COLLECTOR_INFO.items():
        if module and info["module"] != module:
            continue
        if priority and info["priority"] != priority:
            continue

        is_active_status = collector_registry.is_active(name)
        if is_active is not None and bool(is_active_status) != is_active:
            continue

        collector_list.append(
            {
                "id": info["id"],
                "name": info["name"],
                "module": info["module"],
                "priority": info["priority"],
                "frequency": f"{info['frequency_hours']}h",
                "is_active": is_active_status,
                "collector_class": name,
            }
        )

    return {
        "total": len(collector_list),
        "data": collector_list,
    }
|
|
|
|
|
|
@router.get("/{source_id}")
async def get_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Describe a single collector.

    Args:
        source_id: Numeric id or collector key, resolved by
            ``get_collector_name``.
        current_user: Injected dependency; not read in the body.
        db: Injected dependency; not read in the body.

    Returns:
        A dict with the collector's id, name, module, priority, frequency
        (hours suffixed with "h"), collector key and current registry state.

    Raises:
        HTTPException: 404 when ``source_id`` does not resolve to a collector.
    """
    resolved_key = get_collector_name(source_id)
    if not resolved_key:
        raise HTTPException(status_code=404, detail="Data source not found")

    info = COLLECTOR_INFO[resolved_key]
    # Copy the static fields first so the response key order stays the same.
    description = {field: info[field] for field in ("id", "name", "module", "priority")}
    description["frequency"] = f"{info['frequency_hours']}h"
    description["collector_class"] = resolved_key
    description["is_active"] = collector_registry.is_active(resolved_key)
    return description
|
|
|
|
|
|
@router.post("/{source_id}/enable")
async def enable_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Mark a collector as active in the collector registry.

    Args:
        source_id: Numeric id or collector key, resolved by
            ``get_collector_name``.
        current_user: Injected dependency; not read in the body.

    Returns:
        ``{"status": "enabled", "source_id": <source_id as received>}``.

    Raises:
        HTTPException: 404 when ``source_id`` does not resolve to a collector.
    """
    target = get_collector_name(source_id)
    if target:
        collector_registry.set_active(target, True)
        return {"status": "enabled", "source_id": source_id}
    raise HTTPException(status_code=404, detail="Data source not found")
|
|
|
|
|
|
@router.post("/{source_id}/disable")
async def disable_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Mark a collector as inactive in the collector registry.

    Args:
        source_id: Numeric id or collector key, resolved by
            ``get_collector_name``.
        current_user: Injected dependency; not read in the body.

    Returns:
        ``{"status": "disabled", "source_id": <source_id as received>}``.

    Raises:
        HTTPException: 404 when ``source_id`` does not resolve to a collector.
    """
    target = get_collector_name(source_id)
    if target:
        collector_registry.set_active(target, False)
        return {"status": "disabled", "source_id": source_id}
    raise HTTPException(status_code=404, detail="Data source not found")
|
|
|
|
|
|
@router.get("/{source_id}/stats")
async def get_datasource_stats(
    source_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Report the stored row count for one collector.

    Counts ``DataSource`` rows whose ``source`` column equals the collector's
    display name.

    Args:
        source_id: Numeric id or collector key, resolved by
            ``get_collector_name``.
        current_user: Injected dependency; not read in the body.
        db: Async session used to run the count query.

    Returns:
        A dict with "source_id", "collector_name", "name", "total_records"
        and "last_updated".

    Raises:
        HTTPException: 404 when ``source_id`` does not resolve to a collector.
    """
    resolved_key = get_collector_name(source_id)
    if not resolved_key:
        raise HTTPException(status_code=404, detail="Data source not found")

    display_name = COLLECTOR_INFO[resolved_key]["name"]
    count_stmt = select(func.count(DataSource.id)).where(
        DataSource.source == display_name
    )
    row_count = (await db.execute(count_stmt)).scalar()

    return {
        "source_id": source_id,
        "collector_name": resolved_key,
        "name": display_name,
        # A missing/falsy scalar is reported as zero.
        "total_records": row_count if row_count else 0,
        # NOTE(review): this is the time of the request (naive UTC), not a
        # timestamp read from stored data.
        "last_updated": datetime.utcnow().isoformat(),
    }
|
|
|
|
|
|
@router.post("/{source_id}/trigger")
async def trigger_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Ask the scheduler to run a collector immediately.

    Args:
        source_id: Numeric id or collector key, resolved by
            ``get_collector_name``.
        current_user: Injected dependency; not read in the body.

    Returns:
        A dict with "status", "source_id", "collector_name" and "message"
        when ``run_collector_now`` reports success.

    Raises:
        HTTPException: 404 when ``source_id`` does not resolve to a collector,
            400 when the collector is not active in the registry, 500 when
            ``run_collector_now`` returns a falsy value.
    """
    collector_name = get_collector_name(source_id)
    if not collector_name:
        raise HTTPException(status_code=404, detail="Data source not found")

    # Imported at call time rather than at module import
    # (presumably to avoid an import cycle — TODO confirm).
    from app.services.scheduler import run_collector_now

    if not collector_registry.is_active(collector_name):
        raise HTTPException(status_code=400, detail="Data source is disabled")

    if not run_collector_now(collector_name):
        raise HTTPException(
            status_code=500,
            detail=f"Failed to trigger collector '{collector_name}'",
        )

    return {
        "status": "triggered",
        "source_id": source_id,
        "collector_name": collector_name,
        "message": f"Collector '{collector_name}' has been triggered",
    }
|