"""HTTP endpoints for listing, inspecting and controlling data-source collectors.

The set of known collectors is the static ``COLLECTOR_INFO`` catalogue below;
their enabled/disabled state is read from and written to ``collector_registry``.
"""

from typing import List, Optional
from datetime import datetime

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.db.session import get_db
from app.models.user import User
from app.models.datasource import DataSource
from app.core.security import get_current_user
from app.services.collectors.registry import collector_registry

router = APIRouter()

# Static catalogue of collectors, keyed by the collector's registry name.
# ``id`` is the numeric identifier accepted in URLs; ``frequency_hours`` is
# rendered to clients as e.g. "4h".
COLLECTOR_INFO = {
    "top500": {
        "id": 1,
        "name": "TOP500 Supercomputers",
        "module": "L1",
        "priority": "P0",
        "frequency_hours": 4,
    },
    "epoch_ai_gpu": {
        "id": 2,
        "name": "Epoch AI GPU Clusters",
        "module": "L1",
        "priority": "P0",
        "frequency_hours": 6,
    },
    "huggingface_models": {
        "id": 3,
        "name": "HuggingFace Models",
        "module": "L2",
        "priority": "P1",
        "frequency_hours": 12,
    },
    "huggingface_datasets": {
        "id": 4,
        "name": "HuggingFace Datasets",
        "module": "L2",
        "priority": "P1",
        "frequency_hours": 12,
    },
    "huggingface_spaces": {
        "id": 5,
        "name": "HuggingFace Spaces",
        "module": "L2",
        "priority": "P2",
        "frequency_hours": 24,
    },
    "peeringdb_ixp": {
        "id": 6,
        "name": "PeeringDB IXP",
        "module": "L2",
        "priority": "P1",
        "frequency_hours": 24,
    },
    "peeringdb_network": {
        "id": 7,
        "name": "PeeringDB Networks",
        "module": "L2",
        "priority": "P2",
        "frequency_hours": 48,
    },
    "peeringdb_facility": {
        "id": 8,
        "name": "PeeringDB Facilities",
        "module": "L2",
        "priority": "P2",
        "frequency_hours": 48,
    },
    "telegeography_cables": {
        "id": 9,
        "name": "Submarine Cables",
        "module": "L2",
        "priority": "P1",
        "frequency_hours": 168,
    },
    "telegeography_landing": {
        "id": 10,
        "name": "Cable Landing Points",
        "module": "L2",
        "priority": "P2",
        "frequency_hours": 168,
    },
    "telegeography_systems": {
        "id": 11,
        "name": "Cable Systems",
        "module": "L2",
        "priority": "P2",
        "frequency_hours": 168,
    },
}

# Lookup tables between numeric ids and collector registry names.
ID_TO_COLLECTOR = {info["id"]: name for name, info in COLLECTOR_INFO.items()}
COLLECTOR_TO_ID = {name: info["id"] for name, info in COLLECTOR_INFO.items()}


def get_collector_name(source_id: str) -> Optional[str]:
    """Resolve a URL identifier to a collector registry name.

    Args:
        source_id: Either a numeric id (as a string, e.g. ``"3"``) or a
            collector name (e.g. ``"huggingface_models"``).

    Returns:
        The matching key of ``COLLECTOR_INFO``, or ``None`` when the
        identifier matches neither a known id nor a known name.
    """
    try:
        numeric_id = int(source_id)
    except ValueError:
        # Not a number: fall through to the lookup by name.
        pass
    else:
        if numeric_id in ID_TO_COLLECTOR:
            return ID_TO_COLLECTOR[numeric_id]
    return source_id if source_id in COLLECTOR_INFO else None


def _require_collector_name(source_id: str) -> str:
    """Resolve ``source_id`` to a collector name or raise HTTP 404."""
    collector_name = get_collector_name(source_id)
    if collector_name is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Data source not found",
        )
    return collector_name


def _describe_collector(collector_name: str) -> dict:
    """Build the public JSON representation of one catalogue entry."""
    info = COLLECTOR_INFO[collector_name]
    return {
        "id": info["id"],
        "name": info["name"],
        "module": info["module"],
        "priority": info["priority"],
        "frequency": f"{info['frequency_hours']}h",
        "is_active": collector_registry.is_active(collector_name),
        "collector_class": collector_name,
    }


@router.get("")
async def list_datasources(
    module: Optional[str] = None,
    is_active: Optional[bool] = None,
    priority: Optional[str] = None,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """List the catalogue of collectors, optionally filtered.

    Args:
        module: Keep only collectors whose ``module`` equals this value.
        is_active: Keep only collectors whose registry state equals this value.
        priority: Keep only collectors whose ``priority`` equals this value.
        current_user: Authenticated caller (dependency; enforces auth).
        db: Database session. Unused here, kept so the endpoint's
            dependencies stay unchanged.

    Returns:
        ``{"total": <count>, "data": [<collector>, ...]}``.
    """
    # The response is built purely from COLLECTOR_INFO and the registry. An
    # earlier revision also ran a SELECT on DataSource and discarded the rows,
    # which meant ``is_active`` never reached the returned list.
    collectors = [_describe_collector(name) for name in COLLECTOR_INFO]

    if module:
        collectors = [c for c in collectors if c["module"] == module]
    if is_active is not None:
        collectors = [c for c in collectors if c["is_active"] == is_active]
    if priority:
        collectors = [c for c in collectors if c["priority"] == priority]

    return {
        "total": len(collectors),
        "data": collectors,
    }


@router.get("/{source_id}")
async def get_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the catalogue entry for one collector.

    Args:
        source_id: Numeric id or collector name.
        current_user: Authenticated caller (dependency; enforces auth).
        db: Database session (not used by this handler).

    Raises:
        HTTPException: 404 when ``source_id`` is unknown.
    """
    return _describe_collector(_require_collector_name(source_id))


@router.post("/{source_id}/enable")
async def enable_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Mark a collector as active in the registry.

    Raises:
        HTTPException: 404 when ``source_id`` is unknown.
    """
    collector_name = _require_collector_name(source_id)
    collector_registry.set_active(collector_name, True)
    return {"status": "enabled", "source_id": source_id}


@router.post("/{source_id}/disable")
async def disable_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Mark a collector as inactive in the registry.

    Raises:
        HTTPException: 404 when ``source_id`` is unknown.
    """
    collector_name = _require_collector_name(source_id)
    collector_registry.set_active(collector_name, False)
    return {"status": "disabled", "source_id": source_id}


@router.get("/{source_id}/stats")
async def get_datasource_stats(
    source_id: str,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the number of ``DataSource`` rows recorded for a collector.

    Rows are matched on ``DataSource.source`` equal to the collector's
    display name.

    Raises:
        HTTPException: 404 when ``source_id`` is unknown.
    """
    collector_name = _require_collector_name(source_id)
    info = COLLECTOR_INFO[collector_name]

    count_query = select(func.count(DataSource.id)).where(
        DataSource.source == info["name"]
    )
    result = await db.execute(count_query)
    total = result.scalar() or 0

    return {
        "source_id": source_id,
        "collector_name": collector_name,
        "name": info["name"],
        "total_records": total,
        # This is the time of the request, not of the last collection run.
        # NOTE(review): utcnow() yields a naive timestamp and is deprecated
        # since Python 3.12; switching to an aware datetime would change the
        # string format clients receive, so confirm with consumers first.
        "last_updated": datetime.utcnow().isoformat(),
    }


@router.post("/{source_id}/trigger")
async def trigger_datasource(
    source_id: str,
    current_user: User = Depends(get_current_user),
):
    """Run a collector immediately.

    Raises:
        HTTPException: 404 when ``source_id`` is unknown, 400 when the
            collector is disabled, 500 when the scheduler reports failure.
    """
    collector_name = _require_collector_name(source_id)

    # Imported lazily, as in the original code -- presumably to avoid a
    # circular import with the scheduler module; verify before hoisting.
    from app.services.scheduler import run_collector_now

    if not collector_registry.is_active(collector_name):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Data source is disabled",
        )

    if not run_collector_now(collector_name):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to trigger collector '{collector_name}'",
        )

    return {
        "status": "triggered",
        "source_id": source_id,
        "collector_name": collector_name,
        "message": f"Collector '{collector_name}' has been triggered",
    }