from typing import Optional from fastapi import APIRouter, Depends, HTTPException from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession from app.core.security import get_current_user from app.core.data_sources import get_data_sources_config from app.db.session import get_db from app.models.collected_data import CollectedData from app.models.datasource import DataSource from app.models.task import CollectionTask from app.models.user import User from app.services.scheduler import get_latest_task_id_for_datasource, run_collector_now, sync_datasource_job router = APIRouter() def format_frequency_label(minutes: int) -> str: if minutes % 1440 == 0: return f"{minutes // 1440}d" if minutes % 60 == 0: return f"{minutes // 60}h" return f"{minutes}m" async def get_datasource_record(db: AsyncSession, source_id: str) -> Optional[DataSource]: datasource = None try: datasource = await db.get(DataSource, int(source_id)) except ValueError: pass if datasource is not None: return datasource result = await db.execute( select(DataSource).where( (DataSource.source == source_id) | (DataSource.collector_class == source_id) ) ) return result.scalar_one_or_none() async def get_last_completed_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]: result = await db.execute( select(CollectionTask) .where(CollectionTask.datasource_id == datasource_id) .where(CollectionTask.completed_at.isnot(None)) .order_by(CollectionTask.completed_at.desc()) .limit(1) ) return result.scalar_one_or_none() async def get_running_task(db: AsyncSession, datasource_id: int) -> Optional[CollectionTask]: result = await db.execute( select(CollectionTask) .where(CollectionTask.datasource_id == datasource_id) .where(CollectionTask.status == "running") .order_by(CollectionTask.started_at.desc()) .limit(1) ) return result.scalar_one_or_none() @router.get("") async def list_datasources( module: Optional[str] = None, is_active: Optional[bool] = None, priority: Optional[str] = None, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): query = select(DataSource).order_by(DataSource.module, DataSource.id) if module: query = query.where(DataSource.module == module) if is_active is not None: query = query.where(DataSource.is_active == is_active) if priority: query = query.where(DataSource.priority == priority) result = await db.execute(query) datasources = result.scalars().all() collector_list = [] config = get_data_sources_config() for datasource in datasources: running_task = await get_running_task(db, datasource.id) last_task = await get_last_completed_task(db, datasource.id) endpoint = await config.get_url(datasource.source, db) data_count_result = await db.execute( select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source) ) data_count = data_count_result.scalar() or 0 last_run = None if last_task and last_task.completed_at and data_count > 0: last_run = last_task.completed_at.strftime("%Y-%m-%d %H:%M") collector_list.append( { "id": datasource.id, "name": datasource.name, "module": datasource.module, "priority": datasource.priority, "frequency": format_frequency_label(datasource.frequency_minutes), "frequency_minutes": datasource.frequency_minutes, "is_active": datasource.is_active, "collector_class": datasource.collector_class, "endpoint": endpoint, "last_run": last_run, "is_running": running_task is not None, "task_id": running_task.id if running_task else None, "progress": running_task.progress if running_task else None, "phase": running_task.phase if running_task else None, "records_processed": running_task.records_processed if running_task else None, "total_records": running_task.total_records if running_task else None, } ) return {"total": len(collector_list), "data": collector_list} @router.get("/{source_id}") async def get_datasource( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") config = get_data_sources_config() endpoint = await config.get_url(datasource.source, db) return { "id": datasource.id, "name": datasource.name, "module": datasource.module, "priority": datasource.priority, "frequency": format_frequency_label(datasource.frequency_minutes), "frequency_minutes": datasource.frequency_minutes, "collector_class": datasource.collector_class, "source": datasource.source, "endpoint": endpoint, "is_active": datasource.is_active, } @router.post("/{source_id}/enable") async def enable_datasource( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") datasource.is_active = True await db.commit() await sync_datasource_job(datasource.id) return {"status": "enabled", "source_id": datasource.id} @router.post("/{source_id}/disable") async def disable_datasource( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") datasource.is_active = False await db.commit() await sync_datasource_job(datasource.id) return {"status": "disabled", "source_id": datasource.id} @router.get("/{source_id}/stats") async def get_datasource_stats( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") result = await db.execute( select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source) ) total = result.scalar() or 0 return { "source_id": datasource.id, "collector_name": datasource.collector_class, "name": datasource.name, "total_records": total, } @router.post("/{source_id}/trigger") async def trigger_datasource( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") if not datasource.is_active: raise HTTPException(status_code=400, detail="Data source is disabled") success = run_collector_now(datasource.source) if not success: raise HTTPException(status_code=500, detail=f"Failed to trigger collector '{datasource.source}'") task_id = None for _ in range(10): task_id = await get_latest_task_id_for_datasource(datasource.id) if task_id is not None: break return { "status": "triggered", "source_id": datasource.id, "task_id": task_id, "collector_name": datasource.source, "message": f"Collector '{datasource.source}' has been triggered", } @router.delete("/{source_id}/data") async def clear_datasource_data( source_id: str, current_user: User = Depends(get_current_user), db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") result = await db.execute( select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source) ) count = result.scalar() or 0 if count == 0: return {"status": "success", "message": "No data to clear", "deleted_count": 0} delete_query = CollectedData.__table__.delete().where(CollectedData.source == datasource.source) await db.execute(delete_query) await db.commit() return { "status": "success", "message": f"Cleared {count} records for data source '{datasource.name}'", "deleted_count": count, } @router.get("/{source_id}/task-status") async def get_task_status( source_id: str, task_id: Optional[int] = None, db: AsyncSession = Depends(get_db), ): datasource = await get_datasource_record(db, source_id) if not datasource: raise HTTPException(status_code=404, detail="Data source not found") if task_id is not None: task = await db.get(CollectionTask, task_id) if not task or task.datasource_id != datasource.id: raise HTTPException(status_code=404, detail="Task not found") else: task = await get_running_task(db, datasource.id) if not task: return {"is_running": False, "task_id": None, "progress": None, "phase": None, "status": "idle"} return { "is_running": task.status == "running", "task_id": task.id, "progress": task.progress, "phase": task.phase, "records_processed": task.records_processed, "total_records": task.total_records, "status": task.status, }