Refine data management and collection workflows

This commit is contained in:
linkong
2026-03-25 17:19:10 +08:00
parent cc5f16f8a7
commit 020c1d5051
34 changed files with 3341 additions and 947 deletions

View File

@@ -5,12 +5,13 @@ from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.security import get_current_user
from app.core.data_sources import get_data_sources_config
from app.db.session import get_db
from app.models.collected_data import CollectedData
from app.models.datasource import DataSource
from app.models.task import CollectionTask
from app.models.user import User
from app.services.scheduler import run_collector_now, sync_datasource_job
from app.services.scheduler import get_latest_task_id_for_datasource, run_collector_now, sync_datasource_job
router = APIRouter()
@@ -83,9 +84,11 @@ async def list_datasources(
datasources = result.scalars().all()
collector_list = []
config = get_data_sources_config()
for datasource in datasources:
running_task = await get_running_task(db, datasource.id)
last_task = await get_last_completed_task(db, datasource.id)
endpoint = await config.get_url(datasource.source, db)
data_count_result = await db.execute(
select(func.count(CollectedData.id)).where(CollectedData.source == datasource.source)
)
@@ -105,10 +108,12 @@ async def list_datasources(
"frequency_minutes": datasource.frequency_minutes,
"is_active": datasource.is_active,
"collector_class": datasource.collector_class,
"endpoint": endpoint,
"last_run": last_run,
"is_running": running_task is not None,
"task_id": running_task.id if running_task else None,
"progress": running_task.progress if running_task else None,
"phase": running_task.phase if running_task else None,
"records_processed": running_task.records_processed if running_task else None,
"total_records": running_task.total_records if running_task else None,
}
@@ -127,6 +132,9 @@ async def get_datasource(
if not datasource:
raise HTTPException(status_code=404, detail="Data source not found")
config = get_data_sources_config()
endpoint = await config.get_url(datasource.source, db)
return {
"id": datasource.id,
"name": datasource.name,
@@ -136,6 +144,7 @@ async def get_datasource(
"frequency_minutes": datasource.frequency_minutes,
"collector_class": datasource.collector_class,
"source": datasource.source,
"endpoint": endpoint,
"is_active": datasource.is_active,
}
@@ -212,9 +221,16 @@ async def trigger_datasource(
if not success:
raise HTTPException(status_code=500, detail=f"Failed to trigger collector '{datasource.source}'")
task_id = None
for _ in range(10):
task_id = await get_latest_task_id_for_datasource(datasource.id)
if task_id is not None:
break
return {
"status": "triggered",
"source_id": datasource.id,
"task_id": task_id,
"collector_name": datasource.source,
"message": f"Collector '{datasource.source}' has been triggered",
}
@@ -252,21 +268,29 @@ async def clear_datasource_data(
@router.get("/{source_id}/task-status")
async def get_task_status(
    source_id: str,
    task_id: Optional[int] = None,
    db: AsyncSession = Depends(get_db),
):
    """Report the progress of a collection task for one data source.

    Args:
        source_id: Identifier used to look up the data source record.
        task_id: Optional id of a specific task to report on. When given,
            the task must exist and belong to the resolved data source.
            When omitted, the task returned by ``get_running_task`` for
            the data source is reported instead.
        db: Async database session injected by FastAPI.

    Returns:
        A dict describing the task (``is_running``, ``task_id``,
        ``progress``, ``phase``, ``records_processed``, ``total_records``,
        ``status``), or an "idle" payload when no ``task_id`` was supplied
        and there is no running task.

    Raises:
        HTTPException: 404 if the data source is not found, or if
            ``task_id`` does not match a task of that data source.
    """
    datasource = await get_datasource_record(db, source_id)
    if not datasource:
        raise HTTPException(status_code=404, detail="Data source not found")

    if task_id is not None:
        # An explicit task id lets callers keep polling a task after it has
        # stopped running, so it must be resolved before any "nothing is
        # running" short-circuit.
        task = await db.get(CollectionTask, task_id)
        # Reject ids that belong to another data source rather than leaking
        # their status through this endpoint.
        if not task or task.datasource_id != datasource.id:
            raise HTTPException(status_code=404, detail="Task not found")
    else:
        task = await get_running_task(db, datasource.id)
        if not task:
            return {"is_running": False, "task_id": None, "progress": None, "phase": None, "status": "idle"}

    return {
        # Derived from the stored status: a task fetched by id may already
        # have left the "running" state.
        "is_running": task.status == "running",
        "task_id": task.id,
        "progress": task.progress,
        "phase": task.phase,
        "records_processed": task.records_processed,
        "total_records": task.total_records,
        "status": task.status,
    }