Refine data management and collection workflows
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
@@ -11,6 +11,7 @@ from sqlalchemy import select
|
||||
|
||||
from app.db.session import async_session_factory
|
||||
from app.models.datasource import DataSource
|
||||
from app.models.task import CollectionTask
|
||||
from app.services.collectors.registry import collector_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -89,6 +90,35 @@ async def run_collector_task(collector_name: str):
|
||||
logger.exception("Collector %s failed: %s", collector_name, exc)
|
||||
|
||||
|
||||
async def cleanup_stale_running_tasks(max_age_hours: int = 2) -> int:
    """Fail collection tasks that have been stuck in the running state.

    Intended for recovery after restarts or collector hangs. A task is
    selected when its status is ``"running"`` and its ``started_at`` is set
    and earlier than ``max_age_hours`` hours before the current (naive) UTC
    time. Each selected task gets status and phase ``"failed"``, a
    ``completed_at`` timestamp, and a cleanup note appended to any existing
    error message.

    Args:
        max_age_hours: Age in hours beyond which a running task is
            considered stale.

    Returns:
        The number of tasks that were marked as failed.
    """
    cleanup_note = "Marked failed automatically after stale running task cleanup"
    threshold = datetime.utcnow() - timedelta(hours=max_age_hours)
    stale_query = select(CollectionTask).where(
        CollectionTask.status == "running",
        CollectionTask.started_at.is_not(None),
        CollectionTask.started_at < threshold,
    )

    async with async_session_factory() as db:
        stale = (await db.execute(stale_query)).scalars().all()
        if not stale:
            # Nothing to update: skip the commit and the warning entirely.
            return 0

        for entry in stale:
            entry.status = "failed"
            entry.phase = "failed"
            entry.completed_at = datetime.utcnow()
            # Keep whatever error was already recorded and append the note
            # on its own line; blank/None messages are replaced outright.
            previous = (entry.error_message or "").strip()
            entry.error_message = (
                f"{previous}\n{cleanup_note}" if previous else cleanup_note
            )

        await db.commit()
        logger.warning("Cleaned up %s stale running collection task(s)", len(stale))

    return len(stale)
|
||||
|
||||
|
||||
def start_scheduler() -> None:
|
||||
"""Start the scheduler."""
|
||||
if not scheduler.running:
|
||||
@@ -144,6 +174,19 @@ def get_scheduler_jobs() -> list[Dict[str, Any]]:
|
||||
return jobs
|
||||
|
||||
|
||||
async def get_latest_task_id_for_datasource(datasource_id: int) -> Optional[int]:
    """Return the id of the newest collection task for a datasource.

    Tasks are ordered by ``created_at`` descending, with ``id`` descending
    as a tie-breaker, and only the first row is fetched.

    Args:
        datasource_id: Value matched against ``CollectionTask.datasource_id``.

    Returns:
        The id of the most recent matching task, or ``None`` when the
        datasource has no tasks.
    """
    # NOTE(review): this local import looks redundant with the module-level
    # import of CollectionTask - confirm before removing.
    from app.models.task import CollectionTask

    newest_first = (CollectionTask.created_at.desc(), CollectionTask.id.desc())
    latest_id_query = (
        select(CollectionTask.id)
        .where(CollectionTask.datasource_id == datasource_id)
        .order_by(*newest_first)
        .limit(1)
    )

    async with async_session_factory() as db:
        rows = await db.execute(latest_id_query)
        return rows.scalar_one_or_none()
|
||||
|
||||
|
||||
def run_collector_now(collector_name: str) -> bool:
|
||||
"""Run a collector immediately (not scheduled)."""
|
||||
collector = collector_registry.get(collector_name)
|
||||
|
||||
Reference in New Issue
Block a user