feat: persist system settings and refine admin layouts
This commit is contained in:
@@ -1,15 +1,16 @@
|
||||
"""Task Scheduler for running collection jobs"""
|
||||
"""Task Scheduler for running collection jobs."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any
|
||||
from typing import Any, Dict
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.interval import IntervalTrigger
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.db.session import async_session_factory
|
||||
from app.models.datasource import DataSource
|
||||
from app.services.collectors.registry import collector_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,77 +18,119 @@ logger = logging.getLogger(__name__)
|
||||
# Module-wide APScheduler instance; every function in this module registers,
# removes, and inspects jobs on this single object.
scheduler = AsyncIOScheduler()
|
||||
|
||||
|
||||
# Static lookup from collector name to a numeric datasource id.
# NOTE(review): ids 12-14 have no entry here -- confirm against the datasource
# table whether that gap is intentional.
COLLECTOR_TO_ID: Dict[str, int] = {
    "top500": 1,
    "epoch_ai_gpu": 2,
    # huggingface_*
    "huggingface_models": 3,
    "huggingface_datasets": 4,
    "huggingface_spaces": 5,
    # peeringdb_*
    "peeringdb_ixp": 6,
    "peeringdb_network": 7,
    "peeringdb_facility": 8,
    # telegeography_*
    "telegeography_cables": 9,
    "telegeography_landing": 10,
    "telegeography_systems": 11,
    # arcgis_*
    "arcgis_cables": 15,
    "arcgis_landing_points": 16,
    "arcgis_cable_landing_relation": 17,
    "fao_landing_points": 18,
    # *_tle
    "spacetrack_tle": 19,
    "celestrak_tle": 20,
}
|
||||
async def _update_next_run_at(datasource: DataSource, session) -> None:
    """Store the scheduler's next fire time on ``datasource`` and commit.

    Looks up the job whose id equals ``datasource.source`` and copies its next
    run time onto ``datasource.next_run_at``; the field is set to ``None``
    when no such job exists or the job has no next run time yet.

    Args:
        datasource: Row whose ``next_run_at`` field is updated.
        session: Session that owns ``datasource``; it is committed here.
    """
    job = scheduler.get_job(datasource.source)
    # A job added while the scheduler is stopped is only "pending": APScheduler
    # has not assigned next_run_time yet and reading the attribute raises
    # AttributeError. getattr() covers that case and the "no job" (None) case.
    datasource.next_run_at = getattr(job, "next_run_time", None)
    await session.commit()
|
||||
|
||||
|
||||
async def _apply_datasource_schedule(datasource: DataSource, session) -> None:
    """Make the scheduler job for ``datasource`` match its stored settings.

    Does nothing (beyond a warning) when no collector is registered under
    ``datasource.source``. Otherwise the registry's active flag is updated,
    any existing job with that id is removed, and -- only when the datasource
    is active -- a new interval job is added. Finally the datasource's
    ``next_run_at`` is refreshed and the session committed.

    Args:
        datasource: Row providing ``source``, ``name``, ``is_active`` and
            ``frequency_minutes``.
        session: Session that owns ``datasource``.
    """
    source = datasource.source
    if not collector_registry.get(source):
        logger.warning("Collector not found for datasource %s", source)
        return

    collector_registry.set_active(source, datasource.is_active)

    # Drop the previous job unconditionally so a disabled datasource ends up
    # with no job at all.
    if scheduler.get_job(source):
        scheduler.remove_job(source)

    if datasource.is_active:
        # Clamp once and reuse the value, so the log line reports the interval
        # that was actually scheduled rather than the raw stored setting.
        interval_minutes = max(1, datasource.frequency_minutes)
        scheduler.add_job(
            run_collector_task,
            trigger=IntervalTrigger(minutes=interval_minutes),
            id=source,
            name=datasource.name,
            replace_existing=True,
            kwargs={"collector_name": source},
        )
        logger.info("Scheduled collector: %s (every %sm)", source, interval_minutes)
    else:
        logger.info("Collector disabled: %s", source)

    await _update_next_run_at(datasource, session)
|
||||
|
||||
|
||||
async def run_collector_task(collector_name: str) -> None:
    """Run one collector and record the outcome on its datasource row.

    The run is skipped (with a log line) when the collector is not registered,
    when no datasource row has ``source == collector_name``, or when that row
    is inactive. On success ``last_run_at``, ``last_status`` and
    ``next_run_at`` are stored; on any exception the status is stored as
    ``"failed"``. Exceptions raised by the collector are logged, not re-raised.

    Args:
        collector_name: Registry key of the collector; also the job id and the
            datasource ``source`` value.
    """
    collector = collector_registry.get(collector_name)
    if not collector:
        logger.error("Collector not found: %s", collector_name)
        return

    async with async_session_factory() as db:
        result = await db.execute(select(DataSource).where(DataSource.source == collector_name))
        datasource = result.scalar_one_or_none()
        if not datasource:
            logger.error("Datasource not found for collector: %s", collector_name)
            return

        if not datasource.is_active:
            logger.info("Skipping disabled collector: %s", collector_name)
            return

        try:
            # Use the id of the row just loaded instead of a static name->id
            # table, so an unknown collector is never attributed to another
            # datasource.
            collector._datasource_id = datasource.id
            logger.info("Running collector: %s (datasource_id=%s)", collector_name, datasource.id)
            task_result = await collector.run(db)
            # NOTE(review): utcnow() yields a naive datetime and is deprecated
            # since Python 3.12; kept because the column's timezone handling
            # is not visible here.
            datasource.last_run_at = datetime.utcnow()
            datasource.last_status = task_result.get("status")
            await _update_next_run_at(datasource, db)
            logger.info("Collector %s completed: %s", collector_name, task_result)
        except Exception as exc:
            # Log first: if recording the failure below also fails, the
            # original error and traceback are already captured.
            logger.exception("Collector %s failed: %s", collector_name, exc)
            # Discard whatever the failed run left pending; a session whose
            # flush failed refuses to commit until it has been rolled back.
            await db.rollback()
            datasource.last_run_at = datetime.utcnow()
            datasource.last_status = "failed"
            await db.commit()
|
||||
|
||||
|
||||
def start_scheduler() -> None:
    """Start the shared scheduler unless it is already running.

    No jobs are registered here; this only starts the scheduler and logs it.
    Calling it again while the scheduler is running is a no-op.
    """
    if scheduler.running:
        return
    scheduler.start()
    logger.info("Scheduler started")
|
||||
|
||||
|
||||
def stop_scheduler() -> None:
    """Shut the shared scheduler down if it is running.

    ``wait=False`` is passed to ``shutdown``, so this does not block on jobs
    that are currently executing. Calling it on a stopped scheduler is a no-op.
    """
    if not scheduler.running:
        return
    scheduler.shutdown(wait=False)
    logger.info("Scheduler stopped")
|
||||
|
||||
|
||||
async def sync_scheduler_with_datasources() -> None:
    """Reconcile the scheduler's jobs with the rows of the datasource table.

    Jobs whose id matches no datasource ``source`` are removed, then every
    datasource (ordered by id) has its schedule re-applied.
    """
    async with async_session_factory() as db:
        rows = await db.execute(select(DataSource).order_by(DataSource.id))
        datasources = rows.scalars().all()

        known_sources = {ds.source for ds in datasources}
        stale_job_ids = [
            job.id for job in list(scheduler.get_jobs()) if job.id not in known_sources
        ]
        for job_id in stale_job_ids:
            scheduler.remove_job(job_id)

        # NOTE(review): each call below commits the session; presumably the
        # session factory sets expire_on_commit=False so later rows stay
        # readable -- confirm.
        for ds in datasources:
            await _apply_datasource_schedule(ds, db)
|
||||
|
||||
|
||||
async def sync_datasource_job(datasource_id: int) -> bool:
    """Re-apply the schedule of one datasource after its settings changed.

    Args:
        datasource_id: Primary key of the datasource row to load.

    Returns:
        ``True`` when the row was found and its schedule re-applied,
        ``False`` when no row has that id.
    """
    async with async_session_factory() as db:
        row = await db.get(DataSource, datasource_id)
        if row:
            await _apply_datasource_schedule(row, db)
            return True
        return False
|
||||
|
||||
|
||||
def get_scheduler_jobs() -> list[Dict[str, Any]]:
|
||||
"""Get all scheduled jobs"""
|
||||
"""Get all scheduled jobs."""
|
||||
jobs = []
|
||||
for job in scheduler.get_jobs():
|
||||
jobs.append(
|
||||
@@ -101,52 +144,17 @@ def get_scheduler_jobs() -> list[Dict[str, Any]]:
|
||||
return jobs
|
||||
|
||||
|
||||
def add_job(collector_name: str, hours: int = 4) -> None:
    """Schedule ``collector_name`` to run on a fixed interval.

    An existing job with the same id is replaced.

    Args:
        collector_name: Registry key of the collector; used as job id and name.
        hours: Interval between runs, in hours.

    Raises:
        ValueError: If no collector is registered under ``collector_name``.
    """
    if not collector_registry.get(collector_name):
        raise ValueError(f"Collector not found: {collector_name}")

    scheduler.add_job(
        run_collector_task,
        trigger=IntervalTrigger(hours=hours),
        id=collector_name,
        name=collector_name,
        replace_existing=True,
        kwargs={"collector_name": collector_name},
    )
    # Lazy %-style arguments, matching the logging convention used elsewhere
    # in this module.
    logger.info("Added scheduled job: %s (every %sh)", collector_name, hours)
|
||||
|
||||
|
||||
def remove_job(collector_name: str) -> None:
    """Remove the scheduled job whose id is ``collector_name``.

    Any exception raised by the scheduler (for example for an unknown job id)
    propagates to the caller unchanged.
    """
    scheduler.remove_job(collector_name)
    # Lazy %-style arguments, matching the logging convention used elsewhere
    # in this module.
    logger.info("Removed scheduled job: %s", collector_name)
|
||||
|
||||
|
||||
def pause_job(collector_name: str) -> None:
    """Pause the scheduled job whose id is ``collector_name``.

    Any exception raised by the scheduler propagates to the caller unchanged.
    """
    scheduler.pause_job(collector_name)
    # Lazy %-style arguments, matching the logging convention used elsewhere
    # in this module.
    logger.info("Paused job: %s", collector_name)
|
||||
|
||||
|
||||
def resume_job(collector_name: str) -> None:
    """Resume the scheduled job whose id is ``collector_name``.

    Any exception raised by the scheduler propagates to the caller unchanged.
    """
    scheduler.resume_job(collector_name)
    # Lazy %-style arguments, matching the logging convention used elsewhere
    # in this module.
    logger.info("Resumed job: %s", collector_name)
|
||||
|
||||
|
||||
# Strong references to fire-and-forget collector tasks. The event loop keeps
# only weak references to tasks, so a task nobody holds may be
# garbage-collected before it finishes.
_background_tasks: set = set()


def run_collector_now(collector_name: str) -> bool:
    """Trigger a collector immediately, outside its schedule.

    The run is started as a background task on the running event loop; this
    function does not wait for it to finish.

    Args:
        collector_name: Registry key of the collector to run.

    Returns:
        ``True`` when the task was created, ``False`` when the collector is
        not registered or the task could not be created (for example when no
        event loop is running).
    """
    if not collector_registry.get(collector_name):
        logger.error("Collector not found: %s", collector_name)
        return False

    coro = run_collector_task(collector_name)
    try:
        task = asyncio.create_task(coro)
    except Exception as exc:  # broad on purpose: the bool contract is kept
        # The coroutine never got scheduled; close it so it does not emit a
        # "coroutine was never awaited" warning.
        coro.close()
        logger.error("Failed to trigger collector %s: %s", collector_name, exc)
        return False

    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    logger.info("Triggered collector: %s", collector_name)
    return True
|
||||
|
||||
Reference in New Issue
Block a user