feat: persist system settings and refine admin layouts

This commit is contained in:
rayd1o
2026-03-25 02:57:58 +08:00
parent 81a0ca5e7a
commit ef0fefdfc7
19 changed files with 2091 additions and 1231 deletions

View File

@@ -1,15 +1,16 @@
"""Task Scheduler for running collection jobs"""
"""Task Scheduler for running collection jobs."""
import asyncio
import logging
from datetime import datetime
from typing import Dict, Any
from typing import Any, Dict
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.db.session import async_session_factory
from app.models.datasource import DataSource
from app.services.collectors.registry import collector_registry
logger = logging.getLogger(__name__)
@@ -17,77 +18,119 @@ logger = logging.getLogger(__name__)
# Module-wide async scheduler; jobs are keyed by the datasource's `source` name.
scheduler = AsyncIOScheduler()

# Legacy mapping from collector name to datasource row id.
# NOTE(review): the current run path reads the id from the DataSource row
# itself (datasource.id), so this table looks vestigial — confirm no other
# module still imports it before removing.
COLLECTOR_TO_ID = {
    "top500": 1,
    "epoch_ai_gpu": 2,
    "huggingface_models": 3,
    "huggingface_datasets": 4,
    "huggingface_spaces": 5,
    "peeringdb_ixp": 6,
    "peeringdb_network": 7,
    "peeringdb_facility": 8,
    "telegeography_cables": 9,
    "telegeography_landing": 10,
    "telegeography_systems": 11,
    "arcgis_cables": 15,
    "arcgis_landing_points": 16,
    "arcgis_cable_landing_relation": 17,
    "fao_landing_points": 18,
    "spacetrack_tle": 19,
    "celestrak_tle": 20,
}
async def _update_next_run_at(datasource: DataSource, session) -> None:
    """Persist the datasource's next scheduled run time.

    Reads the job keyed by ``datasource.source`` from the scheduler; when no
    such job exists, ``next_run_at`` is cleared to ``None``.
    """
    job = scheduler.get_job(datasource.source)
    if job:
        datasource.next_run_at = job.next_run_time
    else:
        datasource.next_run_at = None
    await session.commit()
async def _apply_datasource_schedule(datasource: DataSource, session) -> None:
    """Create, replace, or remove the scheduler job for one datasource.

    Any existing job for the datasource is dropped first; an active
    datasource then gets a fresh interval job (minimum one minute), while an
    inactive one is left unscheduled. ``next_run_at`` is persisted either way.
    """
    source = datasource.source
    collector = collector_registry.get(source)
    if not collector:
        logger.warning("Collector not found for datasource %s", source)
        return

    # Keep the registry's active flag in sync with the DB row.
    collector_registry.set_active(source, datasource.is_active)

    # Drop any stale job before deciding whether to reschedule.
    if scheduler.get_job(source):
        scheduler.remove_job(source)

    if not datasource.is_active:
        logger.info("Collector disabled: %s", source)
    else:
        interval_minutes = max(1, datasource.frequency_minutes)
        scheduler.add_job(
            run_collector_task,
            trigger=IntervalTrigger(minutes=interval_minutes),
            id=source,
            name=datasource.name,
            replace_existing=True,
            kwargs={"collector_name": source},
        )
        logger.info(
            "Scheduled collector: %s (every %sm)",
            source,
            datasource.frequency_minutes,
        )

    await _update_next_run_at(datasource, session)
async def run_collector_task(collector_name: str) -> None:
    """Run a single collector task.

    Looks up the collector and its DataSource row, runs the collector inside
    a fresh DB session, and records ``last_run_at`` / ``last_status`` /
    ``next_run_at`` on the row. Unknown, missing, or disabled datasources are
    skipped with a log message instead of raising.

    Args:
        collector_name: Registry key of the collector, matching
            ``DataSource.source``.
    """
    collector = collector_registry.get(collector_name)
    if not collector:
        logger.error("Collector not found: %s", collector_name)
        return

    async with async_session_factory() as db:
        result = await db.execute(
            select(DataSource).where(DataSource.source == collector_name)
        )
        datasource = result.scalar_one_or_none()
        if not datasource:
            logger.error("Datasource not found for collector: %s", collector_name)
            return
        if not datasource.is_active:
            logger.info("Skipping disabled collector: %s", collector_name)
            return

        try:
            # Tell the collector which datasource row its records belong to.
            collector._datasource_id = datasource.id
            logger.info("Running collector: %s (datasource_id=%s)", collector_name, datasource.id)
            task_result = await collector.run(db)
            datasource.last_run_at = datetime.utcnow()
            datasource.last_status = task_result.get("status")
            # Commits the session as a side effect.
            await _update_next_run_at(datasource, db)
            logger.info("Collector %s completed: %s", collector_name, task_result)
        except Exception as exc:
            # Record the failure on the row so the admin UI reflects it.
            datasource.last_run_at = datetime.utcnow()
            datasource.last_status = "failed"
            await db.commit()
            logger.exception("Collector %s failed: %s", collector_name, exc)
def start_scheduler() -> None:
    """Start the scheduler if it is not already running.

    Job registration is handled separately (see
    ``sync_scheduler_with_datasources``), so this only flips the scheduler on.
    """
    if not scheduler.running:
        scheduler.start()
        logger.info("Scheduler started")
def stop_scheduler() -> None:
    """Stop the scheduler if it is running.

    Uses ``wait=False`` so shutdown does not block on in-flight jobs.
    """
    if scheduler.running:
        scheduler.shutdown(wait=False)
        logger.info("Scheduler stopped")
async def sync_scheduler_with_datasources() -> None:
    """Reconcile scheduler jobs with the datasource table.

    Jobs whose id no longer matches any ``DataSource.source`` are removed,
    then every datasource row has its schedule (re)applied.
    """
    async with async_session_factory() as db:
        rows = await db.execute(select(DataSource).order_by(DataSource.id))
        datasources = rows.scalars().all()
        known_sources = {ds.source for ds in datasources}

        # Drop orphaned jobs first; copy the list since we mutate while iterating.
        for job in list(scheduler.get_jobs()):
            if job.id not in known_sources:
                scheduler.remove_job(job.id)

        for ds in datasources:
            await _apply_datasource_schedule(ds, db)
async def sync_datasource_job(datasource_id: int) -> bool:
    """Re-sync the scheduler job for one datasource after a settings change.

    Returns:
        ``True`` when the datasource exists and its schedule was applied,
        ``False`` when no row with ``datasource_id`` was found.
    """
    async with async_session_factory() as db:
        ds = await db.get(DataSource, datasource_id)
        if ds is None:
            return False
        await _apply_datasource_schedule(ds, db)
    return True
def get_scheduler_jobs() -> list[Dict[str, Any]]:
"""Get all scheduled jobs"""
"""Get all scheduled jobs."""
jobs = []
for job in scheduler.get_jobs():
jobs.append(
@@ -101,52 +144,17 @@ def get_scheduler_jobs() -> list[Dict[str, Any]]:
return jobs
def add_job(collector_name: str, hours: int = 4):
    """Add (or replace) a scheduled job for *collector_name*.

    Args:
        collector_name: Registry key of the collector; also used as job id.
        hours: Run interval in hours.

    Raises:
        ValueError: If no collector with that name is registered.
    """
    collector = collector_registry.get(collector_name)
    if not collector:
        raise ValueError(f"Collector not found: {collector_name}")
    scheduler.add_job(
        run_collector_task,
        trigger=IntervalTrigger(hours=hours),
        id=collector_name,
        name=collector_name,
        replace_existing=True,
        kwargs={"collector_name": collector_name},
    )
    # Lazy %-args logging for consistency with the rest of the module.
    logger.info("Added scheduled job: %s (every %sh)", collector_name, hours)
def remove_job(collector_name: str):
    """Remove the scheduled job whose id is *collector_name*."""
    scheduler.remove_job(collector_name)
    # Lazy %-args logging for consistency with the rest of the module.
    logger.info("Removed scheduled job: %s", collector_name)
def pause_job(collector_name: str):
    """Pause the scheduled job whose id is *collector_name*."""
    scheduler.pause_job(collector_name)
    # Lazy %-args logging for consistency with the rest of the module.
    logger.info("Paused job: %s", collector_name)
def resume_job(collector_name: str):
    """Resume the paused job whose id is *collector_name*."""
    scheduler.resume_job(collector_name)
    # Lazy %-args logging for consistency with the rest of the module.
    logger.info("Resumed job: %s", collector_name)
def run_collector_now(collector_name: str) -> bool:
    """Run a collector immediately (not scheduled).

    Fire-and-forget: schedules ``run_collector_task`` on the current event
    loop via ``asyncio.create_task`` — a running loop is required.

    Returns:
        ``True`` if the task was triggered, ``False`` if the collector is
        unknown or scheduling the task failed.
    """
    collector = collector_registry.get(collector_name)
    if not collector:
        logger.error("Collector not found: %s", collector_name)
        return False
    try:
        asyncio.create_task(run_collector_task(collector_name))
        logger.info("Triggered collector: %s", collector_name)
        return True
    except Exception as exc:
        logger.error("Failed to trigger collector %s: %s", collector_name, exc)
        return False