feat: add bgp observability and admin ui improvements
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime
|
||||
from datetime import UTC, datetime
|
||||
import httpx
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -10,6 +10,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.collected_data_fields import build_dynamic_metadata, get_record_field
|
||||
from app.core.config import settings
|
||||
from app.core.countries import normalize_country
|
||||
from app.core.time import to_iso8601_utc
|
||||
from app.core.websocket.broadcaster import broadcaster
|
||||
|
||||
|
||||
class BaseCollector(ABC):
|
||||
@@ -20,12 +22,14 @@ class BaseCollector(ABC):
|
||||
module: str = "L1"
|
||||
frequency_hours: int = 4
|
||||
data_type: str = "generic"
|
||||
fail_on_empty: bool = False
|
||||
|
||||
def __init__(self):
    """Initialise per-run collector state.

    All fields start empty; the visible ``run`` code assigns the task,
    session and broadcast marker, and ``resolve_url`` assigns the URL.
    """
    # Task object for the collection currently in progress (None when idle).
    self._current_task = None
    # Database session associated with the current task.
    self._db_session = None
    # Default datasource identifier.
    self._datasource_id = 1
    # URL resolved from the data-sources configuration by resolve_url().
    self._resolved_url: Optional[str] = None
    # Whole-percent progress sent by the last broadcast; used to skip
    # broadcasts whose rounded progress has not changed.
    self._last_broadcast_progress: Optional[int] = None
|
||||
|
||||
async def resolve_url(self, db: AsyncSession) -> None:
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
@@ -33,18 +37,53 @@ class BaseCollector(ABC):
|
||||
config = get_data_sources_config()
|
||||
self._resolved_url = await config.get_url(self.name, db)
|
||||
|
||||
def update_progress(self, records_processed: int):
|
||||
async def _publish_task_update(self, force: bool = False):
    """Broadcast the current task's state through ``broadcaster``.

    Does nothing when no task is attached. Unless ``force`` is true, the
    broadcast is skipped when the progress, rounded to a whole percent,
    equals the value recorded by the previous broadcast.

    Args:
        force: Broadcast even if the rounded progress has not changed.
    """
    if not self._current_task:
        return

    progress = float(self._current_task.progress or 0.0)
    rounded_progress = int(round(progress))

    # Read defensively, in the same way as ``_datasource_id`` below, so an
    # instance that never set the attribute simply broadcasts.
    last_progress = getattr(self, "_last_broadcast_progress", None)
    if not force and last_progress == rounded_progress:
        return

    await broadcaster.broadcast_datasource_task_update(
        {
            "datasource_id": getattr(self, "_datasource_id", None),
            "collector_name": self.name,
            "task_id": self._current_task.id,
            "status": self._current_task.status,
            "phase": self._current_task.phase,
            "progress": progress,
            "records_processed": self._current_task.records_processed,
            "total_records": self._current_task.total_records,
            "started_at": to_iso8601_utc(self._current_task.started_at),
            "completed_at": to_iso8601_utc(self._current_task.completed_at),
            "error_message": self._current_task.error_message,
        }
    )
    # Remember what was sent so unchanged progress is not re-broadcast.
    self._last_broadcast_progress = rounded_progress
|
||||
|
||||
async def update_progress(self, records_processed: int, *, commit: bool = False, force: bool = False):
    """Update task progress - call this during data processing.

    Records ``records_processed`` on the current task and recomputes its
    percentage progress. When ``total_records`` is missing or not positive
    the progress is set to 0.0 instead of dividing by it.

    Args:
        records_processed: Number of records handled so far.
        commit: Commit the database session after updating the task.
        force: Passed through to ``_publish_task_update`` to broadcast even
            when the rounded progress is unchanged.
    """
    if self._current_task and self._db_session:
        self._current_task.records_processed = records_processed

        total = self._current_task.total_records
        if total and total > 0:
            self._current_task.progress = (records_processed / total) * 100
        else:
            # No usable total: avoid a division by zero / None.
            self._current_task.progress = 0.0

        if commit:
            await self._db_session.commit()

        await self._publish_task_update(force=force)
|
||||
|
||||
async def set_phase(self, phase: str):
    """Set the current task's phase, commit it, and broadcast the change.

    Does nothing unless both a current task and a database session are set.

    Args:
        phase: Phase label to store on the task (the visible callers pass
            values such as "fetching" and "transforming").
    """
    if self._current_task and self._db_session:
        self._current_task.phase = phase
        await self._db_session.commit()
        # A phase change is always broadcast, even if progress is unchanged.
        await self._publish_task_update(force=True)
|
||||
|
||||
@abstractmethod
|
||||
async def fetch(self) -> List[Dict[str, Any]]:
|
||||
@@ -133,7 +172,7 @@ class BaseCollector(ABC):
|
||||
from app.models.task import CollectionTask
|
||||
from app.models.data_snapshot import DataSnapshot
|
||||
|
||||
start_time = datetime.utcnow()
|
||||
start_time = datetime.now(UTC)
|
||||
datasource_id = getattr(self, "_datasource_id", 1)
|
||||
snapshot_id: Optional[int] = None
|
||||
|
||||
@@ -152,14 +191,20 @@ class BaseCollector(ABC):
|
||||
|
||||
self._current_task = task
|
||||
self._db_session = db
|
||||
self._last_broadcast_progress = None
|
||||
|
||||
await self.resolve_url(db)
|
||||
await self._publish_task_update(force=True)
|
||||
|
||||
try:
|
||||
await self.set_phase("fetching")
|
||||
raw_data = await self.fetch()
|
||||
task.total_records = len(raw_data)
|
||||
await db.commit()
|
||||
await self._publish_task_update(force=True)
|
||||
|
||||
if self.fail_on_empty and not raw_data:
|
||||
raise RuntimeError(f"Collector {self.name} returned no data")
|
||||
|
||||
await self.set_phase("transforming")
|
||||
data = self.transform(raw_data)
|
||||
@@ -172,33 +217,35 @@ class BaseCollector(ABC):
|
||||
task.phase = "completed"
|
||||
task.records_processed = records_count
|
||||
task.progress = 100.0
|
||||
task.completed_at = datetime.utcnow()
|
||||
task.completed_at = datetime.now(UTC)
|
||||
await db.commit()
|
||||
await self._publish_task_update(force=True)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"task_id": task_id,
|
||||
"records_processed": records_count,
|
||||
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
|
||||
"execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
|
||||
}
|
||||
except Exception as e:
|
||||
task.status = "failed"
|
||||
task.phase = "failed"
|
||||
task.error_message = str(e)
|
||||
task.completed_at = datetime.utcnow()
|
||||
task.completed_at = datetime.now(UTC)
|
||||
if snapshot_id is not None:
|
||||
snapshot = await db.get(DataSnapshot, snapshot_id)
|
||||
if snapshot:
|
||||
snapshot.status = "failed"
|
||||
snapshot.completed_at = datetime.utcnow()
|
||||
snapshot.completed_at = datetime.now(UTC)
|
||||
snapshot.summary = {"error": str(e)}
|
||||
await db.commit()
|
||||
await self._publish_task_update(force=True)
|
||||
|
||||
return {
|
||||
"status": "failed",
|
||||
"task_id": task_id,
|
||||
"error": str(e),
|
||||
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
|
||||
"execution_time_seconds": (datetime.now(UTC) - start_time).total_seconds(),
|
||||
}
|
||||
|
||||
async def _save_data(
|
||||
@@ -219,11 +266,11 @@ class BaseCollector(ABC):
|
||||
snapshot.record_count = 0
|
||||
snapshot.summary = {"created": 0, "updated": 0, "unchanged": 0}
|
||||
snapshot.status = "success"
|
||||
snapshot.completed_at = datetime.utcnow()
|
||||
snapshot.completed_at = datetime.now(UTC)
|
||||
await db.commit()
|
||||
return 0
|
||||
|
||||
collected_at = datetime.utcnow()
|
||||
collected_at = datetime.now(UTC)
|
||||
records_added = 0
|
||||
created_count = 0
|
||||
updated_count = 0
|
||||
@@ -329,8 +376,7 @@ class BaseCollector(ABC):
|
||||
records_added += 1
|
||||
|
||||
if i % 100 == 0:
|
||||
self.update_progress(i + 1)
|
||||
await db.commit()
|
||||
await self.update_progress(i + 1, commit=True)
|
||||
|
||||
if snapshot_id is not None:
|
||||
deleted_keys = previous_current_keys - seen_entity_keys
|
||||
@@ -350,7 +396,7 @@ class BaseCollector(ABC):
|
||||
if snapshot:
|
||||
snapshot.record_count = records_added
|
||||
snapshot.status = "success"
|
||||
snapshot.completed_at = datetime.utcnow()
|
||||
snapshot.completed_at = datetime.now(UTC)
|
||||
snapshot.summary = {
|
||||
"created": created_count,
|
||||
"updated": updated_count,
|
||||
@@ -359,7 +405,7 @@ class BaseCollector(ABC):
|
||||
}
|
||||
|
||||
await db.commit()
|
||||
self.update_progress(len(data))
|
||||
await self.update_progress(len(data), force=True)
|
||||
return records_added
|
||||
|
||||
async def save(self, db: AsyncSession, data: List[Dict[str, Any]]) -> int:
|
||||
@@ -406,8 +452,8 @@ async def log_task(
|
||||
status=status,
|
||||
records_processed=records_processed,
|
||||
error_message=error_message,
|
||||
started_at=datetime.utcnow(),
|
||||
completed_at=datetime.utcnow(),
|
||||
started_at=datetime.now(UTC),
|
||||
completed_at=datetime.now(UTC),
|
||||
)
|
||||
db.add(task)
|
||||
await db.commit()
|
||||
|
||||
Reference in New Issue
Block a user