"""BGPStream backfill collector.""" from __future__ import annotations import asyncio import json import time import urllib.parse import urllib.request from typing import Any from app.services.collectors.base import BaseCollector from app.services.collectors.bgp_common import create_bgp_anomalies_for_batch, normalize_bgp_event class BGPStreamBackfillCollector(BaseCollector): name = "bgpstream_bgp" priority = "P1" module = "L3" frequency_hours = 6 data_type = "bgp_rib" fail_on_empty = True async def fetch(self) -> list[dict[str, Any]]: if not self._resolved_url: raise RuntimeError("BGPStream URL is not configured") return await asyncio.to_thread(self._fetch_resource_windows) def _fetch_resource_windows(self) -> list[dict[str, Any]]: end = int(time.time()) - 3600 start = end - 86400 params = [ ("projects[]", "routeviews"), ("collectors[]", "route-views2"), ("types[]", "updates"), ("intervals[]", f"{start},{end}"), ] url = f"{self._resolved_url}/data?{urllib.parse.urlencode(params)}" request = urllib.request.Request( url, headers={"User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)"}, ) with urllib.request.urlopen(request, timeout=30) as response: body = json.loads(response.read().decode()) if body.get("error"): raise RuntimeError(f"BGPStream broker error: {body['error']}") return body.get("data", {}).get("resources", []) def transform(self, raw_data: list[dict[str, Any]]) -> list[dict[str, Any]]: transformed: list[dict[str, Any]] = [] for item in raw_data: if not isinstance(item, dict): continue is_broker_window = any(key in item for key in ("filename", "url", "startTime", "start_time")) if {"collector", "prefix"} <= set(item.keys()) and not is_broker_window: transformed.append(normalize_bgp_event(item, project="bgpstream")) continue # Broker responses provide file windows rather than decoded events. collector = item.get("collector") or item.get("project") or "bgpstream" timestamp = item.get("time") or item.get("startTime") or item.get("start_time") name = item.get("filename") or item.get("url") or f"{collector}-window" normalized = normalize_bgp_event( { "collector": collector, "event_type": "rib", "prefix": item.get("prefix") or "historical-window", "timestamp": timestamp, "origin_asn": item.get("origin_asn"), "path": item.get("path") or [], "raw_message": item, }, project="bgpstream", ) transformed.append( normalized | { "name": name, "title": f"BGPStream {collector}", "description": "Historical BGPStream backfill window", "metadata": { **normalized["metadata"], "broker_record": item, }, } ) self._latest_transformed_batch = transformed return transformed async def run(self, db): result = await super().run(db) if result.get("status") != "success": return result snapshot_id = await self._resolve_snapshot_id(db, result.get("task_id")) anomaly_count = await create_bgp_anomalies_for_batch( db, source=self.name, snapshot_id=snapshot_id, task_id=result.get("task_id"), events=getattr(self, "_latest_transformed_batch", []), ) result["anomalies_created"] = anomaly_count return result async def _resolve_snapshot_id(self, db, task_id: int | None) -> int | None: if task_id is None: return None from sqlalchemy import select from app.models.data_snapshot import DataSnapshot result = await db.execute( select(DataSnapshot.id).where(DataSnapshot.task_id == task_id).order_by(DataSnapshot.id.desc()) ) return result.scalar_one_or_none()