"""Shared helpers for BGP collectors."""

from __future__ import annotations

import hashlib
import ipaddress
from collections import Counter, defaultdict
from datetime import UTC, datetime
from typing import Any

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.bgp_anomaly import BGPAnomaly
from app.models.collected_data import CollectedData

# Location metadata keyed by collector id ("rrcNN"). normalize_bgp_event()
# looks the collector up here to attach country/city/latitude/longitude.
RIPE_RIS_COLLECTOR_COORDS: dict[str, dict[str, Any]] = {
    "rrc00": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc01": {"city": "London", "country": "United Kingdom", "latitude": 51.5072, "longitude": -0.1276},
    "rrc03": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc04": {"city": "Geneva", "country": "Switzerland", "latitude": 46.2044, "longitude": 6.1432},
    "rrc05": {"city": "Vienna", "country": "Austria", "latitude": 48.2082, "longitude": 16.3738},
    "rrc06": {"city": "Otemachi", "country": "Japan", "latitude": 35.686, "longitude": 139.7671},
    "rrc07": {"city": "Stockholm", "country": "Sweden", "latitude": 59.3293, "longitude": 18.0686},
    "rrc10": {"city": "Milan", "country": "Italy", "latitude": 45.4642, "longitude": 9.19},
    "rrc11": {"city": "New York", "country": "United States", "latitude": 40.7128, "longitude": -74.006},
    "rrc12": {"city": "Frankfurt", "country": "Germany", "latitude": 50.1109, "longitude": 8.6821},
    "rrc13": {"city": "Moscow", "country": "Russia", "latitude": 55.7558, "longitude": 37.6173},
    "rrc14": {"city": "Palo Alto", "country": "United States", "latitude": 37.4419, "longitude": -122.143},
    "rrc15": {"city": "Sao Paulo", "country": "Brazil", "latitude": -23.5558, "longitude": -46.6396},
    "rrc16": {"city": "Miami", "country": "United States", "latitude": 25.7617, "longitude": -80.1918},
    "rrc18": {"city": "Barcelona", "country": "Spain", "latitude": 41.3874, "longitude": 2.1686},
    "rrc19": {"city": "Johannesburg", "country": "South Africa", "latitude": -26.2041, "longitude": 28.0473},
    "rrc20": {"city": "Zurich", "country": "Switzerland", "latitude": 47.3769, "longitude": 8.5417},
    "rrc21": {"city": "Paris", "country": "France", "latitude": 48.8566, "longitude": 2.3522},
    "rrc22": {"city": "Bucharest", "country": "Romania", "latitude": 44.4268, "longitude": 26.1025},
    "rrc23": {"city": "Singapore", "country": "Singapore", "latitude": 1.3521, "longitude": 103.8198},
    "rrc24": {"city": "Montevideo", "country": "Uruguay", "latitude": -34.9011, "longitude": -56.1645},
    "rrc25": {"city": "Amsterdam", "country": "Netherlands", "latitude": 52.3676, "longitude": 4.9041},
    "rrc26": {"city": "Dubai", "country": "United Arab Emirates", "latitude": 25.2048, "longitude": 55.2708},
}

# Short and long spellings of the message type, mapped to the canonical name
# stored in the normalized event.
_EVENT_TYPE_ALIASES: dict[str, str] = {
    "a": "announcement",
    "announce": "announcement",
    "announcement": "announcement",
    "w": "withdrawal",
    "withdraw": "withdrawal",
    "withdrawal": "withdrawal",
    "r": "rib",
    "rib": "rib",
}


def _safe_int(value: Any) -> int | None:
    """Return ``int(value)``, or ``None`` when the value is missing or not convertible.

    ``None`` and the empty string are treated as missing. Conversion failures
    (``TypeError``, ``ValueError``, and ``OverflowError`` for infinite floats)
    yield ``None`` instead of propagating.
    """
    try:
        if value in (None, ""):
            return None
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — must not escape a "safe" helper.
        return None


def _parse_timestamp(value: Any) -> datetime:
    """Coerce *value* into a timezone-aware UTC ``datetime``.

    * ``datetime``: converted to UTC; a naive value is tagged as UTC.
    * ``int`` / ``float``: passed to ``datetime.fromtimestamp(..., tz=UTC)``.
    * non-empty ``str``: parsed with ``datetime.fromisoformat`` after
      replacing ``"Z"`` with ``"+00:00"``; a naive result is tagged as UTC.
    * anything else (including ``None`` and ``""``): the current UTC time.

    Raises:
        ValueError: if a non-empty string is not a valid ISO 8601 timestamp.
    """
    if isinstance(value, datetime):
        return value.astimezone(UTC) if value.tzinfo else value.replace(tzinfo=UTC)
    if isinstance(value, (int, float)):
        return datetime.fromtimestamp(value, tz=UTC)
    if isinstance(value, str) and value:
        normalized = value.replace("Z", "+00:00")
        parsed = datetime.fromisoformat(normalized)
        return parsed.astimezone(UTC) if parsed.tzinfo else parsed.replace(tzinfo=UTC)
    return datetime.now(UTC)


def _normalize_as_path(raw_path: Any) -> list[int]:
    """Return the AS path as a list of ints, dropping entries that are not integers.

    Accepts a list of items or a whitespace-separated string (``{`` / ``}``
    characters are removed before splitting). Any other input type, ``None``
    or ``""`` gives an empty list.
    """
    if raw_path in (None, ""):
        return []
    if isinstance(raw_path, list):
        candidates = raw_path
    elif isinstance(raw_path, str):
        candidates = raw_path.replace("{", "").replace("}", "").split()
    else:
        return []
    return [asn for asn in (_safe_int(item) for item in candidates) if asn is not None]


def _coerce_prefix(raw: Any) -> str:
    """Return a single prefix string from a scalar or list-valued payload field.

    A list/tuple contributes its first non-empty element; stringifying the
    whole list would leave quote characters in the prefix and make it
    unparseable. Surrounding ``[`` ``]`` on a string value are removed.
    """
    if isinstance(raw, (list, tuple)):
        raw = next((item for item in raw if item), "")
    text = str(raw or "").strip()
    if text.startswith("[") and text.endswith("]"):
        text = text[1:-1]
    return text


def _first_present(*values: Any) -> Any:
    """Return the first value that is neither ``None`` nor ``""``, else ``None``.

    Unlike an ``or`` chain this keeps legitimate falsy values such as ``0``.
    """
    for value in values:
        if value is not None and value != "":
            return value
    return None


def normalize_bgp_event(payload: dict[str, Any], *, project: str) -> dict[str, Any]:
    """Convert a raw BGP message dict into the normalized record used by collectors.

    Field values are looked up under several alternative keys (for example
    ``path`` / ``as_path`` / ``attrs.path`` / ``attrs.as_path``), so payloads
    from differently shaped feeds produce the same output structure.

    Args:
        payload: The raw message. ``attrs`` may be absent, ``None`` or a dict.
        project: Stored verbatim under ``metadata["project"]``.

    Returns:
        A dict with ``source_id`` (first 24 hex characters of a SHA-1 over
        collector, peer ASN, prefix, event type, timestamp and AS path),
        display fields (``name``, ``title``, ``description``),
        ``reference_date``, collector location fields, and ``metadata``.

    Raises:
        ValueError: if the timestamp is a string that is not valid ISO 8601.
    """
    raw_message = payload.get("raw_message", payload)

    # "attrs" can be present but None (or some non-dict); treat that as empty
    # rather than failing on .get().
    attrs = payload.get("attrs")
    if not isinstance(attrs, dict):
        attrs = {}

    raw_path = payload.get("path") or payload.get("as_path") or attrs.get("path") or attrs.get("as_path") or []
    as_path = _normalize_as_path(raw_path)

    raw_type = str(payload.get("event_type") or payload.get("type") or payload.get("msg_type") or "").lower()
    # Unknown non-empty types pass through unchanged; a missing type defaults
    # to "announcement".
    event_type = _EVENT_TYPE_ALIASES.get(raw_type, raw_type or "announcement")

    prefix = _coerce_prefix(payload.get("prefix") or payload.get("prefixes") or payload.get("target_prefix"))

    timestamp = _parse_timestamp(payload.get("timestamp") or payload.get("time") or payload.get("ts"))
    collector = str(payload.get("collector") or payload.get("host") or payload.get("router") or "unknown")
    peer_asn = _safe_int(payload.get("peer_asn") or payload.get("peer"))
    # Without an explicit origin, use the last AS on the path.
    origin_asn = _safe_int(payload.get("origin_asn")) or (as_path[-1] if as_path else None)

    source_material = "|".join(
        [
            collector,
            str(peer_asn or ""),
            prefix,
            event_type,
            timestamp.isoformat(),
            ",".join(str(asn) for asn in as_path),
        ]
    )
    # SHA-1 is used only to derive a stable identifier from the fields above.
    source_id = hashlib.sha1(source_material.encode("utf-8")).hexdigest()[:24]

    prefix_length = None
    is_more_specific = False
    if prefix:
        try:
            network = ipaddress.ip_network(prefix, strict=False)
        except ValueError:
            prefix_length = None
        else:
            prefix_length = int(network.prefixlen)
            # Longer than /24 (IPv4) or /48 (IPv6) is flagged as more-specific.
            is_more_specific = prefix_length > (24 if network.version == 4 else 48)

    collector_location = RIPE_RIS_COLLECTOR_COORDS.get(collector, {})
    metadata = {
        "project": project,
        "collector": collector,
        "peer_asn": peer_asn,
        "peer_ip": payload.get("peer_ip") or payload.get("peer_address"),
        "event_type": event_type,
        "prefix": prefix,
        "origin_asn": origin_asn,
        "as_path": as_path,
        "communities": payload.get("communities") or attrs.get("communities") or [],
        "next_hop": payload.get("next_hop") or attrs.get("next_hop"),
        # 0 is a valid value for these two, so do not use an `or` chain.
        "med": _first_present(payload.get("med"), attrs.get("med")),
        "local_pref": _first_present(payload.get("local_pref"), attrs.get("local_pref")),
        "timestamp": timestamp.isoformat(),
        "as_path_length": len(as_path),
        "prefix_length": prefix_length,
        "is_more_specific": is_more_specific,
        "visibility_weight": 1,
        "collector_location": collector_location,
        "raw_message": raw_message,
    }

    return {
        "source_id": source_id,
        "name": prefix or f"{collector}:{event_type}",
        "title": f"{event_type} {prefix}".strip(),
        "description": f"{collector} observed {event_type} for {prefix}".strip(),
        "reference_date": timestamp.isoformat(),
        "country": collector_location.get("country"),
        "city": collector_location.get("city"),
        "latitude": collector_location.get("latitude"),
        "longitude": collector_location.get("longitude"),
        "metadata": metadata,
    }


async def create_bgp_anomalies_for_batch(
    db: AsyncSession,
    *,
    source: str,
    snapshot_id: int | None,
    task_id: int | None,
    events: list[dict[str, Any]],
) -> int:
    """Derive anomalies from a batch of normalized events and persist the new ones.

    Three anomaly types are produced:

    * ``origin_change``: a prefix in this batch has an origin ASN that does
      not appear among origins recorded for the same prefix and source in
      other snapshots (only when such earlier origins exist).
    * ``more_specific_burst``: two or more events flagged ``is_more_specific``
      share the same network address (the part of the prefix before ``/``).
    * ``mass_withdrawal``: three or more withdrawal events for the same
      ``(prefix, origin_asn)`` pair.

    Anomalies whose ``entity_key`` already exists in the database are skipped.
    The session is committed only when at least one anomaly was added.

    Args:
        db: Async SQLAlchemy session used for the queries and the commit.
        source: Source name stored on each anomaly and used to filter history.
        snapshot_id: Current snapshot; rows with this id are excluded from history.
        task_id: Stored on each anomaly.
        events: Normalized events, each carrying a ``metadata`` dict.

    Returns:
        The number of anomalies added to the session.
    """
    if not events:
        return 0

    pending_anomalies: list[BGPAnomaly] = []
    prefix_to_origins: defaultdict[str, set[int]] = defaultdict(set)
    prefix_to_more_specifics: defaultdict[str, list[dict[str, Any]]] = defaultdict(list)
    withdrawal_counter: Counter[tuple[str, int | None]] = Counter()
    # One detection time for the whole batch.
    detected_at = datetime.now(UTC)

    prefixes = {event["metadata"].get("prefix") for event in events if event.get("metadata", {}).get("prefix")}
    previous_origin_map: dict[str, set[int]] = defaultdict(set)

    if prefixes:
        # Baseline: origins already recorded for these prefixes in other snapshots.
        previous_query = await db.execute(
            select(CollectedData).where(
                CollectedData.source == source,
                CollectedData.snapshot_id != snapshot_id,
                CollectedData.extra_data["prefix"].as_string().in_(sorted(prefixes)),
            )
        )
        for record in previous_query.scalars().all():
            metadata = record.extra_data or {}
            prefix = metadata.get("prefix")
            origin = _safe_int(metadata.get("origin_asn"))
            if prefix and origin is not None:
                previous_origin_map[prefix].add(origin)

    # Single pass over the batch to build the three aggregates.
    for event in events:
        metadata = event.get("metadata", {})
        prefix = metadata.get("prefix")
        origin_asn = _safe_int(metadata.get("origin_asn"))
        if not prefix:
            continue
        if origin_asn is not None:
            prefix_to_origins[prefix].add(origin_asn)
        # NOTE(review): withdrawals flagged is_more_specific are grouped here
        # too, although the summary below says "announcements" — confirm intent.
        if metadata.get("is_more_specific"):
            prefix_to_more_specifics[prefix.split("/")[0]].append(event)
        if metadata.get("event_type") == "withdrawal":
            withdrawal_counter[(prefix, origin_asn)] += 1

    for prefix, origins in prefix_to_origins.items():
        historic = previous_origin_map.get(prefix, set())
        new_origins = sorted(origin for origin in origins if origin not in historic)
        # Without a baseline there is nothing to compare against.
        if not (historic and new_origins):
            continue
        for new_origin in new_origins:
            pending_anomalies.append(
                BGPAnomaly(
                    snapshot_id=snapshot_id,
                    task_id=task_id,
                    source=source,
                    anomaly_type="origin_change",
                    severity="critical",
                    status="active",
                    entity_key=f"origin_change:{prefix}:{new_origin}",
                    prefix=prefix,
                    # Lowest historic origin is reported as the previous one.
                    origin_asn=sorted(historic)[0],
                    new_origin_asn=new_origin,
                    peer_scope=[],
                    started_at=detected_at,
                    confidence=0.86,
                    summary=f"Prefix {prefix} is now originated by AS{new_origin}, outside the current baseline.",
                    evidence={"previous_origins": sorted(historic), "current_origins": sorted(origins)},
                )
            )

    for root_prefix, more_specifics in prefix_to_more_specifics.items():
        if len(more_specifics) < 2:
            continue
        sample = more_specifics[0]["metadata"]
        pending_anomalies.append(
            BGPAnomaly(
                snapshot_id=snapshot_id,
                task_id=task_id,
                source=source,
                anomaly_type="more_specific_burst",
                severity="high",
                status="active",
                # NOTE(review): the key embeds the event count, so a different
                # count for the same root yields a distinct key — confirm this
                # is the desired de-duplication granularity.
                entity_key=f"more_specific_burst:{root_prefix}:{len(more_specifics)}",
                prefix=sample.get("prefix"),
                origin_asn=_safe_int(sample.get("origin_asn")),
                new_origin_asn=None,
                peer_scope=sorted(
                    {
                        str(item.get("metadata", {}).get("collector") or "")
                        for item in more_specifics
                        if item.get("metadata", {}).get("collector")
                    }
                ),
                started_at=detected_at,
                confidence=0.72,
                summary=f"{len(more_specifics)} more-specific announcements clustered around {root_prefix}.",
                # Evidence is capped at the first 10 events.
                evidence={"events": [item.get("metadata") for item in more_specifics[:10]]},
            )
        )

    for (prefix, origin_asn), count in withdrawal_counter.items():
        if count < 3:
            continue
        pending_anomalies.append(
            BGPAnomaly(
                snapshot_id=snapshot_id,
                task_id=task_id,
                source=source,
                anomaly_type="mass_withdrawal",
                severity="high" if count < 8 else "critical",
                status="active",
                entity_key=f"mass_withdrawal:{prefix}:{origin_asn}:{count}",
                prefix=prefix,
                origin_asn=origin_asn,
                new_origin_asn=None,
                peer_scope=[],
                started_at=detected_at,
                # Confidence grows with the count and is capped at 0.95.
                confidence=min(0.55 + (count * 0.05), 0.95),
                summary=f"{count} withdrawal events observed for {prefix} in the current ingest window.",
                evidence={"withdrawal_count": count},
            )
        )

    if not pending_anomalies:
        return 0

    # Skip anomalies already stored under the same entity key.
    existing_result = await db.execute(
        select(BGPAnomaly.entity_key).where(
            BGPAnomaly.entity_key.in_([item.entity_key for item in pending_anomalies])
        )
    )
    existing_keys = {row[0] for row in existing_result.fetchall()}

    created = 0
    for anomaly in pending_anomalies:
        if anomaly.entity_key in existing_keys:
            continue
        db.add(anomaly)
        created += 1

    if created:
        await db.commit()
    return created