"""PeeringDB IXP Nodes Collector

Collects data from PeeringDB IXP directory.
https://www.peeringdb.com

Note: PeeringDB API has rate limits:
- Anonymous: 20 requests/minute
- Authenticated: 40 requests/minute (with API key)

To get higher limits, set PEERINGDB_API_KEY environment variable.
"""

import asyncio
import os
from typing import Dict, Any, List
from datetime import datetime, timezone

import httpx

from app.services.collectors.base import HTTPCollector

# PeeringDB API key - read from environment variable
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")


class _PeeringDBCollectorMixin:
    """Behaviour shared by every PeeringDB collector in this module.

    List this mixin *before* ``HTTPCollector`` in a collector's bases so the
    methods defined here take precedence. Concrete collectors provide
    ``base_url``, ``_source_prefix``, ``_fallback_key`` and
    ``_build_metadata``.
    """

    # Prefix of every generated ``source_id`` (e.g. ``"peeringdb_ixp"``).
    _source_prefix = ""
    # Top-level key consulted when the payload has no ``"data"`` key.
    _fallback_key = ""

    def __init__(self):
        super().__init__()
        self.headers = {
            "User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)",
            "Accept": "application/json",
        }
        # PeeringDB authenticates API keys through the Authorization header;
        # sending the key there also keeps the secret out of request URLs.
        if PEERINGDB_API_KEY:
            self.headers["Authorization"] = f"Api-Key {PEERINGDB_API_KEY}"

    async def fetch_with_retry(
        self, max_retries: int = 3, base_delay: float = 2.0
    ) -> Dict[str, Any]:
        """Fetch ``self.base_url`` with exponential backoff on HTTP 429.

        Args:
            max_retries: Maximum number of requests to attempt.
            base_delay: Delay in seconds before the second attempt; it
                doubles after every further rate-limited attempt.

        Returns:
            The decoded JSON body, or an empty dict when every attempt was
            rate limited.

        Raises:
            httpx.HTTPStatusError: For any non-429 error status.
        """
        last_error = None
        # One client for all attempts so the connection can be reused.
        async with httpx.AsyncClient(timeout=60.0) as client:
            for attempt in range(max_retries):
                response = await client.get(self.base_url, headers=self.headers)
                if response.status_code != 429:
                    response.raise_for_status()
                    return response.json()

                last_error = "Rate limited"
                # Only wait when another attempt will actually follow.
                if attempt < max_retries - 1:
                    delay = base_delay * (2**attempt)
                    print(f"PeeringDB rate limited, waiting {delay}s before retry...")
                    await asyncio.sleep(delay)

        print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
        return {}

    async def collect(self) -> List[Dict[str, Any]]:
        """Fetch the endpoint and return the parsed entries.

        Returns an empty list when the fetch produced no data.
        """
        response_data = await self.fetch_with_retry()
        if not response_data:
            return []
        return self.parse_response(response_data)

    def parse_response(self, response: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Convert a PeeringDB API payload into collector entries.

        Records are read from ``response["data"]``, falling back to
        ``response[self._fallback_key]``. Items that are not dicts, or whose
        fields cannot be converted, are skipped rather than aborting the
        whole batch.
        """
        if not isinstance(response, dict):
            return []
        records = response.get("data", response.get(self._fallback_key, [])) or []

        # Naive UTC timestamp (same format ``datetime.utcnow()`` produced),
        # computed once so every entry of a batch shares one reference date.
        reference_date = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )

        data = []
        for item in records:
            if not isinstance(item, dict):
                continue
            try:
                entry = {
                    "source_id": f"{self._source_prefix}_{item.get('id', '')}",
                    "name": item.get("name", "Unknown"),
                    "country": item.get("country", "Unknown"),
                    "city": item.get("city", ""),
                    "latitude": self._parse_coordinate(item.get("latitude")),
                    "longitude": self._parse_coordinate(item.get("longitude")),
                    "metadata": self._build_metadata(item),
                    "reference_date": reference_date,
                }
            except (ValueError, TypeError, KeyError):
                continue
            data.append(entry)
        return data

    def _build_metadata(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Return the collector-specific ``metadata`` dict for one record."""
        raise NotImplementedError

    @staticmethod
    def _count_related(item: Dict[str, Any], key: str) -> int:
        """Return the length of ``item[key]``, treating missing/null as 0."""
        return len(item.get(key) or [])

    def _parse_coordinate(self, value: Any) -> float:
        """Convert a coordinate to ``float``.

        Returns 0.0 for ``None``, unparseable strings and unsupported types.
        """
        if isinstance(value, (int, float, str)):
            try:
                return float(value)
            except ValueError:
                return 0.0
        return 0.0


class PeeringDBIXPCollector(_PeeringDBCollectorMixin, HTTPCollector):
    """Collector for the PeeringDB ``ix`` endpoint."""

    name = "peeringdb_ixp"
    priority = "P1"
    module = "L2"
    frequency_hours = 24
    data_type = "ixp"
    base_url = "https://www.peeringdb.com/api/ix"

    _source_prefix = "peeringdb_ixp"
    _fallback_key = "ixps"

    def _build_metadata(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Return the IXP-specific metadata for one record."""
        return {
            "org_name": item.get("org_name"),
            "url": item.get("url"),
            "tech_email": item.get("tech_email"),
            "tech_phone": item.get("tech_phone"),
            "network_count": self._count_related(item, "net_set"),
            "created": item.get("created"),
            "updated": item.get("updated"),
        }


class PeeringDBNetworkCollector(_PeeringDBCollectorMixin, HTTPCollector):
    """Collector for the PeeringDB ``net`` endpoint."""

    name = "peeringdb_network"
    priority = "P2"
    module = "L2"
    frequency_hours = 48
    data_type = "network"
    base_url = "https://www.peeringdb.com/api/net"

    _source_prefix = "peeringdb_net"
    _fallback_key = "networks"

    def _build_metadata(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Return the network-specific metadata for one record."""
        return {
            "asn": item.get("asn"),
            "irr_as_set": item.get("irr_as_set"),
            "url": item.get("url"),
            "info_type": item.get("info_type"),
            "info_traffic": item.get("info_traffic"),
            "info_ratio": item.get("info_ratio"),
            "ix_count": self._count_related(item, "ix_set"),
            "created": item.get("created"),
            "updated": item.get("updated"),
        }


class PeeringDBFacilityCollector(_PeeringDBCollectorMixin, HTTPCollector):
    """Collector for the PeeringDB ``fac`` endpoint."""

    name = "peeringdb_facility"
    priority = "P2"
    module = "L2"
    frequency_hours = 48
    data_type = "facility"
    base_url = "https://www.peeringdb.com/api/fac"

    _source_prefix = "peeringdb_fac"
    _fallback_key = "facilities"

    def _build_metadata(self, item: Dict[str, Any]) -> Dict[str, Any]:
        """Return the facility-specific metadata for one record."""
        return {
            "org_name": item.get("org_name"),
            "address": item.get("address"),
            "url": item.get("url"),
            "rack_count": item.get("rack_count"),
            "power": item.get("power"),
            "network_count": self._count_related(item, "net_set"),
            "created": item.get("created"),
            "updated": item.get("updated"),
        }