Files
planet/backend/app/services/collectors/cloudflare.py
rayd1o de32552159 feat: add data sources config system and Earth API integration
- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
2026-03-13 10:54:02 +08:00

165 lines
5.9 KiB
Python

"""Cloudflare Radar Traffic Collector
Collects Internet traffic data from Cloudflare Radar API.
https://developers.cloudflare.com/radar/
Note: Radar API provides free access to global Internet traffic data.
Some endpoints require authentication for higher rate limits.
"""
import asyncio
import os
from typing import Dict, Any, List
from datetime import datetime
import httpx
from app.services.collectors.base import HTTPCollector
# Optional Cloudflare API token read from the environment. When non-empty the
# collectors in this module send it as a bearer credential (authenticated
# requests get higher rate limits); an empty string means "unauthenticated".
CLOUDFLARE_API_TOKEN = os.getenv("CLOUDFLARE_API_TOKEN", "")
class CloudflareRadarDeviceCollector(HTTPCollector):
    """Collects device type distribution data (mobile vs desktop).

    Queries the Radar ``http/summary/device_type`` endpoint and turns the
    ``summary_0`` object of the response into a single global record.
    """

    name = "cloudflare_radar_device"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "device_stats"
    base_url = "https://api.cloudflare.com/client/v4/radar/http/summary/device_type"

    def __init__(self):
        """Set the request headers, adding the bearer token when configured."""
        super().__init__()
        self.headers = {
            "User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)",
            "Accept": "application/json",
        }
        if CLOUDFLARE_API_TOKEN:
            self.headers["Authorization"] = f"Bearer {CLOUDFLARE_API_TOKEN}"

    def parse_response(self, response: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Parse Cloudflare Radar device type response.

        Args:
            response: Decoded JSON body of the Radar response.

        Returns:
            A one-element list holding the global device distribution record.
            Missing or null ``result`` / ``summary_0`` / ``meta`` objects are
            treated as empty, so the shares default to ``0.0``. An empty list
            is returned when ``response`` is not a JSON object or when a
            share value cannot be converted to ``float``.
        """
        if not isinstance(response, dict):
            return []

        # ``dict.get(key, {})`` only falls back when the key is absent; a key
        # that is present but null yields None. Normalise every nested object
        # so the lookups below cannot raise AttributeError.
        result = response.get("result")
        if not isinstance(result, dict):
            result = {}
        summary = result.get("summary_0")
        if not isinstance(summary, dict):
            summary = {}
        meta = result.get("meta")
        if not isinstance(meta, dict):
            meta = {}

        try:
            metadata = {
                "desktop_percent": float(summary.get("desktop", 0)),
                "mobile_percent": float(summary.get("mobile", 0)),
                "other_percent": float(summary.get("other", 0)),
                "date_range": meta.get("dateRange", {}),
            }
        except (ValueError, TypeError):
            # Best-effort collector: an unparseable share means there is no
            # trustworthy record to emit, so report "no data" instead of
            # failing the whole collection run.
            return []

        return [
            {
                "source_id": "cloudflare_radar_device_global",
                "name": "Global Device Distribution",
                "country": "GLOBAL",
                "city": "",
                "latitude": 0.0,
                "longitude": 0.0,
                "metadata": metadata,
                # Naive UTC timestamp, kept in the same string format as
                # before so stored values stay comparable.
                "reference_date": datetime.utcnow().isoformat(),
            }
        ]
class CloudflareRadarTrafficCollector(HTTPCollector):
    """Collects traffic volume trends.

    Queries the Radar ``http/timeseries/requests`` endpoint and emits one
    global record per entry of ``result.requests_0.timeseries``.
    NOTE(review): the ``requests_0`` / ``timeseries`` layout is taken from
    this parser only - confirm it against the current Radar response schema.
    """

    name = "cloudflare_radar_traffic"
    priority = "P2"
    module = "L3"
    frequency_hours = 24
    data_type = "traffic_stats"
    base_url = "https://api.cloudflare.com/client/v4/radar/http/timeseries/requests"

    def __init__(self):
        """Set the request headers, adding the bearer token when configured."""
        super().__init__()
        self.headers = {
            "User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)",
            "Accept": "application/json",
        }
        if CLOUDFLARE_API_TOKEN:
            self.headers["Authorization"] = f"Bearer {CLOUDFLARE_API_TOKEN}"

    def parse_response(self, response: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Parse Cloudflare Radar traffic timeseries response.

        Args:
            response: Decoded JSON body of the Radar response.

        Returns:
            One record per usable timeseries item, in response order. An
            empty list is returned when the response carries no timeseries
            (missing, null or wrongly typed ``result`` / ``requests_0`` /
            ``timeseries``). Items that are not JSON objects, or whose
            ``datetime`` is present but not a string, are skipped.
        """
        if not isinstance(response, dict):
            return []

        # Walk down the nested objects defensively: a key that is present but
        # null would otherwise surface as an AttributeError on ``.get``.
        result = response.get("result")
        if not isinstance(result, dict):
            return []
        series = result.get("requests_0")
        if not isinstance(series, dict):
            return []
        timeseries = series.get("timeseries")
        if not isinstance(timeseries, list):
            return []

        data = []
        for item in timeseries:
            if not isinstance(item, dict):
                continue

            timestamp = item.get("datetime", "")
            if not isinstance(timestamp, str):
                # The record name is built from the first 10 characters of
                # the timestamp, so a non-string value cannot be used.
                continue

            data.append(
                {
                    "source_id": f"cloudflare_traffic_{timestamp}",
                    "name": f"Traffic {timestamp[:10]}",
                    "country": "GLOBAL",
                    "city": "",
                    "latitude": 0.0,
                    "longitude": 0.0,
                    "metadata": {
                        "datetime": item.get("datetime"),
                        "requests": item.get("requests"),
                        "visit_duration": item.get("visitDuration"),
                    },
                    # Fall back to "now" (naive UTC) only when the item has
                    # no ``datetime`` key at all.
                    "reference_date": (
                        timestamp
                        if "datetime" in item
                        else datetime.utcnow().isoformat()
                    ),
                }
            )
        return data
class CloudflareRadarTopASCollector(HTTPCollector):
    """Collects the top locations by HTTP traffic share.

    NOTE(review): the class name, ``name`` and ``data_type`` refer to
    autonomous systems, but ``base_url`` targets ``http/top/locations`` and
    the parser reads ``top_locations_0`` - the records produced here describe
    locations, not ASNs. Identifiers are left unchanged for compatibility.
    """

    name = "cloudflare_radar_top_as"
    priority = "P2"
    module = "L2"
    frequency_hours = 24
    data_type = "as_stats"
    base_url = "https://api.cloudflare.com/client/v4/radar/http/top/locations"

    def __init__(self):
        """Set the request headers, adding the bearer token when configured."""
        super().__init__()
        self.headers = {
            "User-Agent": "Planet-Intelligence-System/1.0 (Python/collector)",
            "Accept": "application/json",
        }
        if CLOUDFLARE_API_TOKEN:
            self.headers["Authorization"] = f"Bearer {CLOUDFLARE_API_TOKEN}"

    def parse_response(self, response: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Parse Cloudflare Radar top locations response.

        Args:
            response: Decoded JSON body of the Radar response.

        Returns:
            One record per usable entry of ``result.top_locations_0``, in
            response order. An empty list is returned when that list is
            missing, null or wrongly typed. Entries that are not JSON
            objects, or whose coordinates cannot be converted to ``float``,
            are skipped; a missing or null ``location`` object is treated as
            empty so the documented defaults apply.
        """
        if not isinstance(response, dict):
            return []

        # A key that is present but null makes ``.get(key, default)`` return
        # None, so check the types instead of relying on the default.
        result = response.get("result")
        if not isinstance(result, dict):
            return []
        top_locations = result.get("top_locations_0")
        if not isinstance(top_locations, list):
            return []

        data = []
        for idx, item in enumerate(top_locations):
            if not isinstance(item, dict):
                continue

            # Resolve the nested object once instead of per field.
            location = item.get("location")
            if not isinstance(location, dict):
                location = {}

            try:
                latitude = float(location.get("latitude", 0))
                longitude = float(location.get("longitude", 0))
            except (ValueError, TypeError):
                # Best-effort: drop entries with unusable coordinates rather
                # than failing the whole batch.
                continue

            data.append(
                {
                    # Fall back to the list position when no rank is given.
                    "source_id": f"cloudflare_as_{item.get('rank', idx)}",
                    "name": location.get("countryName", "Unknown"),
                    "country": location.get("countryCode", "XX"),
                    "city": location.get("cityName", ""),
                    "latitude": latitude,
                    "longitude": longitude,
                    "metadata": {
                        "rank": item.get("rank"),
                        "traffic_share": item.get("trafficShare"),
                        "country_code": location.get("countryCode"),
                    },
                    # Naive UTC timestamp, same string format as before.
                    "reference_date": datetime.utcnow().isoformat(),
                }
            )
        return data