- Add data_sources.yaml for configurable data source URLs - Add data_sources.py to load config with database override support - Add arcgis_landing_points and arcgis_cable_landing_relation collectors - Change visualization API to query arcgis_landing_points - Add /api/v1/datasources/configs/all endpoint - Update Earth to fetch from API instead of static files - Fix scheduler collector ID mappings
68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
"""FAO Landing Points Collector
|
|
|
|
Collects landing point data from FAO CSV API.
|
|
"""
|
|
|
|
import csv
import io
from datetime import datetime, timezone
from typing import Dict, Any, List

import httpx

from app.services.collectors.base import BaseCollector
|
|
|
|
|
|
|
|
class FAOLandingPointCollector(BaseCollector):
    """Collector that downloads the FAO landing-point CSV and normalizes its rows.

    ``fetch`` retrieves the CSV from ``csv_url``; ``parse_csv`` converts each
    data row into a flat dictionary with string coordinates and a small
    ``metadata`` sub-dictionary.
    """

    # Collector metadata. NOTE(review): presumably read by BaseCollector and
    # the scheduler -- confirm the meaning of each field against the base class.
    name = "fao_landing_points"
    priority = "P1"
    module = "L2"
    frequency_hours = 168  # 168 h = 7 days
    data_type = "landing_point"

    csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"

    async def fetch(self) -> List[Dict[str, Any]]:
        """Download the CSV at ``csv_url`` and return the parsed entries.

        Returns:
            The list produced by ``parse_csv`` for the response body.

        Raises:
            httpx.HTTPStatusError: If the server replies with an error status
                (raised by ``response.raise_for_status()``).
        """
        async with httpx.AsyncClient(timeout=60.0) as client:
            response = await client.get(self.csv_url)
            response.raise_for_status()
            return self.parse_csv(response.text)

    def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
        """Convert landing-point CSV text into a list of entry dictionaries.

        The first row is treated as a header and skipped. Each data row is
        read positionally: longitude, latitude, feature id, name and an
        optional fifth ``is_tbd`` flag (true only for the literal ``true``).
        Rows with fewer than four columns or with non-numeric coordinates
        are skipped.

        Parsing uses the ``csv`` module so that quoted names containing
        commas, doubled quotes or ``\\r\\n`` line endings are read correctly.

        Args:
            csv_text: Raw CSV document, header row included.

        Returns:
            One dictionary per valid row. Latitude and longitude are stored
            as strings; ``reference_date`` is the current UTC date in
            ``YYYY-MM-DD`` form and is identical for all entries of one call.

        Raises:
            csv.Error: If the csv module rejects the text, e.g. a field
                larger than its field size limit.
        """
        # newline="" passes line endings to the csv module untouched so it can
        # handle "\r\n" and newlines embedded in quoted fields itself.
        reader = csv.reader(io.StringIO(csv_text.strip(), newline=""))
        next(reader, None)  # skip the header row; no-op on empty input

        # Computed once so every entry from one run carries the same date.
        reference_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")

        result: List[Dict[str, Any]] = []
        for row in reader:
            # Blank lines come back as [] and short rows miss a required column.
            if len(row) < 4:
                continue
            try:
                lon = float(row[0])
                lat = float(row[1])
            except ValueError:
                # Non-numeric coordinates: drop the row, keep the rest.
                continue

            feature_id = row[2]
            is_tbd = len(row) > 4 and row[4].strip() == "true"

            result.append(
                {
                    "source_id": f"fao_lp_{feature_id}",
                    "name": row[3],
                    "country": "",
                    "city": "",
                    "latitude": str(lat),
                    "longitude": str(lon),
                    "value": "",
                    "unit": "",
                    "metadata": {
                        "is_tbd": is_tbd,
                        "original_id": feature_id,
                    },
                    "reference_date": reference_date,
                }
            )

        return result
|