## Changelog

### New Features

#### Cable Graph Service

- Add cable_graph.py for finding the shortest path between landing points (see the sketch after this changelog)
- Implement haversine distance calculation for great-circle distances
- Support dateline crossing (longitude normalization)
- NetworkX-based graph for optimal path finding

#### Data Collectors

- Add ArcGISCableCollector for fetching submarine cable data from the ArcGIS GeoJSON API
- Add FAOLandingPointCollector for fetching landing point data from the FAO CSV API

### Backend Changes

#### API Updates

- auth.py: Update authentication logic
- datasources.py: Add datasource endpoints and management
- visualization.py: Add visualization API endpoints
- config.py: Update configuration settings
- security.py: Improve security settings

#### Models & Schemas

- task.py: Update task model with new fields
- token.py: Update token schema

#### Services

- collectors/base.py: Improve base collector with better error handling
- collectors/__init__.py: Register new collectors
- scheduler.py: Update scheduler logic
- tasks/scheduler.py: Add task scheduling

### Frontend Changes

- AppLayout.tsx: Improve layout component
- index.css: Add global styles
- DataSources.tsx: Enhance data sources management page
- vite.config.ts: Add Vite configuration for the earth module
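cable_graph.py itself is not part of this excerpt, so the sketch below only illustrates the approach the changelog describes: landing points as graph nodes, cable segments as edges weighted by haversine distance, longitudes normalized into a consistent range, and NetworkX shortest-path search. All names here (`build_cable_graph`, `shortest_route`, the data shapes) are assumptions for illustration, not the actual module API.

```python
# Illustrative sketch only: function names and data shapes are assumptions,
# not the actual cable_graph.py API.
import math
from typing import Dict, List, Tuple

import networkx as nx

EARTH_RADIUS_KM = 6371.0


def normalize_lon(lon: float) -> float:
    """Wrap longitude into [-180, 180); source data near the dateline can carry values outside that range."""
    return ((lon + 180.0) % 360.0) - 180.0


def haversine_km(a: Tuple[float, float], b: Tuple[float, float]) -> float:
    """Great-circle distance between two (lat, lon) points in kilometres."""
    lat1, lon1 = math.radians(a[0]), math.radians(normalize_lon(a[1]))
    lat2, lon2 = math.radians(b[0]), math.radians(normalize_lon(b[1]))
    dlat, dlon = lat2 - lat1, lon2 - lon1
    h = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    return 2 * EARTH_RADIUS_KM * math.asin(math.sqrt(h))


def build_cable_graph(landing_points: Dict[str, Tuple[float, float]],
                      cables: List[Tuple[str, str]]) -> nx.Graph:
    """Nodes are landing point ids; edges are cable segments weighted by haversine distance."""
    graph = nx.Graph()
    for point_id, coords in landing_points.items():
        graph.add_node(point_id, coords=coords)
    for src, dst in cables:
        weight = haversine_km(landing_points[src], landing_points[dst])
        graph.add_edge(src, dst, weight=weight)
    return graph


def shortest_route(graph: nx.Graph, source: str, target: str) -> List[str]:
    """Shortest path by total segment length (Dijkstra via NetworkX)."""
    return nx.shortest_path(graph, source, target, weight="weight")
```

Weighting edges by great-circle length makes the shortest path approximate the route with the least total cable distance; wrapping longitudes into [-180, 180) keeps coordinates from mixed conventions (for example 0–360 ranges) consistent before the distance calculation.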
67 lines · 2.2 KiB · Python
"""FAO Landing Points Collector
|
|
|
|
Collects landing point data from FAO CSV API.
|
|
"""
|
|
|
|
from typing import Dict, Any, List
|
|
from datetime import datetime
|
|
import httpx
|
|
|
|
from app.services.collectors.base import BaseCollector
|
|
|
|
|
|
class FAOLandingPointCollector(BaseCollector):
|
|
name = "fao_landing_points"
|
|
priority = "P1"
|
|
module = "L2"
|
|
frequency_hours = 168
|
|
data_type = "landing_point"
|
|
|
|
csv_url = "https://data.apps.fao.org/catalog/dataset/1b75ff21-92f2-4b96-9b7b-98e8aa65ad5d/resource/b6071077-d1d4-4e97-aa00-42e902847c87/download/landing-point-geo.csv"
|
|
|
|
async def fetch(self) -> List[Dict[str, Any]]:
|
|
async with httpx.AsyncClient(timeout=60.0) as client:
|
|
response = await client.get(self.csv_url)
|
|
response.raise_for_status()
|
|
return self.parse_csv(response.text)
|
|
|
|
def parse_csv(self, csv_text: str) -> List[Dict[str, Any]]:
|
|
result = []
|
|
|
|
lines = csv_text.strip().split("\n")
|
|
if not lines:
|
|
return result
|
|
|
|
for line in lines[1:]:
|
|
if not line.strip():
|
|
continue
|
|
parts = line.split(",")
|
|
if len(parts) >= 4:
|
|
try:
|
|
lon = float(parts[0])
|
|
lat = float(parts[1])
|
|
feature_id = parts[2]
|
|
name = parts[3].strip('"')
|
|
is_tbd = parts[4].strip() == "true" if len(parts) > 4 else False
|
|
|
|
entry = {
|
|
"source_id": f"fao_lp_{feature_id}",
|
|
"name": name,
|
|
"country": "",
|
|
"city": "",
|
|
"latitude": str(lat),
|
|
"longitude": str(lon),
|
|
"value": "",
|
|
"unit": "",
|
|
"metadata": {
|
|
"is_tbd": is_tbd,
|
|
"original_id": feature_id,
|
|
},
|
|
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
|
|
}
|
|
result.append(entry)
|
|
except (ValueError, IndexError):
|
|
continue
|
|
|
|
return result
|
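As a quick sanity check of `parse_csv`, here is a hedged usage sketch: the CSV row below is made-up sample data (not from the FAO dataset), and it assumes BaseCollector needs no constructor arguments.

```python
# Sample data for illustration only; not an actual row from the FAO CSV.
sample_csv = (
    "longitude,latitude,feature_id,name,is_tbd\n"
    '-9.135,38.70,12345,"Lisbon Landing Station",false\n'
)

# Assumes BaseCollector can be instantiated with no arguments.
collector = FAOLandingPointCollector()
entries = collector.parse_csv(sample_csv)

print(entries[0]["source_id"])  # fao_lp_12345
print(entries[0]["name"])       # Lisbon Landing Station
print(entries[0]["latitude"])   # 38.7
```

Note that the parser splits on raw commas, so it relies on landing point names not containing commas themselves.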