feat: add data sources config system and Earth API integration

- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
This commit is contained in:
rayd1o
2026-03-13 10:54:02 +08:00
parent 99771a88c5
commit de32552159
25 changed files with 222 additions and 23 deletions

View File

@@ -9,7 +9,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISCableCollector(BaseCollector):
@@ -21,7 +22,12 @@ class ArcGISCableCollector(BaseCollector):
@property
def base_url(self) -> str:
    """Return the URL this collector fetches from.

    Uses ``self._resolved_url`` when it is set to a truthy value;
    otherwise falls back to the ``"arcgis_cables"`` entry returned by
    ``get_data_sources_config().get_yaml_url``.

    The earlier unconditional ``return settings.ARCGIS_CABLE_URL`` is
    dropped: left in front of this logic it made everything below it
    unreachable.
    """
    if self._resolved_url:
        return self._resolved_url

    # Function-scope import, kept as in the original block.
    from app.core.data_sources import get_data_sources_config

    return get_data_sources_config().get_yaml_url("arcgis_cables")
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -3,7 +3,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISLandingPointCollector(BaseCollector):
@@ -15,7 +16,12 @@ class ArcGISLandingPointCollector(BaseCollector):
@property
def base_url(self) -> str:
    """Return the URL this collector fetches from.

    Uses ``self._resolved_url`` when it is set to a truthy value;
    otherwise falls back to the ``"arcgis_landing_points"`` entry
    returned by ``get_data_sources_config().get_yaml_url``.

    The earlier unconditional ``return settings.ARCGIS_LANDING_POINT_URL``
    is dropped: left in front of this logic it made everything below it
    unreachable.
    """
    if self._resolved_url:
        return self._resolved_url

    # Function-scope import, kept as in the original block.
    from app.core.data_sources import get_data_sources_config

    return get_data_sources_config().get_yaml_url("arcgis_landing_points")
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -3,7 +3,8 @@ from datetime import datetime
import httpx
from app.services.collectors.base import BaseCollector
from app.core.config import settings
from app.core.data_sources import get_data_sources_config
class ArcGISCableLandingRelationCollector(BaseCollector):
@@ -15,7 +16,12 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
@property
def base_url(self) -> str:
    """Return the URL this collector fetches from.

    Uses ``self._resolved_url`` when it is set to a truthy value;
    otherwise falls back to the ``"arcgis_cable_landing_relation"``
    entry returned by ``get_data_sources_config().get_yaml_url``.

    The earlier unconditional
    ``return settings.ARCGIS_CABLE_LANDING_RELATION_URL`` is dropped:
    left in front of this logic it made everything below it unreachable.
    """
    if self._resolved_url:
        return self._resolved_url

    # Function-scope import, kept as in the original block.
    from app.core.data_sources import get_data_sources_config

    return get_data_sources_config().get_yaml_url(
        "arcgis_cable_landing_relation"
    )
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}

View File

@@ -23,6 +23,13 @@ class BaseCollector(ABC):
self._current_task = None
self._db_session = None
self._datasource_id = 1
self._resolved_url: Optional[str] = None
async def resolve_url(self, db: AsyncSession) -> None:
    """Look up this collector's URL and store it on the instance.

    Awaits ``get_url(self.name, db)`` on the object returned by
    ``get_data_sources_config()`` and assigns the result to
    ``self._resolved_url``.

    Args:
        db: Session passed through unchanged to ``get_url``.
    """
    # Function-scope import, kept as in the original block.
    from app.core.data_sources import get_data_sources_config

    sources = get_data_sources_config()
    resolved = await sources.get_url(self.name, db)
    self._resolved_url = resolved
def update_progress(self, records_processed: int):
"""Update task progress - call this during data processing"""
@@ -65,6 +72,8 @@ class BaseCollector(ABC):
self._current_task = task
self._db_session = db
await self.resolve_url(db)
try:
raw_data = await self.fetch()
task.total_records = len(raw_data)
@@ -87,7 +96,6 @@ class BaseCollector(ABC):
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
}
except Exception as e:
# Log task failure
task.status = "failed"
task.error_message = str(e)
task.completed_at = datetime.utcnow()

View File

@@ -15,6 +15,7 @@ from datetime import datetime
import httpx
from app.services.collectors.base import HTTPCollector
# Cloudflare API token (optional - for higher rate limits)
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")

View File

@@ -13,6 +13,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class EpochAIGPUCollector(BaseCollector):
name = "epoch_ai_gpu"
priority = "P0"

View File

@@ -10,6 +10,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class FAOLandingPointCollector(BaseCollector):
name = "fao_landing_points"
priority = "P1"

View File

@@ -12,6 +12,7 @@ from datetime import datetime
from app.services.collectors.base import HTTPCollector
class HuggingFaceModelCollector(HTTPCollector):
name = "huggingface_models"
priority = "P1"

View File

@@ -18,6 +18,7 @@ from datetime import datetime
import httpx
from app.services.collectors.base import HTTPCollector
# PeeringDB API key - read from environment variable
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")

View File

@@ -14,6 +14,7 @@ import httpx
from app.services.collectors.base import BaseCollector
class TeleGeographyCableCollector(BaseCollector):
name = "telegeography_cables"
priority = "P1"