feat: add data sources config system and Earth API integration
- Add data_sources.yaml for configurable data source URLs
- Add data_sources.py to load config with database override support
- Add arcgis_landing_points and arcgis_cable_landing_relation collectors
- Change visualization API to query arcgis_landing_points
- Add /api/v1/datasources/configs/all endpoint
- Update Earth to fetch from API instead of static files
- Fix scheduler collector ID mappings
This commit is contained in:
@@ -9,7 +9,8 @@ from datetime import datetime
|
||||
import httpx
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.core.config import settings
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
|
||||
|
||||
class ArcGISCableCollector(BaseCollector):
|
||||
@@ -21,7 +22,12 @@ class ArcGISCableCollector(BaseCollector):
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return settings.ARCGIS_CABLE_URL
|
||||
if self._resolved_url:
|
||||
return self._resolved_url
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
config = get_data_sources_config()
|
||||
return config.get_yaml_url("arcgis_cables")
|
||||
|
||||
async def fetch(self) -> List[Dict[str, Any]]:
|
||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
||||
|
||||
@@ -3,7 +3,8 @@ from datetime import datetime
|
||||
import httpx
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.core.config import settings
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
|
||||
|
||||
class ArcGISLandingPointCollector(BaseCollector):
|
||||
@@ -15,7 +16,12 @@ class ArcGISLandingPointCollector(BaseCollector):
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return settings.ARCGIS_LANDING_POINT_URL
|
||||
if self._resolved_url:
|
||||
return self._resolved_url
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
config = get_data_sources_config()
|
||||
return config.get_yaml_url("arcgis_landing_points")
|
||||
|
||||
async def fetch(self) -> List[Dict[str, Any]]:
|
||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
||||
|
||||
@@ -3,7 +3,8 @@ from datetime import datetime
|
||||
import httpx
|
||||
|
||||
from app.services.collectors.base import BaseCollector
|
||||
from app.core.config import settings
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
|
||||
|
||||
class ArcGISCableLandingRelationCollector(BaseCollector):
|
||||
@@ -15,7 +16,12 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return settings.ARCGIS_CABLE_LANDING_RELATION_URL
|
||||
if self._resolved_url:
|
||||
return self._resolved_url
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
config = get_data_sources_config()
|
||||
return config.get_yaml_url("arcgis_cable_landing_relation")
|
||||
|
||||
async def fetch(self) -> List[Dict[str, Any]]:
|
||||
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
|
||||
|
||||
@@ -23,6 +23,13 @@ class BaseCollector(ABC):
|
||||
self._current_task = None
|
||||
self._db_session = None
|
||||
self._datasource_id = 1
|
||||
self._resolved_url: Optional[str] = None
|
||||
|
||||
async def resolve_url(self, db: AsyncSession) -> None:
|
||||
from app.core.data_sources import get_data_sources_config
|
||||
|
||||
config = get_data_sources_config()
|
||||
self._resolved_url = await config.get_url(self.name, db)
|
||||
|
||||
def update_progress(self, records_processed: int):
|
||||
"""Update task progress - call this during data processing"""
|
||||
@@ -65,6 +72,8 @@ class BaseCollector(ABC):
|
||||
self._current_task = task
|
||||
self._db_session = db
|
||||
|
||||
await self.resolve_url(db)
|
||||
|
||||
try:
|
||||
raw_data = await self.fetch()
|
||||
task.total_records = len(raw_data)
|
||||
@@ -87,7 +96,6 @@ class BaseCollector(ABC):
|
||||
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
|
||||
}
|
||||
except Exception as e:
|
||||
# Log task failure
|
||||
task.status = "failed"
|
||||
task.error_message = str(e)
|
||||
task.completed_at = datetime.utcnow()
|
||||
|
||||
@@ -15,6 +15,7 @@ from datetime import datetime
|
||||
import httpx
|
||||
from app.services.collectors.base import HTTPCollector
|
||||
|
||||
|
||||
# Cloudflare API token (optional - for higher rate limits)
|
||||
CLOUDFLARE_API_TOKEN = os.environ.get("CLOUDFLARE_API_TOKEN", "")
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import httpx
|
||||
from app.services.collectors.base import BaseCollector
|
||||
|
||||
|
||||
|
||||
class EpochAIGPUCollector(BaseCollector):
|
||||
name = "epoch_ai_gpu"
|
||||
priority = "P0"
|
||||
|
||||
@@ -10,6 +10,7 @@ import httpx
|
||||
from app.services.collectors.base import BaseCollector
|
||||
|
||||
|
||||
|
||||
class FAOLandingPointCollector(BaseCollector):
|
||||
name = "fao_landing_points"
|
||||
priority = "P1"
|
||||
|
||||
@@ -12,6 +12,7 @@ from datetime import datetime
|
||||
from app.services.collectors.base import HTTPCollector
|
||||
|
||||
|
||||
|
||||
class HuggingFaceModelCollector(HTTPCollector):
|
||||
name = "huggingface_models"
|
||||
priority = "P1"
|
||||
|
||||
@@ -18,6 +18,7 @@ from datetime import datetime
|
||||
import httpx
|
||||
from app.services.collectors.base import HTTPCollector
|
||||
|
||||
|
||||
# PeeringDB API key - read from environment variable
|
||||
PEERINGDB_API_KEY = os.environ.get("PEERINGDB_API_KEY", "")
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import httpx
|
||||
from app.services.collectors.base import BaseCollector
|
||||
|
||||
|
||||
|
||||
class TeleGeographyCableCollector(BaseCollector):
|
||||
name = "telegeography_cables"
|
||||
priority = "P1"
|
||||
|
||||
Reference in New Issue
Block a user