Refine data management and collection workflows

This commit is contained in:
linkong
2026-03-25 17:19:10 +08:00
parent cc5f16f8a7
commit 020c1d5051
34 changed files with 3341 additions and 947 deletions

View File

@@ -1,10 +1,11 @@
from typing import Dict, Any, List
import asyncio
from datetime import datetime
from typing import Any, Dict, List, Optional
import httpx
from app.services.collectors.base import BaseCollector
from app.core.data_sources import get_data_sources_config
from app.services.collectors.base import BaseCollector
class ArcGISCableLandingRelationCollector(BaseCollector):
@@ -18,45 +19,129 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
def base_url(self) -> str:
if self._resolved_url:
return self._resolved_url
from app.core.data_sources import get_data_sources_config
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cable_landing_relation")
def _layer_url(self, layer_id: int) -> str:
if "/FeatureServer/" not in self.base_url:
return self.base_url
prefix = self.base_url.split("/FeatureServer/")[0]
return f"{prefix}/FeatureServer/{layer_id}/query"
async def _fetch_layer_attributes(
self, client: httpx.AsyncClient, layer_id: int
) -> List[Dict[str, Any]]:
response = await client.get(
self._layer_url(layer_id),
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "false",
"f": "json",
},
)
response.raise_for_status()
data = response.json()
return [feature.get("attributes", {}) for feature in data.get("features", [])]
async def _fetch_relation_features(self, client: httpx.AsyncClient) -> List[Dict[str, Any]]:
response = await client.get(
self.base_url,
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "true",
"f": "geojson",
},
)
response.raise_for_status()
data = response.json()
return data.get("features", [])
async def fetch(self) -> List[Dict[str, Any]]:
params = {"where": "1=1", "outFields": "*", "returnGeometry": "true", "f": "geojson"}
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.get(self.base_url, params=params)
response.raise_for_status()
return self.parse_response(response.json())
relation_features, landing_rows, cable_rows = await asyncio.gather(
self._fetch_relation_features(client),
self._fetch_layer_attributes(client, 1),
self._fetch_layer_attributes(client, 2),
)
return self.parse_response(relation_features, landing_rows, cable_rows)
def parse_response(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
result = []
def _build_landing_lookup(self, landing_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
lookup: Dict[int, Dict[str, Any]] = {}
for row in landing_rows:
city_id = row.get("city_id")
if city_id is None:
continue
lookup[int(city_id)] = {
"landing_point_id": row.get("landing_point_id") or city_id,
"landing_point_name": row.get("Name") or row.get("name") or "",
"facility": row.get("facility") or "",
"status": row.get("status") or "",
"country": row.get("country") or "",
}
return lookup
features = data.get("features", [])
for feature in features:
def _build_cable_lookup(self, cable_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
lookup: Dict[int, Dict[str, Any]] = {}
for row in cable_rows:
cable_id = row.get("cable_id")
if cable_id is None:
continue
lookup[int(cable_id)] = {
"cable_name": row.get("Name") or "",
"status": row.get("status") or "active",
}
return lookup
def parse_response(
self,
relation_features: List[Dict[str, Any]],
landing_rows: List[Dict[str, Any]],
cable_rows: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
result: List[Dict[str, Any]] = []
landing_lookup = self._build_landing_lookup(landing_rows)
cable_lookup = self._build_cable_lookup(cable_rows)
for feature in relation_features:
props = feature.get("properties", {})
try:
city_id = props.get("city_id")
cable_id = props.get("cable_id")
landing_info = landing_lookup.get(int(city_id), {}) if city_id is not None else {}
cable_info = cable_lookup.get(int(cable_id), {}) if cable_id is not None else {}
cable_name = cable_info.get("cable_name") or props.get("cable_name") or "Unknown"
landing_point_name = (
landing_info.get("landing_point_name")
or props.get("landing_point_name")
or "Unknown"
)
facility = landing_info.get("facility") or props.get("facility") or "-"
status = cable_info.get("status") or landing_info.get("status") or props.get("status") or "-"
country = landing_info.get("country") or props.get("country") or ""
landing_point_id = landing_info.get("landing_point_id") or props.get("landing_point_id") or city_id
entry = {
"source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}",
"name": f"{props.get('cable_name', 'Unknown')} - {props.get('landing_point_name', 'Unknown')}",
"country": props.get("country", ""),
"city": props.get("landing_point_name", ""),
"name": f"{cable_name} - {landing_point_name}",
"country": country,
"city": landing_point_name,
"latitude": str(props.get("latitude", "")) if props.get("latitude") else "",
"longitude": str(props.get("longitude", "")) if props.get("longitude") else "",
"value": "",
"unit": "",
"metadata": {
"objectid": props.get("OBJECTID"),
"city_id": props.get("city_id"),
"cable_id": props.get("cable_id"),
"cable_name": props.get("cable_name"),
"landing_point_id": props.get("landing_point_id"),
"landing_point_name": props.get("landing_point_name"),
"facility": props.get("facility"),
"status": props.get("status"),
"city_id": city_id,
"cable_id": cable_id,
"cable_name": cable_name,
"landing_point_id": landing_point_id,
"landing_point_name": landing_point_name,
"facility": facility,
"status": status,
},
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
}