Refine data management and collection workflows

This commit is contained in:
linkong
2026-03-25 17:19:10 +08:00
parent cc5f16f8a7
commit 020c1d5051
34 changed files with 3341 additions and 947 deletions

View File

@@ -1,10 +1,11 @@
import asyncio
from datetime import datetime
from typing import Any, Dict, List, Optional
import httpx
from app.core.data_sources import get_data_sources_config
from app.services.collectors.base import BaseCollector
class ArcGISCableLandingRelationCollector(BaseCollector):
@@ -18,45 +19,129 @@ class ArcGISCableLandingRelationCollector(BaseCollector):
def base_url(self) -> str:
if self._resolved_url:
return self._resolved_url
config = get_data_sources_config()
return config.get_yaml_url("arcgis_cable_landing_relation")
def _layer_url(self, layer_id: int) -> str:
if "/FeatureServer/" not in self.base_url:
return self.base_url
prefix = self.base_url.split("/FeatureServer/")[0]
return f"{prefix}/FeatureServer/{layer_id}/query"
async def _fetch_layer_attributes(
self, client: httpx.AsyncClient, layer_id: int
) -> List[Dict[str, Any]]:
response = await client.get(
self._layer_url(layer_id),
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "false",
"f": "json",
},
)
response.raise_for_status()
data = response.json()
return [feature.get("attributes", {}) for feature in data.get("features", [])]
async def _fetch_relation_features(self, client: httpx.AsyncClient) -> List[Dict[str, Any]]:
response = await client.get(
self.base_url,
params={
"where": "1=1",
"outFields": "*",
"returnGeometry": "true",
"f": "geojson",
},
)
response.raise_for_status()
data = response.json()
return data.get("features", [])
async def fetch(self) -> List[Dict[str, Any]]:
async with httpx.AsyncClient(timeout=60.0) as client:
relation_features, landing_rows, cable_rows = await asyncio.gather(
self._fetch_relation_features(client),
self._fetch_layer_attributes(client, 1),
self._fetch_layer_attributes(client, 2),
)
return self.parse_response(relation_features, landing_rows, cable_rows)
def _build_landing_lookup(self, landing_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
lookup: Dict[int, Dict[str, Any]] = {}
for row in landing_rows:
city_id = row.get("city_id")
if city_id is None:
continue
lookup[int(city_id)] = {
"landing_point_id": row.get("landing_point_id") or city_id,
"landing_point_name": row.get("Name") or row.get("name") or "",
"facility": row.get("facility") or "",
"status": row.get("status") or "",
"country": row.get("country") or "",
}
return lookup
features = data.get("features", [])
for feature in features:
def _build_cable_lookup(self, cable_rows: List[Dict[str, Any]]) -> Dict[int, Dict[str, Any]]:
lookup: Dict[int, Dict[str, Any]] = {}
for row in cable_rows:
cable_id = row.get("cable_id")
if cable_id is None:
continue
lookup[int(cable_id)] = {
"cable_name": row.get("Name") or "",
"status": row.get("status") or "active",
}
return lookup
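# How the two lookups combine in parse_response below (hypothetical rows):
#   relation props    -> {"city_id": 7, "cable_id": 3}
#   landing_lookup[7] -> {"landing_point_name": "Example Landing", "country": "FR", ...}
#   cable_lookup[3]   -> {"cable_name": "Example Cable", "status": "active"}
# Lookup values take precedence over the raw relation properties.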
def parse_response(
self,
relation_features: List[Dict[str, Any]],
landing_rows: List[Dict[str, Any]],
cable_rows: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
result: List[Dict[str, Any]] = []
landing_lookup = self._build_landing_lookup(landing_rows)
cable_lookup = self._build_cable_lookup(cable_rows)
for feature in relation_features:
props = feature.get("properties", {})
try:
city_id = props.get("city_id")
cable_id = props.get("cable_id")
landing_info = landing_lookup.get(int(city_id), {}) if city_id is not None else {}
cable_info = cable_lookup.get(int(cable_id), {}) if cable_id is not None else {}
cable_name = cable_info.get("cable_name") or props.get("cable_name") or "Unknown"
landing_point_name = (
landing_info.get("landing_point_name")
or props.get("landing_point_name")
or "Unknown"
)
facility = landing_info.get("facility") or props.get("facility") or "-"
status = cable_info.get("status") or landing_info.get("status") or props.get("status") or "-"
country = landing_info.get("country") or props.get("country") or ""
landing_point_id = landing_info.get("landing_point_id") or props.get("landing_point_id") or city_id
entry = {
"source_id": f"arcgis_relation_{props.get('OBJECTID', props.get('id', ''))}",
"name": f"{props.get('cable_name', 'Unknown')} - {props.get('landing_point_name', 'Unknown')}",
"country": props.get("country", ""),
"city": props.get("landing_point_name", ""),
"name": f"{cable_name} - {landing_point_name}",
"country": country,
"city": landing_point_name,
"latitude": str(props.get("latitude", "")) if props.get("latitude") else "",
"longitude": str(props.get("longitude", "")) if props.get("longitude") else "",
"value": "",
"unit": "",
"metadata": {
"objectid": props.get("OBJECTID"),
"city_id": props.get("city_id"),
"cable_id": props.get("cable_id"),
"cable_name": props.get("cable_name"),
"landing_point_id": props.get("landing_point_id"),
"landing_point_name": props.get("landing_point_name"),
"facility": props.get("facility"),
"status": props.get("status"),
"city_id": city_id,
"cable_id": cable_id,
"cable_name": cable_name,
"landing_point_id": landing_point_id,
"landing_point_name": landing_point_name,
"facility": facility,
"status": status,
},
"reference_date": datetime.utcnow().strftime("%Y-%m-%d"),
}
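One subtlety in the fallback chains above: `or` treats empty strings as missing, so a blank lookup value falls through to the relation properties. A minimal standalone sketch with hypothetical values:

props = {"cable_name": None, "status": "planned"}
cable_info = {"cable_name": "Example Cable", "status": ""}
name = cable_info.get("cable_name") or props.get("cable_name") or "Unknown"
status = cable_info.get("status") or props.get("status") or "-"
print(name, status)  # Example Cable planned -- the empty status falls through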

View File

@@ -4,10 +4,12 @@ from abc import ABC, abstractmethod
from typing import Dict, List, Any, Optional
from datetime import datetime
import httpx
from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.collected_data_fields import build_dynamic_metadata, get_record_field
from app.core.config import settings
from app.core.countries import normalize_country
class BaseCollector(ABC):
@@ -39,6 +41,11 @@ class BaseCollector(ABC):
records_processed / self._current_task.total_records
) * 100
async def set_phase(self, phase: str):
if self._current_task and self._db_session:
self._current_task.phase = phase
await self._db_session.commit()
@abstractmethod
async def fetch(self) -> List[Dict[str, Any]]:
"""Fetch raw data from source"""
@@ -48,14 +55,87 @@ class BaseCollector(ABC):
"""Transform raw data to internal format (default: pass through)"""
return raw_data
def _parse_reference_date(self, value: Any) -> Optional[datetime]:
if not value:
return None
if isinstance(value, datetime):
return value
if isinstance(value, str):
return datetime.fromisoformat(value.replace("Z", "+00:00"))
return None
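# Example behavior (the trailing "Z" is normalized to a UTC offset first):
#   _parse_reference_date("2025-11-01T00:00:00Z") -> datetime(2025, 11, 1, tzinfo=timezone.utc)
#   _parse_reference_date(None) -> None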
def _build_comparable_payload(self, record: Any) -> Dict[str, Any]:
return {
"name": getattr(record, "name", None),
"title": getattr(record, "title", None),
"description": getattr(record, "description", None),
"country": get_record_field(record, "country"),
"city": get_record_field(record, "city"),
"latitude": get_record_field(record, "latitude"),
"longitude": get_record_field(record, "longitude"),
"value": get_record_field(record, "value"),
"unit": get_record_field(record, "unit"),
"metadata": getattr(record, "extra_data", None) or {},
"reference_date": (
getattr(record, "reference_date", None).isoformat()
if getattr(record, "reference_date", None)
else None
),
}
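# _save_data compares this payload against the previous current record to decide
# whether a row is "unchanged" or "updated"; reference_date is serialized to an
# ISO string so both sides compare as plain dict values.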
async def _create_snapshot(
self,
db: AsyncSession,
task_id: int,
data: List[Dict[str, Any]],
started_at: datetime,
) -> int:
from app.models.data_snapshot import DataSnapshot
reference_dates = [
parsed
for parsed in (self._parse_reference_date(item.get("reference_date")) for item in data)
if parsed is not None
]
reference_date = max(reference_dates) if reference_dates else None
result = await db.execute(
select(DataSnapshot)
.where(DataSnapshot.source == self.name, DataSnapshot.is_current == True)
.order_by(DataSnapshot.completed_at.desc().nullslast(), DataSnapshot.id.desc())
.limit(1)
)
previous_snapshot = result.scalar_one_or_none()
snapshot = DataSnapshot(
datasource_id=getattr(self, "_datasource_id", 1),
task_id=task_id,
source=self.name,
snapshot_key=f"{self.name}:{task_id}",
reference_date=reference_date,
started_at=started_at,
status="running",
is_current=True,
parent_snapshot_id=previous_snapshot.id if previous_snapshot else None,
summary={},
)
db.add(snapshot)
if previous_snapshot:
previous_snapshot.is_current = False
await db.commit()
return snapshot.id
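# Snapshot lineage sketch (hypothetical ids): if snapshot 11 is current when a
# run starts, the new snapshot 12 gets parent_snapshot_id=11 and is_current=True,
# while 11 flips to is_current=False -- one current snapshot per source, with a
# linked history chain behind it.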
async def run(self, db: AsyncSession) -> Dict[str, Any]:
"""Full pipeline: fetch -> transform -> save"""
from app.services.collectors.registry import collector_registry
from app.models.task import CollectionTask
from app.models.collected_data import CollectedData
from app.models.data_snapshot import DataSnapshot
start_time = datetime.utcnow()
datasource_id = getattr(self, "_datasource_id", 1)
snapshot_id: Optional[int] = None
if not collector_registry.is_active(self.name):
return {"status": "skipped", "reason": "Collector is disabled"}
@@ -63,6 +143,7 @@ class BaseCollector(ABC):
task = CollectionTask(
datasource_id=datasource_id,
status="running",
phase="queued",
started_at=start_time,
)
db.add(task)
@@ -75,15 +156,20 @@ class BaseCollector(ABC):
await self.resolve_url(db)
try:
await self.set_phase("fetching")
raw_data = await self.fetch()
task.total_records = len(raw_data)
await db.commit()
await self.set_phase("transforming")
data = self.transform(raw_data)
snapshot_id = await self._create_snapshot(db, task_id, data, start_time)
await self.set_phase("saving")
records_count = await self._save_data(db, data, task_id=task_id, snapshot_id=snapshot_id)
task.status = "success"
task.phase = "completed"
task.records_processed = records_count
task.progress = 100.0
task.completed_at = datetime.utcnow()
@@ -97,8 +183,15 @@ class BaseCollector(ABC):
}
except Exception as e:
task.status = "failed"
task.phase = "failed"
task.error_message = str(e)
task.completed_at = datetime.utcnow()
if snapshot_id is not None:
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.status = "failed"
snapshot.completed_at = datetime.utcnow()
snapshot.summary = {"error": str(e)}
await db.commit()
return {
@@ -108,53 +201,163 @@ class BaseCollector(ABC):
"execution_time_seconds": (datetime.utcnow() - start_time).total_seconds(),
}
async def _save_data(
self,
db: AsyncSession,
data: List[Dict[str, Any]],
task_id: Optional[int] = None,
snapshot_id: Optional[int] = None,
) -> int:
"""Save transformed data to database"""
from app.models.collected_data import CollectedData
from app.models.data_snapshot import DataSnapshot
if not data:
if snapshot_id is not None:
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.record_count = 0
snapshot.summary = {"created": 0, "updated": 0, "unchanged": 0}
snapshot.status = "success"
snapshot.completed_at = datetime.utcnow()
await db.commit()
return 0
collected_at = datetime.utcnow()
records_added = 0
created_count = 0
updated_count = 0
unchanged_count = 0
seen_entity_keys: set[str] = set()
previous_current_keys: set[str] = set()
previous_current_result = await db.execute(
select(CollectedData.entity_key).where(
CollectedData.source == self.name,
CollectedData.is_current == True,
)
)
previous_current_keys = {row[0] for row in previous_current_result.fetchall() if row[0]}
for i, item in enumerate(data):
raw_metadata = item.get("metadata", {})
extra_data = build_dynamic_metadata(
raw_metadata,
country=item.get("country"),
city=item.get("city"),
latitude=item.get("latitude"),
longitude=item.get("longitude"),
value=item.get("value"),
unit=item.get("unit"),
)
normalized_country = normalize_country(item.get("country"))
if normalized_country is not None:
extra_data["country"] = normalized_country
if item.get("country") and normalized_country != item.get("country"):
extra_data["raw_country"] = item.get("country")
if normalized_country is None:
extra_data["country_validation"] = "invalid"
source_id = item.get("source_id") or item.get("id")
reference_date = self._parse_reference_date(item.get("reference_date"))
source_id_str = str(source_id) if source_id is not None else None
entity_key = f"{self.name}:{source_id_str}" if source_id_str else f"{self.name}:{i}"
previous_record = None
if entity_key and entity_key not in seen_entity_keys:
result = await db.execute(
select(CollectedData)
.where(
CollectedData.source == self.name,
CollectedData.entity_key == entity_key,
CollectedData.is_current == True,
)
.order_by(CollectedData.collected_at.desc().nullslast(), CollectedData.id.desc())
)
previous_records = result.scalars().all()
if previous_records:
previous_record = previous_records[0]
for old_record in previous_records:
old_record.is_current = False
record = CollectedData(
snapshot_id=snapshot_id,
task_id=task_id,
source=self.name,
source_id=item.get("source_id") or item.get("id"),
source_id=source_id_str,
entity_key=entity_key,
data_type=self.data_type,
name=item.get("name"),
title=item.get("title"),
description=item.get("description"),
country=item.get("country"),
city=item.get("city"),
latitude=str(item.get("latitude", ""))
if item.get("latitude") is not None
else None,
longitude=str(item.get("longitude", ""))
if item.get("longitude") is not None
else None,
value=item.get("value"),
unit=item.get("unit"),
extra_data=item.get("metadata", {}),
extra_data=extra_data,
collected_at=collected_at,
reference_date=reference_date,
is_valid=1,
is_current=True,
previous_record_id=previous_record.id if previous_record else None,
deleted_at=None,
)
if previous_record is None:
record.change_type = "created"
record.change_summary = {}
created_count += 1
else:
previous_payload = self._build_comparable_payload(previous_record)
current_payload = self._build_comparable_payload(record)
if current_payload == previous_payload:
record.change_type = "unchanged"
record.change_summary = {}
unchanged_count += 1
else:
changed_fields = [
key for key in current_payload.keys() if current_payload[key] != previous_payload.get(key)
]
record.change_type = "updated"
record.change_summary = {"changed_fields": changed_fields}
updated_count += 1
db.add(record)
seen_entity_keys.add(entity_key)
records_added += 1
if i % 100 == 0:
self.update_progress(i + 1)
await db.commit()
if snapshot_id is not None:
deleted_keys = previous_current_keys - seen_entity_keys
await db.execute(
text(
"""
UPDATE collected_data
SET is_current = FALSE
WHERE source = :source
AND snapshot_id IS DISTINCT FROM :snapshot_id
AND COALESCE(is_current, TRUE) = TRUE
"""
),
{"source": self.name, "snapshot_id": snapshot_id},
)
snapshot = await db.get(DataSnapshot, snapshot_id)
if snapshot:
snapshot.record_count = records_added
snapshot.status = "success"
snapshot.completed_at = datetime.utcnow()
snapshot.summary = {
"created": created_count,
"updated": updated_count,
"unchanged": unchanged_count,
"deleted": len(deleted_keys),
}
await db.commit()
self.update_progress(len(data))
return records_added
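A self-contained sketch of the changed-fields diff that _save_data performs between the previous and current comparable payloads (hypothetical values):

previous = {"name": "Example IXP", "value": "10", "unit": "Gbps"}
current = {"name": "Example IXP", "value": "12", "unit": "Gbps"}
changed_fields = [key for key in current.keys() if current[key] != previous.get(key)]
print(changed_fields)  # ['value'] -> change_type="updated", summary={"changed_fields": ["value"]}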

View File

@@ -76,7 +76,7 @@ class PeeringDBIXPCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {}
async def fetch(self) -> List[Dict[str, Any]]:
"""Collect IXP data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry()
if not response_data:
@@ -177,7 +177,7 @@ class PeeringDBNetworkCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {}
async def fetch(self) -> List[Dict[str, Any]]:
"""Collect Network data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry()
if not response_data:
@@ -280,7 +280,7 @@ class PeeringDBFacilityCollector(HTTPCollector):
print(f"Warning: PeeringDB collection failed after {max_retries} retries: {last_error}")
return {}
async def fetch(self) -> List[Dict[str, Any]]:
"""Collect Facility data from PeeringDB with rate limit handling"""
response_data = await self.fetch_with_retry()
if not response_data:
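The collect -> fetch renames above align these collectors with the abstract fetch() hook that BaseCollector.run() actually calls. A minimal subclass sketch of that contract (hypothetical collector, for illustration only):

class ExampleCollector(BaseCollector):
    name = "example"
    data_type = "example"

    async def fetch(self) -> List[Dict[str, Any]]:
        return [{"source_id": "example_1", "name": "Example record"}]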

View File

@@ -4,9 +4,9 @@ Collects data from TOP500 supercomputer rankings.
https://top500.org/lists/top500/
"""
import asyncio
from typing import Dict, Any, List
from datetime import datetime
from bs4 import BeautifulSoup
import httpx
@@ -21,14 +21,108 @@ class TOP500Collector(BaseCollector):
data_type = "supercomputer"
async def fetch(self) -> List[Dict[str, Any]]:
"""Fetch TOP500 data from website (scraping)"""
# Get the latest list page
"""Fetch TOP500 list data and enrich each row with detail-page metadata."""
url = "https://top500.org/lists/top500/list/2025/11/"
async with httpx.AsyncClient(timeout=60.0, follow_redirects=True) as client:
response = await client.get(url)
response.raise_for_status()
entries = self.parse_response(response.text)
semaphore = asyncio.Semaphore(8)
async def enrich(entry: Dict[str, Any]) -> Dict[str, Any]:
detail_url = entry.pop("_detail_url", "")
if not detail_url:
return entry
async with semaphore:
try:
detail_response = await client.get(detail_url)
detail_response.raise_for_status()
entry["metadata"].update(self.parse_detail_response(detail_response.text))
except Exception:
entry["metadata"]["detail_fetch_failed"] = True
return entry
return await asyncio.gather(*(enrich(entry) for entry in entries))
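# The Semaphore(8) caps concurrent detail-page requests so the enrichment pass
# does not hammer top500.org; asyncio.gather preserves input order, so enriched
# entries stay aligned with their list-page ranks.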
def _extract_system_fields(self, system_cell) -> Dict[str, str]:
link = system_cell.find("a")
system_name = link.get_text(" ", strip=True) if link else system_cell.get_text(" ", strip=True)
detail_url = ""
if link and link.get("href"):
detail_url = f"https://top500.org{link.get('href')}"
manufacturer = ""
if link and link.next_sibling:
manufacturer = str(link.next_sibling).strip(" ,\n\t")
cell_text = system_cell.get_text("\n", strip=True)
lines = [line.strip(" ,") for line in cell_text.splitlines() if line.strip()]
site = ""
country = ""
if lines:
system_name = lines[0]
if len(lines) >= 3:
site = lines[-2]
country = lines[-1]
elif len(lines) == 2:
country = lines[-1]
if not manufacturer and len(lines) >= 2:
manufacturer = lines[1]
return {
"name": system_name,
"manufacturer": manufacturer,
"site": site,
"country": country,
"detail_url": detail_url,
}
def parse_detail_response(self, html: str) -> Dict[str, Any]:
soup = BeautifulSoup(html, "html.parser")
detail_table = soup.find("table", {"class": "table table-condensed"})
if not detail_table:
return {}
detail_map: Dict[str, Any] = {}
label_aliases = {
"Site": "site",
"Manufacturer": "manufacturer",
"Cores": "cores",
"Processor": "processor",
"Interconnect": "interconnect",
"Installation Year": "installation_year",
"Linpack Performance (Rmax)": "rmax",
"Theoretical Peak (Rpeak)": "rpeak",
"Nmax": "nmax",
"HPCG": "hpcg",
"Power": "power",
"Power Measurement Level": "power_measurement_level",
"Operating System": "operating_system",
"Compiler": "compiler",
"Math Library": "math_library",
"MPI": "mpi",
}
for row in detail_table.find_all("tr"):
header = row.find("th")
value_cell = row.find("td")
if not header or not value_cell:
continue
label = header.get_text(" ", strip=True).rstrip(":")
key = label_aliases.get(label)
if not key:
continue
value = value_cell.get_text(" ", strip=True)
detail_map[key] = value
return detail_map
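# Example: a detail row "<tr><th>Cores:</th><td>11,039,616</td></tr>" yields
# label "Cores" once the trailing colon is stripped, which label_aliases maps
# to detail_map["cores"] = "11,039,616".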
def parse_response(self, html: str) -> List[Dict[str, Any]]:
"""Parse TOP500 HTML response"""
@@ -36,27 +130,26 @@ class TOP500Collector(BaseCollector):
soup = BeautifulSoup(html, "html.parser")
# Find the table with TOP500 data
table = soup.find("table", {"class": "top500-table"})
if not table:
# Try alternative table selector
table = soup.find("table", {"id": "top500"})
table = None
for candidate in soup.find_all("table"):
header_cells = [
cell.get_text(" ", strip=True) for cell in candidate.select("thead th")
]
normalized_headers = [header.lower() for header in header_cells]
if (
"rank" in normalized_headers
and "system" in normalized_headers
and any("cores" in header for header in normalized_headers)
and any("rmax" in header for header in normalized_headers)
):
table = candidate
break
if not table:
table = soup.find("table", {"class": "top500-table"}) or soup.find("table", {"id": "top500"})
if table:
rows = table.find_all("tr")
rows = table.select("tr")
for row in rows[1:]: # Skip header row
cells = row.find_all(["td", "th"])
if len(cells) >= 6:
@@ -68,43 +161,26 @@ class TOP500Collector(BaseCollector):
rank = int(rank_text)
# System name (may contain link)
system_cell = cells[1]
system_fields = self._extract_system_fields(system_cell)
system_name = system_fields["name"]
manufacturer = system_fields["manufacturer"]
site = system_fields["site"]
country = system_fields["country"]
detail_url = system_fields["detail_url"]
# Cores
cores = cells[2].get_text(strip=True).replace(",", "")
# Rmax
rmax_text = cells[3].get_text(strip=True)
rmax = self._parse_performance(rmax_text)
# Rpeak
rpeak_text = cells[4].get_text(strip=True)
rpeak = self._parse_performance(rpeak_text)
# Power (optional)
power = ""
if len(cells) >= 6:
power = cells[5].get_text(strip=True).replace(",", "")
entry = {
"source_id": f"top500_{rank}",
@@ -117,10 +193,14 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s",
"metadata": {
"rank": rank,
"r_peak": rpeak,
"power": power,
"cores": cores,
"rmax": rmax_text,
"rpeak": rpeak_text,
"power": power,
"manufacturer": manufacturer,
"site": site,
},
"_detail_url": detail_url,
"reference_date": "2025-11-01",
}
data.append(entry)
@@ -184,10 +264,15 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s",
"metadata": {
"rank": 1,
"r_peak": 2746.38,
"power": 29581,
"cores": 11039616,
"cores": "11039616",
"rmax": "1742.00",
"rpeak": "2746.38",
"power": "29581",
"manufacturer": "HPE",
"site": "DOE/NNSA/LLNL",
"processor": "AMD 4th Gen EPYC 24C 1.8GHz",
"interconnect": "Slingshot-11",
"installation_year": "2025",
},
"reference_date": "2025-11-01",
},
@@ -202,10 +287,12 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s",
"metadata": {
"rank": 2,
"r_peak": 2055.72,
"power": 24607,
"cores": 9066176,
"cores": "9066176",
"rmax": "1353.00",
"rpeak": "2055.72",
"power": "24607",
"manufacturer": "HPE",
"site": "DOE/SC/Oak Ridge National Laboratory",
},
"reference_date": "2025-11-01",
},
@@ -220,9 +307,10 @@ class TOP500Collector(BaseCollector):
"unit": "PFlop/s",
"metadata": {
"rank": 3,
"r_peak": 1980.01,
"power": 38698,
"cores": 9264128,
"cores": "9264128",
"rmax": "1012.00",
"rpeak": "1980.01",
"power": "38698",
"manufacturer": "Intel",
},
"reference_date": "2025-11-01",