first commit

This commit is contained in:
rayd1o
2026-03-05 11:46:58 +08:00
commit e7033775d8
20657 changed files with 1988940 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
"""Collected Data model for storing data from all collectors"""
from sqlalchemy import Column, DateTime, Integer, String, Text, JSON, Index
from sqlalchemy.sql import func
from app.db.session import Base
class CollectedData(Base):
"""Generic model for storing collected data from all sources"""
__tablename__ = "collected_data"
id = Column(Integer, primary_key=True, autoincrement=True)
source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models"
source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1"
data_type = Column(
String(50), nullable=False, index=True
) # e.g., "supercomputer", "model", "dataset"
# Core data fields
name = Column(String(500))
title = Column(String(500))
description = Column(Text)
# Location data (for geo visualization)
country = Column(String(100))
city = Column(String(100))
latitude = Column(String(50))
longitude = Column(String(50))
# Performance metrics
value = Column(String(100)) # Generic value field (Rmax, Rpeak, etc.)
unit = Column(String(20))
# Additional metadata as JSON
extra_data = Column(
"metadata", JSON, default={}
) # Using 'extra_data' as attribute name but 'metadata' as column name
# Timestamps
collected_at = Column(DateTime(timezone=True), server_default=func.now(), index=True)
reference_date = Column(DateTime(timezone=True)) # Data reference date (e.g., TOP500 list date)
# Status
is_valid = Column(Integer, default=1) # 1=valid, 0=invalid
# Indexes for common queries
__table_args__ = (
Index("idx_collected_data_source_collected", "source", "collected_at"),
Index("idx_collected_data_source_type", "source", "data_type"),
)
def __repr__(self):
return f"<CollectedData {self.id}: {self.source}/{self.data_type}>"
def to_dict(self) -> dict:
"""Convert to dictionary"""
return {
"id": self.id,
"source": self.source,
"source_id": self.source_id,
"data_type": self.data_type,
"name": self.name,
"title": self.title,
"description": self.description,
"country": self.country,
"city": self.city,
"latitude": self.latitude,
"longitude": self.longitude,
"value": self.value,
"unit": self.unit,
"metadata": self.extra_data,
"collected_at": self.collected_at.isoformat()
if self.collected_at is not None
else None,
"reference_date": self.reference_date.isoformat()
if self.reference_date is not None
else None,
}