"""Collected Data model for storing data from all collectors""" from sqlalchemy import Column, DateTime, Integer, String, Text, JSON, Index from sqlalchemy.sql import func from app.db.session import Base class CollectedData(Base): """Generic model for storing collected data from all sources""" __tablename__ = "collected_data" id = Column(Integer, primary_key=True, autoincrement=True) source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models" source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1" data_type = Column( String(50), nullable=False, index=True ) # e.g., "supercomputer", "model", "dataset" # Core data fields name = Column(String(500)) title = Column(String(500)) description = Column(Text) # Location data (for geo visualization) country = Column(String(100)) city = Column(String(100)) latitude = Column(String(50)) longitude = Column(String(50)) # Performance metrics value = Column(String(100)) # Generic value field (Rmax, Rpeak, etc.) unit = Column(String(20)) # Additional metadata as JSON extra_data = Column( "metadata", JSON, default={} ) # Using 'extra_data' as attribute name but 'metadata' as column name # Timestamps collected_at = Column(DateTime(timezone=True), server_default=func.now(), index=True) reference_date = Column(DateTime(timezone=True)) # Data reference date (e.g., TOP500 list date) # Status is_valid = Column(Integer, default=1) # 1=valid, 0=invalid # Indexes for common queries __table_args__ = ( Index("idx_collected_data_source_collected", "source", "collected_at"), Index("idx_collected_data_source_type", "source", "data_type"), ) def __repr__(self): return f"" def to_dict(self) -> dict: """Convert to dictionary""" return { "id": self.id, "source": self.source, "source_id": self.source_id, "data_type": self.data_type, "name": self.name, "title": self.title, "description": self.description, "country": self.country, "city": self.city, "latitude": self.latitude, "longitude": self.longitude, "value": self.value, "unit": self.unit, "metadata": self.extra_data, "collected_at": self.collected_at.isoformat() if self.collected_at is not None else None, "reference_date": self.reference_date.isoformat() if self.reference_date is not None else None, }