Refine data management and collection workflows
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from app.models.user import User
|
||||
from app.models.gpu_cluster import GPUCluster
|
||||
from app.models.task import CollectionTask
|
||||
from app.models.data_snapshot import DataSnapshot
|
||||
from app.models.datasource import DataSource
|
||||
from app.models.datasource_config import DataSourceConfig
|
||||
from app.models.alert import Alert, AlertSeverity, AlertStatus
|
||||
@@ -10,6 +11,7 @@ __all__ = [
|
||||
"User",
|
||||
"GPUCluster",
|
||||
"CollectionTask",
|
||||
"DataSnapshot",
|
||||
"DataSource",
|
||||
"DataSourceConfig",
|
||||
"SystemSetting",
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
"""Collected Data model for storing data from all collectors"""
|
||||
|
||||
from sqlalchemy import Column, DateTime, Integer, String, Text, JSON, Index
|
||||
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, String, Text, JSON, Index
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.core.collected_data_fields import get_record_field
|
||||
from app.db.session import Base
|
||||
|
||||
|
||||
@@ -12,8 +13,11 @@ class CollectedData(Base):
|
||||
__tablename__ = "collected_data"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
|
||||
task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
|
||||
source = Column(String(100), nullable=False, index=True) # e.g., "top500", "huggingface_models"
|
||||
source_id = Column(String(100), index=True) # Original ID from source, e.g., "rank_1"
|
||||
entity_key = Column(String(255), index=True)
|
||||
data_type = Column(
|
||||
String(50), nullable=False, index=True
|
||||
) # e.g., "supercomputer", "model", "dataset"
|
||||
@@ -23,16 +27,6 @@ class CollectedData(Base):
|
||||
title = Column(String(500))
|
||||
description = Column(Text)
|
||||
|
||||
# Location data (for geo visualization)
|
||||
country = Column(String(100))
|
||||
city = Column(String(100))
|
||||
latitude = Column(String(50))
|
||||
longitude = Column(String(50))
|
||||
|
||||
# Performance metrics
|
||||
value = Column(String(100)) # Generic value field (Rmax, Rpeak, etc.)
|
||||
unit = Column(String(20))
|
||||
|
||||
# Additional metadata as JSON
|
||||
extra_data = Column(
|
||||
"metadata", JSON, default={}
|
||||
@@ -44,11 +38,17 @@ class CollectedData(Base):
|
||||
|
||||
# Status
|
||||
is_valid = Column(Integer, default=1) # 1=valid, 0=invalid
|
||||
is_current = Column(Boolean, default=True, index=True)
|
||||
previous_record_id = Column(Integer, ForeignKey("collected_data.id"), nullable=True, index=True)
|
||||
change_type = Column(String(20), nullable=True)
|
||||
change_summary = Column(JSON, default={})
|
||||
deleted_at = Column(DateTime(timezone=True), nullable=True)
|
||||
|
||||
# Indexes for common queries
|
||||
__table_args__ = (
|
||||
Index("idx_collected_data_source_collected", "source", "collected_at"),
|
||||
Index("idx_collected_data_source_type", "source", "data_type"),
|
||||
Index("idx_collected_data_source_source_id", "source", "source_id"),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
@@ -58,18 +58,21 @@ class CollectedData(Base):
|
||||
"""Convert to dictionary"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"snapshot_id": self.snapshot_id,
|
||||
"task_id": self.task_id,
|
||||
"source": self.source,
|
||||
"source_id": self.source_id,
|
||||
"entity_key": self.entity_key,
|
||||
"data_type": self.data_type,
|
||||
"name": self.name,
|
||||
"title": self.title,
|
||||
"description": self.description,
|
||||
"country": self.country,
|
||||
"city": self.city,
|
||||
"latitude": self.latitude,
|
||||
"longitude": self.longitude,
|
||||
"value": self.value,
|
||||
"unit": self.unit,
|
||||
"country": get_record_field(self, "country"),
|
||||
"city": get_record_field(self, "city"),
|
||||
"latitude": get_record_field(self, "latitude"),
|
||||
"longitude": get_record_field(self, "longitude"),
|
||||
"value": get_record_field(self, "value"),
|
||||
"unit": get_record_field(self, "unit"),
|
||||
"metadata": self.extra_data,
|
||||
"collected_at": self.collected_at.isoformat()
|
||||
if self.collected_at is not None
|
||||
@@ -77,4 +80,9 @@ class CollectedData(Base):
|
||||
"reference_date": self.reference_date.isoformat()
|
||||
if self.reference_date is not None
|
||||
else None,
|
||||
"is_current": self.is_current,
|
||||
"previous_record_id": self.previous_record_id,
|
||||
"change_type": self.change_type,
|
||||
"change_summary": self.change_summary,
|
||||
"deleted_at": self.deleted_at.isoformat() if self.deleted_at is not None else None,
|
||||
}
|
||||
|
||||
26
backend/app/models/data_snapshot.py
Normal file
26
backend/app/models/data_snapshot.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, JSON, String
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.db.session import Base
|
||||
|
||||
|
||||
class DataSnapshot(Base):
    """ORM model mapped to the ``data_snapshots`` table.

    Each row carries a ``source`` name, an optional link to a
    ``collection_tasks`` row, timing columns, a record count, a status
    string and an optional link to another snapshot row.
    NOTE(review): presumably one row is written per collection run of a
    data source -- confirm against the code that creates snapshots.
    """

    __tablename__ = "data_snapshots"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Indexed plain integer; no ForeignKey constraint is declared for it here.
    datasource_id = Column(Integer, nullable=False, index=True)
    # Optional reference to collection_tasks.id.
    task_id = Column(Integer, ForeignKey("collection_tasks.id"), nullable=True, index=True)
    source = Column(String(100), nullable=False, index=True)
    snapshot_key = Column(String(100), nullable=True, index=True)
    reference_date = Column(DateTime(timezone=True), nullable=True)
    # Filled by the database (NOW()) at insert time when not supplied.
    started_at = Column(DateTime(timezone=True), server_default=func.now())
    completed_at = Column(DateTime(timezone=True), nullable=True)
    record_count = Column(Integer, default=0)
    # New rows start as "running"; other status values are not defined in this class.
    status = Column(String(20), nullable=False, default="running")
    is_current = Column(Boolean, default=True, index=True)
    # Optional self-referential link to another row of this same table.
    parent_snapshot_id = Column(Integer, ForeignKey("data_snapshots.id"), nullable=True, index=True)
    # Use the ``dict`` callable rather than a ``{}`` literal: a literal is a
    # single object shared by every insert, so an in-place mutation of one
    # row's default could leak into later rows. A callable yields a fresh
    # empty dict each time the default is applied.
    summary = Column(JSON, default=dict)
    # Filled by the database (NOW()) at insert time when not supplied.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    def __repr__(self) -> str:
        """Return a short debug string with the row id, source and status."""
        return f"<DataSnapshot {self.id}: {self.source}/{self.status}>"
|
||||
@@ -12,6 +12,7 @@ class CollectionTask(Base):
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
datasource_id = Column(Integer, nullable=False, index=True)
|
||||
status = Column(String(20), nullable=False) # pending, running, success, failed, cancelled
|
||||
phase = Column(String(30), default="queued")
|
||||
started_at = Column(DateTime(timezone=True))
|
||||
completed_at = Column(DateTime(timezone=True))
|
||||
records_processed = Column(Integer, default=0)
|
||||
|
||||
Reference in New Issue
Block a user