Refine data management and collection workflows

This commit is contained in:
linkong
2026-03-25 17:19:10 +08:00
parent cc5f16f8a7
commit 020c1d5051
34 changed files with 3341 additions and 947 deletions

View File

@@ -1,5 +1,6 @@
from typing import AsyncGenerator
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.orm import declarative_base
@@ -63,6 +64,7 @@ async def init_db():
import app.models.user # noqa: F401
import app.models.gpu_cluster # noqa: F401
import app.models.task # noqa: F401
import app.models.data_snapshot # noqa: F401
import app.models.datasource # noqa: F401
import app.models.datasource_config # noqa: F401
import app.models.alert # noqa: F401
@@ -71,6 +73,55 @@ async def init_db():
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
# Patch collected_data with the columns listed in the statement below.
# Every ADD COLUMN uses IF NOT EXISTS, so columns that are already present are
# left alone and the statement can be issued again on later start-ups.
# NOTE(review): JSONB, '::jsonb' and TIMESTAMPTZ look PostgreSQL-specific --
# confirm no other database backend is expected to run this.
await conn.execute(
text(
"""
ALTER TABLE collected_data
ADD COLUMN IF NOT EXISTS snapshot_id INTEGER,
ADD COLUMN IF NOT EXISTS task_id INTEGER,
ADD COLUMN IF NOT EXISTS entity_key VARCHAR(255),
ADD COLUMN IF NOT EXISTS is_current BOOLEAN DEFAULT TRUE,
ADD COLUMN IF NOT EXISTS previous_record_id INTEGER,
ADD COLUMN IF NOT EXISTS change_type VARCHAR(20),
ADD COLUMN IF NOT EXISTS change_summary JSONB DEFAULT '{}'::jsonb,
ADD COLUMN IF NOT EXISTS deleted_at TIMESTAMPTZ
"""
)
)
# Add a `phase` column (default 'queued') to collection_tasks when it is missing.
await conn.execute(
text(
"""
ALTER TABLE collection_tasks
ADD COLUMN IF NOT EXISTS phase VARCHAR(30) DEFAULT 'queued'
"""
)
)
# Create the composite (source, source_id) index on collected_data unless an
# index with this name already exists.
await conn.execute(
text(
"""
CREATE INDEX IF NOT EXISTS idx_collected_data_source_source_id
ON collected_data (source, source_id)
"""
)
)
# Backfill entity_key for rows that do not have one yet, using
# "<source>:<source_id>"; when source_id is NULL the row's own id (cast to
# text) is used instead.
# NOTE(review): if `source` can be NULL the concatenation presumably yields
# NULL and the row stays unfilled -- confirm `source` is NOT NULL.
await conn.execute(
text(
"""
UPDATE collected_data
SET entity_key = source || ':' || COALESCE(source_id, id::text)
WHERE entity_key IS NULL
"""
)
)
# Set is_current to TRUE on every row where it is still NULL.
await conn.execute(
text(
"""
UPDATE collected_data
SET is_current = TRUE
WHERE is_current IS NULL
"""
)
)
async with async_session_factory() as session:
await seed_default_datasources(session)
await seed_default_datasources(session)