#!/usr/bin/env python3
"""Check whether collected_data is ready for strong-coupled column removal.

The script runs a fixed set of COUNT(*) queries against the ``collected_data``
table and prints a readiness report.  For each legacy column (country, city,
latitude, longitude, value, unit) it counts the rows where the column holds a
non-empty value while the same key in the ``metadata`` JSON is missing or
empty.  Any non-zero count of that kind makes the verdict "NOT READY".

The database is selected with the ``DATABASE_URL`` environment variable; when
it is unset, a local PostgreSQL default is used.
"""

import asyncio
import os
import sys
from typing import Dict

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BACKEND_DIR = os.path.join(ROOT_DIR, "backend")

# Extend the import path before the remaining imports, as the original
# script did, so the repository root and backend directory are importable.
sys.path.insert(0, ROOT_DIR)
sys.path.insert(0, BACKEND_DIR)

from sqlalchemy import text  # noqa: E402
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine  # noqa: E402
from sqlalchemy.orm import sessionmaker  # noqa: E402

# Connection URL used when DATABASE_URL is not set in the environment.
DEFAULT_DATABASE_URL = (
    "postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db"
)

# Only checks whose name ends with this suffix can block the removal.
BLOCKING_SUFFIX = "_missing_in_metadata"

# Check name -> SQL statement returning a single COUNT(*) value.
# "rows_with_any_legacy_value" and "total_rows" are informational only.
CHECKS = {
    "country_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE country IS NOT NULL
          AND country != ''
          AND COALESCE(metadata->>'country', '') = ''
    """,
    "city_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE city IS NOT NULL
          AND city != ''
          AND COALESCE(metadata->>'city', '') = ''
    """,
    "latitude_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE latitude IS NOT NULL
          AND latitude != ''
          AND COALESCE(metadata->>'latitude', '') = ''
    """,
    "longitude_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE longitude IS NOT NULL
          AND longitude != ''
          AND COALESCE(metadata->>'longitude', '') = ''
    """,
    "value_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE value IS NOT NULL
          AND value != ''
          AND COALESCE(metadata->>'value', '') = ''
    """,
    "unit_missing_in_metadata": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE unit IS NOT NULL
          AND unit != ''
          AND COALESCE(metadata->>'unit', '') = ''
    """,
    "rows_with_any_legacy_value": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE COALESCE(country, '') != ''
           OR COALESCE(city, '') != ''
           OR COALESCE(latitude, '') != ''
           OR COALESCE(longitude, '') != ''
           OR COALESCE(value, '') != ''
           OR COALESCE(unit, '') != ''
    """,
    "total_rows": """
        SELECT COUNT(*)
        FROM collected_data
    """,
}


async def scalar(session: AsyncSession, sql: str) -> int:
    """Execute *sql* and return its first column of the first row as an int.

    A missing row or a NULL/zero value is reported as ``0``.
    """
    result = await session.execute(text(sql))
    return int(result.scalar() or 0)


def _find_blocking_checks(results: Dict[str, int]) -> Dict[str, int]:
    """Return the ``*_missing_in_metadata`` checks that have a non-zero count."""
    return {
        name: count
        for name, count in results.items()
        if name.endswith(BLOCKING_SUFFIX) and count > 0
    }


def _print_report(results: Dict[str, int]) -> None:
    """Print every check result followed by the readiness conclusion."""
    print("Collected Data Column Removal Readiness")
    print("=" * 44)
    for name, count in results.items():
        print(f"{name}: {count}")

    blocking_checks = _find_blocking_checks(results)

    print("\nConclusion:")
    if blocking_checks:
        print("NOT READY")
        print("The following fields still have legacy column values not mirrored into metadata:")
        for name, count in blocking_checks.items():
            print(f"- {name}: {count}")
        return

    print("READY FOR COLUMN REMOVAL CHECKPOINT")
    print("All legacy column values are mirrored into metadata.")
    print("You can proceed to the SQL migration after one more functional verification round.")


async def main() -> None:
    """Run every query in ``CHECKS`` and print the readiness report.

    The checks run sequentially on a single session, in the order they are
    declared in ``CHECKS``.  Any exception raised while connecting or
    querying propagates to the caller after the engine has been disposed.
    """
    database_url = os.environ.get("DATABASE_URL", DEFAULT_DATABASE_URL)
    engine = create_async_engine(database_url, echo=False)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with async_session() as session:
            results = {
                name: await scalar(session, sql) for name, sql in CHECKS.items()
            }
    finally:
        # Dispose the engine even when a query fails, so pooled connections
        # are closed instead of being left open until interpreter shutdown.
        await engine.dispose()

    _print_report(results)


if __name__ == "__main__":
    asyncio.run(main())