Files
planet/scripts/check_collected_data_column_removal_ready.py
2026-03-25 17:19:10 +08:00

120 lines
3.5 KiB
Python

#!/usr/bin/env python3
"""Check whether collected_data is ready for strong-coupled column removal."""
import asyncio
import os
import sys
# Resolve the project root as the parent of the directory holding this script,
# then make both the root and its "backend" folder importable.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(_SCRIPT_DIR)
BACKEND_DIR = os.path.join(ROOT_DIR, "backend")
# Prepend in one step; BACKEND_DIR ends up ahead of ROOT_DIR on sys.path.
sys.path[:0] = [BACKEND_DIR, ROOT_DIR]
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
# Legacy columns of collected_data that are inspected, in report order.
_LEGACY_COLUMNS = ("country", "city", "latitude", "longitude", "value", "unit")

# Counts rows whose legacy column holds a non-empty value while the key of the
# same name in the metadata document is absent or empty.
_MISSING_IN_METADATA_SQL = """
SELECT COUNT(*)
FROM collected_data
WHERE {column} IS NOT NULL
AND {column} != ''
AND COALESCE(metadata->>'{column}', '') = ''
"""

# Maps a check name to the COUNT(*) query that evaluates it.
CHECKS = {
    f"{column}_missing_in_metadata": _MISSING_IN_METADATA_SQL.format(column=column)
    for column in _LEGACY_COLUMNS
}

# Rows that still carry a non-empty value in at least one legacy column.
CHECKS["rows_with_any_legacy_value"] = (
    "\nSELECT COUNT(*)\nFROM collected_data\nWHERE "
    + "\nOR ".join(f"COALESCE({column}, '') != ''" for column in _LEGACY_COLUMNS)
    + "\n"
)

# Size of the table, printed for context alongside the other counts.
CHECKS["total_rows"] = """
SELECT COUNT(*) FROM collected_data
"""
async def scalar(session: AsyncSession, sql: str) -> int:
    """Execute *sql* on *session* and return its first scalar value as an int.

    A missing or falsy value (for example ``None`` or ``0``) is reported as 0.
    """
    outcome = await session.execute(text(sql))
    first_value = outcome.scalar()
    if not first_value:
        return 0
    return int(first_value)
async def main() -> None:
    """Run every query in ``CHECKS`` and print a readiness report.

    The connection URL comes from the ``DATABASE_URL`` environment variable,
    with a local PostgreSQL URL as the fallback. Each check's count is
    printed, followed by a conclusion: any ``*_missing_in_metadata`` check
    with a count above zero makes the verdict NOT READY.
    """
    database_url = os.environ.get(
        "DATABASE_URL", "postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db"
    )
    engine = create_async_engine(database_url, echo=False)
    try:
        async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        async with async_session() as session:
            results = {name: await scalar(session, sql) for name, sql in CHECKS.items()}
    finally:
        # Dispose even when a query fails, so pooled connections are closed
        # while the event loop is still running.
        await engine.dispose()

    print("Collected Data Column Removal Readiness")
    print("=" * 44)
    for key, value in results.items():
        print(f"{key}: {value}")

    # Only the per-column mirror checks block removal; the remaining counts
    # are informational.
    blocking_checks = {
        key: value
        for key, value in results.items()
        if key.endswith("_missing_in_metadata") and value > 0
    }

    print("\nConclusion:")
    if blocking_checks:
        print("NOT READY")
        print("The following fields still have legacy column values not mirrored into metadata:")
        for key, value in blocking_checks.items():
            print(f"- {key}: {value}")
    else:
        print("READY FOR COLUMN REMOVAL CHECKPOINT")
        print("All legacy column values are mirrored into metadata.")
        print("You can proceed to the SQL migration after one more functional verification round.")
if __name__ == "__main__":
    # Script entry point: drive the async report to completion.
    report = main()
    asyncio.run(report)