120 lines
3.5 KiB
Python
120 lines
3.5 KiB
Python
#!/usr/bin/env python3
|
|
"""Check whether collected_data is ready for strong-coupled column removal."""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
|
|
# Repository root is the parent of the directory that holds this script;
# the backend package directory sits directly beneath it.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BACKEND_DIR = os.path.join(ROOT_DIR, "backend")

# Prepend both directories to the import search path in one step.
# BACKEND_DIR ends up first and ROOT_DIR second.
sys.path[:0] = [BACKEND_DIR, ROOT_DIR]
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
|
|
|
|
# Legacy columns of collected_data that get a "<column>_missing_in_metadata"
# check, in the order the checks are reported.
_LEGACY_COLUMNS = ("country", "city", "latitude", "longitude", "value", "unit")

# Counts rows whose legacy column holds a non-empty value while the metadata
# JSON has no (or an empty) entry under the same key.
_MISSING_IN_METADATA_SQL = """
        SELECT COUNT(*)
        FROM collected_data
        WHERE {column} IS NOT NULL
          AND {column} != ''
          AND COALESCE(metadata->>'{column}', '') = ''
    """

# Check name -> SQL returning a single COUNT(*). Insertion order is the
# order in which results are printed.
CHECKS = {
    **{
        f"{column}_missing_in_metadata": _MISSING_IN_METADATA_SQL.format(column=column)
        for column in _LEGACY_COLUMNS
    },
    # Rows that still carry a non-empty value in at least one legacy column.
    "rows_with_any_legacy_value": """
        SELECT COUNT(*)
        FROM collected_data
        WHERE COALESCE(country, '') != ''
           OR COALESCE(city, '') != ''
           OR COALESCE(latitude, '') != ''
           OR COALESCE(longitude, '') != ''
           OR COALESCE(value, '') != ''
           OR COALESCE(unit, '') != ''
    """,
    # Size of the table, for context next to the other counts.
    "total_rows": """
        SELECT COUNT(*) FROM collected_data
    """,
}
|
|
|
|
|
|
async def scalar(session: AsyncSession, sql: str) -> int:
    """Execute *sql* on *session* and return its scalar result as an int.

    The statement is wrapped in ``text()`` before execution. A falsy scalar
    (for example ``None`` when there is no row, or a zero count) is reported
    as ``0``.
    """
    outcome = await session.execute(text(sql))
    first_value = outcome.scalar()
    if not first_value:
        return 0
    return int(first_value)
|
|
|
|
|
|
async def main() -> None:
    """Run every query in ``CHECKS`` and print a readiness report.

    The database URL is read from the ``DATABASE_URL`` environment variable,
    falling back to a local ``planet_db`` asyncpg URL. Each check's count is
    printed, followed by a conclusion: any ``*_missing_in_metadata`` check
    with a count above zero makes the result ``NOT READY``; the remaining
    checks are printed for information only.

    Raises:
        Whatever the database driver raises if connecting or running a check
        fails; the engine is disposed before the error propagates.
    """
    database_url = os.environ.get(
        "DATABASE_URL", "postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db"
    )
    engine = create_async_engine(database_url, echo=False)
    try:
        async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        async with async_session() as session:
            # Checks run sequentially on one session, in CHECKS order.
            results = {name: await scalar(session, sql) for name, sql in CHECKS.items()}
    finally:
        # Dispose the engine even when a check fails, so the connection pool
        # is not left open on the error path.
        await engine.dispose()

    print("Collected Data Column Removal Readiness")
    print("=" * 44)
    for key, value in results.items():
        print(f"{key}: {value}")

    # Only the per-column "missing in metadata" counts block the removal.
    blocking_checks = {
        key: value
        for key, value in results.items()
        if key.endswith("_missing_in_metadata") and value > 0
    }

    print("\nConclusion:")
    if blocking_checks:
        print("NOT READY")
        print("The following fields still have legacy column values not mirrored into metadata:")
        for key, value in blocking_checks.items():
            print(f"- {key}: {value}")
    else:
        print("READY FOR COLUMN REMOVAL CHECKPOINT")
        print("All legacy column values are mirrored into metadata.")
        print("You can proceed to the SQL migration after one more functional verification round.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Runs only when the file is executed as a script: asyncio.run() drives
    # the main() coroutine to completion.
    asyncio.run(main())
|