Refine data management and collection workflows
This commit is contained in:
119
scripts/check_collected_data_column_removal_ready.py
Normal file
119
scripts/check_collected_data_column_removal_ready.py
Normal file
@@ -0,0 +1,119 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check whether collected_data is ready for strong-coupled column removal."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Resolve the repository root as the parent of the directory holding this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(_SCRIPT_DIR)
BACKEND_DIR = os.path.join(ROOT_DIR, "backend")

# Put the backend directory first on the import path, followed by the root.
sys.path[:0] = [BACKEND_DIR, ROOT_DIR]
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
|
||||
# Legacy columns of ``collected_data`` whose values are expected to be mirrored
# into the ``metadata`` JSON document under a key of the same name. This tuple
# is the single source of truth for every per-column check below.
LEGACY_COLUMNS = ("country", "city", "latitude", "longitude", "value", "unit")


def _missing_in_metadata_sql(column: str) -> str:
    """Return SQL counting rows where *column* is set but its metadata key is empty.

    *column* is interpolated directly into the statement, so it must only ever
    come from the fixed ``LEGACY_COLUMNS`` tuple, never from external input.
    """
    return f"""
        SELECT COUNT(*)
        FROM collected_data
        WHERE {column} IS NOT NULL
          AND {column} != ''
          AND COALESCE(metadata->>'{column}', '') = ''
    """


def _any_legacy_value_sql() -> str:
    """Return SQL counting rows with a non-empty value in any legacy column."""
    predicates = "\n           OR ".join(
        f"COALESCE({column}, '') != ''" for column in LEGACY_COLUMNS
    )
    return f"""
        SELECT COUNT(*)
        FROM collected_data
        WHERE {predicates}
    """


# Mapping of check name -> SQL returning a single count. Insertion order is the
# order in which results are reported: one "<column>_missing_in_metadata" check
# per legacy column, then the two informational totals.
CHECKS = {
    **{
        f"{column}_missing_in_metadata": _missing_in_metadata_sql(column)
        for column in LEGACY_COLUMNS
    },
    "rows_with_any_legacy_value": _any_legacy_value_sql(),
    "total_rows": """
        SELECT COUNT(*) FROM collected_data
    """,
}
|
||||
|
||||
|
||||
async def scalar(session: AsyncSession, sql: str) -> int:
    """Execute *sql* on *session* and return its scalar result as an int.

    A falsy result (for example ``None`` when no row comes back) is reported
    as ``0``.
    """
    outcome = await session.execute(text(sql))
    count = outcome.scalar()
    return int(count) if count else 0
|
||||
|
||||
|
||||
async def main():
    """Run every query in ``CHECKS`` and print a readiness report.

    The database is taken from the ``DATABASE_URL`` environment variable, with
    a local PostgreSQL URL as the fallback. Each check's count is printed, then
    a conclusion: ``NOT READY`` if any ``*_missing_in_metadata`` check found
    rows, otherwise ``READY FOR COLUMN REMOVAL CHECKPOINT``.
    """
    database_url = os.environ.get(
        "DATABASE_URL", "postgresql+asyncpg://postgres:postgres@localhost:5432/planet_db"
    )
    engine = create_async_engine(database_url, echo=False)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with async_session() as session:
            results = {name: await scalar(session, sql) for name, sql in CHECKS.items()}
    finally:
        # Dispose in ``finally`` so the engine's connections are released even
        # when one of the check queries raises.
        await engine.dispose()

    print("Collected Data Column Removal Readiness")
    print("=" * 44)
    for key, value in results.items():
        print(f"{key}: {value}")

    # Only the per-column mirror checks block removal; the remaining entries
    # (row totals) are informational.
    blocking_checks = {
        key: value
        for key, value in results.items()
        if key.endswith("_missing_in_metadata") and value > 0
    }

    print("\nConclusion:")
    if blocking_checks:
        print("NOT READY")
        print("The following fields still have legacy column values not mirrored into metadata:")
        for key, value in blocking_checks.items():
            print(f"- {key}: {value}")
    else:
        print("READY FOR COLUMN REMOVAL CHECKPOINT")
        print("All legacy column values are mirrored into metadata.")
        print("You can proceed to the SQL migration after one more functional verification round.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
Reference in New Issue
Block a user