freeleaps-service-hub/apps/content/backend/content/document_cleaner.py
2025-02-03 09:13:20 +00:00

55 lines
2.1 KiB
Python

from typing import Set
from backend.content.models import DocumentDoc
from backend.content.models import ContentFolderDoc
from backend.document.document_manager import DocumentManager
class DocumentCleaner:
    """Cleanup helpers for DocumentDoc records."""

    @staticmethod
    async def clean_up_unused_documents():
        """
        Delete content-service documents that no content directory references.

        Gathers the document IDs held by every directory of every
        ContentFolderDoc, compares them with the IDs of the DocumentDoc
        records whose ``created_by`` starts with ``content-service-``, and
        passes the unreferenced IDs to ``DocumentManager.delete_documents``.
        Progress is reported with ``print``; nothing is returned.
        """
        # Step 1: every ID still referenced by some content directory.
        valid_document_ids: Set[str] = set()
        async for content_folder in ContentFolderDoc.find():
            for entry in content_folder.content_directories:
                referenced = (
                    entry.cover_document_id,
                    entry.summary_text_new,
                    entry.title_text_new,
                    entry.content_html_new,
                    entry.content_text_new,
                    entry.content_document_id,
                )
                # Empty/None slots are skipped so they never count as valid IDs.
                valid_document_ids.update(ref for ref in referenced if ref)
        print(
            f"Valid document IDs (from updated content directories): {valid_document_ids}"
        )

        # Step 2: IDs of the documents created by content-service, ignoring
        # records that carry no document_id.
        created_by_content_service = {"created_by": {"$regex": "^content-service-"}}
        all_document_ids = {
            record.document_id
            async for record in DocumentDoc.find(created_by_content_service)
            if record.document_id
        }
        print(f"All document IDs (from DocumentDoc): {all_document_ids}")

        # Step 3: whatever exists but is not referenced anywhere is unused.
        unused_document_ids = all_document_ids - valid_document_ids
        print(f"Unused document IDs: {unused_document_ids}")

        if not unused_document_ids:
            print("No unused DocumentDocs to clean up.")
            return

        # Step 4: hand the unused IDs to the document manager for removal.
        await DocumentManager().delete_documents(unused_document_ids)