fix(cleaner): update document cleaner job

This commit is contained in:
jetli 2025-04-26 19:53:44 -07:00
parent 91a893a433
commit dd96819709

View File

@ -1,3 +1,4 @@
from datetime import datetime, timedelta, timezone
from typing import Set
from backend.content.models import DocumentDoc
from backend.content.models import ContentFolderDoc
@ -32,11 +33,16 @@ class DocumentCleaner:
print(
f"Valid document IDs (from updated content directories): {valid_document_ids}"
)
# Timezone-aware UTC cutoff, 1 day before now; used below to filter on
# `create_time`. `UTC` lives on the `datetime` module, not on the `datetime`
# class imported in this file, so `datetime.UTC` would raise AttributeError;
# `timezone.utc` is the equivalent that works with the class import.
time_1_day_ago = datetime.now(timezone.utc) - timedelta(days=1)
# Retrieve all document IDs from DocumentDoc
all_document_ids = set()
async for doc in DocumentDoc.find(
{"created_by": {"$regex": "^content-service-"}}
{
"created_by": {"$regex": "^content-service-"},
"create_time": {"$lt": time_1_day_ago},
}
): # Fetch all documents that are created by content-service
if doc.document_id: # Ensure document_id is not None
all_document_ids.add(doc.document_id)