110 lines
4.0 KiB
Python
110 lines
4.0 KiB
Python
from common.config.app_settings import app_settings
|
|
from backend.content.models import DocumentDoc
|
|
from backend.document.models import BasicProfileDoc
|
|
from datetime import datetime, timezone
|
|
import httpx
|
|
import base64
|
|
|
|
|
|
class DocumentManager:
|
|
def __init__(self):
|
|
self.storage_service_api_base = (
|
|
app_settings.CENTRAL_STORAGE_WEBAPI_URL_BASE.rstrip("/") + "/"
|
|
)
|
|
|
|
async def retrieve_document_info(self, document_id: str):
|
|
api_url = (
|
|
self.storage_service_api_base + "retrieve_document_info/" + document_id
|
|
)
|
|
async with httpx.AsyncClient() as client:
|
|
response = await client.get(api_url)
|
|
return response.json()
|
|
|
|
async def retrieve_document_as_http_media(self, document_id: str):
|
|
api_url = (
|
|
self.storage_service_api_base + "read-document-as-http-media/" + document_id
|
|
)
|
|
async with httpx.AsyncClient() as client:
|
|
response = await client.get(api_url)
|
|
return response.json()
|
|
|
|
async def save_document_file(
|
|
self, associated_with: str, name: str, blob: bytes
|
|
) -> str:
|
|
api_url = self.storage_service_api_base + "upload-file"
|
|
files = {"file": (name, blob)}
|
|
async with httpx.AsyncClient() as client:
|
|
response = await client.post(
|
|
api_url, data={"associated_with": associated_with}, files=files
|
|
)
|
|
return response.json()["document_id"]
|
|
|
|
async def delete_documents(self, document_ids: list):
|
|
api_url = self.storage_service_api_base + "delete-file"
|
|
document_ids_list = list(document_ids)
|
|
async with httpx.AsyncClient() as client:
|
|
response = await client.request(
|
|
method="DELETE",
|
|
url=api_url,
|
|
json={"document_ids": document_ids_list},
|
|
)
|
|
if response.status_code == 202:
|
|
print("Deletion request has been queued successfully.")
|
|
else:
|
|
print(f"Failed to queue deletion: {response.text}")
|
|
|
|
async def cleanup_document(self):
|
|
# Get today's date at midnight (UTC)
|
|
today_start = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
|
|
# Corrected query with regex
|
|
documents = await DocumentDoc.find(
|
|
{"created_by": {"$regex": "^content-service-"},
|
|
"create_time": {"$lt": today_start}}
|
|
).to_list()
|
|
|
|
if documents:
|
|
print(
|
|
f"Found {len(documents)} documents to modify and delete: {[doc.id for doc in documents]}"
|
|
)
|
|
else:
|
|
print("No documents found to modify or delete.")
|
|
|
|
for document in documents:
|
|
print(
|
|
f"Deleting document {document.id} - created_by: {document.created_by}"
|
|
)
|
|
await document.delete()
|
|
|
|
print("Modification and deletion completed.")
|
|
|
|
async def backfill_photo_id(self):
|
|
profiles = await BasicProfileDoc.find().to_list()
|
|
|
|
if profiles:
|
|
print(
|
|
f"Found {len(profiles)} documents to modify and delete: {[profile.id for profile in profiles]}"
|
|
)
|
|
else:
|
|
print("No documents found to modify or delete.")
|
|
|
|
for profile in profiles:
|
|
print(f"Updating document {profile.id}")
|
|
if (
|
|
profile.photo_id is None or profile.photo_id == ""
|
|
) and profile.photo.base64:
|
|
# Strip the metadata prefix (e.g., 'data:image/png;base64,')
|
|
_, encoded_data = profile.photo.base64.split(",", 1)
|
|
|
|
# Decode the Base64 string into bytes
|
|
blob_data = base64.b64decode(encoded_data)
|
|
photo_id = await self.save_document_file(
|
|
profile.user_id,
|
|
profile.photo.filename,
|
|
blob_data,
|
|
)
|
|
print("this is photo_id", photo_id)
|
|
profile.photo_id = photo_id
|
|
await profile.save()
|
|
|
|
print("User photo id backfill completed.")
|