Removing cache, need further tuning
This commit is contained in:
parent
1972a5758b
commit
550c49f3ec
@ -1,3 +1,4 @@
|
|||||||
|
import logging
|
||||||
from common.config.app_settings import app_settings
|
from common.config.app_settings import app_settings
|
||||||
from beanie import init_beanie
|
from beanie import init_beanie
|
||||||
from motor.motor_asyncio import AsyncIOMotorClient
|
from motor.motor_asyncio import AsyncIOMotorClient
|
||||||
@ -8,6 +9,9 @@ def register(app):
|
|||||||
app.debug = "auth_mongo_debug"
|
app.debug = "auth_mongo_debug"
|
||||||
app.title = "auth_mongo_name"
|
app.title = "auth_mongo_name"
|
||||||
|
|
||||||
|
# Configure logging for pymongo
|
||||||
|
logging.getLogger("pymongo").setLevel(logging.WARNING) # Suppress DEBUG logs
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def start_database():
|
async def start_database():
|
||||||
await initiate_database()
|
await initiate_database()
|
||||||
|
|||||||
@ -1,35 +1,75 @@
|
|||||||
from typing import Dict, List, Optional
|
import asyncio
|
||||||
|
from typing import List, Optional
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
from .constants import ContentSource, ContentMediaType, ContentDataFormat
|
|
||||||
from .models import ContentDirectory, ContentFolderDoc
|
from .models import ContentDirectory, ContentFolderDoc
|
||||||
from common.config.app_settings import app_settings
|
|
||||||
from backend.document.document_manager import DocumentManager
|
|
||||||
from common.constants.region import UserRegion
|
from common.constants.region import UserRegion
|
||||||
|
from backend.document.document_manager import DocumentManager
|
||||||
from .content_sharepoint_manager import ContentSharePointManager
|
from .content_sharepoint_manager import ContentSharePointManager
|
||||||
import pytz
|
from backend.content.constants import ContentSource
|
||||||
|
|
||||||
|
|
||||||
class ContentService:
|
class ContentService:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
pass
|
self.sharepoint_manager = ContentSharePointManager()
|
||||||
|
self.expiry_time = timedelta(hours=24)
|
||||||
|
|
||||||
async def retrieve_directories_for_folder(
|
async def retrieve_directories_for_folder(
|
||||||
self, folder_name: str, region: UserRegion
|
self, folder_name: str, region: UserRegion
|
||||||
) -> List[ContentDirectory]:
|
) -> List[ContentDirectory]:
|
||||||
|
# Check cache
|
||||||
|
folder_key = f"{folder_name}/{region.name}"
|
||||||
folder = await ContentFolderDoc.find_one(
|
folder = await ContentFolderDoc.find_one(
|
||||||
ContentFolderDoc.folder_name == folder_name,
|
ContentFolderDoc.folder_name == folder_key,
|
||||||
ContentFolderDoc.region == region
|
ContentFolderDoc.region == region,
|
||||||
)
|
)
|
||||||
|
|
||||||
if folder is None or folder.valid_thru.replace(tzinfo=timezone.utc) < datetime.now(timezone.utc):
|
# Refresh cache if expired or not present
|
||||||
await ContentSharePointManager().retrieve_directories_for_folder(folder_name=folder_name, region=region)
|
if folder is None or folder.valid_thru.replace(
|
||||||
|
tzinfo=timezone.utc
|
||||||
|
) < datetime.now(timezone.utc):
|
||||||
|
folder = await self.__refresh_folder_from_sharepoint(folder_name, region)
|
||||||
|
|
||||||
|
return folder.content_directories if folder else []
|
||||||
|
|
||||||
|
async def __refresh_folder_from_sharepoint(
|
||||||
|
self, folder_name: str, region: UserRegion
|
||||||
|
) -> ContentFolderDoc:
|
||||||
|
content_folder_name = f"{folder_name}/{region.name}"
|
||||||
|
sp_folders = self.sharepoint_manager.list_sub_folders(content_folder_name)
|
||||||
|
current_time = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
# Create or update folder metadata
|
||||||
folder = await ContentFolderDoc.find_one(
|
folder = await ContentFolderDoc.find_one(
|
||||||
ContentFolderDoc.folder_name == folder_name,
|
ContentFolderDoc.folder_name == content_folder_name
|
||||||
ContentFolderDoc.region == region
|
|
||||||
)
|
)
|
||||||
|
if folder is None:
|
||||||
|
folder = ContentFolderDoc(
|
||||||
|
folder_name=content_folder_name,
|
||||||
|
content_directories=[],
|
||||||
|
udpate_time=current_time,
|
||||||
|
update_source=ContentSource.SHAREPOINT,
|
||||||
|
valid_thru=current_time + self.expiry_time,
|
||||||
|
region=region,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
folder.content_directories.clear()
|
||||||
|
|
||||||
return folder.content_directories if folder else None
|
# Process subfolders in parallel
|
||||||
|
tasks = [
|
||||||
|
self.sharepoint_manager.process_subfolder(content_folder_name, sp_folder)
|
||||||
|
for sp_folder in sp_folders
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
folder.content_directories = await asyncio.gather(*tasks)
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f"Failed to process subfolders: {e}")
|
||||||
|
|
||||||
|
# Save folder metadata
|
||||||
|
folder.udpate_time = current_time
|
||||||
|
folder.valid_thru = current_time + self.expiry_time
|
||||||
|
await folder.save()
|
||||||
|
|
||||||
|
return folder
|
||||||
|
|
||||||
async def retrieve_content_as_media_data(self, document_id: str) -> Optional[str]:
|
async def retrieve_content_as_media_data(self, document_id: str) -> Optional[str]:
|
||||||
document_manager = DocumentManager()
|
document_manager = DocumentManager()
|
||||||
|
|||||||
@ -1,14 +1,12 @@
|
|||||||
from typing import Dict, List, Optional
|
import asyncio
|
||||||
from common.config.app_settings import app_settings
|
from datetime import datetime, timezone, timedelta
|
||||||
from datetime import datetime, timedelta, timezone
|
from typing import List, Dict
|
||||||
from backend.sharepoint.sharepoint_graph_client import SharePointGraphClient
|
from backend.sharepoint.sharepoint_graph_client import SharePointGraphClient
|
||||||
from common.constants.region import UserRegion
|
|
||||||
from backend.document.document_manager import DocumentManager
|
from backend.document.document_manager import DocumentManager
|
||||||
from backend.content.constants import (
|
|
||||||
ContentSource,
|
|
||||||
ContentFileConstants,
|
|
||||||
)
|
|
||||||
from backend.content.models import ContentDirectory, ContentFolderDoc
|
from backend.content.models import ContentDirectory, ContentFolderDoc
|
||||||
|
from backend.content.constants import ContentFileConstants, ContentSource
|
||||||
|
|
||||||
|
from common.config.app_settings import app_settings
|
||||||
|
|
||||||
|
|
||||||
class ContentSharePointManager:
|
class ContentSharePointManager:
|
||||||
@ -25,127 +23,62 @@ class ContentSharePointManager:
|
|||||||
)
|
)
|
||||||
self.share_point_file_expiry = timedelta(hours=24)
|
self.share_point_file_expiry = timedelta(hours=24)
|
||||||
|
|
||||||
def __generate_created__by__(self, folder_name):
|
def list_sub_folders(self, folder_name: str) -> List[dict]:
|
||||||
return "content-service-" + folder_name.replace("/", "-").lower()
|
"""
|
||||||
|
Fetches the subfolders under the specified folder from SharePoint.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use SharePointGraphClient to list subfolders
|
||||||
|
return self.sharepoint_client.list_sub_folders(folder_name)
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(f"Failed to list subfolders for {folder_name}: {e}")
|
||||||
|
|
||||||
async def retrieve_directories_for_folder(
|
async def process_subfolder(
|
||||||
self, folder_name: str, region: UserRegion
|
self, content_folder_name: str, sp_folder: dict
|
||||||
):
|
) -> ContentDirectory:
|
||||||
content_folder_name = folder_name + "/" + region.name
|
content_directory = ContentDirectory(content_name=sp_folder["name"])
|
||||||
sp_folders = self.sharepoint_client.list_sub_folders(content_folder_name)
|
|
||||||
current_time = datetime.now(timezone.utc)
|
|
||||||
|
|
||||||
folder = await ContentFolderDoc.find_one(
|
|
||||||
ContentFolderDoc.folder_name == content_folder_name
|
|
||||||
)
|
|
||||||
|
|
||||||
if folder is None:
|
|
||||||
folder = ContentFolderDoc(
|
|
||||||
folder_name=folder_name,
|
|
||||||
content_directories=[],
|
|
||||||
udpate_time=current_time,
|
|
||||||
update_source=ContentSource.SHAREPOINT,
|
|
||||||
valid_thru=current_time + self.share_point_file_expiry,
|
|
||||||
region=region,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
folder.content_directories.clear()
|
|
||||||
|
|
||||||
for sp_folder in sp_folders:
|
|
||||||
content_directory = ContentDirectory(
|
|
||||||
content_name=sp_folder["name"],
|
|
||||||
cover_document_id=None,
|
|
||||||
summary_text=None,
|
|
||||||
title_text=None,
|
|
||||||
content_link=None,
|
|
||||||
content_document_id=None,
|
|
||||||
)
|
|
||||||
sp_files = self.sharepoint_client.list_files(
|
sp_files = self.sharepoint_client.list_files(
|
||||||
content_folder_name + "/" + sp_folder["name"]
|
f"{content_folder_name}/{sp_folder['name']}"
|
||||||
)
|
|
||||||
for sp_file in sp_files:
|
|
||||||
if (
|
|
||||||
sp_file["name"].lower()
|
|
||||||
== ContentFileConstants.COVER_FILE_NAME.lower()
|
|
||||||
):
|
|
||||||
cover_file_content = self.sharepoint_client.get_file_content(
|
|
||||||
sp_file["id"]
|
|
||||||
)
|
|
||||||
cover_document_manager = DocumentManager()
|
|
||||||
file_name = sp_file["name"].lower()
|
|
||||||
created_by = self.__generate_created__by__(folder_name=folder_name)
|
|
||||||
content_directory.cover_document_id = (
|
|
||||||
await cover_document_manager.save_document_file(
|
|
||||||
created_by, file_name, cover_file_content
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
elif (
|
# Process files in parallel
|
||||||
sp_file["name"].lower()
|
tasks = [
|
||||||
== ContentFileConstants.SUMMARY_FILE_NAME.lower()
|
self.__process_file(file, content_directory, content_folder_name)
|
||||||
|
for file in sp_files
|
||||||
|
]
|
||||||
|
await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
return content_directory
|
||||||
|
|
||||||
|
async def __process_file(
|
||||||
|
self, sp_file: dict, content_directory: ContentDirectory, folder_name: str
|
||||||
):
|
):
|
||||||
content_directory.summary_text = (
|
document_manager = DocumentManager()
|
||||||
self.sharepoint_client.get_file_content(sp_file["id"])
|
file_content = self.sharepoint_client.get_file_content(sp_file["id"])
|
||||||
|
|
||||||
|
if sp_file["name"].lower() == ContentFileConstants.COVER_FILE_NAME.lower():
|
||||||
|
content_directory.cover_document_id = (
|
||||||
|
await document_manager.save_document_file(
|
||||||
|
self.__generate_created_by(folder_name),
|
||||||
|
sp_file["name"],
|
||||||
|
file_content,
|
||||||
)
|
)
|
||||||
elif (
|
|
||||||
sp_file["name"].lower()
|
|
||||||
== ContentFileConstants.TITLE_FILE_NAME.lower()
|
|
||||||
):
|
|
||||||
content_directory.title_text = (
|
|
||||||
self.sharepoint_client.get_file_content(sp_file["id"])
|
|
||||||
)
|
|
||||||
elif (
|
|
||||||
sp_file["name"].lower()
|
|
||||||
== ContentFileConstants.CONTENT_LINK_FILE_NAME.lower()
|
|
||||||
):
|
|
||||||
content_directory.content_link = (
|
|
||||||
self.sharepoint_client.get_file_content(sp_file["id"])
|
|
||||||
)
|
|
||||||
elif (
|
|
||||||
sp_file["name"].lower()
|
|
||||||
== ContentFileConstants.CONTENT_HTML_FILE_NAME.lower()
|
|
||||||
):
|
|
||||||
content_directory.content_html = (
|
|
||||||
self.sharepoint_client.get_file_content(sp_file["id"])
|
|
||||||
)
|
|
||||||
elif (
|
|
||||||
sp_file["name"].lower()
|
|
||||||
== ContentFileConstants.CONTENT_TEXT_FILE_NAME.lower()
|
|
||||||
):
|
|
||||||
content_directory.content_text = (
|
|
||||||
self.sharepoint_client.get_file_content(sp_file["id"])
|
|
||||||
)
|
)
|
||||||
|
elif sp_file["name"].lower() == ContentFileConstants.SUMMARY_FILE_NAME.lower():
|
||||||
|
content_directory.summary_text = file_content
|
||||||
|
elif sp_file["name"].lower() == ContentFileConstants.TITLE_FILE_NAME.lower():
|
||||||
|
content_directory.title_text = file_content
|
||||||
elif (
|
elif (
|
||||||
sp_file["name"].lower()
|
sp_file["name"].lower()
|
||||||
== ContentFileConstants.CONTENT_PDF_FILE_NAME.lower()
|
== ContentFileConstants.CONTENT_PDF_FILE_NAME.lower()
|
||||||
):
|
):
|
||||||
content_file_content = self.sharepoint_client.get_file_content(
|
|
||||||
sp_file["id"]
|
|
||||||
)
|
|
||||||
content_document_manager = DocumentManager()
|
|
||||||
file_name = sp_file["name"]
|
|
||||||
created_by = self.__generate_created__by__(folder_name=folder_name)
|
|
||||||
content_directory.content_document_id = (
|
content_directory.content_document_id = (
|
||||||
await content_document_manager.save_document_file(
|
await document_manager.save_document_file(
|
||||||
created_by, file_name, content_file_content
|
self.__generate_created_by(folder_name),
|
||||||
|
sp_file["name"],
|
||||||
|
file_content,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
folder.content_directories.append(content_directory)
|
def __generate_created_by(self, folder_name: str) -> str:
|
||||||
|
return f"content-service-{folder_name.replace('/', '-').lower()}"
|
||||||
folder.udpate_time = current_time
|
|
||||||
folder.update_source = ContentSource.SHAREPOINT
|
|
||||||
folder.valid_thru = current_time + self.share_point_file_expiry
|
|
||||||
await folder.save()
|
|
||||||
|
|
||||||
async def retrieve_directorys_for_all_folders(self):
|
|
||||||
current_time = datetime.now(timezone.utc)
|
|
||||||
folders = await ContentFolderDoc.find(
|
|
||||||
ContentFolderDoc.update_source == ContentSource.SHAREPOINT,
|
|
||||||
ContentFolderDoc.valid_thru < current_time,
|
|
||||||
).to_list()
|
|
||||||
|
|
||||||
for folder in folders:
|
|
||||||
await self.retrieve_directories_for_folder(
|
|
||||||
folder.folder_name, folder.region
|
|
||||||
)
|
|
||||||
|
|||||||
@ -1,30 +1,37 @@
|
|||||||
from common.config.app_settings import app_settings
|
from common.config.app_settings import app_settings
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
|
||||||
class DocumentManager:
|
class DocumentManager:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.storage_service_api_base = app_settings.CENTRAL_STORAGE_WEBAPI_URL_BASE.rstrip('/') + '/'
|
self.storage_service_api_base = (
|
||||||
|
app_settings.CENTRAL_STORAGE_WEBAPI_URL_BASE.rstrip("/") + "/"
|
||||||
|
)
|
||||||
|
|
||||||
async def retrieve_document_info(self, document_id: str):
|
async def retrieve_document_info(self, document_id: str):
|
||||||
api_url = self.storage_service_api_base + "retrieve_document_info/" + document_id
|
api_url = (
|
||||||
|
self.storage_service_api_base + "retrieve_document_info/" + document_id
|
||||||
|
)
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.get(api_url)
|
response = await client.get(api_url)
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
async def retrieve_document_as_http_media(self, document_id: str):
|
async def retrieve_document_as_http_media(self, document_id: str):
|
||||||
api_url = self.storage_service_api_base + "read-document-as-http-media/" + document_id
|
api_url = (
|
||||||
|
self.storage_service_api_base + "read-document-as-http-media/" + document_id
|
||||||
|
)
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.get(api_url)
|
response = await client.get(api_url)
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
async def save_document_file(self,associated_with:str, name:str,blob:bytes)->str:
|
async def save_document_file(
|
||||||
|
self, associated_with: str, name: str, blob: bytes
|
||||||
|
) -> str:
|
||||||
api_url = self.storage_service_api_base + "upload-file"
|
api_url = self.storage_service_api_base + "upload-file"
|
||||||
files = {'file':(name,blob)}
|
files = {"file": (name, blob)}
|
||||||
|
print("this is files", files)
|
||||||
async with httpx.AsyncClient() as client:
|
async with httpx.AsyncClient() as client:
|
||||||
response = await client.post(api_url,
|
response = await client.post(
|
||||||
data={
|
api_url, data={"associated_with": associated_with}, files=files
|
||||||
'associated_with':associated_with
|
|
||||||
},
|
|
||||||
files=files
|
|
||||||
)
|
)
|
||||||
return response.json()['document_id']
|
return response.json()["document_id"]
|
||||||
|
|||||||
@ -3,7 +3,6 @@ from fastapi.encoders import jsonable_encoder
|
|||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from backend.content.content_service import ContentService
|
from backend.content.content_service import ContentService
|
||||||
from fastapi_cache.decorator import cache # Import the cache decorator
|
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@ -15,10 +14,6 @@ router = APIRouter()
|
|||||||
description="retrieve content as media data which can be posted to web page.",
|
description="retrieve content as media data which can be posted to web page.",
|
||||||
response_description="Media data",
|
response_description="Media data",
|
||||||
)
|
)
|
||||||
@cache(
|
|
||||||
expire=300, # Cache the result for 5 minutes
|
|
||||||
key_builder=lambda func, *args, **kwargs: f"content-media:{kwargs.get('document_id', args[0] if len(args) > 0 else '')}",
|
|
||||||
)
|
|
||||||
async def retrieve_content_as_media_data(document_id: str):
|
async def retrieve_content_as_media_data(document_id: str):
|
||||||
result = await ContentService().retrieve_content_as_media_data(document_id)
|
result = await ContentService().retrieve_content_as_media_data(document_id)
|
||||||
return JSONResponse(content=jsonable_encoder(result))
|
return JSONResponse(content=jsonable_encoder(result))
|
||||||
|
|||||||
@ -3,7 +3,6 @@ from fastapi.encoders import jsonable_encoder
|
|||||||
from fastapi.responses import JSONResponse
|
from fastapi.responses import JSONResponse
|
||||||
from backend.content.content_service import ContentService
|
from backend.content.content_service import ContentService
|
||||||
from common.constants.region import UserRegion
|
from common.constants.region import UserRegion
|
||||||
from fastapi_cache.decorator import cache # Import the cache decorator
|
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
|
|
||||||
@ -15,10 +14,7 @@ router = APIRouter()
|
|||||||
description="retrieve directories for a folder, such as testimony, legal, etc",
|
description="retrieve directories for a folder, such as testimony, legal, etc",
|
||||||
response_description="The list of directories under the folder",
|
response_description="The list of directories under the folder",
|
||||||
)
|
)
|
||||||
@cache(
|
# @cache(expire=300) # Cache results for 5 minutes
|
||||||
expire=300, # Cache for 300 seconds
|
|
||||||
key_builder=lambda func, *args, **kwargs: f"folder:{kwargs.get('folder_name', args[0] if len(args) > 0 else '')}:region:{kwargs.get('region', args[1] if len(args) > 1 else '')}",
|
|
||||||
)
|
|
||||||
async def retrieve_directories_for_folder(folder_name: str, region: UserRegion):
|
async def retrieve_directories_for_folder(folder_name: str, region: UserRegion):
|
||||||
result = await ContentService().retrieve_directories_for_folder(folder_name, region)
|
result = await ContentService().retrieve_directories_for_folder(folder_name, region)
|
||||||
return JSONResponse(content=jsonable_encoder(result))
|
return JSONResponse(content=jsonable_encoder(result))
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user