fix: 生物由来Excelの出力件数制限を実装（メモリオーバー対策）

2023-08-24 15:47:43 +09:00 · 2023-08-24 15:47:43 +09:00 · 143fda6e26
commit 143fda6e26
parent 920ae17836
6 changed files with 26 additions and 8 deletions
--- a/ecs/jskult-webapp/.env.example
+++ b/ecs/jskult-webapp/.env.example
@@ -23,6 +23,7 @@ AWS_REGION=ap-northeast-1
 AUTHORIZE_ENDPOINT=oauth2/authorize
 TOKEN_ENDPOINT=oauth2/token
 BIO_SEARCH_RESULT_MAX_COUNT=35000
+BIO_EXCEL_RESULT_MAX_COUNT=6500
 ULTMARC_SEARCH_RESULT_MAX_COUNT=500
 SESSION_EXPIRE_MINUTE=20
 LOG_LEVEL=DEBUG
--- a/ecs/jskult-webapp/src/controller/bio_download.py
+++ b/ecs/jskult-webapp/src/controller/bio_download.py
@@ -51,7 +51,7 @@ async def download_bio_data(
        return {'status': 'batch_processing'}
    # 生物由来データを検索
    # 検索に使用したクエリも取得
-    search_result_df, query = _search_bio_data(bio_service, search_param, download_param.user_id)
+    search_result_df, query = _search_bio_data(bio_service, search_param, download_param)
    # アクセスログを記録
    bio_service.write_access_log(query, search_param, download_param.user_id, current_timestamp, download_file_name)

@@ -99,10 +99,19 @@ async def download_bio_data(
    return json_response


-def _search_bio_data(bio_service: BioViewService, search_param: BioModel, user_id: str) -> pd.DataFrame:
+def _search_bio_data(
+    bio_service: BioViewService,
+    search_param: BioModel,
+    download_param: BioDownloadModel
+) -> pd.DataFrame:
    try:
        # 生物由来データを検索
-        search_result_df, query = bio_service.search_download_bio_data(search_param)
+        # Excelの場合、出力件数を絞る
+        if download_param.ext == 'xlsx':
+            search_result_df, query = bio_service.search_download_bio_data(
+                search_param, limitation=environment.BIO_EXCEL_RESULT_MAX_COUNT)
+        elif download_param.ext == 'csv':
+            search_result_df, query = bio_service.search_download_bio_data(search_param)
    except DBException as e:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
--- a/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py
+++ b/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py
@@ -81,12 +81,12 @@ class BioSalesLotRepository(BaseRepository):
        finally:
            self._database.disconnect()

-    def fetch_as_data_frame(self, parameter: BioModel):
+    def fetch_as_data_frame(self, parameter: BioModel, limitation: int):
        try:
            self._database.connect()
            logger.debug('DB参照実行')
            where_clause = self.__build_condition(parameter)
-            query = self.FETCH_SQL.format(where_clause=where_clause, limit=environment.BIO_SEARCH_RESULT_MAX_COUNT + 1)
+            query = self.FETCH_SQL.format(where_clause=where_clause, limit=limitation)
            logger.debug(f'SQL: {query}')
            df = self._to_data_frame(query, parameter)
            logger.debug(f'count= {len(df.index)}')
--- a/ecs/jskult-webapp/src/services/bio_view_service.py
+++ b/ecs/jskult-webapp/src/services/bio_view_service.py
@@ -69,9 +69,15 @@ class BioViewService(BaseService):

        return display_bio_data

-    def search_download_bio_data(self, search_params: BioModel):
+    def search_download_bio_data(
+            self,
+            search_params: BioModel,
+            limitation=(environment.BIO_SEARCH_RESULT_MAX_COUNT + 1)
+    ):
        # 生物由来データをダウンロードするために、DBから検索した結果をデータフレームに変換
-        bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame(parameter=search_params)
+        bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame(
+            parameter=search_params, limitation=limitation
+        )
        return bio_sales_data_frame, query

    def write_access_log(
--- a/ecs/jskult-webapp/src/system_var/environment.py
+++ b/ecs/jskult-webapp/src/system_var/environment.py
@@ -20,7 +20,8 @@ DB_PASSWORD = os.environ['DB_PASSWORD']
 DB_SCHEMA = os.environ['DB_SCHEMA']

 BIO_SEARCH_RESULT_MAX_COUNT = int(os.environ.get('BIO_SEARCH_RESULT_MAX_COUNT', 35000))
-ULTMARC_SEARCH_RESULT_MAX_COUNT = int(os.environ['ULTMARC_SEARCH_RESULT_MAX_COUNT'])
+BIO_EXCEL_RESULT_MAX_COUNT = int(os.environ.get('BIO_EXCEL_RESULT_MAX_COUNT', 6500))
+ULTMARC_SEARCH_RESULT_MAX_COUNT = int(os.environ.get('ULTMARC_SEARCH_RESULT_MAX_COUNT', 500))
 SESSION_EXPIRE_MINUTE = int(os.environ['SESSION_EXPIRE_MINUTE'])

 LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')
--- a/s3/config/jskult/task_settings/web_task_settings.env
+++ b/s3/config/jskult/task_settings/web_task_settings.env
@@ -3,5 +3,6 @@ AWS_REGION=ap-northeast-1
 AUTHORIZE_ENDPOINT=oauth2/authorize
 TOKEN_ENDPOINT=oauth2/token
 BIO_SEARCH_RESULT_MAX_COUNT=35000
+BIO_EXCEL_RESULT_MAX_COUNT=6500
 SESSION_EXPIRE_MINUTE=20
 ULTMARC_SEARCH_RESULT_MAX_COUNT=500