diff --git a/ecs/jskult-webapp/.env.example b/ecs/jskult-webapp/.env.example index 10844a21..6afc42c6 100644 --- a/ecs/jskult-webapp/.env.example +++ b/ecs/jskult-webapp/.env.example @@ -23,6 +23,7 @@ AWS_REGION=ap-northeast-1 AUTHORIZE_ENDPOINT=oauth2/authorize TOKEN_ENDPOINT=oauth2/token BIO_SEARCH_RESULT_MAX_COUNT=35000 +BIO_EXCEL_RESULT_MAX_COUNT=15000 ULTMARC_SEARCH_RESULT_MAX_COUNT=500 SESSION_EXPIRE_MINUTE=20 LOG_LEVEL=DEBUG \ No newline at end of file diff --git a/ecs/jskult-webapp/Dockerfile b/ecs/jskult-webapp/Dockerfile index d7590655..aea8870f 100644 --- a/ecs/jskult-webapp/Dockerfile +++ b/ecs/jskult-webapp/Dockerfile @@ -16,4 +16,4 @@ RUN \ COPY src ./src -CMD ["gunicorn", "src.main:app", "-w", "4", "-k" ,"uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:80", "--timeout", "300"] +CMD ["gunicorn", "src.main:app", "-w", "1", "-k" ,"uvicorn.workers.UvicornWorker", "-b", "0.0.0.0:80", "--timeout", "300"] diff --git a/ecs/jskult-webapp/src/controller/bio_download.py b/ecs/jskult-webapp/src/controller/bio_download.py index 454ff857..f076f28b 100644 --- a/ecs/jskult-webapp/src/controller/bio_download.py +++ b/ecs/jskult-webapp/src/controller/bio_download.py @@ -51,7 +51,7 @@ async def download_bio_data( return {'status': 'batch_processing'} # 生物由来データを検索 # 検索に使用したクエリも取得 - search_result_df, query = _search_bio_data(bio_service, search_param, download_param.user_id) + search_result_df, query = _search_bio_data(bio_service, search_param, download_param) # アクセスログを記録 bio_service.write_access_log(query, search_param, download_param.user_id, current_timestamp, download_file_name) @@ -61,7 +61,9 @@ async def download_bio_data( return {'status': 'ok', 'download_url': None} # ファイルを書き出し(Excel or CSV) - local_file_path = _write_bio_data_to_file(bio_service, download_param, search_result_df, download_file_name) + local_file_path = await _write_bio_data_to_file(bio_service, download_param, search_result_df, download_file_name) + + logger.info('ファイル書き出し完了') # 
ローカルファイルからS3にアップロードし、ダウンロード用URLを取得する try: @@ -99,10 +101,19 @@ async def download_bio_data( return json_response -def _search_bio_data(bio_service: BioViewService, search_param: BioModel, user_id: str) -> pd.DataFrame: +def _search_bio_data( + bio_service: BioViewService, + search_param: BioModel, + download_param: BioDownloadModel +) -> tuple[pd.DataFrame, str]: try: # 生物由来データを検索 - search_result_df, query = bio_service.search_download_bio_data(search_param) + # Excelの場合、出力件数を絞る + if download_param.ext == 'xlsx': + search_result_df, query = bio_service.search_download_bio_data( + search_param, limitation=environment.BIO_EXCEL_RESULT_MAX_COUNT) + elif download_param.ext == 'csv': + search_result_df, query = bio_service.search_download_bio_data(search_param) except DBException as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -112,7 +123,7 @@ def _search_bio_data(bio_service: BioViewService, search_param: BioModel, user_i return search_result_df, query -def _write_bio_data_to_file( +async def _write_bio_data_to_file( bio_service: BioViewService, download_param: BioDownloadModel, df: pd.DataFrame, @@ -122,11 +133,11 @@ def _write_bio_data_to_file( local_file_path = '' if download_param.ext == 'xlsx': logger.info('今回はExcelファイルに出力する') - local_file_path = bio_service.write_excel_file( + local_file_path = await bio_service.write_excel_file( df, download_param.user_id, download_file_name=download_file_name) elif download_param.ext == 'csv': logger.info('今回はCSVファイルに出力する') - local_file_path = bio_service.write_csv_file( + local_file_path = await bio_service.write_csv_file( df, download_param.user_id, header=constants.BIO_CSV_HEADER, download_file_name=download_file_name) return local_file_path diff --git a/ecs/jskult-webapp/src/controller/master_mainte.py b/ecs/jskult-webapp/src/controller/master_mainte.py index 7543c12e..ac9fa50b 100644 --- a/ecs/jskult-webapp/src/controller/master_mainte.py +++ b/ecs/jskult-webapp/src/controller/master_mainte.py @@ 
-234,7 +234,7 @@ def inst_emp_csv_download_view( @router.post('/download', response_class=HTMLResponse) -async def inst_emp_csv_download( +def inst_emp_csv_download( request: Request, csv_download_form: Optional[MasterMainteCsvDlModel] = Depends(MasterMainteCsvDlModel.as_form), master_mainte_service: MasterMainteService = Depends(get_service(MasterMainteService)), diff --git a/ecs/jskult-webapp/src/depends/database.py b/ecs/jskult-webapp/src/depends/database.py index 65a8a967..2dea5dab 100644 --- a/ecs/jskult-webapp/src/depends/database.py +++ b/ecs/jskult-webapp/src/depends/database.py @@ -5,4 +5,9 @@ from src.db.database import Database def get_database(request: Request) -> Database: # medaca_routerでDB接続エンジンが初期化される - return request.app.state._db + db = getattr(request.app.state, '_db', None) + # uvicornのワーカーが起動したタイミングでは、dbがセットされていないので、ここでセットする + if db is None: + db = Database.get_instance() + setattr(request.app.state, '_db', db) + return db diff --git a/ecs/jskult-webapp/src/model/view/bio_view_model.py b/ecs/jskult-webapp/src/model/view/bio_view_model.py index bd43cd7a..7ad9e8cb 100644 --- a/ecs/jskult-webapp/src/model/view/bio_view_model.py +++ b/ecs/jskult-webapp/src/model/view/bio_view_model.py @@ -19,6 +19,7 @@ class BioViewModel(BaseModel): phm_models: list[PharmacyProductMasterModel] bio_data: Optional[list[BisDisplayModel]] = None form_data: BioModel = None + excel_max_lines: int = environment.BIO_EXCEL_RESULT_MAX_COUNT def display_wholesaler_names(self): display_names = [ diff --git a/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py b/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py index 00dc4b68..30e2b562 100644 --- a/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py +++ b/ecs/jskult-webapp/src/repositories/bio_sales_lot_repository.py @@ -81,12 +81,12 @@ class BioSalesLotRepository(BaseRepository): finally: self._database.disconnect() - def fetch_as_data_frame(self, parameter: BioModel): + def 
fetch_as_data_frame(self, parameter: BioModel, limitation: int): try: self._database.connect() logger.debug('DB参照実行') where_clause = self.__build_condition(parameter) - query = self.FETCH_SQL.format(where_clause=where_clause, limit=environment.BIO_SEARCH_RESULT_MAX_COUNT + 1) + query = self.FETCH_SQL.format(where_clause=where_clause, limit=limitation) logger.debug(f'SQL: {query}') df = self._to_data_frame(query, parameter) logger.debug(f'count= {len(df.index)}') diff --git a/ecs/jskult-webapp/src/services/bio_view_service.py b/ecs/jskult-webapp/src/services/bio_view_service.py index dd348d06..cf9961ac 100644 --- a/ecs/jskult-webapp/src/services/bio_view_service.py +++ b/ecs/jskult-webapp/src/services/bio_view_service.py @@ -1,3 +1,5 @@ +import asyncio +import functools import os import shutil from datetime import datetime @@ -69,9 +71,15 @@ class BioViewService(BaseService): return display_bio_data - def search_download_bio_data(self, search_params: BioModel): + def search_download_bio_data( + self, + search_params: BioModel, + limitation=(environment.BIO_SEARCH_RESULT_MAX_COUNT + 1) + ): # 生物由来データをダウンロードするために、DBから検索した結果をデータフレームに変換 - bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame(parameter=search_params) + bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame( + parameter=search_params, limitation=limitation + ) return bio_sales_data_frame, query def write_access_log( @@ -125,7 +133,9 @@ class BioViewService(BaseService): # S3にアップロード self.upload_bio_access_log_file(access_log_file_path) - def write_excel_file(self, data_frame: pd.DataFrame, user_id: str, download_file_name: str): + async def write_excel_file(self, data_frame: pd.DataFrame, user_id: str, download_file_name: str): + # 非同期処理用のイベントループを取得 + loop = asyncio.get_event_loop() # Excelに書き込み output_file_path = os.path.join(constants.BIO_TEMPORARY_FILE_DIR_PATH, download_file_name) @@ -141,11 +151,27 @@ class BioViewService(BaseService): # 
`sheet_name`引数を省略した場合は、「Sheet1」に書き込む。 # DF内のヘッダと連番を書き込みたくない場合、`header`と`index`をFalseに指定する。 # `startrow`と`startcol`で、Excelの書き込み位置を決定する。省略した場合はA1セルから書く。 - data_frame.to_excel(writer, header=False, index=False, startrow=1, startcol=0) + + data_frame_chunk_size = 2000 # 各部分データフレームのサイズ + is_first = True # ループの初回 + data_frame_range = len(data_frame) + # 指定行数ごとにデータを分割して処理 + for i in range(0, data_frame_range, data_frame_chunk_size): + chunk_df = data_frame.iloc[i:i + data_frame_chunk_size] + start_row = i + 1 + # 1回目の分割では、ヘッダ部分を上書きしないようにする + if is_first: + is_first = False + start_row = 1 + await loop.run_in_executor(None, functools.partial( + chunk_df.to_excel, writer, + header=False, index=False, startrow=start_row, startcol=0)) return output_file_path - def write_csv_file(self, data_frame: pd.DataFrame, user_id: str, header: list[str], download_file_name: str): + async def write_csv_file(self, data_frame: pd.DataFrame, user_id: str, header: list[str], download_file_name: str): + # 非同期処理用のイベントループを取得 + loop = asyncio.get_event_loop() # csvに書き込み output_file_path = os.path.join(constants.BIO_TEMPORARY_FILE_DIR_PATH, download_file_name) # 横長のDataFrameとするため、ヘッダーの加工処理 @@ -156,7 +182,9 @@ class BioViewService(BaseService): header_df = pd.DataFrame([header_data], index=None) output_df = pd.concat([header_df, data_frame]) # ヘッダー行としてではなく、1レコードとして出力する - output_df.to_csv(output_file_path, index=False, header=False, encoding="utf-8_sig") + await loop.run_in_executor(None, functools.partial( + output_df.to_csv, output_file_path, + index=False, header=False, encoding="utf-8_sig")) return output_file_path diff --git a/ecs/jskult-webapp/src/system_var/environment.py b/ecs/jskult-webapp/src/system_var/environment.py index ec24aeb9..a03b02d5 100644 --- a/ecs/jskult-webapp/src/system_var/environment.py +++ b/ecs/jskult-webapp/src/system_var/environment.py @@ -20,6 +20,7 @@ DB_PASSWORD = os.environ['DB_PASSWORD'] DB_SCHEMA = os.environ['DB_SCHEMA'] BIO_SEARCH_RESULT_MAX_COUNT = 
int(os.environ.get('BIO_SEARCH_RESULT_MAX_COUNT', 35000)) +BIO_EXCEL_RESULT_MAX_COUNT = int(os.environ.get('BIO_EXCEL_RESULT_MAX_COUNT', 15000)) ULTMARC_SEARCH_RESULT_MAX_COUNT = int(os.environ.get('ULTMARC_SEARCH_RESULT_MAX_COUNT', 500)) SESSION_EXPIRE_MINUTE = int(os.environ['SESSION_EXPIRE_MINUTE']) diff --git a/ecs/jskult-webapp/src/templates/_modal.html b/ecs/jskult-webapp/src/templates/_modal.html index 6477db10..31d7df97 100644 --- a/ecs/jskult-webapp/src/templates/_modal.html +++ b/ecs/jskult-webapp/src/templates/_modal.html @@ -1,4 +1,4 @@ -{% with +{% with icon_data = { 'info': { 'alert': 'alert-primary', @@ -18,10 +18,14 @@