fix: 生物由来Excelの出力件数制限を実装(メモリオーバー対策)

This commit is contained in:
shimoda.m@nds-tyo.co.jp 2023-08-24 15:47:43 +09:00
parent 920ae17836
commit 143fda6e26
6 changed files with 26 additions and 8 deletions

View File

@@ -23,6 +23,7 @@ AWS_REGION=ap-northeast-1
AUTHORIZE_ENDPOINT=oauth2/authorize
TOKEN_ENDPOINT=oauth2/token
BIO_SEARCH_RESULT_MAX_COUNT=35000
BIO_EXCEL_RESULT_MAX_COUNT=6500
ULTMARC_SEARCH_RESULT_MAX_COUNT=500
SESSION_EXPIRE_MINUTE=20
LOG_LEVEL=DEBUG

View File

@@ -51,7 +51,7 @@ async def download_bio_data(
return {'status': 'batch_processing'}
# 生物由来データを検索
# 検索に使用したクエリも取得
search_result_df, query = _search_bio_data(bio_service, search_param, download_param.user_id)
search_result_df, query = _search_bio_data(bio_service, search_param, download_param)
# アクセスログを記録
bio_service.write_access_log(query, search_param, download_param.user_id, current_timestamp, download_file_name)
@@ -99,10 +99,19 @@ async def download_bio_data(
return json_response
def _search_bio_data(bio_service: BioViewService, search_param: BioModel, user_id: str) -> pd.DataFrame:
def _search_bio_data(
bio_service: BioViewService,
search_param: BioModel,
download_param: BioDownloadModel
) -> pd.DataFrame:
try:
# 生物由来データを検索
search_result_df, query = bio_service.search_download_bio_data(search_param)
# Excelの場合、出力件数を絞る
if download_param.ext == 'xlsx':
search_result_df, query = bio_service.search_download_bio_data(
search_param, limitation=environment.BIO_EXCEL_RESULT_MAX_COUNT)
elif download_param.ext == 'csv':
search_result_df, query = bio_service.search_download_bio_data(search_param)
except DBException as e:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,

View File

@@ -81,12 +81,12 @@ class BioSalesLotRepository(BaseRepository):
finally:
self._database.disconnect()
def fetch_as_data_frame(self, parameter: BioModel):
def fetch_as_data_frame(self, parameter: BioModel, limitation: int):
try:
self._database.connect()
logger.debug('DB参照実行')
where_clause = self.__build_condition(parameter)
query = self.FETCH_SQL.format(where_clause=where_clause, limit=environment.BIO_SEARCH_RESULT_MAX_COUNT + 1)
query = self.FETCH_SQL.format(where_clause=where_clause, limit=limitation)
logger.debug(f'SQL: {query}')
df = self._to_data_frame(query, parameter)
logger.debug(f'count= {len(df.index)}')

View File

@@ -69,9 +69,15 @@ class BioViewService(BaseService):
return display_bio_data
def search_download_bio_data(self, search_params: BioModel):
def search_download_bio_data(
self,
search_params: BioModel,
limitation=(environment.BIO_SEARCH_RESULT_MAX_COUNT + 1)
):
# 生物由来データをダウンロードするために、DBから検索した結果をデータフレームに変換
bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame(parameter=search_params)
bio_sales_data_frame, query = self.bio_sales_repository.fetch_as_data_frame(
parameter=search_params, limitation=limitation
)
return bio_sales_data_frame, query
def write_access_log(

View File

@@ -20,7 +20,8 @@ DB_PASSWORD = os.environ['DB_PASSWORD']
DB_SCHEMA = os.environ['DB_SCHEMA']
BIO_SEARCH_RESULT_MAX_COUNT = int(os.environ.get('BIO_SEARCH_RESULT_MAX_COUNT', 35000))
ULTMARC_SEARCH_RESULT_MAX_COUNT = int(os.environ['ULTMARC_SEARCH_RESULT_MAX_COUNT'])
BIO_EXCEL_RESULT_MAX_COUNT = int(os.environ.get('BIO_EXCEL_RESULT_MAX_COUNT', 6500))
ULTMARC_SEARCH_RESULT_MAX_COUNT = int(os.environ.get('ULTMARC_SEARCH_RESULT_MAX_COUNT', 500))
SESSION_EXPIRE_MINUTE = int(os.environ['SESSION_EXPIRE_MINUTE'])
LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO')

View File

@@ -3,5 +3,6 @@ AWS_REGION=ap-northeast-1
AUTHORIZE_ENDPOINT=oauth2/authorize
TOKEN_ENDPOINT=oauth2/token
BIO_SEARCH_RESULT_MAX_COUNT=35000
BIO_EXCEL_RESULT_MAX_COUNT=6500
SESSION_EXPIRE_MINUTE=20
ULTMARC_SEARCH_RESULT_MAX_COUNT=500