From 2f1b42705a5f374bbb3551de70a036e098f114fa Mon Sep 17 00:00:00 2001
From: "x.azuma.m@nds-tyo.co.jp"
Date: Sat, 13 May 2023 01:06:11 +0900
Subject: [PATCH] =?UTF-8?q?=E5=8F=97=E9=A0=98=E3=83=95=E3=82=A1=E3=82=A4?=
 =?UTF-8?q?=E3=83=AB=E3=81=AFtar.gz=E5=BD=A2=E5=BC=8F=E3=81=AA=E3=81=AE?=
 =?UTF-8?q?=E3=81=A7=E8=A7=A3=E5=87=8D=E3=81=97=E3=81=A6=E3=81=8B=E3=82=89?=
 =?UTF-8?q?tsv=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB=E3=82=92LOAD=E3=81=99?=
 =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E5=AE=9F=E8=A3=85=E6=BC=8F=E3=82=8C?=
 =?UTF-8?q?=E3=82=92=E8=A3=9C=E5=AE=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ecs/jskult-batch-daily/src/aws/s3.py | 33 +++++++++++++++++++++++++++++++++
 .../src/batch/vjsk/vjsk_importer.py  |  7 +++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/ecs/jskult-batch-daily/src/aws/s3.py b/ecs/jskult-batch-daily/src/aws/s3.py
index 62111409..2e98eaac 100644
--- a/ecs/jskult-batch-daily/src/aws/s3.py
+++ b/ecs/jskult-batch-daily/src/aws/s3.py
@@ -1,5 +1,7 @@
 import io
+import os
 import os.path as path
+import tarfile
 import tempfile
 
 import boto3
@@ -127,3 +129,34 @@ class VjskReceiveBucket(S3Bucket):
             self._s3_client.download_file(self._bucket_name, data_filename, f)
             f.seek(0)
         return temporary_file_path
+
+    def unzip_data_file(self, filename: str):
+        """Extract a downloaded tar archive into the directory that holds it.
+
+        Only regular-file members are extracted and returned, so directory
+        entries inside the archive never show up in the result.
+
+        Args:
+            filename: Local path of the archive; compression is detected
+                automatically by ``tarfile.open``.
+
+        Returns:
+            Paths of the extracted files, in archive order.
+
+        Raises:
+            tarfile.TarError: If the file is not a readable tar archive, or a
+                member would be written outside the extraction directory.
+        """
+        temp_dir = os.path.dirname(filename)
+        dest_root = os.path.realpath(temp_dir)
+        with tarfile.open(filename) as tar:
+            members = [m for m in tar.getmembers() if m.isfile()]
+            # The archive comes from outside: validate every member before
+            # writing anything, so "../x" or "/x" cannot escape temp_dir.
+            for member in members:
+                target = os.path.realpath(os.path.join(temp_dir, member.name))
+                if os.path.commonpath([dest_root, target]) != dest_root:
+                    raise tarfile.TarError(
+                        f"unsafe member path in archive: {member.name}")
+            tar.extractall(path=temp_dir, members=members)
+        return [os.path.join(temp_dir, m.name) for m in members]
diff --git a/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py b/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py
index f2da3ed9..2737ffca 100644
--- a/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py
+++ b/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py
@@ -171,14 +171,17 @@ def _import_file_to_db():
         local_file_path = vjsk_recv_bucket.download_data_file(file_name)
         logger.debug(f"download s3 file done : {file_name}")
 
-        # TODO: 受領ファイルはtar.gzなので、ローカルストレージ上で解凍する
+        # Extract the received file (tar.gz) that was downloaded to local storage
+        unzip_file_path = vjsk_recv_bucket.unzip_data_file(local_file_path)
+        logger.debug(f"unzip done : {unzip_file_path}")
 
         # データファイル名に該当する辞書アクセス用のキーを取得する
         key = vjsk_mapper.get_condkey_by_s3_file_path(file_name)
 
         # 想定されたデータファイルであれば辞書登録する
         if key is not None:
-            target_dict[key] = {"condkey": key, "src_file_path": local_file_path}
+            # NOTE: the received archive (tar.gz) is assumed to hold exactly one TSV file
+            target_dict[key] = {"condkey": key, "src_file_path": unzip_file_path[0]}
             logger.debug(f'取込対象データファイル辞書{target_dict}')
 
     # DB登録 卸在庫データファイル(卸在庫データ処理対象日のみ実施)