受領ファイルはtar.gz形式なので解凍してからtsvファイルをLOADするよう実装漏れを補完

2023-05-13 01:06:11 +09:00 · 2023-05-13 01:06:11 +09:00 · 2f1b42705a
commit 2f1b42705a
parent 5a32b27e26
2 changed files with 18 additions and 2 deletions
--- a/ecs/jskult-batch-daily/src/aws/s3.py
+++ b/ecs/jskult-batch-daily/src/aws/s3.py
@ -1,5 +1,7 @@
 import io
+import os
 import os.path as path
+import tarfile
 import tempfile

 import boto3
@ -127,3 +129,14 @@ class VjskReceiveBucket(S3Bucket):
            self._s3_client.download_file(self._bucket_name, data_filename, f)
            f.seek(0)
        return temporary_file_path
+
+    def unzip_data_file(self, filename: str):
+        """Extract a downloaded tar archive beside itself and list its files.
+
+        The archive is unpacked into the directory that contains ``filename``.
+        Only regular-file members are extracted and reported, so a directory
+        entry stored in the archive is never returned as a data file.
+
+        Args:
+            filename: Local path of the tar archive (compression is
+                auto-detected by ``tarfile.open``).
+
+        Returns:
+            Paths of the extracted regular files, in archive order.
+
+        Raises:
+            ValueError: If a member would be written outside the
+                extraction directory (e.g. ``../`` or an absolute path).
+        """
+        temp_dir = os.path.dirname(filename)
+        # Resolve once so every member target can be compared against it.
+        dest_root = os.path.realpath(temp_dir)
+        with tarfile.open(filename) as tar:
+            # Skip directories, links and devices: callers only want data files.
+            members = [m for m in tar.getmembers() if m.isfile()]
+            for member in members:
+                target = os.path.realpath(os.path.join(temp_dir, member.name))
+                if os.path.commonpath([dest_root, target]) != dest_root:
+                    raise ValueError(
+                        f"unsafe path in archive {filename}: {member.name}"
+                    )
+            tar.extractall(path=temp_dir, members=members)
+        return [os.path.join(temp_dir, member.name) for member in members]
--- a/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py
+++ b/ecs/jskult-batch-daily/src/batch/vjsk/vjsk_importer.py
@ -171,14 +171,17 @@ def _import_file_to_db():
        local_file_path = vjsk_recv_bucket.download_data_file(file_name)
        logger.debug(f"download s3 file done : {file_name}")

-        # TODO: 受領ファイルはtar.gzなので、ローカルストレージ上で解凍する
+        # ローカルストレージにdownloadした受領ファイル(tar.gz)を解凍する
+        unzip_file_path = vjsk_recv_bucket.unzip_data_file(local_file_path)
+        logger.debug(f"unzip done : {unzip_file_path}")

        # データファイル名に該当する辞書アクセス用のキーを取得する
        key = vjsk_mapper.get_condkey_by_s3_file_path(file_name)

        # 想定されたデータファイルであれば辞書登録する
        if key is not None:
-            target_dict[key] = {"condkey": key, "src_file_path": local_file_path}
+            # ※受領ファイル(tar.gz)の書庫構成はtsvファイルが1つだけの前提
+            target_dict[key] = {"condkey": key, "src_file_path": unzip_file_path[0]}
    logger.debug(f'取込対象データファイル辞書{target_dict}')

    # DB登録　卸在庫データファイル(卸在庫データ処理対象日のみ実施)