def download_ultmarc_hex_convert_config(self):
    """Fetch the Ultmarc hex-conversion config file into a temporary directory.

    The object key is
    ``<JSKULT_CONFIG_CONVERT_FOLDER>/<JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME>``
    in this bucket, and the local copy keeps the same file name.

    Returns:
        str: Path of the downloaded local file. The temporary directory is
        created with ``tempfile.mkdtemp()`` and is not removed by this method.
    """
    # Store the download as a temporary file.
    work_dir = tempfile.mkdtemp()
    local_path = path.join(work_dir, environment.JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME)
    s3_key = '/'.join((
        f'{environment.JSKULT_CONFIG_CONVERT_FOLDER}',
        f'{environment.JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME}',
    ))
    with open(local_path, mode='wb') as local_file:
        self._s3_client.download_file(self._bucket_name, s3_key, local_file)
        local_file.seek(0)
    return local_path
_HEX_DIGITS = frozenset('0123456789abcdefABCDEF')


def _is_cp932_lead_byte(byte: int) -> bool:
    """Return True when *byte* opens a double-byte cp932 character."""
    return 0x81 <= byte <= 0x9F or 0xE0 <= byte <= 0xFC


def _is_cp932_trail_byte(byte: int) -> bool:
    """Return True when *byte* can close a double-byte cp932 character."""
    return 0x40 <= byte <= 0x7E or 0x80 <= byte <= 0xFC


def _parse_hex_pattern(pattern: str) -> bytes:
    r"""Translate a config pattern such as ``\xF9\xEB`` into raw bytes.

    Each ``\xHH`` escape yields the byte ``0xHH``. A run of backslashes that
    does not start such an escape yields one literal ``0x5C`` byte: the config
    file spells a 0x5C trail byte that way (e.g. ``\xF8\\\``), a leftover of
    the shell + sed quoting the patterns were originally written for.

    Raises:
        ValueError: If the pattern contains anything else.
    """
    parsed = bytearray()
    pos = 0
    while pos < len(pattern):
        if pattern[pos] != '\\':
            raise ValueError(f'unsupported hex convert pattern: {pattern!r}')
        escape = pattern[pos:pos + 4]
        if len(escape) == 4 and escape[1] == 'x' and set(escape[2:]) <= _HEX_DIGITS:
            parsed.append(int(escape[2:], 16))
            pos += 4
            continue
        # Collapse the whole backslash run into a single 0x5C byte.
        while pos < len(pattern) and pattern[pos] == '\\':
            pos += 1
        parsed.append(0x5C)
    return bytes(parsed)


def _replace_double_byte_chars(data: bytes, conversion_table: dict) -> bytes:
    """Replace mapped double-byte characters in one pass over *data*.

    The data is walked character by character using the cp932 lead/trail byte
    ranges, so a mapping never matches the trail byte of one character joined
    with the lead byte of the next one.
    """
    converted = bytearray()
    pos = 0
    size = len(data)
    while pos < size:
        has_pair = (
            pos + 1 < size
            and _is_cp932_lead_byte(data[pos])
            and _is_cp932_trail_byte(data[pos + 1])
        )
        if has_pair:
            char = data[pos:pos + 2]
            converted += conversion_table.get(char, char)
            pos += 2
        else:
            converted.append(data[pos])
            pos += 1
    return bytes(converted)


def _convert_character_hex(dat_file_path: str, config_file_path: str) -> str:
    r"""Rewrite mapped double-byte codes in the dat file, in place.

    The config file is a JSON object whose keys and values are byte patterns
    written as ``\xHH`` escapes; every character of the dat file whose bytes
    equal a key is replaced by the bytes of the matching value.

    The previous implementation ran ``sed -e`` without ``-i`` once per
    mapping, which only printed the result to stdout and left the file
    untouched. The conversion is now done on the raw bytes in Python, so it
    needs no shell and does not depend on the platform's sed.

    Args:
        dat_file_path: Path of the dat file to convert. It is overwritten
            when at least one character is replaced.
        config_file_path: Path of the JSON conversion config (UTF-8).

    Returns:
        str: ``dat_file_path``, now holding the converted content.

    Raises:
        ValueError: If a pattern cannot be parsed, or a source pattern is not
            a single double-byte character.
    """
    logger.debug('文字コード変換開始')
    with open(config_file_path, 'r', encoding='utf-8') as f:
        hex_convert_config_dict: dict = json.load(f)

    conversion_table = {}
    for src_hex, dst_hex in hex_convert_config_dict.items():
        src_bytes = _parse_hex_pattern(src_hex)
        is_double_byte_char = (
            len(src_bytes) == 2
            and _is_cp932_lead_byte(src_bytes[0])
            and _is_cp932_trail_byte(src_bytes[1])
        )
        if not is_double_byte_char:
            raise ValueError(f'source pattern is not a double-byte character: {src_hex!r}')
        conversion_table[src_bytes] = _parse_hex_pattern(dst_hex)

    with open(dat_file_path, 'rb') as f:
        original_data = f.read()

    converted_data = _replace_double_byte_chars(original_data, conversion_table)
    # Skip the write when nothing matched so the file is left untouched.
    if converted_data != original_data:
        with open(dat_file_path, 'wb') as f:
            f.write(converted_data)
    logger.debug('文字コード変換終了')

    return dat_file_path
"\\xF9\\xC8": "\\xFA\\x75", + "\\xF7\\xB1": "\\xFA\\x74", + "\\xF9\\xC7": "\\xFA\\x78", + "\\xF9\\xC4": "\\xFA\\x7E", + "\\xF9\\xFA": "\\xFA\\x81", + "\\xF8\\x60": "\\xFA\\x84", + "\\xF9\\xBE": "\\xFA\\x86", + "\\xF7\\x83": "\\xFA\\x88", + "\\xF9\\xBA": "\\xFA\\x8B", + "\\xF8\\x92": "\\xFA\\x8D", + "\\xF9\\xB9": "\\xFA\\x8E", + "\\xF9\\xB6": "\\xFA\\x92", + "\\xF9\\xB5": "\\xFA\\x93", + "\\xF9\\xB4": "\\xFA\\x95", + "\\xF9\\xAE": "\\xFA\\x9A", + "\\xF7\\xB9": "\\xFA\\x99", + "\\xF8\\x81": "\\xFA\\x9D", + "\\xF9\\xA7": "\\xFA\\xA1", + "\\xF9\\xA6": "\\xFA\\xA2", + "\\xF9\\xA5": "\\xFA\\xA4", + "\\xF9\\xA3": "\\xFA\\xA6", + "\\xF9\\xA1": "\\xFA\\xAA", + "\\xF9\\x9E": "\\xFA\\xAB", + "\\xF8\\x9A": "\\xFA\\xAD", + "\\xF9\\x9D": "\\xFA\\xAF", + "\\xF9\\x9A": "\\xFA\\xB2", + "\\xF9\\x99": "\\xFA\\xB0", + "\\xF7\\xA1": "\\xFA\\xB4", + "\\xF8\\x69": "\\xFA\\x67", + "\\xF9\\x92": "\\xFA\\xB9", + "\\xF9\\x91": "\\xFA\\xBA", + "\\xF9\\x90": "\\xFA\\xBB", + "\\xF9\\x8E": "\\xFA\\xBE", + "\\xF8\\\\\\": "\\xFA\\xC0", + "\\xF9\\x8D": "\\xFA\\xC5", + "\\xF9\\x8C": "\\xFA\\xC4", + "\\xF9\\x89": "\\xFA\\xC8", + "\\xF9\\x87": "\\xFA\\xC9", + "\\xF9\\x86": "\\xFA\\xCA", + "\\xF7\\x89": "\\xFA\\xCE", + "\\xF8\\x59": "\\xFA\\xD1", + "\\xF9\\x80": "\\xFA\\xCF", + "\\xF9\\x7D": "\\xFA\\xD4", + "\\xF9\\x7E": "\\xFA\\x63", + "\\xF9\\x7A": "\\xFA\\xD7", + "\\xF8\\x58": "\\xFA\\xD5", + "\\xF9\\x77": "\\xFA\\xD9", + "\\xF9\\x76": "\\xFA\\xDB", + "\\xF9\\x75": "\\xFA\\xDC", + "\\xF8\\x57": "\\xFA\\xDD", + "\\xF8\\x6F": "\\xFA\\xDE", + "\\xF9\\x7C": "\\xFA\\x66", + "\\xF9\\x71": "\\xFA\\xE1", + "\\xF9\\x6E": "\\xFA\\xE2", + "\\xF9\\x6F": "\\xFA\\xE5", + "\\xF8\\x70": "\\xFA\\xE6", + "\\xF9\\x6D": "\\xFA\\xE3", + "\\xF9\\x6A": "\\xFA\\x64", + "\\xF6\\xAC": "\\xFA\\xE7", + "\\xF9\\x69": "\\xFA\\xE9", + "\\xF7\\xD9": "\\xFA\\xF0", + "\\xF9\\x66": "\\xFA\\xF1", + "\\xF7\\xDC": "\\xFA\\xF3", + "\\xF9\\x65": "\\xFA\\xF5", + "\\xF9\\x64": "\\xFA\\xF6", + "\\xF8\\x80": "\\xFA\\xF9", + "\\xF9\\x5E": 
"\\xFA\\xFB", + "\\xF9\\\\\\": "\\xFB\\x42", + "\\xF9\\x5A": "\\xFB\\x45", + "\\xF9\\x59": "\\xFB\\x49", + "\\xF9\\x57": "\\xFB\\x47", + "\\xF9\\x56": "\\xFB\\x4A", + "\\xF9\\x52": "\\xFB\\x4B", + "\\xF7\\x51": "\\xFB\\x4C", + "\\xF9\\x4F": "\\xFB\\x4D", + "\\xF9\\x4E": "\\xFB\\x4E", + "\\xF9\\x4D": "\\xFB\\x4F", + "\\xF9\\x4B": "\\xFB\\x50", + "\\xF9\\x49": "\\xFB\\x51", + "\\xF9\\x47": "\\xFB\\x52", + "\\xF9\\x44": "\\xFB\\x54", + "\\xF9\\x41": "\\xFB\\x57", + "\\xF9\\x42": "\\xFB\\x55", + "\\xF8\\xFC": "\\xFB\\x59", + "\\xF7\\xDF": "\\xFB\\x5A", + "\\xF7\\xA9": "\\xFB\\x5B", + "\\xF8\\x6D": "\\xFB\\x5C", + "\\xF8\\xF7": "\\xFB\\x61", + "\\xF8\\xF5": "\\xFB\\x65", + "\\xF8\\xF1": "\\xFB\\x67", + "\\xF8\\xF3": "\\xFB\\x68", + "\\xF8\\xF0": "\\xFB\\x6A", + "\\xF8\\xEE": "\\xFB\\x6B", + "\\xF8\\xED": "\\xFB\\x6D", + "\\xF8\\xE8": "\\xFB\\x6F", + "\\xF8\\x53": "\\xFB\\x71", + "\\xF8\\xE7": "\\xFB\\x72", + "\\xF6\\xB9": "\\xFB\\x7C", + "\\xF8\\x77": "\\xFB\\x83", + "\\xF8\\xDC": "\\xFB\\x84", + "\\xF8\\x72": "\\xFB\\x85", + "\\xF8\\x73": "\\xFB\\x87", + "\\xF6\\xC4": "\\xFB\\x88", + "\\xF8\\xD2": "\\xFB\\x8A", + "\\xF7\\x5F": "\\xFB\\x8D", + "\\xF7\\xE5": "\\xFB\\x8F", + "\\xF8\\xCF": "\\xFA\\x5C", + "\\xF6\\xD5": "\\xFB\\x90", + "\\xF8\\xCA": "\\xFB\\x93", + "\\xF6\\xC6": "\\xFB\\x94", + "\\xF8\\xC9": "\\xFB\\x95", + "\\xF8\\x65": "\\xFA\\x60", + "\\xF8\\xC7": "\\xFB\\x9B", + "\\xF7\\xF3": "\\xFB\\x9E", + "\\xF9\\xF1": "\\xFB\\xA2", + "\\xF8\\x4C": "\\xFA\\x5D", + "\\xF8\\xBF": "\\xFB\\xA5", + "\\xF8\\xBE": "\\xFB\\xA7", + "\\xF8\\xBC": "\\xFB\\xA8", + "\\xF8\\xBB": "\\xFB\\xAA", + "\\xF8\\x4B": "\\xFB\\xAC", + "\\xF8\\xB9": "\\xFB\\xAD", + "\\xF8\\xB8": "\\xFB\\xAE", + "\\xF8\\x4A": "\\xFB\\xAF", + "\\xF6\\xCB": "\\xFB\\xB6", + "\\xF7\\xF0": "\\xFB\\xB8", + "\\xF8\\xB4": "\\xFB\\xB9", + "\\xF8\\xB2": "\\xFB\\xBB", + "\\xF8\\xB1": "\\xFB\\xBF", + "\\xF7\\xE6": "\\xFB\\xC0", + "\\xF7\\xE2": "\\xFB\\xC4", + "\\xF8\\x49": "\\xFB\\xC7", + "\\xF8\\xAE": "\\xFA\\x5F", + 
"\\xF8\\xAD": "\\xFB\\xCC", + "\\xF8\\xAB": "\\xFB\\xD6", + "\\xF8\\xAC": "\\xFB\\xD5", + "\\xF8\\xAA": "\\xFB\\xD9", + "\\xF8\\x48": "\\xFB\\xDC", + "\\xF7\\x66": "\\xFB\\xDD", + "\\xF8\\xA8": "\\xFA\\x5E", + "\\xF8\\xA4": "\\xFB\\xE2", + "\\xF8\\xA5": "\\xFB\\xE3", + "\\xF8\\xA0": "\\xFB\\xEB", + "\\xF8\\x9F": "\\xFB\\xEC", + "\\xF8\\x82": "\\xFB\\xED", + "\\xF8\\x83": "\\xFB\\xEE", + "\\xF8\\x84": "\\xFB\\xF0", + "\\xF8\\x85": "\\xFB\\xF1", + "\\xF8\\x87": "\\xFB\\xF3", + "\\xF8\\x8A": "\\xFB\\xF4", + "\\xF8\\x8B": "\\xFB\\xF5", + "\\xF8\\x47": "\\xFB\\xFB", + "\\xF7\\xE0": "\\xFC\\x42", + "\\xF8\\x8D": "\\xFC\\x41", + "\\xF7\\x4B": "\\xFC\\x43", + "\\xF8\\x8E": "\\xFC\\x44", + "\\xF8\\x8F": "\\xFC\\x46", + "\\xF6\\xD7": "\\xFC\\x4A", + "\\xF8\\x9D": "\\xFC\\x4B", + "\\xF6\\xAA": "\\xFB\\xF6", + "\\xF6\\xDC": "\\xFA\\xD0", + "\\xF7\\x72": "\\xFB\\x7D", + "\\xF7\\xAA": "\\xFB\\x92", + "\\xF8\\x4D": "\\xFB\\x97", + "\\xF8\\x5D": "\\xFA\\xBD", + "\\xF8\\x63": "\\xFA\\x7D", + "\\xF8\\x66": "\\xFA\\xD8", + "\\xF8\\x79": "\\xFB\\x89", + "\\xF8\\x86": "\\xFB\\xF2", + "\\xF8\\x8C": "\\xFB\\xFC", + "\\xF8\\xB0": "\\xFB\\xC1", + "\\xF8\\xBA": "\\xFB\\xAB", + "\\xF8\\xDA": "\\xFB\\x86", + "\\xF9\\x5D": "\\xFB\\x43", + "\\xF9\\x63": "\\xFA\\xF8", + "\\xF9\\x85": "\\xFA\\xCD", + "\\xF9\\xBD": "\\xFA\\x87", + "\\xF9\\xC2": "\\xFA\\x80", + "\\xF9\\xC6": "\\xFA\\x7A", + "\\xF9\\xD4": "\\xFA\\x6D", + "\\xF9\\x73": "\\xFA\\xE0", + "\\xF8\\xA2": "\\xFB\\xE9", + "\\xF9\\xAD": "\\xFA\\x9B", + "\\xF7\\x85": "\\xFA\\x9C", + "\\xF9\\x9B": "\\xFA\\xB1", + "\\xF7\\xF4": "\\xFB\\x7E", + "\\xF8\\xE2": "\\xFB\\x80", + "\\xF7\\xDE": "\\xFB\\x82", + "\\xF8\\x95": "\\xFB\\x9F", + "\\xF7\\xDD": "\\xFB\\xA0", + "\\xF6\\xDA": "\\xFB\\xB7", + "\\xF9\\xEC": "\\xFA\\x57", + "\\xF9\\xED": "\\xFA\\x56" +} \ No newline at end of file