fix: 文字コードの変換を実装。ステージング未検証

This commit is contained in:
shimoda.m@nds-tyo.co.jp 2023-09-28 15:49:03 +09:00
parent 426fa534b2
commit ce9d8229d5
6 changed files with 255 additions and 4 deletions

View File

@ -16,6 +16,8 @@ VJSK_DATA_SEND_FOLDER=send
VJSK_DATA_RECEIVE_FOLDER=recv
VJSK_DATA_BUCKET=*************
JSKULT_CONFIG_CALENDAR_WHOLESALER_STOCK_FILE_NAME=jskult_wholesaler_stock_input_day_list.txt
JSKULT_CONFIG_CONVERT_FOLDER=jskult/convert
JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME=ultmarc_hex_convert_config.json
# 連携データ抽出期間
SALES_LAUNDERING_EXTRACT_DATE_PERIOD=0
# 洗替対象テーブル名

View File

@ -103,6 +103,16 @@ class ConfigBucket(S3Bucket):
f.seek(0)
return temporary_file_path
def download_ultmarc_hex_convert_config(self):
    """Download the Ultmarc hex-conversion config file into a new temp directory.

    The object key is built as
    ``<JSKULT_CONFIG_CONVERT_FOLDER>/<JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME>``
    and fetched from this bucket through ``self._s3_client``.

    Returns:
        str: Local path of the downloaded file.
    """
    # Save as a temporary file; every call gets its own directory.
    download_dir = tempfile.mkdtemp()
    config_file_name = environment.JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME
    local_path = path.join(download_dir, config_file_name)
    s3_key = f'{environment.JSKULT_CONFIG_CONVERT_FOLDER}/{config_file_name}'
    with open(local_path, mode='wb') as out_file:
        self._s3_client.download_file(self._bucket_name, s3_key, out_file)
        out_file.seek(0)
    return local_path
class JskUltBackupBucket(S3Bucket):
_bucket_name = environment.JSKULT_BACKUP_BUCKET

View File

@ -54,7 +54,7 @@ class DatFile:
DatFile: このクラスのインスタンス
"""
# cp932(Shift-JIS Windows拡張)でファイルを読み込む
file = open(local_file_path, encoding='cp932')
file = open(local_file_path, encoding='cp932', errors='replace')
instance = cls(file)
file.close()
return instance

View File

@ -1,11 +1,12 @@
"""アルトマークデータ処理"""
import json
import subprocess
from datetime import datetime
from src.aws.s3 import UltmarcBucket
from src.aws.s3 import ConfigBucket, UltmarcBucket
from src.batch.common.batch_context import BatchContext
from src.batch.ultmarc.datfile import DatFile
from src.batch.ultmarc.utmp_tables.ultmarc_table_mapper_factory import \
UltmarcTableMapperFactory
from src.db.database import Database
@ -15,6 +16,7 @@ from src.system_var import constants
# Module-level logger used by the functions in this module.
logger = get_logger('アルトマークデータ処理')
# Bucket wrapper used to download the incoming .dat files.
ultmarc_bucket = UltmarcBucket()
# Bucket wrapper used to download the hex-conversion config file.
config_bucket = ConfigBucket()
# Batch context obtained through BatchContext.get_instance()
# (presumably a process-wide shared instance — TODO confirm).
batch_context = BatchContext.get_instance()
@ -48,7 +50,11 @@ def exec_import():
logger.info(f"{dat_file_name}を取り込みます")
# datファイルをダウンロード
local_file_path = ultmarc_bucket.download_dat_file(dat_file_name)
dat_file = DatFile.from_path(local_file_path)
# 文字コード変換設定ファイルをダウンロード
convert_config_file_path = config_bucket.download_ultmarc_hex_convert_config()
# 文字コードを変換
converted_file_path = _convert_character_hex(local_file_path, convert_config_file_path)
dat_file = DatFile.from_path(converted_file_path)
# アルトマーク取り込み実行
_import_to_ultmarc_table(dat_file)
# 処理後ファイルをバックアップ
@ -61,6 +67,20 @@ def exec_import():
raise BatchOperationException(e)
def _convert_character_hex(dat_file_path: str, config_file_path: str) -> str:
    """Rewrite byte sequences in a dat file according to a JSON mapping.

    The config file is a JSON object whose keys are sed patterns and whose
    values are sed replacements (e.g. ``\\xF9\\xEB`` -> ``\\x87\\x82``). Every
    entry is applied as ``s/<key>/<value>/g``, in config order, and the dat
    file is updated in place.

    Args:
        dat_file_path: Path of the dat file to convert. It is overwritten
            with the converted content.
        config_file_path: Path of the UTF-8 JSON conversion config.

    Returns:
        str: ``dat_file_path`` (the same path, now holding converted data).

    Raises:
        subprocess.CalledProcessError: If sed exits with a non-zero status.
            The original dat file is left untouched in that case.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import shlex

    logger.debug('文字コード変換開始')
    with open(config_file_path, 'r', encoding='utf-8') as f:
        hex_convert_config_dict: dict = json.load(f)

    # With no entries there is nothing to run; calling sed without a script
    # would make it treat the file path as the script.
    if hex_convert_config_dict:
        # NOTE(review): entries are still interpolated inside shell double
        # quotes, exactly as before, because the config appears to be escaped
        # for that form (some keys end in a run of backslashes). The config is
        # treated as trusted input.
        expressions = ' '.join(
            f'-e "s/{src_hex}/{dst_hex}/g"'
            for src_hex, dst_hex in hex_convert_config_dict.items()
        )
        converted_tmp_path = f'{dat_file_path}.converted'
        # sed alone only prints to stdout. Redirect one pass into a temp file
        # and swap it in, which also avoids the GNU/BSD `-i` difference.
        command = (
            f'sed {expressions} {shlex.quote(dat_file_path)}'
            f' > {shlex.quote(converted_tmp_path)}'
        )
        try:
            # LC_ALL=C forces byte-wise matching regardless of the host locale.
            subprocess.run(
                command,
                shell=True,
                check=True,
                env={**os.environ, 'LC_ALL': 'C'},
            )
            os.replace(converted_tmp_path, dat_file_path)
        finally:
            # Only still present when sed or the replace failed.
            if os.path.exists(converted_tmp_path):
                os.remove(converted_tmp_path)

    logger.debug('文字コード変換終了')
    return dat_file_path
def _import_to_ultmarc_table(dat_file: DatFile):
db = Database.get_instance()
try:

View File

@ -19,6 +19,8 @@ JSKULT_CONFIG_CALENDAR_HOLIDAY_LIST_FILE_NAME = os.environ['JSKULT_CONFIG_CALEND
# Required settings: os.environ[...] raises KeyError when a variable is unset.
VJSK_DATA_SEND_FOLDER = os.environ['VJSK_DATA_SEND_FOLDER']
VJSK_DATA_BUCKET = os.environ['VJSK_DATA_BUCKET']
JSKULT_CONFIG_CALENDAR_WHOLESALER_STOCK_FILE_NAME = os.environ['JSKULT_CONFIG_CALENDAR_WHOLESALER_STOCK_FILE_NAME']
# Folder and file name of the Ultmarc hex-conversion config.
JSKULT_CONFIG_CONVERT_FOLDER = os.environ['JSKULT_CONFIG_CONVERT_FOLDER']
JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME = os.environ['JSKULT_ULTMARC_HEX_CONVERT_CONFIG_FILE_NAME']
VJSK_DATA_RECEIVE_FOLDER = os.environ['VJSK_DATA_RECEIVE_FOLDER']
# Environment variables that have default values

View File

@ -0,0 +1,217 @@
{
"\\xF9\\xEB": "\\x87\\x82",
"\\xF9\\xE1": "\\x87\\x54",
"\\xF9\\xE0": "\\x87\\x55",
"\\xF9\\xDF": "\\x87\\x56",
"\\xF9\\xDE": "\\x87\\x57",
"\\xF9\\xDD": "\\x87\\x58",
"\\xF9\\xDC": "\\x87\\x59",
"\\xF9\\xDB": "\\x87\\x5A",
"\\xF6\\xE1": "\\x87\\x5B",
"\\xF6\\xE0": "\\x87\\x5C",
"\\xF6\\xDF": "\\x87\\x5D",
"\\xF9\\xDA": "\\xFA\\x41",
"\\xF9\\xE9": "\\x87\\x8A",
"\\xF9\\xE8": "\\x87\\x8B",
"\\xF9\\xE2": "\\x87\\x8C",
"\\xF9\\xD7": "\\xFA\\x69",
"\\xF9\\xD5": "\\xFA\\x6C",
"\\xF9\\xD1": "\\xFA\\x6E",
"\\xF9\\xCF": "\\xFA\\x70",
"\\xF6\\xD3": "\\xFA\\x6F",
"\\xF9\\xCC": "\\xFA\\x72",
"\\xF9\\xCE": "\\xFA\\x71",
"\\xF9\\xC8": "\\xFA\\x75",
"\\xF7\\xB1": "\\xFA\\x74",
"\\xF9\\xC7": "\\xFA\\x78",
"\\xF9\\xC4": "\\xFA\\x7E",
"\\xF9\\xFA": "\\xFA\\x81",
"\\xF8\\x60": "\\xFA\\x84",
"\\xF9\\xBE": "\\xFA\\x86",
"\\xF7\\x83": "\\xFA\\x88",
"\\xF9\\xBA": "\\xFA\\x8B",
"\\xF8\\x92": "\\xFA\\x8D",
"\\xF9\\xB9": "\\xFA\\x8E",
"\\xF9\\xB6": "\\xFA\\x92",
"\\xF9\\xB5": "\\xFA\\x93",
"\\xF9\\xB4": "\\xFA\\x95",
"\\xF9\\xAE": "\\xFA\\x9A",
"\\xF7\\xB9": "\\xFA\\x99",
"\\xF8\\x81": "\\xFA\\x9D",
"\\xF9\\xA7": "\\xFA\\xA1",
"\\xF9\\xA6": "\\xFA\\xA2",
"\\xF9\\xA5": "\\xFA\\xA4",
"\\xF9\\xA3": "\\xFA\\xA6",
"\\xF9\\xA1": "\\xFA\\xAA",
"\\xF9\\x9E": "\\xFA\\xAB",
"\\xF8\\x9A": "\\xFA\\xAD",
"\\xF9\\x9D": "\\xFA\\xAF",
"\\xF9\\x9A": "\\xFA\\xB2",
"\\xF9\\x99": "\\xFA\\xB0",
"\\xF7\\xA1": "\\xFA\\xB4",
"\\xF8\\x69": "\\xFA\\x67",
"\\xF9\\x92": "\\xFA\\xB9",
"\\xF9\\x91": "\\xFA\\xBA",
"\\xF9\\x90": "\\xFA\\xBB",
"\\xF9\\x8E": "\\xFA\\xBE",
"\\xF8\\\\\\": "\\xFA\\xC0",
"\\xF9\\x8D": "\\xFA\\xC5",
"\\xF9\\x8C": "\\xFA\\xC4",
"\\xF9\\x89": "\\xFA\\xC8",
"\\xF9\\x87": "\\xFA\\xC9",
"\\xF9\\x86": "\\xFA\\xCA",
"\\xF7\\x89": "\\xFA\\xCE",
"\\xF8\\x59": "\\xFA\\xD1",
"\\xF9\\x80": "\\xFA\\xCF",
"\\xF9\\x7D": "\\xFA\\xD4",
"\\xF9\\x7E": "\\xFA\\x63",
"\\xF9\\x7A": "\\xFA\\xD7",
"\\xF8\\x58": "\\xFA\\xD5",
"\\xF9\\x77": "\\xFA\\xD9",
"\\xF9\\x76": "\\xFA\\xDB",
"\\xF9\\x75": "\\xFA\\xDC",
"\\xF8\\x57": "\\xFA\\xDD",
"\\xF8\\x6F": "\\xFA\\xDE",
"\\xF9\\x7C": "\\xFA\\x66",
"\\xF9\\x71": "\\xFA\\xE1",
"\\xF9\\x6E": "\\xFA\\xE2",
"\\xF9\\x6F": "\\xFA\\xE5",
"\\xF8\\x70": "\\xFA\\xE6",
"\\xF9\\x6D": "\\xFA\\xE3",
"\\xF9\\x6A": "\\xFA\\x64",
"\\xF6\\xAC": "\\xFA\\xE7",
"\\xF9\\x69": "\\xFA\\xE9",
"\\xF7\\xD9": "\\xFA\\xF0",
"\\xF9\\x66": "\\xFA\\xF1",
"\\xF7\\xDC": "\\xFA\\xF3",
"\\xF9\\x65": "\\xFA\\xF5",
"\\xF9\\x64": "\\xFA\\xF6",
"\\xF8\\x80": "\\xFA\\xF9",
"\\xF9\\x5E": "\\xFA\\xFB",
"\\xF9\\\\\\": "\\xFB\\x42",
"\\xF9\\x5A": "\\xFB\\x45",
"\\xF9\\x59": "\\xFB\\x49",
"\\xF9\\x57": "\\xFB\\x47",
"\\xF9\\x56": "\\xFB\\x4A",
"\\xF9\\x52": "\\xFB\\x4B",
"\\xF7\\x51": "\\xFB\\x4C",
"\\xF9\\x4F": "\\xFB\\x4D",
"\\xF9\\x4E": "\\xFB\\x4E",
"\\xF9\\x4D": "\\xFB\\x4F",
"\\xF9\\x4B": "\\xFB\\x50",
"\\xF9\\x49": "\\xFB\\x51",
"\\xF9\\x47": "\\xFB\\x52",
"\\xF9\\x44": "\\xFB\\x54",
"\\xF9\\x41": "\\xFB\\x57",
"\\xF9\\x42": "\\xFB\\x55",
"\\xF8\\xFC": "\\xFB\\x59",
"\\xF7\\xDF": "\\xFB\\x5A",
"\\xF7\\xA9": "\\xFB\\x5B",
"\\xF8\\x6D": "\\xFB\\x5C",
"\\xF8\\xF7": "\\xFB\\x61",
"\\xF8\\xF5": "\\xFB\\x65",
"\\xF8\\xF1": "\\xFB\\x67",
"\\xF8\\xF3": "\\xFB\\x68",
"\\xF8\\xF0": "\\xFB\\x6A",
"\\xF8\\xEE": "\\xFB\\x6B",
"\\xF8\\xED": "\\xFB\\x6D",
"\\xF8\\xE8": "\\xFB\\x6F",
"\\xF8\\x53": "\\xFB\\x71",
"\\xF8\\xE7": "\\xFB\\x72",
"\\xF6\\xB9": "\\xFB\\x7C",
"\\xF8\\x77": "\\xFB\\x83",
"\\xF8\\xDC": "\\xFB\\x84",
"\\xF8\\x72": "\\xFB\\x85",
"\\xF8\\x73": "\\xFB\\x87",
"\\xF6\\xC4": "\\xFB\\x88",
"\\xF8\\xD2": "\\xFB\\x8A",
"\\xF7\\x5F": "\\xFB\\x8D",
"\\xF7\\xE5": "\\xFB\\x8F",
"\\xF8\\xCF": "\\xFA\\x5C",
"\\xF6\\xD5": "\\xFB\\x90",
"\\xF8\\xCA": "\\xFB\\x93",
"\\xF6\\xC6": "\\xFB\\x94",
"\\xF8\\xC9": "\\xFB\\x95",
"\\xF8\\x65": "\\xFA\\x60",
"\\xF8\\xC7": "\\xFB\\x9B",
"\\xF7\\xF3": "\\xFB\\x9E",
"\\xF9\\xF1": "\\xFB\\xA2",
"\\xF8\\x4C": "\\xFA\\x5D",
"\\xF8\\xBF": "\\xFB\\xA5",
"\\xF8\\xBE": "\\xFB\\xA7",
"\\xF8\\xBC": "\\xFB\\xA8",
"\\xF8\\xBB": "\\xFB\\xAA",
"\\xF8\\x4B": "\\xFB\\xAC",
"\\xF8\\xB9": "\\xFB\\xAD",
"\\xF8\\xB8": "\\xFB\\xAE",
"\\xF8\\x4A": "\\xFB\\xAF",
"\\xF6\\xCB": "\\xFB\\xB6",
"\\xF7\\xF0": "\\xFB\\xB8",
"\\xF8\\xB4": "\\xFB\\xB9",
"\\xF8\\xB2": "\\xFB\\xBB",
"\\xF8\\xB1": "\\xFB\\xBF",
"\\xF7\\xE6": "\\xFB\\xC0",
"\\xF7\\xE2": "\\xFB\\xC4",
"\\xF8\\x49": "\\xFB\\xC7",
"\\xF8\\xAE": "\\xFA\\x5F",
"\\xF8\\xAD": "\\xFB\\xCC",
"\\xF8\\xAB": "\\xFB\\xD6",
"\\xF8\\xAC": "\\xFB\\xD5",
"\\xF8\\xAA": "\\xFB\\xD9",
"\\xF8\\x48": "\\xFB\\xDC",
"\\xF7\\x66": "\\xFB\\xDD",
"\\xF8\\xA8": "\\xFA\\x5E",
"\\xF8\\xA4": "\\xFB\\xE2",
"\\xF8\\xA5": "\\xFB\\xE3",
"\\xF8\\xA0": "\\xFB\\xEB",
"\\xF8\\x9F": "\\xFB\\xEC",
"\\xF8\\x82": "\\xFB\\xED",
"\\xF8\\x83": "\\xFB\\xEE",
"\\xF8\\x84": "\\xFB\\xF0",
"\\xF8\\x85": "\\xFB\\xF1",
"\\xF8\\x87": "\\xFB\\xF3",
"\\xF8\\x8A": "\\xFB\\xF4",
"\\xF8\\x8B": "\\xFB\\xF5",
"\\xF8\\x47": "\\xFB\\xFB",
"\\xF7\\xE0": "\\xFC\\x42",
"\\xF8\\x8D": "\\xFC\\x41",
"\\xF7\\x4B": "\\xFC\\x43",
"\\xF8\\x8E": "\\xFC\\x44",
"\\xF8\\x8F": "\\xFC\\x46",
"\\xF6\\xD7": "\\xFC\\x4A",
"\\xF8\\x9D": "\\xFC\\x4B",
"\\xF6\\xAA": "\\xFB\\xF6",
"\\xF6\\xDC": "\\xFA\\xD0",
"\\xF7\\x72": "\\xFB\\x7D",
"\\xF7\\xAA": "\\xFB\\x92",
"\\xF8\\x4D": "\\xFB\\x97",
"\\xF8\\x5D": "\\xFA\\xBD",
"\\xF8\\x63": "\\xFA\\x7D",
"\\xF8\\x66": "\\xFA\\xD8",
"\\xF8\\x79": "\\xFB\\x89",
"\\xF8\\x86": "\\xFB\\xF2",
"\\xF8\\x8C": "\\xFB\\xFC",
"\\xF8\\xB0": "\\xFB\\xC1",
"\\xF8\\xBA": "\\xFB\\xAB",
"\\xF8\\xDA": "\\xFB\\x86",
"\\xF9\\x5D": "\\xFB\\x43",
"\\xF9\\x63": "\\xFA\\xF8",
"\\xF9\\x85": "\\xFA\\xCD",
"\\xF9\\xBD": "\\xFA\\x87",
"\\xF9\\xC2": "\\xFA\\x80",
"\\xF9\\xC6": "\\xFA\\x7A",
"\\xF9\\xD4": "\\xFA\\x6D",
"\\xF9\\x73": "\\xFA\\xE0",
"\\xF8\\xA2": "\\xFB\\xE9",
"\\xF9\\xAD": "\\xFA\\x9B",
"\\xF7\\x85": "\\xFA\\x9C",
"\\xF9\\x9B": "\\xFA\\xB1",
"\\xF7\\xF4": "\\xFB\\x7E",
"\\xF8\\xE2": "\\xFB\\x80",
"\\xF7\\xDE": "\\xFB\\x82",
"\\xF8\\x95": "\\xFB\\x9F",
"\\xF7\\xDD": "\\xFB\\xA0",
"\\xF6\\xDA": "\\xFB\\xB7",
"\\xF9\\xEC": "\\xFA\\x57",
"\\xF9\\xED": "\\xFA\\x56"
}