fix: sedコマンドによる文字の置き換えは想定外の変換が行われるため、1バイトずつ読み込んで処理するように修正。それに伴い、cp932で正しく開けるようになったのでエラー処理はなくした。

This commit is contained in:
shimoda.m@nds-tyo.co.jp 2023-10-02 16:43:50 +09:00
parent 7cdb3894b4
commit 510e42822b
3 changed files with 255 additions and 223 deletions

View File

@ -54,7 +54,7 @@ class DatFile:
DatFile: このクラスのインスタンス
"""
# cp932(Shift-JIS Windows拡張)でファイルを読み込む
file = open(local_file_path, encoding='cp932', errors='replace')
file = open(local_file_path, encoding='cp932')
instance = cls(file)
file.close()
return instance

View File

@ -1,7 +1,6 @@
"""アルトマークデータ処理"""
import json
import subprocess
from datetime import datetime
from src.aws.s3 import ConfigBucket, UltmarcBucket
@ -68,18 +67,51 @@ def exec_import():
def _convert_character_hex(dat_file_path: str, config_file_path: str) -> str:
    """Remap Shift_JIS double-byte code points in an Ultmarc data file.

    The input is scanned as raw bytes. Every two-byte sequence whose first
    byte is a Shift_JIS lead byte is looked up in the conversion table and,
    when present, replaced by the mapped bytes. Everything else is copied
    unchanged. The result is written to a new file next to the input; the
    input file itself is not modified.

    Args:
        dat_file_path (str): Path of the Ultmarc data file to convert.
        config_file_path (str): Path of a UTF-8 JSON file mapping source
            code points to destination code points, both written as hex
            strings (e.g. ``{"F9EB": "8782"}``).

    Returns:
        str: Path of the converted file (``<dat_file_path>.converted``).

    Raises:
        ValueError: If a key or value in the conversion table is not valid
            hex.
    """
    logger.debug('文字コード変換開始')
    # Load the conversion settings.
    with open(config_file_path, 'r', encoding='utf-8') as f:
        hex_convert_config_dict: dict = json.load(f)
    # Decode the hex table into bytes once, so the scan below is a plain
    # bytes-to-bytes lookup instead of a hex round trip per character.
    convert_table = {
        bytes.fromhex(src_hex): bytes.fromhex(dst_hex)
        for src_hex, dst_hex in hex_convert_config_dict.items()
    }

    converted_file_path = f'{dat_file_path}.converted'
    chunk_size = 64 * 1024
    # A lead byte that ends a chunk is carried over so that it can be paired
    # with the first byte of the next chunk.
    pending = b''
    with open(dat_file_path, 'rb') as org_file, open(converted_file_path, 'wb') as dest_file:
        while True:
            chunk = org_file.read(chunk_size)
            if not chunk:
                break
            converted, pending = _convert_sjis_chunk(pending + chunk, convert_table)
            dest_file.write(converted)
        # A lead byte with nothing after it means the file is truncated.
        # Write it through unchanged rather than silently dropping data.
        dest_file.write(pending)
    logger.debug('文字コード変換終了')
    return converted_file_path


def _convert_sjis_chunk(data: bytes, convert_table: dict) -> tuple:
    """Convert one buffer of Shift_JIS bytes using ``convert_table``.

    Args:
        data (bytes): Buffer to convert. It must start on a character
            boundary.
        convert_table (dict): Mapping of two-byte source sequences to their
            replacement bytes.

    Returns:
        tuple: ``(converted, pending)``. ``pending`` is a trailing lead byte
        whose second byte is not in ``data`` yet, or ``b''`` if there is none.
    """
    out = bytearray()
    pos = 0
    size = len(data)
    while pos < size:
        lead = data[pos]
        # Lead-byte ranges of Shift_JIS double-byte characters.
        if 0x81 <= lead <= 0x9F or 0xE0 <= lead <= 0xFC:
            if pos + 1 >= size:
                # The second byte is not available yet; hand the lead byte
                # back to the caller.
                return bytes(out), data[pos:]
            pair = data[pos:pos + 2]
            out += convert_table.get(pair, pair)
            pos += 2
        else:
            # Single-byte characters are copied as they are.
            out.append(lead)
            pos += 1
    return bytes(out), b''
def _import_to_ultmarc_table(dat_file: DatFile):

View File

@ -1,217 +1,217 @@
{
"\\xF9\\xEB": "\\x87\\x82",
"\\xF9\\xE1": "\\x87\\x54",
"\\xF9\\xE0": "\\x87\\x55",
"\\xF9\\xDF": "\\x87\\x56",
"\\xF9\\xDE": "\\x87\\x57",
"\\xF9\\xDD": "\\x87\\x58",
"\\xF9\\xDC": "\\x87\\x59",
"\\xF9\\xDB": "\\x87\\x5A",
"\\xF6\\xE1": "\\x87\\x5B",
"\\xF6\\xE0": "\\x87\\x5C",
"\\xF6\\xDF": "\\x87\\x5D",
"\\xF9\\xDA": "\\xFA\\x41",
"\\xF9\\xE9": "\\x87\\x8A",
"\\xF9\\xE8": "\\x87\\x8B",
"\\xF9\\xE2": "\\x87\\x8C",
"\\xF9\\xD7": "\\xFA\\x69",
"\\xF9\\xD5": "\\xFA\\x6C",
"\\xF9\\xD1": "\\xFA\\x6E",
"\\xF9\\xCF": "\\xFA\\x70",
"\\xF6\\xD3": "\\xFA\\x6F",
"\\xF9\\xCC": "\\xFA\\x72",
"\\xF9\\xCE": "\\xFA\\x71",
"\\xF9\\xC8": "\\xFA\\x75",
"\\xF7\\xB1": "\\xFA\\x74",
"\\xF9\\xC7": "\\xFA\\x78",
"\\xF9\\xC4": "\\xFA\\x7E",
"\\xF9\\xFA": "\\xFA\\x81",
"\\xF8\\x60": "\\xFA\\x84",
"\\xF9\\xBE": "\\xFA\\x86",
"\\xF7\\x83": "\\xFA\\x88",
"\\xF9\\xBA": "\\xFA\\x8B",
"\\xF8\\x92": "\\xFA\\x8D",
"\\xF9\\xB9": "\\xFA\\x8E",
"\\xF9\\xB6": "\\xFA\\x92",
"\\xF9\\xB5": "\\xFA\\x93",
"\\xF9\\xB4": "\\xFA\\x95",
"\\xF9\\xAE": "\\xFA\\x9A",
"\\xF7\\xB9": "\\xFA\\x99",
"\\xF8\\x81": "\\xFA\\x9D",
"\\xF9\\xA7": "\\xFA\\xA1",
"\\xF9\\xA6": "\\xFA\\xA2",
"\\xF9\\xA5": "\\xFA\\xA4",
"\\xF9\\xA3": "\\xFA\\xA6",
"\\xF9\\xA1": "\\xFA\\xAA",
"\\xF9\\x9E": "\\xFA\\xAB",
"\\xF8\\x9A": "\\xFA\\xAD",
"\\xF9\\x9D": "\\xFA\\xAF",
"\\xF9\\x9A": "\\xFA\\xB2",
"\\xF9\\x99": "\\xFA\\xB0",
"\\xF7\\xA1": "\\xFA\\xB4",
"\\xF8\\x69": "\\xFA\\x67",
"\\xF9\\x92": "\\xFA\\xB9",
"\\xF9\\x91": "\\xFA\\xBA",
"\\xF9\\x90": "\\xFA\\xBB",
"\\xF9\\x8E": "\\xFA\\xBE",
"\\xF8\\\\\\": "\\xFA\\xC0",
"\\xF9\\x8D": "\\xFA\\xC5",
"\\xF9\\x8C": "\\xFA\\xC4",
"\\xF9\\x89": "\\xFA\\xC8",
"\\xF9\\x87": "\\xFA\\xC9",
"\\xF9\\x86": "\\xFA\\xCA",
"\\xF7\\x89": "\\xFA\\xCE",
"\\xF8\\x59": "\\xFA\\xD1",
"\\xF9\\x80": "\\xFA\\xCF",
"\\xF9\\x7D": "\\xFA\\xD4",
"\\xF9\\x7E": "\\xFA\\x63",
"\\xF9\\x7A": "\\xFA\\xD7",
"\\xF8\\x58": "\\xFA\\xD5",
"\\xF9\\x77": "\\xFA\\xD9",
"\\xF9\\x76": "\\xFA\\xDB",
"\\xF9\\x75": "\\xFA\\xDC",
"\\xF8\\x57": "\\xFA\\xDD",
"\\xF8\\x6F": "\\xFA\\xDE",
"\\xF9\\x7C": "\\xFA\\x66",
"\\xF9\\x71": "\\xFA\\xE1",
"\\xF9\\x6E": "\\xFA\\xE2",
"\\xF9\\x6F": "\\xFA\\xE5",
"\\xF8\\x70": "\\xFA\\xE6",
"\\xF9\\x6D": "\\xFA\\xE3",
"\\xF9\\x6A": "\\xFA\\x64",
"\\xF6\\xAC": "\\xFA\\xE7",
"\\xF9\\x69": "\\xFA\\xE9",
"\\xF7\\xD9": "\\xFA\\xF0",
"\\xF9\\x66": "\\xFA\\xF1",
"\\xF7\\xDC": "\\xFA\\xF3",
"\\xF9\\x65": "\\xFA\\xF5",
"\\xF9\\x64": "\\xFA\\xF6",
"\\xF8\\x80": "\\xFA\\xF9",
"\\xF9\\x5E": "\\xFA\\xFB",
"\\xF9\\\\\\": "\\xFB\\x42",
"\\xF9\\x5A": "\\xFB\\x45",
"\\xF9\\x59": "\\xFB\\x49",
"\\xF9\\x57": "\\xFB\\x47",
"\\xF9\\x56": "\\xFB\\x4A",
"\\xF9\\x52": "\\xFB\\x4B",
"\\xF7\\x51": "\\xFB\\x4C",
"\\xF9\\x4F": "\\xFB\\x4D",
"\\xF9\\x4E": "\\xFB\\x4E",
"\\xF9\\x4D": "\\xFB\\x4F",
"\\xF9\\x4B": "\\xFB\\x50",
"\\xF9\\x49": "\\xFB\\x51",
"\\xF9\\x47": "\\xFB\\x52",
"\\xF9\\x44": "\\xFB\\x54",
"\\xF9\\x41": "\\xFB\\x57",
"\\xF9\\x42": "\\xFB\\x55",
"\\xF8\\xFC": "\\xFB\\x59",
"\\xF7\\xDF": "\\xFB\\x5A",
"\\xF7\\xA9": "\\xFB\\x5B",
"\\xF8\\x6D": "\\xFB\\x5C",
"\\xF8\\xF7": "\\xFB\\x61",
"\\xF8\\xF5": "\\xFB\\x65",
"\\xF8\\xF1": "\\xFB\\x67",
"\\xF8\\xF3": "\\xFB\\x68",
"\\xF8\\xF0": "\\xFB\\x6A",
"\\xF8\\xEE": "\\xFB\\x6B",
"\\xF8\\xED": "\\xFB\\x6D",
"\\xF8\\xE8": "\\xFB\\x6F",
"\\xF8\\x53": "\\xFB\\x71",
"\\xF8\\xE7": "\\xFB\\x72",
"\\xF6\\xB9": "\\xFB\\x7C",
"\\xF8\\x77": "\\xFB\\x83",
"\\xF8\\xDC": "\\xFB\\x84",
"\\xF8\\x72": "\\xFB\\x85",
"\\xF8\\x73": "\\xFB\\x87",
"\\xF6\\xC4": "\\xFB\\x88",
"\\xF8\\xD2": "\\xFB\\x8A",
"\\xF7\\x5F": "\\xFB\\x8D",
"\\xF7\\xE5": "\\xFB\\x8F",
"\\xF8\\xCF": "\\xFA\\x5C",
"\\xF6\\xD5": "\\xFB\\x90",
"\\xF8\\xCA": "\\xFB\\x93",
"\\xF6\\xC6": "\\xFB\\x94",
"\\xF8\\xC9": "\\xFB\\x95",
"\\xF8\\x65": "\\xFA\\x60",
"\\xF8\\xC7": "\\xFB\\x9B",
"\\xF7\\xF3": "\\xFB\\x9E",
"\\xF9\\xF1": "\\xFB\\xA2",
"\\xF8\\x4C": "\\xFA\\x5D",
"\\xF8\\xBF": "\\xFB\\xA5",
"\\xF8\\xBE": "\\xFB\\xA7",
"\\xF8\\xBC": "\\xFB\\xA8",
"\\xF8\\xBB": "\\xFB\\xAA",
"\\xF8\\x4B": "\\xFB\\xAC",
"\\xF8\\xB9": "\\xFB\\xAD",
"\\xF8\\xB8": "\\xFB\\xAE",
"\\xF8\\x4A": "\\xFB\\xAF",
"\\xF6\\xCB": "\\xFB\\xB6",
"\\xF7\\xF0": "\\xFB\\xB8",
"\\xF8\\xB4": "\\xFB\\xB9",
"\\xF8\\xB2": "\\xFB\\xBB",
"\\xF8\\xB1": "\\xFB\\xBF",
"\\xF7\\xE6": "\\xFB\\xC0",
"\\xF7\\xE2": "\\xFB\\xC4",
"\\xF8\\x49": "\\xFB\\xC7",
"\\xF8\\xAE": "\\xFA\\x5F",
"\\xF8\\xAD": "\\xFB\\xCC",
"\\xF8\\xAB": "\\xFB\\xD6",
"\\xF8\\xAC": "\\xFB\\xD5",
"\\xF8\\xAA": "\\xFB\\xD9",
"\\xF8\\x48": "\\xFB\\xDC",
"\\xF7\\x66": "\\xFB\\xDD",
"\\xF8\\xA8": "\\xFA\\x5E",
"\\xF8\\xA4": "\\xFB\\xE2",
"\\xF8\\xA5": "\\xFB\\xE3",
"\\xF8\\xA0": "\\xFB\\xEB",
"\\xF8\\x9F": "\\xFB\\xEC",
"\\xF8\\x82": "\\xFB\\xED",
"\\xF8\\x83": "\\xFB\\xEE",
"\\xF8\\x84": "\\xFB\\xF0",
"\\xF8\\x85": "\\xFB\\xF1",
"\\xF8\\x87": "\\xFB\\xF3",
"\\xF8\\x8A": "\\xFB\\xF4",
"\\xF8\\x8B": "\\xFB\\xF5",
"\\xF8\\x47": "\\xFB\\xFB",
"\\xF7\\xE0": "\\xFC\\x42",
"\\xF8\\x8D": "\\xFC\\x41",
"\\xF7\\x4B": "\\xFC\\x43",
"\\xF8\\x8E": "\\xFC\\x44",
"\\xF8\\x8F": "\\xFC\\x46",
"\\xF6\\xD7": "\\xFC\\x4A",
"\\xF8\\x9D": "\\xFC\\x4B",
"\\xF6\\xAA": "\\xFB\\xF6",
"\\xF6\\xDC": "\\xFA\\xD0",
"\\xF7\\x72": "\\xFB\\x7D",
"\\xF7\\xAA": "\\xFB\\x92",
"\\xF8\\x4D": "\\xFB\\x97",
"\\xF8\\x5D": "\\xFA\\xBD",
"\\xF8\\x63": "\\xFA\\x7D",
"\\xF8\\x66": "\\xFA\\xD8",
"\\xF8\\x79": "\\xFB\\x89",
"\\xF8\\x86": "\\xFB\\xF2",
"\\xF8\\x8C": "\\xFB\\xFC",
"\\xF8\\xB0": "\\xFB\\xC1",
"\\xF8\\xBA": "\\xFB\\xAB",
"\\xF8\\xDA": "\\xFB\\x86",
"\\xF9\\x5D": "\\xFB\\x43",
"\\xF9\\x63": "\\xFA\\xF8",
"\\xF9\\x85": "\\xFA\\xCD",
"\\xF9\\xBD": "\\xFA\\x87",
"\\xF9\\xC2": "\\xFA\\x80",
"\\xF9\\xC6": "\\xFA\\x7A",
"\\xF9\\xD4": "\\xFA\\x6D",
"\\xF9\\x73": "\\xFA\\xE0",
"\\xF8\\xA2": "\\xFB\\xE9",
"\\xF9\\xAD": "\\xFA\\x9B",
"\\xF7\\x85": "\\xFA\\x9C",
"\\xF9\\x9B": "\\xFA\\xB1",
"\\xF7\\xF4": "\\xFB\\x7E",
"\\xF8\\xE2": "\\xFB\\x80",
"\\xF7\\xDE": "\\xFB\\x82",
"\\xF8\\x95": "\\xFB\\x9F",
"\\xF7\\xDD": "\\xFB\\xA0",
"\\xF6\\xDA": "\\xFB\\xB7",
"\\xF9\\xEC": "\\xFA\\x57",
"\\xF9\\xED": "\\xFA\\x56"
"F9EB": "8782",
"F9E1": "8754",
"F9E0": "8755",
"F9DF": "8756",
"F9DE": "8757",
"F9DD": "8758",
"F9DC": "8759",
"F9DB": "875A",
"F6E1": "875B",
"F6E0": "875C",
"F6DF": "875D",
"F9DA": "FA41",
"F9E9": "878A",
"F9E8": "878B",
"F9E2": "878C",
"F9D7": "FA69",
"F9D5": "FA6C",
"F9D1": "FA6E",
"F9CF": "FA70",
"F6D3": "FA6F",
"F9CC": "FA72",
"F9CE": "FA71",
"F9C8": "FA75",
"F7B1": "FA74",
"F9C7": "FA78",
"F9C4": "FA7E",
"F9FA": "FA81",
"F860": "FA84",
"F9BE": "FA86",
"F783": "FA88",
"F9BA": "FA8B",
"F892": "FA8D",
"F9B9": "FA8E",
"F9B6": "FA92",
"F9B5": "FA93",
"F9B4": "FA95",
"F9AE": "FA9A",
"F7B9": "FA99",
"F881": "FA9D",
"F9A7": "FAA1",
"F9A6": "FAA2",
"F9A5": "FAA4",
"F9A3": "FAA6",
"F9A1": "FAAA",
"F99E": "FAAB",
"F89A": "FAAD",
"F99D": "FAAF",
"F99A": "FAB2",
"F999": "FAB0",
"F7A1": "FAB4",
"F869": "FA67",
"F992": "FAB9",
"F991": "FABA",
"F990": "FABB",
"F98E": "FABE",
"F85C": "FAC0",
"F98D": "FAC5",
"F98C": "FAC4",
"F989": "FAC8",
"F987": "FAC9",
"F986": "FACA",
"F789": "FACE",
"F859": "FAD1",
"F980": "FACF",
"F97D": "FAD4",
"F97E": "FA63",
"F97A": "FAD7",
"F858": "FAD5",
"F977": "FAD9",
"F976": "FADB",
"F975": "FADC",
"F857": "FADD",
"F86F": "FADE",
"F97C": "FA66",
"F971": "FAE1",
"F96E": "FAE2",
"F96F": "FAE5",
"F870": "FAE6",
"F96D": "FAE3",
"F96A": "FA64",
"F6AC": "FAE7",
"F969": "FAE9",
"F7D9": "FAF0",
"F966": "FAF1",
"F7DC": "FAF3",
"F965": "FAF5",
"F964": "FAF6",
"F880": "FAF9",
"F95E": "FAFB",
"F95C": "FB42",
"F95A": "FB45",
"F959": "FB49",
"F957": "FB47",
"F956": "FB4A",
"F952": "FB4B",
"F751": "FB4C",
"F94F": "FB4D",
"F94E": "FB4E",
"F94D": "FB4F",
"F94B": "FB50",
"F949": "FB51",
"F947": "FB52",
"F944": "FB54",
"F941": "FB57",
"F942": "FB55",
"F8FC": "FB59",
"F7DF": "FB5A",
"F7A9": "FB5B",
"F86D": "FB5C",
"F8F7": "FB61",
"F8F5": "FB65",
"F8F1": "FB67",
"F8F3": "FB68",
"F8F0": "FB6A",
"F8EE": "FB6B",
"F8ED": "FB6D",
"F8E8": "FB6F",
"F853": "FB71",
"F8E7": "FB72",
"F6B9": "FB7C",
"F877": "FB83",
"F8DC": "FB84",
"F872": "FB85",
"F873": "FB87",
"F6C4": "FB88",
"F8D2": "FB8A",
"F75F": "FB8D",
"F7E5": "FB8F",
"F8CF": "FA5C",
"F6D5": "FB90",
"F8CA": "FB93",
"F6C6": "FB94",
"F8C9": "FB95",
"F865": "FA60",
"F8C7": "FB9B",
"F7F3": "FB9E",
"F9F1": "FBA2",
"F84C": "FA5D",
"F8BF": "FBA5",
"F8BE": "FBA7",
"F8BC": "FBA8",
"F8BB": "FBAA",
"F84B": "FBAC",
"F8B9": "FBAD",
"F8B8": "FBAE",
"F84A": "FBAF",
"F6CB": "FBB6",
"F7F0": "FBB8",
"F8B4": "FBB9",
"F8B2": "FBBB",
"F8B1": "FBBF",
"F7E6": "FBC0",
"F7E2": "FBC4",
"F849": "FBC7",
"F8AE": "FA5F",
"F8AD": "FBCC",
"F8AB": "FBD6",
"F8AC": "FBD5",
"F8AA": "FBD9",
"F848": "FBDC",
"F766": "FBDD",
"F8A8": "FA5E",
"F8A4": "FBE2",
"F8A5": "FBE3",
"F8A0": "FBEB",
"F89F": "FBEC",
"F882": "FBED",
"F883": "FBEE",
"F884": "FBF0",
"F885": "FBF1",
"F887": "FBF3",
"F88A": "FBF4",
"F88B": "FBF5",
"F847": "FBFB",
"F7E0": "FC42",
"F88D": "FC41",
"F74B": "FC43",
"F88E": "FC44",
"F88F": "FC46",
"F6D7": "FC4A",
"F89D": "FC4B",
"F6AA": "FBF6",
"F6DC": "FAD0",
"F772": "FB7D",
"F7AA": "FB92",
"F84D": "FB97",
"F85D": "FABD",
"F863": "FA7D",
"F866": "FAD8",
"F879": "FB89",
"F886": "FBF2",
"F88C": "FBFC",
"F8B0": "FBC1",
"F8BA": "FBAB",
"F8DA": "FB86",
"F95D": "FB43",
"F963": "FAF8",
"F985": "FACD",
"F9BD": "FA87",
"F9C2": "FA80",
"F9C6": "FA7A",
"F9D4": "FA6D",
"F973": "FAE0",
"F8A2": "FBE9",
"F9AD": "FA9B",
"F785": "FA9C",
"F99B": "FAB1",
"F7F4": "FB7E",
"F8E2": "FB80",
"F7DE": "FB82",
"F895": "FB9F",
"F7DD": "FBA0",
"F6DA": "FBB7",
"F9EC": "FA57",
"F9ED": "FA56"
}