From 7eaa2ab35463584a05c2ec89f11fee65cf09f028 Mon Sep 17 00:00:00 2001 From: dcrenl Date: Fri, 28 Feb 2025 10:41:04 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- srf => python/srf.py | 348 +++++++++++++++++++++---------------------- 1 file changed, 174 insertions(+), 174 deletions(-) rename srf => python/srf.py (97%) diff --git a/srf b/python/srf.py similarity index 97% rename from srf rename to python/srf.py index 7048cbc..fd17fbe 100644 --- a/srf +++ b/python/srf.py @@ -1,175 +1,175 @@ -""" -修正微软拼音输入法无法添加多个格式化自定义短语的问题 -Author: Scruel Tao -""" -import os -import re -import pathlib -import traceback -from pathlib import Path - -# 自定义: 下面设置自定义短语,格式<拼音 位置 短语>,一行一项,短语中可放心包含空格 -# 或也可在该脚本的同目录下,创建一个 phrases.txt,在其中以同一格式写入自定义短语 -PHRASES_TEXT = """ -dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss% -dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss% -time 1 %yyyy%%MM%%dd%%HH%%mm%%ss% -time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss% -date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒 -""".strip() - -LEX_FILE = os.path.join(os.getenv('APPDATA'), - r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex') - -HEADER_LEN = 16 + 4 -PHRASE_64PCNT_POS = HEADER_LEN -TOTAL_BYTES_POS = HEADER_LEN + 4 -PHRASE_CNT_POS = HEADER_LEN + 8 - -PADDED_ENCODING = 'utf-16le' -HEADER_BYTES = bytes('mschxudp', encoding='ascii') -HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING) -PHRASE_SEPARATOR_BYTES = b'\x00\x00' -PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES) -PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40 - -phrase_fixed_last_bytes = b'\xA5\x2C' - -def read_bytes(position, length=1): - with open(LEX_FILE, 'rb+') as file: - file.seek(position) - return file.read(length) - - -def replace_bytes(position, value): - with open(LEX_FILE, 'rb+') as file: - file.seek(position) - data = file.read() - file.seek(position) - file.write(value + data[len(value):]) - - -def bytes2int(data): - return int.from_bytes(data, byteorder='little') - - -def int2bytes(data, length=1): - return int.to_bytes(data, length=length, byteorder='little') - - -def padded_bytes(s): - def padded_byte(c): - b = bytes(c, PADDED_ENCODING) - return b + b'\x00' if len(b) == 1 else b - return b''.join([padded_byte(c) for c in s]) - - -def get_phrase_header(header_pinyin_len, index): - return (b'\x10\x00\x10\x00' + int2bytes(header_pinyin_len, 2) - + int2bytes(index) + b'\x06\x00\x00\x00\x00' + b'\x00\x00' - + phrase_fixed_last_bytes) - -def main(): - global phrase_fixed_last_bytes - - current_dir = os.path.dirname(os.path.realpath(__file__)) - phrases_file = Path(current_dir) / 'phrases.txt' - phrases_text = PHRASES_TEXT - if phrases_file.exists(): - try: - phrases_file_text = phrases_file.read_text('utf-8') - except: - phrases_file_text = phrases_file.read_text('gbk') - phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n') - phrase_items = list(set([x.strip() for x in phrases_text.split('\n') if x])) - - print(f"==================\n" - f"Author: Scruel Tao\n" - f"==================\n\n" - f"正在修正巨硬拼音并添加\n" - f"预置的日期格式化短语……\n" - f"\n" - f"短语数量:{len(phrase_items)}\n" - ) - - last_phrase_pos = 0 - phrase_list = [] # (is_new, pinyin, header, phrase)) - - if not os.path.exists(LEX_FILE): - with open(LEX_FILE, 'wb') as f: - # Initing lex file - f.write(HEADER_BYTES) - f.write((b'\x40' + b'\x00' * 3) * 3) - f.write(b'\x00' * 4) - f.write(b'\x38\xd2\xa3\x65') - f.write(b'\x00' * 32) - else: - phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4)) - phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1) - - # Read existing phrases - for i in range(phrase_cnt): - if i == phrase_cnt - 1: - phrase_block_pos = phrase_block_len = -1 - else: - phrase_block_pos = bytes2int( - read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4)) - phrase_block_len = phrase_block_pos - last_phrase_pos - phrase_block_bytes = read_bytes( - phrase_block_first_pos + last_phrase_pos, phrase_block_len) - last_phrase_pos = phrase_block_pos - pinyin_bytes, phrase_bytes = re.match( - (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, phrase_block_bytes[16:]).groups() - phrase_fixed_last_bytes = phrase_block_bytes[14:16] - # Prevent deleted phrases - if phrase_block_bytes[9:10] == b'\x00': - phrase_list.append((0, pinyin_bytes, - phrase_block_bytes[:16], phrase_bytes)) - - # Fix custom phrases - for item in phrase_items: - if not item: - continue - pinyin, index, phrase = item.split(maxsplit=2) - pinyin_bytes = padded_bytes(pinyin) - phrase_bytes = padded_bytes(phrase) - phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes] - header = get_phrase_header( - 16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index)) - phrase_list.append((1, pinyin_bytes, header, phrase_bytes)) - - # Necessary fix, otherwise the order of phrases will be messed up. - phrase_list.sort(key=lambda x: x[1]) - - # Write phrases - tolast_phrase_pos = 0 - total_size = PHRASE_LEN_FIRST_POS - with open(LEX_FILE, 'rb+') as file: - file.seek(PHRASE_LEN_FIRST_POS) - file.truncate() - for _, *items in phrase_list[:-1]: - phrase_len = sum(map(len, items)) + PHRASE_SEPARATOR_SIZE * 2 - tolast_phrase_pos += phrase_len - file.write(int2bytes(tolast_phrase_pos, length=4)) - total_size += PHRASE_SEPARATOR_SIZE * 2 - for _, pinyin_bytes, header, phrase_bytes in phrase_list: - file.write(header) - data_bytes = PHRASE_SEPARATOR_BYTES.join( - [pinyin_bytes, phrase_bytes, b'']) - file.write(data_bytes) - total_size += len(header) + len(data_bytes) - - # Fix file header - replace_bytes(PHRASE_64PCNT_POS, int2bytes( - 64 + len(phrase_list) * 4, length=4)) - replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4)) - replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4)) - - -if __name__ == "__main__": - try: - main() - print('Done') - except: - traceback.print_exc() +""" +修正微软拼音输入法无法添加多个格式化自定义短语的问题 +Author: Scruel Tao +""" +import os +import re +import pathlib +import traceback +from pathlib import Path + +# 自定义: 下面设置自定义短语,格式<拼音 位置 短语>,一行一项,短语中可放心包含空格 +# 或也可在该脚本的同目录下,创建一个 phrases.txt,在其中以同一格式写入自定义短语 +PHRASES_TEXT = """ +dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss% +dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss% +time 1 %yyyy%%MM%%dd%%HH%%mm%%ss% +time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss% +date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒 +""".strip() + +LEX_FILE = os.path.join(os.getenv('APPDATA'), + r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex') + +HEADER_LEN = 16 + 4 +PHRASE_64PCNT_POS = HEADER_LEN +TOTAL_BYTES_POS = HEADER_LEN + 4 +PHRASE_CNT_POS = HEADER_LEN + 8 + +PADDED_ENCODING = 'utf-16le' +HEADER_BYTES = bytes('mschxudp', encoding='ascii') +HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING) +PHRASE_SEPARATOR_BYTES = b'\x00\x00' +PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES) +PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40 + +phrase_fixed_last_bytes = b'\xA5\x2C' + +def read_bytes(position, length=1): + with open(LEX_FILE, 'rb+') as file: + file.seek(position) + return file.read(length) + + +def replace_bytes(position, value): + with open(LEX_FILE, 'rb+') as file: + file.seek(position) + data = file.read() + file.seek(position) + file.write(value + data[len(value):]) + + +def bytes2int(data): + return int.from_bytes(data, byteorder='little') + + +def int2bytes(data, length=1): + return int.to_bytes(data, length=length, byteorder='little') + + +def padded_bytes(s): + def padded_byte(c): + b = bytes(c, PADDED_ENCODING) + return b + b'\x00' if len(b) == 1 else b + return b''.join([padded_byte(c) for c in s]) + + +def get_phrase_header(header_pinyin_len, index): + return (b'\x10\x00\x10\x00' + int2bytes(header_pinyin_len, 2) + + int2bytes(index) + b'\x06\x00\x00\x00\x00' + b'\x00\x00' + + phrase_fixed_last_bytes) + +def main(): + global phrase_fixed_last_bytes + + current_dir = os.path.dirname(os.path.realpath(__file__)) + phrases_file = Path(current_dir) / 'phrases.txt' + phrases_text = PHRASES_TEXT + if phrases_file.exists(): + try: + phrases_file_text = phrases_file.read_text('utf-8') + except: + phrases_file_text = phrases_file.read_text('gbk') + phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n') + phrase_items = list(set([x.strip() for x in phrases_text.split('\n') if x])) + + print(f"==================\n" + f"Author: Scruel Tao\n" + f"==================\n\n" + f"正在修正巨硬拼音并添加\n" + f"预置的日期格式化短语……\n" + f"\n" + f"短语数量:{len(phrase_items)}\n" + ) + + last_phrase_pos = 0 + phrase_list = [] # (is_new, pinyin, header, phrase)) + + if not os.path.exists(LEX_FILE): + with open(LEX_FILE, 'wb') as f: + # Initing lex file + f.write(HEADER_BYTES) + f.write((b'\x40' + b'\x00' * 3) * 3) + f.write(b'\x00' * 4) + f.write(b'\x38\xd2\xa3\x65') + f.write(b'\x00' * 32) + else: + phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4)) + phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1) + + # Read existing phrases + for i in range(phrase_cnt): + if i == phrase_cnt - 1: + phrase_block_pos = phrase_block_len = -1 + else: + phrase_block_pos = bytes2int( + read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4)) + phrase_block_len = phrase_block_pos - last_phrase_pos + phrase_block_bytes = read_bytes( + phrase_block_first_pos + last_phrase_pos, phrase_block_len) + last_phrase_pos = phrase_block_pos + pinyin_bytes, phrase_bytes = re.match( + (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, phrase_block_bytes[16:]).groups() + phrase_fixed_last_bytes = phrase_block_bytes[14:16] + # Prevent deleted phrases + if phrase_block_bytes[9:10] == b'\x00': + phrase_list.append((0, pinyin_bytes, + phrase_block_bytes[:16], phrase_bytes)) + + # Fix custom phrases + for item in phrase_items: + if not item: + continue + pinyin, index, phrase = item.split(maxsplit=2) + pinyin_bytes = padded_bytes(pinyin) + phrase_bytes = padded_bytes(phrase) + phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes] + header = get_phrase_header( + 16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index)) + phrase_list.append((1, pinyin_bytes, header, phrase_bytes)) + + # Necessary fix, otherwise the order of phrases will be messed up. + phrase_list.sort(key=lambda x: x[1]) + + # Write phrases + tolast_phrase_pos = 0 + total_size = PHRASE_LEN_FIRST_POS + with open(LEX_FILE, 'rb+') as file: + file.seek(PHRASE_LEN_FIRST_POS) + file.truncate() + for _, *items in phrase_list[:-1]: + phrase_len = sum(map(len, items)) + PHRASE_SEPARATOR_SIZE * 2 + tolast_phrase_pos += phrase_len + file.write(int2bytes(tolast_phrase_pos, length=4)) + total_size += PHRASE_SEPARATOR_SIZE * 2 + for _, pinyin_bytes, header, phrase_bytes in phrase_list: + file.write(header) + data_bytes = PHRASE_SEPARATOR_BYTES.join( + [pinyin_bytes, phrase_bytes, b'']) + file.write(data_bytes) + total_size += len(header) + len(data_bytes) + + # Fix file header + replace_bytes(PHRASE_64PCNT_POS, int2bytes( + 64 + len(phrase_list) * 4, length=4)) + replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4)) + replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4)) + + +if __name__ == "__main__": + try: + main() + print('Done') + except: + traceback.print_exc() os.system('pause') \ No newline at end of file