# Provenance: this script was distributed as a git format-patch
# (commit 70c05c5c7d37842091499031a0d4a4b289031aaa, subject "add srf") that
# creates the file `srf`. The patch subject says it addresses Win11 IME custom
# phrases where, of several current-date phrases, only the last one works.
"""
Work around the Microsoft IME refusing to keep more than one formatted
(date/time) custom phrase by rewriting its user-phrase lexicon file directly.

Author: Scruel Tao
"""
import os
import re
import pathlib
import traceback
from pathlib import Path

# Customization: list the custom phrases below, one per line, in the form
# "<pinyin> <position> <phrase>". The phrase itself may contain spaces.
# Alternatively, create a `phrases.txt` next to this script and put entries
# in the same format there; they are appended to the ones below.
PHRASES_TEXT = """
dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
time 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒
""".strip()

# Lexicon file that gets rewritten. `or ''` keeps the module importable when
# APPDATA is not set (os.path.join(None, ...) would raise TypeError); main()
# refuses to run against the default path in that case.
# NOTE(review): the file name says "Wubi" while the module docstring talks
# about the Pinyin IME - confirm this is the intended lexicon.
LEX_FILE = os.path.join(os.getenv('APPDATA') or '',
                        r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex')

# Byte offsets of the three 4-byte little-endian header fields main() patches.
HEADER_LEN = 16 + 4
PHRASE_64PCNT_POS = HEADER_LEN        # receives 64 + 4 * phrase_count
TOTAL_BYTES_POS = HEADER_LEN + 4      # receives the total file size
PHRASE_CNT_POS = HEADER_LEN + 8       # receives the phrase count

PADDED_ENCODING = 'utf-16le'
HEADER_BYTES = bytes('mschxudp', encoding='ascii')
HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING)
PHRASE_SEPARATOR_BYTES = b'\x00\x00'
PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
# Position of the first *stored* cumulative phrase length. The table holds
# one entry fewer than there are phrases.
PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40

# Last two bytes of every 16-byte phrase header. main() overwrites this with
# the value found in the existing file so new entries match the old ones.
phrase_fixed_last_bytes = b'\xA5\x2C'


def read_bytes(position, length=1, path=None):
    """Return `length` bytes read at `position` from the lexicon file.

    A negative `length` reads to the end of the file. `path` defaults to
    LEX_FILE. The file is opened read-only, so no write permission is needed.
    """
    with open(LEX_FILE if path is None else path, 'rb') as file:
        file.seek(position)
        return file.read(length)


def replace_bytes(position, value, path=None):
    """Overwrite the bytes at `position` in the lexicon file with `value`.

    Bytes after the overwritten span are left untouched. `path` defaults to
    LEX_FILE.
    """
    with open(LEX_FILE if path is None else path, 'rb+') as file:
        file.seek(position)
        file.write(value)


def bytes2int(data):
    """Decode `data` as an unsigned little-endian integer."""
    return int.from_bytes(data, byteorder='little')


def int2bytes(data, length=1):
    """Encode the integer `data` as `length` little-endian bytes."""
    return data.to_bytes(length, byteorder='little')


def padded_bytes(s):
    """Encode `s` as UTF-16LE, the text encoding used inside phrase blocks."""
    # UTF-16 never produces a single byte per character, so no manual
    # zero-padding is required.
    return s.encode(PADDED_ENCODING)


def get_phrase_header(header_pinyin_len, index):
    """Build the 16-byte header that precedes a phrase block.

    header_pinyin_len: stored as 2 bytes; callers pass
        16 + len(pinyin bytes) + separator size, i.e. the offset of the
        phrase text from the start of the block.
    index: candidate position, stored as a single byte.
    The final two bytes come from the module-level `phrase_fixed_last_bytes`.
    """
    return (b'\x10\x00\x10\x00' + int2bytes(header_pinyin_len, 2)
            + int2bytes(index) + b'\x06\x00\x00\x00\x00' + b'\x00\x00'
            + phrase_fixed_last_bytes)


def _take_utf16_field(data, start):
    """Return (field, next_start) for the NUL-terminated field at `start`.

    The scan advances in 2-byte steps so that a zero byte belonging to a
    character (e.g. U+4E00 encodes as 00 4E) is never mistaken for part of
    the terminator, and bytes such as 0x0A are treated like any other.
    """
    pos = start
    while pos + PHRASE_SEPARATOR_SIZE <= len(data):
        if data[pos:pos + PHRASE_SEPARATOR_SIZE] == PHRASE_SEPARATOR_BYTES:
            return data[start:pos], pos + PHRASE_SEPARATOR_SIZE
        pos += PHRASE_SEPARATOR_SIZE
    raise ValueError('unterminated UTF-16 field in phrase block')


def _load_phrase_items():
    """Collect the unique, non-empty phrase lines in first-seen order."""
    script_dir = os.path.dirname(os.path.realpath(__file__))
    extra_file = Path(script_dir) / 'phrases.txt'
    text = PHRASES_TEXT
    if extra_file.exists():
        try:
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM,
            # which would otherwise end up inside the first pinyin key.
            extra_text = extra_file.read_text('utf-8-sig')
        except UnicodeDecodeError:
            extra_text = extra_file.read_text('gbk')
        text += '\n' + extra_text.replace('\r\n', '\n')
    stripped = (line.strip() for line in text.split('\n'))
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(line for line in stripped if line))


def _init_lex_file(path):
    """Create a lexicon file that contains only an empty header."""
    with open(path, 'wb') as file:
        file.write(HEADER_BYTES)
        file.write((b'\x40' + b'\x00' * 3) * 3)
        file.write(b'\x00' * 4)              # phrase count = 0
        file.write(b'\x38\xd2\xa3\x65')      # presumably a timestamp - TODO confirm
        file.write(b'\x00' * 32)


def _read_existing_phrases(path):
    """Parse the phrase blocks already stored in the lexicon file.

    Returns (entries, fixed_tail). `entries` is a list of
    (0, pinyin_bytes, header_bytes, phrase_bytes) for every block whose
    byte 9 is zero (the original author marks the others as deleted
    phrases). `fixed_tail` is bytes 14..16 of the last block read, or None
    when the file holds no phrases.
    """
    with open(path, 'rb') as file:
        data = file.read()
    count = bytes2int(data[PHRASE_CNT_POS:PHRASE_CNT_POS + 4])
    blocks_start = PHRASE_LEN_FIRST_POS + 4 * (count - 1)

    entries = []
    fixed_tail = None
    block_begin = 0
    for i in range(count):
        if i == count - 1:
            # The last block has no stored end; it runs to the end of file.
            raw = data[blocks_start + block_begin:]
        else:
            table_pos = PHRASE_LEN_FIRST_POS + i * 4
            block_end = bytes2int(data[table_pos:table_pos + 4])
            raw = data[blocks_start + block_begin:blocks_start + block_end]
            block_begin = block_end
        pinyin_bytes, phrase_start = _take_utf16_field(raw, 16)
        phrase_bytes, _ = _take_utf16_field(raw, phrase_start)
        fixed_tail = raw[14:16]
        if raw[9:10] == b'\x00':
            entries.append((0, pinyin_bytes, raw[:16], phrase_bytes))
    return entries, fixed_tail


def _build_custom_entries(phrase_items):
    """Turn "<pinyin> <position> <phrase>" lines into phrase entries."""
    entries = []
    for item in phrase_items:
        fields = item.split(maxsplit=2)
        if len(fields) != 3:
            raise ValueError(
                'malformed phrase line, expected '
                f'"<pinyin> <position> <phrase>": {item!r}')
        pinyin, index, phrase = fields
        pinyin_bytes = padded_bytes(pinyin)
        header = get_phrase_header(
            16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
        entries.append((1, pinyin_bytes, header, padded_bytes(phrase)))
    return entries


def _write_phrases(path, phrase_list):
    """Replace the length table and phrase blocks; return the new file size."""
    blocks = [
        header + PHRASE_SEPARATOR_BYTES.join([pinyin_bytes, phrase_bytes, b''])
        for _, pinyin_bytes, header, phrase_bytes in phrase_list
    ]
    # The table stores the cumulative end of every block except the last.
    table = []
    running = 0
    for block in blocks[:-1]:
        running += len(block)
        table.append(int2bytes(running, length=4))
    payload = b''.join(table) + b''.join(blocks)
    with open(path, 'rb+') as file:
        file.seek(PHRASE_LEN_FIRST_POS)
        file.truncate()
        file.write(payload)
    return PHRASE_LEN_FIRST_POS + len(payload)


def main(lex_file=None):
    """Merge the configured custom phrases into the IME lexicon file.

    lex_file: path of the lexicon to rewrite; defaults to LEX_FILE.

    Existing entries are kept unless one of the custom phrases uses the same
    pinyin, in which case the custom phrases replace them. The file is
    created with an empty header when it does not exist yet.

    Raises RuntimeError when the default path is requested but APPDATA is
    not set, and ValueError for malformed phrase lines or phrase blocks.
    """
    global phrase_fixed_last_bytes

    if lex_file is None and not os.getenv('APPDATA'):
        raise RuntimeError('APPDATA is not set; cannot locate the lexicon file')
    lex_path = LEX_FILE if lex_file is None else lex_file

    phrase_items = _load_phrase_items()

    print(f"==================\n"
          f"Author: Scruel Tao\n"
          f"==================\n\n"
          f"正在修正巨硬拼音并添加\n"
          f"预置的日期格式化短语……\n"
          f"\n"
          f"短语数量:{len(phrase_items)}\n"
          )

    existing = []
    if not os.path.exists(lex_path):
        _init_lex_file(lex_path)
    else:
        existing, fixed_tail = _read_existing_phrases(lex_path)
        if fixed_tail is not None:
            # Must happen before the custom headers are built below.
            phrase_fixed_last_bytes = fixed_tail

    custom = _build_custom_entries(phrase_items)
    custom_pinyins = {entry[1] for entry in custom}
    phrase_list = [e for e in existing if e[1] not in custom_pinyins] + custom

    # Necessary fix, otherwise the order of phrases will be messed up.
    # The sort is stable, so phrases sharing a pinyin keep their listed order.
    phrase_list.sort(key=lambda entry: entry[1])

    total_size = _write_phrases(lex_path, phrase_list)

    # Fix file header
    replace_bytes(PHRASE_64PCNT_POS,
                  int2bytes(64 + len(phrase_list) * 4, length=4), lex_path)
    replace_bytes(PHRASE_CNT_POS,
                  int2bytes(len(phrase_list), length=4), lex_path)
    replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4), lex_path)


if __name__ == "__main__":
    try:
        main()
        print('Done')
    except Exception:
        traceback.print_exc()
    os.system('pause')