From 70c05c5c7d37842091499031a0d4a4b289031aaa Mon Sep 17 00:00:00 2001
From: dcrenl <1017260+dcren@user.noreply.gitee.com>
Date: Fri, 28 Feb 2025 02:38:13 +0000
Subject: [PATCH] =?UTF-8?q?add=20srf.=20=E8=A7=A3=E5=86=B3win11=E8=BE=93?=
 =?UTF-8?q?=E5=85=A5=E6=B3=95=E8=87=AA=E5=AE=9A=E4=B9=89=E7=9F=AD=E8=AF=AD?=
 =?UTF-8?q?=E5=A4=9A=E4=B8=AA=E5=BD=93=E5=89=8D=E6=97=A5=E6=9C=9F=E7=9F=AD?=
 =?UTF-8?q?=E8=AF=AD=E5=8F=AA=E6=9C=89=E6=9C=80=E5=90=8E=E4=B8=80=E4=B8=AA?=
 =?UTF-8?q?=E6=AD=A3=E5=B8=B8=E5=AE=9E=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: dcrenl <1017260+dcren@user.noreply.gitee.com>
---
 srf | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 srf

diff --git a/srf b/srf
new file mode 100644
index 0000000..7048cbc
--- /dev/null
+++ b/srf
@@ -0,0 +1,175 @@
+"""
+Fix Microsoft Pinyin IME being unable to add multiple formatted custom phrases.
+Author: Scruel Tao
+"""
+import os
+import re
+import pathlib
+import traceback
+from pathlib import Path
+
+# Custom phrases, one per line as <pinyin position phrase>; phrases may contain spaces.
+# They can also be put, in the same format, in a phrases.txt next to this script.
+PHRASES_TEXT = """
+dcr 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
+dcrenl 1 dcrenl:%yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
+time 1 %yyyy%%MM%%dd%%HH%%mm%%ss%
+time 2 %yyyy%-%MM%-%dd% %HH%:%mm%:%ss%
+date 1 %yyyy%年%MM%月%dd%日 %HH%时%mm%分%ss%秒
+""".strip()
+
+LEX_FILE = os.path.join(os.getenv('APPDATA'),
+                        r'Microsoft\InputMethod\Chs\ChsWubiEUDPv1.lex')  # TypeError at import time if APPDATA is unset
+
+HEADER_LEN = 16 + 4  # 16 == len(HEADER_BYTES); +4 skips the first 4-byte field after it
+PHRASE_64PCNT_POS = HEADER_LEN  # main() stores 64 + 4 * len(phrase_list) here
+TOTAL_BYTES_POS = HEADER_LEN + 4  # main() stores its running total_size here
+PHRASE_CNT_POS = HEADER_LEN + 8  # 4-byte little-endian phrase count
+
+PADDED_ENCODING = 'utf-16le'  # encoding used for pinyin and phrase text
+HEADER_BYTES = bytes('mschxudp', encoding='ascii')  # 8 ASCII bytes
+HEADER_BYTES = HEADER_BYTES + bytes('\x02\x60\x01\x00', PADDED_ENCODING)  # + 4 UTF-16LE chars (8 bytes)
+PHRASE_SEPARATOR_BYTES = b'\x00\x00'  # written after the pinyin and after the phrase of each block
+PHRASE_SEPARATOR_SIZE = len(PHRASE_SEPARATOR_BYTES)
+PHRASE_LEN_FIRST_POS = PHRASE_CNT_POS + 40  # where main() truncates and writes the 4-byte phrase offsets
+
+phrase_fixed_last_bytes = b'\xA5\x2C'  # last 2 header bytes; main() replaces it with bytes 14:16 of existing blocks
+
+def read_bytes(position, length=1):  # -> `length` bytes of LEX_FILE starting at `position`
+    with open(LEX_FILE, 'rb') as file:  # read-only: 'rb+' needlessly required write access
+        file.seek(position)
+        return file.read(length)  # a negative length reads through to EOF
+
+
+def replace_bytes(position, value):
+    """Overwrite len(value) bytes of LEX_FILE in place, starting at `position`."""
+    # 'rb+' writes leave later bytes intact, so re-reading/rewriting the tail was redundant.
+    with open(LEX_FILE, 'rb+') as file:
+        file.seek(position)
+        file.write(value)
+
+
+def bytes2int(data):  # decode little-endian bytes into an unsigned int
+    return int.from_bytes(data, 'little')
+
+
+def int2bytes(data, length=1):  # encode an int as `length` little-endian bytes
+    return int.to_bytes(data, length, 'little')
+
+
+def padded_bytes(s):
+    """Encode the text `s` with PADDED_ENCODING and return the bytes."""
+    # UTF-16LE emits 2 or 4 bytes per character, never 1, so the former
+    # per-character zero-padding branch was dead code; encode in one call.
+    return s.encode(PADDED_ENCODING)
+
+
+def get_phrase_header(header_pinyin_len, index):  # -> header bytes for one phrase block
+    length_field, index_field = int2bytes(header_pinyin_len, 2), int2bytes(index)
+    return b''.join((b'\x10\x00\x10\x00', length_field, index_field,
+                     b'\x06\x00\x00\x00\x00', b'\x00\x00', phrase_fixed_last_bytes))
+
+def main():
+    """Merge the configured phrases into LEX_FILE and rewrite its tables."""
+    global phrase_fixed_last_bytes
+    current_dir = os.path.dirname(os.path.realpath(__file__))
+    phrases_file = Path(current_dir) / 'phrases.txt'
+    phrases_text = PHRASES_TEXT
+    if phrases_file.exists():
+        try:
+            phrases_file_text = phrases_file.read_text('utf-8')
+        except UnicodeDecodeError:  # only a decode failure warrants the GBK retry
+            phrases_file_text = phrases_file.read_text('gbk')
+        phrases_text += '\n' + phrases_file_text.replace('\r\n', '\n')
+    # Strip before filtering so whitespace-only lines are dropped, and dedupe
+    # with dict.fromkeys: unlike set() it keeps a deterministic source order.
+    stripped_lines = (x.strip() for x in phrases_text.split('\n'))
+    phrase_items = list(dict.fromkeys(x for x in stripped_lines if x))
+
+    print(f"==================\n"
+          f"Author: Scruel Tao\n"
+          f"==================\n\n"
+          f"正在修正巨硬拼音并添加\n"
+          f"预置的日期格式化短语……\n"
+          f"\n"
+          f"短语数量：{len(phrase_items)}\n")
+
+    last_phrase_pos = 0
+    phrase_list = []  # (is_new, pinyin, header, phrase)
+
+    if not os.path.exists(LEX_FILE):
+        with open(LEX_FILE, 'wb') as f:
+            # Initing lex file
+            f.write(HEADER_BYTES)
+            f.write((b'\x40' + b'\x00' * 3) * 3)
+            f.write(b'\x00' * 4)
+            f.write(b'\x38\xd2\xa3\x65')
+            f.write(b'\x00' * 32)
+    else:
+        phrase_cnt = bytes2int(read_bytes(PHRASE_CNT_POS, 4))
+        phrase_block_first_pos = PHRASE_LEN_FIRST_POS + 4 * (phrase_cnt - 1)
+
+        # Read existing phrases
+        for i in range(phrase_cnt):
+            if i == phrase_cnt - 1:
+                phrase_block_pos = phrase_block_len = -1
+            else:
+                phrase_block_pos = bytes2int(
+                    read_bytes(PHRASE_LEN_FIRST_POS + i * 4, 4))
+                phrase_block_len = phrase_block_pos - last_phrase_pos
+            phrase_block_bytes = read_bytes(
+                phrase_block_first_pos + last_phrase_pos, phrase_block_len)
+            last_phrase_pos = phrase_block_pos
+            pinyin_bytes, phrase_bytes = re.match(
+                (b'(.+)' + PHRASE_SEPARATOR_BYTES) * 2, phrase_block_bytes[16:],
+                re.DOTALL).groups()  # '.' must also match 0x0A bytes (e.g. U+4E0A)
+            phrase_fixed_last_bytes = phrase_block_bytes[14:16]
+            # Prevent deleted phrases
+            if phrase_block_bytes[9:10] == b'\x00':
+                phrase_list.append((0, pinyin_bytes,
+                                    phrase_block_bytes[:16], phrase_bytes))
+
+    # Fix custom phrases
+    for item in phrase_items:
+        pinyin, index, phrase = item.split(maxsplit=2)
+        pinyin_bytes = padded_bytes(pinyin)
+        phrase_bytes = padded_bytes(phrase)
+        phrase_list = [x for x in phrase_list if x[0] or not x[1] == pinyin_bytes]
+        header = get_phrase_header(
+            16 + len(pinyin_bytes) + PHRASE_SEPARATOR_SIZE, int(index))
+        phrase_list.append((1, pinyin_bytes, header, phrase_bytes))
+
+    # Necessary fix, otherwise the order of phrases will be messed up.
+    phrase_list.sort(key=lambda x: x[1])
+
+    # Write phrases
+    tolast_phrase_pos = 0
+    total_size = PHRASE_LEN_FIRST_POS
+    with open(LEX_FILE, 'rb+') as file:
+        file.seek(PHRASE_LEN_FIRST_POS)
+        file.truncate()
+        for _, *items in phrase_list[:-1]:
+            phrase_len = sum(map(len, items)) + PHRASE_SEPARATOR_SIZE * 2
+            tolast_phrase_pos += phrase_len
+            file.write(int2bytes(tolast_phrase_pos, length=4))
+            total_size += PHRASE_SEPARATOR_SIZE * 2  # == 4, the size of the offset just written
+        for _, pinyin_bytes, header, phrase_bytes in phrase_list:
+            file.write(header)
+            data_bytes = PHRASE_SEPARATOR_BYTES.join(
+                [pinyin_bytes, phrase_bytes, b''])
+            file.write(data_bytes)
+            total_size += len(header) + len(data_bytes)
+
+    # Fix file header
+    replace_bytes(PHRASE_64PCNT_POS, int2bytes(64 + len(phrase_list) * 4, length=4))
+    replace_bytes(PHRASE_CNT_POS, int2bytes(len(phrase_list), length=4))
+    replace_bytes(TOTAL_BYTES_POS, int2bytes(total_size, length=4))
+
+
+if __name__ == "__main__":
+    try:
+        main()
+        print('Done')
+    except Exception:  # report failures, but let Ctrl-C / SystemExit propagate
+        traceback.print_exc()
+    os.system('pause')  # runs the shell's `pause` command so the console stays open
\ No newline at end of file