From 3001c3b94f2f8ab6abb667279e28177ba0047ba3 Mon Sep 17 00:00:00 2001 From: Evan Miller Date: Mon, 21 Dec 2020 09:11:42 -0500 Subject: [PATCH] Use the language ID from the DB header for indexing According to the HACKING file, the file's default language ID is stored in the database header. Use this value instead of a generic English language locale for indexing JET4 files. Columns can have their own text sorting rules, including language ID distinct from the file's language ID, but this is not addressed as we'd have to break the mdb_index_hash_text function signature, which I'm not prepared to do just yet. There appear to be two bytes after the language ID that may indicate additional sorting flags. These bytes need additional research. --- include/mdbtools.h | 1 + src/libmdb/file.c | 5 +++++ src/libmdb/index.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/mdbtools.h b/include/mdbtools.h index 5b93cfb..171da38 100644 --- a/include/mdbtools.h +++ b/include/mdbtools.h @@ -247,6 +247,7 @@ typedef struct { /* reference count */ int refs; guint16 code_page; + guint16 lang_id; } MdbFile; /* offset to row count on data pages...version dependant */ diff --git a/src/libmdb/file.c b/src/libmdb/file.c index 09e5b70..636fb32 100644 --- a/src/libmdb/file.c +++ b/src/libmdb/file.c @@ -231,6 +231,11 @@ static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) { RC4_set_key(&rc4_key, 4, (unsigned char *)&tmp_key); RC4(&rc4_key, mdb->f->jet_version == MDB_VER_JET3 ? 126 : 128, mdb->pg_buf + 0x18); + if (mdb->f->jet_version == MDB_VER_JET3) { + mdb->f->lang_id = mdb_get_int16(mdb->pg_buf, 0x3a); + } else { + mdb->f->lang_id = mdb_get_int16(mdb->pg_buf, 0x6e); + } mdb->f->code_page = mdb_get_int16(mdb->pg_buf, 0x3c); mdb->f->db_key = mdb_get_int32(mdb->pg_buf, 0x3e); if (mdb->f->jet_version == MDB_VER_JET3) { diff --git a/src/libmdb/index.c b/src/libmdb/index.c index 6fa7a5a..1717190 100644 --- a/src/libmdb/index.c +++ b/src/libmdb/index.c @@ -389,7 +389,7 @@ mdb_index_hash_text(MdbHandle *mdb, char *text, char *hash) out_ptr[i*2] = text[i]; out_ptr[i*2+1] = 0; } - if (!(k=DBLCMapStringW(MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT), 0), + if (!(k=DBLCMapStringW(MAKELCID(mdb->f->lang_id, 0), LCMAP_LINGUISTIC_CASING | LCMAP_SORTKEY | NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH, (WCHAR*)out_ptr, len, (LPBYTE)hash, len*2))) {