Use the language ID from the DB header for indexing

According to the HACKING file, the file's default language ID is stored
in the database header. Use this value instead of a generic English
language locale for indexing JET4 files.

Columns can have their own text sorting rules, including a language ID
distinct from the file's language ID. That case is not addressed here,
because doing so would require breaking the mdb_index_hash_text function
signature, which I'm not prepared to do just yet.

There appear to be two bytes after the language ID that may indicate
additional sorting flags. These bytes need additional research.
This commit is contained in:
Evan Miller
2020-12-21 09:11:42 -05:00
parent ec994b6f43
commit 3001c3b94f
3 changed files with 7 additions and 1 deletions

View File

@@ -247,6 +247,7 @@ typedef struct {
/* reference count */ /* reference count */
int refs; int refs;
guint16 code_page; guint16 code_page;
guint16 lang_id;
} MdbFile; } MdbFile;
/* offset to row count on data pages...version dependant */ /* offset to row count on data pages...version dependant */

View File

@@ -231,6 +231,11 @@ static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) {
RC4_set_key(&rc4_key, 4, (unsigned char *)&tmp_key); RC4_set_key(&rc4_key, 4, (unsigned char *)&tmp_key);
RC4(&rc4_key, mdb->f->jet_version == MDB_VER_JET3 ? 126 : 128, mdb->pg_buf + 0x18); RC4(&rc4_key, mdb->f->jet_version == MDB_VER_JET3 ? 126 : 128, mdb->pg_buf + 0x18);
if (mdb->f->jet_version == MDB_VER_JET3) {
mdb->f->lang_id = mdb_get_int16(mdb->pg_buf, 0x3a);
} else {
mdb->f->lang_id = mdb_get_int16(mdb->pg_buf, 0x6e);
}
mdb->f->code_page = mdb_get_int16(mdb->pg_buf, 0x3c); mdb->f->code_page = mdb_get_int16(mdb->pg_buf, 0x3c);
mdb->f->db_key = mdb_get_int32(mdb->pg_buf, 0x3e); mdb->f->db_key = mdb_get_int32(mdb->pg_buf, 0x3e);
if (mdb->f->jet_version == MDB_VER_JET3) { if (mdb->f->jet_version == MDB_VER_JET3) {

View File

@@ -389,7 +389,7 @@ mdb_index_hash_text(MdbHandle *mdb, char *text, char *hash)
out_ptr[i*2] = text[i]; out_ptr[i*2] = text[i];
out_ptr[i*2+1] = 0; out_ptr[i*2+1] = 0;
} }
if (!(k=DBLCMapStringW(MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_DEFAULT), 0), if (!(k=DBLCMapStringW(MAKELCID(mdb->f->lang_id, 0),
LCMAP_LINGUISTIC_CASING | LCMAP_SORTKEY | NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH, LCMAP_LINGUISTIC_CASING | LCMAP_SORTKEY | NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH,
(WCHAR*)out_ptr, len, (LPBYTE)hash, len*2))) (WCHAR*)out_ptr, len, (LPBYTE)hash, len*2)))
{ {