Merge pull request #224 from evanmiller/jet3-encodings

Simplify header-reading logic and support JET3 code pages
This commit is contained in:
Evan Miller
2020-12-20 20:12:06 -05:00
committed by GitHub
3 changed files with 59 additions and 30 deletions

View File

@@ -246,6 +246,7 @@ typedef struct {
unsigned char *free_map;
/* reference count */
int refs;
guint16 code_page;
} MdbFile;
/* offset to row count on data pages...version dependent */

View File

@@ -168,9 +168,6 @@ static char *mdb_find_file(const char *file_name)
* Return value: The handle on success, NULL on failure
*/
static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) {
int key[] = {0x86, 0xfb, 0xec, 0x37, 0x5d, 0x44, 0x9c, 0xfa, 0xc6, 0x5e, 0x28, 0xe6, 0x13, 0xb6};
int j, pos;
MdbHandle *mdb = (MdbHandle *) g_malloc0(sizeof(MdbHandle));
mdb_set_default_backend(mdb, "access");
mdb_set_date_fmt(mdb, "%x %X");
@@ -216,27 +213,22 @@ static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) {
mdb_close(mdb);
return NULL;
}
RC4_KEY rc4_key;
unsigned int tmp_key = 0x6b39dac7;
RC4_set_key(&rc4_key, 4, (unsigned char *)&tmp_key);
RC4(&rc4_key, mdb->f->jet_version == MDB_VER_JET3 ? 126 : 128, mdb->pg_buf + 0x18);
mdb->f->code_page = mdb_get_int16(mdb->pg_buf, 0x3c);
mdb->f->db_key = mdb_get_int32(mdb->pg_buf, 0x3e);
/* I don't know if this value is valid for some versions?
* it doesn't seem to be valid for the databases I have
*
* f->db_key ^= 0xe15e01b9;
*/
mdb->f->db_key ^= 0x4ebc8afb;
/* fprintf(stderr, "Encrypted file, RC4 key seed= %d\n", mdb->f->db_key); */
if (mdb->f->jet_version == MDB_VER_JET3) {
/* JET4 needs additional masking with the DB creation date, currently unsupported */
/* Bug - JET3 supports 20 byte passwords, this is currently just 14 bytes */
memcpy(mdb->f->db_passwd, mdb->pg_buf + 0x42, sizeof(mdb->f->db_passwd));
}
/* write is not supported for encrypted files yet */
mdb->f->writable = mdb->f->writable && !mdb->f->db_key;
/* get the db password located at 0x42 bytes into the file */
for (pos=0;pos<14;pos++) {
j = mdb_get_int32(mdb->pg_buf, 0x42+pos);
j ^= key[pos];
if ( j != 0)
mdb->f->db_passwd[pos] = j;
else
mdb->f->db_passwd[pos] = '\0';
}
mdb_iconv_init(mdb);
return mdb;

View File

@@ -47,7 +47,7 @@ static size_t decompress_unicode(const char *src, size_t slen, char *dst, size_t
}
#if HAVE_ICONV
static size_t decompressed2ascii_with_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
static size_t decompressed_to_utf8_with_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
char *out_ptr = dest;
size_t len_out = dlen - 1;
@@ -72,8 +72,27 @@ static size_t decompressed2ascii_with_iconv(MdbHandle *mdb, const char *in_ptr,
return dlen;
}
#else
static size_t decompressed2ascii_without_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
/*
 * latin1_to_utf8_without_iconv:
 * @in_ptr: input bytes, each treated as one code point U+0000..U+00FF
 * @len_in: number of input bytes to convert
 * @dest: output buffer
 * @dlen: total size of @dest in bytes, including room for the terminator
 *
 * Converts single-byte input to UTF-8: bytes below 0x80 are copied as-is,
 * bytes 0x80..0xFF become two-byte sequences. Conversion stops early when
 * the next character plus the trailing NUL would not fit, so a two-byte
 * sequence is never split. The output is always NUL-terminated when
 * @dlen is at least 1; when @dlen is 0 nothing is written.
 *
 * Note: bytes 0x80..0x9F are emitted as U+0080..U+009F (Latin-1 semantics);
 * no Windows-1252 specific remapping of that range is performed here.
 *
 * Return value: the number of bytes written to @dest, excluding the NUL.
 */
static size_t latin1_to_utf8_without_iconv(const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
	char *out = dest;
	size_t i;

	/* No room even for the terminator: do not touch dest at all. */
	if (dlen == 0)
		return 0;

	for (i = 0; i < len_in; i++) {
		unsigned char c = (unsigned char)in_ptr[i];
		size_t need = (c & 0x80) ? 2 : 1;
		/* Bytes still available once one is reserved for the NUL.
		 * Done in size_t so no out-of-range pointer is ever formed. */
		size_t room = dlen - 1 - (size_t)(out - dest);

		if (need > room)
			break;

		if (c & 0x80) {
			*out++ = (char)(0xC0 | (c >> 6));
			*out++ = (char)(0x80 | (c & 0x3F));
		} else {
			*out++ = (char)c;
		}
	}
	*out = '\0';
	return (size_t)(out - dest);
}
static size_t decompressed_to_utf8_without_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
if (IS_JET3(mdb)) {
if (mdb->f->code_page == 1252) {
return latin1_to_utf8_without_iconv(in_ptr, len_in, dest, dlen);
}
int count = 0;
snprintf(dest, dlen, "%.*s%n", (int)len_in, in_ptr, &count);
return count;
@@ -135,9 +154,9 @@ mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size
}
#if HAVE_ICONV
dlen = decompressed2ascii_with_iconv(mdb, in_ptr, len_in, dest, dlen);
dlen = decompressed_to_utf8_with_iconv(mdb, in_ptr, len_in, dest, dlen);
#else
dlen = decompressed2ascii_without_iconv(mdb, in_ptr, len_in, dest, dlen);
dlen = decompressed_to_utf8_without_iconv(mdb, in_ptr, len_in, dest, dlen);
#endif
if (tmp) g_free(tmp);
@@ -253,13 +272,30 @@ void mdb_iconv_init(MdbHandle *mdb)
mdb->iconv_out = iconv_open("UCS-2LE", iconv_code);
mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE");
} else {
/* According to Microsoft Knowledge Base pages 289525 and */
/* 202427, code page info is not contained in the database */
const char *jet3_iconv_code;
/* check environment variable */
if (!(jet3_iconv_code=getenv("MDB_JET3_CHARSET"))) {
jet3_iconv_code="CP1252";
const char *jet3_iconv_code = getenv("MDB_JET3_CHARSET");
if (!jet3_iconv_code) {
/* Use code page embedded in the database */
/* Note that individual columns can override this value,
* but per-column code pages are not supported by libmdb */
switch (mdb->f->code_page) {
case 874: jet3_iconv_code="WINDOWS-874"; break;
case 932: jet3_iconv_code="SHIFT-JIS"; break;
case 936: jet3_iconv_code="WINDOWS-936"; break;
case 950: jet3_iconv_code="BIG-5"; break;
case 951: jet3_iconv_code="BIG5-HKSCS"; break;
case 1250: jet3_iconv_code="WINDOWS-1250"; break;
case 1251: jet3_iconv_code="WINDOWS-1251"; break;
case 1252: jet3_iconv_code="WINDOWS-1252"; break;
case 1253: jet3_iconv_code="WINDOWS-1253"; break;
case 1254: jet3_iconv_code="WINDOWS-1254"; break;
case 1255: jet3_iconv_code="WINDOWS-1255"; break;
case 1256: jet3_iconv_code="WINDOWS-1256"; break;
case 1257: jet3_iconv_code="WINDOWS-1257"; break;
case 1258: jet3_iconv_code="WINDOWS-1258"; break;
default: jet3_iconv_code="CP1252"; break;
}
}
mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);