diff --git a/include/mdbtools.h b/include/mdbtools.h
index 8c19783..5b93cfb 100644
--- a/include/mdbtools.h
+++ b/include/mdbtools.h
@@ -246,6 +246,7 @@ typedef struct {
 	unsigned char *free_map;
 	/* reference count */
 	int refs;
+	guint16 code_page;
 } MdbFile;
 
 /* offset to row count on data pages...version dependant */
diff --git a/src/libmdb/file.c b/src/libmdb/file.c
index c60c703..28b536f 100644
--- a/src/libmdb/file.c
+++ b/src/libmdb/file.c
@@ -168,9 +168,6 @@ static char *mdb_find_file(const char *file_name)
  * Return value: The handle on success, NULL on failure
  */
 static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) {
-	int key[] = {0x86, 0xfb, 0xec, 0x37, 0x5d, 0x44, 0x9c, 0xfa, 0xc6, 0x5e, 0x28, 0xe6, 0x13, 0xb6};
-	int j, pos;
-
 	MdbHandle *mdb = (MdbHandle *) g_malloc0(sizeof(MdbHandle));
 	mdb_set_default_backend(mdb, "access");
 	mdb_set_date_fmt(mdb, "%x %X");
@@ -216,27 +213,24 @@ static MdbHandle *mdb_handle_from_stream(FILE *stream, MdbFileFlags flags) {
 		mdb_close(mdb);
 		return NULL;
 	}
+
+	/* The header is obfuscated with RC4 under a fixed 4-byte key. Spell the
+	 * key out byte by byte so it does not depend on host endianness. */
+	RC4_KEY rc4_key;
+	unsigned char header_key[4] = { 0xc7, 0xda, 0x39, 0x6b };
+	RC4_set_key(&rc4_key, (int)sizeof(header_key), header_key);
+	RC4(&rc4_key, mdb->f->jet_version == MDB_VER_JET3 ? 126 : 128, mdb->pg_buf + 0x18);
+
+	mdb->f->code_page = mdb_get_int16(mdb->pg_buf, 0x3c);
 	mdb->f->db_key = mdb_get_int32(mdb->pg_buf, 0x3e);
-	/* I don't know if this value is valid for some versions?
-	 * it doesn't seem to be valid for the databases I have
-	 *
-	 * f->db_key ^= 0xe15e01b9;
-	 */
-	mdb->f->db_key ^= 0x4ebc8afb;
-	/* fprintf(stderr, "Encrypted file, RC4 key seed= %d\n", mdb->f->db_key); */
+	if (mdb->f->jet_version == MDB_VER_JET3) {
+		/* JET4 needs additional masking with the DB creation date, currently unsupported */
+		/* Bug - JET3 supports 20 byte passwords, this is currently just 14 bytes */
+		memcpy(mdb->f->db_passwd, mdb->pg_buf + 0x42, sizeof(mdb->f->db_passwd));
+	}
 	/* write is not supported for encrypted files yet */
 	mdb->f->writable = mdb->f->writable && !mdb->f->db_key;
 
-	/* get the db password located at 0x42 bytes into the file */
-	for (pos=0;pos<14;pos++) {
-		j = mdb_get_int32(mdb->pg_buf, 0x42+pos);
-		j ^= key[pos];
-		if ( j != 0)
-			mdb->f->db_passwd[pos] = j;
-		else
-			mdb->f->db_passwd[pos] = '\0';
-	}
-
 	mdb_iconv_init(mdb);
 
 	return mdb;
diff --git a/src/libmdb/iconv.c b/src/libmdb/iconv.c
index 87264f1..a779245 100644
--- a/src/libmdb/iconv.c
+++ b/src/libmdb/iconv.c
@@ -47,7 +47,7 @@ static size_t decompress_unicode(const char *src, size_t slen, char *dst, size_t
 }
 
 #if HAVE_ICONV
-static size_t decompressed2ascii_with_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
+static size_t decompressed_to_utf8_with_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
 	char *out_ptr = dest;
 	size_t len_out = dlen - 1;
 
@@ -72,8 +72,41 @@ static size_t decompressed2ascii_with_iconv(MdbHandle *mdb, const char *in_ptr,
 	return dlen;
 }
 #else
-static size_t decompressed2ascii_without_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
+/*
+ * Convert Latin-1 bytes to UTF-8 without iconv. Bytes below 0x80 are copied
+ * unchanged; every other byte expands to a two-byte sequence. Conversion
+ * stops early rather than emit half a sequence when dest is full.
+ * Returns the number of bytes written, not counting the terminating NUL
+ * (nothing is written at all when dlen is 0).
+ */
+static size_t latin1_to_utf8_without_iconv(const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
+	size_t used = 0;
+	size_t i;
+	if (dlen == 0)
+		return 0;
+	for (i = 0; i < len_in; i++) {
+		unsigned char c = (unsigned char)in_ptr[i];
+		size_t need = (c & 0x80) ? 2 : 1;
+		/* always keep one byte in reserve for the terminating NUL */
+		if (dlen - 1 - used < need)
+			break;
+		if (need == 2) {
+			dest[used++] = (char)(0xC0 | (c >> 6));
+			dest[used++] = (char)(0x80 | (c & 0x3F));
+		} else {
+			dest[used++] = (char)c;
+		}
+	}
+	dest[used] = '\0';
+	return used;
+}
+
+static size_t decompressed_to_utf8_without_iconv(MdbHandle *mdb, const char *in_ptr, size_t len_in, char *dest, size_t dlen) {
 	if (IS_JET3(mdb)) {
+		/* CP1252 is handled as Latin-1 here; bytes 0x80-0x9F are not remapped */
+		if (mdb->f->code_page == 1252) {
+			return latin1_to_utf8_without_iconv(in_ptr, len_in, dest, dlen);
+		}
 		int count = 0;
 		snprintf(dest, dlen, "%.*s%n", (int)len_in, in_ptr, &count);
 		return count;
@@ -135,9 +168,9 @@ mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size
 	}
 
 #if HAVE_ICONV
-	dlen = decompressed2ascii_with_iconv(mdb, in_ptr, len_in, dest, dlen);
+	dlen = decompressed_to_utf8_with_iconv(mdb, in_ptr, len_in, dest, dlen);
 #else
-	dlen = decompressed2ascii_without_iconv(mdb, in_ptr, len_in, dest, dlen);
+	dlen = decompressed_to_utf8_without_iconv(mdb, in_ptr, len_in, dest, dlen);
 #endif
 
 	if (tmp) g_free(tmp);
@@ -253,13 +286,30 @@ void mdb_iconv_init(MdbHandle *mdb)
 		mdb->iconv_out = iconv_open("UCS-2LE", iconv_code);
 		mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE");
 	} else {
-		/* According to Microsoft Knowledge Base pages 289525 and */
-		/* 202427, code page info is not contained in the database */
-		const char *jet3_iconv_code;
-		/* check environment variable */
-		if (!(jet3_iconv_code=getenv("MDB_JET3_CHARSET"))) {
-			jet3_iconv_code="CP1252";
+		const char *jet3_iconv_code = getenv("MDB_JET3_CHARSET");
+
+		if (!jet3_iconv_code) {
+			/* Use code page embedded in the database */
+			/* Note that individual columns can override this value,
+			 * but per-column code pages are not supported by libmdb */
+			switch (mdb->f->code_page) {
+			case 874: jet3_iconv_code="WINDOWS-874"; break;
+			case 932: jet3_iconv_code="SHIFT-JIS"; break;
+			case 936: jet3_iconv_code="WINDOWS-936"; break;
+			case 950: jet3_iconv_code="BIG-5"; break;
+			case 951: jet3_iconv_code="BIG5-HKSCS"; break;
+			case 1250: jet3_iconv_code="WINDOWS-1250"; break;
+			case 1251: jet3_iconv_code="WINDOWS-1251"; break;
+			case 1252: jet3_iconv_code="WINDOWS-1252"; break;
+			case 1253: jet3_iconv_code="WINDOWS-1253"; break;
+			case 1254: jet3_iconv_code="WINDOWS-1254"; break;
+			case 1255: jet3_iconv_code="WINDOWS-1255"; break;
+			case 1256: jet3_iconv_code="WINDOWS-1256"; break;
+			case 1257: jet3_iconv_code="WINDOWS-1257"; break;
+			case 1258: jet3_iconv_code="WINDOWS-1258"; break;
+			default: jet3_iconv_code="CP1252"; break;
+			}
 		}
 
 		mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);