diff --git a/ChangeLog b/ChangeLog index 1277624..8e01b37 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Wed Dec 1 18:18:02 EST 2004 Brian Bruns + * src/libmdb/data.c: + * src/libmdb/iconv.c: + * src/libmdb/index.c: + * src/libmdb/sargs.c: + * src/libmdb/table.c: patch from Wind Li for iconv on jet3 + * src/libmdb/write.c: write out leaf page with added index entry, not quite working yet (bitmap not written correctly and key hash value wrong) but closer. + * src/util/mdb-import.c: temporary kludge for problems caused by using g_strsplit() and friends + + Wed Dec 1 00:33:38 CST 2004 Jeff Smith * src/libmdb/table.c: * src/libmdb/iconv.c: diff --git a/src/libmdb/data.c b/src/libmdb/data.c index e0d2592..9be6205 100644 --- a/src/libmdb/data.c +++ b/src/libmdb/data.c @@ -634,12 +634,7 @@ static char *mdb_memo_to_string(MdbHandle *mdb, int start, int size) pg_row & 0xff, row_start, len); buffer_dump(mdb->pg_buf, row_start, row_start + len); #endif - if (IS_JET3(mdb)) { - strncpy(text, buf + row_start, len); - text[len]='\0'; - } else { - mdb_unicode2ascii(mdb, buf, row_start, len, text, MDB_BIND_SIZE); - } + mdb_unicode2ascii(mdb, buf, row_start, len, text, MDB_BIND_SIZE); return text; } else { /* if (memo_flags == 0x0000) { */ pg_row = mdb_get_int32(mdb->pg_buf, start+4); @@ -742,18 +737,7 @@ char *mdb_col_to_string(MdbHandle *mdb, unsigned char *buf, int start, int datat if (size<0) { return ""; } - if (IS_JET4(mdb)) { -/* - for (i=0;ipg_buf[start+i], mdb->pg_buf[start+i]); - } - fprintf(stdout, "\n"); -*/ - mdb_unicode2ascii(mdb, mdb->pg_buf, start, size, text, MDB_BIND_SIZE); - } else { - strncpy(text, &buf[start], size); - text[size]='\0'; - } + mdb_unicode2ascii(mdb, mdb->pg_buf, start, size, text, MDB_BIND_SIZE); return text; break; case MDB_SDATETIME: diff --git a/src/libmdb/iconv.c b/src/libmdb/iconv.c index 0d1a48f..2dfbfa3 100644 --- a/src/libmdb/iconv.c +++ b/src/libmdb/iconv.c @@ -35,7 +35,8 @@ mdb_unicode2ascii(MdbHandle *mdb, unsigned char *buf, int offset, unsigned int l len_in = len; len_out = dest_sz; - if (in_ptr[0]==0xff && in_ptr[1]==0xfe) { + + if (buf[offset]==0xff && buf[offset+1]==0xfe) { len_in -= 2; in_ptr += 2; ret = iconv(mdb->iconv_compress, (char **)&in_ptr, &len_in, (char **)&out_ptr, &len_out); @@ -111,16 +112,23 @@ void mdb_iconv_init(MdbHandle *mdb) iconv_code="UTF-8"; } + #ifdef HAVE_ICONV if (IS_JET4(mdb)) { mdb->iconv_out = iconv_open("UCS-2LE", iconv_code); mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE"); mdb->iconv_compress = iconv_open(iconv_code, "ISO8859-1"); } else { - /* XXX - need to determine character set from file */ - mdb->iconv_out = iconv_open("ISO8859-1", iconv_code); - mdb->iconv_in = iconv_open(iconv_code, "ISO8859-1"); - mdb->iconv_compress = (iconv_t)-1; + /* ToDO - need to determine character set from file */ + /* But according to MS kb289525 and kb202427, there is not such info in jet3 db */ + char *jet3_iconv_code; + + /* check environment variable */ + if (!(jet3_iconv_code=(char *)getenv("MDB_JET3_CHARSET"))) { + jet3_iconv_code="ISO8859-1"; + } + mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code); + mdb->iconv_in = iconv_open(iconv_code, jet3_iconv_code); } #endif } @@ -129,6 +137,8 @@ void mdb_iconv_close(MdbHandle *mdb) #ifdef HAVE_ICONV if (mdb->iconv_out != (iconv_t)-1) iconv_close(mdb->iconv_out); if (mdb->iconv_in != (iconv_t)-1) iconv_close(mdb->iconv_in); - if (mdb->iconv_compress != (iconv_t)-1) iconv_close(mdb->iconv_compress); + if (IS_JET4(mdb)) { + if (mdb->iconv_compress != (iconv_t)-1) iconv_close(mdb->iconv_compress); + } #endif } diff --git a/src/libmdb/index.c b/src/libmdb/index.c index 6eb3cf2..5009352 100644 --- a/src/libmdb/index.c +++ b/src/libmdb/index.c @@ -104,18 +104,15 @@ mdb_read_indices(MdbTableDef *table) if (IS_JET4(mdb)) { name_sz=read_pg_if_16(mdb, &cur_pos); cur_pos += 2; - tmpbuf = g_malloc(name_sz); - read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz); - cur_pos += name_sz; - mdb_unicode2ascii(mdb, tmpbuf, 0, name_sz, pidx->name, name_sz); - g_free(tmpbuf); } else { read_pg_if(mdb, &cur_pos, 0); name_sz=mdb->pg_buf[cur_pos++]; - read_pg_if_n(mdb, pidx->name, &cur_pos, name_sz); - cur_pos += name_sz; - pidx->name[name_sz]='\0'; } + tmpbuf = g_malloc(name_sz); + read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz); + cur_pos += name_sz; + mdb_unicode2ascii(mdb, tmpbuf, 0, name_sz, pidx->name, name_sz); + g_free(tmpbuf); //fprintf(stderr, "index name %s\n", pidx->name); } @@ -180,6 +177,9 @@ mdb_index_hash_text(guchar *text, guchar *hash) } hash[strlen(text)]=0; } +/* + * reverse the order of the column for hashing + */ void mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest) { @@ -321,6 +321,7 @@ mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg) start = ipg->idx_starts[elem++]; while (start) { + fprintf(stdout, "elem %d is %d\n", elem, ipg->idx_starts[elem]); len = ipg->idx_starts[elem] - start; fprintf(stdout, "len is %d\n", len); for (i=0; i < len; i++) { diff --git a/src/libmdb/sargs.c b/src/libmdb/sargs.c index 610c6c7..fa9f338 100644 --- a/src/libmdb/sargs.c +++ b/src/libmdb/sargs.c @@ -154,13 +154,7 @@ int lastchar; return mdb_test_int(node, (gint32)mdb_get_int32(field->value, 0)); break; case MDB_TEXT: - if (IS_JET4(mdb)) { - mdb_unicode2ascii(mdb, field->value, 0, field->siz, tmpbuf, 256); - } else { - strncpy(tmpbuf, field->value, 255); - lastchar = field->siz > 255 ? 255 : field->siz; - tmpbuf[lastchar]='\0'; - } + mdb_unicode2ascii(mdb, field->value, 0, field->siz, tmpbuf, 256); return mdb_test_string(node, tmpbuf); default: fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n",col->col_type); diff --git a/src/libmdb/table.c b/src/libmdb/table.c index 478ce5e..6de3aec 100644 --- a/src/libmdb/table.c +++ b/src/libmdb/table.c @@ -271,25 +271,24 @@ GPtrArray *mdb_read_columns(MdbTableDef *table) for (i=0;inum_cols;i++) { pcol = g_ptr_array_index(table->columns, i); + char *tmp_buf; if (IS_JET4(mdb)) { - char *tmp_buf; name_sz = read_pg_if_16(mdb, &cur_pos); cur_pos += 2; - tmp_buf = (char *) g_malloc(name_sz); - read_pg_if_n(mdb, tmp_buf, &cur_pos, name_sz); - mdb_unicode2ascii(mdb, tmp_buf, 0, name_sz, pcol->name, name_sz); - g_free(tmp_buf); - cur_pos += name_sz; } else if (IS_JET3(mdb)) { read_pg_if(mdb, &cur_pos, 0); name_sz = mdb->pg_buf[cur_pos]; cur_pos++; - read_pg_if_n(mdb, pcol->name, &cur_pos, name_sz); - pcol->name[name_sz]='\0'; - cur_pos += name_sz; } else { fprintf(stderr,"Unknown MDB version\n"); + continue; } + tmp_buf = (char *) g_malloc(name_sz); + read_pg_if_n(mdb, tmp_buf, &cur_pos, name_sz); + mdb_unicode2ascii(mdb, tmp_buf, 0, name_sz, pcol->name, name_sz); + g_free(tmp_buf); + cur_pos += name_sz; + } /* Sort the columns by col_num */ diff --git a/src/libmdb/write.c b/src/libmdb/write.c index 1cff96a..45e3a7a 100644 --- a/src/libmdb/write.c +++ b/src/libmdb/write.c @@ -27,7 +27,7 @@ //static int mdb_copy_index_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg); -static int mdb_add_row_to_leaf_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg, MdbField *idx_fields); +static int mdb_add_row_to_leaf_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg, MdbField *idx_fields, guint32 pgnum, guint16 rownum); void _mdb_put_int16(unsigned char *buf, guint32 offset, guint32 value) @@ -47,6 +47,24 @@ _mdb_put_int32(unsigned char *buf, guint32 offset, guint32 value) value /= 256; buf[offset+3] = value % 256; } +void +_mdb_put_int24(unsigned char *buf, guint32 offset, guint32 value) +{ + buf[offset] = value % 256; + value /= 256; + buf[offset+1] = value % 256; + value /= 256; + buf[offset+2] = value % 256; +} +void +_mdb_put_int24_msb(unsigned char *buf, guint32 offset, guint32 value) +{ + buf[offset+2] = value % 256; + value /= 256; + buf[offset+1] = value % 256; + value /= 256; + buf[offset] = value % 256; +} ssize_t mdb_write_pg(MdbHandle *mdb, unsigned long pg) { @@ -572,7 +590,7 @@ mdb_update_index(MdbTableDef *table, MdbIndex *idx, unsigned int num_fields, Mdb printf("pg = %" G_GUINT32_FORMAT "\n", chain->pages[chain->cur_depth-1].pg); //mdb_copy_index_pg(table, idx, &chain->pages[chain->cur_depth-1]); - mdb_add_row_to_leaf_pg(table, idx, &chain->pages[chain->cur_depth-1], idx_fields); + mdb_add_row_to_leaf_pg(table, idx, &chain->pages[chain->cur_depth-1], idx_fields, pgnum, rownum); return 1; } @@ -814,7 +832,7 @@ int i, pos; return 0; } static int -mdb_add_row_to_leaf_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg, MdbField *idx_fields) +mdb_add_row_to_leaf_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg, MdbField *idx_fields, guint32 pgnum, guint16 rownum) /*, guint32 pgnum, guint16 rownum) static int mdb_copy_index_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg) @@ -829,6 +847,7 @@ mdb_copy_index_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg) unsigned char key_hash[256]; unsigned char iflag; int keycol; + int i; new_pg = mdb_new_leaf_pg(entry); @@ -860,18 +879,42 @@ mdb_copy_index_pg(MdbTableDef *table, MdbIndex *idx, MdbIndexPage *ipg) pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4); row = mdb->pg_buf[ipg->offset + ipg->len - 1]; iflag = mdb->pg_buf[ipg->offset]; + + /* turn the key hash back into a value */ mdb_index_swap_n(&mdb->pg_buf[ipg->offset + 1], col->col_size, key_hash); key_hash[col->col_size - 1] &= 0x7f; + printf("length = %d\n", ipg->len); + printf("offset = %d\n", ipg->offset); printf("iflag = %d pg = %" G_GUINT32_FORMAT " row = %" G_GUINT16_FORMAT "\n", iflag, pg, row); buffer_dump(mdb->pg_buf, ipg->offset, ipg->offset + ipg->len - 1); buffer_dump(mdb->pg_buf, ipg->offset + 1, ipg->offset + col->col_size); buffer_dump(key_hash, 0, col->col_size - 1); + + memcpy(&new_pg[ipg->offset], &mdb->pg_buf[ipg->offset], ipg->len); ipg->offset += ipg->len; ipg->len = 0; + row++; } + //_mdb_put_int16(new_pg, mdb->fmt->row_count_offset, row); + /* free space left */ + _mdb_put_int16(new_pg, 2, mdb->fmt->pg_size - ipg->offset); + printf("offset = %d\n", ipg->offset); + + mdb_index_swap_n(idx_fields[0].value, col->col_size, key_hash); + new_pg[ipg->offset] = 0x7f; + memcpy(&new_pg[ipg->offset + 1], key_hash, col->col_size); + _mdb_put_int24_msb(new_pg, ipg->offset + 5, pgnum); + new_pg[ipg->offset + 8] = rownum; + printf("row %d offset %d\n", row, ipg->offset); + ipg->idx_starts[row++] = ipg->offset + ipg->len; + //ipg->idx_starts[row] = ipg->offset + ipg->len; + buffer_dump(mdb->pg_buf, 0, mdb->fmt->pg_size-1); + memcpy(mdb->pg_buf, new_pg, mdb->fmt->pg_size); + mdb_index_pack_bitmap(mdb, ipg); + buffer_dump(mdb->pg_buf, 0, mdb->fmt->pg_size-1); g_free(new_pg); return ipg->len; diff --git a/src/util/mdb-import.c b/src/util/mdb-import.c index 3362724..db413b8 100644 --- a/src/util/mdb-import.c +++ b/src/util/mdb-import.c @@ -116,6 +116,7 @@ prep_row(MdbTableDef *table, unsigned char *line, MdbField *fields, char *delim) g_strdelimit(line, delim, '\n'); sarray = g_strsplit (line, "\n", 0); for (i=0; (s = sarray[i]); i++) { + if (!strlen(s)) continue; if (i >= table->num_cols) { fprintf(stderr, "Number of columns in file exceeds number in table.\n"); g_strfreev(sarray); @@ -143,7 +144,7 @@ prep_row(MdbTableDef *table, unsigned char *line, MdbField *fields, char *delim) free_values(fields, i); return 0; } - return i; + return i-1; } int main(int argc, char **argv)