patch 'ole' from Nirgal Vourgère

This commit is contained in:
Brian Bruns 2011-02-16 18:57:40 -05:00
parent 0eea4c148f
commit fa83ed0af0
4 changed files with 117 additions and 66 deletions

View File

@ -457,6 +457,7 @@ extern int mdb_col_fixed_size(MdbColumn *col);
extern int mdb_col_disp_size(MdbColumn *col); extern int mdb_col_disp_size(MdbColumn *col);
extern size_t mdb_ole_read_next(MdbHandle *mdb, MdbColumn *col, void *ole_ptr); extern size_t mdb_ole_read_next(MdbHandle *mdb, MdbColumn *col, void *ole_ptr);
extern size_t mdb_ole_read(MdbHandle *mdb, MdbColumn *col, void *ole_ptr, int chunk_size); extern size_t mdb_ole_read(MdbHandle *mdb, MdbColumn *col, void *ole_ptr, int chunk_size);
extern void* mdb_ole_read_full(MdbHandle *mdb, MdbColumn *col, size_t *size);
extern void mdb_set_date_fmt(const char *); extern void mdb_set_date_fmt(const char *);
extern int mdb_read_row(MdbTableDef *table, unsigned int row); extern int mdb_read_row(MdbTableDef *table, unsigned int row);

View File

@ -19,6 +19,7 @@
#include "mdbtools.h" #include "mdbtools.h"
#include "time.h" #include "time.h"
#include "math.h"
#ifdef DMALLOC #ifdef DMALLOC
#include "dmalloc.h" #include "dmalloc.h"
@ -346,8 +347,8 @@ int mdb_read_next_dpg(MdbTableDef *table)
/* On rare occasion, mdb_map_find_next will return a wrong page */ /* On rare occasion, mdb_map_find_next will return a wrong page */
/* Found in a big file, over 4,000,000 records */ /* Found in a big file, over 4,000,000 records */
fprintf(stderr, fprintf(stderr,
"warning: page %d from map doesn't match: Type=%d, buf[4..7]=%d Expected table_pg=%d\n", "warning: page %d from map doesn't match: Type=%d, buf[4..7]=%ld Expected table_pg=%ld\n",
next_pg, mdb_get_int32(mdb->pg_buf, 4), entry->table_pg); next_pg, mdb->pg_buf[0], mdb_get_int32(mdb->pg_buf, 4), entry->table_pg);
} }
fprintf(stderr, "Warning: defaulting to brute force read\n"); fprintf(stderr, "Warning: defaulting to brute force read\n");
#endif #endif
@ -466,6 +467,12 @@ int i;
return text; return text;
} }
#endif #endif
/*
* ole_ptr should point to the original blob value of the field.
* If omitted, there will be no multi-page check, so the caller is
* responsible for not calling this function on a field that has no
* next page. In that case, the caller doesn't have to preserve the
* original value.
*/
size_t size_t
mdb_ole_read_next(MdbHandle *mdb, MdbColumn *col, void *ole_ptr) mdb_ole_read_next(MdbHandle *mdb, MdbColumn *col, void *ole_ptr)
{ {
@ -474,24 +481,30 @@ mdb_ole_read_next(MdbHandle *mdb, MdbColumn *col, void *ole_ptr)
int row_start; int row_start;
size_t len; size_t len;
ole_len = mdb_get_int32(ole_ptr, 0); if (ole_ptr) {
ole_len = mdb_get_int32(ole_ptr, 0);
mdb_debug(MDB_DEBUG_OLE,"ole len = %d ole flags = %02x",
ole_len & 0x00ffffff, ole_len >> 24);
if ((ole_len & 0x80000000) if ((ole_len & 0x80000000)
|| (ole_len & 0x40000000)) { || (ole_len & 0x40000000))
/* inline or single-page fields don't have a next */ /* inline or single-page fields don't have a next */
return 0; return 0;
} else {
if (mdb_find_pg_row(mdb, col->cur_blob_pg_row,
&buf, &row_start, &len)) {
return 0;
}
if (col->bind_ptr)
memcpy(col->bind_ptr, buf + row_start + 4, len - 4);
col->cur_blob_pg_row = mdb_get_int32(buf, row_start);
return len;
} }
return 0; mdb_debug(MDB_DEBUG_OLE, "pg_row %d", col->cur_blob_pg_row);
if (!col->cur_blob_pg_row)
return 0; /* we are done */
if (mdb_find_pg_row(mdb, col->cur_blob_pg_row,
&buf, &row_start, &len)) {
return 0;
}
mdb_debug(MDB_DEBUG_OLE,"start %d len %d", row_start, len);
if (col->bind_ptr)
memcpy(col->bind_ptr, buf + row_start + 4, len - 4);
col->cur_blob_pg_row = mdb_get_int32(buf, row_start);
return len - 4;
} }
size_t size_t
mdb_ole_read(MdbHandle *mdb, MdbColumn *col, void *ole_ptr, int chunk_size) mdb_ole_read(MdbHandle *mdb, MdbColumn *col, void *ole_ptr, int chunk_size)
@ -540,21 +553,59 @@ mdb_ole_read(MdbHandle *mdb, MdbColumn *col, void *ole_ptr, int chunk_size)
return len; return len;
} else if ((ole_len & 0xff000000) == 0) { } else if ((ole_len & 0xff000000) == 0) {
col->cur_blob_pg_row = mdb_get_int32(ole_ptr, 4); col->cur_blob_pg_row = mdb_get_int32(ole_ptr, 4);
mdb_debug(MDB_DEBUG_OLE,"ole row = %d ole pg = %ld",
col->cur_blob_pg_row & 0xff,
col->cur_blob_pg_row >> 8);
if (mdb_find_pg_row(mdb, col->cur_blob_pg_row, if (mdb_find_pg_row(mdb, col->cur_blob_pg_row,
&buf, &row_start, &len)) { &buf, &row_start, &len)) {
return 0; return 0;
} }
mdb_debug(MDB_DEBUG_OLE,"start %d len %d", row_start, len);
if (col->bind_ptr) if (col->bind_ptr)
memcpy(col->bind_ptr, buf + row_start + 4, len - 4); memcpy(col->bind_ptr, buf + row_start + 4, len - 4);
col->cur_blob_pg_row = mdb_get_int32(buf, row_start); col->cur_blob_pg_row = mdb_get_int32(buf, row_start);
mdb_debug(MDB_DEBUG_OLE, "next pg_row %d", col->cur_blob_pg_row);
return len; return len - 4;
} else { } else {
fprintf(stderr,"Unhandled ole field flags = %02x\n", ole_len >> 24); fprintf(stderr,"Unhandled ole field flags = %02x\n", ole_len >> 24);
return 0; return 0;
} }
} }
/*
* mdb_ole_read_full calls mdb_ole_read then loop over mdb_ole_read_next as much as necessary.
* returns the result in a big buffer.
* The call must free it.
* Note that this function is not indempotent: It may be called only once per column after each bind.
*/
void*
mdb_ole_read_full(MdbHandle *mdb, MdbColumn *col, size_t *size)
{
char ole_ptr[MDB_MEMO_OVERHEAD];
char *result = malloc(MDB_BIND_SIZE);
size_t result_buffer_size = MDB_BIND_SIZE;
size_t len, pos;
memcpy(ole_ptr, col->bind_ptr, MDB_MEMO_OVERHEAD);
len = mdb_ole_read(mdb, col, ole_ptr, MDB_BIND_SIZE);
memcpy(result, col->bind_ptr, len);
pos = len;
while ((len = mdb_ole_read_next(mdb, col, ole_ptr))) {
if (pos+len >= result_buffer_size) {
result_buffer_size += MDB_BIND_SIZE;
result = realloc(result, result_buffer_size);
}
memcpy(result + pos, col->bind_ptr, len);
pos += len;
}
if (size)
*size = pos;
return result;
}
#ifdef MDB_COPY_OLE #ifdef MDB_COPY_OLE
static size_t mdb_copy_ole(MdbHandle *mdb, void *dest, int start, int size) static size_t mdb_copy_ole(MdbHandle *mdb, void *dest, int start, int size)
{ {
@ -863,7 +914,6 @@ char *mdb_col_to_string(MdbHandle *mdb, void *buf, int start, int datatype, int
td = mdb_get_double(buf, start); td = mdb_get_double(buf, start);
text = g_strdup_printf("%.16e", td); text = g_strdup_printf("%.16e", td);
break; break;
case MDB_BINARY:
case MDB_TEXT: case MDB_TEXT:
if (size<0) { if (size<0) {
text = g_strdup(""); text = g_strdup("");
@ -877,6 +927,7 @@ char *mdb_col_to_string(MdbHandle *mdb, void *buf, int start, int datatype, int
text = mdb_date_to_string(mdb, start); text = mdb_date_to_string(mdb, start);
break; break;
case MDB_MEMO: case MDB_MEMO:
case MDB_BINARY:
text = mdb_memo_to_string(mdb, start, size); text = mdb_memo_to_string(mdb, start, size);
break; break;
case MDB_MONEY: case MDB_MONEY:

View File

@ -34,34 +34,35 @@ void
print_col(gchar *col_val, int quote_text, int col_type, int bin_length, char *quote_char, char *escape_char) print_col(gchar *col_val, int quote_text, int col_type, int bin_length, char *quote_char, char *escape_char)
{ {
gchar *s; gchar *s;
unsigned char c;
if (quote_text && is_text_type(col_type)) { if (quote_text && is_text_type(col_type)) {
fprintf(stdout,quote_char); fputs(quote_char,stdout);
if (col_type == MDB_OLE || col_type == MDB_BINARY) { if (col_type == MDB_OLE || col_type == MDB_BINARY) {
while (bin_length--) { while (bin_length--) {
unsigned char c = (unsigned char)*col_val++; c = (unsigned char)*col_val++;
if (c>=32 && c<=127) if (strlen(quote_char)==1 && c==quote_char[0] || c==escape_char[0]) {
putc(c, stdout); if (escape_char)
else fputs(escape_char,stdout);
fprintf(stdout, "\\%03o", c); else /* double the quote char if no escape char passed */
fputs(quote_char,stdout);
}
putc(c, stdout);
} }
} }
else else
for (s=col_val;*s;s++) { for (s=col_val;(c=*s);s++) {
if (strlen(quote_char)==1 && *s==quote_char[0]) { if (strlen(quote_char)==1 && c==quote_char[0] || c==escape_char[0]) {
/* double the char if no escape char passed */ if (escape_char)
if (!escape_char) { fputs(escape_char,stdout);
fprintf(stdout,"%s%s",quote_char,quote_char); else /* double the quote char if no escape char passed */
} else { fputs(quote_char,stdout);
fprintf(stdout,"%s%s",escape_char,quote_char);
}
} }
else fprintf(stdout,"%c",*s); putc(c, stdout);
} }
fprintf(stdout,quote_char); fputs(quote_char,stdout);
} else { } else
fprintf(stdout,"%s",col_val); fputs(col_val,stdout);
}
} }
int int
main(int argc, char **argv) main(int argc, char **argv)
@ -82,6 +83,8 @@ main(int argc, char **argv)
char sanitize = 0; char sanitize = 0;
char *namespace = ""; char *namespace = "";
int opt; int opt;
char *value;
size_t length;
while ((opt=getopt(argc, argv, "HQq:X:d:D:R:I:N:S"))!=-1) { while ((opt=getopt(argc, argv, "HQq:X:d:D:R:I:N:S"))!=-1) {
switch (opt) { switch (opt) {
@ -168,6 +171,7 @@ main(int argc, char **argv)
if (insert_dialect) if (insert_dialect)
if (!mdb_set_default_backend(mdb, insert_dialect)) { if (!mdb_set_default_backend(mdb, insert_dialect)) {
fprintf(stderr, "Invalid backend type\n"); fprintf(stderr, "Invalid backend type\n");
if (escape_char) g_free (escape_char);
mdb_exit(); mdb_exit();
exit(1); exit(1);
} }
@ -227,22 +231,26 @@ main(int argc, char **argv)
} }
for (j=0;j<table->num_cols;j++) { for (j=0;j<table->num_cols;j++) {
if (j>0)
fputs(delimiter, stdout);
col=g_ptr_array_index(table->columns,j); col=g_ptr_array_index(table->columns,j);
if ((col->col_type == MDB_OLE)
&& ((j==0) || (col->cur_value_len))) {
mdb_ole_read(mdb, col, bound_values[j], MDB_BIND_SIZE);
}
if (j>0) {
fprintf(stdout,delimiter);
}
if (!bound_lens[j]) { if (!bound_lens[j]) {
print_col(insert_dialect?"NULL":"",0,col->col_type, 0, quote_char, escape_char); if (insert_dialect)
fputs("NULL", stdout);
} else { } else {
print_col(bound_values[j], quote_text, col->col_type, bound_lens[j], quote_char, escape_char); if (col->col_type == MDB_OLE) {
value = mdb_ole_read_full(mdb, col, &length);
} else {
value = bound_values[j];
length = bound_lens[j];
}
print_col(value, quote_text, col->col_type, length, quote_char, escape_char);
if (col->col_type == MDB_OLE)
free(value);
} }
} }
if (insert_dialect) fprintf(stdout,");"); if (insert_dialect) fputs(");", stdout);
fprintf(stdout, row_delimiter); fputs(row_delimiter, stdout);
} }
for (j=0;j<table->num_cols;j++) { for (j=0;j<table->num_cols;j++) {
g_free(bound_values[j]); g_free(bound_values[j]);
@ -258,7 +266,7 @@ main(int argc, char **argv)
mdb_close(mdb); mdb_close(mdb);
mdb_exit(); mdb_exit();
exit(0); return 0;
} }
static char *escapes(char *s) static char *escapes(char *s)

View File

@ -80,21 +80,11 @@ main(int argc, char **argv)
} }
if (found) { if (found) {
MdbColumn *col; MdbColumn *col = g_ptr_array_index(table->columns, col_num - 1);
gchar kkd_ptr[MDB_MEMO_OVERHEAD]; size_t size;
void *kkd_pg = g_malloc(200000); void *kkd = mdb_ole_read_full(mdb, col, &size);
size_t len, pos; dump_kkd(mdb, kkd, size);
memcpy(kkd_ptr, buf, MDB_MEMO_OVERHEAD); free(kkd);
col = g_ptr_array_index(table->columns, col_num - 1);
len = mdb_ole_read(mdb, col, kkd_ptr, MDB_BIND_SIZE);
memcpy(kkd_pg, buf, len);
pos = len;
while ((len = mdb_ole_read_next(mdb, col, kkd_ptr))) {
memcpy(kkd_pg + pos, buf, len);
pos += len;
}
dump_kkd(mdb, kkd_pg, pos);
g_free(kkd_pg);
} }
g_free(buf); g_free(buf);
@ -121,6 +111,7 @@ void dump_kkd(MdbHandle *mdb, void *kkd, size_t len)
#endif #endif
if (strcmp("KKD", kkd)) { if (strcmp("KKD", kkd)) {
fprintf(stderr, "Unrecognized format.\n"); fprintf(stderr, "Unrecognized format.\n");
buffer_dump(kkd, 0, len);
return; return;
} }