support for compressed index keys and trailing leaves

This commit is contained in:
brianb 2004-02-11 22:05:13 +00:00
parent 06115eaf59
commit a62f26df50
5 changed files with 139 additions and 44 deletions

View File

@ -525,13 +525,18 @@ mdb_read_next_dpg(MdbTableDef *table)
MdbCatalogEntry *entry = table->entry; MdbCatalogEntry *entry = table->entry;
MdbHandle *mdb = entry->mdb; MdbHandle *mdb = entry->mdb;
int map_type; int map_type;
guint32 pg;
#ifndef SLOW_READ #ifndef SLOW_READ
map_type = table->usage_map[0]; map_type = table->usage_map[0];
if (map_type==0) { if (map_type==0) {
return mdb_read_next_dpg_by_map0(table); pg = mdb_read_next_dpg_by_map0(table);
//printf("Next dpg = %lu\n", pg);
return pg;
} else if (map_type==1) { } else if (map_type==1) {
return mdb_read_next_dpg_by_map1(table); pg = mdb_read_next_dpg_by_map1(table);
//printf("Next dpg = %lu\n", pg);
return pg;
} else { } else {
fprintf(stderr,"Warning: unrecognized usage map type: %d, defaulting to brute force read\n",table->usage_map[0]); fprintf(stderr,"Warning: unrecognized usage map type: %d, defaulting to brute force read\n",table->usage_map[0]);
} }
@ -541,7 +546,7 @@ int map_type;
if (!mdb_read_pg(mdb, table->cur_phys_pg++)) if (!mdb_read_pg(mdb, table->cur_phys_pg++))
return 0; return 0;
} while (mdb->pg_buf[0]!=0x01 || mdb_pg_get_int32(mdb, 4)!=entry->table_pg); } while (mdb->pg_buf[0]!=0x01 || mdb_pg_get_int32(mdb, 4)!=entry->table_pg);
/* fprintf(stderr,"returning new page %ld\n", table->cur_phys_pg); */ /* fprintf(stderr,"returning new page %ld\n", table->cur_phys_pg); */
return table->cur_phys_pg; return table->cur_phys_pg;
} }
int mdb_rewind_table(MdbTableDef *table) int mdb_rewind_table(MdbTableDef *table)

View File

@ -23,6 +23,25 @@
#include "dmalloc.h" #include "dmalloc.h"
#endif #endif
/*
typedef struct {
int pg_size;
guint16 row_count_offset;
guint16 tab_num_rows_offset;
guint16 tab_num_cols_offset;
guint16 tab_num_idxs_offset;
guint16 tab_num_ridxs_offset;
guint16 tab_usage_map_offset;
guint16 tab_first_dpg_offset;
guint16 tab_cols_start_offset;
guint16 tab_ridx_entry_size;
guint16 col_fixed_offset;
guint16 col_size_offset;
guint16 col_num_offset;
guint16 tab_col_entry_size;
guint16 tab_free_map_offset;
} MdbFormatConstants;
*/
MdbFormatConstants MdbJet4Constants = { MdbFormatConstants MdbJet4Constants = {
4096, 0x0c, 16, 45, 47, 51, 55, 56, 63, 12, 15, 23, 5, 25, 59 4096, 0x0c, 16, 45, 47, 51, 55, 56, 63, 12, 15, 23, 5, 25, 59
}; };

View File

@ -212,7 +212,7 @@ mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg)
//cache_int = sarg->value.i * -1; //cache_int = sarg->value.i * -1;
c = (unsigned char *) &(idx_sarg->value.i); c = (unsigned char *) &(idx_sarg->value.i);
c[0] |= 0x80; c[0] |= 0x80;
printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]); //printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]);
break; break;
case MDB_INT: case MDB_INT:
@ -252,7 +252,7 @@ int lastchar;
} }
#endif #endif
int int
mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len) mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, unsigned char *buf, int len)
{ {
int i, j; int i, j;
MdbColumn *col; MdbColumn *col;
@ -265,16 +265,17 @@ mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len)
//fprintf(stderr,"mdb_index_test_sargs called on "); //fprintf(stderr,"mdb_index_test_sargs called on ");
//for (i=0;i<len;i++) //for (i=0;i<len;i++)
//fprintf(stderr,"%02x ",mdb->pg_buf[offset+i]); //fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]);
//fprintf(stderr,"\n"); //fprintf(stderr,"\n");
for (i=0;i<idx->num_keys;i++) { for (i=0;i<idx->num_keys;i++) {
c_offset++; /* the per column null indicator/flags */ //c_offset++; /* the per column null indicator/flags */
col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1);
/* /*
* This will go away eventually * This will go away eventually
*/ */
if (col->col_type==MDB_TEXT) { if (col->col_type==MDB_TEXT) {
c_len = strlen(&mdb->pg_buf[offset + c_offset]); //c_len = strlen(&mdb->pg_buf[offset + c_offset]);
c_len = strlen(buf);
} else { } else {
c_len = col->col_size; c_len = col->col_size;
//fprintf(stderr,"Only text types currently supported. How did we get here?\n"); //fprintf(stderr,"Only text types currently supported. How did we get here?\n");
@ -299,7 +300,8 @@ mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len)
/* XXX - kludge */ /* XXX - kludge */
node.op = sarg->op; node.op = sarg->op;
node.value = sarg->value; node.value = sarg->value;
field.value = &mdb->pg_buf[offset + c_offset]; //field.value = &mdb->pg_buf[offset + c_offset];
field.value = buf;
field.siz = c_len; field.siz = c_len;
field.is_null = FALSE; field.is_null = FALSE;
if (!mdb_test_sarg(mdb, col, &node, &field)) { if (!mdb_test_sarg(mdb, col, &node, &field)) {
@ -364,8 +366,13 @@ mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
* we are simply done. (there is no page to find * we are simply done. (there is no page to find
*/ */
if (mdb->pg_buf[0]==MDB_PAGE_LEAF) if (mdb->pg_buf[0]==MDB_PAGE_LEAF) {
/* Indexes can have leaves at the end that don't appear
* in the upper tree, stash the last index found so
* we can follow it at the end. */
chain->last_leaf_found = ipg->pg;
return ipg; return ipg;
}
/* /*
* apply sargs here, currently we don't * apply sargs here, currently we don't
@ -387,7 +394,7 @@ mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
*/ */
newipg = mdb_chain_add_page(mdb, chain, pg); newipg = mdb_chain_add_page(mdb, chain, pg);
newipg = mdb_find_next_leaf(mdb, idx, chain); newipg = mdb_find_next_leaf(mdb, idx, chain);
printf("returning pg %lu\n",newipg->pg); //printf("returning pg %lu\n",newipg->pg);
return newipg; return newipg;
} while (!passed); } while (!passed);
/* no more pages */ /* no more pages */
@ -438,6 +445,35 @@ mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
return ipg; return ipg;
} }
/*
 * Unwind the index chain and search for a new leaf node.
 *
 * Pops levels off the chain (by decrementing chain->cur_depth) and, at each
 * level, asks mdb_find_next_leaf() for another leaf.  When a leaf is found,
 * mdb_index_find_next_on_page() is called on it once before it is returned.
 *
 * Returns the leaf page that was found, or NULL when the chain is already
 * at depth 1 on entry or ends up at depth 1 after unwinding (i.e. nothing
 * further can be reached through the upper levels of the tree).
 */
MdbIndexPage *
mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
{
	MdbIndexPage *ipg = NULL;

	/* already at the top of the chain: there is nothing to unwind */
	if (chain->cur_depth==1) {
		return NULL;
	}

	/*
	 * unwind the stack until we find something or reach
	 * the top.
	 */
	while (chain->cur_depth>1 && ipg==NULL) {
		chain->cur_depth--;
		ipg = mdb_find_next_leaf(mdb, idx, chain);
		if (ipg) mdb_index_find_next_on_page(mdb, ipg);
	}

	/* back at the top of the chain: report that the tree is exhausted */
	if (chain->cur_depth==1) {
		return NULL;
	}

	/*
	 * The loop can only exit with cur_depth > 1 when a leaf was found, so
	 * ipg is non-NULL here.  The original fell off the end of the function
	 * without returning it, leaving the caller with an indeterminate value.
	 */
	return ipg;
}
/* /*
* the main index function. * the main index function.
* caller provides an index chain which is the current traversal of index * caller provides an index chain which is the current traversal of index
@ -455,7 +491,8 @@ mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32
{ {
MdbIndexPage *ipg; MdbIndexPage *ipg;
int passed = 0; int passed = 0;
int idx_sz;
int idx_start = 0;
ipg = mdb_index_read_bottom_pg(mdb, idx, chain); ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
@ -468,30 +505,44 @@ mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32
* if no more rows on this leaf, try to find a new leaf * if no more rows on this leaf, try to find a new leaf
*/ */
if (!mdb_index_find_next_on_page(mdb, ipg)) { if (!mdb_index_find_next_on_page(mdb, ipg)) {
//printf("page %lu finished\n",ipg->pg); if (!chain->clean_up_mode) {
if (chain->cur_depth==1) { if (!(ipg = mdb_index_unwind(mdb, idx, chain)))
//printf("cur_depth == 1 we're out\n"); chain->clean_up_mode = 1;
return 0;
} }
/* if (chain->clean_up_mode) {
* unwind the stack until we find something or reach //printf("in cleanup mode\n");
* the top.
*/ if (!chain->last_leaf_found) return 0;
ipg = 0; mdb_read_pg(mdb, chain->last_leaf_found);
while (chain->cur_depth>1 && ipg==0) { chain->last_leaf_found = mdb_pg_get_int24(mdb, 0x0c);
//printf("chain depth %d\n", chain->cur_depth); //printf("next leaf %lu\n", chain->last_leaf_found);
chain->cur_depth--; mdb_read_pg(mdb, chain->last_leaf_found);
ipg = mdb_find_next_leaf(mdb, idx, chain); /* reuse the chain for cleanup mode */
if (ipg) mdb_index_find_next_on_page(mdb, ipg); chain->cur_depth = 1;
ipg = &chain->pages[0];
mdb_index_page_init(ipg);
ipg->pg = chain->last_leaf_found;
//printf("next on page %d\n",
if (!mdb_index_find_next_on_page(mdb, ipg))
return 0;
} }
if (chain->cur_depth==1)
return 0;
} }
*row = mdb->pg_buf[ipg->offset + ipg->len - 1]; *row = mdb->pg_buf[ipg->offset + ipg->len - 1];
*pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4); *pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4);
//printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len); //printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len);
idx_sz = 4;
if (ipg->len - 4 < idx_sz) {
//printf("short index found\n");
//buffer_dump(ipg->cache_value, 0, idx_sz);
memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len);
//buffer_dump(ipg->cache_value, 0, idx_sz);
} else {
idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz);
}
passed = mdb_index_test_sargs(mdb, idx, ipg->offset, ipg->len); //idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
passed = mdb_index_test_sargs(mdb, idx, ipg->cache_value, idx_sz);
ipg->offset += ipg->len; ipg->offset += ipg->len;
} while (!passed); } while (!passed);

View File

@ -44,13 +44,15 @@ unsigned char mdb_col_needs_size(int col_type)
} }
} }
MdbTableDef *mdb_read_table(MdbCatalogEntry *entry) MdbTableDef *
mdb_read_table(MdbCatalogEntry *entry)
{ {
MdbTableDef *table; MdbTableDef *table;
MdbHandle *mdb = entry->mdb; MdbHandle *mdb = entry->mdb;
MdbFormatConstants *fmt = mdb->fmt; MdbFormatConstants *fmt = mdb->fmt;
int len; int len;
int rownum, row_start, row_end; int rownum, row_start, row_end;
guint32 pg;
table = mdb_alloc_tabledef(entry); table = mdb_alloc_tabledef(entry);
@ -66,7 +68,8 @@ int rownum, row_start, row_end;
/* grab a copy of the usage map */ /* grab a copy of the usage map */
rownum = mdb->pg_buf[fmt->tab_usage_map_offset]; rownum = mdb->pg_buf[fmt->tab_usage_map_offset];
mdb_read_alt_pg(mdb, mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1)); pg = mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1);
mdb_read_alt_pg(mdb, pg);
mdb_swap_pgbuf(mdb); mdb_swap_pgbuf(mdb);
row_start = mdb_pg_get_int16(mdb, (fmt->row_count_offset + 2) + (rownum*2)); row_start = mdb_pg_get_int16(mdb, (fmt->row_count_offset + 2) + (rownum*2));
row_end = mdb_find_end_of_row(mdb, rownum); row_end = mdb_find_end_of_row(mdb, rownum);
@ -79,7 +82,7 @@ int rownum, row_start, row_end;
/* swap back */ /* swap back */
mdb_swap_pgbuf(mdb); mdb_swap_pgbuf(mdb);
#if MDB_DEBUG_USAGE #if MDB_DEBUG_USAGE
printf ("usage map found on page %ld start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1), row_start, row_end); printf ("usage map found on page %ld rownum %d start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1), rownum, row_start, row_end);
#endif #endif
@ -96,6 +99,9 @@ int rownum, row_start, row_end;
memcpy(table->free_usage_map, &mdb->pg_buf[row_start], table->freemap_sz); memcpy(table->free_usage_map, &mdb->pg_buf[row_start], table->freemap_sz);
mdb_swap_pgbuf(mdb); mdb_swap_pgbuf(mdb);
#endif #endif
#if MDB_DEBUG_USAGE
printf ("free map found on page %ld rownum %d start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_free_map_offset + 1), rownum, row_start, row_end);
#endif
table->first_data_pg = mdb_pg_get_int16(mdb, fmt->tab_first_dpg_offset); table->first_data_pg = mdb_pg_get_int16(mdb, fmt->tab_first_dpg_offset);
@ -326,6 +332,8 @@ guint32 pgnum;
} }
if (table->usage_map) { if (table->usage_map) {
printf("pages reserved by this object\n"); printf("pages reserved by this object\n");
printf("usage map pg %lu\n", table->map_base_pg);
printf("free map pg %lu\n", table->freemap_base_pg);
pgnum = mdb_get_int32(table->usage_map,1); pgnum = mdb_get_int32(table->usage_map,1);
/* the first 5 bytes of the usage map mean something */ /* the first 5 bytes of the usage map mean something */
coln = 0; coln = 0;
@ -342,5 +350,6 @@ guint32 pgnum;
pgnum++; pgnum++;
} }
} }
printf("\n");
} }
} }

View File

@ -134,12 +134,8 @@ int eod, len; /* end of data */
/* find the end of data pointer */ /* find the end of data pointer */
eod = mdb_pg_get_int16(mdb, row_end - 3 - var_cols*2 - bitmask_sz); eod = mdb_pg_get_int16(mdb, row_end - 3 - var_cols*2 - bitmask_sz);
if (IS_JET4(mdb)) { col_start = 2;
col_start = 2;
} else {
/* data starts at 1 */
col_start = 1;
}
/* actual cols on this row */ /* actual cols on this row */
fixed_cols_found = 0; fixed_cols_found = 0;
var_cols_found = 0; var_cols_found = 0;
@ -199,7 +195,11 @@ int num_of_jumps = 0, jumps_used = 0;
int eod, len; /* end of data */ int eod, len; /* end of data */
num_cols = mdb->pg_buf[row_start]; num_cols = mdb->pg_buf[row_start];
if (num_cols != table->num_cols) {
fprintf(stderr,"WARNING: number of table columns does not match number of row columns, strange results may occur\n");
}
/* how many fixed cols? */
for (i = 0; i < table->num_cols; i++) { for (i = 0; i < table->num_cols; i++) {
col = g_ptr_array_index (table->columns, i); col = g_ptr_array_index (table->columns, i);
if (mdb_is_fixed_col(col)) { if (mdb_is_fixed_col(col)) {
@ -209,6 +209,7 @@ int eod, len; /* end of data */
fields[totcols++].is_fixed = 1; fields[totcols++].is_fixed = 1;
} }
} }
/* how many var cols? */
for (i = 0; i < table->num_cols; i++) { for (i = 0; i < table->num_cols; i++) {
col = g_ptr_array_index (table->columns, i); col = g_ptr_array_index (table->columns, i);
if (!mdb_is_fixed_col(col)) { if (!mdb_is_fixed_col(col)) {
@ -251,7 +252,6 @@ int eod, len; /* end of data */
} }
} }
//fprintf(stderr, "col_start: %d\n", col_start);
/* if fixed columns add up to more than 256, we need a jump */ /* if fixed columns add up to more than 256, we need a jump */
int col_ptr = row_end - bitmask_sz - num_of_jumps - 1; int col_ptr = row_end - bitmask_sz - num_of_jumps - 1;
if (col_start >= 256) { if (col_start >= 256) {
@ -261,6 +261,9 @@ int eod, len; /* end of data */
} }
col_start = row_start; col_start = row_start;
/* compute the number of jumps (row size - overhead) / 256
* but you have to include the jump table itself, thus
* the loop. */
while (col_start+256 < row_end-bitmask_sz-1-var_cols-num_of_jumps){ while (col_start+256 < row_end-bitmask_sz-1-var_cols-num_of_jumps){
col_start += 256; col_start += 256;
num_of_jumps++; num_of_jumps++;
@ -268,12 +271,18 @@ int eod, len; /* end of data */
if (mdb->pg_buf[col_ptr]==0xFF) { if (mdb->pg_buf[col_ptr]==0xFF) {
col_ptr--; col_ptr--;
} }
/* col_start is now the offset to the first variable length field */
col_start = mdb->pg_buf[col_ptr]; col_start = mdb->pg_buf[col_ptr];
for (j=0;j<table->num_cols;j++) { for (j=0;j<table->num_cols;j++) {
col = g_ptr_array_index(table->columns,j); col = g_ptr_array_index(table->columns,j);
/* if it's a var_col and we aren't looking at a column
* added after this row was created */
if (!mdb_is_fixed_col(col) && ++var_cols_found <= var_cols) { if (!mdb_is_fixed_col(col) && ++var_cols_found <= var_cols) {
/* if the position of this var_col matches the number
* in the current jump table entry, then increment
* the jump_used and adjust the col/row_start */
if (var_cols_found == mdb->pg_buf[row_end-bitmask_sz-jumps_used-1] && if (var_cols_found == mdb->pg_buf[row_end-bitmask_sz-jumps_used-1] &&
jumps_used < num_of_jumps) { jumps_used < num_of_jumps) {
row_start += 256; row_start += 256;
@ -281,6 +290,8 @@ int eod, len; /* end of data */
jumps_used++; jumps_used++;
} }
/* if we have the last var_col, use the eod offset to
* figure out where the end is */
if (var_cols_found==var_cols) { if (var_cols_found==var_cols) {
len=eod - col_start; len=eod - col_start;
//printf("len = %d eod %d col_start %d\n",len, eod, col_start); //printf("len = %d eod %d col_start %d\n",len, eod, col_start);