2001-04-02 06:10:15 +08:00
/* MDB Tools - A library for reading MS Access database file
2004-02-15 03:40:35 +08:00
* Copyright ( C ) 2000 - 2004 Brian Bruns
2001-04-02 06:10:15 +08:00
*
* This library is free software ; you can redistribute it and / or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This library is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* Library General Public License for more details .
*
* You should have received a copy of the GNU Library General Public
2011-08-29 07:53:29 +08:00
* License along with this library ; if not , write to the Free Software
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2001-04-02 06:10:15 +08:00
*/
# include "mdbtools.h"
2017-12-05 07:13:28 +08:00
# ifdef HAVE_LIBMSWSTR
# include <mswstr/mswstr.h>
# endif
2003-01-29 07:51:06 +08:00
# ifdef DMALLOC
# include "dmalloc.h"
# endif
2004-01-10 05:05:56 +08:00
MdbIndexPage * mdb_index_read_bottom_pg ( MdbHandle * mdb , MdbIndex * idx , MdbIndexChain * chain ) ;
MdbIndexPage * mdb_chain_add_page ( MdbHandle * mdb , MdbIndexChain * chain , guint32 pg ) ;
2001-07-11 06:35:37 +08:00
/*
 * Byte translation table used by mdb_index_hash_text() for JET3 files.
 * The table is indexed by the (unsigned) input byte; the entry is the byte
 * written into the hash.  An entry of 0x00 means no translation is defined
 * for that input byte; mdb_index_hash_text() reports those on stderr.
 * Upper- and lower-case ASCII letters map to the same value.
 */
char idx_to_text[] = {
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*   0-7   0x00-0x07 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*   8-15  0x08-0x0f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*  16-23  0x10-0x17 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*  24-31  0x18-0x1f */
	' ',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*  32-39  0x20-0x27 */
	0x00, 0x00, 0x00, 0x00, 0x00, ' ',  ' ',  0x00, /*  40-47  0x28-0x2f */
	'V',  'W',  'X',  'Y',  'Z',  '[',  '\\', ']',  /*  48-55  0x30-0x37 */
	'^',  '_',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*  56-63  0x38-0x3f */
	0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /*  64-71  0x40-0x47 */
	'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /*  72-79  0x48-0x4f */
	's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /*  80-87  0x50-0x57 */
	'|',  '}',  '~',  '5',  '6',  '7',  '8',  '9',  /*  88-95  0x58-0x5f */
	0x00, '`',  'a',  'b',  'd',  'f',  'g',  'h',  /*  96-103 0x60-0x67 */
	'i',  'j',  'k',  'l',  'm',  'o',  'p',  'r',  /* 104-111 0x68-0x6f */
	's',  't',  'u',  'v',  'w',  'x',  'z',  '{',  /* 112-119 0x70-0x77 */
	'|',  '}',  '~',  0x00, 0x00, 0x00, 0x00, 0x00, /* 120-127 0x78-0x7f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 128-135 0x80-0x87 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 136-143 0x88-0x8f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 144-151 0x90-0x97 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 152-159 0x98-0x9f */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 160-167 0xa0-0xa7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 168-175 0xa8-0xaf */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 176-183 0xb0-0xb7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 184-191 0xb8-0xbf */
	0x00, 0x00, 0x00, 0x00, 0x00, '`',  0x00, 0x00, /* 192-199 0xc0-0xc7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 200-207 0xc8-0xcf */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 208-215 0xd0-0xd7 */
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 216-223 0xd8-0xdf */
	0x00, '`',  0x00, '`',  '`',  '`',  0x00, 0x00, /* 224-231 0xe0-0xe7 */
	'f',  'f',  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 232-239 0xe8-0xef */
	0x00, 0x00, 0x00, 'r',  0x00, 0x00, 'r',  0x00, /* 240-247 0xf0-0xf7 */
	(char)0x81, 0x00, 0x00, 0x00, 'x', 0x00, 0x00, 0x00, /* 248-255 0xf8-0xff */
};
2001-04-02 06:10:15 +08:00
2017-12-05 07:13:28 +08:00
/*
 * Fallback byte translation table used by mdb_index_hash_text() for
 * non-JET3 files when the MSWSTR sort-key routine is not compiled in or
 * fails.
 *
 * WARNING: this table is not accurate.  It lacks a lot of the special
 * processing that real sort keys need and will not work for most indexes,
 * e.g. those whose keys contain hyphens.  Do not rely on it.
 */
char idx_to_text_ling[] = {
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /*   0-7   0x00-0x07 */
	0x01, 0x08, 0x08, 0x08, 0x08, 0x08, 0x01, 0x01, /*   8-15  0x08-0x0F */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /*  16-23  0x10-0x17 */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /*  24-31  0x18-0x1F */
	0x07, 0x09, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x01, /*  32-39  0x20-0x27 */
	0x14, 0x16, 0x18, ',',  0x1A, 0x01, 0x1C, 0x1E, /*  40-47  0x28-0x2F */
	'6',  '8',  ':',  '<',  '>',  '@',  'B',  'D',  /*  48-55  0x30-0x37 */
	'F',  'H',  ' ',  '"',  '.',  '0',  '2',  '$',  /*  56-63  0x38-0x3F */
	'&',  'J',  'L',  'M',  'O',  'Q',  'S',  'U',  /*  64-71  0x40-0x47 */
	'W',  'Y',  '[',  '\\', '^',  '`',  'b',  'd',  /*  72-79  0x48-0x4F */
	'f',  'h',  'i',  'k',  'm',  'o',  'q',  's',  /*  80-87  0x50-0x57 */
	'u',  'v',  'x',  '\'', ')',  '*',  '+',  '+',  /*  88-95  0x58-0x5F */
	'+',  'J',  'L',  'M',  'O',  'Q',  'S',  'U',  /*  96-103 0x60-0x67 */
	'W',  'Y',  '[',  '\\', '^',  '`',  'b',  'd',  /* 104-111 0x68-0x6F */
	'f',  'h',  'i',  'k',  'm',  'o',  'q',  's',  /* 112-119 0x70-0x77 */
	'u',  'v',  'x',  '+',  '+',  '+',  '+',  0x01, /* 120-127 0x78-0x7F */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 128-135 0x80-0x87 */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 136-143 0x88-0x8F */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 144-151 0x90-0x97 */
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 152-159 0x98-0x9F */
	0x08, '+',  '4',  '4',  '4',  '4',  '+',  '4',  /* 160-167 0xA0-0xA7 */
	'+',  '4',  'J',  '3',  '4',  0x01, '4',  '+',  /* 168-175 0xA8-0xAF */
	'4',  '3',  ':',  '<',  '+',  '4',  '4',  '4',  /* 176-183 0xB0-0xB7 */
	'+',  '8',  'd',  '3',  '7',  '7',  '7',  '+',  /* 184-191 0xB8-0xBF */
	'J',  'J',  'J',  'J',  'J',  'J',  'J',  'M',  /* 192-199 0xC0-0xC7 */
	'Q',  'Q',  'Q',  'Q',  'Y',  'Y',  'Y',  'Y',  /* 200-207 0xC8-0xCF */
	'O',  'b',  'd',  'd',  'd',  'd',  'd',  '3',  /* 208-215 0xD0-0xD7 */
	'd',  'o',  'o',  'o',  'o',  'v',  'm',  'k',  /* 216-223 0xD8-0xDF */
	'J',  'J',  'J',  'J',  'J',  'J',  'J',  'M',  /* 224-231 0xE0-0xE7 */
	'Q',  'Q',  'Q',  'Q',  'Y',  'Y',  'Y',  'Y',  /* 232-239 0xE8-0xEF */
	'O',  'b',  'd',  'd',  'd',  'd',  'd',  '3',  /* 240-247 0xF0-0xF7 */
	'd',  'o',  'o',  'o',  'o',  'v',  'm',  'v',  /* 248-255 0xF8-0xFF */
};
2016-04-08 06:23:10 +08:00
/* JET Red (v4) Index definition byte layouts
*
* Based on :
*
* http : //jabakobob.net/mdb/table-page.html
* https : //github.com/jahlborn/jackcess
*
* plus inspection of JET ( Red ) 4 databases . ( JET 3 format has fewer
* fields - - some of the ones below omitted , and others narrower . )
*
* See also JET Blue ( Extensible Storage Engine ) format information :
*
* https : //github.com/libyal/libesedb/blob/master/documentation/Extensible%20Storage%20Engine%20%28ESE%29%20Database%20File%20%28EDB%29%20format.asciidoc
*
* which is a later Microsoft embedded database format with the same
* early base format .
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* Index Column Definitions :
* - for each " non foreign key " index ( ie pidx - > index_type ! = 2 ) , a list
* of columns indexed
*
* Repeated table - > num_real_idxs times :
*
* Offset Bytes Meaning
* 0x0000 4 UNKNOWN ; seems to be type marker , usually 1923 or 0
*
* 0x0004 2 Column 1 ID
* 0x0006 1 Column 1 Flags
* 0x0007 2 Column 2 ID
* 0x0009 1 Column 2 Flags
* 0x000A 2 Column 3 ID
* 0x000C 1 Column 3 Flags
* 0x000D 2 Column 4 ID
* 0x000F 1 Column 4 Flags
* 0x0010 2 Column 5 ID
* 0x0012 1 Column 5 Flags
* 0x0013 2 Column 6 ID
* 0x0015 1 Column 6 Flags
* 0x0016 2 Column 7 ID
* 0x0018 1 Column 7 Flags
* 0x0019 2 Column 8 ID
* 0x001B 1 Column 8 Flags
* 0x001C 2 Column 9 ID
* 0x001E 1 Column 9 Flags
* 0x001F 2 Column 10 ID
* 0x0021 1 Column 10 Flags
*
* 0x0022 1 Usage Map row
* 0x0023 3 Usage Map page ( 24 - bit )
* 0x0026 4 First index page
* 0x002A 4 UNKNOWN
* 0x002E 2 Index Flags
* 0x0030 4 UNKNOWN ; seems to always be 0
* 0x0034
*
* Column ID of 0xFFFF ( - 1 ) means " not used " or " end of used columns " .
* Column Flags :
* - 0x01 = Ascending
*
* Index Flags :
* - 0x0001 = Unique index
* - 0x0002 = Ignore NULLs
* - 0x0008 = Required Index
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* Index Definitions
* - for each index ( normal , primary key , foreign key ) , details on the
* index .
*
* - this appears to be the union of information required for normal /
* primary key indexes , and the information required for foreign key
* indexes .
*
* Repeated table - > num_idxs times :
*
* Offset Bytes Meaning
* 0x0000 4 UNKNOWN ; apparently a type marker , usually 1625 or 0
* 0x0004 4 Logical Index Number
* 0x0008 4 Index Column Definition Entry
* 0x000C 1 FK Index Type
* 0x000D 4 FK Index Number
* 0x0011 4 FK Index Table Page Number
* 0x0015 1 Flags : Update Action
* 0x0016 1 Flags : Delete Action
* 0x0017 1 Index Type
* 0x0018 4 UNKNOWN ; seems to always be 0
* 0x001C
*
* Where Index Type is :
* 0x00 = normal
* 0x01 = primary key
* 0x02 = foreign key index reference
*/
/* Debugging helper to dump out raw hex values of index definition */
/*
static void hexdump ( unsigned char * tmpbuf , int size ) {
2017-12-04 01:18:21 +08:00
mdb_buffer_dump ( tmpbuf , 0 , size ) ;
2016-04-08 06:23:10 +08:00
}
*/
2003-01-29 07:51:06 +08:00
2003-01-13 06:59:41 +08:00
/*
 * Read the index definitions of a table into table->indices.
 *
 * The function makes three passes over the index area that starts at
 * table->index_start on the page that is current on entry:
 *
 *   1. the per-index definition records (one per table->num_idxs), which
 *      give each index its number and type;
 *   2. the index names, in the same order;
 *   3. the column-definition entries (one per non-foreign-key index),
 *      which give the key columns, their sort order, the first index page
 *      and the flags.
 *
 * table->num_real_idxs is recomputed from the definitions actually read
 * (the number of indexes whose type is not 2), because the stored value is
 * not always right (seen on the Northwind Orders table).
 *
 * Results are delivered through table->indices; the function itself always
 * returns NULL.
 */
GPtrArray *
mdb_read_indices(MdbTableDef *table)
{
	MdbCatalogEntry *entry = table->entry;
	MdbHandle *mdb = entry->mdb;
	MdbFormatConstants *fmt = mdb->fmt;
	MdbIndex *pidx;
	unsigned int i, j, k;
	int key_num, col_num, cleaned_col_num;
	int cur_pos, name_sz, idx2_sz, type_offset;
	int col_def_sz;	/* size of one column-definition entry */
	int index_start_pg = mdb->cur_pg;
	gchar *tmpbuf;

	table->indices = g_ptr_array_new();

	if (IS_JET3(mdb)) {
		col_def_sz = 39;
		idx2_sz = 20;
		type_offset = 19;
	} else {
		col_def_sz = 52;
		idx2_sz = 28;
		type_offset = 23;
	}
	/* The definition records follow the column-definition entries; the
	 * stored num_real_idxs is used here, before it gets recomputed. */
	cur_pos = table->index_start + col_def_sz * table->num_real_idxs;

	/* Pass 1: read in the definitions of table indexes */
	table->num_real_idxs = 0;
	tmpbuf = (gchar *) g_malloc(idx2_sz);
	for (i = 0; i < table->num_idxs; i++) {
		read_pg_if_n(mdb, tmpbuf, &cur_pos, idx2_sz);
		pidx = (MdbIndex *) g_malloc0(sizeof(MdbIndex));
		pidx->table = table;
		pidx->index_num = mdb_get_int16(tmpbuf, 4);
		pidx->index_type = tmpbuf[type_offset];
		g_ptr_array_add(table->indices, pidx);
		if (pidx->index_type != 2)
			table->num_real_idxs++;
	}
	g_free(tmpbuf);

	/* Pass 2: pick up the names of each index */
	for (i = 0; i < table->num_idxs; i++) {
		pidx = g_ptr_array_index(table->indices, i);
		/* the name length prefix is 1 byte in JET3, 2 bytes otherwise */
		if (IS_JET3(mdb))
			name_sz = read_pg_if_8(mdb, &cur_pos);
		else
			name_sz = read_pg_if_16(mdb, &cur_pos);
		tmpbuf = g_malloc(name_sz);
		read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz);
		mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME);
		g_free(tmpbuf);
	}

	/* Pass 3: pick up the column definitions for normal/primary key indexes.
	 * NOTE: the match should possibly be by the "index column definition
	 * entry" field rather than index_num; in files encountered both seem
	 * to be the same (left with index_num until a counter example is found).
	 */
	mdb_read_alt_pg(mdb, entry->table_pg);
	mdb_read_pg(mdb, index_start_pg);
	cur_pos = table->index_start;
	for (i = 0; i < table->num_real_idxs; i++) {
		/* look for index number i */
		for (j = 0; j < table->num_idxs; ++j) {
			pidx = g_ptr_array_index(table->indices, j);
			if (pidx->index_type != 2 && pidx->index_num == i)
				break;
		}
		if (j == table->num_idxs) {
			fprintf(stderr, "ERROR: can't find index #%d.\n", i);
			/* Step over the whole entry so that the following
			 * entries are still read from the right offset. */
			cur_pos += col_def_sz;
			continue;
		}

		/* non-JET3 entries start with a 4-byte marker */
		if (!IS_JET3(mdb))
			cur_pos += 4;

		pidx->num_rows = mdb_get_int32(mdb->alt_pg_buf,
				fmt->tab_cols_start_offset +
				(pidx->index_num * fmt->tab_ridx_entry_size));

		/* Read columns in each index: a 2-byte column id followed by a
		 * 1-byte flag, repeated MDB_MAX_IDX_COLS times. */
		key_num = 0;
		for (j = 0; j < MDB_MAX_IDX_COLS; j++) {
			col_num = read_pg_if_16(mdb, &cur_pos);
			if (col_num == 0xFFFF) {
				/* unused slot: skip its flag byte */
				cur_pos++;
				continue;
			}
			/* here we have the internal column number that does not
			 * always match the table columns because of deletions */
			cleaned_col_num = -1;
			for (k = 0; k < table->num_cols; k++) {
				MdbColumn *col = g_ptr_array_index(table->columns, k);
				if (col->col_num == col_num) {
					cleaned_col_num = k;
					break;
				}
			}
			if (cleaned_col_num == -1) {
				fprintf(stderr, "CRITICAL: can't find column with internal id %d in index %s\n",
					col_num, pidx->name);
				cur_pos++;
				continue;
			}
			/* set column number to a 1 based column number and store */
			pidx->key_col_num[key_num] = cleaned_col_num + 1;
			pidx->key_col_order[key_num] =
				(read_pg_if_8(mdb, &cur_pos)) ? MDB_ASC : MDB_DESC;
			key_num++;
		}
		pidx->num_keys = key_num;

		cur_pos += 4;	/* skip usage map information */
		pidx->first_pg = read_pg_if_32(mdb, &cur_pos);
		if (!IS_JET3(mdb))
			cur_pos += 4;
		pidx->flags = read_pg_if_8(mdb, &cur_pos);
		if (!IS_JET3(mdb))
			cur_pos += 5;
	}
	return NULL;
}
2003-01-10 04:24:19 +08:00
void
2017-12-05 07:13:28 +08:00
mdb_index_hash_text ( MdbHandle * mdb , char * text , char * hash )
2003-01-10 04:24:19 +08:00
{
2017-12-05 07:13:28 +08:00
unsigned int k , len = strlen ( text ) ;
char * transtbl = NULL ;
2003-01-10 04:24:19 +08:00
2017-12-05 07:13:28 +08:00
if ( ! IS_JET3 ( mdb ) )
{
# ifdef __MSWSTR_H__
char * out_ptr = alloca ( ( len + 1 ) * 2 ) ;
int i ;
// mdb_ascii2unicode doesn't work, we don't want unicode compression!
for ( i = 0 ; i < len + 1 ; i + + ) {
out_ptr [ i * 2 ] = text [ i ] ;
out_ptr [ i * 2 + 1 ] = 0 ;
}
if ( ! ( k = DBLCMapStringW ( MAKELCID ( MAKELANGID ( LANG_ENGLISH , SUBLANG_DEFAULT ) , 0 ) ,
LCMAP_LINGUISTIC_CASING | LCMAP_SORTKEY | NORM_IGNORECASE | NORM_IGNOREKANATYPE | NORM_IGNOREWIDTH ,
( WCHAR * ) out_ptr , len , ( LPBYTE ) hash , len * 2 ) ) )
{
len + + ;
# endif
transtbl = idx_to_text_ling ;
# ifdef __MSWSTR_H__
}
# endif
}
else
{
transtbl = idx_to_text ;
}
if ( transtbl )
{
for ( k = 0 ; k < len ; k + + ) {
unsigned char c = ( ( unsigned char * ) ( text ) ) [ k ] ;
hash [ k ] = transtbl [ c ] ;
if ( ! ( hash [ k ] ) ) fprintf ( stderr ,
" No translation available for %02x %d \n " , c , c ) ;
}
hash [ len ] = ' \0 ' ;
2003-01-10 04:24:19 +08:00
}
2017-12-05 07:13:28 +08:00
//printf ("mdb_index_hash_text %s -> %s (%d -> %d)\n", text, hash, len, k);
2003-01-10 04:24:19 +08:00
}
2004-12-02 07:31:48 +08:00
/*
 * Copy sz bytes from src to dest in reverse byte order (used when building
 * the hashed form of a column value).  Nothing is written when sz <= 0.
 * src and dest must not overlap.
 */
void
mdb_index_swap_n(unsigned char *src, int sz, unsigned char *dest)
{
	int pos;

	for (pos = 0; pos < sz; pos++)
		dest[pos] = src[sz - 1 - pos];
}
2003-01-21 00:04:24 +08:00
/*
 * Convert a search argument into the form its value has inside an index,
 * so that it can be compared against raw index entries.
 *
 * col       column the search argument applies to
 * sarg      the original search argument (not modified)
 * idx_sarg  copy of sarg whose value is overwritten with the index form
 *
 * MDB_TEXT values are run through mdb_index_hash_text().  MDB_LONGINT
 * values are byte-swapped and get their sign bit inverted.  All other
 * column types are left as they are.
 */
void
mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg)
{
	unsigned char *c;

	switch (col->col_type) {
	case MDB_TEXT:
		mdb_index_hash_text(col->table->mdbidx, sarg->value.s, idx_sarg->value.s);
		break;

	case MDB_LONGINT:
		idx_sarg->value.i = GUINT32_SWAP_LE_BE(sarg->value.i);
		c = (unsigned char *) &(idx_sarg->value.i);
		/* Index keys store the sign bit inverted.  It has to be
		 * toggled, not merely set: setting it leaves negative values
		 * unchanged, so they would never match their index entry. */
		c[0] ^= 0x80;
		break;

	case MDB_INT:
	default:
		/* no index form implemented for these types */
		break;
	}
}
2004-01-10 09:52:56 +08:00
#if 0
/*
 * Disabled: test a single search argument against the value found at
 * `offset` in the current page buffer.  Returns the result of the matching
 * mdb_test_* helper, or 1 for column types it does not know about.
 */
int
mdb_index_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSarg *sarg, int offset, int len)
{
	char tmpbuf[256];
	int lastchar;

	switch (col->col_type) {
	case MDB_BYTE:
		return mdb_test_int(sarg, mdb_pg_get_byte(mdb, offset));
	case MDB_INT:
		return mdb_test_int(sarg, mdb_pg_get_int16(mdb, offset));
	case MDB_LONGINT:
		return mdb_test_int(sarg, mdb_pg_get_int32(mdb, offset));
	case MDB_TEXT:
		/* copy at most 255 bytes and terminate at len (capped) */
		strncpy(tmpbuf, &mdb->pg_buf[offset], 255);
		lastchar = len > 255 ? 255 : len;
		tmpbuf[lastchar] = '\0';
		return mdb_test_string(sarg, tmpbuf);
	default:
		fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n", col->col_type);
		break;
	}
	return 1;
}
#endif
2003-01-10 04:24:19 +08:00
int
2005-05-02 20:46:33 +08:00
mdb_index_test_sargs ( MdbHandle * mdb , MdbIndex * idx , char * buf , int len )
2003-01-10 04:24:19 +08:00
{
2004-07-17 22:21:43 +08:00
unsigned int i , j ;
2003-01-10 04:24:19 +08:00
MdbColumn * col ;
MdbTableDef * table = idx - > table ;
MdbSarg * idx_sarg ;
MdbSarg * sarg ;
2004-02-07 07:55:18 +08:00
MdbField field ;
2003-01-21 00:04:24 +08:00
MdbSargNode node ;
2004-02-15 03:40:35 +08:00
//int c_offset = 0,
int c_len ;
2003-01-10 04:24:19 +08:00
2004-02-06 10:34:20 +08:00
//fprintf(stderr,"mdb_index_test_sargs called on ");
2017-12-04 01:18:21 +08:00
//mdb_buffer_dump(buf, 0, len);
2004-02-06 10:34:20 +08:00
//fprintf(stderr,"\n");
2003-01-10 04:24:19 +08:00
for ( i = 0 ; i < idx - > num_keys ; i + + ) {
2004-02-12 06:05:13 +08:00
//c_offset++; /* the per column null indicator/flags */
2003-01-10 04:24:19 +08:00
col = g_ptr_array_index ( table - > columns , idx - > key_col_num [ i ] - 1 ) ;
/*
* This will go away eventually
*/
if ( col - > col_type = = MDB_TEXT ) {
2004-02-12 06:05:13 +08:00
//c_len = strlen(&mdb->pg_buf[offset + c_offset]);
c_len = strlen ( buf ) ;
2003-01-10 04:24:19 +08:00
} else {
c_len = col - > col_size ;
2003-01-13 06:59:41 +08:00
//fprintf(stderr,"Only text types currently supported. How did we get here?\n");
2003-01-10 04:24:19 +08:00
}
/*
* If we have no cached index values for this column ,
* create them .
*/
if ( col - > num_sargs & & ! col - > idx_sarg_cache ) {
col - > idx_sarg_cache = g_ptr_array_new ( ) ;
for ( j = 0 ; j < col - > num_sargs ; j + + ) {
sarg = g_ptr_array_index ( col - > sargs , j ) ;
idx_sarg = g_memdup ( sarg , sizeof ( MdbSarg ) ) ;
2003-01-13 06:59:41 +08:00
//printf("calling mdb_index_cache_sarg\n");
2003-01-10 04:24:19 +08:00
mdb_index_cache_sarg ( col , sarg , idx_sarg ) ;
g_ptr_array_add ( col - > idx_sarg_cache , idx_sarg ) ;
}
}
for ( j = 0 ; j < col - > num_sargs ; j + + ) {
sarg = g_ptr_array_index ( col - > idx_sarg_cache , j ) ;
2003-01-21 00:04:24 +08:00
/* XXX - kludge */
node . op = sarg - > op ;
node . value = sarg - > value ;
2004-02-12 06:05:13 +08:00
//field.value = &mdb->pg_buf[offset + c_offset];
field . value = buf ;
2004-02-07 07:55:18 +08:00
field . siz = c_len ;
field . is_null = FALSE ;
2017-12-04 06:20:11 +08:00
/* In Jet 4 Index text hashes don't need to be converted from Unicode */
if ( ! IS_JET3 ( mdb ) & & col - > col_type = = MDB_TEXT )
{
if ( ! mdb_test_string ( & node , buf ) ) return 0 ;
}
else if ( ! mdb_test_sarg ( mdb , col , & node , & field ) ) {
2003-01-21 00:04:24 +08:00
/* sarg didn't match, no sense going on */
return 0 ;
2003-01-10 04:24:19 +08:00
}
}
}
return 1 ;
}
2004-02-15 14:00:25 +08:00
/*
 * Pack the entry start offsets of an index page (ipg->idx_starts, a
 * zero-terminated list) into the page's bitmap inside mdb->pg_buf.
 *
 * Each bit of the bitmap stands for one byte of the entry area; a set bit
 * marks the position where an entry ends.  The bitmap starts at 0x16 (JET3)
 * or 0x1b (other formats) and is cleared up to the start of the entry area,
 * 0xf8 (JET3) or 0x1e0 (other formats).
 *
 * Always returns 0.
 */
int
mdb_index_pack_bitmap(MdbHandle *mdb, MdbIndexPage *ipg)
{
	int mask_bit = 0;
	int mask_pos = IS_JET3(mdb) ? 0x16 : 0x1b;
	/* The bitmap ends where the entries begin.  Using the JET3 value
	 * for every format would leave stale bits behind on later formats. */
	int bitmap_end = IS_JET3(mdb) ? 0xf8 : 0x1e0;
	int mask_byte = 0;
	int elem = 0;
	int len, start, i;

	start = ipg->idx_starts[elem++];
	while (start) {
		len = ipg->idx_starts[elem] - start;
		/* advance one bit per byte of the entry, flushing full bytes;
		 * len is negative for the terminating element, which leaves
		 * the position untouched */
		for (i = 0; i < len; i++) {
			mask_bit++;
			if (mask_bit == 8) {
				mask_bit = 0;
				mdb->pg_buf[mask_pos++] = mask_byte;
				mask_byte = 0;
			}
		}
		/* upon reaching the len, set the bit */
		mask_byte |= 1 << mask_bit;
		start = ipg->idx_starts[elem++];
	}

	/* flush the last byte if any */
	mdb->pg_buf[mask_pos++] = mask_byte;

	/* remember to zero the rest of the bitmap */
	while (mask_pos < bitmap_end)
		mdb->pg_buf[mask_pos++] = 0;

	return 0;
}
2004-02-14 02:49:51 +08:00
/*
* unpack the pages bitmap
*/
int
2004-02-15 14:00:25 +08:00
mdb_index_unpack_bitmap ( MdbHandle * mdb , MdbIndexPage * ipg )
2004-02-14 02:49:51 +08:00
{
int mask_bit = 0 ;
2017-12-04 06:20:11 +08:00
int mask_pos = IS_JET3 ( mdb ) ? 0x16 : 0x1b ;
2004-02-14 02:49:51 +08:00
int mask_byte ;
2017-12-04 06:20:11 +08:00
int jet_start = IS_JET3 ( mdb ) ? 0xf8 : 0x1e0 ;
int start = jet_start ;
2004-02-14 02:49:51 +08:00
int elem = 0 ;
int len = 0 ;
ipg - > idx_starts [ elem + + ] = start ;
//fprintf(stdout, "Unpacking index page %lu\n", ipg->pg);
do {
len = 0 ;
do {
mask_bit + + ;
if ( mask_bit = = 8 ) {
mask_bit = 0 ;
mask_pos + + ;
}
mask_byte = mdb - > pg_buf [ mask_pos ] ;
len + + ;
2017-12-04 06:20:11 +08:00
} while ( mask_pos < = jet_start & & ! ( ( 1 < < mask_bit ) & mask_byte ) ) ;
2004-02-14 02:49:51 +08:00
//fprintf(stdout, "%d %d %d %d\n", mask_pos, mask_bit, mask_byte, len);
start + = len ;
2017-12-04 06:20:11 +08:00
if ( mask_pos < jet_start ) ipg - > idx_starts [ elem + + ] = start ;
2004-02-14 02:49:51 +08:00
2017-12-04 06:20:11 +08:00
} while ( mask_pos < jet_start ) ;
2004-02-14 02:49:51 +08:00
2004-02-15 03:40:35 +08:00
/* if we zero the next element, so we don't pick up the last pages starts*/
ipg - > idx_starts [ elem ] = 0 ;
2004-02-14 02:49:51 +08:00
return elem ;
}
2003-01-13 06:59:41 +08:00
/*
* find the next entry on a page ( either index or leaf ) . Uses state information
* stored in the MdbIndexPage across calls .
*/
2003-01-10 04:24:19 +08:00
int
mdb_index_find_next_on_page ( MdbHandle * mdb , MdbIndexPage * ipg )
{
2004-02-14 02:49:51 +08:00
if ( ! ipg - > pg ) return 0 ;
/* if this page has not been unpacked to it */
2004-02-15 03:40:35 +08:00
if ( ! ipg - > idx_starts [ 0 ] ) {
//fprintf(stdout, "Unpacking page %d\n", ipg->pg);
2004-02-15 14:00:25 +08:00
mdb_index_unpack_bitmap ( mdb , ipg ) ;
2004-02-15 03:40:35 +08:00
}
2004-02-14 02:49:51 +08:00
if ( ipg - > idx_starts [ ipg - > start_pos + 1 ] = = 0 ) return 0 ;
ipg - > len = ipg - > idx_starts [ ipg - > start_pos + 1 ] - ipg - > idx_starts [ ipg - > start_pos ] ;
ipg - > start_pos + + ;
2004-02-15 03:40:35 +08:00
//fprintf(stdout, "Start pos %d\n", ipg->start_pos);
2004-02-14 02:49:51 +08:00
2003-01-10 04:24:19 +08:00
return ipg - > len ;
}
2017-12-04 06:20:11 +08:00
/*
 * Rewind the iteration state of an index page: the offset goes back to the
 * first entry, the entry position and length are cleared, and the list of
 * entry starts is marked as not unpacked.  ipg->pg is left alone.
 */
void mdb_index_page_reset(MdbHandle *mdb, MdbIndexPage *ipg)
{
	/* start byte of the index entries */
	if (IS_JET3(mdb))
		ipg->offset = 0xf8;
	else
		ipg->offset = 0x1e0;

	ipg->start_pos = 0;
	ipg->len = 0;
	/* a 0 first element makes the next lookup unpack the bitmap again */
	ipg->idx_starts[0] = 0;
}
2017-12-04 06:20:11 +08:00
/*
 * Initialise an index page structure: clear every field, then set up the
 * iteration state for the file format of `mdb`.
 */
void mdb_index_page_init(MdbHandle *mdb, MdbIndexPage *ipg)
{
	memset(ipg, 0, sizeof(*ipg));
	mdb_index_page_reset(mdb, ipg);
}
2003-01-13 06:59:41 +08:00
/*
* find the next leaf page if any given a chain . Assumes any exhausted leaf
* pages at the end of the chain have been peeled off before the call .
*/
MdbIndexPage *
2004-01-10 05:05:56 +08:00
mdb_find_next_leaf ( MdbHandle * mdb , MdbIndex * idx , MdbIndexChain * chain )
2003-01-10 04:24:19 +08:00
{
2003-01-13 06:59:41 +08:00
MdbIndexPage * ipg , * newipg ;
guint32 pg ;
guint passed = 0 ;
2004-01-10 05:05:56 +08:00
ipg = mdb_index_read_bottom_pg ( mdb , idx , chain ) ;
2003-01-13 06:59:41 +08:00
2003-01-10 04:24:19 +08:00
/*
* If we are at the first page deep and it ' s not an index page then
* we are simply done . ( there is no page to find
*/
2004-02-12 06:05:13 +08:00
if ( mdb - > pg_buf [ 0 ] = = MDB_PAGE_LEAF ) {
/* Indexes can have leaves at the end that don't appear
* in the upper tree , stash the last index found so
* we can follow it at the end . */
chain - > last_leaf_found = ipg - > pg ;
2003-01-13 06:59:41 +08:00
return ipg ;
2004-02-12 06:05:13 +08:00
}
2003-01-13 06:59:41 +08:00
/*
* apply sargs here , currently we don ' t
*/
do {
ipg - > len = 0 ;
//printf("finding next on pg %lu\n", ipg->pg);
2004-02-06 10:34:20 +08:00
if ( ! mdb_index_find_next_on_page ( mdb , ipg ) ) {
//printf("find_next_on_page returned 0\n");
2003-01-13 06:59:41 +08:00
return 0 ;
2004-02-06 10:34:20 +08:00
}
2005-06-23 15:04:17 +08:00
pg = mdb_get_int32_msb ( mdb - > pg_buf , ipg - > offset + ipg - > len - 3 ) > > 8 ;
2003-01-13 06:59:41 +08:00
//printf("Looking at pg %lu at %lu %d\n", pg, ipg->offset, ipg->len);
ipg - > offset + = ipg - > len ;
2003-01-10 04:24:19 +08:00
2003-01-13 06:59:41 +08:00
/*
* add to the chain and call this function
* recursively .
*/
2004-01-10 05:05:56 +08:00
newipg = mdb_chain_add_page ( mdb , chain , pg ) ;
newipg = mdb_find_next_leaf ( mdb , idx , chain ) ;
2004-02-12 06:05:13 +08:00
//printf("returning pg %lu\n",newipg->pg);
2003-01-13 06:59:41 +08:00
return newipg ;
} while ( ! passed ) ;
2003-01-10 04:24:19 +08:00
/* no more pages */
2003-01-13 06:59:41 +08:00
return NULL ;
2003-01-10 04:24:19 +08:00
}
2004-01-10 05:05:56 +08:00
/*
 * Push a new entry for page `pg` onto the index chain.
 *
 * The chain depth is increased by one, the new bottom slot is initialised
 * with mdb_index_page_init() and tagged with `pg`. If the depth would exceed
 * MDB_MAX_INDEX_DEPTH an error is printed to stderr and the process exits
 * with status 1.
 *
 * Returns a pointer to the new bottom slot inside chain->pages.
 */
MdbIndexPage *
mdb_chain_add_page(MdbHandle *mdb, MdbIndexChain *chain, guint32 pg)
{
	MdbIndexPage *slot;

	chain->cur_depth += 1;
	if (chain->cur_depth > MDB_MAX_INDEX_DEPTH) {
		fprintf(stderr, " Error! maximum index depth of %d exceeded. This is probably due to a programming bug, If you are confident that your indexes really are this deep, adjust MDB_MAX_INDEX_DEPTH in mdbtools.h and recompile. \n ", MDB_MAX_INDEX_DEPTH);
		exit(1);
	}

	/* cur_depth is 1-based, the pages array is 0-based */
	slot = &chain->pages[chain->cur_depth - 1];
	mdb_index_page_init(mdb, slot);
	slot->pg = pg;

	return slot;
}
2004-02-06 10:34:20 +08:00
/*
* returns the bottom page of the IndexChain , if IndexChain is empty it
* initializes it by reading idx - > first_pg ( the root page )
*/
2004-01-10 05:05:56 +08:00
MdbIndexPage *
mdb_index_read_bottom_pg ( MdbHandle * mdb , MdbIndex * idx , MdbIndexChain * chain )
{
MdbIndexPage * ipg ;
/*
* if it ' s new use the root index page ( idx - > first_pg )
*/
if ( ! chain - > cur_depth ) {
ipg = & ( chain - > pages [ 0 ] ) ;
2017-12-04 06:20:11 +08:00
mdb_index_page_init ( mdb , ipg ) ;
2004-01-10 05:05:56 +08:00
chain - > cur_depth = 1 ;
ipg - > pg = idx - > first_pg ;
if ( ! ( ipg = mdb_find_next_leaf ( mdb , idx , chain ) ) )
return 0 ;
} else {
ipg = & ( chain - > pages [ chain - > cur_depth - 1 ] ) ;
ipg - > len = 0 ;
}
mdb_read_pg ( mdb , ipg - > pg ) ;
return ipg ;
}
2004-02-12 06:05:13 +08:00
/*
 * Unwind the chain and search for a new leaf node.
 *
 * Pops one level at a time and asks mdb_find_next_leaf() for another leaf
 * below the new bottom page, stopping at the first leaf found or when only
 * the top level remains. On success the first entry of the new leaf has
 * already been located with mdb_index_find_next_on_page().
 *
 * Returns the new leaf page, or NULL when the chain is (or ends up) at
 * depth 1.
 */
MdbIndexPage *
mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain)
{
	MdbIndexPage *leaf = NULL;

	/* already at the top: nothing to unwind */
	if (chain->cur_depth == 1)
		return NULL;

	/* pop levels until a leaf turns up or we reach the top */
	while (leaf == NULL && chain->cur_depth > 1) {
		chain->cur_depth--;
		leaf = mdb_find_next_leaf(mdb, idx, chain);
		if (leaf != NULL)
			mdb_index_find_next_on_page(mdb, leaf);
	}

	/* ending at depth 1 counts as "nothing found", whatever leaf holds */
	return (chain->cur_depth == 1) ? NULL : leaf;
}
2003-01-13 06:59:41 +08:00
/*
* the main index function .
* caller provides an index chain which is the current traversal of index
* pages from the root page to the leaf . Initially passed as blank ,
* mdb_index_find_next will store it ' s state information here . Each invocation
* then picks up where the last one left off , allowing us to scroll through
* the index one by one .
*
* Sargs are applied here but also need to be applied on the whole row b / c
* text columns may return false positives due to hashing and non - index
* columns with sarg values can ' t be tested here .
*/
2003-01-10 04:24:19 +08:00
int
mdb_index_find_next ( MdbHandle * mdb , MdbIndex * idx , MdbIndexChain * chain , guint32 * pg , guint16 * row )
{
MdbIndexPage * ipg ;
int passed = 0 ;
2004-02-12 06:05:13 +08:00
int idx_sz ;
int idx_start = 0 ;
2017-12-04 06:20:11 +08:00
unsigned short compress_bytes ;
2004-03-13 23:07:18 +08:00
MdbColumn * col ;
2005-06-23 15:04:17 +08:00
guint32 pg_row ;
2003-01-10 04:24:19 +08:00
2004-01-10 05:05:56 +08:00
ipg = mdb_index_read_bottom_pg ( mdb , idx , chain ) ;
2003-01-10 04:24:19 +08:00
2003-01-13 06:59:41 +08:00
/*
* loop while the sargs don ' t match
*/
2003-01-10 04:24:19 +08:00
do {
ipg - > len = 0 ;
2003-01-13 06:59:41 +08:00
/*
* if no more rows on this leaf , try to find a new leaf
*/
if ( ! mdb_index_find_next_on_page ( mdb , ipg ) ) {
2004-02-12 06:05:13 +08:00
if ( ! chain - > clean_up_mode ) {
2017-12-04 06:25:35 +08:00
if ( ipg - > rc = = 1 | | ! ( ipg = mdb_index_unwind ( mdb , idx , chain ) ) )
2004-02-12 06:05:13 +08:00
chain - > clean_up_mode = 1 ;
2004-02-06 10:34:20 +08:00
}
2004-02-12 06:05:13 +08:00
if ( chain - > clean_up_mode ) {
2004-02-15 03:40:35 +08:00
//fprintf(stdout,"in cleanup mode\n");
2004-02-12 06:05:13 +08:00
if ( ! chain - > last_leaf_found ) return 0 ;
mdb_read_pg ( mdb , chain - > last_leaf_found ) ;
2005-06-23 15:04:17 +08:00
chain - > last_leaf_found = mdb_get_int32 (
mdb - > pg_buf , 0x0c ) ;
2004-02-12 06:05:13 +08:00
//printf("next leaf %lu\n", chain->last_leaf_found);
mdb_read_pg ( mdb , chain - > last_leaf_found ) ;
/* reuse the chain for cleanup mode */
chain - > cur_depth = 1 ;
ipg = & chain - > pages [ 0 ] ;
2017-12-04 06:20:11 +08:00
mdb_index_page_init ( mdb , ipg ) ;
2004-02-12 06:05:13 +08:00
ipg - > pg = chain - > last_leaf_found ;
//printf("next on page %d\n",
if ( ! mdb_index_find_next_on_page ( mdb , ipg ) )
return 0 ;
2003-01-13 06:59:41 +08:00
}
}
2005-06-23 15:04:17 +08:00
pg_row = mdb_get_int32_msb ( mdb - > pg_buf , ipg - > offset + ipg - > len - 4 ) ;
* row = pg_row & 0xff ;
* pg = pg_row > > 8 ;
2004-02-06 10:34:20 +08:00
//printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len);
2004-03-13 23:07:18 +08:00
col = g_ptr_array_index ( idx - > table - > columns , idx - > key_col_num [ 0 ] - 1 ) ;
idx_sz = mdb_col_fixed_size ( col ) ;
/* handle compressed indexes, single key indexes only? */
2017-12-04 06:20:11 +08:00
if ( idx_sz < 0 ) idx_sz = ipg - > len - ( ipg - > start_pos = = 1 ? 5 : 4 ) ; // Length from Index - the 4 trailing bytes (data page/row), Skip flags on first page
compress_bytes = * ( unsigned short * ) & mdb - > pg_buf [ IS_JET3 ( mdb ) ? 0x14 : 0x18 ] ;
if ( idx - > num_keys = = 1 & & idx_sz > 0 & & compress_bytes > 1 & & ipg - > start_pos > 1 /*ipg->len - 4 < idx_sz*/ ) {
2004-02-12 06:05:13 +08:00
//printf("short index found\n");
2011-08-29 07:43:52 +08:00
//mdb_buffer_dump(ipg->cache_value, 0, idx_sz);
2017-12-04 06:20:11 +08:00
memcpy ( & ipg - > cache_value [ compress_bytes - 1 ] , & mdb - > pg_buf [ ipg - > offset ] , ipg - > len ) ;
2011-08-29 07:43:52 +08:00
//mdb_buffer_dump(ipg->cache_value, 0, idx_sz);
2004-02-12 06:05:13 +08:00
} else {
idx_start = ipg - > offset + ( ipg - > len - 4 - idx_sz ) ;
memcpy ( ipg - > cache_value , & mdb - > pg_buf [ idx_start ] , idx_sz ) ;
}
2003-01-10 04:24:19 +08:00
2004-02-12 06:05:13 +08:00
//idx_start = ipg->offset + (ipg->len - 4 - idx_sz);
2005-05-02 20:46:33 +08:00
passed = mdb_index_test_sargs ( mdb , idx , ( char * ) ( ipg - > cache_value ) , idx_sz ) ;
2017-12-04 06:25:35 +08:00
if ( passed ) ipg - > rc = 1 ; else if ( ipg - > rc ) return 0 ;
2003-01-10 04:24:19 +08:00
ipg - > offset + = ipg - > len ;
} while ( ! passed ) ;
//fprintf(stdout,"len = %d pos %d\n", ipg->len, ipg->mask_pos);
2011-08-29 07:43:52 +08:00
//mdb_buffer_dump(mdb->pg_buf, ipg->offset, ipg->len);
2003-01-10 04:24:19 +08:00
return ipg - > len ;
}
2004-02-06 10:34:20 +08:00
/*
 * XXX - FIX ME
 * This function is grossly inefficient. It scans the entire index building
 * an IndexChain to a specific row. We should be checking the index pages
 * for matches against the indexed fields to find the proper leaf page, but
 * get it working first and then make it fast!
 *
 * Walks the index entries in order until one whose trailing 4 bytes equal
 * (pg << 8) | (row & 0xff) is found.
 *
 * Returns 1 when the entry was found (the chain then describes the path
 * from the root to its leaf), 0 when the index was exhausted first.
 */
int
mdb_index_find_row(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 pg, guint16 row)
{
	MdbIndexPage *ipg;
	int passed = 0;
	guint32 pg_row = (pg << 8) | (row & 0xff);
	guint32 datapg_row;

	ipg = mdb_index_read_bottom_pg(mdb, idx, chain);
	/* a fresh chain whose root yields no leaf gives us nothing to scan */
	if (!ipg)
		return 0;

	do {
		ipg->len = 0;
		/*
		 * if no more rows on this leaf, try to find a new leaf
		 */
		if (!mdb_index_find_next_on_page(mdb, ipg)) {
			/*
			 * Unwind the stack until we find something or reach
			 * the top. mdb_index_unwind() stops at the first leaf
			 * it finds and has already located that leaf's first
			 * entry; it returns NULL once we are back at the top.
			 */
			ipg = mdb_index_unwind(mdb, idx, chain);
			if (!ipg)
				return 0;
		}

		/* test row and pg */
		datapg_row = mdb_get_int32_msb(mdb->pg_buf, ipg->offset + ipg->len - 4);
		if (pg_row == datapg_row)
			passed = 1;

		ipg->offset += ipg->len;
	} while (!passed);

	/* index chain from root to leaf should now be in "chain" */
	return 1;
}
2001-07-11 06:35:37 +08:00
/*
 * Placeholder for walking an index; currently a no-op.
 *
 * An earlier experimental body (disabled) read idx->first_pg for single-key
 * indexes and stepped over one marker byte per key column starting at page
 * offset 0xf8, without doing anything with the values.
 */
void mdb_index_walk(MdbTableDef *table, MdbIndex *idx)
{
	/* intentionally empty */
	(void)table;
	(void)idx;
}
2003-01-13 06:59:41 +08:00
/*
 * Print a human-readable description of an index to stdout: its number,
 * name, first page and row count, whether it is a primary key
 * (index_type == 1), and one line per key column giving the column name,
 * column number, sort order and the index's uniqueness flag. Finishes by
 * calling mdb_index_walk().
 */
void
mdb_index_dump(MdbTableDef *table, MdbIndex *idx)
{
	unsigned int k = 0;

	fprintf(stdout, " index number %d \n ", idx->index_num);
	fprintf(stdout, " index name %s \n ", idx->name);
	fprintf(stdout, " index first page %d \n ", idx->first_pg);
	fprintf(stdout, " index rows %d \n ", idx->num_rows);
	if (idx->index_type == 1) {
		fprintf(stdout, " index is a primary key \n ");
	}

	while (k < idx->num_keys) {
		/* key_col_num is 1-based, the columns array is 0-based */
		MdbColumn *key_col = g_ptr_array_index(table->columns, idx->key_col_num[k] - 1);
		const char *order = (idx->key_col_order[k] == MDB_ASC) ? " ascending " : " descending ";
		const char *unique = (idx->flags & MDB_IDX_UNIQUE) ? " Yes " : " No ";

		fprintf(stdout, " Column %s(%d) Sorted %s Unique: %s \n ",
			key_col->name,
			idx->key_col_num[k],
			order,
			unique);
		k++;
	}

	mdb_index_walk(table, idx);
}
2004-02-06 10:34:20 +08:00
/*
* compute_cost tries to assign a cost to a given index using the sargs
* available in this query .
*
* Indexes with no matching sargs are assigned 0
* Unique indexes are preferred over non - uniques
* Operator preference is equal , like , isnull , others
*/
2003-01-21 00:04:24 +08:00
int mdb_index_compute_cost ( MdbTableDef * table , MdbIndex * idx )
{
2004-07-17 22:21:43 +08:00
unsigned int i ;
2003-01-21 00:04:24 +08:00
MdbColumn * col ;
2004-07-03 12:56:07 +08:00
MdbSarg * sarg = NULL ;
2003-01-21 00:04:24 +08:00
int not_all_equal = 0 ;
if ( ! idx - > num_keys ) return 0 ;
if ( idx - > num_keys > 1 ) {
for ( i = 0 ; i < idx - > num_keys ; i + + ) {
col = g_ptr_array_index ( table - > columns , idx - > key_col_num [ i ] - 1 ) ;
2004-02-06 10:34:20 +08:00
if ( col - > sargs ) sarg = g_ptr_array_index ( col - > sargs , 0 ) ;
2003-01-21 00:04:24 +08:00
if ( ! sarg | | sarg - > op ! = MDB_EQUAL ) not_all_equal + + ;
}
}
col = g_ptr_array_index ( table - > columns , idx - > key_col_num [ 0 ] - 1 ) ;
/*
* if this is the first key column and there are no sargs ,
* then this index is useless .
*/
if ( ! col - > num_sargs ) return 0 ;
sarg = g_ptr_array_index ( col - > sargs , 0 ) ;
/*
* a like with a wild card first is useless as a sarg */
if ( sarg - > op = = MDB_LIKE & & sarg - > value . s [ 0 ] = = ' % ' )
return 0 ;
/*
* this needs a lot of tweaking .
*/
if ( idx - > flags & MDB_IDX_UNIQUE ) {
if ( idx - > num_keys = = 1 ) {
//printf("op is %d\n", sarg->op);
switch ( sarg - > op ) {
case MDB_EQUAL :
return 1 ; break ;
case MDB_LIKE :
return 4 ; break ;
case MDB_ISNULL :
return 12 ; break ;
default :
return 8 ; break ;
}
} else {
switch ( sarg - > op ) {
case MDB_EQUAL :
if ( not_all_equal ) return 2 ;
else return 1 ;
break ;
case MDB_LIKE :
return 6 ; break ;
case MDB_ISNULL :
return 12 ; break ;
default :
return 9 ; break ;
}
}
} else {
if ( idx - > num_keys = = 1 ) {
switch ( sarg - > op ) {
case MDB_EQUAL :
return 2 ; break ;
case MDB_LIKE :
return 5 ; break ;
case MDB_ISNULL :
return 12 ; break ;
default :
return 10 ; break ;
}
} else {
switch ( sarg - > op ) {
case MDB_EQUAL :
if ( not_all_equal ) return 3 ;
else return 2 ;
break ;
case MDB_LIKE :
return 7 ; break ;
case MDB_ISNULL :
return 12 ; break ;
default :
return 11 ; break ;
}
}
}
return 0 ;
}
2004-02-06 10:34:20 +08:00
/*
* choose_index runs mdb_index_compute_cost for each available index and picks
* the best .
*
* Returns strategy to use ( table scan , or index scan )
*/
2003-01-21 00:04:24 +08:00
MdbStrategy
mdb_choose_index ( MdbTableDef * table , int * choice )
{
2004-07-09 20:47:04 +08:00
unsigned int i ;
2003-01-21 00:04:24 +08:00
MdbIndex * idx ;
int cost = 0 ;
int least = 99 ;
* choice = - 1 ;
for ( i = 0 ; i < table - > num_idxs ; i + + ) {
idx = g_ptr_array_index ( table - > indices , i ) ;
cost = mdb_index_compute_cost ( table , idx ) ;
//printf("cost for %s is %d\n", idx->name, cost);
if ( cost & & cost < least ) {
least = cost ;
* choice = i ;
}
}
/* and the winner is: *choice */
2003-01-29 07:51:06 +08:00
if ( least = = 99 ) return MDB_TABLE_SCAN ;
2003-01-21 00:04:24 +08:00
return MDB_INDEX_SCAN ;
}
/*
 * Prepare a table for an index scan if index use is enabled
 * (MDB_USE_INDEX) and mdb_choose_index() selects an index.
 *
 * In that case the table's strategy is set to MDB_INDEX_SCAN, the chosen
 * index is recorded in table->scan_idx, a zeroed MdbIndexChain is allocated
 * into table->chain, and a cloned handle (table->mdbidx) is positioned on
 * the index's first page. Otherwise the table is left untouched.
 */
void
mdb_index_scan_init(MdbHandle *mdb, MdbTableDef *table)
{
	int chosen;

	if (!mdb_get_option(MDB_USE_INDEX))
		return;
	if (mdb_choose_index(table, &chosen) != MDB_INDEX_SCAN)
		return;

	table->strategy = MDB_INDEX_SCAN;
	table->scan_idx = g_ptr_array_index(table->indices, chosen);
	table->chain = g_malloc0(sizeof(MdbIndexChain));
	table->mdbidx = mdb_clone_handle(mdb);
	mdb_read_pg(table->mdbidx, table->scan_idx->first_pg);
}
/*
 * Release the index-scan state attached to a table: the index chain and
 * the handle stored in table->mdbidx. Both pointers are reset to NULL, so
 * calling this again is harmless.
 */
void
mdb_index_scan_free(MdbTableDef *table)
{
	/* g_free() ignores NULL, so no guard is needed for the chain */
	g_free(table->chain);
	table->chain = NULL;

	if (table->mdbidx != NULL) {
		mdb_close(table->mdbidx);
		table->mdbidx = NULL;
	}
}
2004-06-18 13:10:44 +08:00
/*
 * Free every element of an index array with g_free(), then the array
 * itself. A NULL array is accepted and ignored.
 */
void mdb_free_indices(GPtrArray *indices)
{
	unsigned int n;

	if (indices == NULL)
		return;

	n = 0;
	while (n < indices->len) {
		g_free(g_ptr_array_index(indices, n));
		n++;
	}
	g_ptr_array_free(indices, TRUE);
}