Better compressed text handling

This commit is contained in:
whydoubt
2004-12-11 06:07:20 +00:00
parent fa8d24dd2b
commit d271b5fae5
10 changed files with 192 additions and 89 deletions

View File

@@ -634,7 +634,7 @@ static char *mdb_memo_to_string(MdbHandle *mdb, int start, int size)
pg_row & 0xff, row_start, len);
buffer_dump(mdb->pg_buf, row_start, row_start + len);
#endif
mdb_unicode2ascii(mdb, buf, row_start, len, text, MDB_BIND_SIZE);
mdb_unicode2ascii(mdb, buf + row_start, len, text, MDB_BIND_SIZE);
return text;
} else { /* if (memo_flags == 0x0000) { */
pg_row = mdb_get_int32(mdb->pg_buf, start+4);
@@ -737,7 +737,7 @@ char *mdb_col_to_string(MdbHandle *mdb, unsigned char *buf, int start, int datat
if (size<0) {
return "";
}
mdb_unicode2ascii(mdb, mdb->pg_buf, start, size, text, MDB_BIND_SIZE);
mdb_unicode2ascii(mdb, mdb->pg_buf + start, size, text, MDB_BIND_SIZE);
return text;
break;
case MDB_SDATETIME:

View File

@@ -23,86 +23,159 @@
#include "dmalloc.h"
#endif
/*
 * This function is used in reading text data from an MDB table.
 *
 * NOTE(review): this listing looks like a rendered commit diff whose +/-
 * markers were lost, so superseded and replacement lines sit side by side
 * (two signatures, duplicated declarations). The comments below describe
 * the lines written against the src/slen/dest/dlen signature; lines that
 * still use buf/offset/len/dest_sz are presumably the removed version --
 * confirm against the real file before relying on either.
 *
 * src/slen  - input bytes and their count.
 * dest/dlen - output buffer; dlen seeds len_out, the output space that is
 *             handed to iconv when HAVE_ICONV is set.
 * Returns 0 when src or dest is NULL, otherwise the final dlen, which is
 * also the index at which the terminating '\0' is stored in dest.
 */
int
mdb_unicode2ascii(MdbHandle *mdb, unsigned char *buf, int offset, unsigned int len, char *dest, unsigned int dest_sz)
mdb_unicode2ascii(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
{
unsigned int i, ret;
int len_in, len_out;
unsigned char *in_ptr, *out_ptr;
/* tmp/tlen: scratch buffer and fill level for the decompressed string */
unsigned char *tmp = NULL;
unsigned int tlen = 0;
int len_in, len_out;
char *in_ptr, *out_ptr;
in_ptr = &buf[offset];
out_ptr = dest;
len_in = len;
len_out = dest_sz;
/* Nothing to convert without both an input and an output buffer. */
if ((!src) || (!dest))
return 0;
/* Uncompress 'Unicode Compressed' string into tmp */
/* Only JET4 input that starts with the bytes 0xff 0xfe takes this path;
 * the two marker bytes are skipped before decoding. */
if (IS_JET4(mdb) && (slen>=2) && (src[0]==0xff) && (src[1]==0xfe)) {
unsigned int compress=1;
src += 2;
slen -= 2;
/* Each remaining input byte expands to at most two output bytes. */
tmp = (char *)g_malloc(slen*2);
while (slen) {
if (*src == 0) {
/* A zero byte toggles between compressed and uncompressed mode. */
compress = (compress) ? 0 : 1;
src++;
slen--;
} else if (compress) {
/* Compressed mode: one input byte becomes that byte followed by 0. */
tmp[tlen++] = *src++;
tmp[tlen++] = 0;
slen--;
} else if (slen >= 2){
/* Uncompressed mode: copy a two-byte unit unchanged. */
tmp[tlen++] = *src++;
tmp[tlen++] = *src++;
slen-=2;
}
/* NOTE(review): with compress == 0, slen == 1 and *src != 0 none of the
 * branches above consumes input, so this loop would not terminate --
 * confirm whether a trailing odd byte can occur. */
}
}
if (buf[offset]==0xff && buf[offset+1]==0xfe) {
len_in -= 2;
in_ptr += 2;
ret = iconv(mdb->iconv_compress, (char **)&in_ptr, &len_in, (char **)&out_ptr, &len_out);
dest[dest_sz - len_out]='\0';
return dest_sz - len_out;
//strncpy(dest, in_ptr+2, len-2);
//dest[len-2]='\0';
/* Convert from the decompressed copy when one was made, else from src. */
in_ptr = (tmp) ? tmp : src;
out_ptr = dest;
len_in = (tmp) ? tlen : slen;
len_out = dlen;
#if HAVE_ICONV
//printf("1 len_in %d len_out %d\n",len_in, len_out);
/* Keep calling iconv until len_in reaches 0. */
while (1) {
iconv(mdb->iconv_in, &in_ptr, &len_in, &out_ptr, &len_out);
if (!len_in) break;
/* Don't bail if impossible conversion is encountered */
/* Skip one input unit (2 bytes on JET4, otherwise 1) and store '?'. */
/* NOTE(review): len_out is not checked before '?' is stored, and the only
 * exit is len_in reaching exactly 0 -- verify what happens when dest is
 * full or the input length is odd. */
in_ptr += (IS_JET4(mdb)) ? 2 : 1;
len_in -= (IS_JET4(mdb)) ? 2 : 1;
*out_ptr++ = '?';
len_out--;
}
//printf("2 len_in %d len_out %d\n",len_in, len_out);
/* dlen now holds the number of bytes produced in dest. */
dlen -= len_out;
#else
if (IS_JET3(mdb)) {
/* No iconv: JET3 text is copied byte for byte. */
/* NOTE(review): the copy length is len_in and is not limited by dlen --
 * confirm callers always supply a large enough dest. */
strncpy(out_ptr, in_ptr, len_in);
dlen = len_in;
} else {
#ifdef HAVE_ICONV
if (mdb->iconv_in) {
//printf("1 len_in %d len_out %d\n",len_in, len_out);
ret = iconv(mdb->iconv_in, (char **)&in_ptr, &len_in, (char **)&out_ptr, &len_out);
//printf("2 len_in %d len_out %d\n",len_in, len_out);
dest[dest_sz - len_out]='\0';
//printf("dest %s\n",dest);
return dest_sz - len_out;
}
/* rough UCS-2LE to ISO-8859-1 conversion */
/* Keep the first byte of each pair when the second byte is 0, else '?'. */
unsigned int i;
for (i=0; i<len_in; i+=2)
dest[i/2] = (in_ptr[i+1] == 0) ? in_ptr[i] : '?';
dlen = len_in/2;
}
#endif
/* convert unicode to ascii, rather sloppily */
for (i=0;i<len;i+=2)
dest[i/2] = in_ptr[i];
dest[len/2]='\0';
}
return len;
/* Release the scratch buffer, terminate the result and report its length. */
/* NOTE(review): when the output space was fully used, dlen still equals the
 * size passed in, so the terminator lands at dest[size] -- confirm dest has
 * room for that extra byte. */
if (tmp) g_free(tmp);
dest[dlen]='\0';
//printf("dest %s\n",dest);
return dlen;
}
/*
 * This function is used in writing text data to an MDB table.
 * If slen is 0, strlen will be used to calculate src's length.
 *
 * NOTE(review): this listing looks like a rendered commit diff whose +/-
 * markers were lost, so superseded and replacement lines sit side by side
 * (two signatures, duplicated declarations, unbalanced braces). The comments
 * below describe the lines written against the src/slen/dest/dlen
 * signature; lines that still use buf/offset/len/dest_sz are presumably the
 * removed version -- confirm against the real file.
 *
 * src/slen  - input text and its byte count (0 means "use strlen(src)").
 * dest/dlen - output buffer and the space available in it.
 * Returns 0 when src or dest is NULL, otherwise the number of bytes left in
 * dest after conversion and, for JET4, optional compression.
 * NOTE(review): the dlen-based lines never store a terminating '\0' in
 * dest -- confirm callers rely only on the returned length.
 */
int
mdb_ascii2unicode(MdbHandle *mdb, unsigned char *buf, int offset, unsigned int len, char *dest, unsigned int dest_sz)
mdb_ascii2unicode(MdbHandle *mdb, unsigned char *src, unsigned int slen, unsigned char *dest, unsigned int dlen)
{
unsigned int i = 0, ret;
size_t len_in, len_out, len_orig;
size_t len_in, len_out;
char *in_ptr, *out_ptr;
in_ptr = &buf[offset];
out_ptr = dest;
len_orig = strlen(in_ptr);
len_in = len_orig;
len_out = dest_sz;
/* Nothing to convert without both an input and an output buffer. */
if ((!src) || (!dest))
return 0;
if (!buf) return 0;
in_ptr = src;
out_ptr = dest;
/* A zero slen means the caller wants the length taken from strlen(src). */
len_in = (slen) ? slen : strlen(src);
len_out = dlen;
#ifdef HAVE_ICONV
if (mdb->iconv_out) {
ret = iconv(mdb->iconv_out, &in_ptr, &len_in, &out_ptr, &len_out);
//printf("len_in %d len_out %d\n",len_in, len_out);
dest[dest_sz - len_out]='\0';
dest[dest_sz - len_out + 1]='\0';
return dest_sz - len_out;
/* Single iconv call; its return value is not inspected. */
iconv(mdb->iconv_out, &in_ptr, &len_in, &out_ptr, &len_out);
//printf("len_in %d len_out %d\n", len_in, len_out);
/* dlen now holds the number of bytes produced in dest. */
dlen -= len_out;
#else
if (IS_JET3(mdb)) {
/* No iconv, JET3: plain byte copy limited to the smaller of the input
 * length and the output space. */
dlen = MIN(len_in, len_out);
strncpy(out_ptr, in_ptr, dlen);
} else {
/* No iconv, not JET3: widen each input byte to two bytes (byte, 0),
 * converting at most len_out/2 characters. */
unsigned int i;
slen = MIN(len_in, len_out/2);
dlen = slen*2;
for (i=0; i<slen; i++) {
out_ptr[i*2] = in_ptr[i];
out_ptr[i*2+1] = 0;
}
}
#endif
if (IS_JET3(mdb)) {
strncpy(dest, in_ptr, len);
dest[len]='\0';
return strlen(dest);
/* Unicode Compression */
/* Attempted only for JET4 output longer than 4 bytes. The candidate is
 * built in tmp (dlen bytes) and replaces dest only if it ends up shorter. */
if(IS_JET4(mdb) && (dlen>4)) {
char *tmp = g_malloc(dlen);
/* tptr: write index into tmp; dptr: read index into dest;
 * comp: 1 while emitting single-byte (compressed) characters. */
int tptr = 0, dptr = 0;
int comp = 1;
/* Compressed strings start with the marker bytes 0xff 0xfe. */
tmp[tptr++] = 0xff;
tmp[tptr++] = 0xfe;
/* NOTE(review): dest[dptr+1] is read while only dptr < dlen is checked;
 * this stays in range as long as dlen is even -- confirm. */
while((dptr < dlen) && (tptr < dlen)) {
if (((dest[dptr+1]==0) && (comp==0))
|| ((dest[dptr+1]!=0) && (comp==1))) {
/* switch encoding mode */
/* A 0 byte in the output marks the switch; no input is consumed. */
tmp[tptr++] = 0;
comp = (comp) ? 0 : 1;
} else if (dest[dptr]==0) {
/* this string cannot be compressed */
/* Setting tptr to dlen ends the loop and fails the size test below. */
tptr = dlen;
} else if (comp==1) {
/* encode compressed character */
tmp[tptr++] = dest[dptr];
dptr += 2;
} else if (tptr+1 < dlen) {
/* encode uncompressed character */
tmp[tptr++] = dest[dptr];
tmp[tptr++] = dest[dptr+1];
dptr += 2;
} else {
/* could not encode uncompressed character
* into single byte */
tptr = dlen;
}
}
/* Keep the compressed form only when it is strictly shorter. */
if (tptr < dlen) {
memcpy(dest, tmp, tptr);
dlen = tptr;
}
g_free(tmp);
}
while (i<strlen(in_ptr) && (i*2+2)<len) {
dest[i*2] = in_ptr[i];
dest[i*2+1] = 0;
i++;
}
return (i*2);
return dlen;
}
void mdb_iconv_init(MdbHandle *mdb)
{
char *iconv_code;
@@ -112,21 +185,20 @@ void mdb_iconv_init(MdbHandle *mdb)
iconv_code="UTF-8";
}
#ifdef HAVE_ICONV
if (IS_JET4(mdb)) {
mdb->iconv_out = iconv_open("UCS-2LE", iconv_code);
mdb->iconv_in = iconv_open(iconv_code, "UCS-2LE");
mdb->iconv_compress = iconv_open(iconv_code, "ISO8859-1");
} else {
/* ToDO - need to determine character set from file */
/* But according to MS kb289525 and kb202427, there is not such info in jet3 db */
/* According to Microsoft Knowledge Base pages 289525 and */
/* 202427, code page info is not contained in the database */
char *jet3_iconv_code;
/* check environment variable */
if (!(jet3_iconv_code=(char *)getenv("MDB_JET3_CHARSET"))) {
jet3_iconv_code="ISO8859-1";
jet3_iconv_code="CP1252";
}
mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);
mdb->iconv_in = iconv_open(iconv_code, jet3_iconv_code);
}
@@ -137,8 +209,5 @@ void mdb_iconv_close(MdbHandle *mdb)
#ifdef HAVE_ICONV
if (mdb->iconv_out != (iconv_t)-1) iconv_close(mdb->iconv_out);
if (mdb->iconv_in != (iconv_t)-1) iconv_close(mdb->iconv_in);
if (IS_JET4(mdb)) {
if (mdb->iconv_compress != (iconv_t)-1) iconv_close(mdb->iconv_compress);
}
#endif
}

View File

@@ -111,7 +111,7 @@ mdb_read_indices(MdbTableDef *table)
tmpbuf = g_malloc(name_sz);
read_pg_if_n(mdb, tmpbuf, &cur_pos, name_sz);
cur_pos += name_sz;
mdb_unicode2ascii(mdb, tmpbuf, 0, name_sz, pidx->name, name_sz);
mdb_unicode2ascii(mdb, tmpbuf, name_sz, pidx->name, MDB_MAX_OBJ_NAME);
g_free(tmpbuf);
//fprintf(stderr, "index name %s\n", pidx->name);
}

View File

@@ -154,7 +154,7 @@ int lastchar;
return mdb_test_int(node, (gint32)mdb_get_int32(field->value, 0));
break;
case MDB_TEXT:
mdb_unicode2ascii(mdb, field->value, 0, field->siz, tmpbuf, 256);
mdb_unicode2ascii(mdb, field->value, field->siz, tmpbuf, 256);
return mdb_test_string(node, tmpbuf);
default:
fprintf(stderr, "Calling mdb_test_sarg on unknown type. Add code to mdb_test_sarg() for type %d\n",col->col_type);

View File

@@ -269,9 +269,9 @@ GPtrArray *mdb_read_columns(MdbTableDef *table)
** column names - ordered the same as the column attributes table
*/
for (i=0;i<table->num_cols;i++) {
char *tmp_buf;
pcol = g_ptr_array_index(table->columns, i);
char *tmp_buf;
if (IS_JET4(mdb)) {
name_sz = read_pg_if_16(mdb, &cur_pos);
cur_pos += 2;
@@ -285,7 +285,7 @@ GPtrArray *mdb_read_columns(MdbTableDef *table)
}
tmp_buf = (char *) g_malloc(name_sz);
read_pg_if_n(mdb, tmp_buf, &cur_pos, name_sz);
mdb_unicode2ascii(mdb, tmp_buf, 0, name_sz, pcol->name, name_sz);
mdb_unicode2ascii(mdb, tmp_buf, name_sz, pcol->name, MDB_MAX_OBJ_NAME);
g_free(tmp_buf);
cur_pos += name_sz;

View File

@@ -27,7 +27,7 @@
#include "connectparams.h"
static char software_version[] = "$Id: odbc.c,v 1.26 2004/11/27 18:18:55 whydoubt Exp $";
static char software_version[] = "$Id: odbc.c,v 1.27 2004/12/11 06:07:22 whydoubt Exp $";
static void *no_unused_var_warn[] = {software_version,
no_unused_var_warn};
@@ -1081,9 +1081,9 @@ SQLRETURN SQL_API SQLColumns(
for (j=0; j<table->num_cols; j++) {
col = g_ptr_array_index(table->columns, j);
ts2 = mdb_ascii2unicode(mdb, table->name, 0, 100, t2, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, col->name, 0, 100, t3, MDB_BIND_SIZE);
ts5 = mdb_ascii2unicode(mdb, "FIX ME", 0, 100, t5, MDB_BIND_SIZE);
ts2 = mdb_ascii2unicode(mdb, table->name, 0, t2, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, col->name, 0, t3, MDB_BIND_SIZE);
ts5 = mdb_ascii2unicode(mdb, "FIX ME", 0, t5, MDB_BIND_SIZE);
nullable = SQL_NO_NULLS;
datatype = _odbc_get_client_type(col->col_type);
sqldatatype = _odbc_get_client_type(col->col_type);
@@ -1384,11 +1384,11 @@ SQLRETURN SQL_API SQLGetTypeInfo(
if (fSqlType && (fSqlType != type_info[i].data_type))
continue;
ts0 = mdb_ascii2unicode(mdb, type_info[i].type_name, 0, 100, t0, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, type_info[i].literal_prefix, 0, 100, t3, MDB_BIND_SIZE);
ts4 = mdb_ascii2unicode(mdb, type_info[i].literal_suffix, 0, 100, t4, MDB_BIND_SIZE);
ts5 = mdb_ascii2unicode(mdb, type_info[i].create_params, 0, 100, t5, MDB_BIND_SIZE);
ts12 = mdb_ascii2unicode(mdb, type_info[i].local_type_name, 0, 100, t12, MDB_BIND_SIZE);
ts0 = mdb_ascii2unicode(mdb, type_info[i].type_name, 0, t0, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, type_info[i].literal_prefix, 0, t3, MDB_BIND_SIZE);
ts4 = mdb_ascii2unicode(mdb, type_info[i].literal_suffix, 0, t4, MDB_BIND_SIZE);
ts5 = mdb_ascii2unicode(mdb, type_info[i].create_params, 0, t5, MDB_BIND_SIZE);
ts12 = mdb_ascii2unicode(mdb, type_info[i].local_type_name, 0, t12, MDB_BIND_SIZE);
FILL_FIELD(&fields[0], t0, ts0);
FILL_FIELD(&fields[1],&type_info[i].data_type, 0);
@@ -1542,8 +1542,8 @@ SQLRETURN SQL_API SQLTables(
FILL_FIELD(&fields[j], NULL, 0);
}
ts2 = mdb_ascii2unicode(mdb, entry->object_name, 0, 100, t2, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, table_types[ttype], 0, 100, t3, MDB_BIND_SIZE);
ts2 = mdb_ascii2unicode(mdb, entry->object_name, 0, t2, MDB_BIND_SIZE);
ts3 = mdb_ascii2unicode(mdb, table_types[ttype], 0, t3, MDB_BIND_SIZE);
FILL_FIELD(&fields[2], t2, ts2);
FILL_FIELD(&fields[3], t3, ts3);

View File

@@ -540,7 +540,7 @@ void mdb_sql_listtables(MdbSQL *sql)
entry = g_ptr_array_index (mdb->catalog, i);
if (mdb_is_user_table(entry)) {
//col = g_ptr_array_index(table->columns,0);
tmpsiz = mdb_ascii2unicode(mdb, entry->object_name, 0, strlen(entry->object_name), tmpstr, 100);
tmpsiz = mdb_ascii2unicode(mdb, entry->object_name, 0, tmpstr, 100);
mdb_fill_temp_field(&fields[0],tmpstr, tmpsiz, 0,0,0,0);
row_size = mdb_pack_row(ttable, row_buffer, 1, fields);
mdb_add_row_to_pg(ttable,row_buffer, row_size);
@@ -605,15 +605,15 @@ void mdb_sql_describe_table(MdbSQL *sql)
for (i=0;i<table->num_cols;i++) {
col = g_ptr_array_index(table->columns,i);
tmpsiz = mdb_ascii2unicode(mdb, col->name, 0, strlen(col->name), col_name, 100);
tmpsiz = mdb_ascii2unicode(mdb, col->name, 0, col_name, 100);
mdb_fill_temp_field(&fields[0],col_name, tmpsiz, 0,0,0,0);
strcpy(tmpstr, mdb_get_coltype_string(mdb->default_backend, col->col_type));
tmpsiz = mdb_ascii2unicode(mdb, tmpstr, 0, strlen(col->name), col_type, 100);
tmpsiz = mdb_ascii2unicode(mdb, tmpstr, 0, col_type, 100);
mdb_fill_temp_field(&fields[1],col_type, tmpsiz, 0,0,0,1);
sprintf(tmpstr,"%d",col->col_size);
tmpsiz = mdb_ascii2unicode(mdb, tmpstr, 0, strlen(tmpstr), col_size, 100);
tmpsiz = mdb_ascii2unicode(mdb, tmpstr, 0, col_size, 100);
mdb_fill_temp_field(&fields[2],col_size, tmpsiz, 0,0,0,2);
row_size = mdb_pack_row(ttable, row_buffer, 3, fields);