Clarify mdb_unicode2ascii API and fix buffer overrun (#220)

There was some confusion as to whether the destination buffer length
should include space for the null terminator. Some callers of the
function assumed that a terminator would be added beyond the end
of the stated buffer size, while others did not. Make everything
consistent and also fix an overrun when there was insufficient
space for the output in the non-iconv implementation.

As stated in a code comment, a better solution would follow the lead
of libxls and use wcstombs and friends when iconv is not available.
But this gets into the weeds with conversion functions named differently
across platforms. The goal here is to fix the buffer overrun.

See oss-fuzz/28773
This commit is contained in:
Evan Miller
2020-12-19 08:24:32 -05:00
committed by GitHub
parent 7ce75142e8
commit 1b96ef4b0d
4 changed files with 20 additions and 12 deletions

View File

@@ -25,6 +25,9 @@
/*
* This function is used in reading text data from an MDB table.
* 'dest' will receive a converted, null-terminated string.
* dlen is the available size of the destination buffer.
* Returns the length of the converted string, not including the terminator.
*/
int
mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size_t dlen)
@@ -58,6 +61,8 @@ mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size
tmp[tlen++] = *src++;
tmp[tlen++] = *src++;
slen-=2;
} else { // Odd # of bytes
break;
}
}
}
@@ -65,7 +70,7 @@ mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size
in_ptr = (tmp) ? tmp : src;
out_ptr = dest;
len_in = (tmp) ? tlen : slen;
len_out = dlen;
len_out = dlen - 1;
#if HAVE_ICONV
//printf("1 len_in %d len_out %d\n",len_in, len_out);
@@ -86,22 +91,25 @@ mdb_unicode2ascii(MdbHandle *mdb, const char *src, size_t slen, char *dest, size
len_out--;
}
//printf("2 len_in %d len_out %d\n",len_in, len_out);
dlen -= len_out;
dlen -= len_out + 1;
dest[dlen] = '\0';
#else
if (IS_JET3(mdb)) {
dlen = MIN(len_in, len_out);
strncpy(out_ptr, in_ptr, dlen);
int count = 0;
snprintf(out_ptr, dlen, "%.*s%n", (int)len_in, src, &count);
dlen = count;
} else {
/* rough UCS-2LE to ISO-8859-1 conversion */
/* wcstombs would be better; see libxls implementation for
* a multi-platform solution */
unsigned int i;
for (i=0; i<len_in; i+=2)
dest[i/2] = (in_ptr[i+1] == 0) ? in_ptr[i] : '?';
dlen = len_in/2;
for (i=0; 2*i+1<len_in && i<dlen-1; i++)
dest[i] = (in_ptr[2*i+1] == 0) ? in_ptr[2*i] : '?';
dest[(dlen=i)] = '\0';
}
#endif
if (tmp) g_free(tmp);
dest[dlen]='\0';
//printf("dest %s\n",dest);
return dlen;
}

View File

@@ -40,7 +40,7 @@ mdb_read_props_list(MdbHandle *mdb, gchar *kkd, int len)
mdb_buffer_dump(kkd, pos - 2, record_len + 2);
}
name = g_malloc(3*record_len + 1); /* worst case scenario is 3 bytes out per byte in */
mdb_unicode2ascii(mdb, &kkd[pos], record_len, name, 3*record_len);
mdb_unicode2ascii(mdb, &kkd[pos], record_len, name, 3*record_len + 1);
pos += record_len;
g_ptr_array_add(names, name);
@@ -107,7 +107,7 @@ mdb_read_props(MdbHandle *mdb, GPtrArray *names, gchar *kkd, int len)
props = mdb_alloc_props();
if (name_len) {
props->name = g_malloc(3*name_len + 1);
mdb_unicode2ascii(mdb, kkd+pos, name_len, props->name, 3*name_len);
mdb_unicode2ascii(mdb, kkd+pos, name_len, props->name, 3*name_len + 1);
mdb_debug(MDB_DEBUG_PROPS,"prop block named: %s", props->name);
}
pos += name_len;

View File

@@ -237,7 +237,7 @@ mdb_test_sarg(MdbHandle *mdb, MdbColumn *col, MdbSargNode *node, MdbField *field
ret = mdb_test_double(node->op, node->value.d, mdb_get_double(field->value, 0));
break;
case MDB_TEXT:
mdb_unicode2ascii(mdb, field->value, field->siz, tmpbuf, 256);
mdb_unicode2ascii(mdb, field->value, field->siz, tmpbuf, sizeof(tmpbuf));
ret = mdb_test_string(node, tmpbuf);
break;
case MDB_MEMO:

View File

@@ -306,7 +306,7 @@ GPtrArray *mdb_read_columns(MdbTableDef *table)
name_sz = read_pg_if_16(mdb, &cur_pos);
tmp_buf = (char *) g_malloc(name_sz);
read_pg_if_n(mdb, tmp_buf, &cur_pos, name_sz);
mdb_unicode2ascii(mdb, tmp_buf, name_sz, pcol->name, MDB_MAX_OBJ_NAME);
mdb_unicode2ascii(mdb, tmp_buf, name_sz, pcol->name, sizeof(pcol->name));
g_free(tmp_buf);
}