Merge branch 'export-mongodb-json' of github.com:rutsky/mdbtools into mdb-exportjson

This commit is contained in:
Evan Miller 2020-09-02 09:18:52 -04:00
commit a598148afc
5 changed files with 289 additions and 1 deletions

1
.gitignore vendored
View File

@ -39,6 +39,7 @@ src/sql/parser.h
src/util/mdb-array
src/util/mdb-count
src/util/mdb-export
src/util/mdb-exportjson
src/util/mdb-header
src/util/mdb-import
src/util/mdb-parsecsv

View File

@ -276,6 +276,8 @@ if test x$enable_gtk_doc = xauto ; then
fi
AM_CONDITIONAL(ENABLE_GTK_DOC, test x$enable_gtk_doc = xyes)
AM_CONDITIONAL(HAVE_GNOME_DOC_UTILS, test x$enable_gtk_doc = xyes)
AM_CONDITIONAL(ENABLE_SK, test x$enable_gtk_doc = xyes)
##################################################
# Check for txt2man

View File

@ -1,6 +1,6 @@
AUTOMAKE_OPTIONS = subdir-objects
SUBDIRS = bash-completion
bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop mdb-count mdb-queries
bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop mdb-count mdb-queries mdb-exportjson
noinst_PROGRAMS = mdb-import prtable prcat prdata prkkd prdump prole updrow prindex
mdb_export_SOURCES = mdb-export.c
mdb_schema_SOURCES = mdb-schema.c

80
src/util/base64.h Normal file
View File

@ -0,0 +1,80 @@
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Miscellaneous/Base64
#include <inttypes.h>
#include <string.h>
// TODO: split into a header (declaration) and an implementation file
/*
 * Encode dataLength bytes starting at data_buf as Base64 text (standard
 * alphabet, '=' padding) into result, followed by a terminating NUL.
 *
 * resultSize is the capacity of result in bytes, including the byte needed
 * for the terminating NUL.
 *
 * Returns 0 on success, or 1 when result is too small. On failure result
 * holds a partial encoding that is NOT NUL-terminated.
 */
static int base64encode(const void* data_buf, size_t dataLength, char* result, size_t resultSize)
{
    const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const uint8_t *bytes = (const uint8_t *)data_buf;
    size_t tail = dataLength % 3;   /* input bytes in the final, short group */
    size_t written = 0;
    size_t offset;

    /* Walk the input one 3-byte group at a time. */
    for (offset = 0; offset < dataLength; offset += 3) {
        size_t avail = dataLength - offset;
        /* 1 input byte -> 2 digits, 2 bytes -> 3 digits, 3 bytes -> 4 digits */
        size_t digits = (avail >= 3) ? 4 : avail + 1;
        size_t k;

        /* Pack up to three bytes into one 24-bit value, first byte highest. */
        uint32_t group = (uint32_t)bytes[offset] << 16;
        if (avail > 1)
            group |= (uint32_t)bytes[offset + 1] << 8;
        if (avail > 2)
            group |= bytes[offset + 2];

        /* Emit the 6-bit slices from most to least significant. */
        for (k = 0; k < digits; k++) {
            if (written >= resultSize)
                return 1; /* buffer too small */
            result[written++] = alphabet[(group >> (18 - 6 * k)) & 63];
        }
    }

    /* Pad the output to a multiple of four characters when input was short. */
    if (tail != 0) {
        size_t pad;
        for (pad = tail; pad < 3; pad++) {
            if (written >= resultSize)
                return 1; /* buffer too small */
            result[written++] = '=';
        }
    }

    if (written >= resultSize)
        return 1; /* no room left for the terminator */
    result[written] = '\0';
    return 0;
}

205
src/util/mdb-exportjson.c Normal file
View File

@ -0,0 +1,205 @@
/* MDB Tools - A library for reading MS Access database file
* Copyright (C) 2000 Brian Bruns
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "mdbtools.h"
#include "base64.h"
#ifdef DMALLOC
#include "dmalloc.h"
#endif
/*
 * Replace any MDB_BIND_SIZE coming from the included headers: this value is
 * the size of each per-column buffer that main() allocates and binds.
 */
#undef MDB_BIND_SIZE
#define MDB_BIND_SIZE 200000

/*
 * Column-type classifiers used by print_col().
 *   is_quote_type:  the value is not written bare (it is either quoted or
 *                   emitted as a $binary object).
 *   is_binary_type: the value is emitted as a Base64 $binary object.
 * The argument is parenthesized so expressions such as `t & 0xff` classify
 * correctly; it is still evaluated several times, so pass side-effect-free
 * expressions only.
 */
#define is_quote_type(x) ((x)==MDB_TEXT || (x)==MDB_OLE || (x)==MDB_MEMO || (x)==MDB_DATETIME || (x)==MDB_BINARY || (x)==MDB_REPID)
#define is_binary_type(x) ((x)==MDB_OLE || (x)==MDB_BINARY || (x)==MDB_REPID)

/* JSON punctuation; these point at string literals and are never modified. */
static const char *quote_char = "\"";
static const char *escape_char = "\\";
static const char *separator_char = ":";
static const char *row_start = "{";
static const char *row_end = "}\n";
static const char *delimiter = ",";

/* Cached lengths of quote_char and escape_char (both one byte here). */
static size_t quote_len = 1; //strlen(quote_char); /* multibyte */
static size_t orig_escape_len = 1; //strlen(escape_char);

/*
 * Set by the -A option. When non-zero, print_quoted_value() writes a space
 * in place of each byte below 0x20 instead of a \u00XX escape.
 */
static int drop_nonascii;

/* Uncomment to stop print_quoted_value() from doubling the escape character. */
//#define DONT_ESCAPE_ESCAPE
/*
 * Write value to outfile as a quoted string.
 *
 * bin_len == -1 means value is NUL-terminated; any other value is the number
 * of loop iterations to perform (one per byte while quote_len and
 * orig_escape_len are 1).
 *
 * Occurrences of quote_char and (unless DONT_ESCAPE_ESCAPE is defined)
 * escape_char are prefixed with escape_char. Bytes below 0x20 are written
 * as \u00XX, or as a single space when drop_nonascii is set. All other bytes
 * are copied through unchanged.
 */
static void
print_quoted_value(FILE *outfile, char* value, int bin_len) {
	const int counted = (bin_len != -1);
	int remaining = bin_len;

	fputs(quote_char, outfile);

	for (;;) {
		unsigned char byte;

		/* Stop on exhausted count, or on the NUL sentinel for plain strings. */
		if (counted) {
			if (remaining-- == 0)
				break;
		} else if (*value == '\0') {
			break;
		}

		if (quote_len && strncmp(value, quote_char, quote_len) == 0) {
			fputs(escape_char, outfile);
			fputs(quote_char, outfile);
			value += quote_len;
			continue;
		}
#ifndef DONT_ESCAPE_ESCAPE
		if (orig_escape_len && strncmp(value, escape_char, orig_escape_len) == 0) {
			fputs(escape_char, outfile);
			fputs(escape_char, outfile);
			value += orig_escape_len;
			continue;
		}
#endif
		byte = (unsigned char)*value;
		if (byte >= 0x20) {
			/* ordinary byte: copy as is */
			putc(*value++, outfile);
			continue;
		}

		/* control code / binary data */
		if (drop_nonascii)
			putc(' ', outfile);
		else
			fprintf(outfile, "\\u00%02x", byte);
		++value;
	}

	fputs(quote_char, outfile);
}
/*
 * Write bin_len bytes at value to outfile as a JSON object of the form
 *   {"$binary": "<base64>", "$type": "00"}
 *
 * A NULL value or a non-positive bin_len is written as an empty payload.
 * If Base64 encoding fails, an error is reported on stderr and the payload
 * is left empty; the encode buffer is not NUL-terminated in that case and
 * must not be printed.
 */
static void
print_binary_value(FILE *outfile, char const * value, int bin_len) {
	/* Never let a negative length turn into a huge size_t. */
	size_t const data_len = (value != NULL && bin_len > 0) ? (size_t)bin_len : 0;
	/* Four output characters per (possibly partial) 3-byte group, plus the
	 * terminating NUL; computed in size_t so large inputs cannot overflow int. */
	size_t const base64_buf_len = (data_len / 3 + 1) * 4 + 1;
	char * base64_buf = g_malloc(base64_buf_len);

	fputs("{\"$binary\": \"", outfile);
	if (base64encode(value, data_len, base64_buf, base64_buf_len) != 0) {
		fprintf(stderr, "Error: Base64 serialization failed.\n");
	} else {
		fputs(base64_buf, outfile);
	}
	g_free(base64_buf);
	fputs("\", \"$type\": \"00\"}", outfile);
}
/*
 * Write one "name":value member to outfile.
 *
 * col_name is written as a NUL-terminated quoted string, followed by
 * separator_char. The value is then written according to col_type:
 *   - binary types: a $binary object built from bin_len bytes of col_val
 *   - other quote types: a quoted string of bin_len bytes
 *   - everything else: col_val verbatim, as a NUL-terminated string
 */
static void
print_col(FILE *outfile, char* col_name, gchar *col_val, int col_type, int bin_len) {
	print_quoted_value(outfile, col_name, -1);
	fputs(separator_char, outfile);

	/* Types that are not quote types go out unquoted. */
	if (!is_quote_type(col_type)) {
		fputs(col_val, outfile);
		return;
	}

	if (is_binary_type(col_type))
		print_binary_value(outfile, col_val, bin_len);
	else
		print_quoted_value(outfile, col_val, bin_len);
}
int
main(int argc, char **argv)
{
unsigned int i;
MdbHandle *mdb;
MdbTableDef *table;
MdbColumn *col;
char **bound_values;
int *bound_lens;
FILE *outfile = stdout;
drop_nonascii = 0;
int opt;
char *value;
size_t length;
while ((opt=getopt(argc, argv, "AD:"))!=-1) {
switch (opt) {
case 'A':
drop_nonascii = 1;
break;
case 'D':
mdb_set_date_fmt(optarg);
break;
default:
break;
}
}
/*
** optind is now the position of the first non-option arg,
** see getopt(3)
*/
if (argc-optind < 2) {
fprintf(stderr,"Usage: %s [options] <file> <table>\n",argv[0]);
fprintf(stderr,"where options are:\n");
fprintf(stderr," -D <format> set the date format (see strftime(3) for details)\n");
fprintf(stderr," -A drop non ascii characters in non-binary fields\n");
exit(1);
}
if (!(mdb = mdb_open(argv[optind], MDB_NOFLAGS))) {
exit(1);
}
table = mdb_read_table_by_name(mdb, argv[argc-1], MDB_TABLE);
if (!table) {
fprintf(stderr, "Error: Table %s does not exist in this database.\n", argv[argc-1]);
mdb_close(mdb);
exit(1);
}
/* read table */
mdb_read_columns(table);
mdb_rewind_table(table);
bound_values = (char **) g_malloc(table->num_cols * sizeof(char *));
bound_lens = (int *) g_malloc(table->num_cols * sizeof(int));
for (i=0;i<table->num_cols;i++) {
/* bind columns */
bound_values[i] = (char *) g_malloc0(MDB_BIND_SIZE);
mdb_bind_column(table, i+1, bound_values[i], &bound_lens[i]);
}
while(mdb_fetch_row(table)) {
fputs(row_start, outfile);
int add_delimiter = 0;
for (i=0;i<table->num_cols;i++) {
col=g_ptr_array_index(table->columns,i);
if (bound_lens[i]) {
if (add_delimiter) {
fputs(delimiter, outfile);
add_delimiter = 0;
}
if (col->col_type == MDB_OLE) {
value = mdb_ole_read_full(mdb, col, &length);
} else {
value = bound_values[i];
length = bound_lens[i];
}
print_col(outfile, col->name, value, col->col_type, length);
add_delimiter = 1;
if (col->col_type == MDB_OLE)
free(value);
}
}
fputs(row_end, outfile);
}
/* free the memory used to bind */
for (i=0;i<table->num_cols;i++) {
g_free(bound_values[i]);
}
g_free(bound_values);
g_free(bound_lens);
mdb_free_tabledef(table);
mdb_close(mdb);
return 0;
}