Merge pull request #42 from evanmiller/mdb-exportjson

mdb-json tool
This commit is contained in:
Evan Miller 2020-09-02 12:05:02 -04:00 committed by GitHub
commit b7dd44d0d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 291 additions and 13 deletions

1
.gitignore vendored
View File

@ -39,6 +39,7 @@ src/sql/parser.h
src/util/mdb-array
src/util/mdb-count
src/util/mdb-export
src/util/mdb-exportjson
src/util/mdb-header
src/util/mdb-import
src/util/mdb-parsecsv

View File

@ -165,8 +165,8 @@ before_script:
script:
- ./configure --disable-silent-rules $CONFIGURE_FLAGS
- make
- ./src/util/mdb-array test/data/ASampleDatabase.accdb "Asset Items"
- ./src/util/mdb-array test/data/nwind.mdb "Customers"
- ./src/util/mdb-json test/data/ASampleDatabase.accdb "Asset Items"
- ./src/util/mdb-json test/data/nwind.mdb "Customers"
- ./src/util/mdb-count test/data/ASampleDatabase.accdb "Asset Items"
- ./src/util/mdb-count test/data/nwind.mdb "Customers"
- ./src/util/mdb-prop test/data/ASampleDatabase.accdb "Asset Items"

View File

@ -50,7 +50,8 @@ Provides command line utilities, including:
| ------- | ----------- |
| `mdb-ver` | Prints the version (JET 3 or 4) of an mdb file. |
| `mdb-schema` | Prints DDL for the specified table. |
| `mdb-export` | Export table to CSV format. |
| `mdb-export` | Export table to CSV or SQL formats. |
| `mdb-json` | Export table to JSON format. |
| `mdb-tables` | A simple dump of table names to be used with shell scripts. |
| `mdb-count` | A simple count of number of rows in a table, to be used in shell scripts and ETL pipelines. |
| `mdb-header` | Generates a C header to be used in exporting mdb data to a C prog. |

View File

@ -276,6 +276,8 @@ if test x$enable_gtk_doc = xauto ; then
fi
AM_CONDITIONAL(ENABLE_GTK_DOC, test x$enable_gtk_doc = xyes)
AM_CONDITIONAL(HAVE_GNOME_DOC_UTILS, test x$enable_gtk_doc = xyes)
AM_CONDITIONAL(ENABLE_SK, test x$enable_gtk_doc = xyes)
##################################################
# Check for txt2man

View File

@ -1,15 +1,7 @@
AUTOMAKE_OPTIONS = subdir-objects
SUBDIRS = bash-completion
bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop mdb-count mdb-queries
bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop mdb-count mdb-queries mdb-json
noinst_PROGRAMS = mdb-import prtable prcat prdata prkkd prdump prole updrow prindex
mdb_export_SOURCES = mdb-export.c
mdb_schema_SOURCES = mdb-schema.c
mdb_tables_SOURCES = mdb-tables.c
mdb_sql_SOURCES = mdb-sql.c
mdb_ver_SOURCES = mdb-ver.c
mdb_import_SOURCES = mdb-import.c
mdb_queries_SOURCES = mdb-queries.c
updrow_SOURCES = updrow.c
LIBS = $(GLIB_LIBS) @LIBS@
DEFS = @DEFS@ -DLOCALEDIR=\"$(localedir)\"
AM_CFLAGS = -I$(top_srcdir)/include $(GLIB_CFLAGS) -Wsign-compare

80
src/util/base64.h Normal file
View File

@ -0,0 +1,80 @@
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Miscellaneous/Base64
#include <inttypes.h>
#include <string.h>
// TODO: split into a header (declaration) and a separate implementation file
/*
 * Encode dataLength bytes starting at data_buf as Base64 text.
 *
 * result      destination buffer for the encoded characters
 * resultSize  capacity of result in bytes, including room for the
 *             terminating NUL
 *
 * Returns 0 on success, in which case result is NUL-terminated.
 * Returns 1 as soon as the next character (or the terminator) would not
 * fit; whatever was written before that point stays in result and is NOT
 * NUL-terminated.
 */
static int base64encode(const void* data_buf, size_t dataLength, char* result, size_t resultSize)
{
	static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const uint8_t *src = (const uint8_t *)data_buf;
	size_t out = 0;
	size_t pos;

	/* walk the input one group of up to three bytes at a time */
	for (pos = 0; pos < dataLength; pos += 3)
	{
		size_t avail = dataLength - pos;
		uint32_t group;
		uint8_t sextet[4];
		size_t k;

		if (avail > 3)
			avail = 3;

		/* pack the available bytes into the top of a 24-bit value;
		 * the casts happen before the shifts so no bits are lost */
		group = ((uint32_t)src[pos]) << 16;
		if (avail > 1)
			group |= ((uint32_t)src[pos + 1]) << 8;
		if (avail > 2)
			group |= (uint32_t)src[pos + 2];

		/* slice the 24 bits into four 6-bit alphabet indexes */
		sextet[0] = (uint8_t)((group >> 18) & 63);
		sextet[1] = (uint8_t)((group >> 12) & 63);
		sextet[2] = (uint8_t)((group >> 6) & 63);
		sextet[3] = (uint8_t)(group & 63);

		/* N input bytes yield N+1 output characters */
		for (k = 0; k <= avail; k++)
		{
			if (out >= resultSize)
				return 1; /* indicate failure: buffer too small */
			result[out++] = alphabet[sextet[k]];
		}
	}

	/* pad with '=' when the input length is not a multiple of three */
	if (dataLength % 3 != 0)
	{
		size_t pads = 3 - dataLength % 3;

		while (pads-- > 0)
		{
			if (out >= resultSize)
				return 1; /* indicate failure: buffer too small */
			result[out++] = '=';
		}
	}

	if (out >= resultSize)
		return 1; /* indicate failure: buffer too small */
	result[out] = 0;
	return 0; /* indicate success */
}

View File

@ -107,7 +107,7 @@ main(int argc, char **argv)
{"row-delimiter", 'R', 0, G_OPTION_ARG_STRING, &row_delimiter, "Specify a row delimiter", "char"},
{"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"},
{"backend", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"},
{"date_format", 'D', 0, G_OPTION_ARG_STRING, &date_fmt, "Set the date format (see strftime(3) for details)", "format"},
{"date-format", 'D', 0, G_OPTION_ARG_STRING, &date_fmt, "Set the date format (see strftime(3) for details)", "format"},
{"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"},
{"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"},
{"null", '0', 0, G_OPTION_ARG_STRING, &null_text, "Use <char> to represent a NULL value", "char"},

202
src/util/mdb-json.c Normal file
View File

@ -0,0 +1,202 @@
/* MDB Tools - A library for reading MS Access database files
* Copyright (C) 2000 Brian Bruns
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "mdbtools.h"
#include "base64.h"
/*
 * Size in bytes of each per-column buffer allocated in main(). The #undef
 * discards any definition inherited from the included headers.
 */
#undef MDB_BIND_SIZE
#define MDB_BIND_SIZE 200000

/*
 * Column types that are not written as bare values: print_col() sends them
 * either through print_quoted_value() or print_binary_value().
 * The argument is parenthesised so that any expression can be passed, but it
 * is still evaluated several times - do not pass expressions with side effects.
 */
#define is_quote_type(x) ((x)==MDB_TEXT || (x)==MDB_OLE || (x)==MDB_MEMO || (x)==MDB_DATETIME || (x)==MDB_BINARY || (x)==MDB_REPID)
/* Subset of the quoted types that print_col() emits via print_binary_value(). */
#define is_binary_type(x) ((x)==MDB_OLE || (x)==MDB_BINARY || (x)==MDB_REPID)

/* Fixed JSON punctuation; these point at string literals and are read-only. */
static const char *quote_char = "\"";     /* opens and closes every string */
static const char *escape_char = "\\";    /* prefix for an escaped quote or backslash */
static const char *separator_char = ":";  /* between a key and its value */
static const char *row_start = "{";       /* written before the first column of a row */
static const char *row_end = "}\n";       /* written after the last column: one object per line */
static const char *delimiter = ",";       /* between two key/value pairs */

/* Byte lengths of quote_char and escape_char, kept in sync by hand. */
static size_t quote_len = 1; /* strlen(quote_char) */
static size_t orig_escape_len = 1; /* strlen(escape_char) */

/* Non-zero when -U/--no-unprintable was given: bytes below 0x20 become spaces. */
static int drop_nonascii = 0;

/* Uncomment to stop print_quoted_value() from escaping the escape character itself. */
//#define DONT_ESCAPE_ESCAPE
/*
 * Write value to outfile as a quoted JSON string.
 *
 * bin_len == -1 : value is NUL-terminated and is written up to the NUL.
 * otherwise     : bin_len counts how many loop steps are taken over value.
 *
 * The quote character and (unless DONT_ESCAPE_ESCAPE is defined) the escape
 * character are prefixed with escape_char. Bytes below 0x20 are written as
 * \u00XX, or as a single space when drop_nonascii is set. Every other byte
 * is copied through unchanged.
 */
static void
print_quoted_value(FILE *outfile, char* value, int bin_len) {
	const int counted = (bin_len != -1);
	const char *cur = value;

	fputs(quote_char, outfile);
	for (;;) {
		unsigned char byte;

		/* end of input: counter exhausted, or \0 sentinel reached */
		if (counted) {
			if (bin_len == 0)
				break;
			bin_len--;
		} else if (*cur == '\0') {
			break;
		}

		if (quote_len != 0 && strncmp(cur, quote_char, quote_len) == 0) {
			fputs(escape_char, outfile);
			fputs(quote_char, outfile);
			cur += quote_len;
			continue;
		}
#ifndef DONT_ESCAPE_ESCAPE
		if (orig_escape_len != 0 && strncmp(cur, escape_char, orig_escape_len) == 0) {
			fputs(escape_char, outfile);
			fputs(escape_char, outfile);
			cur += orig_escape_len;
			continue;
		}
#endif
		byte = (unsigned char)*cur;
		cur++;
		if (byte >= 0x20) {
			/* ordinary byte: copy as-is */
			putc(byte, outfile);
		} else if (drop_nonascii) {
			putc(' ', outfile);
		} else {
			/* escape control codes / binary data. */
			fprintf(outfile, "\\u00%02x", byte);
		}
	}
	fputs(quote_char, outfile);
}
/*
 * Write bin_len bytes of value to outfile as the JSON object
 *   {"$binary": "<base64>", "$type": "00"}
 *
 * A negative bin_len is treated as zero bytes. If the encoder reports a
 * failure, a message goes to stderr and an empty "$binary" string is
 * written, so the output stays well-formed.
 */
static void
print_binary_value(FILE *outfile, char const * value, int bin_len) {
	/* do the size arithmetic in size_t: in int it can overflow or go negative */
	size_t const data_len = bin_len > 0 ? (size_t)bin_len : 0;
	/* four characters per (possibly partial) 3-byte group, plus the NUL */
	size_t const base64_buf_len = (data_len / 3 + 1) * 4 + 1;
	char * base64_buf = g_malloc(base64_buf_len);

	if (base64encode(value, data_len, base64_buf, base64_buf_len) != 0) {
		fprintf(stderr, "Error: Base64 serialization failed.\n");
		/* on failure the encoder leaves the buffer unterminated */
		base64_buf[0] = '\0';
	}

	fputs("{\"$binary\": \"", outfile);
	fputs(base64_buf, outfile);
	fputs("\", \"$type\": \"00\"}", outfile);
	g_free(base64_buf);
}
/*
 * Write one "name":value pair to outfile.
 *
 * The name always goes through print_quoted_value() as a NUL-terminated
 * string. The value is written according to col_type:
 *   - not a quote type: col_val is written verbatim;
 *   - binary type:      print_binary_value(col_val, bin_len);
 *   - other quote type: print_quoted_value(col_val, bin_len).
 */
static void
print_col(FILE *outfile, char* col_name, gchar *col_val, int col_type, int bin_len) {
	/* key and separator */
	print_quoted_value(outfile, col_name, -1);
	fputs(separator_char, outfile);

	/* bare value: emitted exactly as given */
	if (!is_quote_type(col_type)) {
		fputs(col_val, outfile);
		return;
	}

	if (is_binary_type(col_type)) {
		print_binary_value(outfile, col_val, bin_len);
	} else {
		print_quoted_value(outfile, col_val, bin_len);
	}
}
int
main(int argc, char **argv)
{
unsigned int i;
MdbHandle *mdb;
MdbTableDef *table;
MdbColumn *col;
char **bound_values;
int *bound_lens;
FILE *outfile = stdout;
char *date_fmt = NULL;
char *value;
size_t length;
GOptionEntry entries[] = {
{"date-format", 'D', 0, G_OPTION_ARG_STRING, &date_fmt, "Set the date format (see strftime(3) for details)", "format"},
{"no-unprintable", 'U', 0, G_OPTION_ARG_NONE, &drop_nonascii, "Change unprintable characters to spaces (otherwise escaped as \\u00XX)", NULL},
{NULL}
};
GError *error = NULL;
GOptionContext *opt_context;
opt_context = g_option_context_new("<file> <table> - export data from Access file to JSON");
g_option_context_add_main_entries(opt_context, entries, NULL /*i18n*/);
if (!g_option_context_parse (opt_context, &argc, &argv, &error))
{
fprintf(stderr, "option parsing failed: %s\n", error->message);
fputs(g_option_context_get_help(opt_context, TRUE, NULL), stderr);
exit (1);
}
if (argc != 3) {
fputs("Wrong number of arguments.\n\n", stderr);
fputs(g_option_context_get_help(opt_context, TRUE, NULL), stderr);
exit(1);
}
if (!(mdb = mdb_open(argv[1], MDB_NOFLAGS))) {
exit(1);
}
if (date_fmt)
mdb_set_date_fmt(mdb, date_fmt);
table = mdb_read_table_by_name(mdb, argv[2], MDB_TABLE);
if (!table) {
fprintf(stderr, "Error: Table %s does not exist in this database.\n", argv[argc-1]);
mdb_close(mdb);
exit(1);
}
/* read table */
mdb_read_columns(table);
mdb_rewind_table(table);
bound_values = (char **) g_malloc(table->num_cols * sizeof(char *));
bound_lens = (int *) g_malloc(table->num_cols * sizeof(int));
for (i=0;i<table->num_cols;i++) {
/* bind columns */
bound_values[i] = (char *) g_malloc0(MDB_BIND_SIZE);
mdb_bind_column(table, i+1, bound_values[i], &bound_lens[i]);
}
while(mdb_fetch_row(table)) {
fputs(row_start, outfile);
int add_delimiter = 0;
for (i=0;i<table->num_cols;i++) {
col=g_ptr_array_index(table->columns,i);
if (bound_lens[i]) {
if (add_delimiter) {
fputs(delimiter, outfile);
add_delimiter = 0;
}
if (col->col_type == MDB_OLE) {
value = mdb_ole_read_full(mdb, col, &length);
} else {
value = bound_values[i];
length = bound_lens[i];
}
print_col(outfile, col->name, value, col->col_type, length);
add_delimiter = 1;
if (col->col_type == MDB_OLE)
free(value);
}
}
fputs(row_end, outfile);
}
/* free the memory used to bind */
for (i=0;i<table->num_cols;i++) {
g_free(bound_values[i]);
}
g_free(bound_values);
g_free(bound_lens);
mdb_free_tabledef(table);
mdb_close(mdb);
return 0;
}