From 5057419da9ca0c1938c79793c2149f9692a95d4c Mon Sep 17 00:00:00 2001
From: Ugo Di Girolamo
Date: Wed, 21 May 2014 18:17:55 -0400
Subject: [PATCH] Add new binary util - mdb-exportjson

A new binary util for exporting mdb rows as json lines.
This is very useful for importing mdb data to mongo in combination with
mongoimport.

The code is vastly similar to the mdb-export but there are enough
dissimilarities and disagreement about flag values that I thought a
separate util is warranted.
---
 configure.ac              |   2 +
 src/util/Makefile.am      |   2 +-
 src/util/mdb-exportjson.c | 221 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 src/util/mdb-exportjson.c

diff --git a/configure.ac b/configure.ac
index 6a04fe3..22ace6d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -196,6 +196,8 @@ if test x$enable_gtk_doc = xauto ; then
 fi
 
 AM_CONDITIONAL(ENABLE_GTK_DOC, test x$enable_gtk_doc = xyes)
+AM_CONDITIONAL(HAVE_GNOME_DOC_UTILS, test x$enable_gtk_doc = xyes)
+AM_CONDITIONAL(ENABLE_SK, test x$enable_gtk_doc = xyes)
 
 ##################################################
 # Check for txt2man
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index 084044a..ed39eee 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop
+bin_PROGRAMS = mdb-export mdb-array mdb-schema mdb-tables mdb-parsecsv mdb-header mdb-sql mdb-ver mdb-prop mdb-exportjson
 noinst_PROGRAMS = mdb-import prtable prcat prdata prkkd prdump prole updrow prindex
 LIBS = $(GLIB_LIBS) @LIBS@ @LEXLIB@
 DEFS = @DEFS@ -DLOCALEDIR=\"$(localedir)\"
diff --git a/src/util/mdb-exportjson.c b/src/util/mdb-exportjson.c
new file mode 100644
index 0000000..8cb675a
--- /dev/null
+++ b/src/util/mdb-exportjson.c
@@ -0,0 +1,221 @@
+/* MDB Tools - A library for reading MS Access database file
+ * Copyright (C) 2000 Brian Bruns
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "mdbtools.h"
+
+#ifdef DMALLOC
+#include "dmalloc.h"
+#endif
+
+#undef MDB_BIND_SIZE
+#define MDB_BIND_SIZE 200000
+
+#define is_quote_type(x) ((x)==MDB_TEXT || (x)==MDB_OLE || (x)==MDB_MEMO || (x)==MDB_DATETIME || (x)==MDB_BINARY || (x)==MDB_REPID)
+#define is_binary_type(x) ((x)==MDB_OLE || (x)==MDB_BINARY || (x)==MDB_REPID)
+
+static char *quote_char = "\"";
+static char *escape_char = "\\";
+static char *separator_char = ":";
+static char *row_start = "{";
+static char *row_end = "}\n";
+static char *delimiter = ",";
+static size_t quote_len = 1; //strlen(quote_char); /* multibyte */
+static size_t orig_escape_len = 1; //strlen(escape_char);
+static int drop_nonascii;
+
+
+//#define DONT_ESCAPE_ESCAPE
+/*
+ * Write value to outfile as a double-quoted JSON string.
+ *
+ * bin_len == -1 means value is a NUL-terminated string; any other bin_len is
+ * the exact number of bytes to write (binary data, embedded NULs allowed).
+ *
+ * The quote and escape characters are backslash-escaped. Bytes below 0x20 or
+ * above 0x7f are replaced by a space, except in binary data when
+ * drop_nonascii is off, where they are written as \u00XX.
+ */
+static void
+print_quoted_value(FILE *outfile, char* value, int bin_len) {
+	int is_binary = (bin_len != -1);
+
+	fputs(quote_char, outfile);
+	while (1) {
+		unsigned char c;
+
+		if (is_binary) {
+			if (!bin_len--)
+				break;
+		} else /* use \0 sentry */
+			if (!*value)
+				break;
+
+		/* Classify the byte as unsigned so the result does not depend
+		 * on whether plain char is signed on this platform. */
+		c = (unsigned char)*value;
+
+		if (quote_len && !strncmp(value, quote_char, quote_len)) {
+			fprintf(outfile, "%s%s", escape_char, quote_char);
+			value += quote_len;
+#ifndef DONT_ESCAPE_ESCAPE
+		} else if (orig_escape_len && !strncmp(value, escape_char, orig_escape_len)) {
+			fprintf(outfile, "%s%s", escape_char, escape_char);
+			value += orig_escape_len;
+#endif
+		} else if (c < 0x20 || c > 0x7f) {
+			if (!is_binary || drop_nonascii) {
+				putc(' ', outfile);
+			} else {
+				/* JSON has no \xNN escape; \u00XX is the valid form. */
+				fprintf(outfile, "\\u%04x", c);
+			}
+			++value;
+		} else {
+			putc(c, outfile);
+			++value;
+		}
+	}
+	fputs(quote_char, outfile);
+}
+
+/*
+ * Write one "name":value member. Quoted column types are written as JSON
+ * strings: binary types use bin_len bytes, the others are treated as
+ * NUL-terminated text. Every other type is written verbatim.
+ */
+static void
+print_col(FILE *outfile, char* col_name, gchar *col_val, int col_type, int bin_len) {
+	print_quoted_value(outfile, col_name, -1);
+	fputs(separator_char, outfile);
+	if (is_quote_type(col_type)) {
+		if (!is_binary_type(col_type)) {
+			bin_len = -1;
+		}
+		print_quoted_value(outfile, col_val, bin_len);
+	} else
+		fputs(col_val, outfile);
+}
+
+/*
+ * Export every row of one table as a JSON object per line on stdout.
+ * Columns whose bound length is zero are left out of the object.
+ */
+int
+main(int argc, char **argv)
+{
+	unsigned int i;
+	MdbHandle *mdb;
+	MdbTableDef *table;
+	MdbColumn *col;
+	char **bound_values;
+	int *bound_lens;
+	FILE *outfile = stdout;
+	int opt;
+	char *value;
+	size_t length;
+
+	drop_nonascii = 1;
+
+	while ((opt=getopt(argc, argv, "AD:"))!=-1) {
+		switch (opt) {
+		case 'A':
+			drop_nonascii = 0;
+		break;
+		case 'D':
+			mdb_set_date_fmt(optarg);
+		break;
+		default:
+		break;
+		}
+	}
+
+	/*
+	** optind is now the position of the first non-option arg,
+	** see getopt(3)
+	*/
+	if (argc-optind < 2) {
+		fprintf(stderr,"Usage: %s [options] <file> <table>\n",argv[0]);
+		fprintf(stderr,"where options are:\n");
+		fprintf(stderr,"  -D <format>    set the date format (see strftime(3) for details)\n");
+		fprintf(stderr,"  -A             in binary fields, write control and non-ASCII bytes as \\u00XX instead of spaces\n");
+		exit(1);
+	}
+
+	if (!(mdb = mdb_open(argv[optind], MDB_NOFLAGS))) {
+		exit(1);
+	}
+
+	table = mdb_read_table_by_name(mdb, argv[argc-1], MDB_TABLE);
+	if (!table) {
+		fprintf(stderr, "Error: Table %s does not exist in this database.\n", argv[argc-1]);
+		mdb_close(mdb);
+		exit(1);
+	}
+
+	/* read table */
+	mdb_read_columns(table);
+	mdb_rewind_table(table);
+
+	bound_values = (char **) g_malloc(table->num_cols * sizeof(char *));
+	bound_lens = (int *) g_malloc(table->num_cols * sizeof(int));
+	for (i=0;i<table->num_cols;i++) {
+		/* bind columns */
+		bound_values[i] = (char *) g_malloc0(MDB_BIND_SIZE);
+		mdb_bind_column(table, i+1, bound_values[i], &bound_lens[i]);
+	}
+
+	while(mdb_fetch_row(table)) {
+		int add_delimiter = 0;
+
+		fputs(row_start, outfile);
+		for (i=0;i<table->num_cols;i++) {
+			col=g_ptr_array_index(table->columns,i);
+			if (!bound_lens[i])
+				continue;
+			if (col->col_type == MDB_OLE) {
+				value = mdb_ole_read_full(mdb, col, &length);
+				if (!value)	/* defensive: no payload to print */
+					continue;
+			} else {
+				value = bound_values[i];
+				length = bound_lens[i];
+			}
+			/* Write the delimiter only when a member is about to
+			 * follow one already written; writing it ahead of a
+			 * skipped column would leave a trailing comma. */
+			if (add_delimiter)
+				fputs(delimiter, outfile);
+			print_col(outfile, col->name, value, col->col_type, length);
+			add_delimiter = 1;
+			if (col->col_type == MDB_OLE)
+				free(value);
+		}
+		fputs(row_end, outfile);
+	}
+
+	/* free the memory used to bind */
+	for (i=0;i<table->num_cols;i++) {
+		g_free(bound_values[i]);
+	}
+	g_free(bound_values);
+	g_free(bound_lens);
+	mdb_free_tabledef(table);
+
+	mdb_close(mdb);
+	return 0;
+}