Merge pull request #222 from evanmiller/merge-108

Revise and merge in #108 (new --escape-invisible flag to mdb-export)
This commit is contained in:
Evan Miller 2020-12-29 12:50:04 -05:00 committed by GitHub
commit 0ad6fb33f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 74 additions and 44 deletions

View File

@ -2,7 +2,7 @@ NAME
mdb-export - Export data in an MDB database table to CSV format. mdb-export - Export data in an MDB database table to CSV format.
SYNOPSIS SYNOPSIS
mdb-export [--no-header] [--delimiter delim] [--row-delimiter delim] [[--no-quote] | [--quote char [--escape char]]] [--date-format fmt] [--datetime-format fmt] [--bin strip|raw|octal|hex] [--boolean-words] database table mdb-export [--no-header] [--delimiter delim] [--row-delimiter delim] [[--no-quote] | [--quote char [--escape char]]] [--escape-invisible] [--date-format fmt] [--datetime-format fmt] [--bin strip|raw|octal|hex] [--boolean-words] database table
mdb-export --insert backend [--namespace prefix] [--batch-size int] database table mdb-export --insert backend [--namespace prefix] [--batch-size int] database table
mdb-export -h|--help mdb-export -h|--help
@ -20,6 +20,7 @@ OPTIONS
-Q, --no-quote Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fields will be surrounded by " (double quote) characters. -Q, --no-quote Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fields will be surrounded by " (double quote) characters.
-q, --quote char Use char to wrap text-like fields. Default is " (double quote). -q, --quote char Use char to wrap text-like fields. Default is " (double quote).
-X, --escape char Use char to escape quoted characters within a field. Default is doubling. -X, --escape char Use char to escape quoted characters within a field. Default is doubling.
-e, --escape-invisible Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.
-I, --insert backend INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite. -I, --insert backend INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite.
-N, --namespace prefix Prefix identifiers with prefix. -N, --namespace prefix Prefix identifiers with prefix.
-S, --batch-size int Size of insert batches on supported platforms. -S, --batch-size int Size of insert batches on supported platforms.

View File

@ -190,10 +190,11 @@ enum {
/* csv export binary options */ /* csv export binary options */
enum { enum {
MDB_BINEXPORT_STRIP, MDB_EXPORT_BINARY_STRIP = (1 << 0),
MDB_BINEXPORT_RAW, MDB_EXPORT_BINARY_RAW = (1 << 1),
MDB_BINEXPORT_OCTAL, MDB_EXPORT_BINARY_OCTAL = (1 << 2),
MDB_BINEXPORT_HEXADECIMAL MDB_EXPORT_BINARY_HEXADECIMAL = (1 << 3),
MDB_EXPORT_ESCAPE_CONTROL_CHARS = (1 << 4)
}; };
#define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */ #define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */
@ -557,7 +558,7 @@ void mdb_register_backend(MdbHandle *mdb, char *backend_name, guint32 capabiliti
gchar* (*quote_schema_name)(const gchar*, const gchar*)); gchar* (*quote_schema_name)(const gchar*, const gchar*));
int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name); int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name);
void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options); void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options);
void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode); void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int flags);
/* sargs.c */ /* sargs.c */
int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields); int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields);

View File

@ -1004,48 +1004,66 @@ mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace
#define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME) #define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME)
//#define DONT_ESCAPE_ESCAPE //#define DONT_ESCAPE_ESCAPE
void void
mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode) mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len,
char *quote_char, char *escape_char, int flags)
/* quote_text: Don't quote if 0. /* quote_text: Don't quote if 0.
*/ */
{ {
size_t quote_len = strlen(quote_char); /* multibyte */ size_t quote_len = strlen(quote_char); /* multibyte */
size_t orig_escape_len = escape_char ? strlen(escape_char) : 0; size_t orig_escape_len = escape_char ? strlen(escape_char) : 0;
int quoting = quote_text && is_quote_type(col_type);
int bin_mode = (flags & 0x0F);
int escape_cr_lf = !!(flags & MDB_EXPORT_ESCAPE_CONTROL_CHARS);
/* double the quote char if no escape char passed */ /* double the quote char if no escape char passed */
if (!escape_char) if (!escape_char)
escape_char = quote_char; escape_char = quote_char;
if (quote_text && is_quote_type(col_type)) { if (quoting)
fputs(quote_char, outfile); fputs(quote_char, outfile);
while (1) {
if (is_binary_type(col_type)) {
if (bin_mode == MDB_BINEXPORT_STRIP)
break;
if (!bin_len--)
break;
} else /* use \0 sentry */
if (!*col_val)
break;
int is_binary_hex_col = is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL; while (1) {
if (is_binary_type(col_type)) {
if (bin_mode == MDB_EXPORT_BINARY_STRIP)
break;
if (!bin_len--)
break;
} else /* use \0 sentry */
if (!*col_val)
break;
if (quote_len && !strncmp(col_val, quote_char, quote_len) && !is_binary_hex_col) { if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_OCTAL) {
fprintf(outfile, "%s%s", escape_char, quote_char); fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
col_val += quote_len; } else if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_HEXADECIMAL) {
fprintf(outfile, "%02X", *(unsigned char*)col_val++);
} else if (quoting && quote_len && !strncmp(col_val, quote_char, quote_len)) {
fprintf(outfile, "%s%s", escape_char, quote_char);
col_val += quote_len;
#ifndef DONT_ESCAPE_ESCAPE #ifndef DONT_ESCAPE_ESCAPE
} else if (orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len) && !is_binary_hex_col) { } else if (quoting && orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len)) {
fprintf(outfile, "%s%s", escape_char, escape_char); fprintf(outfile, "%s%s", escape_char, escape_char);
col_val += orig_escape_len; col_val += orig_escape_len;
#endif #endif
} else if (is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_OCTAL) { } else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\r') {
fprintf(outfile, "\\%03o", *(unsigned char*)col_val++); col_val++;
} else if (is_binary_hex_col) { putc('\\', outfile);
fprintf(outfile, "%02X", *(unsigned char*)col_val++); putc('r', outfile);
} else } else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\n') {
putc(*col_val++, outfile); col_val++;
} putc('\\', outfile);
putc('n', outfile);
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\t') {
col_val++;
putc('\\', outfile);
putc('t', outfile);
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\\') {
col_val++;
putc('\\', outfile);
putc('\\', outfile);
} else
putc(*col_val++, outfile);
}
if (quoting)
fputs(quote_char, outfile); fputs(quote_char, outfile);
} else
fputs(col_val, outfile);
} }

View File

@ -42,13 +42,14 @@ main(int argc, char **argv)
int quote_text = 1; int quote_text = 1;
int boolean_words = 0; int boolean_words = 0;
int batch_size = 1000; int batch_size = 1000;
int escape_cr_lf = 0;
char *insert_dialect = NULL; char *insert_dialect = NULL;
char *shortdate_fmt = NULL; char *shortdate_fmt = NULL;
char *date_fmt = NULL; char *date_fmt = NULL;
char *namespace = NULL; char *namespace = NULL;
char *str_bin_mode = NULL; char *str_bin_mode = NULL;
char *null_text = NULL; char *null_text = NULL;
int bin_mode = MDB_BINEXPORT_RAW; int export_flags = 0;
char *value; char *value;
size_t length; size_t length;
int ret; int ret;
@ -60,6 +61,7 @@ main(int argc, char **argv)
{"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL}, {"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL},
{"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"}, {"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"},
{"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"}, {"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"},
{"escape-invisible", 'e', 0, G_OPTION_ARG_NONE, &escape_cr_lf, "Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.", NULL},
{"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"}, {"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"},
{"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"}, {"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"},
{"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"}, {"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"},
@ -120,17 +122,23 @@ main(int argc, char **argv)
if (str_bin_mode) { if (str_bin_mode) {
if (!strcmp(str_bin_mode, "strip")) if (!strcmp(str_bin_mode, "strip"))
bin_mode = MDB_BINEXPORT_STRIP; export_flags |= MDB_EXPORT_BINARY_STRIP;
else if (!strcmp(str_bin_mode, "raw")) else if (!strcmp(str_bin_mode, "raw"))
bin_mode = MDB_BINEXPORT_RAW; export_flags |= MDB_EXPORT_BINARY_RAW;
else if (!strcmp(str_bin_mode, "octal")) else if (!strcmp(str_bin_mode, "octal"))
bin_mode = MDB_BINEXPORT_OCTAL; export_flags |= MDB_EXPORT_BINARY_OCTAL;
else if (!strcmp(str_bin_mode, "hex")) else if (!strcmp(str_bin_mode, "hex"))
bin_mode = MDB_BINEXPORT_HEXADECIMAL; export_flags |= MDB_EXPORT_BINARY_HEXADECIMAL;
else { else {
fputs("Invalid binary mode\n", stderr); fputs("Invalid binary mode\n", stderr);
exit(1); exit(1);
} }
} else {
export_flags |= MDB_EXPORT_BINARY_RAW;
}
if (escape_cr_lf) {
export_flags |= MDB_EXPORT_ESCAPE_CONTROL_CHARS;
} }
/* Open file */ /* Open file */
@ -229,7 +237,7 @@ main(int argc, char **argv)
value = bound_values[i]; value = bound_values[i];
length = bound_lens[i]; length = bound_lens[i];
} }
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
if (col->col_type == MDB_OLE) if (col->col_type == MDB_OLE)
free(value); free(value);
} }
@ -282,21 +290,23 @@ main(int argc, char **argv)
length = bound_lens[i]; length = bound_lens[i];
} }
/* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */ /* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */
if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) { if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type)
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
char *quote_char_binary_sqlite = (char *) g_strdup("'"); char *quote_char_binary_sqlite = (char *) g_strdup("'");
fputs("X", outfile); fputs("X", outfile);
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, export_flags);
g_free (quote_char_binary_sqlite); g_free (quote_char_binary_sqlite);
/* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */ /* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */
} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) { } else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type)
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
char *quote_char_binary_postgres = (char *) g_strdup("'"); char *quote_char_binary_postgres = (char *) g_strdup("'");
fputs("decode(", outfile); fputs("decode(", outfile);
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, export_flags);
fputs(", 'hex')", outfile); fputs(", 'hex')", outfile);
g_free (quote_char_binary_postgres); g_free (quote_char_binary_postgres);
/* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */ /* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */
} else { } else {
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
} }
if (col->col_type == MDB_OLE) if (col->col_type == MDB_OLE)
free(value); free(value);