Add a case-insensitive ILIKE operator to libmdb and the mdb-sql front end.

Review notes on this copy of the patch:
  * src/util/mdb-sql.c: g_locale_to_utf8() returns NULL when the input is not
    a valid multibyte string, so the interactive loop now reports the failure
    and prompts again instead of handing a NULL pointer to strcmp(). The
    second hunk header grew from +437,13 to +437,18 to cover the added lines.
  * src/libmdb/fakeglib.c: the hunk text appears truncated in this copy (it
    breaks off inside the for-loop of g_locale_to_utf8 and resumes in a later
    function); it is reproduced as found and will not apply as-is.

diff --git a/doc/mdb-sql.txt b/doc/mdb-sql.txt
index 89f33b8..82333ca 100644
--- a/doc/mdb-sql.txt
+++ b/doc/mdb-sql.txt
@@ -50,7 +50,7 @@ SQL LANGUAGE
 
   limit clause: LIMIT
 
-  operator: =, =>, =<, <>, like, <, >
+  operator: =, =>, =<, <>, like, ilike, <, >
 
   literal: integers, floating point numbers, or string literal in single quotes
 
@@ -63,6 +63,10 @@ NOTES
 
   The -i command can be passed the string 'stdin' to test entering text as if using a pipe.
 
+  The 'like' operator performs a case-sensitive pattern match, with ANSI-style wildcards. An underscore in the pattern will match any single character, and a percent sign will match any run of characters.
+
+  The 'ilike' operator is similar, but performs a case-insensitive pattern match.
+
 ENVIRONMENT
   LC_COLLATE Defines the locale for string-comparison operations. See locale(1).
   MDB_JET3_CHARSET Defines the charset of the input JET3 (access 97) file. Default is CP1252. See iconv(1).
diff --git a/include/mdbfakeglib.h b/include/mdbfakeglib.h
index 3d06767..2f5040f 100644
--- a/include/mdbfakeglib.h
+++ b/include/mdbfakeglib.h
@@ -144,6 +144,7 @@ void g_printerr(const gchar *format, ...);
 gint g_unichar_to_utf8(gunichar c, gchar *dst);
 gchar *g_locale_to_utf8(const gchar *opsysstring, size_t len,
         size_t *bytes_read, size_t *bytes_written, GError **error);
+gchar *g_utf8_casefold(const gchar *str, gssize len);
 gchar *g_utf8_strdown(const gchar *str, gssize len);
 
 /* GString */
diff --git a/include/mdbtools.h b/include/mdbtools.h
index 9b969ad..aad705d 100644
--- a/include/mdbtools.h
+++ b/include/mdbtools.h
@@ -129,7 +129,8 @@ enum {
 	MDB_LTEQ,
 	MDB_LIKE,
 	MDB_ISNULL,
-	MDB_NOTNULL
+	MDB_NOTNULL,
+	MDB_ILIKE,
 };
 
 typedef enum {
@@ -164,6 +165,7 @@ enum {
 	x == MDB_GTEQ || \
 	x == MDB_LTEQ || \
 	x == MDB_LIKE || \
+	x == MDB_ILIKE || \
 	x == MDB_ISNULL || \
 	x == MDB_NOTNULL )
 
@@ -611,6 +613,7 @@ void mdb_dump_stats(MdbHandle *mdb);
 
 /* like.c */
 int mdb_like_cmp(char *s, char *r);
+int mdb_ilike_cmp(char *s, char *r);
 
 /* write.c */
 void mdb_put_int16(void *buf, guint32 offset, guint32 value);
diff --git a/src/libmdb/fakeglib.c b/src/libmdb/fakeglib.c
index 5198e9f..e6b67d6 100644
--- a/src/libmdb/fakeglib.c
+++ b/src/libmdb/fakeglib.c
@@ -244,11 +244,16 @@ gchar *g_locale_to_utf8(const gchar *opsysstring, size_t len,
         size_t *bytes_read, size_t *bytes_written, GError **error) {
     if (len == (size_t)-1)
         len = strlen(opsysstring);
-    wchar_t *utf16 = malloc(sizeof(wchar_t)*(len+1));
-    if (mbstowcs(utf16, opsysstring, len+1) == (size_t)-1) {
-        free(utf16);
-        return g_strndup(opsysstring, len);
+    size_t wlen = mbstowcs(NULL, opsysstring, 0);
+    if (wlen == (size_t)-1) {
+        if (error) {
+            *error = malloc(sizeof(GError));
+            (*error)->message = g_strdup_printf("Invalid multibyte string: %s\n", opsysstring);
+        }
+        return NULL;
     }
+    wchar_t *utf16 = malloc(sizeof(wchar_t)*(wlen+1));
+    mbstowcs(utf16, opsysstring, wlen+1);
     gchar *utf8 = malloc(3*len+1);
     gchar *dst = utf8;
     for (size_t i=0; imessage = malloc(100);
             if (optopt) {
-                snprintf((*error)->message, 100, "Unrecognized option: -%c", optopt);
+                (*error)->message = g_strdup_printf("Unrecognized option: -%c", optopt);
             } else {
-                snprintf((*error)->message, 100, "Unrecognized option: %s", (*argv)[optind-1]);
+                (*error)->message = g_strdup_printf("Unrecognized option: %s", (*argv)[optind-1]);
             }
             free(short_opts);
             free(long_opts);
diff --git a/src/libmdb/index.c b/src/libmdb/index.c
index a6f5c60..480808b 100644
--- a/src/libmdb/index.c
+++ b/src/libmdb/index.c
@@ -1014,7 +1014,7 @@ int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
 		/*
 		 * a like with a wild card first is useless as a sarg
 		 */
-		if (sarg->op == MDB_LIKE && sarg->value.s[0]=='%')
+		if ((sarg->op == MDB_LIKE || sarg->op == MDB_ILIKE) && sarg->value.s[0]=='%')
 			return 0;
 
 		/*
@@ -1027,6 +1027,7 @@ int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
 					case MDB_EQUAL:
 						return 1; break;
 					case MDB_LIKE:
+					case MDB_ILIKE:
 						return 4; break;
 					case MDB_ISNULL:
 						return 12; break;
@@ -1040,6 +1041,7 @@ int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
 						else return 1;
 						break;
 					case MDB_LIKE:
+					case MDB_ILIKE:
 						return 6; break;
 					case MDB_ISNULL:
 						return 12; break;
@@ -1053,6 +1055,7 @@ int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
 					case MDB_EQUAL:
 						return 2; break;
 					case MDB_LIKE:
+					case MDB_ILIKE:
 						return 5; break;
 					case MDB_ISNULL:
 						return 12; break;
@@ -1066,6 +1069,7 @@ int mdb_index_compute_cost(MdbTableDef *table, MdbIndex *idx)
 						else return 2;
 						break;
 					case MDB_LIKE:
+					case MDB_ILIKE:
 						return 7; break;
 					case MDB_ISNULL:
 						return 12; break;
diff --git a/src/libmdb/like.c b/src/libmdb/like.c
index 17e19ed..fe57752 100644
--- a/src/libmdb/like.c
+++ b/src/libmdb/like.c
@@ -39,11 +39,7 @@ int mdb_like_cmp(char *s, char *r)
 	mdb_debug(MDB_DEBUG_LIKE, "comparing %s and %s", s, r);
 	switch (r[0]) {
 		case '\0':
-			if (s[0]=='\0') {
-				return 1;
-			} else {
-				return 0;
-			}
+			return (s[0]=='\0');
 		case '_':
 			/* skip one character */
 			return mdb_like_cmp(&s[1],&r[1]);
@@ -71,3 +67,25 @@ int mdb_like_cmp(char *s, char *r)
 			}
 	}
 }
+
+/**
+ *
+ * @param s: String to search within.
+ * @param r: Case-insensitive search pattern.
+ *
+ * Tests the string @s to see if it matches the search pattern @r without
+ * regard to case; this mimics the behavior of the Access LIKE operator. In the
+ * search pattern, a percent sign indicates matching on any number of
+ * characters, and an underscore indicates matching any single character.
+ *
+ * @Returns: 1 if the string matches, 0 if the string does not match.
+ */
+int mdb_ilike_cmp(char *s, char *r) {
+	char *s1 = g_utf8_casefold(s, -1);
+	char *r1 = g_utf8_casefold(r, -1);
+	int result = mdb_like_cmp(s1, r1);
+	g_free(s1);
+	g_free(r1);
+	return result;
+}
+
diff --git a/src/libmdb/sargs.c b/src/libmdb/sargs.c
index 0a3b4bd..a6c0299 100644
--- a/src/libmdb/sargs.c
+++ b/src/libmdb/sargs.c
@@ -47,6 +47,9 @@ int rc;
 	if (node->op == MDB_LIKE) {
 		return mdb_like_cmp(s,node->value.s);
 	}
+	if (node->op == MDB_ILIKE) {
+		return mdb_ilike_cmp(s,node->value.s);
+	}
 	rc = strcoll(node->value.s, s);
 	switch (node->op) {
 		case MDB_EQUAL:
diff --git a/src/sql/lexer.l b/src/sql/lexer.l
index e5717b7..d9b1b2a 100644
--- a/src/sql/lexer.l
+++ b/src/sql/lexer.l
@@ -71,6 +71,7 @@ null { return NUL; }
 "<"		{ return LT; }
 ">"		{ return GT; }
 like		{ return LIKE; }
+ilike		{ return ILIKE; }
 limit		{ return LIMIT; }
 top		{ return TOP; }
 percent		{ return PERCENT; }
diff --git a/src/sql/mdbsql.c b/src/sql/mdbsql.c
index 4ffeb59..dafaa1b 100644
--- a/src/sql/mdbsql.c
+++ b/src/sql/mdbsql.c
@@ -324,6 +324,9 @@ mdb_sql_dump_node(MdbSargNode *node, int level)
 		case MDB_LIKE:
 			printf(" like %s\n", node->value.s);
 			break;
+		case MDB_ILIKE:
+			printf(" ilike %s\n", node->value.s);
+			break;
 		case MDB_EQUAL:
 			printf(" = %d\n", node->value.i);
 			break;
@@ -398,6 +401,7 @@ mdb_sql_eval_expr(MdbSQL *sql, char *const1, int op, char *const2)
 			case MDB_LT: compar = (value < 0); break;
 			case MDB_LTEQ: compar = (value <= 0); break;
 			case MDB_LIKE: compar = mdb_like_cmp(const1,const2); break;
+			case MDB_ILIKE: compar = mdb_ilike_cmp(const1,const2); break;
 			default: illop = 1;
 		}
 	} else if (const1[0]!='\'' && const2[0]!='\'') {
diff --git a/src/sql/parser.y b/src/sql/parser.y
index 3a4982c..0bacc87 100644
--- a/src/sql/parser.y
+++ b/src/sql/parser.y
@@ -63,7 +63,7 @@ typedef struct sql_context
 %token IDENT NAME PATH STRING NUMBER OPENING CLOSING
 %token SELECT FROM WHERE CONNECT DISCONNECT TO LIST TABLES AND OR NOT LIMIT COUNT STRPTIME
 %token DESCRIBE TABLE TOP PERCENT
-%token LTEQ GTEQ LIKE IS NUL
+%token LTEQ GTEQ LIKE ILIKE IS NUL
 
 %type database
 %type constant
@@ -81,7 +81,7 @@ typedef struct sql_context
 %left OR
 %left AND
 %right NOT
-%left EQ LTEQ GTEQ LT GT LIKE IS
+%left EQ LTEQ GTEQ LT GT LIKE ILIKE IS
 
 %%
 
@@ -193,6 +193,7 @@ operator:
 	| LTEQ { $$ = MDB_LTEQ; }
 	| GTEQ { $$ = MDB_GTEQ; }
 	| LIKE { $$ = MDB_LIKE; }
+	| ILIKE { $$ = MDB_ILIKE; }
 	;
 
 nulloperator:
diff --git a/src/util/mdb-sql.c b/src/util/mdb-sql.c
index 45564c7..4257f5d 100644
--- a/src/util/mdb-sql.c
+++ b/src/util/mdb-sql.c
@@ -413,7 +413,10 @@ main(int argc, char **argv)
 
 	while (1) {
 		line ++;
-		if (s) free(s);
+		if (s) {
+			free(s);
+			s = NULL;
+		}
 
 		if (in) {
 			s=calloc(bufsz, 1);
@@ -434,9 +437,18 @@ main(int argc, char **argv)
 				s[strlen(s)-1]=0;
 		} else {
 			snprintf(prompt, sizeof(prompt), "%d => ", line);
-			s=readline(prompt);
-			if (!s)
+			locale = setlocale(LC_CTYPE, "");
+			char *l = readline(prompt);
+			setlocale(LC_CTYPE, locale);
+			if (!l)
 				break;
+			s=g_locale_to_utf8(l, -1, NULL, NULL, NULL);
+			free(l);
+			if (!s) {
+				/* conversion returns NULL on invalid multibyte input; skip the line rather than strcmp(NULL, ...) below */
+				fprintf(stderr, "Unable to convert input to UTF-8\n");
+				continue;
+			}
 		}
 
 		if (!strcmp(s,"exit") || !strcmp(s,"quit") || !strcmp(s,"bye"))