/*
   Unix SMB/CIFS implementation.
   Main metadata server / Spotlight routines / Elasticsearch backend

   Copyright (C) Ralph Boehme			2019

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

%{
	#include "includes.h"
	#include "rpc_server/mdssvc/mdssvc.h"
	#include "rpc_server/mdssvc/mdssvc_es.h"
	#include "rpc_server/mdssvc/es_parser.tab.h"
	#include "rpc_server/mdssvc/es_mapping.h"
	#include "lib/util/smb_strtox.h"
	#include <jansson.h>

	/*
	 * allow building with -O3 -Wp,-D_FORTIFY_SOURCE=2
	 *
	 * /tmp/samba-testbase/.../mdssvc/es_parser.y: In function
	 * ‘mdsyylparse’:
	 * es_parser.tab.c:1124:6: error: assuming pointer wraparound
	 * does not occur when comparing P +- C1 with P +- C2
	 * [-Werror=strict-overflow]
	 *
	 * The generated code in es_parser.tab.c looks like this:
	 *
	 * if (yyss + yystacksize - 1 <= yyssp)
	 */
	#pragma GCC diagnostic ignored "-Wstrict-overflow"

	#define YYMALLOC SMB_MALLOC
	#define YYREALLOC SMB_REALLOC

	struct yy_buffer_state;
	typedef struct yy_buffer_state *YY_BUFFER_STATE;
	int mdsyyllex(void);
	void mdsyylerror(char const *);
	void *mdsyylterminate(void);
	YY_BUFFER_STATE mdsyyl_scan_string(const char *str);
	void mdsyyl_delete_buffer(YY_BUFFER_STATE buffer);

	/* forward declarations */
	static char *isodate_to_sldate(const char *s);
	static char *map_expr(const struct es_attr_map *attr,
			      char op,
			      const char *val1,
			      const char *val2);

	/*
	 * Parser state for a single map_spotlight_to_es_query() call. It is
	 * published through the global pointer below for the duration of
	 * mdsyylparse() so that grammar actions, the helpers in this file
	 * and the lexer can reach it.
	 */
	struct es_parser_state {
		/* talloc context for everything allocated while parsing */
		TALLOC_CTX *frame;
		/* "attribute_mappings" object of the JSON mappings */
		json_t *kmd_map;
		/* "mime_mappings" object of the JSON mappings */
		json_t *mime_map;
		/*
		 * If true, an attribute es_map_sl_attr() can't map yields a
		 * NULL sub-expression instead of aborting the parse.
		 */
		bool ignore_unknown_attribute;
		/*
		 * If false, a type es_map_sl_type() can't map sets
		 * type_error.
		 */
		bool ignore_unknown_type;
		/* Set by map_type(), makes the "line" rule abort */
		bool type_error;
		/* Lexer buffer created from the query string */
		YY_BUFFER_STATE s;
		/* The mapped query, set by the "line" rule on success */
		const char *result;
	} *global_es_parser_state;
%}

%code provides {
	#include <stdbool.h>
	#include <jansson.h>
	#include "rpc_server/mdssvc/mdssvc.h"

	/* 2001-01-01T00:00:00Z - Unix Epoch = SP_RAW_TIME_OFFSET */
	#define SP_RAW_TIME_OFFSET 978307200

	int mdsyylwrap(void);
	bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
				       json_t *mappings,
				       const char *path_scope,
				       const char *query_string,
				       char **_es_query);
}

%union {
	bool bval;
	const char *sval;
	struct es_attr_map *attr_map;
}

%name-prefix "mdsyyl"
%expect 1
%error-verbose

%type <sval> match expr line function value isodate
%type <attr_map> attribute

%token <sval> WORD PHRASE
%token <bval> BOOLEAN
%token FUNC_INRANGE
%token DATE_ISO
%token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
%left OR
%left AND

%%

input:
	/* empty */
	| input line
;

line:
	expr {
		/* A NULL expression means nothing could be mapped */
		if ($1 == NULL) {
			YYABORT;
		}
		if (global_es_parser_state->type_error) {
			YYABORT;
		}
		global_es_parser_state->result = $1;
	}
;

/*
 * A NULL expression value stands for a sub-expression that was dropped.
 * AND/OR pass the remaining operand through unchanged when one side is
 * NULL and only yield NULL when both sides are NULL.
 */
expr:
	OBRACE expr CBRACE {
		if ($2 == NULL) {
			$$ = NULL;
		} else {
			$$ = talloc_asprintf(talloc_tos(), "(%s)", $2);
			if ($$ == NULL) YYABORT;
		}
	}
	| expr AND expr {
		if ($1 == NULL && $3 == NULL) {
			$$ = NULL;
		} else if ($1 == NULL) {
			$$ = $3;
		} else if ($3 == NULL) {
			$$ = $1;
		} else {
			$$ = talloc_asprintf(talloc_tos(), "(%s) AND (%s)", $1, $3);
			if ($$ == NULL) YYABORT;
		}
	}
	| expr OR expr {
		if ($1 == NULL && $3 == NULL) {
			$$ = NULL;
		} else if ($1 == NULL) {
			$$ = $3;
		} else if ($3 == NULL) {
			$$ = $1;
		} else {
			$$ = talloc_asprintf(talloc_tos(), "%s OR %s", $1, $3);
			if ($$ == NULL) YYABORT;
		}
	}
	| match {
		$$ = $1;
	}
	| BOOLEAN {
		/*
		 * We can't properly handle these in expressions, fortunately this
		 * is probably only ever used by OS X as sole element in an
		 * expression ie "False" (when Finder window selected our share
		 * but no search string entered yet). Packet traces showed that OS
		 * X Spotlight server then returns a failure (ie -1) which is what
		 * we do here too by calling YYABORT.
		 */
		YYABORT;
	}
;

/*
 * A NULL attribute (unknown, but ignored) turns the whole match into a
 * NULL expression. The operator is handed to map_expr() as a single
 * character: '=', '!', '<', '>' or '~' (range).
 */
match:
	attribute EQUAL value {
		if ($1 == NULL) {
			$$ = NULL;
		} else {
			$$ = map_expr($1, '=', $3, NULL);
		}
	}
	| attribute UNEQUAL value {
		if ($1 == NULL) {
			$$ = NULL;
		} else {
			$$ = map_expr($1, '!', $3, NULL);
		}
	}
	| attribute LT value {
		if ($1 == NULL) {
			$$ = NULL;
		} else {
			$$ = map_expr($1, '<', $3, NULL);
		}
	}
	| attribute GT value {
		if ($1 == NULL) {
			$$ = NULL;
		} else {
			$$ = map_expr($1, '>', $3, NULL);
		}
	}
	| function {
		$$ = $1;
	}
	| match WORD {
		/*
		 * The trailing WORD is accepted and discarded.
		 * NOTE(review): presumably comparison modifiers following
		 * the value -- confirm against the lexer.
		 */
		$$ = $1;
	}
;

function:
	FUNC_INRANGE OBRACE attribute COMMA WORD COMMA WORD CBRACE {
		if ($3 == NULL) {
			$$ = NULL;
		} else {
			$$ = map_expr($3, '~', $5, $7);
		}
	}
;

attribute:
	WORD {
		$$ = es_map_sl_attr(global_es_parser_state->frame,
				    global_es_parser_state->kmd_map,
				    $1);
		if ($$ == NULL &&
		    !global_es_parser_state->ignore_unknown_attribute)
		{
			YYABORT;
		}
	}
;

value:
	PHRASE {
		$$ = $1;
	}
	| isodate {
		$$ = $1;
	}
;

isodate:
	DATE_ISO OBRACE WORD CBRACE {
		$$ = isodate_to_sldate($3);
		if ($$ == NULL) YYABORT;
	}
;

%%

/*
 * Spotlight has two date formats:
 * - seconds since 2001-01-01 00:00:00Z
 * - as string "$time.iso(%Y-%m-%dT%H:%M:%SZ)"
 * This function converts the latter to the former as string, so the parser
 * can work on a uniform format.
 *
 * Returns a string allocated on the parser frame, or NULL if the date
 * can't be parsed or the allocation fails.
 */
static char *isodate_to_sldate(const char *isodate)
{
	struct es_parser_state *s = global_es_parser_state;
	/*
	 * strptime() only stores the fields named in the format, so start
	 * from an all-zero struct rather than indeterminate stack contents
	 * before handing it to timegm().
	 */
	struct tm tm = {0};
	const char *p = NULL;
	char *tstr = NULL;
	time_t t;

	p = strptime(isodate, "%Y-%m-%dT%H:%M:%SZ", &tm);
	if (p == NULL) {
		DBG_ERR("strptime [%s] failed\n", isodate);
		return NULL;
	}

	/* Rebase from the Unix epoch to the Spotlight epoch */
	t = timegm(&tm);
	t -= SP_RAW_TIME_OFFSET;

	tstr = talloc_asprintf(s->frame, "%jd", (intmax_t)t);
	if (tstr == NULL) {
		return NULL;
	}

	return tstr;
}

/*
 * Map a comparison on a type attribute: val is translated to a list of
 * MIME types via es_map_sl_type() and emitted as "name:(list)", wrapped
 * in "(NOT ...)" for op '!'. Only '=' and '!' are supported.
 *
 * If the type can't be mapped and unknown types are not ignored, the
 * parser state's type_error flag is set. Returns NULL on any failure.
 */
static char *map_type(const struct es_attr_map *attr,
		      char op,
		      const char *val)
{
	struct es_parser_state *s = global_es_parser_state;
	const char *mime_type_list = NULL;
	char *esc_mime_type_list = NULL;
	const char *not = NULL;
	const char *end = NULL;
	char *es = NULL;

	mime_type_list = es_map_sl_type(s->mime_map, val);
	if (mime_type_list == NULL) {
		DBG_DEBUG("Mapping type [%s] failed\n", val);
		if (!s->ignore_unknown_type) {
			s->type_error = true;
		}
		return NULL;
	}

	esc_mime_type_list = es_escape_str(s->frame,
					   mime_type_list,
					   "* ");
	if (esc_mime_type_list == NULL) {
		return NULL;
	}

	switch (op) {
	case '=':
		not = "";
		end = "";
		break;
	case '!':
		not = "(NOT ";
		end = ")";
		break;
	default:
		DBG_ERR("Mapping type [%s] unexpected op [%c]\n", val, op);
		return NULL;
	}

	es = talloc_asprintf(s->frame,
			     "%s%s:(%s)%s",
			     not,
			     attr->name,
			     esc_mime_type_list,
			     end);
	if (es == NULL) {
		return NULL;
	}

	return es;
}

/*
 * Map a comparison on a numeric attribute. val2 is only used for the
 * range op '~'. The values are inserted into the query as given.
 * Returns NULL for an unexpected op or on allocation failure.
 */
static char *map_num(const struct es_attr_map *attr,
		     char op,
		     const char *val1,
		     const char *val2)
{
	struct es_parser_state *s = global_es_parser_state;
	char *es = NULL;

	switch (op) {
	case '>':
		es = talloc_asprintf(s->frame,
				     "%s:{%s TO *}",
				     attr->name,
				     val1);
		break;
	case '<':
		es = talloc_asprintf(s->frame,
				     "%s:{* TO %s}",
				     attr->name,
				     val1);
		break;
	case '~':
		es = talloc_asprintf(s->frame,
				     "%s:[%s TO %s]",
				     attr->name,
				     val1,
				     val2);
		break;
	case '=':
		es = talloc_asprintf(s->frame,
				     "%s:%s",
				     attr->name,
				     val1);
		break;
	case '!':
		es = talloc_asprintf(s->frame,
				     "(NOT %s:%s)",
				     attr->name,
				     val1);
		break;
	default:
		DBG_ERR("Mapping num unexpected op [%c]\n", op);
		return NULL;
	}

	if (es == NULL) {
		return NULL;
	}

	return es;
}

/*
 * Map a comparison on a fulltext attribute. The escaped value is emitted
 * without a field prefix, so attr is not used here. Only '=' and '!' are
 * supported. Returns NULL on failure.
 */
static char *map_fts(const struct es_attr_map *attr,
		     char op,
		     const char *val)
{
	struct es_parser_state *s = global_es_parser_state;
	const char *not = NULL;
	const char *end = NULL;
	char *esval = NULL;
	char *es = NULL;

	esval = es_escape_str(s->frame, val, "*\\\"");
	if (esval == NULL) {
		yyerror("es_escape_str failed");
		return NULL;
	}

	switch (op) {
	case '=':
		not = "";
		end = "";
		break;
	case '!':
		not = "(NOT ";
		end = ")";
		break;
	default:
		DBG_ERR("Mapping fts [%s] unexpected op [%c]\n", val, op);
		return NULL;
	}

	es = talloc_asprintf(s->frame,
			     "%s%s%s",
			     not,
			     esval,
			     end);
	if (es == NULL) {
		return NULL;
	}

	return es;
}

/*
 * Map a comparison on a string attribute to "name:value", wrapped in
 * "(NOT ...)" for op '!'. Only '=' and '!' are supported. Returns NULL
 * on failure.
 */
static char *map_str(const struct es_attr_map *attr,
		     char op,
		     const char *val)
{
	struct es_parser_state *s = global_es_parser_state;
	char *esval = NULL;
	char *es = NULL;
	const char *not = NULL;
	const char *end = NULL;

	esval = es_escape_str(s->frame, val, "*\\\"");
	if (esval == NULL) {
		yyerror("es_escape_str failed");
		return NULL;
	}

	switch (op) {
	case '=':
		not = "";
		end = "";
		break;
	case '!':
		not = "(NOT ";
		end = ")";
		break;
	default:
		DBG_ERR("Mapping string [%s] unexpected op [%c]\n", val, op);
		return NULL;
	}

	es = talloc_asprintf(s->frame,
			     "%s%s:%s%s",
			     not,
			     attr->name,
			     esval,
			     end);
	if (es == NULL) {
		return NULL;
	}

	return es;
}

/*
 * Convert Spotlight date seconds since 2001-01-01 00:00:00Z
 * to a date string in the format %Y-%m-%dT%H:%M:%SZ.
 *
 * The result is passed through es_escape_str() and allocated on mem_ctx.
 * Returns NULL if the number can't be parsed or a conversion step fails.
 */
static char *map_sldate_to_esdate(TALLOC_CTX *mem_ctx,
				  const char *sldate)
{
	struct tm *tm = NULL;
	char *esdate = NULL;
	char buf[21];
	size_t len;
	time_t t;
	int error;

	t = (time_t)smb_strtoull(sldate, NULL, 10, &error, SMB_STR_STANDARD);
	if (error != 0) {
		DBG_ERR("smb_strtoull [%s] failed\n", sldate);
		return NULL;
	}
	/* Rebase from the Spotlight epoch to the Unix epoch */
	t += SP_RAW_TIME_OFFSET;

	tm = gmtime(&t);
	if (tm == NULL) {
		DBG_ERR("gmtime [%s] failed\n", sldate);
		return NULL;
	}

	/* With a four digit year the format yields exactly 20 characters */
	len = strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", tm);
	if (len != 20) {
		DBG_ERR("strftime [%s] failed\n", sldate);
		return NULL;
	}

	esdate = es_escape_str(mem_ctx, buf, NULL);
	if (esdate == NULL) {
		yyerror("es_escape_str failed");
		return NULL;
	}
	return esdate;
}

/*
 * Map a comparison on a date attribute. Both dates are given in Spotlight
 * seconds and are converted with map_sldate_to_esdate(). sldate2 is
 * required for the range op '~' and is converted whenever it is given.
 * Returns NULL for an unexpected op or on any conversion or allocation
 * failure.
 */
static char *map_date(const struct es_attr_map *attr,
		      char op,
		      const char *sldate1,
		      const char *sldate2)
{
	struct es_parser_state *s = global_es_parser_state;
	char *esdate1 = NULL;
	char *esdate2 = NULL;
	char *es = NULL;

	if (op == '~' && sldate2 == NULL) {
		DBG_ERR("Date range query, but second date is NULL\n");
		return NULL;
	}

	esdate1 = map_sldate_to_esdate(s->frame, sldate1);
	if (esdate1 == NULL) {
		DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate1);
		return NULL;
	}
	if (sldate2 != NULL) {
		esdate2 = map_sldate_to_esdate(s->frame, sldate2);
		if (esdate2 == NULL) {
			DBG_ERR("map_sldate_to_esdate [%s] failed\n", sldate2);
			return NULL;
		}
	}

	switch (op) {
	case '>':
		es = talloc_asprintf(s->frame,
				     "%s:{%s TO *}",
				     attr->name,
				     esdate1);
		break;
	case '<':
		es = talloc_asprintf(s->frame,
				     "%s:{* TO %s}",
				     attr->name,
				     esdate1);
		break;
	case '~':
		es = talloc_asprintf(s->frame,
				     "%s:[%s TO %s]",
				     attr->name,
				     esdate1,
				     esdate2);
		break;
	case '=':
		es = talloc_asprintf(s->frame,
				     "%s:%s",
				     attr->name,
				     esdate1);
		break;
	case '!':
		es = talloc_asprintf(s->frame,
				     "(NOT %s:%s)",
				     attr->name,
				     esdate1);
		break;
	default:
		/* Reject and log unknown ops like the other mappers do */
		DBG_ERR("Mapping date unexpected op [%c]\n", op);
		return NULL;
	}

	if (es == NULL) {
		return NULL;
	}
	return es;
}

/*
 * Dispatch a comparison to the mapper matching the attribute's type.
 * val2 is only passed on to the mappers that support the range op '~'
 * (numbers and dates). Returns the mapped sub-expression, or NULL if the
 * attribute type is not handled or the mapper failed.
 */
static char *map_expr(const struct es_attr_map *attr,
		      char op,
		      const char *val1,
		      const char *val2)
{
	char *es = NULL;

	switch (attr->type) {
	case ssmt_type:
		es = map_type(attr, op, val1);
		break;
	case ssmt_num:
		es = map_num(attr, op, val1, val2);
		break;
	case ssmt_fts:
		es = map_fts(attr, op, val1);
		break;
	case ssmt_str:
		es = map_str(attr, op, val1);
		break;
	case ssmt_date:
		es = map_date(attr, op, val1, val2);
		break;
	default:
		break;
	}

	if (es == NULL) {
		DBG_DEBUG("Mapping [%s %c %s (%s)] failed\n",
			  attr->name, op, val1, val2 ? val2 : "");
		return NULL;
	}

	return es;
}

/* Error callback of the generated parser: log the message */
void mdsyylerror(const char *str)
{
	DBG_ERR("Parser failed: %s\n", str);
}

/* Lexer end-of-input hook: returning 1 means there is no further input */
int mdsyylwrap(void)
{
	return 1;
}

/**
 * Map a Spotlight RAW query string to a ES query string
 *
 * @param[in]  mem_ctx       talloc context the resulting query is
 *                           allocated on
 * @param[in]  mappings      JSON object providing the
 *                           "attribute_mappings" and "mime_mappings"
 *                           objects
 * @param[in]  path_scope    path the query is restricted to, inserted
 *                           into the result as given
 * @param[in]  query_string  the Spotlight query to parse
 * @param[out] _es_query     the resulting query, only set on success
 *
 * @return true on success, false if a mapping object is missing, the
 *         query can't be parsed or mapped, the query contains no
 *         expression, or an allocation fails.
 **/
bool map_spotlight_to_es_query(TALLOC_CTX *mem_ctx,
			       json_t *mappings,
			       const char *path_scope,
			       const char *query_string,
			       char **_es_query)
{
	struct es_parser_state s = {
		.frame = talloc_stackframe(),
	};
	int result;
	char *es_query = NULL;
	bool ok = false;

	/*
	 * The stackframe already exists at this point, so every exit has
	 * to go through "done" in order to free it.
	 */
	s.kmd_map = json_object_get(mappings, "attribute_mappings");
	if (s.kmd_map == NULL) {
		DBG_ERR("Failed to load attribute_mappings from JSON\n");
		goto done;
	}

	s.mime_map = json_object_get(mappings, "mime_mappings");
	if (s.mime_map == NULL) {
		DBG_ERR("Failed to load mime_mappings from JSON\n");
		goto done;
	}

	s.s = mdsyyl_scan_string(query_string);
	if (s.s == NULL) {
		DBG_WARNING("Failed to parse [%s]\n", query_string);
		goto done;
	}

	s.ignore_unknown_attribute = lp_parm_bool(GLOBAL_SECTION_SNUM,
						  "elasticsearch",
						  "ignore unknown attribute",
						  false);
	s.ignore_unknown_type = lp_parm_bool(GLOBAL_SECTION_SNUM,
					     "elasticsearch",
					     "ignore unknown type",
					     false);

	/* Publish the state only for the duration of the parse */
	global_es_parser_state = &s;
	result = mdsyylparse();
	global_es_parser_state = NULL;
	mdsyyl_delete_buffer(s.s);

	if (result != 0) {
		goto done;
	}

	/*
	 * The grammar accepts empty input, in which case the parse
	 * succeeds without ever setting a result.
	 */
	if (s.result == NULL) {
		DBG_WARNING("Query [%s] contains no expression\n",
			    query_string);
		goto done;
	}

	/* s.result lives on the frame, so build the query before freeing */
	es_query = talloc_asprintf(mem_ctx,
				   "(%s) AND path.real.fulltext:\\\"%s\\\"",
				   s.result,
				   path_scope);
	if (es_query == NULL) {
		goto done;
	}

	*_es_query = es_query;
	ok = true;

done:
	TALLOC_FREE(s.frame);
	return ok;
}