/*
 * Copyright (c) 2010, Swedish Institute of Computer Science
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the Institute nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/**
 * \file
 *      A recursive parser for AQL, the Antelope Query Language.
 * \author
 *      Nicolas Tsiftes
 */

#include "attribute.h"
#include "db-options.h"
#include "index.h"
#include "aql.h"
#include "lvm.h"

/*
 * NOTE(review): the header names of these five directives were missing
 * (bare "#include" tokens).  <string.h> is required for strncpy(); the
 * remaining names are the conventional set for this kind of file and
 * should be confirmed against the project's canonical copy.
 */
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEBUG DEBUG_NONE
#include "debug.h"

#if DEBUG
static char error_message[DB_ERROR_BUF_SIZE];
static int error_line;
static const char *error_function;

/*
 * Record the first error location (remaining input, line, function) and
 * return `value`.  The whole expansion is a single statement so that it
 * stays correct under an unbraced `if`.  At most sizeof - 1 bytes are
 * copied into the zero-initialized static buffer, so its last byte is
 * never overwritten and the message stays NUL-terminated.
 */
#define RETURN(value)                                                   \
  do {                                                                  \
    if(error_message[0] == '\0') {                                      \
      strncpy(error_message, lexer->input, sizeof(error_message) - 1);  \
      error_line = __LINE__;                                            \
      error_function = __func__;                                        \
    }                                                                   \
    return (value);                                                     \
  } while(0)

/* Forget any previously recorded error location. */
#define RESET_ERROR()                                                   \
  do {                                                                  \
    error_message[0] = '\0';                                            \
    error_line = 0;                                                     \
    error_function = NULL;                                              \
  } while(0)
#else
#define RETURN(value) return (value)
#define RESET_ERROR()
#endif

/* Declare a parser that takes only the lexer and returns a status. */
#define PARSER(name)                                                    \
  static aql_status_t                                                   \
  parse_##name(lexer_t *lexer)

/* Declare a parser that takes the lexer plus one extra argument. */
#define PARSER_ARG(name, arg)                                           \
  static aql_status_t                                                   \
  parse_##name(lexer_t *lexer, arg)

/* Declare a parser that returns the recognized token (or NONE). */
#define PARSER_TOKEN(name)                                              \
  static token_t                                                        \
  parse_##name(lexer_t *lexer)

/*
 * Invoke a status-returning parser.  The result is a BOOLEAN (non-zero on
 * success), not an aql_status_t: never return it directly as a status.
 */
#define PARSE(name)                                                     \
  !AQL_ERROR(parse_##name(lexer))

/* Invoke a token-returning parser. */
#define PARSE_TOKEN(name)                                               \
  parse_##name(lexer)

#define NEXT lexer_next(lexer)
/* Single-statement form so that REWIND is safe under an unbraced `if`. */
#define REWIND                                                          \
  do {                                                                  \
    lexer_rewind(lexer);                                                \
    RESET_ERROR();                                                      \
  } while(0)
#define TOKEN *lexer->token
#define VALUE *lexer->value

/* Advance the lexer and fail with SYNTAX_ERROR unless `token` was read. */
#define CONSUME(token)                                                  \
  do {                                                                  \
    NEXT;                                                               \
    if(TOKEN != (token)) {                                              \
      RETURN(SYNTAX_ERROR);                                             \
    }                                                                   \
  } while(0)

/* The parsing of AQL results in this aql_adt_t object. */
static aql_adt_t *adt;

/* Conditional statements are compiled into VM bytecode, which is
   stored in an instance of the LogicVM. */
static lvm_instance_t p;
static unsigned char vmcode[DB_VM_BYTECODE_SIZE];

/* Parsing functions for AQL. */

/* Read the next token; return it if it is a comparison operator,
   otherwise NONE. */
PARSER_TOKEN(cmp)
{
  NEXT;
  switch(TOKEN) {
  case EQUAL:
  case NOT_EQUAL:
  case GT:
  case LT:
  case GEQ:
  case LEQ:
    return TOKEN;
  default:
    return NONE;
  }
}

/* Read the next token; return it if it is an arithmetic operator or a
   right parenthesis, otherwise NONE. */
PARSER_TOKEN(op)
{
  NEXT;
  switch(TOKEN) {
  case ADD:
  case SUB:
  case MUL:
  case DIV:
  case RIGHT_PAREN:
    return TOKEN;
  default:
    return NONE;
  }
}

/* Read the next token; return it if it names an aggregation function,
   otherwise NONE. */
PARSER_TOKEN(aggregator)
{
  NEXT;
  switch(TOKEN) {
  case COUNT:
  case SUM:
  case MEAN:
  case MEDIAN:
  case MAX:
  case MIN:
    return TOKEN;
  default:
    return NONE;
  }
}

/*
 * Parse a comma-separated list of attributes.  Each element is either a
 * plain identifier or an aggregate of the form FUNCTION(identifier).
 * Returns OK, or SYNTAX_ERROR on malformed input.
 */
PARSER(attributes)
{
  token_t token;
  aql_aggregator_t function;

  token = PARSE_TOKEN(aggregator);
  if(token != NONE) {
    switch(TOKEN) {
    case COUNT:
      function = AQL_COUNT;
      break;
    case SUM:
      function = AQL_SUM;
      break;
    case MEAN:
      function = AQL_MEAN;
      break;
    case MEDIAN:
      function = AQL_MEDIAN;
      break;
    case MAX:
      function = AQL_MAX;
      break;
    case MIN:
      function = AQL_MIN;
      break;
    default:
      RETURN(SYNTAX_ERROR);
    }

    AQL_SET_FLAG(adt, AQL_FLAG_AGGREGATE);

    PRINTF("aggregator: %d\n", TOKEN);

    /* Parse the attribute to aggregate. */
    CONSUME(LEFT_PAREN);
    CONSUME(IDENTIFIER);

    AQL_ADD_AGGREGATE(adt, function, VALUE);
    PRINTF("aggregated attribute: %s\n", VALUE);

    CONSUME(RIGHT_PAREN);
    goto check_more_attributes;
  } else {
    REWIND;
  }

  /* Plain identifier. */

  CONSUME(IDENTIFIER);

  AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0);

check_more_attributes:
  NEXT;
  if(TOKEN == COMMA) {
    if(!PARSE(attributes)) {
      RETURN(SYNTAX_ERROR);
    }
  } else {
    REWIND;
  }

  RETURN(OK);
}

/*
 * Parse a comma-separated list of relation names and add each one to the
 * ADT.  Returns OK, or SYNTAX_ERROR on malformed input.
 */
PARSER(relations)
{
  /* Parse comma-separated identifiers for relations. */
  CONSUME(IDENTIFIER);

  AQL_ADD_RELATION(adt, VALUE);
  NEXT;
  if(TOKEN == COMMA) {
    if(!PARSE(relations)) {
      RETURN(SYNTAX_ERROR);
    }
  } else {
    REWIND;
  }

  RETURN(OK);
}

/*
 * Parse a comma-separated list of string or integer values and add each
 * one to the ADT.  Returns OK, or SYNTAX_ERROR on malformed input.
 */
PARSER(values)
{
  /* Parse comma-separated attribute values. */
  NEXT;
  switch(TOKEN) {
  case STRING_VALUE:
    AQL_ADD_VALUE(adt, DOMAIN_STRING, VALUE);
    break;
  case INTEGER_VALUE:
    AQL_ADD_VALUE(adt, DOMAIN_INT, VALUE);
    break;
  default:
    RETURN(SYNTAX_ERROR);
  }

  NEXT;
  if(TOKEN == COMMA) {
    /* PARSE() yields a boolean, so translate it into a proper status
       instead of returning it directly. */
    if(!PARSE(values)) {
      RETURN(SYNTAX_ERROR);
    }
  } else {
    REWIND;
  }

  RETURN(OK);
}

/*
 * Parse a single operand of an expression.  Identifiers are registered as
 * LVM variables and recorded as processing attributes; integer values are
 * emitted as LVM longs.  String and float values are accepted but no
 * bytecode is emitted for them here.
 */
PARSER(operand)
{
  NEXT;
  switch(TOKEN) {
  case IDENTIFIER:
    lvm_register_variable(VALUE, LVM_LONG);
    lvm_set_variable(&p, VALUE);
    AQL_ADD_PROCESSING_ATTRIBUTE(adt, VALUE);
    break;
  case STRING_VALUE:
    break;
  case FLOAT_VALUE:
    break;
  case INTEGER_VALUE:
    lvm_set_long(&p, *(long *)lexer->value);
    break;
  default:
    RETURN(SYNTAX_ERROR);
  }

  RETURN(OK);
}

/*
 * Parse an arithmetic expression: an operand or a parenthesized
 * sub-expression, followed by any number of "operator operand" pairs.
 * For each operator the LVM code is shifted to make room for the operator
 * in front of its operands (see lvm_shift_for_operator()).
 */
PARSER(expr)
{
  token_t token;
  size_t saved_end;
  operator_t op;

  saved_end = lvm_get_end(&p);

  NEXT;
  if(TOKEN == LEFT_PAREN) {
    if(!PARSE(expr)) {
      RETURN(SYNTAX_ERROR);
    }
    CONSUME(RIGHT_PAREN);
  } else {
    REWIND;
    if(!PARSE(operand)) {
      RETURN(SYNTAX_ERROR);
    }
  }

  while(1) {
    token = PARSE_TOKEN(op);
    if(token == NONE) {
      /* Not an operator: give the token back and finish. */
      saved_end = lvm_get_end(&p);
      REWIND;
      break;
    } else if(token == RIGHT_PAREN) {
      break;
    }

    if(!PARSE(operand) && !PARSE(expr)) {
      RETURN(SYNTAX_ERROR);
    }

    saved_end = lvm_shift_for_operator(&p, saved_end);

    switch(token) {
    case ADD:
      op = LVM_ADD;
      break;
    case SUB:
      op = LVM_SUB;
      break;
    case MUL:
      op = LVM_MUL;
      break;
    case DIV:
      op = LVM_DIV;
      break;
    default:
      RETURN(SYNTAX_ERROR);
    }
    lvm_set_op(&p, op);
    lvm_set_end(&p, saved_end);
  }

  return OK;
}

/*
 * Parse "expr CMP expr" and emit the corresponding LVM relation.
 * NOTE(review): GT maps to LVM_GE and LT to LVM_LE; presumably these LVM
 * names mean strictly greater/less (LVM_GEQ/LVM_LEQ being the inclusive
 * forms) -- confirm in lvm.h.
 */
PARSER(comparison)
{
  token_t token;
  size_t saved_end;
  operator_t rel;

  saved_end = lvm_jump_to_operand(&p);

  if(!PARSE(expr)) {
    RETURN(SYNTAX_ERROR);
  }

  saved_end = lvm_set_end(&p, saved_end);

  token = PARSE_TOKEN(cmp);
  if(token == NONE) {
    RETURN(SYNTAX_ERROR);
  }

  switch(token) {
  case GT:
    rel = LVM_GE;
    break;
  case GEQ:
    rel = LVM_GEQ;
    break;
  case LT:
    rel = LVM_LE;
    break;
  case LEQ:
    rel = LVM_LEQ;
    break;
  case EQUAL:
    rel = LVM_EQ;
    break;
  case NOT_EQUAL:
    rel = LVM_NEQ;
    break;
  default:
    RETURN(SYNTAX_ERROR);
  }

  lvm_set_relation(&p, rel);
  lvm_set_end(&p, saved_end);

  if(!PARSE(expr)) {
    RETURN(SYNTAX_ERROR);
  }

  RETURN(OK);
}

/*
 * Parse the body of a WHERE clause: one comparison, optionally followed
 * by AND/OR connectives, each introducing another comparison or a
 * parenthesized nested clause.  Returns OK, or SYNTAX_ERROR on malformed
 * input.
 */
PARSER(where)
{
  operator_t connective;
  size_t saved_end;

  if(!PARSE(comparison)) {
    RETURN(SYNTAX_ERROR);
  }

  saved_end = 0;

  /* The WHERE clause can consist of multiple propositions. */
  for(;;) {
    NEXT;
    if(TOKEN != AND && TOKEN != OR) {
      REWIND;
      break;
    }

    connective = TOKEN == AND ? LVM_AND : LVM_OR;

    saved_end = lvm_shift_for_operator(&p, saved_end);
    lvm_set_relation(&p, connective);
    lvm_set_end(&p, saved_end);

    NEXT;
    if(TOKEN == LEFT_PAREN) {
      if(!PARSE(where)) {
        RETURN(SYNTAX_ERROR);
      }
      CONSUME(RIGHT_PAREN);
    } else {
      REWIND;
      /* Report a real status on failure; the boolean produced by PARSE()
         must not be returned as an aql_status_t. */
      if(!PARSE(comparison)) {
        RETURN(SYNTAX_ERROR);
      }
    }
  }

  lvm_print_code(&p);

  return OK;
}

/*
 * Parse "left, right ON attribute PROJECT attributes END" for a JOIN
 * statement (the JOIN keyword itself has been read by the caller).
 */
PARSER(join)
{
  AQL_SET_TYPE(adt, AQL_TYPE_JOIN);

  CONSUME(IDENTIFIER);

  PRINTF("Left relation: %s\n", VALUE);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(COMMA);
  CONSUME(IDENTIFIER);

  PRINTF("Right relation: %s\n", VALUE);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(ON);
  CONSUME(IDENTIFIER);

  PRINTF("Join on attribute %s\n", VALUE);
  AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0);

  CONSUME(PROJECT);

  /* projection attributes... */
  if(!PARSE(attributes)) {
    RETURN(SYNTAX_ERROR);
  }

  CONSUME(END);

  RETURN(OK);
}

/*
 * Parse "attributes FROM relations [WHERE condition END]" for a SELECT
 * statement (the SELECT keyword itself has been read by the caller).
 * NOTE(review): when no WHERE clause follows, the function returns OK
 * without consuming END, so trailing input is not checked on that path.
 * Left unchanged because callers may depend on it -- confirm the intent.
 */
PARSER(select)
{
  AQL_SET_TYPE(adt, AQL_TYPE_SELECT);

  /* projection attributes... */
  if(!PARSE(attributes)) {
    RETURN(SYNTAX_ERROR);
  }

  CONSUME(FROM);
  if(!PARSE(relations)) {
    RETURN(SYNTAX_ERROR);
  }

  NEXT;
  if(TOKEN == WHERE) {
    lvm_reset(&p, vmcode, sizeof(vmcode));

    if(!PARSE(where)) {
      RETURN(SYNTAX_ERROR);
    }

    AQL_SET_CONDITION(adt, &p);
  } else {
    REWIND;
    RETURN(OK);
  }

  CONSUME(END);

  return OK;
}

/* Parse "(values) INTO relations" for an INSERT statement. */
PARSER(insert)
{
  AQL_SET_TYPE(adt, AQL_TYPE_INSERT);

  CONSUME(LEFT_PAREN);

  if(!PARSE(values)) {
    RETURN(SYNTAX_ERROR);
  }

  CONSUME(RIGHT_PAREN);
  CONSUME(INTO);

  if(!PARSE(relations)) {
    RETURN(SYNTAX_ERROR);
  }

  RETURN(OK);
}

/* Parse "relation.attribute" for a REMOVE ATTRIBUTE statement. */
PARSER(remove_attribute)
{
  AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_ATTRIBUTE);

  CONSUME(IDENTIFIER);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(DOT);
  CONSUME(IDENTIFIER);

  PRINTF("Removing the index for the attribute %s\n", VALUE);
  AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0);

  RETURN(OK);
}

#if DB_FEATURE_REMOVE
/*
 * Parse "relation WHERE condition" for a REMOVE FROM statement.
 * Returns OK, or SYNTAX_ERROR on malformed input.
 */
PARSER(remove_from)
{
  AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_TUPLES);

  /* Use a temporary persistent relation to assign the query result to. */
  AQL_SET_FLAG(adt, AQL_FLAG_ASSIGN);
  AQL_ADD_RELATION(adt, REMOVE_RELATION);

  CONSUME(IDENTIFIER);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(WHERE);

  lvm_reset(&p, vmcode, sizeof(vmcode));
  AQL_SET_CONDITION(adt, &p);

  /* PARSE() yields a boolean, so translate it into a proper status
     instead of returning it directly. */
  if(!PARSE(where)) {
    RETURN(SYNTAX_ERROR);
  }

  RETURN(OK);
}
#endif /* DB_FEATURE_REMOVE */

/* Parse "relation.attribute" for a REMOVE INDEX statement. */
PARSER(remove_index)
{
  AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_INDEX);

  CONSUME(IDENTIFIER);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(DOT);
  CONSUME(IDENTIFIER);

  PRINTF("remove index: %s\n", VALUE);
  AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0);

  RETURN(OK);
}

/* Parse the relation name of a REMOVE RELATION statement. */
PARSER(remove_relation)
{
  AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_RELATION);

  CONSUME(IDENTIFIER);
  PRINTF("remove relation: %s\n", VALUE);
  AQL_ADD_RELATION(adt, VALUE);

  RETURN(OK);
}

/*
 * Dispatch a REMOVE statement on the token that follows the keyword and
 * require END after the sub-statement.
 */
PARSER(remove)
{
  int r; /* Boolean result of PARSE(): non-zero on success. */

  NEXT;
  switch(TOKEN) {
  case ATTRIBUTE:
    r = PARSE(remove_attribute);
    break;
#if DB_FEATURE_REMOVE
  case FROM:
    r = PARSE(remove_from);
    break;
#endif
  case INDEX:
    r = PARSE(remove_index);
    break;
  case RELATION:
    r = PARSE(remove_relation);
    break;
  default:
    RETURN(SYNTAX_ERROR);
  }

  if(!r) {
    RETURN(SYNTAX_ERROR);
  }

  CONSUME(END);

  RETURN(OK);
}

/*
 * Read the next token; if it names an index type, store that type in the
 * ADT and return the token, otherwise return NONE.
 */
PARSER_TOKEN(index_type)
{
  index_type_t type;

  NEXT;
  switch(TOKEN) {
  case INLINE:
    type = INDEX_INLINE;
    break;
  case MAXHEAP:
    type = INDEX_MAXHEAP;
    break;
  case MEMHASH:
    type = INDEX_MEMHASH;
    break;
  default:
    return NONE;
  }

  AQL_SET_INDEX_TYPE(adt, type);
  return TOKEN;
}

/* Parse "relation.attribute TYPE index-type" for a CREATE INDEX
   statement. */
PARSER(create_index)
{
  AQL_SET_TYPE(adt, AQL_TYPE_CREATE_INDEX);

  CONSUME(IDENTIFIER);
  AQL_ADD_RELATION(adt, VALUE);

  CONSUME(DOT);
  CONSUME(IDENTIFIER);

  PRINTF("Creating an index for the attribute %s\n", VALUE);
  AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0);

  CONSUME(TYPE);

  if(PARSE_TOKEN(index_type) == NONE) {
    RETURN(SYNTAX_ERROR);
  }

  RETURN(OK);
}

/* Parse the relation name of a CREATE RELATION statement. */
PARSER(create_relation)
{
  CONSUME(IDENTIFIER);

  AQL_SET_TYPE(adt, AQL_TYPE_CREATE_RELATION);
  AQL_ADD_RELATION(adt, VALUE);

  RETURN(OK);
}

/*
 * Parse a domain specification (STRING(n), LONG or INT) and add the
 * attribute `name` to the ADT with that domain and element size.
 * Returns OK, or SYNTAX_ERROR for an unknown domain or malformed size.
 */
PARSER_ARG(domain, char *name)
{
  domain_t domain;
  unsigned element_size;

  NEXT;
  switch(TOKEN) {
  case STRING:
    domain = DOMAIN_STRING;

    /* Parse the amount of characters for this domain. */
    CONSUME(LEFT_PAREN);
    CONSUME(INTEGER_VALUE);
    element_size = *(long *)lexer->value;
    CONSUME(RIGHT_PAREN);

    break;
  case LONG:
    domain = DOMAIN_LONG;
    element_size = 4;
    break;
  case INT:
    domain = DOMAIN_INT;
    element_size = 2;
    break;
  default:
    /* An unknown domain is a syntax error.  NONE is a token_t and must
       not be returned here, because the caller tests the result with
       AQL_ERROR(). */
    RETURN(SYNTAX_ERROR);
  }

  AQL_ADD_ATTRIBUTE(adt, name, domain, element_size);

  return OK;
}

/* Parse "name DOMAIN domain IN relation" for a CREATE ATTRIBUTE
   statement. */
PARSER(create_attributes)
{
  aql_status_t r;
  char name[ATTRIBUTE_NAME_LENGTH];

  AQL_SET_TYPE(adt, AQL_TYPE_CREATE_ATTRIBUTE);

  CONSUME(IDENTIFIER);

  /* Keep a private copy of the name: the lexer value is overwritten by
     the tokens consumed below. */
  strncpy(name, VALUE, sizeof(name) - 1);
  name[sizeof(name) - 1] = '\0';

  CONSUME(DOMAIN);

  r = parse_domain(lexer, name);
  if(AQL_ERROR(r)) {
    RETURN(r);
  }

  CONSUME(IN);
  CONSUME(IDENTIFIER);

  AQL_ADD_RELATION(adt, VALUE);

  RETURN(OK);
}

/*
 * Dispatch a CREATE statement on the token that follows the keyword and
 * require END after the sub-statement.
 */
PARSER(create)
{
  int r; /* Boolean result of PARSE(): non-zero on success. */

  NEXT;
  switch(TOKEN) {
  case ATTRIBUTE:
    r = PARSE(create_attributes);
    break;
  case INDEX:
    r = PARSE(create_index);
    break;
  case RELATION:
    r = PARSE(create_relation);
    break;
  default:
    RETURN(SYNTAX_ERROR);
  }

  if(!r) {
    RETURN(SYNTAX_ERROR);
  }

  CONSUME(END);

  RETURN(OK);
}

/**
 * \brief Parse an AQL statement into an abstract data type.
 * \param external_adt  The ADT to fill in; it is cleared first.
 * \param input_string  The AQL statement to parse.
 * \return The status of the first lexer call or of the statement parser
 *         that was dispatched; SYNTAX_ERROR for an unexpected first token.
 *
 * A statement that starts with an identifier must continue with an
 * assignment of a SELECT or JOIN result to that relation.  Input whose
 * first token is NONE or COMMENT yields OK.
 */
aql_status_t
aql_parse(aql_adt_t *external_adt, char *input_string)
{
  lexer_t lex;
  token_t token = NONE;
  value_t value;
  aql_status_t result;

  RESET_ERROR();

  PRINTF("Parsing \"%s\"\n", input_string);

  adt = external_adt;
  AQL_CLEAR(adt);
  AQL_SET_CONDITION(adt, NULL);

  lexer_start(&lex, input_string, &token, &value);

  result = lexer_next(&lex);
  if(!AQL_ERROR(result)) {
    switch(token) {
    case IDENTIFIER:
      PRINTF("Assign the result to relation %s\n", *lex.value);
      AQL_ADD_RELATION(adt, *lex.value);
      AQL_SET_FLAG(adt, AQL_FLAG_ASSIGN);
      if(AQL_ERROR(lexer_next(&lex))) {
        result = SYNTAX_ERROR;
        break;
      }
      if(*lex.token != ASSIGN) {
        result = SYNTAX_ERROR;
        break;
      }
      if(AQL_ERROR(lexer_next(&lex))) {
        result = SYNTAX_ERROR;
        break;
      }
      switch(*lex.token) {
      case SELECT:
        result = parse_select(&lex);
        break;
      case JOIN:
        result = parse_join(&lex);
        break;
      default:
        result = SYNTAX_ERROR;
      }
      break;
    case JOIN:
      result = parse_join(&lex);
      break;
    case CREATE:
      result = parse_create(&lex);
      break;
    case REMOVE:
      result = parse_remove(&lex);
      break;
    case INSERT:
      result = parse_insert(&lex);
      break;
    case SELECT:
      result = parse_select(&lex);
      break;
    case NONE:
    case COMMENT:
      result = OK;
      /* fallthrough */
    case END:
      break;
    default:
      result = SYNTAX_ERROR;
    }
  }

  if(AQL_ERROR(result)) {
    PRINTF("Error in function %s, line %d: input \"%s\"\n",
           error_function, error_line, error_message);
  }

  return result;
}