From 50342fa80190adbdbedd3415c01c951502603103 Mon Sep 17 00:00:00 2001 From: Nicolas Tsiftes Date: Fri, 2 Dec 2011 18:58:12 +0100 Subject: [PATCH] Publishing Antelope - a DBMS for sensor devices. For details, see A Database in Every Sensor, N. Tsiftes and A. Dunkels, in Proceedings of ACM SenSys 2011. --- apps/antelope/Makefile.antelope | 4 + apps/antelope/antelope.c | 156 ++++ apps/antelope/antelope.h | 53 ++ apps/antelope/aql-adt.c | 149 ++++ apps/antelope/aql-exec.c | 240 ++++++ apps/antelope/aql-lexer.c | 274 +++++++ apps/antelope/aql-parser.c | 877 ++++++++++++++++++++++ apps/antelope/aql.h | 221 ++++++ apps/antelope/attribute.h | 89 +++ apps/antelope/db-options.h | 173 +++++ apps/antelope/db-types.h | 64 ++ apps/antelope/debug.h | 64 ++ apps/antelope/index-inline.c | 231 ++++++ apps/antelope/index-maxheap.c | 747 +++++++++++++++++++ apps/antelope/index-memhash.c | 194 +++++ apps/antelope/index.c | 424 +++++++++++ apps/antelope/index.h | 113 +++ apps/antelope/lvm.c | 976 ++++++++++++++++++++++++ apps/antelope/lvm.h | 144 ++++ apps/antelope/relation.c | 1222 +++++++++++++++++++++++++++++++ apps/antelope/relation.h | 102 +++ apps/antelope/result.c | 185 +++++ apps/antelope/result.h | 80 ++ apps/antelope/storage-cfs.c | 583 +++++++++++++++ apps/antelope/storage.h | 76 ++ 25 files changed, 7441 insertions(+) create mode 100644 apps/antelope/Makefile.antelope create mode 100644 apps/antelope/antelope.c create mode 100644 apps/antelope/antelope.h create mode 100644 apps/antelope/aql-adt.c create mode 100644 apps/antelope/aql-exec.c create mode 100644 apps/antelope/aql-lexer.c create mode 100644 apps/antelope/aql-parser.c create mode 100644 apps/antelope/aql.h create mode 100644 apps/antelope/attribute.h create mode 100644 apps/antelope/db-options.h create mode 100644 apps/antelope/db-types.h create mode 100644 apps/antelope/debug.h create mode 100644 apps/antelope/index-inline.c create mode 100644 apps/antelope/index-maxheap.c create mode 100644 
apps/antelope/index-memhash.c create mode 100644 apps/antelope/index.c create mode 100644 apps/antelope/index.h create mode 100644 apps/antelope/lvm.c create mode 100644 apps/antelope/lvm.h create mode 100644 apps/antelope/relation.c create mode 100644 apps/antelope/relation.h create mode 100644 apps/antelope/result.c create mode 100644 apps/antelope/result.h create mode 100644 apps/antelope/storage-cfs.c create mode 100644 apps/antelope/storage.h diff --git a/apps/antelope/Makefile.antelope b/apps/antelope/Makefile.antelope new file mode 100644 index 000000000..a27a1f087 --- /dev/null +++ b/apps/antelope/Makefile.antelope @@ -0,0 +1,4 @@ +antelope_src = antelope.c aql-adt.c aql-exec.c aql-lexer.c aql-parser.c \ + index.c index-inline.c index-maxheap.c lvm.c relation.c \ + result.c storage-cfs.c +antelope_dsc = diff --git a/apps/antelope/antelope.c b/apps/antelope/antelope.c new file mode 100644 index 000000000..2bead1a7a --- /dev/null +++ b/apps/antelope/antelope.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Main functions for Antelope. + * \author + * Nicolas Tsiftes + */ + +#include + +#include "antelope.h" + +static db_output_function_t output = printf; + +void +db_init(void) +{ + relation_init(); + index_init(); +} + +void +db_set_output_function(db_output_function_t f) +{ + output = f; +} + +const char * +db_get_result_message(db_result_t code) +{ + switch(code) { + case DB_FINISHED: + return "Iteration finished"; + case DB_OK: + return "Operation succeeded"; + case DB_LIMIT_ERROR: + return "Limit reached"; + case DB_ALLOCATION_ERROR: + return "Allocation error"; + case DB_STORAGE_ERROR: + return "Storage error"; + case DB_PARSING_ERROR: + return "Parsing error"; + case DB_NAME_ERROR: + return "Invalid name"; + case DB_RELATIONAL_ERROR: + return "Relational algebra error"; + case DB_TYPE_ERROR: + return "Type error"; + case DB_IMPLEMENTATION_ERROR: + return "Implementation error"; + case DB_INDEX_ERROR: + return "Index error"; + case DB_BUSY_ERROR: + return "Busy with processing"; + case DB_INCONSISTENCY_ERROR: + return "Inconsistent handle"; + case DB_ARGUMENT_ERROR: + return "Invalid argument"; + default: + return "Unknown result code"; + }; +} + +db_result_t 
+db_print_header(db_handle_t *handle) +{ + int column; + attribute_t *attr; + + output("[relation = %s, attributes = (", handle->result_rel->name); + attr = list_head(handle->result_rel->attributes); + for(column = 0; column < handle->ncolumns; column++) { + if(attr == NULL) { + return DB_IMPLEMENTATION_ERROR; + } else if(attr->flags & ATTRIBUTE_FLAG_NO_STORE) { + continue; + } + output("%s%s", column > 0 ? ", " : "", attr->name); + attr = attr->next; + } + output(")]\n"); + return DB_OK; +} + +db_result_t +db_print_tuple(db_handle_t *handle) +{ + int column; + attribute_value_t value; + db_result_t result; + + output("Row %lu:\t", (unsigned long)handle->current_row); + + for(column = 0; column < handle->ncolumns; column++) { + result = db_get_value(&value, handle, column); + if(DB_ERROR(result)) { + output("Unable to get the value for row %lu, column %u: %s\n", + (unsigned long)handle->current_row, column, + db_get_result_message(result)); + break; + } + + switch(value.domain) { + case DOMAIN_STRING: + output("\"%s\"\t", VALUE_STRING(&value)); + break; + case DOMAIN_INT: + output("%d\t", (int)VALUE_INT(&value)); + break; + case DOMAIN_LONG: + output("%ld\t", (long)VALUE_LONG(&value)); + break; + default: + output("\nUnrecognized domain: %d\n", value.domain); + return DB_IMPLEMENTATION_ERROR; + } + } + output("\n"); + + return DB_OK; +} + +int +db_processing(db_handle_t *handle) +{ + return handle->flags & DB_HANDLE_FLAG_PROCESSING; +} diff --git a/apps/antelope/antelope.h b/apps/antelope/antelope.h new file mode 100644 index 000000000..40f63303b --- /dev/null +++ b/apps/antelope/antelope.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Declarations of the main Antelope functions. 
+ * \author + * Nicolas Tsiftes + */ + +#ifndef DB_H +#define DB_H + +#include "db-types.h" +#include "result.h" +#include "aql.h" + +typedef int (*db_output_function_t)(const char *, ...); + +void db_init(void); +void db_set_output_function(db_output_function_t f); +const char *db_get_result_message(db_result_t code); +db_result_t db_print_header(db_handle_t *handle); +db_result_t db_print_tuple(db_handle_t *handle); +int db_processing(db_handle_t *handle); + +#endif /* DB_H */ diff --git a/apps/antelope/aql-adt.c b/apps/antelope/aql-adt.c new file mode 100644 index 000000000..3187f0a1c --- /dev/null +++ b/apps/antelope/aql-adt.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file is part of the Contiki operating system. + * + */ + +/** + * \file + * Utilities for building the internal representation of an AQL command. + * \author + * Nicolas Tsiftes + */ + +#include + +#include "aql.h" + +#define DEBUG DEBUG_NONE +#include "net/uip-debug.h" + +static unsigned char char_buf[DB_MAX_CHAR_SIZE_PER_ROW]; +static uint8_t next_free_offset; + +static aql_attribute_t * +get_attribute(aql_adt_t *adt, char *name) +{ + int i; + + for(i = 0; i < AQL_ATTRIBUTE_COUNT(adt); i++) { + if(strcmp(adt->attributes[i].name, name) == 0) { + return &adt->attributes[i]; + } + } + return NULL; +} + +static unsigned char * +save_char(unsigned char *ptr, size_t length) +{ + unsigned char *start_ptr; + + if(length + next_free_offset > DB_MAX_CHAR_SIZE_PER_ROW) { + return NULL; + } + + start_ptr = char_buf + next_free_offset; + memcpy(start_ptr, ptr, length); + next_free_offset += length; + + return start_ptr; +} + +void +aql_clear(aql_adt_t *adt) +{ + char_buf[0] = 0; + next_free_offset = 0; + + adt->optype = AQL_TYPE_NONE; + adt->relation_count = 0; + adt->attribute_count = 0; + adt->value_count = 0; + adt->flags = 0; + memset(adt->aggregators, 0, sizeof(adt->aggregators)); +} + +db_result_t +aql_add_attribute(aql_adt_t *adt, char *name, domain_t domain, + unsigned element_size, int processed_only) +{ + aql_attribute_t *attr; + + if(adt->attribute_count == AQL_ATTRIBUTE_LIMIT) { + return DB_LIMIT_ERROR; + } + + 
if(processed_only && get_attribute(adt, name)) { + /* No need to have multiple instances of attributes that are only + used for processing in the PLE. */ + return DB_OK; + } + + attr = &adt->attributes[adt->attribute_count]; + + if(strlen(name) + 1 > sizeof(attr->name)) { + return DB_LIMIT_ERROR; + } + adt->attribute_count++; /* Claim the slot only once the name is known to fit. */ + strcpy(attr->name, name); + attr->domain = domain; + attr->element_size = element_size; + attr->flags = processed_only ? ATTRIBUTE_FLAG_NO_STORE : 0; + + return DB_OK; +} + +db_result_t +aql_add_value(aql_adt_t *adt, domain_t domain, void *value_ptr) +{ + attribute_value_t *value; + + if(adt->value_count == AQL_ATTRIBUTE_LIMIT) { + return DB_LIMIT_ERROR; + } + + value = &adt->values[adt->value_count]; + value->domain = domain; + + switch(domain) { + case DOMAIN_INT: + VALUE_LONG(value) = *(long *)value_ptr; + break; + case DOMAIN_STRING: + VALUE_STRING(value) = save_char(value_ptr, strlen(value_ptr) + 1); + if(VALUE_STRING(value) != NULL) { + break; + } /* Otherwise fall through: the string did not fit in the buffer. */ + default: + return DB_TYPE_ERROR; + } + adt->value_count++; /* Claim the slot only after the value was stored. */ + return DB_OK; +} diff --git a/apps/antelope/aql-exec.c b/apps/antelope/aql-exec.c new file mode 100644 index 000000000..ad1275197 --- /dev/null +++ b/apps/antelope/aql-exec.c @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file is part of the Contiki operating system. + * + */ + +/** + * \file + * Query execution functions for AQL. + * \author + * Nicolas Tsiftes + */ + +#include +#include +#include + +#define DEBUG DEBUG_NONE +#include "net/uip-debug.h" + +#include "index.h" +#include "relation.h" +#include "result.h" +#include "aql.h" + +static aql_adt_t adt; + +static void +clear_handle(db_handle_t *handle) +{ + memset(handle, 0, sizeof(*handle)); + + handle->result_rel = NULL; + handle->left_rel = NULL; + handle->right_rel = NULL; + handle->join_rel = NULL; +} + +static db_result_t +aql_execute(db_handle_t *handle, aql_adt_t *adt) +{ + uint8_t optype; + int first_rel_arg; + db_result_t result; + relation_t *rel; + aql_attribute_t *attr; + attribute_t *relattr; + + optype = AQL_GET_TYPE(adt); + if(optype == AQL_TYPE_NONE) { + /* No-ops always succeed. These can be generated by + empty lines or comments in the query language. 
*/ + return DB_OK; + } + + /* If the ASSIGN flag is set, the first relation in the array is + the desired result relation. */ + first_rel_arg = !!(adt->flags & AQL_FLAG_ASSIGN); + + if(optype != AQL_TYPE_CREATE_RELATION && + optype != AQL_TYPE_REMOVE_RELATION && + optype != AQL_TYPE_JOIN) { + rel = relation_load(adt->relations[first_rel_arg]); + if(rel == NULL) { + return DB_NAME_ERROR; + } + } else { + rel = NULL; + } + + result = DB_RELATIONAL_ERROR; + switch(optype) { + case AQL_TYPE_CREATE_ATTRIBUTE: + attr = &adt->attributes[0]; + if(relation_attribute_add(rel, DB_MEMORY, attr->name, attr->domain, + attr->element_size) != NULL) { + result = DB_OK; + } + break; + case AQL_TYPE_CREATE_INDEX: + relattr = relation_attribute_get(rel, adt->attributes[0].name); + if(relattr == NULL) { + result = DB_NAME_ERROR; + break; + } + result = index_create(AQL_GET_INDEX_TYPE(adt), rel, relattr); + break; + case AQL_TYPE_CREATE_RELATION: + if(relation_create(adt->relations[0], DB_STORAGE) != NULL) { + result = DB_OK; + } + break; + case AQL_TYPE_REMOVE_ATTRIBUTE: + result = relation_attribute_remove(rel, adt->attributes[0].name); + break; + case AQL_TYPE_REMOVE_INDEX: + relattr = relation_attribute_get(rel, adt->attributes[0].name); + if(relattr != NULL) { + if(relattr->index != NULL) { + result = index_destroy(relattr->index); + } else { + result = DB_OK; + } + } else { + result = DB_NAME_ERROR; + } + break; + case AQL_TYPE_REMOVE_RELATION: + result = relation_remove(adt->relations[0], 1); + break; +#if DB_FEATURE_REMOVE + case AQL_TYPE_REMOVE_TUPLES: + /* Overwrite the attribute array with a full copy of the original + relation's attributes. 
*/ + adt->attribute_count = 0; + for(relattr = list_head(rel->attributes); + relattr != NULL; + relattr = relattr->next) { + AQL_ADD_ATTRIBUTE(adt, relattr->name, DOMAIN_UNSPECIFIED, 0); + } + AQL_SET_FLAG(adt, AQL_FLAG_INVERSE_LOGIC); +#endif /* DB_FEATURE_REMOVE */ + case AQL_TYPE_SELECT: + if(handle == NULL) { + result = DB_ARGUMENT_ERROR; + break; + } + result = relation_select(handle, rel, adt); + break; + case AQL_TYPE_INSERT: + result = relation_insert(rel, adt->values); + break; +#if DB_FEATURE_JOIN + case AQL_TYPE_JOIN: + if(handle == NULL) { + result = DB_ARGUMENT_ERROR; + break; + } + handle->left_rel = relation_load(adt->relations[first_rel_arg]); + if(handle->left_rel == NULL) { + break; + } + handle->right_rel = relation_load(adt->relations[first_rel_arg + 1]); + if(handle->right_rel == NULL) { + relation_release(handle->left_rel); + break; + } + result = relation_join(handle, adt); + break; +#endif /* DB_FEATURE_JOIN */ + default: + break; + } + + if(rel != NULL) { + if(handle == NULL || !(handle->flags & DB_HANDLE_FLAG_PROCESSING)) { + relation_release(rel); + } + } + + return result; +} + +db_result_t +db_query(db_handle_t *handle, const char *format, ...) 
+{ + va_list ap; + char query_string[AQL_MAX_QUERY_LENGTH]; + + va_start(ap, format); + vsnprintf(query_string, sizeof(query_string), format, ap); + va_end(ap); + + if(handle != NULL) { + clear_handle(handle); + } + + if(AQL_ERROR(aql_parse(&adt, query_string))) { + return DB_PARSING_ERROR; + } + + /*aql_optimize(&adt);*/ + + return aql_execute(handle, &adt); +} + +db_result_t +db_process(db_handle_t *handle) +{ + uint8_t optype; + + optype = ((aql_adt_t *)handle->adt)->optype; + + switch(optype) { +#if DB_FEATURE_REMOVE + case AQL_TYPE_REMOVE_TUPLES: + return relation_process_remove(handle); + break; +#endif + case AQL_TYPE_SELECT: + return relation_process_select(handle); + break; +#if DB_FEATURE_JOIN + case AQL_TYPE_JOIN: + return relation_process_join(handle); +#endif /* DB_FEATURE_JOIN */ + default: + break; + } + + PRINTF("DB: Invalid operation type: %d\n", optype); + + return DB_INCONSISTENCY_ERROR; +} diff --git a/apps/antelope/aql-lexer.c b/apps/antelope/aql-lexer.c new file mode 100644 index 000000000..0e98584ef --- /dev/null +++ b/apps/antelope/aql-lexer.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Lexical analyzer for AQL, the Antelope Query Language. + * \author + * Nicolas Tsiftes + */ + +#include "aql.h" + +#include +#include +#include +#include +#include +#include + +struct keyword { + char *string; + token_t token; +}; + +/* The keywords are arranged primarily by length and + secondarily by expected lookup frequency. 
*/ +static struct keyword keywords[] = { + {";", END}, + {"(", LEFT_PAREN}, + {")", RIGHT_PAREN}, + {",", COMMA}, + {"=", EQUAL}, + {">", GT}, + {"<", LT}, + {".", DOT}, + {"+", ADD}, + {"-", SUB}, + {"*", MUL}, + {"/", DIV}, + {"#", COMMENT}, + + {">=", GEQ}, + {"<=", LEQ}, + {"<>", NOT_EQUAL}, + {"<-", ASSIGN}, + {"OR", OR}, + {"IS", IS}, + {"ON", ON}, + {"IN", IN}, + + {"AND", AND}, + {"NOT", NOT}, + {"SUM", SUM}, + {"MAX", MAX}, + {"MIN", MIN}, + {"INT", INT}, + + {"INTO", INTO}, + {"FROM", FROM}, + {"MEAN", MEAN}, + {"JOIN", JOIN}, + {"LONG", LONG}, + {"TYPE", TYPE}, + + {"WHERE", WHERE}, + {"COUNT", COUNT}, + {"INDEX", INDEX}, + + {"INSERT", INSERT}, + {"SELECT", SELECT}, + {"REMOVE", REMOVE}, + {"CREATE", CREATE}, + {"MEDIAN", MEDIAN}, + {"DOMAIN", DOMAIN}, + {"STRING", STRING}, + {"INLINE", INLINE}, + + {"PROJECT", PROJECT}, + {"MAXHEAP", MAXHEAP}, + {"MEMHASH", MEMHASH}, + + {"RELATION", RELATION}, + + {"ATTRIBUTE", ATTRIBUTE} +}; + +/* Provides a pointer to the first keyword of a specific length. 
*/ +static const int8_t skip_hint[] = {0, 13, 21, 27, 33, 36, 44, 47, 48}; + +static char separators[] = "#.;,() \t\n"; + +int +lexer_start(lexer_t *lexer, char *input, token_t *token, value_t *value) +{ + lexer->input = input; + lexer->prev_pos = input; + lexer->token = token; + lexer->value = value; + + return 0; +} + +static token_t +get_token_id(const char *string, const size_t length) +{ + int start, end; + int i; + + if(sizeof(skip_hint) < length || length < 1) { + return NONE; + } + + + start = skip_hint[length - 1]; + if(sizeof(skip_hint) == length) { + end = sizeof(keywords) / sizeof(keywords[0]); + } else { + end = skip_hint[length]; + } + + for(i = start; i < end; i++) { + if(strncasecmp(keywords[i].string, string, length) == 0) { + return keywords[i].token; + } + } + + return NONE; +} + +static int +next_real(lexer_t *lexer, const char *s) +{ + char *end; + long long_value; +#if DB_FEATURE_FLOATS + float float_value; +#endif /* DB_FEATURE_FLOATS */ + + errno = 0; + long_value = strtol(s, &end, 10); + +#if DB_FEATURE_FLOATS + if(*end == '.') { + /* Process a float value. */ + float_value = strtof(s, &end); + if(float_value == 0 && s == end) { + return -1; + } + memcpy(lexer->value, &float_value, sizeof(float_value)); + *lexer->token = FLOAT_VALUE; + lexer->input = end; + + return 1; + } +#endif /* DB_FEATURE_FLOATS */ + + /* Process an integer value. */ + if(long_value == 0 && errno != 0) { + return -1; + } + memcpy(lexer->value, &long_value, sizeof(long_value)); + *lexer->token = INTEGER_VALUE; + lexer->input = end; + + return 1; +} + +static int +next_string(lexer_t *lexer, const char *s) +{ + char *end; + size_t length; + + end = strchr(s, '\''); + if(end == NULL) { + return -1; + } + + length = end - s; + *lexer->token = STRING_VALUE; + lexer->input = end + 1; /* Skip the closing delimiter. 
*/ + if(length >= sizeof(*lexer->value)) { length = sizeof(*lexer->value) - 1; } /* Truncate to the value buffer; assumes value_t is an array type. */ + memcpy(lexer->value, s, length); + (*lexer->value)[length] = '\0'; + + return 1; +} + +static int +next_token(lexer_t *lexer, const char *s) +{ + size_t length; + + length = strcspn(s, separators); + if(length == 0) { + /* We encountered a separator, so we try to get a token of + precisely 1 byte. */ + length = 1; + } + + *lexer->token = get_token_id(s, length); + lexer->input = s + length; + if(*lexer->token != NONE) { + return 1; + } + + /* The input did not constitute a valid token, + so we regard it as an identifier. */ + + *lexer->token = IDENTIFIER; + if(length >= sizeof(*lexer->value)) { length = sizeof(*lexer->value) - 1; } /* Truncate over-long identifiers; assumes value_t is an array type. */ + memcpy(lexer->value, s, length); + (*lexer->value)[length] = '\0'; + + return 1; +} + +int +lexer_next(lexer_t *lexer) +{ + const char *s; + + *lexer->token = NONE; + s = lexer->input; + s += strspn(s, " \t\n"); + lexer->prev_pos = s; + + switch(*s) { + case '\'': + /* Process the string that follows the delimiter. */ + return next_string(lexer, s + 1); + case '\0': + return 0; + default: + if(isdigit((unsigned char)*s) || (*s == '-' && isdigit((unsigned char)s[1]))) { + return next_real(lexer, s); + } + + /* Process a token. */ + return next_token(lexer, s); + } +} + +void +lexer_rewind(lexer_t *lexer) +{ + lexer->input = lexer->prev_pos; +} diff --git a/apps/antelope/aql-parser.c b/apps/antelope/aql-parser.c new file mode 100644 index 000000000..eecf31f8f --- /dev/null +++ b/apps/antelope/aql-parser.c @@ -0,0 +1,877 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3.
Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * A recursive parser for AQL, the Antelope Query Language. 
+ * \author + * Nicolas Tsiftes + */ + +#include "attribute.h" +#include "db-options.h" +#include "index.h" +#include "aql.h" +#include "lvm.h" + +#include +#include +#include +#include +#include + +#define DEBUG DEBUG_NONE +#include "debug.h" + +#if DEBUG +static char error_message[DB_ERROR_BUF_SIZE]; +static int error_line; +static const char *error_function; +#define RETURN(value) \ + do { \ + if(error_message[0] == '\0') { \ + strncpy(error_message, lexer->input, sizeof(error_message) - 1); \ + error_line = __LINE__; \ + error_function = __func__; \ + } \ + } while(0); \ + return (value) +#define RESET_ERROR() \ + do { \ + error_message[0] = '\0'; \ + error_line = 0; \ + error_function = NULL; \ + } while(0) +#else +#define RETURN(value) return (value) +#define RESET_ERROR() +#endif +#define PARSER(name) \ + static aql_status_t \ + parse_##name(lexer_t *lexer) +#define PARSER_ARG(name, arg) \ + static aql_status_t \ + parse_##name(lexer_t *lexer, arg) +#define PARSER_TOKEN(name) \ + static token_t \ + parse_##name(lexer_t *lexer) +#define PARSE(name) \ + !AQL_ERROR(parse_##name(lexer)) +#define PARSE_TOKEN(name) \ + parse_##name(lexer) + +#define NEXT lexer_next(lexer) +#define REWIND lexer_rewind(lexer); RESET_ERROR() +#define TOKEN *lexer->token +#define VALUE *lexer->value + +#define CONSUME(token) \ + do { \ + NEXT; \ + if(TOKEN != (token)) { \ + RETURN(SYNTAX_ERROR); \ + } \ + } while(0) + + +/* + * The grammar of this language is defined in Extended Backus-Naur Form, + * where capitalized strings correspond to lexical tokens defined in + * aql.h and interpreted in lexer.c. 
+ * + * operand = LEFT_PAREN, expr, RIGHT_PAREN | INTEGER | FLOAT | + * IDENTIFIER | STRING ; + * operator = ADD | SUB | MUL | DIV ; + * expr = operand, operator, operand ; + * + * comparison-operator = GE | GEQ | LE | LEQ | EQ | NEQ ; + * comparison = expr, comparison-operator, expr ; + * condition = comparison, [(AND | OR), comparison] ; + * relation-list = IDENTIFIER, {COMMA, relation-list} ; + * attribute-list = IDENTIFIER, {COMMA, attribute-list} ; + * select = SELECT, attribute-list, FROM, relation-list, WHERE, condition, END ; + * + * value = INTEGER | FLOAT | STRING ; + * value-list = value, {COMMA, value} ; + * insert = INSERT, LEFT_PAREN, value-list, RIGHT_PAREN, INTO, IDENTIFIER, END ; + * + * sqrl = select | insert ; + */ + +static aql_adt_t *adt; + +static lvm_instance_t p; +static unsigned char vmcode[128]; + +PARSER_TOKEN(cmp) +{ + NEXT; + switch(TOKEN) { + case EQUAL: + case NOT_EQUAL: + case GT: + case LT: + case GEQ: + case LEQ: + return TOKEN; + default: + return NONE; + } +} + +PARSER_TOKEN(op) +{ + NEXT; + switch(TOKEN) { + case ADD: + case SUB: + case MUL: + case DIV: + case RIGHT_PAREN: + return TOKEN; + default: + return NONE; + } +} + +PARSER_TOKEN(aggregator) +{ + NEXT; + switch(TOKEN) { + case COUNT: + case SUM: + case MEAN: + case MEDIAN: + case MAX: + case MIN: + return TOKEN; + default: + return NONE; + } +} + +PARSER(attributes) +{ + token_t token; + aql_aggregator_t function; + + token = PARSE_TOKEN(aggregator); + if(token != NONE) { + switch(TOKEN) { + case COUNT: + function = AQL_COUNT; + break; + case SUM: + function = AQL_SUM; + break; + case MEAN: + function = AQL_MEAN; + break; + case MEDIAN: + function = AQL_MEDIAN; + break; + case MAX: + function = AQL_MAX; + break; + case MIN: + function = AQL_MIN; + break; + default: + RETURN(SYNTAX_ERROR); + } + + AQL_SET_FLAG(adt, AQL_FLAG_AGGREGATE); + + PRINTF("aggregator: %d\n", TOKEN); + + /* Parse the attribute to aggregate. 
*/ + CONSUME(LEFT_PAREN); + CONSUME(IDENTIFIER); + + AQL_ADD_AGGREGATE(adt, function, VALUE); + PRINTF("aggregated attribute: %s\n", VALUE); + + CONSUME(RIGHT_PAREN); + goto check_more_attributes; + } else { + REWIND; + } + + /* Plain identifier. */ + + CONSUME(IDENTIFIER); + + AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0); + +check_more_attributes: + NEXT; + if(TOKEN == COMMA) { + if(!PARSE(attributes)) { + RETURN(SYNTAX_ERROR); + } + } else { + REWIND; + } + + RETURN(OK); +} + +PARSER(relations) +{ + /* Parse comma-separated identifiers for relations. */ + CONSUME(IDENTIFIER); + + AQL_ADD_RELATION(adt, VALUE); + NEXT; + if(TOKEN == COMMA) { + if(!PARSE(relations)) { + RETURN(SYNTAX_ERROR); + } + } else { + REWIND; + } + + RETURN(OK); +} + +PARSER(values) +{ + /* Parse comma-separated attribute values. */ + NEXT; + switch(TOKEN) { + case STRING_VALUE: + AQL_ADD_VALUE(adt, DOMAIN_STRING, VALUE); + break; + case INTEGER_VALUE: + AQL_ADD_VALUE(adt, DOMAIN_INT, VALUE); + break; + default: + RETURN(SYNTAX_ERROR); + } + + NEXT; + if(TOKEN == COMMA) { + if(!PARSE(values)) { RETURN(SYNTAX_ERROR); } /* PARSE() yields a boolean, not a status code. */ + } else { + REWIND; + } + + RETURN(OK); +} + +PARSER(operand) +{ + NEXT; + switch(TOKEN) { + case IDENTIFIER: + lvm_register_variable(VALUE, LVM_LONG); + lvm_set_variable(&p, VALUE); + AQL_ADD_PROCESSING_ATTRIBUTE(adt, VALUE); + break; + case STRING_VALUE: + break; + case FLOAT_VALUE: + break; + case INTEGER_VALUE: + lvm_set_long(&p, *(long *)lexer->value); + break; + default: + RETURN(SYNTAX_ERROR); + } + + RETURN(OK); +} + +PARSER(expr) +{ + token_t token; + size_t saved_end; + operator_t op; + + saved_end = lvm_get_end(&p); + + NEXT; + if(TOKEN == LEFT_PAREN) { + if(!PARSE(expr)) { + RETURN(SYNTAX_ERROR); + } + CONSUME(RIGHT_PAREN); + } else { + REWIND; + if(!PARSE(operand)) { + RETURN(SYNTAX_ERROR); + } + } + + while(1) { + token = PARSE_TOKEN(op); + if(token == NONE) { + saved_end = lvm_get_end(&p); + REWIND; + break; + } else if (token == RIGHT_PAREN) { + break; + } + +
if(!PARSE(operand) && !PARSE(expr)) { + RETURN(SYNTAX_ERROR); + } + + saved_end = lvm_shift_for_operator(&p, saved_end); + + switch(token) { + case ADD: + op = LVM_ADD; + break; + case SUB: + op = LVM_SUB; + break; + case MUL: + op = LVM_MUL; + break; + case DIV: + op = LVM_DIV; + break; + default: + RETURN(SYNTAX_ERROR); + } + lvm_set_op(&p, op); + lvm_set_end(&p, saved_end); + } + + return OK; +} + +/* + * Parse "expr comparison-operator expr" and emit the corresponding LVM + * relation into the shared LVM instance p. + * NOTE(review): GT maps to LVM_GE and LT to LVM_LE; presumably these are + * the strict operators of the LVM -- confirm against lvm.h. + */ +PARSER(comparison) +{ + token_t token; + size_t saved_end; + operator_t rel; + + saved_end = lvm_jump_to_operand(&p); + + if(!PARSE(expr)) { + RETURN(SYNTAX_ERROR); + } + + saved_end = lvm_set_end(&p, saved_end); + + token = PARSE_TOKEN(cmp); + if(token == NONE) { + RETURN(SYNTAX_ERROR); + } + + switch(token) { + case GT: + rel = LVM_GE; + break; + case GEQ: + rel = LVM_GEQ; + break; + case LT: + rel = LVM_LE; + break; + case LEQ: + rel = LVM_LEQ; + break; + case EQUAL: + rel = LVM_EQ; + break; + case NOT_EQUAL: + rel = LVM_NEQ; + break; + default: + RETURN(SYNTAX_ERROR); + } + + lvm_set_relation(&p, rel); + lvm_set_end(&p, saved_end); + + if(!PARSE(expr)) { + RETURN(SYNTAX_ERROR); + } + + RETURN(OK); +} + +/* + * Parse a WHERE condition: one comparison followed by any number of + * AND/OR connectives, each followed by either a parenthesized condition + * or another comparison. Every connective is emitted into p with + * lvm_set_relation(). Returns OK, or SYNTAX_ERROR if any part fails. + */ +PARSER(where) +{ + int r; + operator_t connective; + size_t saved_end; + + if(!PARSE(comparison)) { + RETURN(SYNTAX_ERROR); + } + + saved_end = 0; + + /* The WHERE clause can consist of multiple propositions. */ + for(;;) { + NEXT; + if(TOKEN != AND && TOKEN != OR) { + REWIND; + break; + } + + connective = TOKEN == AND ? 
LVM_AND : LVM_OR; + + saved_end = lvm_shift_for_operator(&p, saved_end); + lvm_set_relation(&p, connective); + lvm_set_end(&p, saved_end); + + NEXT; + if(TOKEN == LEFT_PAREN) { + r = PARSE(where); + if(!r) { + RETURN(SYNTAX_ERROR); + } + CONSUME(RIGHT_PAREN); + } else { + REWIND; + r = PARSE(comparison); + if(!r) { + /* r is 0 here; returning it would not satisfy AQL_ERROR(), so the + caller would treat the failed comparison as a success. */ + RETURN(SYNTAX_ERROR); + } + } + } + + lvm_print_code(&p); + + return OK; +} + +/* + * Parse the operands of a JOIN query: two comma-separated relation + * identifiers, ON and the join attribute, PROJECT and the projection + * attributes, then END. + */ +PARSER(join) +{ + AQL_SET_TYPE(adt, AQL_TYPE_JOIN); + + CONSUME(IDENTIFIER); + + PRINTF("Left relation: %s\n", VALUE); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(COMMA); + CONSUME(IDENTIFIER); + + PRINTF("Right relation: %s\n", VALUE); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(ON); + CONSUME(IDENTIFIER); + + PRINTF("Join on attribute %s\n", VALUE); + AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0); + + CONSUME(PROJECT); + + /* projection attributes... */ + if(!PARSE(attributes)) { + RETURN(SYNTAX_ERROR); + } + + CONSUME(END); + + RETURN(OK); +} + +PARSER(select) +{ + AQL_SET_TYPE(adt, AQL_TYPE_SELECT); + + /* projection attributes... 
*/ + if(!PARSE(attributes)) { + RETURN(SYNTAX_ERROR); + } + + CONSUME(FROM); + if(!PARSE(relations)) { + RETURN(SYNTAX_ERROR); + } + + NEXT; + if(TOKEN == WHERE) { + lvm_reset(&p, vmcode, sizeof(vmcode)); + + if(!PARSE(where)) { + RETURN(SYNTAX_ERROR); + } + + AQL_SET_CONDITION(adt, &p); + } else { + REWIND; + RETURN(OK); + } + + CONSUME(END); + + return OK; +} + +PARSER(insert) +{ + AQL_SET_TYPE(adt, AQL_TYPE_INSERT); + + CONSUME(LEFT_PAREN); + + if(!PARSE(values)) { + RETURN(SYNTAX_ERROR); + } + + CONSUME(RIGHT_PAREN); + CONSUME(INTO); + + if(!PARSE(relations)) { + RETURN(SYNTAX_ERROR); + } + + RETURN(OK); +} + +PARSER(remove_attribute) +{ + AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_ATTRIBUTE); + + CONSUME(IDENTIFIER); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(DOT); + CONSUME(IDENTIFIER); + + PRINTF("Removing the index for the attribute %s\n", VALUE); + AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0); + + RETURN(OK); +} + +#if DB_FEATURE_REMOVE +PARSER(remove_from) +{ + AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_TUPLES); + + /* Use a temporary persistent relation to assign the query result to. 
*/ + AQL_SET_FLAG(adt, AQL_FLAG_ASSIGN); + AQL_ADD_RELATION(adt, TEMP_RELATION); + + CONSUME(IDENTIFIER); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(WHERE); + + lvm_reset(&p, vmcode, sizeof(vmcode)); + AQL_SET_CONDITION(adt, &p); + + return PARSE(where); + +} +#endif /* DB_FEATURE_REMOVE */ + +PARSER(remove_index) +{ + AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_INDEX); + + CONSUME(IDENTIFIER); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(DOT); + CONSUME(IDENTIFIER); + + PRINTF("remove index: %s\n", VALUE); + AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0); + + RETURN(OK); +} + +PARSER(remove_relation) +{ + AQL_SET_TYPE(adt, AQL_TYPE_REMOVE_RELATION); + + CONSUME(IDENTIFIER); + PRINTF("remove relation: %s\n", VALUE); + AQL_ADD_RELATION(adt, VALUE); + + RETURN(OK); +} + +PARSER(remove) +{ + aql_status_t r; + + NEXT; + switch(TOKEN) { + case ATTRIBUTE: + r = PARSE(remove_attribute); + break; +#if DB_FEATURE_REMOVE + case FROM: + r = PARSE(remove_from); + break; +#endif + case INDEX: + r = PARSE(remove_index); + break; + case RELATION: + r = PARSE(remove_relation); + break; + default: + RETURN(SYNTAX_ERROR); + } + + if(!r) { + RETURN(SYNTAX_ERROR); + } + + CONSUME(END); + + RETURN(OK); +} + +PARSER_TOKEN(index_type) +{ + index_type_t type; + + NEXT; + switch(TOKEN) { + case INLINE: + type = INDEX_INLINE; + break; + case MAXHEAP: + type = INDEX_MAXHEAP; + break; + case MEMHASH: + type = INDEX_MEMHASH; + break; + default: + return NONE; + }; + + AQL_SET_INDEX_TYPE(adt, type); + return TOKEN; +} + +PARSER(create_index) +{ + AQL_SET_TYPE(adt, AQL_TYPE_CREATE_INDEX); + + CONSUME(IDENTIFIER); + AQL_ADD_RELATION(adt, VALUE); + + CONSUME(DOT); + CONSUME(IDENTIFIER); + + PRINTF("Creating an index for the attribute %s\n", VALUE); + AQL_ADD_ATTRIBUTE(adt, VALUE, DOMAIN_UNSPECIFIED, 0); + + CONSUME(TYPE); + + if(PARSE_TOKEN(index_type) == NONE) { + RETURN(SYNTAX_ERROR); + } + + RETURN(OK); +} + +PARSER(create_relation) +{ + CONSUME(IDENTIFIER); + + AQL_SET_TYPE(adt, 
AQL_TYPE_CREATE_RELATION); + AQL_ADD_RELATION(adt, VALUE); + + RETURN(OK); +} + +/* + * Parse a domain specification -- STRING(n), LONG or INT -- and add the + * attribute called name, with that domain and element size, to the ADT. + * Any other token is reported as SYNTAX_ERROR. + */ +PARSER_ARG(domain, char *name) +{ + domain_t domain; + unsigned element_size; + + NEXT; + switch(TOKEN) { + case STRING: + domain = DOMAIN_STRING; + + /* Parse the amount of characters for this domain. */ + CONSUME(LEFT_PAREN); + CONSUME(INTEGER_VALUE); + element_size = *(long *)lexer->value; + CONSUME(RIGHT_PAREN); + + break; + case LONG: + domain = DOMAIN_LONG; + element_size = 4; + break; + case INT: + domain = DOMAIN_INT; + element_size = 2; + break; + default: + /* Report a status code; NONE is a token constant, not an aql_status_t. */ + RETURN(SYNTAX_ERROR); + } + + AQL_ADD_ATTRIBUTE(adt, name, domain, element_size); + + return OK; +} + +/* + * Parse "IDENTIFIER DOMAIN domain IN IDENTIFIER": the attribute name, its + * domain, and the relation that the attribute is added to. + */ +PARSER(create_attributes) +{ + aql_status_t r; + /* One extra byte for the terminator, matching aql_attribute.name, so + that names of ATTRIBUTE_NAME_LENGTH characters are not truncated. */ + char name[ATTRIBUTE_NAME_LENGTH + 1]; + + AQL_SET_TYPE(adt, AQL_TYPE_CREATE_ATTRIBUTE); + + CONSUME(IDENTIFIER); + + strncpy(name, VALUE, sizeof(name) - 1); + name[sizeof(name) - 1] = '\0'; + + CONSUME(DOMAIN); + + r = parse_domain(lexer, name); + if(AQL_ERROR(r)) { + RETURN(r); + } + + CONSUME(IN); + CONSUME(IDENTIFIER); + + AQL_ADD_RELATION(adt, VALUE); + + RETURN(OK); +} + +/* + * Dispatch on the token that follows CREATE (ATTRIBUTE, INDEX or + * RELATION) and require END after the selected sub-parser succeeds. + */ +PARSER(create) +{ + aql_status_t r; + + NEXT; + switch(TOKEN) { + case ATTRIBUTE: + r = PARSE(create_attributes); + break; + case INDEX: + r = PARSE(create_index); + break; + case RELATION: + r = PARSE(create_relation); + break; + default: + RETURN(SYNTAX_ERROR); + } + + if(!r) { + RETURN(SYNTAX_ERROR); + } + + CONSUME(END); + + RETURN(OK); +} + +aql_status_t +aql_parse(aql_adt_t *external_adt, char *input_string) +{ + lexer_t lex; + token_t token = NONE; + value_t value; + aql_status_t result; + + RESET_ERROR(); + + PRINTF("Parsing \"%s\"\n", input_string); + + adt = external_adt; + AQL_CLEAR(adt); + AQL_SET_CONDITION(adt, NULL); + + lexer_start(&lex, input_string, &token, &value); + + result = lexer_next(&lex); + if(!AQL_ERROR(result)) { + switch(token) { + case IDENTIFIER: + PRINTF("Assign the result to relation %s\n", *lex.value); + AQL_ADD_RELATION(adt, *lex.value); + AQL_SET_FLAG(adt, AQL_FLAG_ASSIGN); + 
if(AQL_ERROR(lexer_next(&lex))) { + result = SYNTAX_ERROR; + break; + } + if(*lex.token != ASSIGN) { + result = SYNTAX_ERROR; + break; + } + if(AQL_ERROR(lexer_next(&lex))) { + result = SYNTAX_ERROR; + break; + } + switch(*lex.token) { + case SELECT: + result = parse_select(&lex); + break; + case JOIN: + result = parse_join(&lex); + break; + default: + result = SYNTAX_ERROR; + } + break; + case JOIN: + result = parse_join(&lex); + break; + case CREATE: + result = parse_create(&lex); + break; + case REMOVE: + result = parse_remove(&lex); + break; + case INSERT: + result = parse_insert(&lex); + break; + case SELECT: + result = parse_select(&lex); + break; + case NONE: + case COMMENT: + result = OK; + case END: + break; + default: + result = SYNTAX_ERROR; + } + } + + if(AQL_ERROR(result)) { + PRINTF("Error in function %s, line %d: input \"%s\"\n", + error_function, error_line, error_message); + } + + return result; +} diff --git a/apps/antelope/aql.h b/apps/antelope/aql.h new file mode 100644 index 000000000..ab29f7e5d --- /dev/null +++ b/apps/antelope/aql.h @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file is part of the Contiki operating system. + * + */ + +/** + * \file + * Definitions and declarations for AQL, the Antelope Query Language. + * \author + * Nicolas Tsiftes + */ + +#ifndef AQL_H +#define AQL_H + +#include "db-options.h" +#include "index.h" +#include "relation.h" +#include "result.h" + +enum aql_status { + OK = 2, + SYNTAX_ERROR = 3, + INVALID_TOKEN = 9, + PLE_ERROR = 12 +}; +typedef enum aql_status aql_status_t; +#define AQL_ERROR(x) ((x) >= 3) + +enum token { + END = 0, + LEFT_PAREN = 1, + RIGHT_PAREN = 2, + COMMA = 3, + EQUAL = 4, + GT = 5, + LT = 6, + DOT = 7, + ADD = 8, + SUB = 9, + MUL = 10, + DIV = 11, + COMMENT = 12, + GEQ = 13, + LEQ = 14, + NOT_EQUAL = 15, + ASSIGN = 16, + OR = 17, + IS = 18, + ON = 19, + IN = 20, + AND = 21, + NOT = 22, + SUM = 23, + MAX = 24, + MIN = 25, + INT = 26, + INTO = 27, + FROM = 28, + MEAN = 29, + JOIN = 30, + LONG = 31, + TYPE = 32, + WHERE = 33, + COUNT = 34, + INDEX = 35, + INSERT = 36, + SELECT = 37, + REMOVE = 38, + CREATE = 39, + MEDIAN = 40, + DOMAIN = 41, + STRING = 42, + INLINE = 43, + PROJECT = 44, + MAXHEAP = 45, + MEMHASH = 46, + RELATION = 47, + ATTRIBUTE = 48, + + INTEGER_VALUE = 251, + FLOAT_VALUE = 252, + 
STRING_VALUE = 253, + IDENTIFIER = 254, + NONE = 255 +}; + +typedef enum token token_t; + +typedef char value_t[DB_MAX_ELEMENT_SIZE]; + +struct lexer { + const char *input; + const char *prev_pos; + token_t *token; + value_t *value; +}; + +typedef struct lexer lexer_t; + +enum aql_aggregator { + AQL_NONE = 0, + AQL_COUNT = 1, + AQL_SUM = 2, + AQL_MIN = 3, + AQL_MAX = 4, + AQL_MEAN = 5, + AQL_MEDIAN = 6 +}; + +typedef enum aql_aggregator aql_aggregator_t; + +struct aql_attribute { + domain_t domain; + uint8_t element_size; + uint8_t flags; + char name[ATTRIBUTE_NAME_LENGTH + 1]; +}; +typedef struct aql_attribute aql_attribute_t; + +struct aql_adt { + char relations[AQL_RELATION_LIMIT][RELATION_NAME_LENGTH + 1]; + aql_attribute_t attributes[AQL_ATTRIBUTE_LIMIT]; + aql_aggregator_t aggregators[AQL_ATTRIBUTE_LIMIT]; + attribute_value_t values[AQL_ATTRIBUTE_LIMIT]; + index_type_t index_type; + uint8_t relation_count; + uint8_t attribute_count; + uint8_t value_count; + uint8_t optype; + uint8_t flags; + void *lvm_instance; +}; +typedef struct aql_adt aql_adt_t; + +#define AQL_TYPE_NONE 0 +#define AQL_TYPE_SELECT 1 +#define AQL_TYPE_INSERT 2 +#define AQL_TYPE_UPDATE 3 +#define AQL_TYPE_DROP 4 +#define AQL_TYPE_DELETE 5 +#define AQL_TYPE_RENAME 6 +#define AQL_TYPE_CREATE_ATTRIBUTE 7 +#define AQL_TYPE_CREATE_INDEX 8 +#define AQL_TYPE_CREATE_RELATION 9 +#define AQL_TYPE_REMOVE_ATTRIBUTE 10 +#define AQL_TYPE_REMOVE_INDEX 11 +#define AQL_TYPE_REMOVE_RELATION 12 +#define AQL_TYPE_REMOVE_TUPLES 13 +#define AQL_TYPE_JOIN 14 + +#define AQL_FLAG_AGGREGATE 1 +#define AQL_FLAG_ASSIGN 2 +#define AQL_FLAG_INVERSE_LOGIC 4 + +#define AQL_CLEAR(adt) aql_clear(adt) +#define AQL_SET_TYPE(adt, type) (((adt))->optype = (type)) +#define AQL_GET_TYPE(adt) ((adt)->optype) +#define AQL_SET_INDEX_TYPE(adt, type) ((adt)->index_type = (type)) +#define AQL_GET_INDEX_TYPE(adt) ((adt)->index_type) + +#define AQL_SET_FLAG(adt, flag) (((adt)->flags) |= (flag)) +#define AQL_GET_FLAGS(adt) ((adt)->flags) 
+#define AQL_ADD_RELATION(adt, rel) \ + strcpy((adt)->relations[(adt)->relation_count++], (rel)) +#define AQL_RELATION_COUNT(adt) ((adt)->relation_count) +#define AQL_ADD_ATTRIBUTE(adt, attr, dom, size) \ + aql_add_attribute(adt, attr, dom, size, 0) +#define AQL_ADD_PROCESSING_ATTRIBUTE(adt, attr) \ + aql_add_attribute((adt), (attr), DOMAIN_UNSPECIFIED, 0, 1) +#define AQL_ADD_AGGREGATE(adt, function, attr) \ + do { \ + (adt)->aggregators[(adt)->attribute_count] = (function); \ + aql_add_attribute((adt), (attr), DOMAIN_UNSPECIFIED, 0, 0); \ + } while(0) +#define AQL_ATTRIBUTE_COUNT(adt) ((adt)->attribute_count) +#define AQL_SET_CONDITION(adt, cond) ((adt)->lvm_instance = (cond)) +#define AQL_ADD_VALUE(adt, domain, value) \ + aql_add_value((adt), (domain), (value)) + +int lexer_start(lexer_t *, char *, token_t *, value_t *); +int lexer_next(lexer_t *); +void lexer_rewind(lexer_t *); + +void aql_clear(aql_adt_t *adt); +aql_status_t aql_parse(aql_adt_t *adt, char *query_string); +db_result_t aql_add_attribute(aql_adt_t *adt, char *name, + domain_t domain, unsigned element_size, + int processed_only); +db_result_t aql_add_value(aql_adt_t *adt, domain_t domain, void *value); +db_result_t db_query(db_handle_t *handle, const char *format, ...); +db_result_t db_process(db_handle_t *handle); + +#endif /* !AQL_H */ diff --git a/apps/antelope/attribute.h b/apps/antelope/attribute.h new file mode 100644 index 000000000..03278919e --- /dev/null +++ b/apps/antelope/attribute.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Definitions for attributes. 
+ * \author + * Nicolas Tsiftes + */ + +#ifndef ATTRIBUTE_H +#define ATTRIBUTE_H + +#include +#include + +#include "lib/list.h" + +#include "db-options.h" + +typedef enum { + DOMAIN_UNSPECIFIED = 0, + DOMAIN_INT = 1, + DOMAIN_LONG = 2, + DOMAIN_STRING = 3, + DOMAIN_FLOAT = 4 +} domain_t; + +#define ATTRIBUTE_FLAG_NO_STORE 0x1 +#define ATTRIBUTE_FLAG_INVALID 0x2 +#define ATTRIBUTE_FLAG_PRIMARY_KEY 0x4 +#define ATTRIBUTE_FLAG_UNIQUE 0x8 + +struct attribute { + struct attribute *next; + void *index; + long aggregation_value; + uint8_t aggregator; + uint8_t domain; + uint8_t element_size; + uint8_t flags; + char name[ATTRIBUTE_NAME_LENGTH + 1]; +}; + +typedef struct attribute attribute_t; +typedef uint8_t attribute_id_t; + +struct attribute_value { + union { + int int_value; + long long_value; + unsigned char *string_value; + } u; + domain_t domain; +}; + +typedef struct attribute_value attribute_value_t; + +#define VALUE_LONG(value) (value)->u.long_value +#define VALUE_INT(value) (value)->u.int_value +#define VALUE_STRING(value) (value)->u.string_value + +#endif /* ATTRIBUTES_H */ diff --git a/apps/antelope/db-options.h b/apps/antelope/db-options.h new file mode 100644 index 000000000..7f4dce4cc --- /dev/null +++ b/apps/antelope/db-options.h @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Database configuration options. + * \author + * Nicolas Tsiftes + */ + +#ifndef DB_OPTIONS_H +#define DB_OPTIONS_H + +#include "contiki-conf.h" + +/* Features. Include only what is needed in order to save space. */ +#ifndef DB_FEATURE_JOIN +#define DB_FEATURE_JOIN 1 +#endif /* DB_FEATURE_JOIN */ + +#ifndef DB_FEATURE_REMOVE +#define DB_FEATURE_REMOVE 1 +#endif /* DB_FEATURE_REMOVE */ + +#ifndef DB_FEATURE_FLOATS +#define DB_FEATURE_FLOATS 0 +#endif /* DB_FEATURE_FLOATS */ + +#ifndef DB_FEATURE_COFFEE +#define DB_FEATURE_COFFEE 1 +#endif /* DB_FEATURE_COFFEE */ + +#ifndef DB_FEATURE_INTEGRITY +#define DB_FEATURE_INTEGRITY 0 +#endif /* DB_FEATURE_INTEGRITY */ + + +/* Configuration parameters that may be trimmed to save space. 
*/ +#ifndef DB_ERROR_BUF_SIZE +#define DB_ERROR_BUF_SIZE 50 +#endif /* DB_ERROR_BUF_SIZE */ + +#ifndef DB_INDEX_POOL_SIZE +#define DB_INDEX_POOL_SIZE 3 +#endif /* DB_INDEX_POOL_SIZE */ + +#ifndef DB_RELATION_POOL_SIZE +#define DB_RELATION_POOL_SIZE 5 +#endif /* DB_RELATION_POOL_SIZE */ + +#ifndef DB_ATTRIBUTE_POOL_SIZE +#define DB_ATTRIBUTE_POOL_SIZE 16 +#endif /* DB_ATTRIBUTE_POOL_SIZE */ + +#ifndef DB_MAX_ATTRIBUTES_PER_RELATION +#define DB_MAX_ATTRIBUTES_PER_RELATION 6 +#endif /* DB_MAX_ATTRIBUTES_PER_RELATION */ + +#ifndef DB_MAX_ELEMENT_SIZE +#define DB_MAX_ELEMENT_SIZE 16 +#endif /* DB_MAX_ELEMENT_SIZE */ + + +/* Language options. */ +#ifndef AQL_MAX_QUERY_LENGTH +#define AQL_MAX_QUERY_LENGTH 128 +#endif /* AQL_MAX_QUERY_LENGTH */ + +#ifndef AQL_MAX_VALUE_LENGTH +#define AQL_MAX_VALUE_LENGTH DB_MAX_ELEMENT_SIZE +#endif /* AQL_MAX_VALUE_LENGTH */ + +#ifndef AQL_RELATION_LIMIT +#define AQL_RELATION_LIMIT 3 +#endif /* AQL_RELATION_LIMIT */ + +#ifndef AQL_ATTRIBUTE_LIMIT +#define AQL_ATTRIBUTE_LIMIT 5 +#endif /* AQL_ATTRIBUTE_LIMIT */ + + +/* Physical storage options. Changing these may cause compatibility problems. */ +#ifndef DB_COFFEE_RESERVE_SIZE +#define DB_COFFEE_RESERVE_SIZE (128 * 1024UL) +#endif /* DB_COFFEE_RESERVE_SIZE */ + +#ifndef DB_MAX_CHAR_SIZE_PER_ROW +#define DB_MAX_CHAR_SIZE_PER_ROW 64 +#endif /* DB_MAX_CHAR_SIZE_PER_ROW */ + +#ifndef DB_MAX_FILENAME_LENGTH +#define DB_MAX_FILENAME_LENGTH 16 +#endif /* DB_MAX_FILENAME_LENGTH */ + +#ifndef ATTRIBUTE_NAME_LENGTH +#define ATTRIBUTE_NAME_LENGTH 12 +#endif /* ATTRIBUTE_NAME_LENGTH */ + +#ifndef RELATION_NAME_LENGTH +#define RELATION_NAME_LENGTH 10 +#endif /* RELATION_NAME_LENGTH */ + +#ifndef RESULT_RELATION +#define RESULT_RELATION "db-result" +#endif /* RESULT_RELATION */ + +#ifndef TEMP_RELATION +#define TEMP_RELATION "db-temp" +#endif /* TEMP_RELATION */ + +/* Index options. 
*/ +#ifndef DB_INDEX_COST +#define DB_INDEX_COST 64 +#endif /* DB_INDEX_COST */ + +#ifndef DB_MEMHASH_INDEX_LIMIT +#define DB_MEMHASH_INDEX_LIMIT 1 +#endif /* DB_MEMHASH_INDEX_LIMIT */ + +#ifndef DB_MEMHASH_TABLE_SIZE +#define DB_MEMHASH_TABLE_SIZE 61 +#endif /* DB_MEMHASH_TABLE_SIZE */ + +#ifndef DB_HEAP_INDEX_LIMIT +#define DB_HEAP_INDEX_LIMIT 1 +#endif /* DB_HEAP_INDEX_LIMIT */ + +#ifndef DB_HEAP_CACHE_LIMIT +#define DB_HEAP_CACHE_LIMIT 1 +#endif /* DB_HEAP_CACHE_LIMIT */ + + +/* Propositional Logic Engine options. */ +#ifndef PLE_MAX_NAME_LENGTH +#define PLE_MAX_NAME_LENGTH ATTRIBUTE_NAME_LENGTH +#endif /* PLE_MAX_NAME_LENGTH */ + +/* Parenthesized so that the expansion stays intact inside larger expressions. */ +#ifndef PLE_MAX_VARIABLE_ID +#define PLE_MAX_VARIABLE_ID (AQL_ATTRIBUTE_LIMIT - 1) +#endif /* PLE_MAX_VARIABLE_ID */ + +#ifndef PLE_USE_FLOATS +#define PLE_USE_FLOATS DB_FEATURE_FLOATS +#endif /* PLE_USE_FLOATS */ + + +#endif /* !DB_OPTIONS_H */ diff --git a/apps/antelope/db-types.h b/apps/antelope/db-types.h new file mode 100644 index 000000000..bf2ad97ce --- /dev/null +++ b/apps/antelope/db-types.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * . + * \author + * Nicolas Tsiftes + */ + +#ifndef DB_TYPES_H +#define DB_TYPES_H + +enum db_result { + DB_FINISHED = 3, + DB_GOT_ROW = 2, + DB_OK = 1, + DB_LIMIT_ERROR = -1, + DB_ALLOCATION_ERROR = -2, + DB_STORAGE_ERROR = -3, + DB_PARSING_ERROR = -4, + DB_NAME_ERROR = -5, + DB_RELATIONAL_ERROR = -6, + DB_TYPE_ERROR = -7, + DB_IMPLEMENTATION_ERROR = -8, + DB_INDEX_ERROR = -9, + DB_BUSY_ERROR = -10, + DB_INCONSISTENCY_ERROR = -11, + DB_ARGUMENT_ERROR = -12 +}; + +typedef enum db_result db_result_t; +typedef int db_storage_id_t; + +#define DB_ERROR(result_code) ((result_code) < DB_OK) +#define DB_SUCCESS(result_code) !DB_ERROR(result_code) + +#endif /* !DB_TYPES_H */ diff --git a/apps/antelope/debug.h b/apps/antelope/debug.h new file mode 100644 index 000000000..edb39b8ed --- /dev/null +++ b/apps/antelope/debug.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file is part of the Contiki operating system. + * + * $Id: uip-debug.h,v 1.1 2010/04/30 13:20:57 joxe Exp $ + */ +/** + * \file + * A set of debugging macros. + * + * \author Nicolas Tsiftes + * Niclas Finne + * Joakim Eriksson + */ + +#ifndef UIP_DEBUG_H +#define UIP_DEBUG_H + +/* Bit flags for DEBUG; DEBUG_FULL is parenthesized so that it can be + combined with other operators without precedence surprises. */ +#define DEBUG_NONE 0 +#define DEBUG_PRINT 1 +#define DEBUG_ANNOTATE 2 +#define DEBUG_FULL (DEBUG_ANNOTATE | DEBUG_PRINT) + +#if (DEBUG) & DEBUG_ANNOTATE +#include <stdio.h> +#define ANNOTATE(...) printf(__VA_ARGS__) +#else +#define ANNOTATE(...) +#endif /* (DEBUG) & DEBUG_ANNOTATE */ + +#if (DEBUG) & DEBUG_PRINT +#include <stdio.h> +#define PRINTF(...) 
printf(__VA_ARGS__) +#else +#define PRINTF(...) +#endif /* (DEBUG) & DEBUG_PRINT */ + +#endif diff --git a/apps/antelope/index-inline.c b/apps/antelope/index-inline.c new file mode 100644 index 000000000..2654ce5b5 --- /dev/null +++ b/apps/antelope/index-inline.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/** + * \file + * A binary search index for attributes that are constrained to be + * monotonically increasing, which is a rather common pattern for + * time series or keys. Since this index has no storage overhead, + * it does not wear out the flash memory nor does it occupy scarce + * space. Furthermore, unlike B+-trees, it has an O(1) memory + * footprint in relation to the number of data items. + * \author + * Nicolas Tsiftes + */ + +#include +#include + +#include "index.h" +#include "relation.h" +#include "result.h" +#include "storage.h" + +#define DEBUG DEBUG_NONE +#include "net/uip-debug.h" + +struct search_handle { + index_t *index; + tuple_id_t start_row; + tuple_id_t end_row; +}; + +struct search_handle handle; + +static db_result_t null_op(index_t *); +static db_result_t insert(index_t *, attribute_value_t *, tuple_id_t); +static db_result_t delete(index_t *, attribute_value_t *); +static tuple_id_t get_next(index_iterator_t *); + +/* + * The create, destroy, load, release, insert, and delete operations + * of the index API always succeed because the index does not store + * items separately from the row file. The four former operations share + * the same signature, and are thus implemented by the null_op function + * to save space. 
+ */ +index_api_t index_inline = { + INDEX_INLINE, + INDEX_API_EXTERNAL | INDEX_API_COMPLETE | INDEX_API_RANGE_QUERIES, + null_op, + null_op, + null_op, + null_op, + insert, + delete, + get_next +}; + +/* + * Read the tuple with id *index from rel and extract the value of attr. + * Returns NULL on failure. The result points to static storage and is + * overwritten by the next call. + */ +static attribute_value_t * +get_value(tuple_id_t *index, relation_t *rel, attribute_t *attr) +{ + unsigned char *row; + static attribute_value_t value; + + row = alloca(rel->row_length); + if(row == NULL) { + return NULL; + } + + if(DB_ERROR(storage_get_row(rel, index, row))) { + return NULL; + } + + if(DB_ERROR(relation_get_value(rel, attr, row, &value))) { + PRINTF("DB: Unable to retrieve a value from tuple %ld\n", (long)(*index)); + return NULL; + } + + return &value; +} + +/* + * Binary search over the tuples of the indexed relation for target_value, + * comparing values converted with db_value_to_long(). Returns the id of + * the last tuple probed. If exact_match is set and that tuple's value + * differs from the target, or if the cardinality or a tuple value cannot + * be obtained, INVALID_TUPLE is returned. + */ +static tuple_id_t +binary_search(index_iterator_t *index_iterator, + attribute_value_t *target_value, + int exact_match) +{ + relation_t *rel; + attribute_t *attr; + attribute_value_t *cmp_value; + tuple_id_t min; + tuple_id_t max; + tuple_id_t center; + + rel = index_iterator->index->rel; + attr = index_iterator->index->attr; + + max = relation_cardinality(rel); + /* An empty relation has nothing to search; also avoids max-- wrapping + around (assumes tuple_id_t may be unsigned -- TODO confirm). */ + if(max == INVALID_TUPLE || max == 0) { + return INVALID_TUPLE; + } + max--; + min = 0; + + do { + center = min + ((max - min) / 2); + + cmp_value = get_value(&center, rel, attr); + if(cmp_value == NULL) { + PRINTF("DB: Failed to get the center value, index = %ld\n", + (long)center); + return INVALID_TUPLE; + } + + if(db_value_to_long(target_value) > db_value_to_long(cmp_value)) { + min = center + 1; + } else { + if(center == 0) { + /* Already at the first tuple; center - 1 would wrap around if + tuple_id_t is unsigned. */ + break; + } + max = center - 1; + } + } while(min <= max && db_value_to_long(target_value) != db_value_to_long(cmp_value)); + + if(exact_match && + db_value_to_long(target_value) != db_value_to_long(cmp_value)) { + PRINTF("DB: Could not find value %ld in the inline index\n", + db_value_to_long(target_value)); + return INVALID_TUPLE; + } + + return center; +} + +/* + * Locate the tuples matching the iterator's min_value and max_value and + * store their ids in *start and *end. Returns DB_OK, or DB_INDEX_ERROR + * if either search fails; the result is a db_result_t so that the caller + * can test it with DB_ERROR(). + */ +static db_result_t +range_search(index_iterator_t *index_iterator, + tuple_id_t *start, tuple_id_t *end) +{ + attribute_value_t *low_target; + attribute_value_t *high_target; + int exact_match; + + 
low_target = &index_iterator->min_value; + high_target = &index_iterator->max_value; + + PRINTF("DB: Search index for value range (%ld, %ld)\n", + db_value_to_long(low_target), db_value_to_long(high_target)); + + exact_match = db_value_to_long(low_target) == db_value_to_long(high_target); + + /* Optimize later so that the other search uses the result + from the first one. */ + *start = binary_search(index_iterator, low_target, exact_match); + if(*start == INVALID_TUPLE) { + return DB_INDEX_ERROR; + } + + *end = binary_search(index_iterator, high_target, exact_match); + if(*end == INVALID_TUPLE) { + return DB_INDEX_ERROR; + } + return DB_OK; +} + +static db_result_t +null_op(index_t *index) +{ + return DB_OK; +} + +static db_result_t +insert(index_t *index, attribute_value_t *value, tuple_id_t tuple_id) +{ + return DB_OK; +} + +static db_result_t +delete(index_t *index, attribute_value_t *value) +{ + return DB_OK; +} + +static tuple_id_t +get_next(index_iterator_t *iterator) +{ + static tuple_id_t cached_start; + static tuple_id_t cached_end; + + if(iterator->next_item_no == 0) { + /* + * We conduct the actual index search when the caller attempts to + * access the first item in the iteration. The first and last tuple + * id:s of the result get cached for subsequent iterations. 
+ */ + if(DB_ERROR(range_search(iterator, &cached_start, &cached_end))) { + cached_start = 0; + cached_end = 0; + return INVALID_TUPLE; + } + PRINTF("DB: Cached the tuple range (%ld,%ld)\n", + (long)cached_start, (long)cached_end); + ++iterator->next_item_no; + return cached_start; + } else if(cached_start + iterator->next_item_no <= cached_end) { + return cached_start + iterator->next_item_no++; + } + + return INVALID_TUPLE; +} diff --git a/apps/antelope/index-maxheap.c b/apps/antelope/index-maxheap.c new file mode 100644 index 000000000..bc1c8455a --- /dev/null +++ b/apps/antelope/index-maxheap.c @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * A binary maximum heap for data indexing over flash memory. + * + * The idea behind this method is to write entries sequentially + * into small buckets, which are indexed in a binary maximum heap. + * Although sequential writes make the entries unsorted within a + * bucket, the time to load and scan a single bucket is small. The + * sequential write is important for flash memories, which are + * unable to handle multiple rewrites of the same page without doing + * an expensive erase operation between the rewrites. + * + * Each bucket specifies a range (a,b) of values that it accepts. + * Once a bucket fills up, two buckets are created with the ranges + * (a,mean) and (mean+1, b), respectively. The entries from the + * original bucket are then copied into the appropriate new bucket + * before the old bucket gets deleted. + * \author + * Nicolas Tsiftes + */ + +#include +#include +#include +#include + +#include "cfs/cfs.h" +#include "cfs/cfs-coffee.h" +#include "lib/memb.h" +#include "lib/random.h" + +#include "db-options.h" +#include "index.h" +#include "result.h" +#include "storage.h" + +#define DEBUG DEBUG_NONE +#include "net/uip-debug.h" + +#define BRANCH_FACTOR 2 +#define BUCKET_SIZE 128 +#define NODE_LIMIT 511 +#define NODE_DEPTH 9 + +#if (1 << NODE_DEPTH) != (NODE_LIMIT + 1) +#error "NODE_DEPTH is set incorrectly." 
+#endif
+
+/* A heap node or a (key, value) pair whose fields are all zero is unused. */
+#define EMPTY_NODE(node)  ((node)->min == 0 && (node)->max == 0)
+#define EMPTY_PAIR(pair)  ((pair)->key == 0 && (pair)->value == 0)
+
+typedef uint16_t maxheap_key_t;
+typedef uint16_t maxheap_value_t;
+
+#define KEY_MIN 0
+#define KEY_MAX 65535
+
+/* The inclusive range of hashed keys that a bucket accepts. */
+struct heap_node {
+  maxheap_key_t min;
+  maxheap_key_t max;
+};
+typedef struct heap_node heap_node_t;
+
+struct key_value_pair {
+  maxheap_key_t key;
+  maxheap_value_t value;
+};
+
+struct bucket {
+  struct key_value_pair pairs[BUCKET_SIZE];
+};
+typedef struct bucket bucket_t;
+
+struct heap {
+  db_storage_id_t heap_storage;
+  db_storage_id_t bucket_storage;
+  /* Remember where the next free slot for each bucket is located. */
+  uint8_t next_free_slot[NODE_LIMIT];
+};
+typedef struct heap heap_t;
+
+/* An in-memory copy of one bucket; heap == NULL marks a free cache entry. */
+struct bucket_cache {
+  heap_t *heap;
+  uint16_t bucket_id;
+  bucket_t bucket;
+};
+
+static struct bucket_cache bucket_cache[DB_HEAP_CACHE_LIMIT];
+MEMB(heaps, heap_t, DB_HEAP_INDEX_LIMIT);
+
+static struct bucket_cache *get_cache(heap_t *, int);
+static struct bucket_cache *get_cache_free(void);
+static void invalidate_cache(void);
+static maxheap_key_t transform_key(maxheap_key_t);
+static int heap_read(heap_t *, int, heap_node_t *);
+static int heap_write(heap_t *, int, heap_node_t *);
+static int heap_insert(heap_t *, maxheap_key_t, maxheap_key_t);
+static int heap_find(heap_t *, maxheap_key_t key, int *iterator);
+#if HEAP_DEBUG
+static void heap_print(heap_t *);
+#endif
+static int bucket_read(heap_t *, int, bucket_t *);
+static struct bucket_cache *bucket_load(heap_t *, int);
+static int bucket_append(heap_t *, int, struct key_value_pair *);
+static int bucket_split(heap_t *, int);
+
+static db_result_t create(index_t *);
+static db_result_t destroy(index_t *);
+static db_result_t load(index_t *);
+static db_result_t release(index_t *);
+static db_result_t insert(index_t *, attribute_value_t *, tuple_id_t);
+static db_result_t delete(index_t *, attribute_value_t *);
+static tuple_id_t get_next(index_iterator_t *);
+
+index_api_t index_maxheap = {
+  INDEX_MAXHEAP,
+  INDEX_API_EXTERNAL,
+  create,
+  destroy,
+  load,
+  release,
+  insert,
+  delete,
+  get_next
+};
+
+/* Return the cache entry holding bucket_id of the given heap, or NULL. */
+static struct bucket_cache *
+get_cache(heap_t *heap, int bucket_id)
+{
+  int i;
+
+  for(i = 0; i < DB_HEAP_CACHE_LIMIT; i++) {
+    if(bucket_cache[i].heap == heap && bucket_cache[i].bucket_id == bucket_id) {
+      return &bucket_cache[i];
+    }
+  }
+  return NULL;
+}
+
+/* Return an unused cache entry, or NULL if all entries are occupied. */
+static struct bucket_cache *
+get_cache_free(void)
+{
+  int i;
+
+  for(i = 0; i < DB_HEAP_CACHE_LIMIT; i++) {
+    if(bucket_cache[i].heap == NULL) {
+      return &bucket_cache[i];
+    }
+  }
+  return NULL;
+}
+
+/* Free one cache entry: the first occupied entry found is discarded. */
+static void
+invalidate_cache(void)
+{
+  int i;
+
+  for(i = 0; i < DB_HEAP_CACHE_LIMIT; i++) {
+    if(bucket_cache[i].heap != NULL) {
+      bucket_cache[i].heap = NULL;
+      break;
+    }
+  }
+}
+
+/*
+ * Map a key to the hashed key used to select heap nodes.
+ *
+ * Note that this re-seeds the random number generator with the key on
+ * every call.
+ */
+static maxheap_key_t
+transform_key(maxheap_key_t key)
+{
+  random_init(key);
+  return random_rand();
+}
+
+/*
+ * Read heap node number bucket_id from the heap file. The nodes are stored
+ * after the bucket file name, which occupies the first
+ * DB_MAX_FILENAME_LENGTH bytes of the file.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int
+heap_read(heap_t *heap, int bucket_id, heap_node_t *node)
+{
+  if(DB_ERROR(storage_read(heap->heap_storage, node,
+                           DB_MAX_FILENAME_LENGTH +
+                           (unsigned long)bucket_id * sizeof(*node),
+                           sizeof(*node)))) {
+    return 0;
+  }
+
+  return 1;
+}
+
+/*
+ * Write heap node number bucket_id to the heap file.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int
+heap_write(heap_t *heap, int bucket_id, heap_node_t *node)
+{
+  if(DB_ERROR(storage_write(heap->heap_storage, node,
+                            DB_MAX_FILENAME_LENGTH +
+                            (unsigned long)bucket_id * sizeof(*node),
+                            sizeof(*node)))) {
+    return 0;
+  }
+
+  return 1;
+}
+
+/*
+ * Insert a node covering the key range (min, max) into the first empty
+ * position found while descending through the nodes that enclose the range.
+ *
+ * Returns the position of the new node, or -1 on failure.
+ */
+static int
+heap_insert(heap_t *heap, maxheap_key_t min, maxheap_key_t max)
+{
+  int i;
+  heap_node_t node;
+
+  PRINTF("DB: Insert node (%ld,%ld) into the heap\n", (long)min, (long)max);
+
+  if(min > max) {
+    return -1;
+  }
+
+  for(i = 0; i < NODE_LIMIT;) {
+    if(heap_read(heap, i, &node) == 0) {
+      PRINTF("DB: Failed to read heap node %d\n", i);
+      return -1;
+    }
+
+    if(EMPTY_NODE(&node)) {
+      node.min = min;
+      node.max = max;
+      if(heap_write(heap, i, &node) == 0) {
+        PRINTF("DB: Failed to write heap node %d\n", i);
+        return -1;
+      }
+      return i;
+    } else if(node.min <= min && max <= node.max) {
+      /* The range fits inside this node; continue with its first child. */
+      i = BRANCH_FACTOR * i + 1;
+    } else {
+      /* Try the sibling. */
+      i++;
+    }
+  }
+
+  PRINTF("DB: No more nodes available\n");
+  return -1;
+}
+
+/*
+ * Find the next node, starting at position *iterator, whose range contains
+ * the hashed key. On success, *iterator is advanced to the first child of
+ * the node found so that a subsequent call continues one level down.
+ *
+ * Returns the position of the node, or -1 if no further node matches.
+ */
+static int
+heap_find(heap_t *heap, maxheap_key_t key, int *iterator)
+{
+  maxheap_key_t hashed_key;
+  int i;
+  int first_child;
+  static heap_node_t node;
+
+  hashed_key = transform_key(key);
+
+  for(i = *iterator; i < NODE_LIMIT;) {
+    if(heap_read(heap, i, &node) == 0) {
+      break;
+    }
+    if(EMPTY_NODE(&node)) {
+      break;
+    } else if(node.min <= hashed_key && hashed_key <= node.max) {
+      first_child = BRANCH_FACTOR * i + 1;
+
+      if(first_child >= NODE_LIMIT) {
+        break;
+      }
+      *iterator = first_child;
+      return i;
+    } else {
+      i++;
+    }
+  }
+
+  return -1;
+}
+
+#if HEAP_DEBUG
+/* Print the heap nodes in storage order, indented by their level. */
+static void
+heap_print(heap_t *heap)
+{
+  int level_count;
+  int branch_count;
+  int branch_amount;
+  int i, j;
+  heap_node_t node;
+
+  level_count = 0;
+  branch_count = 0;
+  branch_amount = BRANCH_FACTOR;
+
+  for(i = 0;; i++) {
+    if(heap_read(heap, i, &node) == 0 || EMPTY_NODE(&node)) {
+      break;
+    }
+
+    for(j = 0; j < level_count; j++) {
+      PRINTF("\t");
+    }
+    PRINTF("(%ld,%ld)\n", (long)node.min, (long)node.max);
+    if(level_count == 0) {
+      level_count++;
+    } else if(branch_count + 1 == branch_amount) {
+      level_count++;
+      branch_count = 0;
+      branch_amount = branch_amount * BRANCH_FACTOR;
+    } else {
+      branch_count++;
+    }
+  }
+}
+#endif /* HEAP_DEBUG */
+
+/*
+ * Read the used part of a bucket from the bucket file. If the next free
+ * slot of the bucket is recorded as 0, the whole bucket is read.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int
+bucket_read(heap_t *heap, int bucket_id, bucket_t *bucket)
+{
+  size_t size;
+
+  if(heap->next_free_slot[bucket_id] == 0) {
+    size = BUCKET_SIZE;
+  } else {
+    size = heap->next_free_slot[bucket_id];
+  }
+
+  size *= sizeof(struct key_value_pair);
+
+  if(DB_ERROR(storage_read(heap->bucket_storage, bucket,
+                           (unsigned long)bucket_id * sizeof(*bucket), size))) {
+    return 0;
+  }
+
+  return 1;
+}
+
+/*
+ * Return a cache entry containing the requested bucket, reading it from
+ * storage if it is not cached. One cache entry is discarded if the cache
+ * is full. If the next free slot of the bucket is recorded as 0, it is
+ * determined by scanning for the first empty pair.
+ *
+ * Returns NULL if the bucket could not be loaded.
+ */
+static struct bucket_cache *
+bucket_load(heap_t *heap, int bucket_id)
+{
+  int i;
+  struct bucket_cache *cache;
+
+  cache = get_cache(heap, bucket_id);
+  if(cache != NULL) {
+    return cache;
+  }
+
+  cache = get_cache_free();
+  if(cache == NULL) {
+    invalidate_cache();
+    cache = get_cache_free();
+    if(cache == NULL) {
+      return NULL;
+    }
+  }
+
+  if(bucket_read(heap, bucket_id, &cache->bucket) == 0) {
+    return NULL;
+  }
+
+  cache->heap = heap;
+  cache->bucket_id = bucket_id;
+
+  if(heap->next_free_slot[bucket_id] == 0) {
+    for(i = 0; i < BUCKET_SIZE; i++) {
+      if(EMPTY_PAIR(&cache->bucket.pairs[i])) {
+        break;
+      }
+    }
+
+    heap->next_free_slot[bucket_id] = i;
+  }
+
+  PRINTF("DB: Loaded bucket %d, the next free slot is %u\n", bucket_id,
+         (unsigned)heap->next_free_slot[bucket_id]);
+
+  return cache;
+}
+
+/*
+ * Write a pair into the next free slot of a bucket in the bucket file.
+ *
+ * Returns 1 on success and 0 if the bucket is full or the write failed.
+ */
+static int
+bucket_append(heap_t *heap, int bucket_id, struct key_value_pair *pair)
+{
+  unsigned long offset;
+
+  if(heap->next_free_slot[bucket_id] >= BUCKET_SIZE) {
+    PRINTF("DB: Invalid write attempt to the full bucket %d\n", bucket_id);
+    return 0;
+  }
+
+  offset = (unsigned long)bucket_id * sizeof(bucket_t);
+  offset += heap->next_free_slot[bucket_id] * sizeof(struct key_value_pair);
+
+  if(DB_ERROR(storage_write(heap->bucket_storage, pair, offset, sizeof(*pair)))) {
+    return 0;
+  }
+
+  heap->next_free_slot[bucket_id]++;
+
+  return 1;
+}
+
+/*
+ * Split the key range of a bucket at its mean value by inserting two new
+ * heap nodes, (min, mean) and (mean + 1, max). The pairs already stored in
+ * the bucket are not moved by this function.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int
+bucket_split(heap_t *heap, int bucket_id)
+{
+  heap_node_t node;
+  maxheap_key_t mean;
+  int small_bucket_index;
+  int large_bucket_index;
+
+  if(heap_read(heap, bucket_id, &node) == 0) {
+    return 0;
+  }
+
+  mean = node.min + ((node.max - node.min) / 2);
+
+  PRINTF("DB: Split bucket %d (%ld, %ld) at mean value %ld\n", bucket_id,
+         (long)node.min, (long)node.max, (long)mean);
+
+  small_bucket_index = heap_insert(heap, node.min, mean);
+  if(small_bucket_index < 0) {
+    return 0;
+  }
+
+  large_bucket_index = heap_insert(heap, mean + 1, node.max);
+  if(large_bucket_index < 0) {
+    /*heap_remove(small_bucket);*/
+    return 0;
+  }
+
+  return 1;
+}
+
+/*
+ * Store a (key, value) pair in the deepest bucket whose range contains the
+ * hashed key, splitting that bucket first if it is full.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int
+insert_item(heap_t *heap, maxheap_key_t key, maxheap_value_t value)
+{
+  int heap_iterator;
+  int bucket_id, last_good_bucket_id;
+  struct key_value_pair pair;
+
+  /* Descend until no deeper node accepts the key. */
+  for(heap_iterator = 0, last_good_bucket_id = -1;;) {
+    bucket_id = heap_find(heap, key, &heap_iterator);
+    if(bucket_id < 0) {
+      break;
+    }
+    last_good_bucket_id = bucket_id;
+  }
+  bucket_id = last_good_bucket_id;
+
+  if(bucket_id < 0) {
+    PRINTF("DB: No bucket for key %ld\n", (long)key);
+    return 0;
+  }
+
+  pair.key = key;
+  pair.value = value;
+
+  if(heap->next_free_slot[bucket_id] == BUCKET_SIZE) {
+    PRINTF("DB: Bucket %d is full\n", bucket_id);
+    if(bucket_split(heap, bucket_id) == 0) {
+      return 0;
+    }
+
+    /* Select one of the newly created buckets. */
+    bucket_id = heap_find(heap, key, &heap_iterator);
+    if(bucket_id < 0) {
+      return 0;
+    }
+  }
+
+  if(bucket_append(heap, bucket_id, &pair) == 0) {
+    return 0;
+  }
+
+  PRINTF("DB: Inserted key %ld (hash %ld) into the heap at bucket_id %d\n",
+         (long)key, (long)transform_key(key), bucket_id);
+
+  return 1;
+}
+
+/*
+ * Create a new max-heap index: generate the heap file and the bucket file,
+ * record the bucket file name at the start of the heap file, and insert a
+ * root node covering the whole key range.
+ *
+ * On failure, the heap object is freed, index->opaque_data is cleared and
+ * any files generated so far are removed.
+ */
+static db_result_t
+create(index_t *index)
+{
+  char bucket_filename[DB_MAX_FILENAME_LENGTH];
+  char *filename;
+  db_result_t result;
+  heap_t *heap;
+
+  heap = NULL;
+  filename = NULL;
+  bucket_filename[0] = '\0';
+
+  /* Generate the heap file, which is the main index file that is
+     inserted into the metadata of the relation. */
+  filename = storage_generate_file("heap",
+                                   (unsigned long)NODE_LIMIT * sizeof(heap_node_t));
+  if(filename == NULL) {
+    PRINTF("DB: Failed to generate a heap file\n");
+    return DB_INDEX_ERROR;
+  }
+
+  memcpy(index->descriptor_file, filename,
+         sizeof(index->descriptor_file));
+
+  PRINTF("DB: Generated the heap file \"%s\" using %lu bytes of space\n",
+         index->descriptor_file, (unsigned long)NODE_LIMIT * sizeof(heap_node_t));
+
+  index->opaque_data = heap = memb_alloc(&heaps);
+  if(heap == NULL) {
+    PRINTF("DB: Failed to allocate a heap\n");
+    result = DB_ALLOCATION_ERROR;
+    goto end;
+  }
+  heap->heap_storage = -1;
+  heap->bucket_storage = -1;
+
+  /* Generate the bucket file, which stores the (key, value) pairs. */
+  filename = storage_generate_file("bucket",
+                                   (unsigned long)NODE_LIMIT * sizeof(bucket_t));
+  if(filename == NULL) {
+    PRINTF("DB: Failed to generate a bucket file\n");
+    result = DB_INDEX_ERROR;
+    goto end;
+  }
+  memcpy(bucket_filename, filename, sizeof(bucket_filename));
+
+  PRINTF("DB: Generated the bucket file \"%s\" using %lu bytes of space\n",
+         bucket_filename, (unsigned long)NODE_LIMIT * sizeof(bucket_t));
+
+  /* Initialize the heap. */
+  memset(&heap->next_free_slot, 0, sizeof(heap->next_free_slot));
+
+  heap->heap_storage = storage_open(index->descriptor_file);
+  heap->bucket_storage = storage_open(bucket_filename);
+  if(heap->heap_storage < 0 || heap->bucket_storage < 0) {
+    result = DB_STORAGE_ERROR;
+    goto end;
+  }
+
+  if(DB_ERROR(storage_write(heap->heap_storage, &bucket_filename, 0,
+                            sizeof(bucket_filename)))) {
+    result = DB_STORAGE_ERROR;
+    goto end;
+  }
+
+  if(heap_insert(heap, KEY_MIN, KEY_MAX) < 0) {
+    PRINTF("DB: Heap insertion error\n");
+    result = DB_INDEX_ERROR;
+    goto end;
+  }
+
+  PRINTF("DB: Created a heap index\n");
+  result = DB_OK;
+
+ end:
+  if(result != DB_OK) {
+    if(heap != NULL) {
+      /* Only close the storage objects that were actually opened. */
+      if(heap->bucket_storage >= 0) {
+        storage_close(heap->bucket_storage);
+      }
+      if(heap->heap_storage >= 0) {
+        storage_close(heap->heap_storage);
+      }
+      memb_free(&heaps, heap);
+      index->opaque_data = NULL;
+    }
+    if(index->descriptor_file[0] != '\0') {
+      /* The heap file name is kept in the index descriptor. */
+      cfs_remove(index->descriptor_file);
+      index->descriptor_file[0] = '\0';
+    }
+    if(bucket_filename[0] != '\0') {
+      cfs_remove(bucket_filename);
+    }
+  }
+  return result;
+}
+
+/*
+ * Release the in-memory state of the index. Removing the index files is
+ * not implemented, so this function always returns DB_INDEX_ERROR.
+ */
+static db_result_t
+destroy(index_t *index)
+{
+  release(index);
+  return DB_INDEX_ERROR;
+}
+
+/*
+ * Load an existing max-heap index: read the bucket file name from the
+ * start of the heap file and open both files.
+ *
+ * On failure, the heap object is freed and index->opaque_data is cleared.
+ */
+static db_result_t
+load(index_t *index)
+{
+  heap_t *heap;
+  db_storage_id_t fd;
+  char bucket_file[DB_MAX_FILENAME_LENGTH];
+
+  index->opaque_data = heap = memb_alloc(&heaps);
+  if(heap == NULL) {
+    PRINTF("DB: Failed to allocate a heap\n");
+    return DB_ALLOCATION_ERROR;
+  }
+  heap->heap_storage = -1;
+  heap->bucket_storage = -1;
+
+  fd = storage_open(index->descriptor_file);
+  if(fd < 0) {
+    goto error;
+  }
+
+  /* storage_read() reports a result code, as at the other call sites
+     in this file. */
+  if(DB_ERROR(storage_read(fd, bucket_file, 0, sizeof(bucket_file)))) {
+    storage_close(fd);
+    goto error;
+  }
+
+  storage_close(fd);
+
+  heap->heap_storage = storage_open(index->descriptor_file);
+  heap->bucket_storage = storage_open(bucket_file);
+  if(heap->heap_storage < 0 || heap->bucket_storage < 0) {
+    if(heap->heap_storage >= 0) {
+      storage_close(heap->heap_storage);
+    }
+    if(heap->bucket_storage >= 0) {
+      storage_close(heap->bucket_storage);
+    }
+    goto error;
+  }
+
+  memset(&heap->next_free_slot, 0, sizeof(heap->next_free_slot));
+
+  PRINTF("DB: Loaded max-heap index from file %s and bucket file %s\n",
+         index->descriptor_file, bucket_file);
+
+  return DB_OK;
+
+ error:
+  memb_free(&heaps, heap);
+  index->opaque_data = NULL;
+  return DB_STORAGE_ERROR;
+}
+
+/*
+ * Close the storage objects of the index and free its heap object.
+ * Calling this function for an index without a heap object is a no-op.
+ */
+static db_result_t
+release(index_t *index)
+{
+  heap_t *heap;
+
+  heap = index->opaque_data;
+  if(heap == NULL) {
+    return DB_OK;
+  }
+
+  storage_close(heap->bucket_storage);
+  storage_close(heap->heap_storage);
+  memb_free(&heaps, heap);
+  index->opaque_data = NULL;
+
+  return DB_OK;
+}
+
+/*
+ * Insert an attribute value and its tuple id into the index. The value
+ * and the tuple id are truncated to the 16-bit key and value types.
+ */
+static db_result_t
+insert(index_t *index, attribute_value_t *key, tuple_id_t value)
+{
+  heap_t *heap;
+  long long_key;
+
+  heap = (heap_t *)index->opaque_data;
+
+  long_key = db_value_to_long(key);
+
+  if(insert_item(heap, (maxheap_key_t)long_key,
+                 (maxheap_value_t)value) == 0) {
+    PRINTF("DB: Failed to insert key %ld into a max-heap index\n", long_key);
+    return DB_INDEX_ERROR;
+  }
+  return DB_OK;
+}
+
+/* Deletion is not supported by this index type. */
+static db_result_t
+delete(index_t *index, attribute_value_t *value)
+{
+  return DB_INDEX_ERROR;
+}
+
+/*
+ * Return the tuple id of the next pair that matches the iterator, or
+ * INVALID_TUPLE when there are no more matches.
+ *
+ * The state of the search is kept in a single function-static cache, which
+ * is re-initialized whenever a different iterator is passed in or when the
+ * iterator is at its first item. A range is searched by looking up one key
+ * at a time, incrementing the minimum value of the iterator after each key
+ * is exhausted.
+ */
+static tuple_id_t
+get_next(index_iterator_t *iterator)
+{
+  struct iteration_cache {
+    index_iterator_t *index_iterator;
+    int heap_iterator;
+    tuple_id_t found_items;
+    uint8_t start;
+    int visited_buckets[NODE_DEPTH];
+    int end;
+  };
+  static struct iteration_cache cache;
+  heap_t *heap;
+  maxheap_key_t key;
+  int bucket_id;
+  int tmp_heap_iterator;
+  int i;
+  struct bucket_cache *bcache;
+  uint8_t next_free_slot;
+
+  heap = (heap_t *)iterator->index->opaque_data;
+  /* NOTE(review): this reads the leading bytes of the value object
+     directly, whereas insert() uses db_value_to_long(); confirm that both
+     yield the same key on the target platform. */
+  key = *(maxheap_key_t *)&iterator->min_value;
+
+  if(cache.index_iterator != iterator || iterator->next_item_no == 0) {
+    /* Initialize the cache for a new search. */
+    cache.end = NODE_DEPTH - 1;
+    cache.found_items = cache.start = 0;
+    cache.index_iterator = iterator;
+
+    /* Find a path of heap nodes which can contain the key. */
+    for(i = tmp_heap_iterator = 0; i < NODE_DEPTH; i++) {
+      cache.visited_buckets[i] = heap_find(heap, key, &tmp_heap_iterator);
+      if(cache.visited_buckets[i] < 0) {
+        cache.end = i - 1;
+        break;
+      }
+    }
+    cache.heap_iterator = cache.end;
+  }
+
+  /*
+   * Search for the key in each heap node, starting from the bottom
+   * of the heap. There is a much higher chance that the key will be
+   * there rather than at the top.
+   */
+  for(; cache.heap_iterator >= 0; cache.heap_iterator--) {
+    bucket_id = cache.visited_buckets[cache.heap_iterator];
+
+    PRINTF("DB: Find key %lu in bucket %d\n", (unsigned long)key, bucket_id);
+
+    if((bcache = bucket_load(heap, bucket_id)) == NULL) {
+      PRINTF("DB: Failed to load bucket %d\n", bucket_id);
+      return INVALID_TUPLE;
+    }
+
+    /* Compare the key against the bucket_ids in the bucket sequentially because
+       they are placed in arbitrary order. */
+    next_free_slot = heap->next_free_slot[bucket_id];
+    for(i = cache.start; i < next_free_slot; i++) {
+      if(bcache->bucket.pairs[i].key == key) {
+        if(cache.found_items++ == iterator->next_item_no) {
+          iterator->next_item_no++;
+          cache.start = i + 1;
+          PRINTF("DB: Found key %ld with value %lu\n", (long)key,
+                 (unsigned long)bcache->bucket.pairs[i].value);
+          return (tuple_id_t)bcache->bucket.pairs[i].value;
+        }
+      }
+    }
+
+    /* The resume position belongs to the bucket that was just scanned;
+       the next bucket must be scanned from its first slot. */
+    cache.start = 0;
+  }
+
+  if(VALUE_INT(&iterator->min_value) == VALUE_INT(&iterator->max_value)) {
+    PRINTF("DB: Could not find key %ld in the index\n", (long)key);
+    return INVALID_TUPLE;
+  }
+
+  /* Continue with the next key in the requested range. */
+  iterator->next_item_no = 0;
+  VALUE_INT(&iterator->min_value)++;
+
+  return get_next(iterator);
+}
diff --git a/apps/antelope/index-memhash.c b/apps/antelope/index-memhash.c
new file mode 100644
index 000000000..db3c3fbe3
--- /dev/null
+++ b/apps/antelope/index-memhash.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * A memory-resident hash map used as a DB index. 
+ * \author
+ *         Nicolas Tsiftes
+ */
+
+#include <string.h>
+
+#include "lib/memb.h"
+
+#include "db-options.h"
+#include "index.h"
+
+#define DEBUG DEBUG_NONE
+#include "net/uip-debug.h"
+
+static db_result_t create(index_t *);
+static db_result_t destroy(index_t *);
+static db_result_t load(index_t *);
+static db_result_t release(index_t *);
+static db_result_t insert(index_t *, attribute_value_t *, tuple_id_t);
+static db_result_t delete(index_t *, attribute_value_t *);
+static tuple_id_t get_next(index_iterator_t *);
+
+index_api_t index_memhash = {
+  INDEX_MEMHASH,
+  INDEX_API_INTERNAL,
+  create,
+  destroy,
+  load,
+  release,
+  insert,
+  delete,
+  get_next
+};
+
+/* One slot of the hash table; tuple_id == INVALID_TUPLE marks it unused. */
+struct hash_item {
+  tuple_id_t tuple_id;
+  attribute_value_t value;
+};
+typedef struct hash_item hash_item_t;
+
+/*
+ * A hash map is a whole table of slots. A hash_map_t pointer therefore
+ * points to the table, and slot i is accessed as (*hash_map)[i].
+ */
+typedef hash_item_t hash_map_t[DB_MEMHASH_TABLE_SIZE];
+
+MEMB(hash_map_memb, hash_map_t, DB_MEMHASH_INDEX_LIMIT);
+
+/*
+ * Hash all bytes of the value object into a slot number in the range
+ * 0 to DB_MEMHASH_TABLE_SIZE - 1.
+ */
+static unsigned
+calculate_hash(attribute_value_t *value)
+{
+  unsigned char *cp, *end;
+  unsigned hash_value;
+
+  cp = (unsigned char *)value;
+  end = cp + sizeof(*value);
+  hash_value = 0;
+
+  while(cp < end) {
+    hash_value = hash_value * 33 + *cp++;
+  }
+
+  return hash_value % DB_MEMHASH_TABLE_SIZE;
+}
+
+/*
+ * Allocate a hash table for the index and mark all of its slots unused.
+ */
+static db_result_t
+create(index_t *index)
+{
+  int i;
+  hash_map_t *hash_map;
+
+  PRINTF("Creating a memory-resident hash map index\n");
+
+  hash_map = memb_alloc(&hash_map_memb);
+  if(hash_map == NULL) {
+    return DB_ALLOCATION_ERROR;
+  }
+
+  for(i = 0; i < DB_MEMHASH_TABLE_SIZE; i++) {
+    /* Dereference the table first; hash_map[i] would address the i-th
+       table rather than the i-th slot. */
+    (*hash_map)[i].tuple_id = INVALID_TUPLE;
+  }
+
+  index->opaque_data = hash_map;
+
+  return DB_OK;
+}
+
+/* Free the hash table of the index. */
+static db_result_t
+destroy(index_t *index)
+{
+  memb_free(&hash_map_memb, index->opaque_data);
+
+  return DB_OK;
+}
+
+/* Loading creates a new, empty hash table. */
+static db_result_t
+load(index_t *index)
+{
+  return create(index);
+}
+
+/* Releasing frees the hash table, exactly as destroy() does. */
+static db_result_t
+release(index_t *index)
+{
+  return destroy(index);
+}
+
+/*
+ * Store the value and its tuple id in the slot selected by the hash of the
+ * value, overwriting whatever the slot contained before.
+ */
+static db_result_t
+insert(index_t *index, attribute_value_t *value, tuple_id_t tuple_id)
+{
+  hash_map_t *hash_map;
+  uint16_t hash_value;
+
+  hash_map = index->opaque_data;
+
+  hash_value = calculate_hash(value);
+  (*hash_map)[hash_value].tuple_id = tuple_id;
+  (*hash_map)[hash_value].value = *value;
+
+  PRINTF("DB: Inserted value %ld into the hash table\n", VALUE_LONG(value));
+
+  return DB_OK;
+}
+
+/*
+ * Mark the slot of the value as unused. Returns DB_INDEX_ERROR if the slot
+ * does not hold the given value.
+ */
+static db_result_t
+delete(index_t *index, attribute_value_t *value)
+{
+  hash_map_t *hash_map;
+  uint16_t hash_value;
+
+  hash_map = index->opaque_data;
+
+  hash_value = calculate_hash(value);
+  if(memcmp(&(*hash_map)[hash_value].value, value, sizeof(*value)) != 0) {
+    return DB_INDEX_ERROR;
+  }
+
+  (*hash_map)[hash_value].tuple_id = INVALID_TUPLE;
+  return DB_OK;
+}
+
+/*
+ * Return the tuple id stored for the minimum value of the iterator, or
+ * INVALID_TUPLE if the slot holds another value or if one item has already
+ * been returned for this iterator.
+ */
+static tuple_id_t
+get_next(index_iterator_t *iterator)
+{
+  hash_map_t *hash_map;
+  uint16_t hash_value;
+
+  if(iterator->next_item_no == 1) {
+    /* The memhash supports only unique values at the moment. */
+    return INVALID_TUPLE;
+  }
+
+  hash_map = iterator->index->opaque_data;
+
+  hash_value = calculate_hash(&iterator->min_value);
+  if(memcmp(&(*hash_map)[hash_value].value, &iterator->min_value,
+            sizeof(iterator->min_value)) != 0) {
+    return INVALID_TUPLE;
+  }
+
+  iterator->next_item_no++;
+
+  PRINTF("DB: Found value %ld in the hash table\n",
+         VALUE_LONG(&iterator->min_value));
+
+  return (*hash_map)[hash_value].tuple_id;
+}
diff --git a/apps/antelope/index.c b/apps/antelope/index.c
new file mode 100644
index 000000000..be86bbbe7
--- /dev/null
+++ b/apps/antelope/index.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * This component forwards index calls using the generic index + * API to specific implementations. 
+ * \author + * Nicolas Tsiftes + */ + +#include "contiki.h" +#include "lib/memb.h" +#include "lib/list.h" + +#define DEBUG DEBUG_NONE +#include "net/uip-debug.h" + +#include "antelope.h" +#include "attribute.h" +#include "db-options.h" +#include "index.h" +#include "storage.h" + +static index_api_t *index_components[] = {&index_inline, + &index_maxheap}; + +LIST(indices); +MEMB(index_memb, index_t, DB_INDEX_POOL_SIZE); + +static process_event_t load_request_event; +PROCESS(db_indexer, "DB Indexer"); + +static index_api_t * +find_index_api(index_type_t index_type) +{ + int i; + + for(i = 0; i < sizeof(index_components) / sizeof(index_components[0]); i++) { + if(index_components[i]->type == index_type) { + return index_components[i]; + } + } + + return NULL; +} + +void +index_init(void) +{ + list_init(indices); + memb_init(&index_memb); + process_start(&db_indexer, NULL); +} + +db_result_t +index_create(index_type_t index_type, relation_t *rel, attribute_t *attr) +{ + tuple_id_t cardinality; + index_t *index; + index_api_t *api; + + cardinality = relation_cardinality(rel); + if(cardinality == INVALID_TUPLE) { + return DB_STORAGE_ERROR; + } + + api = find_index_api(index_type); + if(api == NULL) { + PRINTF("DB: No API for index type %d\n", (int)index_type); + return DB_INDEX_ERROR; + } + + if(attr->index != NULL) { + /* Refuse to overwrite the old index. 
*/ + PRINTF("DB: The attribute %s is already indexed\n", attr->name); + return DB_INDEX_ERROR; + } + + index = memb_alloc(&index_memb); + if(index == NULL) { + PRINTF("DB: Failed to allocate an index\n"); + return DB_ALLOCATION_ERROR; + } + + index->rel = rel; + index->attr = attr; + index->api = api; + index->flags = 0; + index->opaque_data = NULL; + index->descriptor_file[0] = '\0'; + index->type = index_type; + + if(DB_ERROR(api->create(index))) { + memb_free(&index_memb, index); + PRINTF("DB: Index-specific creation failed for attribute %s\n", attr->name); + return DB_INDEX_ERROR; + } + + attr->index = index; + list_push(indices, index); + + if(index->descriptor_file[0] != '\0' && + DB_ERROR(storage_put_index(index))) { + api->destroy(index); + memb_free(&index_memb, index); + PRINTF("DB: Failed to store index data in file \"%s\"\n", + index->descriptor_file); + return DB_INDEX_ERROR; + } + + if(!(api->flags & INDEX_API_INLINE) && cardinality > 0) { + PRINTF("DB: Created an index for an old relation; issuing a load request\n"); + index->flags = INDEX_LOAD_NEEDED; + process_post(&db_indexer, load_request_event, NULL); + } else { + PRINTF("DB: Index created for attribute %s\n", attr->name); + index->flags |= INDEX_READY; + } + + return DB_OK; +} + +db_result_t +index_destroy(index_t *index) +{ + if(DB_ERROR(index_release(index)) || + DB_ERROR(index->api->destroy(index))) { + return DB_INDEX_ERROR; + } + + return DB_OK; +} + +db_result_t +index_load(relation_t *rel, attribute_t *attr) +{ + index_t *index; + index_api_t *api; + + PRINTF("DB: Attempting to load an index over %s.%s\n", rel->name, attr->name); + + index = memb_alloc(&index_memb); + if(index == NULL) { + PRINTF("DB: No more index objects available\n"); + return DB_ALLOCATION_ERROR; + } + + if(DB_ERROR(storage_get_index(index, rel, attr))) { + PRINTF("DB: Failed load an index descriptor from storage\n"); + memb_free(&index_memb, index); + return DB_INDEX_ERROR; + } + + index->rel = rel; + index->attr = 
attr; + index->opaque_data = NULL; + + api = find_index_api(index->type); + if(api == NULL) { + PRINTF("DB: No API for index type %d\n", index->type); + return DB_INDEX_ERROR; + } + + index->api = api; + + if(DB_ERROR(api->load(index))) { + PRINTF("DB: Index-specific load failed\n"); + return DB_INDEX_ERROR; + } + + list_push(indices, index); + attr->index = index; + index->flags = INDEX_READY; + + return DB_OK; +} + +db_result_t +index_release(index_t *index) +{ + if(DB_ERROR(index->api->release(index))) { + return DB_INDEX_ERROR; + } + + index->attr->index = NULL; + list_remove(indices, index); + memb_free(&index_memb, index); + + return DB_OK; +} + +db_result_t +index_insert(index_t *index, attribute_value_t *value, + tuple_id_t tuple_id) +{ + return index->api->insert(index, value, tuple_id); +} + +db_result_t +index_delete(index_t *index, attribute_value_t *value) +{ + if(index->flags != INDEX_READY) { + return DB_INDEX_ERROR; + } + + return index->api->delete(index, value); +} + +db_result_t +index_get_iterator(index_iterator_t *iterator, index_t *index, + attribute_value_t *min_value, + attribute_value_t *max_value) +{ + tuple_id_t cardinality; + unsigned long range; + unsigned long max_range; + long max; + long min; + + cardinality = relation_cardinality(index->rel); + if(cardinality == INVALID_TUPLE) { + return DB_STORAGE_ERROR; + } + + if(index->flags != INDEX_READY) { + return DB_INDEX_ERROR; + } + + min = db_value_to_long(min_value); + max = db_value_to_long(max_value); + + range = (unsigned long)max - min; + if(range > 0) { + /* + * Index structures that do not have a natural ability to handle + * range queries (e.g., a hash index) can nevertheless emulate them. + * + * The range query emulation attempts to look up the key for each + * value in the search range. If the search range is sparse, this + * iteration will incur a considerable overhead per found key. 
+ * + * Hence, The emulation is preferable when an external module wants + * to iterate over a narrow range of keys, for which the total + * search cost is smaller than that of an iteration over all tuples + * in the relation. + */ + if(!(index->api->flags & INDEX_API_RANGE_QUERIES)) { + PRINTF("DB: Range query requested for an index that does not support it\n"); + max_range = cardinality / DB_INDEX_COST; + if(range > max_range) { + return DB_INDEX_ERROR; + } + PRINTF("DB: Using the index anyway because the range is small enough (%lu <= %lu)\n", + range, max_range); + } + } + + iterator->index = index; + iterator->min_value = *min_value; + iterator->max_value = *max_value; + iterator->next_item_no = 0; + + PRINTF("DB: Acquired an index iterator for %s.%s over the range (%ld,%ld)\n", + index->rel->name, index->attr->name, + min_value->u.long_value, max_value->u.long_value); + + return DB_OK; +} + +tuple_id_t +index_get_next(index_iterator_t *iterator) +{ + long min; + long max; + + if(iterator->index == NULL) { + /* This attribute is not indexed. */ + return INVALID_TUPLE; + } + + if((iterator->index->attr->flags & ATTRIBUTE_FLAG_UNIQUE) && + iterator->next_item_no == 1) { + min = db_value_to_long(&iterator->min_value); + max = db_value_to_long(&iterator->max_value); + if(min == max) { + /* + * We stop if this is an equivalence search on an attribute + * whose values are unique, and we already found one item. 
+       */
+      PRINTF("DB: Equivalence search finished\n");
+      return INVALID_TUPLE;
+    }
+  }
+
+  return iterator->index->api->get_next(iterator);
+}
+
+/* Returns 1 if the attribute has an index whose flags equal INDEX_READY,
+   and 0 otherwise. */
+int
+index_exists(attribute_t *attr)
+{
+  index_t *index;
+
+  index = (index_t *)attr->index;
+  if(index == NULL || index->flags != INDEX_READY) {
+    return 0;
+  }
+
+  return 1;
+}
+
+/* Returns the first index in the list that has INDEX_LOAD_NEEDED set,
+   or NULL if no index is waiting to be loaded. */
+static index_t *
+get_next_index_to_load(void)
+{
+  index_t *index;
+
+  for(index = list_head(indices); index != NULL; index = index->next) {
+    if(index->flags & INDEX_LOAD_NEEDED) {
+      return index;
+    }
+  }
+
+  return NULL;
+}
+
+/*
+ * Background process that fills an index created over a relation that
+ * already contains tuples. For every load request it selects the indexed
+ * attribute from the relation and inserts each returned value into the
+ * index, using the running row number as the tuple id.
+ */
+PROCESS_THREAD(db_indexer, ev, data)
+{
+  static index_t *index;
+  static db_handle_t handle;
+  static tuple_id_t row;
+  db_result_t result;
+  attribute_value_t value;
+  int column;
+
+  PROCESS_BEGIN();
+  load_request_event = process_alloc_event();
+
+  for(;;) {
+    PROCESS_WAIT_EVENT_UNTIL(ev == load_request_event);
+
+    index = get_next_index_to_load();
+    if(index == NULL) {
+      PRINTF("DB: Request to load an index, but no index is set to be loaded\n");
+      continue;
+    }
+
+    PRINTF("DB: Loading the index for %s.%s...\n",
+           index->rel->name, index->attr->name);
+
+    if(DB_ERROR(db_query(&handle, "SELECT %s FROM %s;", index->attr->name, index->rel->name))) {
+      index->flags |= INDEX_LOAD_ERROR;
+      index->flags &= ~INDEX_LOAD_NEEDED;
+      continue;
+    }
+
+    /*
+     * row has static storage duration, so it keeps the value it had when
+     * the previous load finished. It must restart at zero for every load;
+     * otherwise the tuples of this relation would be inserted under row
+     * numbers that continue from the previous index.
+     */
+    for(row = 0;; row++) {
+      PROCESS_PAUSE();
+
+      result = db_process(&handle);
+      if(DB_ERROR(result)) {
+        PRINTF("DB: Index loading failed while processing: %s\n",
+               db_get_result_message(result));
+        index->flags |= INDEX_LOAD_ERROR;
+        goto cleanup;
+      }
+      if(result == DB_FINISHED) {
+        break;
+      }
+
+      for(column = 0; column < handle.ncolumns; column++) {
+        if(DB_ERROR(db_get_value(&value, &handle, column))) {
+          index->flags |= INDEX_LOAD_ERROR;
+          goto cleanup;
+        }
+
+        if(DB_ERROR(index_insert(index, &value, row))) {
+          index->flags |= INDEX_LOAD_ERROR;
+          goto cleanup;
+        }
+      }
+    }
+
+    PRINTF("DB: Loaded %lu rows into the index\n",
+           (unsigned long)handle.current_row);
+
+cleanup:
+
if(index->flags & INDEX_LOAD_ERROR) { + PRINTF("DB: Failed to load the index for %s.%s\n", + index->rel->name, index->attr->name); + } + index->flags &= ~INDEX_LOAD_NEEDED; + index->flags |= INDEX_READY; + db_free(&handle); + } + + + PROCESS_END(); +} diff --git a/apps/antelope/index.h b/apps/antelope/index.h new file mode 100644 index 000000000..de7b28b36 --- /dev/null +++ b/apps/antelope/index.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * . + * \author + * Nicolas Tsiftes + */ + +#ifndef INDEX_H +#define INDEX_H + +#include "relation.h" + +typedef enum { + INDEX_NONE = 0, + INDEX_INLINE = 1, + INDEX_MEMHASH = 2, + INDEX_MAXHEAP = 3 +} index_type_t; + +#define INDEX_READY 0x00 +#define INDEX_LOAD_NEEDED 0x01 +#define INDEX_LOAD_ERROR 0x02 + +#define INDEX_API_INTERNAL 0x01 +#define INDEX_API_EXTERNAL 0x02 +#define INDEX_API_INLINE 0x04 +#define INDEX_API_COMPLETE 0x08 +#define INDEX_API_RANGE_QUERIES 0x10 + +struct index_api; + +struct index { + struct index *next; + char descriptor_file[DB_MAX_FILENAME_LENGTH]; + relation_t *rel; + attribute_t *attr; + struct index_api *api; + void *opaque_data; + index_type_t type; + uint8_t flags; +}; + +typedef struct index index_t; + +struct index_iterator { + index_t *index; + attribute_value_t min_value; + attribute_value_t max_value; + tuple_id_t next_item_no; + tuple_id_t found_items; +}; +typedef struct index_iterator index_iterator_t; + +struct index_api { + index_type_t type; + uint8_t flags; + db_result_t (*create)(index_t *); + db_result_t (*destroy)(index_t *); + db_result_t (*load)(index_t *); + db_result_t (*release)(index_t *); + db_result_t (*insert)(index_t *, attribute_value_t *, tuple_id_t); + db_result_t (*delete)(index_t *, attribute_value_t *); + tuple_id_t (*get_next)(index_iterator_t *); +}; + +typedef struct index_api index_api_t; + +extern index_api_t index_inline; +extern index_api_t 
index_maxheap; +extern index_api_t index_memhash; + +void index_init(void); +db_result_t index_create(index_type_t, relation_t *, attribute_t *); +db_result_t index_destroy(index_t *); +db_result_t index_load(relation_t *, attribute_t *); +db_result_t index_release(index_t *); +db_result_t index_insert(index_t *, attribute_value_t *, tuple_id_t); +db_result_t index_delete(index_t *, attribute_value_t *); +db_result_t index_get_iterator(index_iterator_t *, index_t *, + attribute_value_t *, attribute_value_t *); +tuple_id_t index_get_next(index_iterator_t *); +int index_exists(attribute_t *); + +#endif /* !INDEX_H */ diff --git a/apps/antelope/lvm.c b/apps/antelope/lvm.c new file mode 100644 index 000000000..4d05cc594 --- /dev/null +++ b/apps/antelope/lvm.c @@ -0,0 +1,976 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *      Logic engine used for quickly evaluating data constraints in relations.
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "aql.h"
+#include "lvm.h"
+
+#define DEBUG DEBUG_NONE
+#include "debug.h"
+
+/*
+ * The logic engine determines whether a logical predicate is true for
+ * each tuple in a relation. It uses a stack-based execution model of
+ * operations that are arranged in prefix (Polish) notation.
+ */
+
+/* Default option values. */
+#ifndef LVM_MAX_NAME_LENGTH
+#define LVM_MAX_NAME_LENGTH 16
+#endif
+
+#ifndef LVM_MAX_VARIABLE_ID
+#define LVM_MAX_VARIABLE_ID 8
+#endif
+
+#ifndef LVM_USE_FLOATS
+#define LVM_USE_FLOATS 0
+#endif
+
+#define IS_CONNECTIVE(op) ((op) & LVM_CONNECTIVE)
+
+/* A named variable together with its type and current value. A slot
+   whose name is empty is unused. */
+struct variable {
+  operand_type_t type;
+  operand_value_t value;
+  char name[LVM_MAX_NAME_LENGTH + 1];
+};
+typedef struct variable variable_t;
+
+/* The value range derived for one variable; only meaningful when
+   `derived' is non-zero. */
+struct derivation {
+  operand_value_t max;
+  operand_value_t min;
+  uint8_t derived;
+};
+typedef struct derivation derivation_t;
+
+/* Registered variables for a LVM expression. Their values may be
+   changed between executions of the expression. The table needs one
+   slot for each of the ids 0 .. LVM_MAX_VARIABLE_ID - 1, because every
+   loop over it and lookup() address LVM_MAX_VARIABLE_ID slots. */
+static variable_t variables[LVM_MAX_VARIABLE_ID];
+
+/* Range derivations of variables that are used for index searches.
   Indexed by variable id, hence the same size as the variable table. */
+static derivation_t derivations[LVM_MAX_VARIABLE_ID];
+
+#if DEBUG
+/* Print the derived range of every variable that has one. */
+static void
+print_derivations(derivation_t *d)
+{
+  int i;
+
+  for(i = 0; i < LVM_MAX_VARIABLE_ID; i++) {
+    if(d[i].derived) {
+      printf("%s is constrained to (%ld,%ld)\n", variables[i].name,
+             d[i].min.l, d[i].max.l);
+    }
+  }
+}
+#endif /* DEBUG */
+
+/*
+ * Find the slot of a variable. Returns the id of the slot whose name
+ * equals `name'. If there is no such slot, returns the id of the first
+ * unused slot, or LVM_MAX_VARIABLE_ID if every slot is taken.
+ */
+static variable_id_t
+lookup(char *name)
+{
+  variable_id_t id;
+
+  for(id = 0; id < LVM_MAX_VARIABLE_ID && variables[id].name[0] != '\0'; id++) {
+    if(strcmp(variables[id].name, name) == 0) {
+      break;
+    }
+  }
+
+  return id;
+}
+
+/* Return a pointer to the operator stored at the instruction pointer
+   and advance the instruction pointer past it. */
+static operator_t *
+get_operator(lvm_instance_t *p)
+{
+  operator_t *operator;
+
+  operator = (operator_t *)&p->code[p->ip];
+  p->ip += sizeof(operator_t);
+  return operator;
+}
+
+/* Copy the operand stored at the instruction pointer into `operand'
+   and advance the instruction pointer past it. */
+static void
+get_operand(lvm_instance_t *p, operand_t *operand)
+{
+  memcpy(operand, &p->code[p->ip], sizeof(*operand));
+  p->ip += sizeof(*operand);
+}
+
+/* Read the node type stored at the instruction pointer and advance the
+   instruction pointer past it. */
+static node_type_t
+get_type(lvm_instance_t *p)
+{
+  node_type_t node_type;
+
+  node_type = *(node_type_t *)(p->code + p->ip);
+  p->ip += sizeof(node_type);
+
+  return node_type;
+}
+
+/* Convert an operand to a long. Variables yield their current value;
+   operands of an unknown type yield 0. */
+static long
+operand_to_long(operand_t *operand)
+{
+  switch(operand->type) {
+  case LVM_LONG:
+    return operand->value.l;
+#if LVM_USE_FLOATS
+  case LVM_FLOAT:
+    return (long)operand->value.f;
+    break;
+#endif /* LVM_USE_FLOATS */
+  case LVM_VARIABLE:
+    return variables[operand->value.id].value.l;
+  default:
+    return 0;
+  }
+}
+
+/*
+ * Evaluate the arithmetic operator `op' on the two sub-expressions that
+ * follow in the code. Each sub-expression is either an operand or a
+ * nested arithmetic operation, which is evaluated recursively. The
+ * result is stored in `result' as a LVM_LONG operand.
+ */
+static lvm_status_t
+eval_expr(lvm_instance_t *p, operator_t op, operand_t *result)
+{
+  int i;
+  node_type_t type;
+  operator_t *operator;
+  operand_t operand[2];
+  long value[2];
+  long result_value;
+  lvm_status_t r;
+
+  for(i = 0; i < 2; i++) {
+    type = get_type(p);
+    switch(type) {
+    case LVM_ARITH_OP:
+      operator = get_operator(p);
+      r = eval_expr(p, *operator, &operand[i]);
+      if(LVM_ERROR(r)) {
+        return r;
+      }
+      break;
+    case LVM_OPERAND:
+      get_operand(p, &operand[i]);
+      break;
+    default:
+      return SEMANTIC_ERROR;
+    }
+    value[i] =
operand_to_long(&operand[i]); + } + + switch(op) { + case LVM_ADD: + result_value = value[0] + value[1]; + break; + case LVM_SUB: + result_value = value[0] - value[1]; + break; + case LVM_MUL: + result_value = value[0] * value[1]; + break; + case LVM_DIV: + if(value[1] == 0) { + return MATH_ERROR; + } + result_value = value[0] / value[1]; + break; + default: + return EXECUTION_ERROR; + } + + result->type = LVM_LONG; + result->value.l = result_value; + + return TRUE; +} + +static int +eval_logic(lvm_instance_t *p, operator_t *op) +{ + int i; + int r; + operand_t operand; + long result[2]; + node_type_t type; + operator_t *operator; + long l1, l2; + int logic_result[2]; + unsigned arguments; + + if(IS_CONNECTIVE(*op)) { + arguments = *op == LVM_NOT ? 1 : 2; + for(i = 0; i < arguments; i++) { + type = get_type(p); + if(type != LVM_CMP_OP) { + return SEMANTIC_ERROR; + } + operator = get_operator(p); + logic_result[i] = eval_logic(p, operator); + if(LVM_ERROR(logic_result[i])) { + return logic_result[i]; + } + } + + if(*op == LVM_NOT) { + return !logic_result[0]; + } else if(*op == LVM_AND) { + return logic_result[0] == TRUE && logic_result[1] == TRUE; + } else { + return logic_result[0] == TRUE || logic_result[1] == TRUE; + } + } + + for(i = 0; i < 2; i++) { + type = get_type(p); + switch(type) { + case LVM_ARITH_OP: + operator = get_operator(p); + r = eval_expr(p, *operator, &operand); + if(LVM_ERROR(r)) { + return r; + } + break; + case LVM_OPERAND: + get_operand(p, &operand); + break; + default: + return SEMANTIC_ERROR; + } + result[i] = operand_to_long(&operand); + } + + l1 = result[0]; + l2 = result[1]; + PRINTF("Result1: %ld\nResult2: %ld\n", l1, l2); + + switch(*op) { + case LVM_EQ: + return l1 == l2; + case LVM_NEQ: + return l1 != l2; + case LVM_GE: + return l1 > l2; + case LVM_GEQ: + return l1 >= l2; + case LVM_LE: + return l1 < l2; + case LVM_LEQ: + return l1 <= l2; + default: + break; + } + + return EXECUTION_ERROR; +} + +void +lvm_reset(lvm_instance_t *p, 
unsigned char *code, lvm_ip_t size)
+{
+  memset(code, 0, size);
+  p->code = code;
+  p->size = size;
+  p->end = 0;
+  p->ip = 0;
+  p->error = 0;
+
+  /* The variable and derivation tables are shared by all instances. */
+  memset(variables, 0, sizeof(variables));
+  memset(derivations, 0, sizeof(derivations));
+}
+
+/*
+ * Skip the space of one node type and one operator at the end of the
+ * code, and return the offset at which that space starts. If the space
+ * does not fit, p->error is set and the end is left unchanged.
+ */
+lvm_ip_t
+lvm_jump_to_operand(lvm_instance_t *p)
+{
+  lvm_ip_t old_end;
+
+  old_end = p->end;
+  p->end += sizeof(operator_t) + sizeof(node_type_t);
+  if(p->end >= p->size) {
+    p->error = __LINE__;
+    p->end = old_end;
+  }
+
+  return old_end;
+}
+
+/*
+ * Open a gap for one node type and one operator at offset `end' by
+ * moving the code between `end' and the current end upwards. The end is
+ * then set to `end', so that the next emitted node lands in the gap.
+ *
+ * Returns the offset just past the moved code, or 0 with p->error set
+ * if `end' is not inside the code or the moved code would not fit.
+ */
+lvm_ip_t
+lvm_shift_for_operator(lvm_instance_t *p, lvm_ip_t end)
+{
+  unsigned char *ptr;
+  lvm_ip_t old_end;
+
+  old_end = p->end;
+
+  /* The code is moved by the size of an operator plus the size of a
+     node type, so both must be accounted for in the space check. */
+  if(end < 0 || end >= old_end ||
+     old_end + sizeof(operator_t) + sizeof(node_type_t) > p->size) {
+    p->error = __LINE__;
+    return 0;
+  }
+
+  ptr = p->code + end;
+
+  memmove(ptr + sizeof(operator_t) + sizeof(node_type_t), ptr, old_end - end);
+  p->end = end;
+
+  return old_end + sizeof(operator_t) + sizeof(node_type_t);
+}
+
+/* Return the offset of the end of the code. */
+lvm_ip_t
+lvm_get_end(lvm_instance_t *p)
+{
+  return p->end;
+}
+
+/*
+ * Set the end of the code to `end' and return the previous end. If
+ * `end' lies outside the code buffer, p->error is set and the end is
+ * left unchanged.
+ */
+lvm_ip_t
+lvm_set_end(lvm_instance_t *p, lvm_ip_t end)
+{
+  lvm_ip_t old_end;
+
+  if(end >= p->size) {
+    p->error = __LINE__;
+    return p->end;
+  }
+
+  old_end = p->end;
+  p->end = end;
+
+  return old_end;
+}
+
+/*
+ * Append a node type to the code. If it does not fit in the code
+ * buffer, nothing is written and p->error is set.
+ */
+void
+lvm_set_type(lvm_instance_t *p, node_type_t type)
+{
+  if(p->end + sizeof(type) > p->size) {
+    p->error = __LINE__;
+    return;
+  }
+
+  *(node_type_t *)(p->code + p->end) = type;
+  p->end += sizeof(type);
+}
+
+/*
+ * Execute the code from the beginning. The code must start with a
+ * relational operator or a connective. Returns TRUE or FALSE for a
+ * successfully evaluated predicate, and an error status otherwise.
+ */
+lvm_status_t
+lvm_execute(lvm_instance_t *p)
+{
+  node_type_t type;
+  operator_t *operator;
+  lvm_status_t status;
+
+  p->ip = 0;
+  status = EXECUTION_ERROR;
+  type = get_type(p);
+  switch(type) {
+  case LVM_CMP_OP:
+    operator = get_operator(p);
+    status = eval_logic(p, operator);
+    if(!LVM_ERROR(status)) {
+      PRINTF("The statement is %s\n", status == TRUE ? "true" : "false");
+    } else {
+      PRINTF("Execution error: %d\n", (int)status);
+    }
+    break;
+  default:
+    PRINTF("Error: The code must start with a relational operator\n");
+  }
+
+  return status;
+}
+
+/*
+ * Append an arithmetic operator node. The node is only written if the
+ * node type and the operator both fit; otherwise p->error is set.
+ */
+void
+lvm_set_op(lvm_instance_t *p, operator_t op)
+{
+  if(p->end + sizeof(node_type_t) + sizeof(op) > p->size) {
+    p->error = __LINE__;
+    return;
+  }
+
+  lvm_set_type(p, LVM_ARITH_OP);
+  memcpy(&p->code[p->end], &op, sizeof(op));
+  p->end += sizeof(op);
+}
+
+/*
+ * Append a relational operator or connective node. The node is only
+ * written if it fits completely; otherwise p->error is set.
+ */
+void
+lvm_set_relation(lvm_instance_t *p, operator_t op)
+{
+  if(p->end + sizeof(node_type_t) + sizeof(op) > p->size) {
+    p->error = __LINE__;
+    return;
+  }
+
+  lvm_set_type(p, LVM_CMP_OP);
+  memcpy(&p->code[p->end], &op, sizeof(op));
+  p->end += sizeof(op);
+}
+
+/*
+ * Append an operand node. The node is only written if it fits
+ * completely; otherwise p->error is set.
+ */
+void
+lvm_set_operand(lvm_instance_t *p, operand_t *op)
+{
+  if(p->end + sizeof(node_type_t) + sizeof(*op) > p->size) {
+    p->error = __LINE__;
+    return;
+  }
+
+  lvm_set_type(p, LVM_OPERAND);
+  memcpy(&p->code[p->end], op, sizeof(*op));
+  p->end += sizeof(*op);
+}
+
+/* Append a constant operand of type long. */
+void
+lvm_set_long(lvm_instance_t *p, long l)
+{
+  operand_t op;
+
+  op.type = LVM_LONG;
+  op.value.l = l;
+
+  lvm_set_operand(p, &op);
+}
+
+/*
+ * Register a variable under `name'. Registering a name that is already
+ * known leaves the existing entry untouched. Returns
+ * VARIABLE_LIMIT_REACHED if the name is new and the table is full.
+ */
+lvm_status_t
+lvm_register_variable(char *name, operand_type_t type)
+{
+  variable_id_t id;
+  variable_t *var;
+
+  id = lookup(name);
+  if(id == LVM_MAX_VARIABLE_ID) {
+    return VARIABLE_LIMIT_REACHED;
+  }
+
+  var = &variables[id];
+  if(var->name[0] == '\0') {
+    strncpy(var->name, name, sizeof(var->name) - 1);
+    var->name[sizeof(var->name) - 1] = '\0';
+    var->type = type;
+  }
+
+  return TRUE;
+}
+
+/*
+ * Set the value of the slot that lookup() returns for `name'. Returns
+ * INVALID_IDENTIFIER only if the name is unknown and the table is full.
+ */
+lvm_status_t
+lvm_set_variable_value(char *name, operand_value_t value)
+{
+  variable_id_t id;
+
+  id = lookup(name);
+  if(id == LVM_MAX_VARIABLE_ID) {
+    return INVALID_IDENTIFIER;
+  }
+  variables[id].value = value;
+  return TRUE;
+}
+
+/*
+ * Append an operand that refers to the variable `name'. Nothing is
+ * appended if lookup() finds no slot for the name.
+ */
+void
+lvm_set_variable(lvm_instance_t *p, char *name)
+{
+  operand_t op;
+  variable_id_t id;
+
+  id = lookup(name);
+  if(id < LVM_MAX_VARIABLE_ID) {
+    PRINTF("var id = %d\n", id);
+    /* The id covers only part of the value union; clear the operand
+       first so that no indeterminate bytes are copied into the code. */
+    memset(&op, 0, sizeof(op));
+    op.type = LVM_VARIABLE;
+    op.value.id = id;
+    lvm_set_operand(p, &op);
+  }
+}
+
+/* Copy the instance descriptor. The code buffer itself is not copied;
+   both instances refer to the same buffer afterwards. */
+void
+lvm_clone(lvm_instance_t *dst, lvm_instance_t *src)
+{
+  memcpy(dst, src, sizeof(*dst));
+}
+
+/*
+ * Combine the derivations of the two sides of a conjunction. A variable
+ * that is constrained on one side only keeps that range; a variable that
+ * is constrained on both sides gets the intersection of the two ranges.
+ */
+static void
+create_intersection(derivation_t *result, derivation_t *d1, derivation_t *d2)
+{
+  int i;
+
+  for(i = 0; i < LVM_MAX_VARIABLE_ID; i++) {
+
if(!d1[i].derived && !d2[i].derived) {
+      continue;
+    } else if(d1[i].derived && !d2[i].derived) {
+      result[i].min.l = d1[i].min.l;
+      result[i].max.l = d1[i].max.l;
+    } else if(!d1[i].derived && d2[i].derived) {
+      result[i].min.l = d2[i].min.l;
+      result[i].max.l = d2[i].max.l;
+    } else {
+      /* Both derivations have been made; create an
+         intersection of the ranges. */
+      if(d1[i].min.l > d2[i].min.l) {
+        result[i].min.l = d1[i].min.l;
+      } else {
+        result[i].min.l = d2[i].min.l;
+      }
+
+      if(d1[i].max.l < d2[i].max.l) {
+        result[i].max.l = d1[i].max.l;
+      } else {
+        result[i].max.l = d2[i].max.l;
+      }
+    }
+    result[i].derived = 1;
+  }
+
+#if DEBUG
+  PRINTF("Created an intersection of D1 and D2\n");
+  PRINTF("D1: \n");
+  print_derivations(d1);
+  PRINTF("D2: \n");
+  print_derivations(d2);
+  PRINTF("Result: \n");
+  print_derivations(result);
+#endif /* DEBUG */
+}
+
+/*
+ * Combine the derivations of the two sides of a disjunction.
+ *
+ * A range may only be reported for a variable that is constrained on
+ * both sides. If one side does not mention the variable, that side can
+ * be true for any value of it, so the disjunction does not bound the
+ * variable at all. Reporting the other side's range would make a search
+ * restricted to that range skip tuples that satisfy the predicate.
+ */
+static void
+create_union(derivation_t *result, derivation_t *d1, derivation_t *d2)
+{
+  int i;
+
+  for(i = 0; i < LVM_MAX_VARIABLE_ID; i++) {
+    if(!d1[i].derived || !d2[i].derived) {
+      /* Unconstrained on at least one side: no range can be derived. */
+      continue;
+    } else {
+      /* Both derivations have been made; create a
+         union of the ranges.
*/ + if(d1[i].min.l > d2[i].min.l) { + result[i].min.l = d2[i].min.l; + } else { + result[i].min.l = d1[i].min.l; + } + + if(d1[i].max.l < d2[i].max.l) { + result[i].max.l = d2[i].max.l; + } else { + result[i].max.l = d1[i].max.l; + } + } + result[i].derived = 1; + } + +#if DEBUG + PRINTF("Created a union of D1 and D2\n"); + PRINTF("D1: \n"); + print_derivations(d1); + PRINTF("D2: \n"); + print_derivations(d2); + PRINTF("Result: \n"); + print_derivations(result); +#endif /* DEBUG */ +} + +static int +derive_relation(lvm_instance_t *p, derivation_t *local_derivations) +{ + operator_t *operator; + node_type_t type; + operand_t operand[2]; + int i; + int var; + int variable_id; + operand_value_t *value; + derivation_t *derivation; + + type = get_type(p); + operator = get_operator(p); + + if(IS_CONNECTIVE(*operator)) { + derivation_t d1[LVM_MAX_VARIABLE_ID]; + derivation_t d2[LVM_MAX_VARIABLE_ID]; + + if(*operator != LVM_AND && *operator != LVM_OR) { + return DERIVATION_ERROR; + } + + PRINTF("Attempting to infer ranges from a logical connective\n"); + + memset(d1, 0, sizeof(d1)); + memset(d2, 0, sizeof(d2)); + + if(LVM_ERROR(derive_relation(p, d1)) || + LVM_ERROR(derive_relation(p, d2))) { + return DERIVATION_ERROR; + } + + if(*operator == LVM_AND) { + create_intersection(local_derivations, d1, d2); + } else if(*operator == LVM_OR) { + create_union(local_derivations, d1, d2); + } + return TRUE; + } + + for(i = 0; i < 2; i++) { + type = get_type(p); + switch(type) { + case LVM_OPERAND: + get_operand(p, &operand[i]); + break; + default: + return DERIVATION_ERROR; + } + } + + if(operand[0].type == LVM_VARIABLE && operand[1].type == LVM_VARIABLE) { + return DERIVATION_ERROR; + } + + /* Determine which of the operands that is the variable. 
*/
+  if(operand[0].type == LVM_VARIABLE) {
+    if(operand[1].type == LVM_VARIABLE) {
+      return DERIVATION_ERROR;
+    }
+    var = 0;
+    variable_id = operand[0].value.id;
+    value = &operand[1].value;
+  } else {
+    if(operand[1].type != LVM_VARIABLE) {
+      /* Neither operand is a variable, so no range can be derived.
+         Without this check the constant would be read as a variable id. */
+      return DERIVATION_ERROR;
+    }
+    var = 1;
+    variable_id = operand[1].value.id;
+    value = &operand[0].value;
+  }
+
+  if(variable_id >= LVM_MAX_VARIABLE_ID) {
+    return DERIVATION_ERROR;
+  }
+
+  PRINTF("variable id %d, value %ld\n", variable_id, *(long *)value);
+
+  derivation = local_derivations + variable_id;
+  /* Default values. */
+  derivation->max.l = LONG_MAX;
+  derivation->min.l = LONG_MIN;
+
+  /*
+   * eval_logic() applies the operator to the operands in code order, so
+   * the bound that a comparison puts on the variable depends on which
+   * side the variable is on: "a > 10" bounds a from below, whereas
+   * "10 > a" bounds it from above.
+   *
+   * NOTE(review): some derivation examples in the TEST driver at the end
+   * of this file put the constant first while describing the variable-
+   * first relation in their comments; they need to be revisited.
+   */
+  switch(*operator) {
+  case LVM_EQ:
+    derivation->max = *value;
+    derivation->min = *value;
+    break;
+  case LVM_GE:
+    if(var == 0) {
+      derivation->min.l = value->l + 1;
+    } else {
+      derivation->max.l = value->l - 1;
+    }
+    break;
+  case LVM_GEQ:
+    if(var == 0) {
+      derivation->min.l = value->l;
+    } else {
+      derivation->max.l = value->l;
+    }
+    break;
+  case LVM_LE:
+    if(var == 0) {
+      derivation->max.l = value->l - 1;
+    } else {
+      derivation->min.l = value->l + 1;
+    }
+    break;
+  case LVM_LEQ:
+    if(var == 0) {
+      derivation->max.l = value->l;
+    } else {
+      derivation->min.l = value->l;
+    }
+    break;
+  default:
+    return DERIVATION_ERROR;
+  }
+
+  derivation->derived = 1;
+
+  return TRUE;
+}
+
+/* Derive value ranges for the variables of the expression and store
+   them in the shared derivation table. */
+lvm_status_t
+lvm_derive(lvm_instance_t *p)
+{
+  return derive_relation(p, derivations);
+}
+
+/*
+ * Fetch the range that was derived for the variable `name'.
+ *
+ * Returns TRUE and fills in `min' and `max' if a range has been derived,
+ * DERIVATION_ERROR if the variable is known but has no derived range,
+ * and INVALID_IDENTIFIER if no variable has that name.
+ */
+lvm_status_t
+lvm_get_derived_range(lvm_instance_t *p, char *name,
+                      operand_value_t *min, operand_value_t *max)
+{
+  int i;
+
+  for(i = 0; i < LVM_MAX_VARIABLE_ID; i++) {
+    if(strcmp(name, variables[i].name) == 0) {
+      if(derivations[i].derived) {
+        *min = derivations[i].min;
+        *max = derivations[i].max;
+        return TRUE;
+      }
+      return DERIVATION_ERROR;
+    }
+  }
+  return INVALID_IDENTIFIER;
+}
+
+#if DEBUG
+/* Print the symbol of the operator stored at offset `index' and return
+   the offset that follows the operator. */
+static lvm_ip_t
+print_operator(lvm_instance_t *p, lvm_ip_t index)
+{
+  operator_t operator;
+  struct operator_map {
+    operator_t op;
+    char *representation;
+  };
+  struct operator_map operator_map[] = {
+    {LVM_ADD, "+"},
+    {LVM_SUB, "-"},
+    {LVM_MUL, "*"},
+    {LVM_DIV, "/"},
+    {LVM_GE, ">"},
+    {LVM_GEQ, ">="},
+    {LVM_LE, "<"},
+    {LVM_LEQ, "<="},
+    {LVM_EQ, "="},
+    {LVM_NEQ, "<>"},
+    {LVM_AND, "/\\"},
+    {LVM_OR, "\\/"},
+    {LVM_NOT, "!"}
+  };
+  int i;
+
+  memcpy(&operator, p->code + index,
sizeof(operator)); + + for(i = 0; i < sizeof(operator_map) / sizeof(operator_map[0]); i++) { + if(operator_map[i].op == operator) { + PRINTF("%s ", operator_map[i].representation); + break; + } + } + + return index + sizeof(operator_t); +} + +static lvm_ip_t +print_operand(lvm_instance_t *p, lvm_ip_t index) +{ + operand_t operand; + + memcpy(&operand, p->code + index, sizeof(operand)); + + switch(operand.type) { + case LVM_VARIABLE: + if(operand.value.id >= LVM_MAX_VARIABLE_ID || variables[operand.value.id].name == NULL) { + PRINTF("var(id:%d):?? ", operand.value.id); + } else { + PRINTF("var(%s):%ld ", variables[operand.value.id].name, + variables[operand.value.id].value.l); + } + break; + case LVM_LONG: + PRINTF("long:%ld ", operand.value.l); + break; + default: + PRINTF("?? "); + break; + } + + return index + sizeof(operand_t); +} + +static lvm_ip_t +print_relation(lvm_instance_t *p, lvm_ip_t index) +{ + /* Relational operators are stored as ordinary operators. */ + return print_operator(p, index); +} +#endif /* DEBUG */ + +void +lvm_print_code(lvm_instance_t *p) +{ +#if DEBUG + lvm_ip_t ip; + + PRINTF("Code: "); + + for(ip = 0; ip < p->end;) { + switch(*(node_type_t *)(p->code + ip)) { + case LVM_CMP_OP: + ip = print_relation(p, ip + sizeof(node_type_t)); + break; + case LVM_ARITH_OP: + ip = print_operator(p, ip + sizeof(node_type_t)); + break; + case LVM_OPERAND: + ip = print_operand(p, ip + sizeof(node_type_t)); + break; + default: + PRINTF("Invalid opcode: 0x%x ", p->code[ip]); + ip = p->end; + break; + } + } + putchar('\n'); +#endif +} + +void +lvm_print_derivations(lvm_instance_t *p) +{ +#if DEBUG + print_derivations(derivations); +#endif /* DEBUG */ +} + +#ifdef TEST +int +main(void) +{ + lvm_instance_t p; + unsigned char code[256]; + + lvm_reset(&p, code, sizeof(code)); + + lvm_register_variable("z", LVM_LONG); + lvm_set_variable_value("z", (operand_value_t)15L); + + lvm_register_variable("y", LVM_LONG); + lvm_set_variable_value("y", 
(operand_value_t)109L); + + /* Infix: 109 = y /\ 20 > 70 - (6 + z * 3) => 109 = 109 /\ 20 > 19 => true */ + lvm_set_relation(&p, LVM_AND); + lvm_set_relation(&p, LVM_EQ); + lvm_set_long(&p, 109); + lvm_set_variable(&p, "y"); + lvm_set_relation(&p, LVM_GE); + lvm_set_long(&p, 20); + lvm_set_op(&p, LVM_SUB); + lvm_set_long(&p, 70); + lvm_set_op(&p, LVM_ADD); + lvm_set_long(&p, 6); + lvm_set_op(&p, LVM_MUL); + lvm_set_variable(&p, "z"); + lvm_set_long(&p, 3); + + lvm_print_code(&p); + + lvm_execute(&p); + + /* Infix: !(9999 + 1 < -1 + 10001) => !(10000 < 10000) => true */ + lvm_reset(&p, code, sizeof(code)); + lvm_set_relation(&p, LVM_NOT); + lvm_set_relation(&p, LVM_LE); + lvm_set_op(&p, LVM_ADD); + lvm_set_long(&p, 9999); + lvm_set_long(&p, 1); + lvm_set_op(&p, LVM_ADD); + lvm_set_long(&p, -1); + lvm_set_long(&p, 10001); + + lvm_print_code(&p); + + lvm_execute(&p); + + /* Derivation tests */ + + /* Infix: a = 5 => a:(5,5) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_set_relation(&p, LVM_EQ); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 5); + + lvm_derive(&p); + lvm_print_derivations(&p); + + /* Infix: a < 10 => a:(-oo,9) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 10); + + lvm_derive(&p); + lvm_print_derivations(&p); + + /* Infix: a < 100 /\ 10 < a => a:(11,99) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_set_relation(&p, LVM_AND); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 100); + lvm_set_relation(&p, LVM_GE); + lvm_set_long(&p, 10); + lvm_set_variable(&p, "a"); + + lvm_derive(&p); + lvm_print_derivations(&p); + + /* Infix: a < 100 /\ b > 100 => a:(-oo,99), b:(101,oo) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_register_variable("b", LVM_LONG); + lvm_set_relation(&p, LVM_AND); + 
lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 100); + lvm_set_relation(&p, LVM_GE); + lvm_set_variable(&p, "b"); + lvm_set_long(&p, 100); + + lvm_derive(&p); + lvm_print_derivations(&p); + + /* Infix: a < 100 \/ a < 1000 \/ a < 1902 => a:(-oo,1901) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_set_relation(&p, LVM_OR); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 100); + lvm_set_relation(&p, LVM_OR); + lvm_set_relation(&p, LVM_LE); + lvm_set_long(&p, 1000); + lvm_set_variable(&p, "a"); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 1902); + + lvm_derive(&p); + lvm_print_derivations(&p); + + /* Infix: (a < 100 /\ a < 90 /\ a > 80 /\ a < 105) \/ b > 10000 => + a:(81,89), b:(10001:oo) */ + lvm_reset(&p, code, sizeof(code)); + lvm_register_variable("a", LVM_LONG); + lvm_register_variable("b", LVM_LONG); + + lvm_set_relation(&p, LVM_OR); + lvm_set_relation(&p, LVM_GE); + lvm_set_variable(&p, "b"); + lvm_set_long(&p, 10000); + + lvm_set_relation(&p, LVM_AND); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 100); + lvm_set_relation(&p, LVM_AND); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 90); + lvm_set_relation(&p, LVM_AND); + lvm_set_relation(&p, LVM_GE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 80); + lvm_set_relation(&p, LVM_LE); + lvm_set_variable(&p, "a"); + lvm_set_long(&p, 105); + + lvm_derive(&p); + lvm_print_derivations(&p); + + printf("Done\n"); + + return 0; +} +#endif diff --git a/apps/antelope/lvm.h b/apps/antelope/lvm.h new file mode 100644 index 000000000..0969501d9 --- /dev/null +++ b/apps/antelope/lvm.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file is part of the Contiki operating system. + * + */ + +/** + * \file + * Definitions and declarations for the Propositional Logic Engine. 
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#ifndef LVM_H
+#define LVM_H
+
+#include <stdlib.h>
+
+#include "db-options.h"
+
+/*
+ * Status codes returned by the LVM functions. FALSE and TRUE are the
+ * two non-error results; every code from INVALID_IDENTIFIER (2) and
+ * upwards denotes an error, which is what LVM_ERROR() tests for.
+ */
+enum lvm_status {
+  FALSE = 0,
+  TRUE = 1,
+  INVALID_IDENTIFIER = 2,
+  SEMANTIC_ERROR = 3,
+  MATH_ERROR = 4,
+  STACK_OVERFLOW = 5,
+  TYPE_ERROR = 6,
+  VARIABLE_LIMIT_REACHED = 7,
+  EXECUTION_ERROR = 8,
+  DERIVATION_ERROR = 9
+};
+
+typedef enum lvm_status lvm_status_t;
+
+/*
+ * True for every status code that is neither FALSE nor TRUE. The
+ * argument is parenthesized so that compound expressions, such as
+ * LVM_ERROR(a ? b : c), expand with the intended precedence.
+ */
+#define LVM_ERROR(x) ((x) >= 2)
+
+/* Position (instruction pointer) within the code buffer of an instance. */
+typedef int lvm_ip_t;
+
+/* The state of one LVM program: a code buffer and positions within it. */
+struct lvm_instance {
+  unsigned char *code;
+  lvm_ip_t size;
+  lvm_ip_t end;
+  lvm_ip_t ip;
+  unsigned error;
+};
+typedef struct lvm_instance lvm_instance_t;
+
+/*
+ * Node categories. Each category occupies its own bit, which lets an
+ * operator code carry its category (see enum operator below).
+ */
+enum node_type {
+  LVM_ARITH_OP = 0x10,
+  LVM_OPERAND = 0x20,
+  LVM_CMP_OP = 0x40,
+  LVM_CONNECTIVE = 0x80
+};
+typedef enum node_type node_type_t;
+
+/* Operator codes: a node_type category OR-ed with a per-category number. */
+enum operator {
+  LVM_ADD = LVM_ARITH_OP | 1,
+  LVM_SUB = LVM_ARITH_OP | 2,
+  LVM_MUL = LVM_ARITH_OP | 3,
+  LVM_DIV = LVM_ARITH_OP | 4,
+  LVM_EQ = LVM_CMP_OP | 1,
+  LVM_NEQ = LVM_CMP_OP | 2,
+  LVM_GE = LVM_CMP_OP | 3,
+  LVM_GEQ = LVM_CMP_OP | 4,
+  LVM_LE = LVM_CMP_OP | 5,
+  LVM_LEQ = LVM_CMP_OP | 6,
+  LVM_AND = LVM_CONNECTIVE | 1,
+  LVM_OR = LVM_CONNECTIVE | 2,
+  LVM_NOT = LVM_CONNECTIVE | 3
+};
+typedef enum operator operator_t;
+
+/* The kinds of operands that an operand_t can hold. */
+enum operand_type {
+  LVM_VARIABLE,
+  LVM_FLOAT,
+  LVM_LONG
+};
+typedef enum operand_type operand_type_t;
+
+typedef unsigned char variable_id_t;
+
+/* The float member exists only when LVM_USE_FLOATS is enabled. */
+typedef union {
+  long l;
+#if LVM_USE_FLOATS
+  float f;
+#endif
+  variable_id_t id;
+} operand_value_t;
+
+/* An operand value tagged with its type. */
+struct operand {
+  operand_type_t type;
+  operand_value_t value;
+};
+typedef struct operand operand_t;
+
+void lvm_reset(lvm_instance_t *p, unsigned char *code, lvm_ip_t size);
+void lvm_clone(lvm_instance_t *dst, lvm_instance_t *src);
+lvm_status_t lvm_derive(lvm_instance_t *p);
+lvm_status_t lvm_get_derived_range(lvm_instance_t *p, char *name,
+                                   operand_value_t *min,
+                                   operand_value_t *max);
+void lvm_print_derivations(lvm_instance_t *p);
+lvm_status_t lvm_execute(lvm_instance_t *p);
+lvm_status_t lvm_register_variable(char *name, operand_type_t type);
+lvm_status_t lvm_set_variable_value(char *name, operand_value_t value);
+void lvm_print_code(lvm_instance_t *p);
+lvm_ip_t lvm_jump_to_operand(lvm_instance_t *p);
+lvm_ip_t lvm_shift_for_operator(lvm_instance_t *p, lvm_ip_t end);
+lvm_ip_t lvm_get_end(lvm_instance_t *p);
+lvm_ip_t lvm_set_end(lvm_instance_t *p, lvm_ip_t end);
+void lvm_set_op(lvm_instance_t *p, operator_t op);
+void lvm_set_relation(lvm_instance_t *p, operator_t op);
+void lvm_set_operand(lvm_instance_t *p, operand_t *op);
+void lvm_set_long(lvm_instance_t *p, long l);
+void lvm_set_variable(lvm_instance_t *p, char *name);
+
+#endif /* LVM_H */
diff --git a/apps/antelope/relation.c b/apps/antelope/relation.c
new file mode 100644
index 000000000..6576f01ca
--- /dev/null
+++ b/apps/antelope/relation.c
@@ -0,0 +1,1222 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Institute nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *      Logic for relational databases.
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+/* limits.h: ULONG_MAX, LONG_MIN, LONG_MAX. string.h: strcmp, memset, ... */
+#include <limits.h>
+#include <string.h>
+
+#include "lib/crc16.h"
+#include "lib/list.h"
+#include "lib/memb.h"
+
+#define DEBUG DEBUG_NONE
+#include "net/uip-debug.h"
+
+#include "db-options.h"
+#include "index.h"
+#include "lvm.h"
+#include "relation.h"
+#include "result.h"
+#include "storage.h"
+#include "aql.h"
+
+/*
+ * The source_dest_map structure is used for mapping the pointers to
+ * data in a source row and in the corresponding destination row. The
+ * structure is calculated just before processing a relational
+ * selection, and then used to improve the performance when processing
+ * each row.
+ *
+ * from_offset is the byte offset of from_attr within a source row, and
+ * to_offset is the byte offset of to_attr within the result row.
+ */
+struct source_dest_map {
+  attribute_t *from_attr;
+  attribute_t *to_attr;
+  unsigned from_offset;
+  unsigned to_offset;
+};
+
+/* One map entry per attribute of the result relation of a selection. */
+static struct source_dest_map attr_map[AQL_ATTRIBUTE_LIMIT];
+
+#if DB_FEATURE_JOIN
+/*
+ * The source_map structure is used for mapping attributes to
+ * their offsets in rows. 
+ */
+struct source_map {
+  attribute_t *attr;
+  unsigned char *from_ptr;
+};
+
+static struct source_map source_map[AQL_ATTRIBUTE_LIMIT];
+#endif /* DB_FEATURE_JOIN */
+
+/*
+ * Statically allocated row buffers. The join code uses the same
+ * buffers under the names left_row, right_row, and join_row.
+ */
+static unsigned char row[DB_MAX_ATTRIBUTES_PER_RELATION * DB_MAX_ELEMENT_SIZE];
+static unsigned char extra_row[DB_MAX_ATTRIBUTES_PER_RELATION * DB_MAX_ELEMENT_SIZE];
+static unsigned char result_row[AQL_ATTRIBUTE_LIMIT * DB_MAX_ELEMENT_SIZE];
+static unsigned char * const left_row = row;
+static unsigned char * const right_row = extra_row;
+static unsigned char * const join_row = result_row;
+
+LIST(relations);
+MEMB(relations_memb, relation_t, DB_RELATION_POOL_SIZE);
+MEMB(attributes_memb, attribute_t, DB_ATTRIBUTE_POOL_SIZE);
+
+static relation_t *relation_find(char *);
+static attribute_t *attribute_find(relation_t *, char *);
+static int get_attribute_value_offset(relation_t *, attribute_t *);
+static void attribute_free(relation_t *, attribute_t *);
+static void purge_relations(void);
+static void relation_clear(relation_t *);
+static relation_t *relation_allocate(void);
+static void relation_free(relation_t *);
+
+/* Look up a relation by name in the list of relations held in memory. */
+static relation_t *
+relation_find(char *name)
+{
+  relation_t *rel;
+
+  for(rel = list_head(relations); rel != NULL; rel = rel->next) {
+    if(strcmp(rel->name, name) == 0) {
+      return rel;
+    }
+  }
+
+  return NULL;
+}
+
+/* Look up an attribute by name, regardless of its flags. */
+static attribute_t *
+attribute_find(relation_t *rel, char *name)
+{
+  attribute_t *attr;
+
+  for(attr = list_head(rel->attributes); attr != NULL; attr = attr->next) {
+    if(strcmp(attr->name, name) == 0) {
+      return attr;
+    }
+  }
+  return NULL;
+}
+
+/*
+ * Return the byte offset of an attribute within a row of the relation,
+ * i.e., the sum of the element sizes of the attributes preceding it.
+ * Returns -1 if the attribute does not belong to the relation.
+ */
+static int
+get_attribute_value_offset(relation_t *rel, attribute_t *attr)
+{
+  attribute_t *ptr;
+  int offset;
+
+  for(offset = 0, ptr = list_head(rel->attributes);
+      ptr != NULL;
+      ptr = ptr->next) {
+    if(ptr == attr) {
+      return offset;
+    }
+    offset += ptr->element_size;
+  }
+
+  return -1;
+}
+
+/*
+ * Release the index of an attribute, if any, and return the attribute
+ * to its memory pool. The caller unlinks it from the attribute list.
+ */
+static void
+attribute_free(relation_t *rel, attribute_t *attr)
+{
+  if(attr->index != NULL) {
+    index_release(attr->index);
+  }
+  memb_free(&attributes_memb, attr);
+  rel->attribute_count--;
+}
+
+/* Free all relations in memory that are no longer referenced. */
+static void
+purge_relations(void)
+{
+  relation_t *rel;
+  relation_t *next;
+
+  for(rel = list_head(relations); rel != NULL;) {
+    /* Save the successor first, because relation_free() unlinks rel. */
+    next = rel->next;
+    if(rel->references == 0) {
+      relation_free(rel);
+    }
+    rel = next;
+  }
+}
+
+/* Reset a relation structure to its initial, empty state. */
+static void
+relation_clear(relation_t *rel)
+{
+  memset(rel, 0, sizeof(*rel));
+  rel->tuple_storage = -1;
+  rel->cardinality = INVALID_TUPLE;
+  rel->dir = DB_STORAGE;
+  LIST_STRUCT_INIT(rel, attributes);
+}
+
+/*
+ * Allocate a cleared relation from the pool. If the pool is exhausted,
+ * unreferenced relations are purged and the allocation is retried once.
+ */
+static relation_t *
+relation_allocate(void)
+{
+  relation_t *rel;
+
+  rel = memb_alloc(&relations_memb);
+  if(rel == NULL) {
+    purge_relations();
+    rel = memb_alloc(&relations_memb);
+    if(rel == NULL) {
+      PRINTF("DB: Failed to allocate a relation\n");
+      return NULL;
+    }
+  }
+
+  relation_clear(rel);
+  return rel;
+}
+
+/* Free all attributes of a relation, unlink it, and free it. */
+static void
+relation_free(relation_t *rel)
+{
+  attribute_t *attr;
+
+  while((attr = list_pop(rel->attributes)) != NULL) {
+    attribute_free(rel, attr);
+  }
+
+  list_remove(relations, rel);
+  memb_free(&relations_memb, rel);
+}
+
+/* Initialize the relation list and the memory pools. */
+db_result_t
+relation_init(void)
+{
+  list_init(relations);
+  memb_init(&relations_memb);
+  memb_init(&attributes_memb);
+
+  return DB_OK;
+}
+
+/*
+ * Get a reference to the named relation. A relation that is already in
+ * memory gets its reference count incremented; otherwise its metadata
+ * is fetched through the storage layer. Returns NULL on failure.
+ */
+relation_t *
+relation_load(char *name)
+{
+  relation_t *rel;
+
+  rel = relation_find(name);
+  if(rel != NULL) {
+    rel->references++;
+    goto end;
+  }
+
+  rel = relation_allocate();
+  if(rel == NULL) {
+    return NULL;
+  }
+
+  if(DB_ERROR(storage_get_relation(rel, name))) {
+    memb_free(&relations_memb, rel);
+    return NULL;
+  }
+
+  /*
+   * Use a bounded string copy rather than memcpy(): the name passed in
+   * may be shorter than rel->name, in which case copying
+   * sizeof(rel->name) bytes would read beyond the end of the source.
+   */
+  strncpy(rel->name, name, sizeof(rel->name) - 1);
+  rel->name[sizeof(rel->name) - 1] = '\0';
+  rel->references = 1;
+  list_add(relations, rel);
+
+end:
+  if(rel->dir == DB_STORAGE && DB_ERROR(storage_load(rel))) {
+    relation_release(rel);
+    return NULL;
+  }
+
+  return rel;
+}
+
+/*
+ * Drop one reference to a relation. When no references remain, the
+ * relation is unloaded from the storage layer; the structure itself
+ * stays in memory until it is purged or removed.
+ */
+db_result_t
+relation_release(relation_t *rel)
+{
+  if(rel->references > 0) {
+    rel->references--;
+  }
+
+  if(rel->references == 0) {
+    storage_unload(rel);
+  }
+
+  return DB_OK;
+}
+
+/*
+ * Create a new, empty relation. The request is rejected (NULL is
+ * returned) if the name is empty, if the storage layer already knows a
+ * relation with this name, or if allocation or storing fails.
+ */
+relation_t *
+relation_create(char *name, db_direction_t dir)
+{
+  relation_t old_rel;
+  relation_t *rel;
+
+  if(*name != '\0') {
+    relation_clear(&old_rel);
+
+    if(storage_get_relation(&old_rel, name) == DB_OK) {
+      /* Reject a creation request if the relation already exists. */
+      PRINTF("DB: Attempted to create a relation that already exists (%s)\n",
+             name);
+      return NULL;
+    }
+
+    rel = relation_allocate();
+    if(rel == NULL) {
+      return NULL;
+    }
+
+    rel->cardinality = 0;
+
+    strncpy(rel->name, name, sizeof(rel->name) - 1);
+    rel->name[sizeof(rel->name) - 1] = '\0';
+    rel->dir = dir;
+
+    if(dir == DB_STORAGE) {
+      storage_drop_relation(rel, 1);
+
+      if(storage_put_relation(rel) == DB_OK) {
+        list_add(relations, rel);
+        return rel;
+      }
+      memb_free(&relations_memb, rel);
+    } else {
+      list_add(relations, rel);
+      return rel;
+    }
+  }
+
+  return NULL;
+}
+
+#if DB_FEATURE_REMOVE
+/*
+ * Rename a relation in storage. Any relation that already has the new
+ * name is removed first (its tuples are kept for the rename to replace).
+ */
+db_result_t
+relation_rename(char *old_name, char *new_name)
+{
+  if(DB_ERROR(relation_remove(new_name, 0)) ||
+     DB_ERROR(storage_rename_relation(old_name, new_name))) {
+    return DB_STORAGE_ERROR;
+  }
+
+  return DB_OK;
+}
+#endif /* DB_FEATURE_REMOVE */
+
+/*
+ * Add an attribute to an empty relation. Returns the new attribute, or
+ * NULL if the relation has tuples, the element size is unacceptable,
+ * the attribute pool is exhausted, or the attribute cannot be stored.
+ */
+attribute_t *
+relation_attribute_add(relation_t *rel, db_direction_t dir, char *name,
+                       domain_t domain, size_t element_size)
+{
+  attribute_t *attribute;
+  tuple_id_t cardinality;
+
+  cardinality = relation_cardinality(rel);
+  if(cardinality != INVALID_TUPLE && cardinality > 0) {
+    PRINTF("DB: Attempt to create an attribute in a non-empty relation\n");
+    return NULL;
+  }
+
+  if(element_size == 0 || element_size > DB_MAX_ELEMENT_SIZE) {
+    PRINTF("DB: Unacceptable element size: %u\n", (unsigned)element_size);
+    return NULL;
+  }
+
+  attribute = memb_alloc(&attributes_memb);
+  if(attribute == NULL) {
+    PRINTF("DB: Failed to allocate attribute \"%s\"!\n", name);
+    return NULL;
+  }
+
+  strncpy(attribute->name, name, sizeof(attribute->name) - 1);
+  attribute->name[sizeof(attribute->name) - 1] = '\0';
+  attribute->domain = domain;
+  attribute->element_size = element_size;
+  attribute->aggregator = 0;
+  attribute->index = NULL;
+  attribute->flags = 0 /*ATTRIBUTE_FLAG_UNIQUE*/;
+
+  rel->row_length += element_size;
+
+  list_add(rel->attributes, attribute);
+  rel->attribute_count++;
+
+  if(dir == DB_STORAGE) {
+    if(DB_ERROR(storage_put_attribute(rel, attribute))) {
+      PRINTF("DB: Failed to store attribute %s\n", attribute->name);
+      /*
+       * Undo the registration above before freeing the attribute;
+       * otherwise the relation would keep a pointer to freed pool
+       * memory, together with a stale row length and attribute count.
+       */
+      list_remove(rel->attributes, attribute);
+      rel->attribute_count--;
+      rel->row_length -= element_size;
+      memb_free(&attributes_memb, attribute);
+      return NULL;
+    }
+  } else {
+    index_load(rel, attribute);
+  }
+
+  return attribute;
+}
+
+/* Look up a valid (not flagged as invalid) attribute by name. */
+attribute_t *
+relation_attribute_get(relation_t *rel, char *name)
+{
+  attribute_t *attr;
+
+  attr = attribute_find(rel, name);
+  if(attr != NULL && !(attr->flags & ATTRIBUTE_FLAG_INVALID)) {
+    return attr;
+  }
+
+  return NULL;
+}
+
+/* Attribute removal is disabled; this always reports an error. */
+db_result_t
+relation_attribute_remove(relation_t *rel, char *name)
+{
+  /* Not implemented completely. */
+  return DB_IMPLEMENTATION_ERROR;
+#if 0
+  attribute_t *attr;
+
+  if(rel->references > 1) {
+    return DB_BUSY_ERROR;
+  }
+
+  attr = relation_attribute_get(rel, name);
+  if(attr == NULL) {
+    return DB_NAME_ERROR;
+  }
+
+  list_remove(rel->attributes, attr);
+  attribute_free(rel, attr);
+  return DB_OK;
+#endif
+}
+
+/*
+ * Convert the physical representation of an attribute in the row
+ * pointed to by row_ptr into an attribute value.
+ */
+db_result_t
+relation_get_value(relation_t *rel, attribute_t *attr,
+                   unsigned char *row_ptr, attribute_value_t *value)
+{
+  int offset;
+  unsigned char *from_ptr;
+
+  offset = get_attribute_value_offset(rel, attr);
+  if(offset < 0) {
+    return DB_IMPLEMENTATION_ERROR;
+  }
+  from_ptr = row_ptr + offset;
+
+  return db_phy_to_value(value, attr, from_ptr);
+}
+
+/*
+ * Mark the named attribute as the primary key. Note that the flags of
+ * the attribute are overwritten, not OR-ed.
+ */
+db_result_t
+relation_set_primary_key(relation_t *rel, char *name)
+{
+  attribute_t *attribute;
+
+  attribute = relation_attribute_get(rel, name);
+  if(attribute == NULL) {
+    return DB_NAME_ERROR;
+  }
+
+  attribute->flags = ATTRIBUTE_FLAG_PRIMARY_KEY;
+  PRINTF("DB: New primary key: %s\n", attribute->name);
+
+  return DB_OK;
+}
+
+/*
+ * Remove a relation from storage and from memory. Removing a relation
+ * that does not exist is not an error. Returns DB_BUSY_ERROR if anyone
+ * else holds a reference to the relation.
+ */
+db_result_t
+relation_remove(char *name, int remove_tuples)
+{
+  relation_t *rel;
+  db_result_t result;
+
+  rel = relation_load(name);
+  if(rel == NULL) {
+    /*
+     * Attempt to remove an inexistent relation. To allow for this
+     * operation to be used for setting up repeatable tests and
+     * experiments, we do not signal an error.
+     */
+    return DB_OK;
+  }
+
+  if(rel->references > 1) {
+    /*
+     * Give back the reference taken by relation_load() above. Without
+     * this, every failed removal attempt would increase the reference
+     * count, and the relation would remain busy indefinitely.
+     */
+    relation_release(rel);
+    return DB_BUSY_ERROR;
+  }
+
+  result = storage_drop_relation(rel, remove_tuples);
+  relation_free(rel);
+  return result;
+}
+
+/*
+ * Insert a tuple into a relation. The values array must hold one value
+ * per attribute, in attribute order. Each value is converted into its
+ * physical representation, indexed attributes are inserted into their
+ * indexes, and the resulting record is written to storage.
+ */
+db_result_t
+relation_insert(relation_t *rel, attribute_value_t *values)
+{
+  size_t size;
+  attribute_t *attr;
+  storage_row_t record;
+  unsigned char *ptr;
+  attribute_value_t *value;
+  db_result_t result;
+
+  value = values;
+
+  size = rel->row_length;
+  PRINTF("DB: Relation %s has a record size of %d bytes\n",
+         rel->name, (int)size);
+  ptr = record = alloca(size);
+
+  PRINTF("DB: Insert (");
+
+  for(attr = list_head(rel->attributes); attr != NULL; attr = attr->next, value++) {
+    /* Verify that the value is in the expected domain. An exception
+       to this rule is that INT may be promoted to LONG. */
+    if(attr->domain != value->domain &&
+       !(attr->domain == DOMAIN_LONG && value->domain == DOMAIN_INT)) {
+      PRINTF("DB: The value domain %d does not match the domain %d of attribute %s\n",
+             value->domain, attr->domain, attr->name);
+      return DB_RELATIONAL_ERROR;
+    }
+
+    /* Set the data area for removed attributes to 0. */
+    if(attr->flags & ATTRIBUTE_FLAG_INVALID) {
+      memset(ptr, 0, attr->element_size);
+      ptr += attr->element_size;
+      continue;
+    }
+
+    result = db_value_to_phy((unsigned char *)ptr, attr, value);
+    if(DB_ERROR(result)) {
+      return result;
+    }
+
+#if DEBUG
+    switch(attr->domain) {
+    case DOMAIN_INT:
+      PRINTF("%s=%d", attr->name, VALUE_INT(value));
+      break;
+    case DOMAIN_LONG:
+      PRINTF("%s=%ld", attr->name, VALUE_LONG(value));
+      break;
+    case DOMAIN_STRING:
+      PRINTF("%s='%s'", attr->name, VALUE_STRING(value));
+      break;
+    default:
+      PRINTF(")\nDB: Unhandled attribute domain: %d\n", attr->domain);
+      return DB_TYPE_ERROR;
+    }
+
+    if(attr->next != NULL) {
+      PRINTF(", ");
+    }
+#endif /* DEBUG */
+
+    ptr += attr->element_size;
+    if(attr->index != NULL) {
+      if(DB_ERROR(index_insert(attr->index, value, rel->next_row))) {
+        return DB_INDEX_ERROR;
+      }
+    }
+  }
+
+  PRINTF(")\n");
+
+  rel->cardinality++;
+  rel->next_row++;
+  return storage_put_row(rel, record);
+}
+
+/*
+ * Fold one value into the running aggregation value of an attribute,
+ * according to the aggregator of the attribute. Values outside the INT
+ * and LONG domains are ignored; MEAN and MEDIAN are not implemented.
+ */
+static void
+aggregate(attribute_t *attr, attribute_value_t *value)
+{
+  long long_value;
+
+  switch(value->domain) {
+  case DOMAIN_INT:
+    long_value = VALUE_INT(value);
+    break;
+  case DOMAIN_LONG:
+    long_value = VALUE_LONG(value);
+    break;
+  default:
+    return;
+  }
+
+  switch(attr->aggregator) {
+  case AQL_COUNT:
+    attr->aggregation_value++;
+    break;
+  case AQL_SUM:
+    attr->aggregation_value += long_value;
+    break;
+  case AQL_MEAN:
+    break;
+  case AQL_MEDIAN:
+    break;
+  case AQL_MAX:
+    if(long_value > attr->aggregation_value) {
+      attr->aggregation_value = long_value;
+    }
+    break;
+  case AQL_MIN:
+    if(long_value < attr->aggregation_value) {
+      attr->aggregation_value = long_value;
+    }
+    break;
+  default:
+    break;
+  }
+}
+
+/*
+ * Fill in one map entry per attribute of the destination relation,
+ * pairing it with the equally named attribute of the source relation
+ * and recording the byte offsets of both within their rows. The
+ * attribute_count, from_row, and to_row parameters are unused.
+ */
+static db_result_t
+generate_attribute_map(struct source_dest_map *attr_map, unsigned attribute_count,
+                       relation_t *from_rel, relation_t *to_rel,
+                       unsigned char *from_row, unsigned char *to_row)
+{
+  attribute_t *from_attr;
+  attribute_t *to_attr;
+  unsigned size_sum;
+  struct source_dest_map *attr_map_ptr;
+  int offset;
+
+  attr_map_ptr = attr_map;
+  for(size_sum = 0, to_attr = list_head(to_rel->attributes);
+      to_attr != NULL;
+      to_attr = to_attr->next) {
+    from_attr = attribute_find(from_rel, to_attr->name);
+    if(from_attr == NULL) {
+      PRINTF("DB: Invalid attribute in the result relation: %s\n",
+             to_attr->name);
+      return DB_NAME_ERROR;
+    }
+
+    attr_map_ptr->from_attr = from_attr;
+    attr_map_ptr->to_attr = to_attr;
+    offset = get_attribute_value_offset(from_rel, from_attr);
+    if(offset < 0) {
+      return DB_IMPLEMENTATION_ERROR;
+    }
+    attr_map_ptr->from_offset = offset;
+    attr_map_ptr->to_offset = size_sum;
+
+    size_sum += to_attr->element_size;
+    attr_map_ptr++;
+  }
+
+  return DB_OK;
+}
+
+/*
+ * Choose an index to drive the selection, if possible. Among the
+ * indexed attributes for which the LVM has derived a value range, the
+ * one with the smallest range is chosen (the later attribute wins a
+ * tie), and an iterator over that range is stored in the handle.
+ */
+static void
+select_index(db_handle_t *handle, lvm_instance_t *lvm_instance)
+{
+  index_t *index;
+  attribute_t *attr;
+  operand_value_t min;
+  operand_value_t max;
+  attribute_value_t av_min;
+  attribute_value_t av_max;
+  unsigned long range;
+  unsigned long min_range;
+
+  index = NULL;
+  min_range = ULONG_MAX;
+
+  /* Find all indexed and derived attributes, and select the index of
+     the attribute with the smallest range. */
+  for(attr = list_head(handle->rel->attributes);
+      attr != NULL;
+      attr = attr->next) {
+    if(attr->index != NULL &&
+       !LVM_ERROR(lvm_get_derived_range(lvm_instance, attr->name, &min, &max))) {
+      range = (unsigned long)max.l - (unsigned long)min.l;
+      PRINTF("DB: The search range for attribute \"%s\" comprises %lu values\n",
+             attr->name, range + 1);
+
+      if(range <= min_range) {
+        /* Remember the smallest range seen so far; without this
+           update, the last indexed attribute would always win. */
+        min_range = range;
+        index = attr->index;
+        av_min.domain = av_max.domain = DOMAIN_INT;
+        VALUE_LONG(&av_min) = min.l;
+        VALUE_LONG(&av_max) = max.l;
+      }
+    }
+  }
+
+  if(index != NULL) {
+    /* We found a suitable index; get an iterator for it. */
+    if(index_get_iterator(&handle->index_iterator, index,
+                          &av_min, &av_max) == DB_OK) {
+      handle->flags |= DB_HANDLE_FLAG_SEARCH_INDEX;
+    }
+  }
+}
+
+/*
+ * Prepare a handle for row-by-row processing of a selection: reset the
+ * row counters, count the stored columns, build the attribute map, and
+ * try to select an index based on the ranges derived by the LVM.
+ */
+static db_result_t
+generate_selection_result(db_handle_t *handle, relation_t *rel, aql_adt_t *adt)
+{
+  relation_t *result_rel;
+  unsigned attribute_count;
+  attribute_t *attr;
+
+  result_rel = handle->result_rel;
+
+  handle->current_row = 0;
+  handle->ncolumns = 0;
+  handle->tuple_id = 0;
+  for(attr = list_head(result_rel->attributes); attr != NULL; attr = attr->next) {
+    if(attr->flags & ATTRIBUTE_FLAG_NO_STORE) {
+      continue;
+    }
+    handle->ncolumns++;
+  }
+  handle->tuple = (tuple_t)result_row;
+
+  attribute_count = result_rel->attribute_count;
+  if(DB_ERROR(generate_attribute_map(attr_map, attribute_count, rel, result_rel, row, result_row))) {
+    return DB_IMPLEMENTATION_ERROR;
+  }
+
+  if(adt->lvm_instance != NULL) {
+    /* Try to establish acceptable ranges for the attribute values. */
+    if(!LVM_ERROR(lvm_derive(adt->lvm_instance))) {
+      select_index(handle, adt->lvm_instance);
+    }
+  }
+
+  handle->flags |= DB_HANDLE_FLAG_PROCESSING;
+
+  return DB_OK;
+}
+
+#if DB_FEATURE_REMOVE
+/*
+ * Process one step of a removal. This is a selection step; once the
+ * selection has finished, the processed relation is released and
+ * relation_rename() is called on the two relation names of the query.
+ */
+db_result_t
+relation_process_remove(void *handle_ptr)
+{
+  db_handle_t *handle;
+  aql_adt_t *adt;
+  db_result_t result;
+
+  handle = (db_handle_t *)handle_ptr;
+  adt = handle->adt;
+
+  result = relation_process_select(handle_ptr);
+  if(result == DB_FINISHED) {
+    PRINTF("DB: Finished removing tuples. Overwriting relation %s with the result\n",
+           adt->relations[1]);
+    relation_release(handle->rel);
+    relation_rename(adt->relations[0], adt->relations[1]);
+  }
+
+  return result;
+}
+#endif
+
+/*
+ * Process the next tuple of a selection.
+ *
+ * Returns DB_GOT_ROW when a result row is available in the result row
+ * buffer, DB_OK when the processed tuple produced no result row,
+ * DB_FINISHED when there are no more tuples, or an error code.
+ */
+db_result_t
+relation_process_select(void *handle_ptr)
+{
+  db_handle_t *handle;
+  aql_adt_t *adt;
+  db_result_t result;
+  unsigned attribute_count;
+  struct source_dest_map *attr_map_ptr, *attr_map_end;
+  attribute_t *result_attr;
+  unsigned char *from_ptr;
+  unsigned char *to_ptr;
+  operand_value_t operand_value;
+  uint8_t intbuf[2];
+  size_t copy_size;
+  attribute_value_t value;
+  lvm_status_t wanted_result;
+
+  handle = (db_handle_t *)handle_ptr;
+  adt = (aql_adt_t *)handle->adt;
+
+  attribute_count = handle->result_rel->attribute_count;
+  attr_map_end = attr_map + attribute_count;
+
+  if(handle->flags & DB_HANDLE_FLAG_SEARCH_INDEX) {
+    handle->tuple_id = index_get_next(&handle->index_iterator);
+    if(handle->tuple_id == INVALID_TUPLE) {
+      PRINTF("DB: An attribute value could not be found in the index\n");
+      if(handle->index_iterator.next_item_no == 0) {
+        return DB_INDEX_ERROR;
+      }
+
+      if(adt->flags & AQL_FLAG_AGGREGATE) {
+        goto end_aggregation;
+      }
+
+      return DB_FINISHED;
+    }
+  }
+
+  /* Put the tuples fulfilling the given condition into a new relation.
+     The tuples may be projected. */
+  result = storage_get_row(handle->rel, &handle->tuple_id, row);
+  handle->tuple_id++;
+  if(DB_ERROR(result)) {
+    PRINTF("DB: Failed to get a row in relation %s!\n", handle->rel->name);
+    return result;
+  } else if(result == DB_FINISHED) {
+    if(AQL_GET_FLAGS(adt) & AQL_FLAG_AGGREGATE) {
+      goto end_aggregation;
+    }
+    return DB_FINISHED;
+  }
+
+  /* Process the attributes in the result relation. */
+  for(attr_map_ptr = attr_map; attr_map_ptr < attr_map_end; attr_map_ptr++) {
+    from_ptr = row + attr_map_ptr->from_offset;
+    result_attr = attr_map_ptr->to_attr;
+
+    /* Update the internal state of the PLE. */
+    if(result_attr->domain == DOMAIN_INT) {
+      operand_value.l = from_ptr[0] << 8 | from_ptr[1];
+      lvm_set_variable_value(result_attr->name, operand_value);
+    } else if(result_attr->domain == DOMAIN_LONG) {
+      operand_value.l = (uint32_t)from_ptr[0] << 24 |
+                        (uint32_t)from_ptr[1] << 16 |
+                        (uint32_t)from_ptr[2] << 8 |
+                        from_ptr[3];
+      lvm_set_variable_value(result_attr->name, operand_value);
+    }
+
+    if(result_attr->flags & ATTRIBUTE_FLAG_NO_STORE) {
+      /* The attribute is used just for the predicate,
+         so do not copy the current value into the result. */
+      continue;
+    }
+
+    if(!(AQL_GET_FLAGS(adt) & AQL_FLAG_AGGREGATE)) {
+      /* No aggregators. Copy the original value into the resulting tuple. */
+      memcpy(result_row + attr_map_ptr->to_offset, from_ptr,
+             result_attr->element_size);
+    }
+  }
+
+  wanted_result = TRUE;
+  if(AQL_GET_FLAGS(adt) & AQL_FLAG_INVERSE_LOGIC) {
+    wanted_result = FALSE;
+  }
+
+  /* Check whether the given predicate is true for this tuple. */
+  if(adt->lvm_instance == NULL ||
+     lvm_execute(adt->lvm_instance) == wanted_result) {
+    if(AQL_GET_FLAGS(adt) & AQL_FLAG_AGGREGATE) {
+      for(attr_map_ptr = attr_map; attr_map_ptr < attr_map_end; attr_map_ptr++) {
+        from_ptr = row + attr_map_ptr->from_offset;
+        result = db_phy_to_value(&value, attr_map_ptr->to_attr, from_ptr);
+        if(DB_ERROR(result)) {
+          return result;
+        }
+        aggregate(attr_map_ptr->to_attr, &value);
+      }
+    } else {
+      if(AQL_GET_FLAGS(adt) & AQL_FLAG_ASSIGN) {
+        if(DB_ERROR(storage_put_row(handle->result_rel, result_row))) {
+          PRINTF("DB: Failed to store a row in the result relation!\n");
+          return DB_STORAGE_ERROR;
+        }
+      }
+      handle->current_row++;
+      return DB_GOT_ROW;
+    }
+  }
+
+  return DB_OK;
+
+end_aggregation:
+  /* Generate aggregated result if requested. */
+  for(attr_map_ptr = attr_map; attr_map_ptr < attr_map_end; attr_map_ptr++) {
+    result_attr = attr_map_ptr->to_attr;
+    to_ptr = result_row + attr_map_ptr->to_offset;
+
+    intbuf[0] = result_attr->aggregation_value >> 8;
+    intbuf[1] = result_attr->aggregation_value & 0xff;
+
+    /*
+     * The result attribute inherits the element size of the source
+     * attribute, which can be larger than the two bytes in intbuf.
+     * Never read beyond intbuf; clear the remaining bytes instead.
+     */
+    copy_size = result_attr->element_size;
+    if(copy_size > sizeof(intbuf)) {
+      memset(to_ptr + sizeof(intbuf), 0, copy_size - sizeof(intbuf));
+      copy_size = sizeof(intbuf);
+    }
+    memcpy(to_ptr, intbuf, copy_size);
+  }
+
+  if(AQL_GET_FLAGS(adt) & AQL_FLAG_ASSIGN) {
+    if(DB_ERROR(storage_put_row(handle->result_rel, result_row))) {
+      PRINTF("DB: Failed to store a row in the result relation!\n");
+      return DB_STORAGE_ERROR;
+    }
+  }
+
+  handle->current_row = 1;
+  AQL_GET_FLAGS(adt) &= ~AQL_FLAG_AGGREGATE; /* Stop the aggregation. */
+
+  return DB_GOT_ROW;
+}
+
+/*
+ * Set up a selection on a relation. A fresh result relation is created
+ * (in storage when the query assigns its result to a relation, and in
+ * memory otherwise), and one result attribute is added per attribute
+ * in the query. Mixing aggregated and normal attributes is rejected.
+ */
+db_result_t
+relation_select(void *handle_ptr, relation_t *rel, void *adt_ptr)
+{
+  aql_adt_t *adt;
+  db_handle_t *handle;
+  char *name;
+  db_direction_t dir;
+  char *attribute_name;
+  attribute_t *attr;
+  int i;
+  int normal_attributes;
+
+  adt = (aql_adt_t *)adt_ptr;
+
+  handle = (db_handle_t *)handle_ptr;
+  handle->rel = rel;
+  handle->adt = adt;
+
+  if(AQL_GET_FLAGS(adt) & AQL_FLAG_ASSIGN) {
+    name = adt->relations[0];
+    dir = DB_STORAGE;
+  } else {
+    name = RESULT_RELATION;
+    dir = DB_MEMORY;
+  }
+  relation_remove(name, 1);
+  relation_create(name, dir);
+  handle->result_rel = relation_load(name);
+
+  if(handle->result_rel == NULL) {
+    PRINTF("DB: Failed to load a relation for the query result\n");
+    return DB_ALLOCATION_ERROR;
+  }
+
+  for(i = normal_attributes = 0; i < AQL_ATTRIBUTE_COUNT(adt); i++) {
+    attribute_name = adt->attributes[i].name;
+
+    attr = relation_attribute_get(rel, attribute_name);
+    if(attr == NULL) {
+      PRINTF("DB: Select for invalid attribute %s in relation %s!\n",
+             attribute_name, rel->name);
+      return DB_NAME_ERROR;
+    }
+
+    PRINTF("DB: Found attribute %s in relation %s\n",
+           attribute_name, rel->name);
+
+    attr = relation_attribute_add(handle->result_rel, dir,
+                                  attribute_name,
+                                  adt->aggregators[i] ? DOMAIN_INT : attr->domain,
+                                  attr->element_size);
+    if(attr == NULL) {
+      PRINTF("DB: Failed to add a result attribute\n");
+      relation_release(handle->result_rel);
+      return DB_ALLOCATION_ERROR;
+    }
+
+    attr->aggregator = adt->aggregators[i];
+    switch(attr->aggregator) {
+    case AQL_NONE:
+      if(!(adt->attributes[i].flags & ATTRIBUTE_FLAG_NO_STORE)) {
+        /* Only count attributes projected into the result set. */
+        normal_attributes++;
+      }
+      break;
+    case AQL_MAX:
+      attr->aggregation_value = LONG_MIN;
+      break;
+    case AQL_MIN:
+      attr->aggregation_value = LONG_MAX;
+      break;
+    default:
+      attr->aggregation_value = 0;
+      break;
+    }
+
+    attr->flags = adt->attributes[i].flags;
+  }
+
+  /* Preclude mixes of normal attributes and aggregated ones in
+     selection results. */
+  if(normal_attributes > 0 &&
+     handle->result_rel->attribute_count > normal_attributes) {
+    return DB_RELATIONAL_ERROR;
+  }
+
+  return generate_selection_result(handle, rel, adt);
+}
+
+#if DB_FEATURE_JOIN
+/*
+ * Produce the next row of a join. The function is resumable: when
+ * DB_HANDLE_FLAG_INDEX_STEP is cleared on entry, processing continues
+ * inside the inner loop with the index iterator stored in the handle.
+ */
+db_result_t
+relation_process_join(void *handle_ptr)
+{
+  db_handle_t *handle;
+  db_result_t result;
+  relation_t *left_rel;
+  relation_t *right_rel;
+  relation_t *join_rel;
+  unsigned char *join_next_attribute_ptr;
+  size_t element_size;
+  tuple_id_t right_tuple_id;
+  attribute_value_t value;
+  int i;
+
+  handle = (db_handle_t *)handle_ptr;
+  left_rel = handle->left_rel;
+  right_rel = handle->right_rel;
+  join_rel = handle->join_rel;
+
+  if(!(handle->flags & DB_HANDLE_FLAG_INDEX_STEP)) {
+    goto inner_loop;
+  }
+
+  /* Equi-join for indexed attributes only. In the outer loop, we iterate over
+     each tuple in the left relation. */
+  for(handle->tuple_id = 0;; handle->tuple_id++) {
+    result = storage_get_row(left_rel, &handle->tuple_id, left_row);
+    if(DB_ERROR(result)) {
+      PRINTF("DB: Failed to get a row in left relation %s!\n", left_rel->name);
+      return result;
+    } else if(result == DB_FINISHED) {
+      return DB_FINISHED;
+    }
+
+    if(DB_ERROR(relation_get_value(left_rel, handle->left_join_attr, left_row, &value))) {
+      PRINTF("DB: Failed to get a value of the attribute \"%s\" to join on\n",
+             handle->left_join_attr->name);
+      return DB_IMPLEMENTATION_ERROR;
+    }
+
+    if(DB_ERROR(index_get_iterator(&handle->index_iterator,
+                                   handle->right_join_attr->index,
+                                   &value, &value))) {
+      PRINTF("DB: Failed to get an index iterator\n");
+      return DB_INDEX_ERROR;
+    }
+    handle->flags &= ~DB_HANDLE_FLAG_INDEX_STEP;
+
+    /* In the inner loop, we iterate over all rows with a matching value for the
+       join attribute. The index component provides an iterator for this purpose. */
+inner_loop:
+    for(;;) {
+      /* Get all rows matching the attribute value in the right relation. */
+      right_tuple_id = index_get_next(&handle->index_iterator);
+      if(right_tuple_id == INVALID_TUPLE) {
+        /* Exclude this row from the left relation in the result,
+           and step to the next value in the index iteration. */
+        handle->flags |= DB_HANDLE_FLAG_INDEX_STEP;
+        break;
+      }
+
+      result = storage_get_row(right_rel, &right_tuple_id, right_row);
+      if(DB_ERROR(result)) {
+        PRINTF("DB: Failed to get a row in right relation %s!\n", right_rel->name);
+        return result;
+      } else if(result == DB_FINISHED) {
+        PRINTF("DB: The index refers to an invalid row: %lu\n",
+               (unsigned long)right_tuple_id);
+        return DB_IMPLEMENTATION_ERROR;
+      }
+
+      /* Use the source attribute map to fill in the physical representation
+         of the resulting tuple. */
+      join_next_attribute_ptr = join_row;
+
+      for(i = 0; i < join_rel->attribute_count; i++) {
+        element_size = source_map[i].attr->element_size;
+
+        memcpy(join_next_attribute_ptr, source_map[i].from_ptr, element_size);
+        join_next_attribute_ptr += element_size;
+      }
+
+      if(((aql_adt_t *)handle->adt)->flags & AQL_FLAG_ASSIGN) {
+        if(DB_ERROR(storage_put_row(join_rel, join_row))) {
+          return DB_STORAGE_ERROR;
+        }
+      }
+
+      handle->current_row++;
+      return DB_GOT_ROW;
+    }
+  }
+
+  return DB_OK;
+}
+
+/*
+ * Build the source map for a join: for each attribute of the join
+ * relation, record the equally named attribute of the left relation
+ * (or else of the right relation), and where its data is located in
+ * the corresponding row buffer.
+ */
+static db_result_t
+generate_join_result(db_handle_t *handle)
+{
+  relation_t *left_rel;
+  relation_t *right_rel;
+  relation_t *join_rel;
+  attribute_t *attr;
+  attribute_t *result_attr;
+  struct source_map *source_pair;
+  int i;
+  int offset;
+  unsigned char *from_ptr;
+
+  handle->tuple = (tuple_t)join_row;
+  handle->tuple_id = 0;
+
+  left_rel = handle->left_rel;
+  right_rel = handle->right_rel;
+  join_rel = handle->join_rel;
+
+  /* Generate a map over the source attributes for each
+     attribute in the join relation. */
+  for(i = 0, result_attr = list_head(join_rel->attributes);
+      result_attr != NULL;
+      result_attr = result_attr->next, i++) {
+    source_pair = &source_map[i];
+    attr = attribute_find(left_rel, result_attr->name);
+    if(attr != NULL) {
+      offset = get_attribute_value_offset(left_rel, attr);
+      from_ptr = left_row + offset;
+    } else if((attr = attribute_find(right_rel, result_attr->name)) != NULL) {
+      offset = get_attribute_value_offset(right_rel, attr);
+      from_ptr = right_row + offset;
+    } else {
+      PRINTF("DB: The attribute %s could not be found\n", result_attr->name);
+      return DB_NAME_ERROR;
+    }
+
+    if(offset < 0) {
+      PRINTF("DB: Unable to retrieve attribute values for the JOIN result\n");
+      return DB_IMPLEMENTATION_ERROR;
+    }
+
+    source_pair->attr = attr;
+    source_pair->from_ptr = from_ptr;
+  }
+
+  handle->flags |= DB_HANDLE_FLAG_PROCESSING;
+
+  return DB_OK;
+}
+
+/*
+ * Set up a join of handle->left_rel and handle->right_rel on the first
+ * attribute of the query. The attribute must exist in both relations
+ * and must be indexed in the right relation. The remaining attributes
+ * of the query are projected into a fresh join relation.
+ */
+db_result_t
+relation_join(void *query_result, void *adt_ptr)
+{
+  aql_adt_t *adt;
+  db_handle_t *handle;
+  relation_t *left_rel;
+  relation_t *right_rel;
+  relation_t *join_rel;
+  char *name;
+  db_direction_t dir;
+  int i;
+  char *attribute_name;
+  attribute_t *attr;
+
+  adt = (aql_adt_t *)adt_ptr;
+
+  handle = (db_handle_t *)query_result;
+  handle->current_row = 0;
+  handle->ncolumns = 0;
+  handle->adt = adt;
+  handle->flags = DB_HANDLE_FLAG_INDEX_STEP;
+
+  if(AQL_GET_FLAGS(adt) & AQL_FLAG_ASSIGN) {
+    name = adt->relations[0];
+    dir = DB_STORAGE;
+  } else {
+    name = RESULT_RELATION;
+    dir = DB_MEMORY;
+  }
+  relation_remove(name, 1);
+  relation_create(name, dir);
+  join_rel = relation_load(name);
+  handle->result_rel = join_rel;
+
+  if(join_rel == NULL) {
+    PRINTF("DB: Failed to create a join relation!\n");
+    return DB_ALLOCATION_ERROR;
+  }
+
+  handle->join_rel = handle->result_rel = join_rel;
+  left_rel = handle->left_rel;
+  right_rel = handle->right_rel;
+
+  handle->left_join_attr = relation_attribute_get(left_rel, adt->attributes[0].name);
+  handle->right_join_attr = relation_attribute_get(right_rel, adt->attributes[0].name);
+  if(handle->left_join_attr == NULL || handle->right_join_attr == NULL) {
+    PRINTF("DB: The attribute (\"%s\") to join on does not exist in both relations\n",
+           adt->attributes[0].name);
+    return DB_RELATIONAL_ERROR;
+  }
+
+  if(!index_exists(handle->right_join_attr)) {
+    PRINTF("DB: The attribute to join on is not indexed\n");
+    return DB_INDEX_ERROR;
+  }
+
+  /*
+   * Define the resulting relation. We start from 1 when counting attributes
+   * because the first attribute is only the one to join, and is not included
+   * by default in the projected attributes.
+   */
+  for(i = 1; i < AQL_ATTRIBUTE_COUNT(adt); i++) {
+    attribute_name = adt->attributes[i].name;
+    attr = relation_attribute_get(left_rel, attribute_name);
+    if(attr == NULL) {
+      attr = relation_attribute_get(right_rel, attribute_name);
+      if(attr == NULL) {
+        PRINTF("DB: The projection attribute \"%s\" does not exist in any of the relations to join\n",
+               attribute_name);
+        return DB_RELATIONAL_ERROR;
+      }
+    }
+
+    if(relation_attribute_add(join_rel, dir, attr->name, attr->domain,
+                              attr->element_size) == NULL) {
+      PRINTF("DB: Failed to add an attribute to the join relation\n");
+      return DB_ALLOCATION_ERROR;
+    }
+
+    handle->ncolumns++;
+  }
+
+  return generate_join_result(handle);
+}
+#endif /* DB_FEATURE_JOIN */
+
+/*
+ * Return the number of tuples in a relation. A cached value is used
+ * when available; otherwise the amount is requested from the storage
+ * layer and cached. Returns INVALID_TUPLE if the request fails.
+ */
+tuple_id_t
+relation_cardinality(relation_t *rel)
+{
+  tuple_id_t tuple_id;
+
+
+  if(rel->cardinality != INVALID_TUPLE) {
+    return rel->cardinality;
+  }
+
+  if(!RELATION_HAS_TUPLES(rel)) {
+    return 0;
+  }
+
+  if(DB_ERROR(storage_get_row_amount(rel, &tuple_id))) {
+    return INVALID_TUPLE;
+  }
+
+  rel->cardinality = tuple_id;
+
+  PRINTF("DB: Relation %s has cardinality %lu\n", rel->name,
+         (unsigned long)tuple_id);
+
+  return tuple_id;
+}
diff --git a/apps/antelope/relation.h b/apps/antelope/relation.h
new file mode 100644
index 000000000..a881e6e07
--- /dev/null
+++ b/apps/antelope/relation.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Definitions and declarations for relations: the relation structure and the API for creating, loading, modifying, and querying relations. 
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#ifndef RELATION_H
+#define RELATION_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/list.h"
+
+#include "attribute.h"
+#include "db-options.h"
+#include "db-types.h"
+
+typedef uint32_t tuple_id_t;
+#define INVALID_TUPLE ((tuple_id_t)-1) /* Parenthesized so it is safe inside larger expressions. */
+
+typedef enum db_direction {
+  DB_MEMORY = 0,
+  DB_STORAGE = 1
+} db_direction_t;
+
+#define RELATION_HAS_TUPLES(rel) ((rel)->tuple_storage >= 0)
+
+/*
+ * A relation consists of a name, a set of domains, a set of indexes,
+ * and a set of keys. Each relation must have a primary key.
+ */
+struct relation {
+  struct relation *next;
+  LIST_STRUCT(attributes);
+  attribute_t *primary_key;
+  size_t row_length;                 /* Size in bytes of one stored tuple. */
+  attribute_id_t attribute_count;
+  tuple_id_t cardinality;            /* Cached row count; INVALID_TUPLE when not yet known. */
+  tuple_id_t next_row;
+  db_storage_id_t tuple_storage;     /* Storage handle; negative while no tuple file is open. */
+  db_direction_t dir;
+  uint8_t references;
+  char name[RELATION_NAME_LENGTH + 1];
+  char tuple_filename[RELATION_NAME_LENGTH + 1];
+};
+
+typedef struct relation relation_t;
+
+/* API for relations. */
+db_result_t relation_init(void);
+db_result_t relation_process_remove(void *);
+db_result_t relation_process_select(void *);
+db_result_t relation_process_join(void *);
+relation_t *relation_load(char *);
+db_result_t relation_release(relation_t *);
+relation_t *relation_create(char *, db_direction_t);
+db_result_t relation_rename(char *, char *);
+attribute_t *relation_attribute_add(relation_t *, db_direction_t, char *,
+                                    domain_t, size_t);
+attribute_t *relation_attribute_get(relation_t *, char *);
+db_result_t relation_get_value(relation_t *, attribute_t *,
+                               unsigned char *, attribute_value_t *);
+db_result_t relation_attribute_remove(relation_t *, char *);
+db_result_t relation_set_primary_key(relation_t *, char *);
+db_result_t relation_remove(char *, int);
+db_result_t relation_insert(relation_t *, attribute_value_t *);
+db_result_t relation_select(void *, relation_t *, void *);
+db_result_t relation_join(void *, void *);
+tuple_id_t relation_cardinality(relation_t *);
+
+#endif /* RELATION_H */ diff --git a/apps/antelope/result.c b/apps/antelope/result.c new file mode 100644 index 000000000..e4e9fdf55 --- /dev/null +++ b/apps/antelope/result.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Result acquisition interface for AQL queries. 
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#include <string.h>
+
+#define DEBUG DEBUG_NONE
+#include "net/uip-debug.h"
+
+#include "result.h"
+#include "storage.h"
+
+/*
+ * db_get_value: Retrieve the value of the specified attribute in
+ * the current tuple.
+ *
+ * value:  receives the decoded attribute value.
+ * handle: query handle whose current tuple and result relation are read.
+ * col:    zero-based column number, counting only stored attributes.
+ *
+ * Returns DB_LIMIT_ERROR when col is not below handle->ncolumns,
+ * DB_NAME_ERROR when the attribute list ends before col is reached,
+ * and otherwise the result of db_phy_to_value().
+ */
+db_result_t
+db_get_value(attribute_value_t *value, db_handle_t *handle, unsigned col)
+{
+  attribute_t *attr;
+  unsigned char *field;
+  unsigned to_skip;
+
+  if(col >= handle->ncolumns) {
+    PRINTF("DB: Requested value (%u) is out of bounds; max = (%d)\n",
+           col, handle->ncolumns);
+    return DB_LIMIT_ERROR;
+  }
+
+  /* Walk the stored attributes, advancing through the tuple buffer by
+     each attribute's element size until the requested column is found. */
+  field = handle->tuple;
+  to_skip = col;
+
+  for(attr = list_head(handle->result_rel->attributes); attr != NULL; attr = attr->next) {
+    if(attr->flags & ATTRIBUTE_FLAG_NO_STORE) {
+      /* This attribute was used for processing only. */
+      continue;
+    }
+    PRINTF("Found attribute %s in the result. The element size is %d\n",
+           attr->name, attr->element_size);
+    if(to_skip == 0) {
+      break;
+    }
+    to_skip--;
+    field += attr->element_size;
+  }
+
+  if(attr == NULL) {
+    return DB_NAME_ERROR;
+  }
+
+  return db_phy_to_value(value, attr, field);
+}
+
+/* db_phy_to_value: Convert a value from the physical storage
+   representation to the internal RAM representation.
*/
+/*
+ * Integers are stored big-endian: DOMAIN_INT occupies two bytes and
+ * DOMAIN_LONG four bytes. The bytes are assembled in unsigned arithmetic
+ * and then converted through the matching fixed-width signed type, so
+ * that a stored negative number keeps its sign regardless of how wide
+ * int and long are on the platform.
+ */
+db_result_t
+db_phy_to_value(attribute_value_t *value, attribute_t *attr,
+                unsigned char *ptr)
+{
+  int int_value;
+  long long_value;
+  uint16_t raw16;
+  uint32_t raw32;
+
+  value->domain = attr->domain;
+
+  switch(attr->domain) {
+  case DOMAIN_STRING:
+    /* The stored string is terminated in place and returned by reference. */
+    ptr[attr->element_size - 1] = '\0';
+    VALUE_STRING(value) = ptr;
+    PRINTF("DB: %s = %s\n", attr->name, ptr);
+    break;
+  case DOMAIN_INT:
+    raw16 = (uint16_t)(((uint16_t)ptr[0] << 8) | (uint16_t)ptr[1]);
+    int_value = (int16_t)raw16;
+    VALUE_INT(value) = int_value;
+    PRINTF("DB: %s = %d\n", attr->name, int_value);
+    break;
+  case DOMAIN_LONG:
+    raw32 = ((uint32_t)ptr[0] << 24) | ((uint32_t)ptr[1] << 16) |
+            ((uint32_t)ptr[2] << 8) | (uint32_t)ptr[3];
+    long_value = (int32_t)raw32;
+    VALUE_LONG(value) = long_value;
+    PRINTF("DB: %s = %ld\n", attr->name, long_value);
+    break;
+  default:
+    return DB_TYPE_ERROR;
+  }
+
+  return DB_OK;
+}
+
+/*
+ * db_value_to_phy: Convert a value from the internal RAM representation
+ * to the physical storage representation.
+ *
+ * Strings are copied as attr->element_size bytes and always terminated in
+ * the last byte. Integers are written big-endian (two bytes for DOMAIN_INT,
+ * four for DOMAIN_LONG); the shifts are done on unsigned copies so that
+ * negative values are encoded without relying on signed right shifts.
+ * Returns DB_TYPE_ERROR for an unknown domain.
+ */
+db_result_t
+db_value_to_phy(unsigned char *ptr, attribute_t *attr,
+                attribute_value_t *value)
+{
+  unsigned int_bits;
+  unsigned long long_bits;
+
+  switch(attr->domain) {
+  case DOMAIN_STRING:
+    /* NOTE(review): copies element_size bytes from the source string;
+       presumably the caller's buffer is at least that large - confirm. */
+    memcpy(ptr, VALUE_STRING(value), attr->element_size);
+    ptr[attr->element_size - 1] = '\0';
+    break;
+  case DOMAIN_INT:
+    int_bits = (unsigned)VALUE_INT(value);
+    ptr[0] = (unsigned char)((int_bits >> 8) & 0xff);
+    ptr[1] = (unsigned char)(int_bits & 0xff);
+    break;
+  case DOMAIN_LONG:
+    long_bits = (unsigned long)VALUE_LONG(value);
+    ptr[0] = (unsigned char)((long_bits >> 24) & 0xff);
+    ptr[1] = (unsigned char)((long_bits >> 16) & 0xff);
+    ptr[2] = (unsigned char)((long_bits >> 8) & 0xff);
+    ptr[3] = (unsigned char)(long_bits & 0xff);
+    break;
+  default:
+    return DB_TYPE_ERROR;
+  }
+
+  return DB_OK;
+}
+
+/* db_value_to_long: Convert an attribute value
+   to a value of the C long type. Values whose domain is neither
+   DOMAIN_INT nor DOMAIN_LONG yield 0. */
+long
+db_value_to_long(attribute_value_t *value)
+{
+  switch(value->domain) {
+  case DOMAIN_INT:
+    return (long)VALUE_INT(value);
+  case DOMAIN_LONG:
+    return (long)VALUE_LONG(value);
+  default:
+    return 0;
+  }
+}
+
+/* db_free: Free all the resources that are referenced in a DB handle.
*/
+db_result_t
+db_free(db_handle_t *handle)
+{
+  relation_t **held[4];
+  unsigned i;
+
+  /* The relations are released in this order: the queried relation,
+     the result relation, then the two join operands. Empty slots are
+     skipped. */
+  held[0] = &handle->rel;
+  held[1] = &handle->result_rel;
+  held[2] = &handle->left_rel;
+  held[3] = &handle->right_rel;
+
+  for(i = 0; i < sizeof(held) / sizeof(held[0]); i++) {
+    if(*held[i] != NULL) {
+      relation_release(*held[i]);
+    }
+  }
+
+  /* Clear all processing flags on the handle. */
+  handle->flags = 0;
+
+  return DB_OK;
+}
diff --git a/apps/antelope/result.h b/apps/antelope/result.h
new file mode 100644
index 000000000..0788b77f6
--- /dev/null
+++ b/apps/antelope/result.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Institute nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *      Declarations for the result acquisition API.
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#ifndef RESULT_H
+#define RESULT_H
+
+#include "index.h"
+#include "relation.h"
+#include "storage.h"
+
+#define RESULT_TUPLE_INVALID(tuple)     ((tuple) == NULL)
+/* The whole expansion is parenthesized so that the macro can be used
+   safely as an operand of a larger expression. */
+#define RESULT_TUPLE_SIZE(handle)       ((handle).rel->row_length)
+
+typedef unsigned char *tuple_t;
+
+/* Bit flags stored in db_handle.flags. */
+#define DB_HANDLE_FLAG_INDEX_STEP       0x01
+#define DB_HANDLE_FLAG_SEARCH_INDEX     0x02
+#define DB_HANDLE_FLAG_PROCESSING       0x04
+
+/* State carried by a query between its submission and the retrieval
+   of its results. */
+struct db_handle {
+  index_iterator_t index_iterator;
+  tuple_id_t tuple_id;
+  tuple_id_t current_row;
+  relation_t *rel;
+  relation_t *left_rel;
+  relation_t *join_rel;
+  relation_t *right_rel;
+  relation_t *result_rel;
+  attribute_t *left_join_attr;
+  attribute_t *right_join_attr;
+  tuple_t tuple;
+  uint8_t flags;
+  uint8_t ncolumns;
+  void *adt;
+};
+typedef struct db_handle db_handle_t;
+
+db_result_t db_get_value(attribute_value_t *value,
+                         db_handle_t *handle, unsigned col);
+db_result_t db_phy_to_value(attribute_value_t *value,
+                            attribute_t *attr, unsigned char *ptr);
+db_result_t db_value_to_phy(unsigned char *ptr,
+                            attribute_t *attr, attribute_value_t *value);
+long db_value_to_long(attribute_value_t *value);
+db_result_t db_free(db_handle_t *handle);
+
+#endif /* !RESULT_H */
diff --git a/apps/antelope/storage-cfs.c b/apps/antelope/storage-cfs.c
new file mode 100644
index 000000000..425873e2a
--- /dev/null
+++ b/apps/antelope/storage-cfs.c
@@
-0,0 +1,583 @@ +/* + * Copyright (c) 2010, Swedish Institute of Computer Science + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/** + * \file + * Contiki File System (CFS) backend for the storage abstraction + * used by the database. 
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "cfs/cfs.h"
+#include "cfs/cfs-coffee.h"
+#include "lib/random.h"
+
+#define DEBUG DEBUG_NONE
+#include "net/uip-debug.h"
+
+#include "db-options.h"
+#include "storage.h"
+
+/* Layout of one attribute entry appended to a relation's catalog file. */
+struct attribute_record {
+  char name[ATTRIBUTE_NAME_LENGTH];
+  uint8_t domain;
+  uint8_t element_size;
+};
+
+/* Layout of one entry in a relation's index catalog file. */
+struct index_record {
+  char attribute_name[ATTRIBUTE_NAME_LENGTH];
+  char file_name[DB_MAX_FILENAME_LENGTH];
+  uint8_t type;
+};
+
+#if DB_FEATURE_COFFEE
+/*
+ * Size reserved for a catalog file. The expansion is parenthesized so
+ * that it stays intact when used inside a larger expression.
+ * NOTE(review): the catalog written by storage_put_relation() starts with
+ * both the relation name and the tuple filename, so this looks like an
+ * under-estimate - confirm whether the reservation should be larger.
+ */
+#define DB_COFFEE_CATALOG_SIZE (RELATION_NAME_LENGTH + \
+                                (DB_MAX_ATTRIBUTES_PER_RELATION * \
+                                 sizeof(struct attribute_record)))
+#endif
+
+/* Value XORed into the last byte of stored rows and filenames so that
+   the final stored byte is separated from zero. */
+#define ROW_XOR 0xf6U
+
+/* Write prefix followed by suffix into dest, never storing more than
+   size bytes (including the terminating NUL). */
+static void
+merge_strings(char *dest, size_t size, const char *prefix, const char *suffix)
+{
+  snprintf(dest, size, "%s%s", prefix, suffix);
+}
+
+/*
+ * storage_generate_file: Create a file named "<prefix>.<random hex>".
+ *
+ * With Coffee enabled, size bytes are reserved for the file; otherwise
+ * the file is created by opening it for writing. Returns a pointer to a
+ * static buffer holding the name (overwritten by the next call), or NULL
+ * if the file could not be created.
+ */
+char *
+storage_generate_file(char *prefix, unsigned long size)
+{
+  static char filename[ATTRIBUTE_NAME_LENGTH + sizeof(".ffff")];
+#if !DB_FEATURE_COFFEE
+  int fd;
+#endif
+
+  snprintf(filename, sizeof(filename), "%s.%x", prefix,
+           (unsigned)(random_rand() & 0xffff));
+
+#if DB_FEATURE_COFFEE
+  PRINTF("DB: Reserving %lu bytes in %s\n", size, filename);
+  if(cfs_coffee_reserve(filename, size) < 0) {
+    PRINTF("DB: Failed to reserve\n");
+    return NULL;
+  }
+  return filename;
+#else
+  fd = cfs_open(filename, CFS_WRITE);
+  if(fd < 0) {
+    /* Nothing was opened, so there is nothing to close. */
+    return NULL;
+  }
+  cfs_close(fd);
+  return filename;
+#endif /* DB_FEATURE_COFFEE */
+}
+
+/* storage_load: Open the tuple file of a relation for reading and
+   appending, and store the descriptor in rel->tuple_storage. */
+db_result_t
+storage_load(relation_t *rel)
+{
+  PRINTF("DB: Opening the tuple file %s\n", rel->tuple_filename);
+  rel->tuple_storage = cfs_open(rel->tuple_filename,
+                                CFS_READ | CFS_WRITE | CFS_APPEND);
+  if(rel->tuple_storage < 0) {
+    PRINTF("DB: Failed to open the tuple file\n");
+    return DB_STORAGE_ERROR;
+  }
+
+  return DB_OK;
+}
+
+/* storage_unload: Close the tuple file of a relation, if it is open. */
+void
+storage_unload(relation_t *rel)
+{
+  if(RELATION_HAS_TUPLES(rel)) {
+    PRINTF("DB: Unload tuple file %s\n", rel->tuple_filename);
+
+    cfs_close(rel->tuple_storage);
+    rel->tuple_storage = -1;
+  }
+}
+
+/*
+ * storage_get_relation: Read the catalog file called name into rel.
+ *
+ * The catalog holds the relation name, the tuple filename (with its last
+ * byte XOR-encoded), and then zero or more attribute records, each of
+ * which is added to rel as a DB_MEMORY attribute.
+ */
+db_result_t
+storage_get_relation(relation_t *rel, char *name)
+{
+  int fd;
+  int r;
+  int i;
+  struct attribute_record record;
+  db_result_t result;
+
+  fd = cfs_open(name, CFS_READ);
+  if(fd < 0) {
+    return DB_STORAGE_ERROR;
+  }
+
+  r = cfs_read(fd, rel->name, sizeof(rel->name));
+  if(r != (int)sizeof(rel->name)) {
+    cfs_close(fd);
+    PRINTF("DB: Failed to read name, got %d of %d bytes\n",
+           r, (int)sizeof(rel->name));
+    return DB_STORAGE_ERROR;
+  }
+  /* Do not trust the stored bytes to be terminated. */
+  rel->name[sizeof(rel->name) - 1] = '\0';
+
+  r = cfs_read(fd, rel->tuple_filename, sizeof(rel->tuple_filename));
+  if(r != (int)sizeof(rel->tuple_filename)) {
+    cfs_close(fd);
+    PRINTF("DB: Failed to read tuple filename\n");
+    return DB_STORAGE_ERROR;
+  }
+
+  /* Undo the encoding applied by storage_put_relation(). */
+  rel->tuple_filename[sizeof(rel->tuple_filename) - 1] ^= ROW_XOR;
+
+  /* Read attribute records. */
+  result = DB_OK;
+  for(i = 0;; i++) {
+    r = cfs_read(fd, &record, sizeof(record));
+    if(r == 0) {
+      break;
+    }
+    if(r != (int)sizeof(record)) {
+      PRINTF("DB: Failed to read attribute record %d (r = %d)\n", i, r);
+      result = DB_STORAGE_ERROR;
+      break;
+    }
+
+    if(relation_attribute_add(rel, DB_MEMORY, record.name,
+                              record.domain, record.element_size) == NULL) {
+      PRINTF("DB: Failed to add the attribute %s\n", record.name);
+      result = DB_STORAGE_ERROR;
+      break;
+    }
+  }
+
+  PRINTF("DB: Read %d attributes\n", i);
+
+  cfs_close(fd);
+  return result;
+}
+
+/*
+ * storage_put_relation: (Re)create the catalog file of a relation.
+ *
+ * Any existing catalog is removed first. The relation name and the tuple
+ * filename are then written; a tuple file is generated if the relation
+ * does not have one yet. On failure the partially written catalog is
+ * removed.
+ */
+db_result_t
+storage_put_relation(relation_t *rel)
+{
+  int fd;
+  int r;
+  char *str;
+  unsigned char *last_byte;
+
+  PRINTF("DB: put_relation(%s)\n", rel->name);
+
+  cfs_remove(rel->name);
+
+#if DB_FEATURE_COFFEE
+  /* The result is not checked; cfs_open() below reports failure. */
+  cfs_coffee_reserve(rel->name, DB_COFFEE_CATALOG_SIZE);
+#endif
+
+  fd = cfs_open(rel->name, CFS_WRITE | CFS_READ);
+  if(fd < 0) {
+    return DB_STORAGE_ERROR;
+  }
+
+  r = cfs_write(fd, rel->name, sizeof(rel->name));
+  if(r != (int)sizeof(rel->name)) {
+    cfs_close(fd);
+    cfs_remove(rel->name);
+    return DB_STORAGE_ERROR;
+  }
+
+  if(rel->tuple_filename[0] == '\0') {
+    str = storage_generate_file("tuple", DB_COFFEE_RESERVE_SIZE);
+    if(str == NULL) {
+      cfs_close(fd);
+      cfs_remove(rel->name);
+      return DB_STORAGE_ERROR;
+    }
+
+    strncpy(rel->tuple_filename, str, sizeof(rel->tuple_filename) - 1);
+    rel->tuple_filename[sizeof(rel->tuple_filename) - 1] = '\0';
+  }
+
+  /*
+   * Encode the last byte to ensure that the filename is not
+   * null-terminated. This will make the Coffee FS determine
+   * the correct length when re-opening the file.
+   */
+  last_byte = (unsigned char *)&rel->tuple_filename[sizeof(rel->tuple_filename) - 1];
+  *last_byte ^= ROW_XOR;
+
+  r = cfs_write(fd, rel->tuple_filename, sizeof(rel->tuple_filename));
+
+  *last_byte ^= ROW_XOR;
+
+  if(r != (int)sizeof(rel->tuple_filename)) {
+    cfs_close(fd);
+    /* Remove the half-written catalog, as the earlier error paths do,
+       in addition to the tuple file. */
+    cfs_remove(rel->name);
+    cfs_remove(rel->tuple_filename);
+    return DB_STORAGE_ERROR;
+  }
+
+  PRINTF("DB: Saved relation %s\n", rel->name);
+
+  cfs_close(fd);
+  return DB_OK;
+}
+
+/* storage_put_attribute: Append an attribute record to the catalog file
+   of a relation. The catalog is removed if the record cannot be written. */
+db_result_t
+storage_put_attribute(relation_t *rel, attribute_t *attr)
+{
+  int fd;
+  struct attribute_record record;
+  int r;
+
+  PRINTF("DB: put_attribute(%s, %s)\n", rel->name, attr->name);
+
+  fd = cfs_open(rel->name, CFS_WRITE | CFS_APPEND);
+  if(fd < 0) {
+    return DB_STORAGE_ERROR;
+  }
+
+  memset(&record.name, 0, sizeof(record.name));
+  memcpy(record.name, attr->name, sizeof(record.name));
+  record.domain = attr->domain;
+  record.element_size = attr->element_size;
+  r = cfs_write(fd, &record, sizeof(record));
+  if(r != (int)sizeof(record)) {
+    cfs_close(fd);
+    cfs_remove(rel->name);
+    return DB_STORAGE_ERROR;
+  }
+
+  cfs_close(fd);
+  return DB_OK;
+}
+
+/* storage_drop_relation: Remove the catalog file of a relation and,
+   if remove_tuples is non-zero and a tuple file is open, the tuple file. */
+db_result_t
+storage_drop_relation(relation_t *rel, int remove_tuples)
+{
+  if(remove_tuples && RELATION_HAS_TUPLES(rel)) {
+    cfs_remove(rel->tuple_filename);
+  }
+  return cfs_remove(rel->name) < 0 ? DB_STORAGE_ERROR : DB_OK;
+}
+
+#if DB_FEATURE_REMOVE
+/*
+ * storage_rename_relation: Copy the file old_name to new_name and then
+ * remove old_name.
+ *
+ * If old_name cannot be opened, new_name is left untouched. If the copy
+ * fails after that point, new_name is removed.
+ */
+db_result_t
+storage_rename_relation(char *old_name, char *new_name)
+{
+  db_result_t result;
+  int old_fd;
+  int new_fd;
+  int r;
+  char buf[64];
+
+  result = DB_STORAGE_ERROR;
+
+  old_fd = cfs_open(old_name, CFS_READ);
+  if(old_fd < 0) {
+    /* There is nothing to copy, so do not create or delete new_name. */
+    return DB_STORAGE_ERROR;
+  }
+
+  new_fd = cfs_open(new_name, CFS_WRITE);
+  if(new_fd < 0) {
+    goto error;
+  }
+
+  for(;;) {
+    r = cfs_read(old_fd, buf, sizeof(buf));
+    if(r < 0) {
+      goto error;
+    } else if(r == 0) {
+      break;
+    }
+    if(cfs_write(new_fd, buf, r) != r) {
+      goto error;
+    }
+  }
+
+  cfs_remove(old_name);
+  result = DB_OK;
+
+error:
+  cfs_close(old_fd);
+  if(new_fd >= 0) {
+    cfs_close(new_fd);
+  }
+
+  if(result != DB_OK) {
+    cfs_remove(new_name);
+  }
+  return result;
+}
+#endif /* DB_FEATURE_REMOVE */
+
+/*
+ * storage_get_index: Look up the index record for attr in the index
+ * catalog of rel ("<relation name>" followed by INDEX_NAME_SUFFIX).
+ *
+ * All records are scanned, so the last matching record determines the
+ * type and descriptor file stored in index. Returns DB_OK if a match
+ * was found and DB_STORAGE_ERROR otherwise.
+ */
+db_result_t
+storage_get_index(index_t *index, relation_t *rel, attribute_t *attr)
+{
+  /* One extra byte for the terminating NUL. */
+  char filename[INDEX_NAME_LENGTH + 1];
+  int fd;
+  int r;
+  struct index_record record;
+  db_result_t result;
+
+  merge_strings(filename, sizeof(filename), rel->name, INDEX_NAME_SUFFIX);
+
+  fd = cfs_open(filename, CFS_READ);
+  if(fd < 0) {
+    return DB_STORAGE_ERROR;
+  }
+
+  for(result = DB_STORAGE_ERROR;;) {
+    r = cfs_read(fd, &record, sizeof(record));
+    /* Compare as int: a negative error return must end the scan too. */
+    if(r != (int)sizeof(record)) {
+      break;
+    }
+    if(strcmp(attr->name, record.attribute_name) == 0) {
+      PRINTF("DB: Found the index record for %s.%s: type %d, filename %s\n",
+             rel->name, attr->name, record.type, record.file_name);
+      index->type = record.type;
+      memcpy(index->descriptor_file, record.file_name,
+             sizeof(index->descriptor_file));
+      result = DB_OK;
+    }
+  }
+
+  cfs_close(fd);
+
+  return result;
+}
+
+/* storage_put_index: Append a record describing index to the index
+   catalog of the relation that the index belongs to. */
+db_result_t
+storage_put_index(index_t *index)
+{
+  /* One extra byte for the terminating NUL. */
+  char filename[INDEX_NAME_LENGTH + 1];
+  int fd;
+  int r;
+  struct index_record record;
+  db_result_t result;
+
+  merge_strings(filename, sizeof(filename), index->rel->name,
+                INDEX_NAME_SUFFIX);
+
+  fd = cfs_open(filename, CFS_WRITE | CFS_APPEND);
+  if(fd < 0) {
+    return DB_STORAGE_ERROR;
+  }
+
+  /* Start from an all-zero record so that no uninitialized bytes are
+     written, and bound the name copy to the size of its field. */
+  memset(&record, 0, sizeof(record));
+  strncpy(record.attribute_name, index->attr->name,
+          sizeof(record.attribute_name));
+  memcpy(record.file_name, index->descriptor_file, sizeof(record.file_name));
+  record.type = index->type;
+
+  result = DB_OK;
+  r = cfs_write(fd, &record, sizeof(record));
+  /* Compare as int: a negative error return must count as a failure. */
+  if(r != (int)sizeof(record)) {
+    result = DB_STORAGE_ERROR;
+  } else {
+    PRINTF("DB: Wrote an index record for %s.%s, type %d\n",
+           index->rel->name, index->attr->name, record.type);
+  }
+
+  cfs_close(fd);
+
+  return result;
+}
+
+/*
+ * storage_get_row: Read the row with number *tuple_id into row.
+ *
+ * Returns DB_FINISHED when *tuple_id is at or beyond the number of stored
+ * rows, DB_STORAGE_ERROR on a seek, read, or short-read failure, and DB_OK
+ * when a complete row was read and its last byte decoded.
+ */
+db_result_t
+storage_get_row(relation_t *rel, tuple_id_t *tuple_id, storage_row_t row)
+{
+  int r;
+  tuple_id_t nrows;
+
+  if(DB_ERROR(storage_get_row_amount(rel, &nrows))) {
+    return DB_STORAGE_ERROR;
+  }
+
+  if(*tuple_id >= nrows) {
+    return DB_FINISHED;
+  }
+
+  if(cfs_seek(rel->tuple_storage, *tuple_id * rel->row_length, CFS_SEEK_SET) ==
+     (cfs_offset_t)-1) {
+    return DB_STORAGE_ERROR;
+  }
+
+  r = cfs_read(rel->tuple_storage, row, rel->row_length);
+  if(r < 0) {
+    PRINTF("DB: Reading failed on fd %d\n", rel->tuple_storage);
+    return DB_STORAGE_ERROR;
+  } else if(r == 0) {
+    return DB_FINISHED;
+  } else if(r < rel->row_length) {
+    PRINTF("DB: Incomplete record: %d < %d\n", r, (int)rel->row_length);
+    return DB_STORAGE_ERROR;
+  }
+
+  /* Undo the encoding applied by storage_put_row(). */
+  row[rel->row_length - 1] ^= ROW_XOR;
+
+  PRINTF("DB: Read %d bytes from relation %s\n", (int)rel->row_length,
+         rel->name);
+
+  return DB_OK;
+}
+
+/*
+ * storage_put_row: Append a row to the tuple file of a relation.
+ *
+ * The last byte of the row is XOR-encoded while it is being written and
+ * restored before returning. With DB_FEATURE_INTEGRITY, a trailing partial
+ * row in the file is first padded with 0xff bytes up to the next row
+ * boundary so that the new row starts aligned.
+ */
+db_result_t
+storage_put_row(relation_t *rel, storage_row_t row)
+{
+  cfs_offset_t end;
+  unsigned remaining;
+  int r;
+  unsigned char *last_byte;
+#if DB_FEATURE_INTEGRITY
+  unsigned char padding[16];
+  unsigned missing_bytes;
+  unsigned chunk;
+#endif
+
+  if(rel->row_length == 0) {
+    /* There is no last byte to encode and no row boundary to align to. */
+    return DB_STORAGE_ERROR;
+  }
+
+  end = cfs_seek(rel->tuple_storage, 0, CFS_SEEK_END);
+  if(end == (cfs_offset_t)-1) {
+    return DB_STORAGE_ERROR;
+  }
+
+#if DB_FEATURE_INTEGRITY
+  missing_bytes = end % rel->row_length;
+  if(missing_bytes > 0) {
+    /* The file ends in a partial row: fill up what is missing of it. */
+    missing_bytes = rel->row_length - missing_bytes;
+    memset(padding, 0xff, sizeof(padding));
+    while(missing_bytes > 0) {
+      chunk = missing_bytes < sizeof(padding) ? missing_bytes : sizeof(padding);
+      r = cfs_write(rel->tuple_storage, padding, chunk);
+      if(r <= 0) {
+        return DB_STORAGE_ERROR;
+      }
+      missing_bytes -= r;
+    }
+  }
+#endif
+
+  /* Ensure that last written byte is separated from 0, to make file
+     lengths correct in Coffee. */
+  last_byte = row + rel->row_length - 1;
+  *last_byte ^= ROW_XOR;
+
+  remaining = rel->row_length;
+  do {
+    r = cfs_write(rel->tuple_storage, row, remaining);
+    /* A write that makes no progress is treated as a failure, so that
+       the loop cannot spin forever. */
+    if(r <= 0) {
+      PRINTF("DB: Failed to store %u bytes\n", remaining);
+      *last_byte ^= ROW_XOR;
+      return DB_STORAGE_ERROR;
+    }
+    row += r;
+    remaining -= r;
+  } while(remaining > 0);
+
+  PRINTF("DB: Stored a of %d bytes\n", (int)rel->row_length);
+
+  *last_byte ^= ROW_XOR;
+
+  return DB_OK;
+}
+
+/* storage_get_row_amount: Store in *amount the number of complete rows
+   in the tuple file, i.e. the file size divided by the row length.
+   A relation with a row length of 0 has 0 rows. */
+db_result_t
+storage_get_row_amount(relation_t *rel, tuple_id_t *amount)
+{
+  cfs_offset_t offset;
+
+  if(rel->row_length == 0) {
+    *amount = 0;
+  } else {
+    offset = cfs_seek(rel->tuple_storage, 0, CFS_SEEK_END);
+    if(offset == (cfs_offset_t)-1) {
+      return DB_STORAGE_ERROR;
+    }
+
+    *amount = (tuple_id_t)(offset / rel->row_length);
+  }
+
+  return DB_OK;
+}
+
+/* storage_open: Open a file for reading and writing and return its
+   descriptor, which is negative on failure. With Coffee enabled, the
+   descriptor is switched to flash-aware I/O semantics. */
+db_storage_id_t
+storage_open(const char *filename)
+{
+  int fd;
+
+  fd = cfs_open(filename, CFS_WRITE | CFS_READ);
+#if DB_FEATURE_COFFEE
+  if(fd >= 0) {
+    cfs_coffee_set_io_semantics(fd, CFS_COFFEE_IO_FLASH_AWARE);
+  }
+#endif
+  return fd;
+}
+
+/* storage_close: Close a descriptor returned by storage_open(). */
+void
+storage_close(db_storage_id_t fd)
+{
+  cfs_close(fd);
+}
+
+/* storage_read: Read exactly length bytes at the given offset into
+   buffer. A failed seek or a read that returns no data is an error. */
+db_result_t
+storage_read(db_storage_id_t fd,
+             void *buffer, unsigned long offset, unsigned length)
+{
+  char *ptr;
+  int r;
+
+  /* Extend the file if necessary, so that previously unwritten bytes
+     will be read in as zeroes. */
+  if(cfs_seek(fd, offset + length, CFS_SEEK_SET) == (cfs_offset_t)-1) {
+    return DB_STORAGE_ERROR;
+  }
+
+  if(cfs_seek(fd, offset, CFS_SEEK_SET) == (cfs_offset_t)-1) {
+    return DB_STORAGE_ERROR;
+  }
+
+  ptr = buffer;
+  while(length > 0) {
+    r = cfs_read(fd, ptr, length);
+    if(r <= 0) {
+      return DB_STORAGE_ERROR;
+    }
+    ptr += r;
+    length -= r;
+  }
+
+  return DB_OK;
+}
+
+/* storage_write: Write exactly length bytes from buffer at the given
+   offset. A failed seek or a write that makes no progress is an error. */
+db_result_t
+storage_write(db_storage_id_t fd,
+              void *buffer, unsigned long offset, unsigned length)
+{
+  char *ptr;
+  int r;
+
+  if(cfs_seek(fd, offset, CFS_SEEK_SET) == (cfs_offset_t)-1) {
+    return DB_STORAGE_ERROR;
+  }
+
+  ptr = buffer;
+  while(length > 0) {
+    r = cfs_write(fd, ptr, length);
+    if(r <= 0) {
+      return DB_STORAGE_ERROR;
+    }
+    ptr += r;
+    length -= r;
+  }
+
+  return DB_OK;
+}
diff --git a/apps/antelope/storage.h b/apps/antelope/storage.h
new file mode 100644
index 000000000..b40496c8e
--- /dev/null
+++ b/apps/antelope/storage.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2010, Swedish Institute of Computer Science
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the Institute nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ *      The storage interface used by the database.
+ * \author
+ *      Nicolas Tsiftes
+ */
+
+#ifndef STORAGE_H
+#define STORAGE_H
+
+#include "index.h"
+#include "relation.h"
+
+/*
+ * Filename suffixes, and the lengths of a relation name with the suffix
+ * appended. The lengths count characters only (sizeof(suffix) - 1), so a
+ * buffer that holds such a name as a C string needs one more byte for
+ * the terminating NUL.
+ */
+#define TABLE_NAME_SUFFIX       ".row"
+#define TABLE_NAME_LENGTH       (RELATION_NAME_LENGTH + \
+                                 sizeof(TABLE_NAME_SUFFIX) - 1)
+
+#define INDEX_NAME_SUFFIX       ".idx"
+#define INDEX_NAME_LENGTH       (RELATION_NAME_LENGTH + \
+                                 sizeof(INDEX_NAME_SUFFIX) - 1)
+
+/* A row is handled as a plain byte buffer. */
+typedef unsigned char * storage_row_t;
+
+/* File creation. */
+char *storage_generate_file(char *prefix, unsigned long size);
+
+/* Opening and closing the tuple file of a relation. */
+db_result_t storage_load(relation_t *rel);
+void storage_unload(relation_t *rel);
+
+/* Relation catalog management. */
+db_result_t storage_get_relation(relation_t *rel, char *name);
+db_result_t storage_put_relation(relation_t *rel);
+db_result_t storage_drop_relation(relation_t *rel, int remove_tuples);
+db_result_t storage_rename_relation(char *old_name, char *new_name);
+
+/* Attribute and index catalog management. */
+db_result_t storage_put_attribute(relation_t *rel, attribute_t *attr);
+db_result_t storage_get_index(index_t *index, relation_t *rel,
+                              attribute_t *attr);
+db_result_t storage_put_index(index_t *index);
+
+/* Row access. */
+db_result_t storage_get_row(relation_t *rel, tuple_id_t *tuple_id,
+                            storage_row_t row);
+db_result_t storage_put_row(relation_t *rel, storage_row_t row);
+db_result_t storage_get_row_amount(relation_t *rel, tuple_id_t *amount);
+
+/* Raw access to a file by offset and length. */
+db_storage_id_t storage_open(const char *filename);
+void storage_close(db_storage_id_t fd);
+db_result_t storage_read(db_storage_id_t fd, void *buffer,
+                         unsigned long offset, unsigned length);
+db_result_t storage_write(db_storage_id_t fd, void *buffer,
+                          unsigned long offset, unsigned length);
+
+#endif /* STORAGE_H */