utils: Add a generic hashmap and tests for it

In order to be able to use a generic hashmap in things such
as the fs_backing_store we want one to exist.  Here it is,
along with some moderately comprehensive tests.

Current limits:

1. All keys and values are owned by the hashmap
2. The hashmap, while capable of different bucket counts
   only has a single fixed count for now

Signed-off-by: Daniel Silverstone <dsilvers@digital-scurf.org>
This commit is contained in:
Daniel Silverstone 2020-02-23 15:12:37 +00:00
parent f3b7a0c44c
commit a653e1e86e
No known key found for this signature in database
GPG Key ID: C30DF439F2987D74
4 changed files with 824 additions and 0 deletions

View File

@ -7,6 +7,7 @@ TESTS := \
nsoption \
bloom \
hashtable \
hashmap \
urlescape \
utils \
messages \
@ -52,6 +53,10 @@ bloom_SRCS := utils/bloom.c test/bloom.c
# hash table test sources
hashtable_SRCS := utils/hashtable.c test/log.c test/hashtable.c
# hashmap test sources
hashmap_SRCS := $(NSURL_SOURCES) utils/hashmap.c utils/corestrings.c test/log.c test/hashmap.c
hashmap_LD := -lmalloc_fig
# url escape test sources
urlescape_SRCS := utils/url.c test/log.c test/urlescape.c

467
test/hashmap.c Normal file
View File

@ -0,0 +1,467 @@
/*
* Copyright 2020 Daniel Silverstone <dsilvers@netsurf-browser.org>
* Copyright 2016 Vincent Sanders <vince@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* Tests for hashmap.
*
* In part, borrows from the corestrings tests
*/
#include "utils/config.h"
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <check.h>
#include <limits.h>
#include <libwapcaplet/libwapcaplet.h>
#include "utils/nsurl.h"
#include "utils/corestrings.h"
#include "utils/hashmap.h"
#include "test/malloc_fig.h"
/* Low level fixtures */
static void
corestring_create(void)
{
ck_assert(corestrings_init() == NSERROR_OK);
}
/**
* iterator for any remaining strings in teardown fixture
*/
static void
netsurf_lwc_iterator(lwc_string *str, void *pw)
{
fprintf(stderr,
"[%3u] %.*s",
str->refcnt,
(int)lwc_string_length(str),
lwc_string_data(str));
}
static void
corestring_teardown(void)
{
corestrings_fini();
lwc_iterate_strings(netsurf_lwc_iterator, NULL);
}
/* Infra */
static ssize_t keys;
static ssize_t values;
typedef struct {
nsurl *key;
} hashmap_test_value_t;
static void *
key_clone(void *key)
{
/* Pretend cloning costs memory so that it can fail for
* testing error return pathways
*/
void *temp = malloc(1);
if (temp == NULL) return NULL;
free(temp);
/* In reality we just ref the nsurl */
keys++;
return nsurl_ref((nsurl *)key);
}
static void
key_destroy(void *key)
{
keys--;
nsurl_unref((nsurl *)key);
}
static uint32_t
key_hash(void *key)
{
/* Deliberately bad hash.
* returns 0, 1, 2, or 3 to force bucket chaining
*/
return nsurl_hash((nsurl *)key) & 3;
}
static bool
key_eq(void *key1, void *key2)
{
return nsurl_compare((nsurl *)key1, (nsurl*)key2, NSURL_COMPLETE);
}
static void *
value_alloc(void *key)
{
hashmap_test_value_t *ret = malloc(sizeof(hashmap_test_value_t));
if (ret == NULL)
return NULL;
ret->key = (nsurl *)key;
values++;
return ret;
}
static void
value_destroy(void *value)
{
hashmap_test_value_t *val = value;
/* Do nothing for now */
free(val);
values--;
}
static hashmap_parameters_t test_params = {
.key_clone = key_clone,
.key_hash = key_hash,
.key_eq = key_eq,
.key_destroy = key_destroy,
.value_alloc = value_alloc,
.value_destroy = value_destroy,
};
/* Fixtures for basic tests */
static hashmap_t *test_hashmap = NULL;
static void
basic_fixture_create(void)
{
corestring_create();
test_hashmap = hashmap_create(&test_params);
ck_assert(test_hashmap != NULL);
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
}
static void
basic_fixture_teardown(void)
{
hashmap_destroy(test_hashmap);
test_hashmap = NULL;
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
corestring_teardown();
}
/* basic api tests */
START_TEST(empty_hashmap_create_destroy)
{
/* Do nothing in here, all the checks are in the fixture */
}
END_TEST
START_TEST(check_not_present)
{
/* We're checking for a key which should not be present */
ck_assert(hashmap_lookup(test_hashmap, corestring_nsurl_about_blank) == NULL);
}
END_TEST
START_TEST(insert_works)
{
hashmap_test_value_t *value = hashmap_insert(test_hashmap, corestring_nsurl_about_blank);
ck_assert(value != NULL);
ck_assert(value->key == corestring_nsurl_about_blank);
}
END_TEST
START_TEST(remove_not_present)
{
ck_assert(hashmap_remove(test_hashmap, corestring_nsurl_about_blank) == false);
}
END_TEST
START_TEST(insert_then_remove)
{
hashmap_test_value_t *value = hashmap_insert(test_hashmap, corestring_nsurl_about_blank);
ck_assert(value != NULL);
ck_assert(value->key == corestring_nsurl_about_blank);
ck_assert_int_eq(keys, 1);
ck_assert_int_eq(values, 1);
ck_assert(hashmap_remove(test_hashmap, corestring_nsurl_about_blank) == true);
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
}
END_TEST
START_TEST(insert_then_lookup)
{
hashmap_test_value_t *value = hashmap_insert(test_hashmap, corestring_nsurl_about_blank);
ck_assert(value != NULL);
ck_assert(value->key == corestring_nsurl_about_blank);
ck_assert(hashmap_lookup(test_hashmap, corestring_nsurl_about_blank) == value);
}
END_TEST
static TCase *basic_api_case_create(void)
{
TCase *tc;
tc = tcase_create("Basic API");
tcase_add_unchecked_fixture(tc,
basic_fixture_create,
basic_fixture_teardown);
tcase_add_test(tc, empty_hashmap_create_destroy);
tcase_add_test(tc, check_not_present);
tcase_add_test(tc, insert_works);
tcase_add_test(tc, remove_not_present);
tcase_add_test(tc, insert_then_remove);
tcase_add_test(tc, insert_then_lookup);
return tc;
}
/* Chain verification test suite */
typedef struct {
const char *url;
nsurl *nsurl;
} case_pair;
/* The hobbled hash has only 4 values
* By having at least 12 test cases, we can be confident that
* at worst they'll all be on one chain, but at best there'll
* be four chains of 3 entries which means we should be able
* to validate prevptr and next in all cases.
*/
static case_pair chain_pairs[] = {
{ "https://www.google.com/", NULL },
{ "https://www.google.co.uk/", NULL },
{ "https://www.netsurf-browser.org/", NULL },
{ "http://www.google.com/", NULL },
{ "http://www.google.co.uk/", NULL },
{ "http://www.netsurf-browser.org/", NULL },
{ "file:///tmp/test.html", NULL },
{ "file:///tmp/inner.html", NULL },
{ "about:blank", NULL },
{ "about:welcome", NULL },
{ "about:testament", NULL },
{ "resources:default.css", NULL },
{ NULL, NULL }
};
static void
chain_fixture_create(void)
{
case_pair *chain_case = chain_pairs;
basic_fixture_create();
while (chain_case->url != NULL) {
ck_assert(nsurl_create(chain_case->url, &chain_case->nsurl) == NSERROR_OK);
chain_case++;
}
}
static void
chain_fixture_teardown(void)
{
case_pair *chain_case = chain_pairs;
while (chain_case->url != NULL) {
nsurl_unref(chain_case->nsurl);
chain_case->nsurl = NULL;
chain_case++;
}
basic_fixture_teardown();
}
START_TEST(chain_add_remove_all)
{
case_pair *chain_case;
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_lookup(test_hashmap, chain_case->nsurl) == NULL);
ck_assert(hashmap_insert(test_hashmap, chain_case->nsurl) != NULL);
ck_assert(hashmap_lookup(test_hashmap, chain_case->nsurl) != NULL);
ck_assert(hashmap_remove(test_hashmap, chain_case->nsurl) == true);
}
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
}
END_TEST
START_TEST(chain_add_all_remove_all)
{
case_pair *chain_case;
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_lookup(test_hashmap, chain_case->nsurl) == NULL);
ck_assert(hashmap_insert(test_hashmap, chain_case->nsurl) != NULL);
}
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_remove(test_hashmap, chain_case->nsurl) == true);
}
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
}
END_TEST
START_TEST(chain_add_all_twice_remove_all)
{
case_pair *chain_case;
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_lookup(test_hashmap, chain_case->nsurl) == NULL);
ck_assert(hashmap_insert(test_hashmap, chain_case->nsurl) != NULL);
}
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_lookup(test_hashmap, chain_case->nsurl) != NULL);
ck_assert(hashmap_insert(test_hashmap, chain_case->nsurl) != NULL);
}
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
ck_assert(hashmap_remove(test_hashmap, chain_case->nsurl) == true);
}
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
}
END_TEST
#define CHAIN_TEST_MALLOC_COUNT_MAX 60
START_TEST(chain_add_all_remove_all_alloc)
{
bool failed = false;
case_pair *chain_case;
malloc_limit(_i);
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
if (hashmap_insert(test_hashmap, chain_case->nsurl) == NULL) {
failed = true;
}
}
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
if (hashmap_insert(test_hashmap, chain_case->nsurl) == NULL) {
failed = true;
}
}
for (chain_case = chain_pairs;
chain_case->url != NULL;
chain_case++) {
hashmap_remove(test_hashmap, chain_case->nsurl);
}
malloc_limit(UINT_MAX);
ck_assert_int_eq(keys, 0);
ck_assert_int_eq(values, 0);
if (_i < CHAIN_TEST_MALLOC_COUNT_MAX) {
ck_assert(failed);
} else {
ck_assert(!failed);
}
}
END_TEST
static TCase *chain_case_create(void)
{
TCase *tc;
tc = tcase_create("Bucket Chain tests");
tcase_add_unchecked_fixture(tc,
chain_fixture_create,
chain_fixture_teardown);
tcase_add_test(tc, chain_add_remove_all);
tcase_add_test(tc, chain_add_all_remove_all);
tcase_add_test(tc, chain_add_all_twice_remove_all);
tcase_add_loop_test(tc, chain_add_all_remove_all_alloc, 0, CHAIN_TEST_MALLOC_COUNT_MAX + 1);
return tc;
}
/*
* hashmap test suite creation
*/
static Suite *hashmap_suite_create(void)
{
Suite *s;
s = suite_create("Hashmap");
suite_add_tcase(s, basic_api_case_create());
suite_add_tcase(s, chain_case_create());
return s;
}
int main(int argc, char **argv)
{
int number_failed;
SRunner *sr;
sr = srunner_create(hashmap_suite_create());
srunner_run_all(sr, CK_ENV);
number_failed = srunner_ntests_failed(sr);
srunner_free(sr);
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

215
utils/hashmap.c Normal file
View File

@ -0,0 +1,215 @@
/*
* Copyright 2020 Daniel Silverstone <dsilvers@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <string.h>
#include "utils/hashmap.h"
/**
* The default number of buckets in the hashmaps we create.
*/
#define DEFAULT_HASHMAP_BUCKETS (4091)
/**
* Hashmaps have chains of entries in buckets.
*/
typedef struct hashmap_entry_s {
struct hashmap_entry_s **prevptr;
struct hashmap_entry_s *next;
void *key;
void *value;
uint32_t key_hash;
} hashmap_entry_t;
/**
* The content of a hashmap
*/
struct hashmap_s {
/**
* The parameters to be used for this hashmap
*/
hashmap_parameters_t *params;
/**
* The buckets for the hash chains
*/
hashmap_entry_t **buckets;
/**
* The number of buckets in this map
*/
uint32_t bucket_count;
};
/* Exported function, documented in hashmap.h */
hashmap_t *
hashmap_create(hashmap_parameters_t *params)
{
hashmap_t *ret = malloc(sizeof(hashmap_t));
ret->params = params;
ret->bucket_count = DEFAULT_HASHMAP_BUCKETS;
ret->buckets = malloc(ret->bucket_count * sizeof(hashmap_entry_t *));
memset(ret->buckets, 0, ret->bucket_count * sizeof(hashmap_entry_t *));
if (ret->buckets == NULL) {
free(ret);
return NULL;
}
return ret;
}
/* Exported function, documented in hashmap.h */
void
hashmap_destroy(hashmap_t *hashmap)
{
uint32_t bucket;
hashmap_entry_t *entry;
for (bucket = 0; bucket < hashmap->bucket_count; bucket++) {
for (entry = hashmap->buckets[bucket];
entry != NULL;
entry = entry->next) {
hashmap->params->value_destroy(entry->value);
hashmap->params->key_destroy(entry->key);
free(entry);
}
}
free(hashmap->buckets);
free(hashmap);
}
/* Exported function, documented in hashmap.h */
void *
hashmap_lookup(hashmap_t *hashmap, void *key)
{
uint32_t hash = hashmap->params->key_hash(key);
hashmap_entry_t *entry = hashmap->buckets[hash % hashmap->bucket_count];
for(;entry != NULL; entry = entry->next) {
if (entry->key_hash == hash) {
if (hashmap->params->key_eq(key, entry->key)) {
return entry->value;
}
}
}
return NULL;
}
/* Exported function, documented in hashmap.h */
void *
hashmap_insert(hashmap_t *hashmap, void *key)
{
uint32_t hash = hashmap->params->key_hash(key);
uint32_t bucket = hash % hashmap->bucket_count;
hashmap_entry_t *entry = hashmap->buckets[bucket];
void *new_key, *new_value;
for(;entry != NULL; entry = entry->next) {
if (entry->key_hash == hash) {
if (hashmap->params->key_eq(key, entry->key)) {
/* This key is already here */
new_key = hashmap->params->key_clone(key);
if (new_key == NULL) {
/* Allocation failed */
return NULL;
}
new_value = hashmap->params->value_alloc(entry->key);
if (new_value == NULL) {
/* Allocation failed */
hashmap->params->key_destroy(new_key);
return NULL;
}
hashmap->params->value_destroy(entry->value);
hashmap->params->key_destroy(entry->key);
entry->value = new_value;
entry->key = new_key;
return entry->value;
}
}
}
/* The key was not found in the map, so allocate a new entry */
entry = malloc(sizeof(*entry));
if (entry == NULL) {
return NULL;
}
memset(entry, 0, sizeof(*entry));
entry->key = hashmap->params->key_clone(key);
if (entry->key == NULL) {
goto err;
}
entry->key_hash = hash;
entry->value = hashmap->params->value_alloc(entry->key);
if (entry->value == NULL) {
goto err;
}
entry->prevptr = &(hashmap->buckets[bucket]);
entry->next = hashmap->buckets[bucket];
if (entry->next != NULL) {
entry->next->prevptr = &entry->next;
}
hashmap->buckets[bucket] = entry;
return entry->value;
err:
if (entry->value != NULL)
hashmap->params->value_destroy(entry->value);
if (entry->key != NULL)
hashmap->params->key_destroy(entry->key);
free(entry);
return NULL;
}
/* Exported function, documented in hashmap.h */
bool
hashmap_remove(hashmap_t *hashmap, void *key)
{
uint32_t hash = hashmap->params->key_hash(key);
hashmap_entry_t *entry = hashmap->buckets[hash % hashmap->bucket_count];
for(;entry != NULL; entry = entry->next) {
if (entry->key_hash == hash) {
if (hashmap->params->key_eq(key, entry->key)) {
hashmap->params->value_destroy(entry->value);
hashmap->params->key_destroy(entry->key);
if (entry->next != NULL) {
entry->next->prevptr = entry->prevptr;
}
*entry->prevptr = entry->next;
free(entry);
return true;
}
}
}
return false;
}

137
utils/hashmap.h Normal file
View File

@ -0,0 +1,137 @@
/*
* Copyright 2020 Daniel Silverstone <dsilvers@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NETSURF_HASHMAP_H
#define NETSURF_HASHMAP_H
#include <stdint.h>
#include <stdbool.h>
/**
* Generic hashmap.
*
* Hashmaps take ownership of the keys inserted into them by means of a
* clone function in their parameters. They also manage the value memory
* directly.
*/
typedef struct hashmap_s hashmap_t;
/**
* Parameters for hashmaps
*/
typedef struct {
/**
* A function which when called will clone a key and give
* ownership of the returned object to the hashmap
*/
void * (*key_clone)(void *key);
/**
* A function which when given a key will return its hash.
*/
uint32_t (*key_hash)(void *key);
/**
* A function to compare two keys and return if they are equal.
* Note: identity is not necessary, nor strict equality, so long
* as the function is a full equality model.
* (i.e. key1 == key2 => key2 == key1)
*/
bool (*key_eq)(void *key1, void *key2);
/**
* A function which when called will destroy a key object
*/
void (*key_destroy)(void *key);
/**
* A function which when called will allocate a value object
*/
void * (*value_alloc)(void *key);
/**
* A function which when called will destroy a value object
*/
void (*value_destroy)(void *value);
} hashmap_parameters_t;
/**
* Create a hashmap
*
* The provided hashmap parameter table will be used for map operations
* which need to allocate/free etc.
*
* \param params The hashmap parameters for this map
*/
hashmap_t* hashmap_create(hashmap_parameters_t *params);
/**
* Destroy a hashmap
*
* After this, all keys and values will have been destroyed and all memory
* associated with this hashmap will be invalidated.
*
* \param hashmap The hashmap to destroy
*/
void hashmap_destroy(hashmap_t *hashmap);
/**
* Look up a key in a hashmap
*
* If the key has an associated value in the hashmap then the pointer to it
* is returned, otherwise NULL.
*
* \param hashmap The hashmap to look up the key inside
* \param key The key to look up in the hashmap
* \return A pointer to the value if found, NULL otherwise
*/
void* hashmap_lookup(hashmap_t *hashmap, void *key);
/**
* Create an entry in a hashmap
*
* This creates a blank value using the parameters and then associates it with
* a clone of the given key, inserting it into the hashmap. If a value was
* present for the given key already, then it is destroyed first.
*
* NOTE: If allocation of the new value object fails, then any existing entry
* will be left alone, but NULL will be returned.
*
* \param hashmap The hashmap to insert into
* \param key The key to insert an entry for
* \return The value pointer for that key, or NULL if allocation failed.
*/
void *hashmap_insert(hashmap_t *hashmap, void *key);
/**
* Remove an entry from the hashmap
*
* This will remove the entry for the given key from the hashmap
* If there is no such entry, this will safely do nothing.
* The value associated with the entry will be destroyed and so should not
* be used beyond calling this function.
*
* \param hashmap The hashmap to remove the entry from
* \param key The key to remove the entry for
* \return true if an entry was removed, false otherwise
*/
bool hashmap_remove(hashmap_t *hashmap, void *key);
#endif