Add unit test for bloom filter

Adds check based unit test for teh bloom filter implementation. This
is based on Roberts original test code in utils/bloom.c and uses
/usr/share/dict as a source of strings to check bloom creation, no
false negatives and the false positive rate is below 15%.
This commit is contained in:
Vincent Sanders 2015-07-10 10:48:26 +01:00
parent 7b2d15a036
commit 94b5c95676
3 changed files with 202 additions and 59 deletions

View File

@ -1,7 +1,7 @@
#
# NetSurf unit tests
TESTS := nsurl urldbtest nsoption #llcache
TESTS := nsurl urldbtest nsoption bloom #llcache
# nsurl sources and flags
nsurl_SRCS := utils/corestrings.c utils/nsurl.c utils/idna.c \
@ -28,7 +28,8 @@ llcache_SRCS := content/fetch.c content/fetchers/curl.c \
nsoption_SRCS := utils/nsoption.c \
test/log.c test/nsoption.c
bloom_SRCS := utils/bloom.c \
test/bloom.c
# Coverage builds need additional flags
ifeq ($(MAKECMDGOALS),coverage)

195
test/bloom.c Normal file
View File

@ -0,0 +1,195 @@
/*
* Copyright 2015 Vincent Sanders <vince@netsurf-browser.org>
* Copyright 2013 Rob Kendrick <rjek@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file
* Test bloom filter operations.
*
* Implementation taken from original test rig in bloom filter code
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <check.h>
#include "utils/bloom.h"
#define BLOOM_SIZE 8192
#define FALSE_POSITIVE_RATE 15 /* acceptable false positive percentage rate */
static struct bloom_filter *dict_bloom;
/* Fixtures */
/**
* create dictionary bloom
*
* bloom constructed from the first BLOOM_SIZE entries of the
* dictionary
*/
static void dict_bloom_create(void)
{
FILE *dictf;
char buf[BUFSIZ];
int i;
dictf = fopen("/usr/share/dict/words", "r");
ck_assert(dictf != NULL);
dict_bloom = bloom_create(BLOOM_SIZE);
ck_assert(dict_bloom != NULL);
for (i = 0; i < BLOOM_SIZE; i++) {
fscanf(dictf, "%s", buf);
bloom_insert_str(dict_bloom, buf, strlen(buf));
}
fclose(dictf);
}
static void dict_bloom_teardown(void)
{
bloom_destroy(dict_bloom);
}
/* Tests */
START_TEST(bloom_create_test)
{
struct bloom_filter *b;
b = bloom_create(BLOOM_SIZE);
bloom_insert_str(b, "NetSurf", 7);
ck_assert(bloom_search_str(b, "NetSurf", 7));
ck_assert(!bloom_search_str(b, "NotSurf", 7));
bloom_destroy(b);
}
END_TEST
START_TEST(bloom_match_test)
{
FILE *dictf;
char buf[BUFSIZ];
int i;
dictf = fopen("/usr/share/dict/words", "r");
ck_assert(dictf != NULL);
for (i = 0; i < BLOOM_SIZE; i++) {
fscanf(dictf, "%s", buf);
ck_assert(bloom_search_str(dict_bloom, buf, strlen(buf)));
}
fclose(dictf);
}
END_TEST
START_TEST(bloom_falsepositive_test)
{
FILE *dictf;
char buf[BUFSIZ];
int i;
int false_positives = 0;
dictf = fopen("/usr/share/dict/words", "r");
ck_assert(dictf != NULL);
/* skip elements known presnent */
for (i = 0; i < BLOOM_SIZE; i++) {
fscanf(dictf, "%s", buf);
}
/* false positives are possible we are checking for low rate */
for (i = 0; i < BLOOM_SIZE; i++) {
fscanf(dictf, "%s", buf);
if (bloom_search_str(dict_bloom, buf, strlen(buf)) == true)
false_positives++;
}
fclose(dictf);
printf("false positive rate %d%%/%d%%\n",
(false_positives * 100)/BLOOM_SIZE,
FALSE_POSITIVE_RATE);
ck_assert(false_positives < ((BLOOM_SIZE * FALSE_POSITIVE_RATE) / 100));
}
END_TEST
/* Suite */
Suite *bloom_suite(void)
{
Suite *s;
TCase *tc_create;
TCase *tc_match;
TCase *tc_falsepositive;
s = suite_create("Bloom filter");
/* Basic API creation */
tc_create = tcase_create("Creation");
tcase_add_test(tc_create, bloom_create_test);
suite_add_tcase(s, tc_create);
/* Matching entry tests */
tc_match = tcase_create("Match");
tcase_add_checked_fixture(tc_match,
dict_bloom_create,
dict_bloom_teardown);
tcase_add_test(tc_match, bloom_match_test);
suite_add_tcase(s, tc_match);
/* Not matching tests */
tc_falsepositive = tcase_create("False positive rate");
tcase_add_checked_fixture(tc_falsepositive,
dict_bloom_create,
dict_bloom_teardown);
tcase_add_test(tc_falsepositive, bloom_falsepositive_test);
suite_add_tcase(s, tc_falsepositive);
return s;
}
int main(int argc, char **argv)
{
int number_failed;
Suite *s;
SRunner *sr;
s = bloom_suite();
sr = srunner_create(s);
srunner_run_all(sr, CK_ENV);
number_failed = srunner_ntests_failed(sr);
srunner_free(sr);
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@ -16,8 +16,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Trivial bloom filter */
/**
* \file
* Trivial bloom filter
*/
#include <stdlib.h>
#include "utils/bloom.h"
@ -107,58 +109,3 @@ uint32_t bloom_items(struct bloom_filter *b)
return b->items;
}
#ifdef TEST_RIG
#include <stdio.h>
#include <string.h>
#include <assert.h>
int main(int argc, char *arg[])
{
struct bloom_filter *b = bloom_create(8192);
FILE *dict = fopen("/usr/share/dict/words", "r");
char buf[BUFSIZ];
int false_positives = 0, total = 0;
for (int i = 0; i < 8192; i++) {
fscanf(dict, "%s", buf);
printf("adding %s\n", buf);
bloom_insert_str(b, buf, strlen(buf));
}
printf("adding NetSurf\n");
bloom_insert_str(b, "NetSurf", 7);
printf("checking NetSurf (should be true)\n");
assert(bloom_search_str(b, "NetSurf", 7));
fseek(dict, 0, SEEK_SET);
for (int i = 0; i < 8192; i++) {
fscanf(dict, "%s", buf);
printf("checking %s (should be true)\n", buf);
assert(bloom_search_str(b, buf, strlen(buf)));
total++;
}
for (int i = 0; i < 8192; i++) {
fscanf(dict, "%s", buf);
printf("checking %s (should be false)\n", buf);
if (bloom_search_str(b, buf, strlen(buf)) == true)
false_positives++;
total++;
}
printf("false positives: %d of %d, %f%%\n",
false_positives, total,
((float)false_positives / total) * 100);
fclose(dict);
bloom_destroy(b);
return 0;
}
#endif /* TEST_RIG */