Add unit test for bloom filter
Adds check based unit test for teh bloom filter implementation. This is based on Roberts original test code in utils/bloom.c and uses /usr/share/dict as a source of strings to check bloom creation, no false negatives and the false positive rate is below 15%.
This commit is contained in:
parent
7b2d15a036
commit
94b5c95676
|
@ -1,7 +1,7 @@
|
|||
#
|
||||
# NetSurf unit tests
|
||||
|
||||
TESTS := nsurl urldbtest nsoption #llcache
|
||||
TESTS := nsurl urldbtest nsoption bloom #llcache
|
||||
|
||||
# nsurl sources and flags
|
||||
nsurl_SRCS := utils/corestrings.c utils/nsurl.c utils/idna.c \
|
||||
|
@ -28,7 +28,8 @@ llcache_SRCS := content/fetch.c content/fetchers/curl.c \
|
|||
nsoption_SRCS := utils/nsoption.c \
|
||||
test/log.c test/nsoption.c
|
||||
|
||||
|
||||
bloom_SRCS := utils/bloom.c \
|
||||
test/bloom.c
|
||||
|
||||
# Coverage builds need additional flags
|
||||
ifeq ($(MAKECMDGOALS),coverage)
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Copyright 2015 Vincent Sanders <vince@netsurf-browser.org>
|
||||
* Copyright 2013 Rob Kendrick <rjek@netsurf-browser.org>
|
||||
*
|
||||
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
||||
*
|
||||
* NetSurf is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; version 2 of the License.
|
||||
*
|
||||
* NetSurf is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* Test bloom filter operations.
|
||||
*
|
||||
* Implementation taken from original test rig in bloom filter code
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <check.h>
|
||||
|
||||
#include "utils/bloom.h"
|
||||
|
||||
#define BLOOM_SIZE 8192
|
||||
#define FALSE_POSITIVE_RATE 15 /* acceptable false positive percentage rate */
|
||||
|
||||
static struct bloom_filter *dict_bloom;
|
||||
|
||||
/* Fixtures */
|
||||
|
||||
|
||||
/**
|
||||
* create dictionary bloom
|
||||
*
|
||||
* bloom constructed from the first BLOOM_SIZE entries of the
|
||||
* dictionary
|
||||
*/
|
||||
static void dict_bloom_create(void)
|
||||
{
|
||||
FILE *dictf;
|
||||
char buf[BUFSIZ];
|
||||
int i;
|
||||
|
||||
dictf = fopen("/usr/share/dict/words", "r");
|
||||
ck_assert(dictf != NULL);
|
||||
|
||||
dict_bloom = bloom_create(BLOOM_SIZE);
|
||||
ck_assert(dict_bloom != NULL);
|
||||
|
||||
for (i = 0; i < BLOOM_SIZE; i++) {
|
||||
fscanf(dictf, "%s", buf);
|
||||
bloom_insert_str(dict_bloom, buf, strlen(buf));
|
||||
}
|
||||
|
||||
fclose(dictf);
|
||||
}
|
||||
|
||||
static void dict_bloom_teardown(void)
|
||||
{
|
||||
bloom_destroy(dict_bloom);
|
||||
}
|
||||
|
||||
/* Tests */
|
||||
|
||||
START_TEST(bloom_create_test)
|
||||
{
|
||||
struct bloom_filter *b;
|
||||
b = bloom_create(BLOOM_SIZE);
|
||||
bloom_insert_str(b, "NetSurf", 7);
|
||||
ck_assert(bloom_search_str(b, "NetSurf", 7));
|
||||
ck_assert(!bloom_search_str(b, "NotSurf", 7));
|
||||
bloom_destroy(b);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(bloom_match_test)
|
||||
{
|
||||
FILE *dictf;
|
||||
char buf[BUFSIZ];
|
||||
int i;
|
||||
|
||||
dictf = fopen("/usr/share/dict/words", "r");
|
||||
ck_assert(dictf != NULL);
|
||||
|
||||
for (i = 0; i < BLOOM_SIZE; i++) {
|
||||
fscanf(dictf, "%s", buf);
|
||||
ck_assert(bloom_search_str(dict_bloom, buf, strlen(buf)));
|
||||
}
|
||||
fclose(dictf);
|
||||
}
|
||||
END_TEST
|
||||
|
||||
START_TEST(bloom_falsepositive_test)
|
||||
{
|
||||
FILE *dictf;
|
||||
char buf[BUFSIZ];
|
||||
int i;
|
||||
int false_positives = 0;
|
||||
|
||||
dictf = fopen("/usr/share/dict/words", "r");
|
||||
ck_assert(dictf != NULL);
|
||||
|
||||
/* skip elements known presnent */
|
||||
for (i = 0; i < BLOOM_SIZE; i++) {
|
||||
fscanf(dictf, "%s", buf);
|
||||
}
|
||||
|
||||
/* false positives are possible we are checking for low rate */
|
||||
for (i = 0; i < BLOOM_SIZE; i++) {
|
||||
fscanf(dictf, "%s", buf);
|
||||
if (bloom_search_str(dict_bloom, buf, strlen(buf)) == true)
|
||||
false_positives++;
|
||||
}
|
||||
fclose(dictf);
|
||||
|
||||
printf("false positive rate %d%%/%d%%\n",
|
||||
(false_positives * 100)/BLOOM_SIZE,
|
||||
FALSE_POSITIVE_RATE);
|
||||
ck_assert(false_positives < ((BLOOM_SIZE * FALSE_POSITIVE_RATE) / 100));
|
||||
}
|
||||
END_TEST
|
||||
|
||||
|
||||
/* Suite */
|
||||
|
||||
Suite *bloom_suite(void)
|
||||
{
|
||||
Suite *s;
|
||||
TCase *tc_create;
|
||||
TCase *tc_match;
|
||||
TCase *tc_falsepositive;
|
||||
|
||||
s = suite_create("Bloom filter");
|
||||
|
||||
/* Basic API creation */
|
||||
tc_create = tcase_create("Creation");
|
||||
|
||||
tcase_add_test(tc_create, bloom_create_test);
|
||||
|
||||
suite_add_tcase(s, tc_create);
|
||||
|
||||
|
||||
/* Matching entry tests */
|
||||
tc_match = tcase_create("Match");
|
||||
|
||||
tcase_add_checked_fixture(tc_match,
|
||||
dict_bloom_create,
|
||||
dict_bloom_teardown);
|
||||
|
||||
tcase_add_test(tc_match, bloom_match_test);
|
||||
|
||||
suite_add_tcase(s, tc_match);
|
||||
|
||||
|
||||
/* Not matching tests */
|
||||
tc_falsepositive = tcase_create("False positive rate");
|
||||
|
||||
tcase_add_checked_fixture(tc_falsepositive,
|
||||
dict_bloom_create,
|
||||
dict_bloom_teardown);
|
||||
|
||||
tcase_add_test(tc_falsepositive, bloom_falsepositive_test);
|
||||
|
||||
suite_add_tcase(s, tc_falsepositive);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int number_failed;
|
||||
Suite *s;
|
||||
SRunner *sr;
|
||||
|
||||
s = bloom_suite();
|
||||
|
||||
sr = srunner_create(s);
|
||||
srunner_run_all(sr, CK_ENV);
|
||||
|
||||
number_failed = srunner_ntests_failed(sr);
|
||||
srunner_free(sr);
|
||||
|
||||
return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
|
@ -16,8 +16,10 @@
|
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* Trivial bloom filter */
|
||||
/**
|
||||
* \file
|
||||
* Trivial bloom filter
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "utils/bloom.h"
|
||||
|
@ -107,58 +109,3 @@ uint32_t bloom_items(struct bloom_filter *b)
|
|||
return b->items;
|
||||
}
|
||||
|
||||
#ifdef TEST_RIG
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
int main(int argc, char *arg[])
|
||||
{
|
||||
struct bloom_filter *b = bloom_create(8192);
|
||||
FILE *dict = fopen("/usr/share/dict/words", "r");
|
||||
char buf[BUFSIZ];
|
||||
int false_positives = 0, total = 0;
|
||||
|
||||
for (int i = 0; i < 8192; i++) {
|
||||
fscanf(dict, "%s", buf);
|
||||
printf("adding %s\n", buf);
|
||||
bloom_insert_str(b, buf, strlen(buf));
|
||||
}
|
||||
|
||||
printf("adding NetSurf\n");
|
||||
|
||||
bloom_insert_str(b, "NetSurf", 7);
|
||||
printf("checking NetSurf (should be true)\n");
|
||||
assert(bloom_search_str(b, "NetSurf", 7));
|
||||
|
||||
fseek(dict, 0, SEEK_SET);
|
||||
|
||||
for (int i = 0; i < 8192; i++) {
|
||||
fscanf(dict, "%s", buf);
|
||||
printf("checking %s (should be true)\n", buf);
|
||||
assert(bloom_search_str(b, buf, strlen(buf)));
|
||||
|
||||
total++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8192; i++) {
|
||||
fscanf(dict, "%s", buf);
|
||||
printf("checking %s (should be false)\n", buf);
|
||||
if (bloom_search_str(b, buf, strlen(buf)) == true)
|
||||
false_positives++;
|
||||
total++;
|
||||
}
|
||||
|
||||
printf("false positives: %d of %d, %f%%\n",
|
||||
false_positives, total,
|
||||
((float)false_positives / total) * 100);
|
||||
|
||||
fclose(dict);
|
||||
bloom_destroy(b);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* TEST_RIG */
|
||||
|
||||
|
|
Loading…
Reference in New Issue