remove urldb_add_host and path from public API

The urldb_add_host and urldb_add_path functions were only public for the
benefit of the urldb test code, which was calling them incorrectly in any
case: the URL bloom filters were not updated, so serialising and adding
data to entries was inconsistent.

This also changes the tests to use the urldb_add_url API, which is less
buggy, and results in a smaller, cleaner urldb API.
This commit is contained in:
Vincent Sanders 2017-03-08 13:27:00 +00:00
parent b78887c7c8
commit 0da0dcc342
3 changed files with 225 additions and 255 deletions

View File

@ -2038,6 +2038,180 @@ urldb_parse_cookie(nsurl *url, const char **cookie)
}
/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path is split on '/' (a single leading '/' is skipped) and walked
 * one segment at a time beneath the host's path tree. A child is reused
 * when its segment text, scheme and port all match; otherwise a new node
 * is created. The fragment is only attached to the final segment.
 *
 * \param scheme      URL scheme associated with path (must not be NULL)
 * \param port        Port number on host associated with path
 * \param host        Host tree node to attach to (must not be NULL)
 * \param path_query  Absolute path plus query to add; it is modified in
 *                    place and freed before returning
 * \param fragment    URL fragment, or NULL
 * \param url         URL (fragment ignored, must not be NULL)
 * \return Pointer to leaf node, or NULL if a node could not be created
 *         or the defragmented URL could not be stored
 */
static struct path_data *
urldb_add_path(lwc_string *scheme,
	       unsigned int port,
	       const struct host_part *host,
	       char *path_query,
	       lwc_string *fragment,
	       nsurl *url)
{
	struct path_data *node;
	struct path_data *child;
	char *cur;
	char *sep;
	bool same_scheme;

	assert(scheme && host && url);

	/* Start the walk at the root of this host's path tree */
	node = (struct path_data *) &host->paths;

	/* Skip leading '/' */
	cur = path_query;
	if (cur[0] == '/') {
		cur++;
	}

	for (;;) {
		/* Terminate the current segment, if it is not the last one */
		sep = strchr(cur, '/');
		if (sep != NULL) {
			*sep = '\0';
		}

		/* Look for an existing child matching segment, scheme, port */
		child = node->children;
		while (child != NULL) {
			if (strcmp(cur, child->segment) == 0 &&
			    lwc_string_isequal(scheme, child->scheme,
					       &same_scheme) == lwc_error_ok &&
			    same_scheme == true &&
			    child->port == port) {
				break;
			}
			child = child->next;
		}

		if (sep == NULL) {
			/* Last segment: this is where the fragment belongs */
			if (child != NULL) {
				node = urldb_add_path_fragment(child, fragment);
			} else {
				node = urldb_add_path_node(scheme, port, cur,
							   fragment, node);
			}
			break;
		}

		/* Intermediate segment: descend, creating it if required */
		if (child != NULL) {
			node = child;
		} else {
			node = urldb_add_path_node(scheme, port, cur,
						   NULL, node);
		}
		if (node == NULL) {
			break;
		}

		cur = sep + 1;
	}

	free(path_query);

	if (node == NULL) {
		return NULL;
	}

	if (node->url == NULL) {
		/* Insert defragmented URL */
		if (nsurl_defragment(url, &node->url) != NSERROR_OK) {
			return NULL;
		}
	}

	return node;
}
/**
 * Add a host to the database, creating any intermediate entries
 *
 * An IP address is stored as a single top-level entry. Any other
 * hostname is split on '.' and processed from the rightmost label to the
 * leftmost, reusing existing nodes (matched case-insensitively) and
 * creating the missing ones. The leaf node is then inserted into the
 * relevant search tree.
 *
 * \param host Hostname to add (must not be NULL)
 * \return Pointer to leaf node, or NULL if a node could not be created
 *         or could not be inserted into the search tree
 */
static struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (urldb__host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next) {
			if (strcasecmp(host, e->part) == 0) {
				/* found => return it */
				return e;
			}
		}

		d = urldb_add_host_node(host, d);
		if (d == NULL) {
			/* Node creation failed. The hostname branch below
			 * guards its search tree insert with the same
			 * check; never hand a NULL node to the insert. */
			return NULL;
		}

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next) {
				if (strcasecmp(buf, e->part) == 0) {
					break;
				}
			}

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next) {
			if (strcasecmp(part + 1, e->part) == 0) {
				break;
			}
		}

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d) {
			break;
		}

		/* Chop the label just handled off the end of the buffer */
		*part = '\0';
	} while (1);

	return d;
}
/**
* Insert a cookie into the database
*
@ -4161,160 +4335,5 @@ void urldb_dump(void)
}
/* exported interface documented in content/urldb.h */
struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (urldb__host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next) {
			if (strcasecmp(host, e->part) == 0) {
				/* found => return it */
				return e;
			}
		}

		d = urldb_add_host_node(host, d);
		if (d == NULL) {
			/* Node creation failed. The hostname branch below
			 * guards its search tree insert with the same
			 * check; never hand a NULL node to the insert. */
			return NULL;
		}

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards (rightmost label first) */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next) {
				if (strcasecmp(buf, e->part) == 0) {
					break;
				}
			}

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next) {
			if (strcasecmp(part + 1, e->part) == 0) {
				break;
			}
		}

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d) {
			break;
		}

		/* Chop the label just handled off the end of the buffer */
		*part = '\0';
	} while (1);

	return d;
}
/* exported interface documented in content/urldb.h */
struct path_data *
urldb_add_path(lwc_string *scheme,
	       unsigned int port,
	       const struct host_part *host,
	       char *path_query,
	       lwc_string *fragment,
	       nsurl *url)
{
	struct path_data *node;
	struct path_data *child;
	char *cur;
	char *sep;
	bool same_scheme;

	assert(scheme && host && url);

	/* Start the walk at the root of this host's path tree */
	node = (struct path_data *) &host->paths;

	/* Skip leading '/' */
	cur = path_query;
	if (cur[0] == '/') {
		cur++;
	}

	for (;;) {
		/* Terminate the current segment, if it is not the last one */
		sep = strchr(cur, '/');
		if (sep != NULL) {
			*sep = '\0';
		}

		/* Look for an existing child matching segment, scheme, port */
		child = node->children;
		while (child != NULL) {
			if (strcmp(cur, child->segment) == 0 &&
			    lwc_string_isequal(scheme, child->scheme,
					       &same_scheme) == lwc_error_ok &&
			    same_scheme == true &&
			    child->port == port) {
				break;
			}
			child = child->next;
		}

		if (sep == NULL) {
			/* Last segment: this is where the fragment belongs */
			if (child != NULL) {
				node = urldb_add_path_fragment(child, fragment);
			} else {
				node = urldb_add_path_node(scheme, port, cur,
							   fragment, node);
			}
			break;
		}

		/* Intermediate segment: descend, creating it if required */
		if (child != NULL) {
			node = child;
		} else {
			node = urldb_add_path_node(scheme, port, cur,
						   NULL, node);
		}
		if (node == NULL) {
			break;
		}

		cur = sep + 1;
	}

	/* The caller's buffer is consumed whatever the outcome */
	free(path_query);

	if (node == NULL) {
		return NULL;
	}

	if (node->url == NULL) {
		/* Insert defragmented URL */
		if (nsurl_defragment(url, &node->url) != NSERROR_OK) {
			return NULL;
		}
	}

	return node;
}

View File

@ -138,29 +138,4 @@ bool urldb_set_cookie(const char *header, struct nsurl *url, struct nsurl *refer
char *urldb_get_cookie(struct nsurl *url, bool include_http_only);
/**
 * Add a host to the database, creating any intermediate entries
 *
 * An IP address is added as a single top-level entry. Any other hostname
 * is split on '.' and added label by label, starting from the rightmost
 * label; existing entries are matched case-insensitively and reused.
 *
 * \param host Hostname to add (must not be NULL)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct host_part *urldb_add_host(const char *host);
/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path is split on '/' and added one segment at a time below the
 * given host. An existing entry is reused only when its segment, scheme
 * and port all match.
 *
 * \param scheme URL scheme associated with path (must not be NULL)
 * \param port Port number on host associated with path
 * \param host Host tree node to attach to (must not be NULL)
 * \param path_query Absolute path plus query to add (modified in place,
 *                   then freed)
 * \param fragment URL fragment, or NULL
 * \param url URL (fragment ignored, must not be NULL)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port,
const struct host_part *host, char *path_query,
lwc_string *fragment, struct nsurl *url);
#endif

View File

@ -71,27 +71,6 @@ static nsurl *make_url(const char *url)
return nsurl;
}
/**
 * Test helper: obtain the path and query components of a URL as a string.
 *
 * Logs a message and terminates the test process with exit status 1 if
 * the components cannot be obtained, so callers never see a failure.
 *
 * \param url URL to extract the path and query from
 * \return String produced by nsurl_get()
 */
static char *make_path_query(nsurl *url)
{
	char *result;
	size_t result_len; /* required by nsurl_get(); otherwise unused */
	int failed;

	failed = (nsurl_get(url, NSURL_PATH | NSURL_QUERY,
			    &result, &result_len) != NSERROR_OK);
	if (failed) {
		LOG("failed creating path_query");
		exit(1);
	}

	return result;
}
/**
 * Test helper: intern a C string as an lwc_string.
 *
 * Logs a message and terminates the test process with exit status 1 if
 * interning fails, so callers never see a failure.
 *
 * \param str NUL terminated string to intern
 * \return The interned string produced by lwc_intern_string()
 */
static lwc_string *make_lwc(const char *str)
{
	size_t str_len = strlen(str);
	lwc_string *interned;

	if (lwc_error_ok != lwc_intern_string(str, str_len, &interned)) {
		LOG("failed creating lwc_string");
		exit(1);
	}

	return interned;
}
static bool test_urldb_set_cookie(const char *header, const char *url,
const char *referer)
@ -208,58 +187,37 @@ START_TEST(urldb_original_test)
nsurl *urlr;
char *path_query;
h = urldb_add_host("127.0.0.1");
ck_assert_msg(h != NULL, "failed adding host");
h = urldb_add_host("intranet");
ck_assert_msg(h != NULL, "failed adding host");
/* ensure title can be set */
url = make_url("http://intranet/");
scheme = nsurl_get_component(url, NSURL_SCHEME);
p = urldb_add_path(scheme, 0, h, strdup("/"), NULL, url);
ck_assert_msg(p != NULL, "failed adding path");
lwc_string_unref(scheme);
urldb_add_url(url);
urldb_set_url_title(url, "foo");
u = urldb_get_url_data(url);
assert(u && strcmp(u->title, "foo") == 0);
ck_assert(u && strcmp(u->title, "foo") == 0);
nsurl_unref(url);
/* Get host entry */
h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk");
ck_assert_msg(h != NULL, "failed adding host");
/* Get path entry */
/* fragments */
url = make_url("http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
scheme = nsurl_get_component(url, NSURL_SCHEME);
path_query = make_path_query(url);
fragment = make_lwc("zz");
p = urldb_add_path(scheme, 0, h, strdup(path_query), fragment, url);
ck_assert_msg(p != NULL, "failed adding path");
ck_assert(urldb_add_url(url) == true);
nsurl_unref(url);
lwc_string_unref(fragment);
url = make_url("http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm#zz?a=b");
ck_assert(urldb_add_url(url) == true);
nsurl_unref(url);
fragment = make_lwc("aa");
p = urldb_add_path(scheme, 0, h, strdup(path_query), fragment, url);
ck_assert_msg(p != NULL, "failed adding path");
url = make_url("http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm#aa?a=b");
ck_assert(urldb_add_url(url) == true);
nsurl_unref(url);
lwc_string_unref(fragment);
fragment = make_lwc("yy");
p = urldb_add_path(scheme, 0, h, strdup(path_query), fragment, url);
ck_assert_msg(p != NULL, "failed adding path");
free(path_query);
lwc_string_unref(fragment);
lwc_string_unref(scheme);
url = make_url("http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm#yy?a=b");
ck_assert(urldb_add_url(url) == true);
nsurl_unref(url);
url = make_url("file:///home/");
urldb_add_url(url);
ck_assert(urldb_add_url(url) == true);
nsurl_unref(url);
/* set cookies on urls */
url = make_url("http://www.minimarcos.org.uk/cgi-bin/forum/Blah.pl?,v=login,p=2");
urldb_set_cookie("mmblah=foo; path=/; expires=Thur, 31-Dec-2099 00:00:00 GMT\r\n", url, NULL);
nsurl_unref(url);
@ -416,6 +374,11 @@ static TCase *urldb_original_case_create(void)
return tc;
}
/**
* Session basic test case
*
* The databases are loaded and saved with no manipulation
*/
START_TEST(urldb_session_test)
{
nserror res;
@ -439,8 +402,12 @@ START_TEST(urldb_session_test)
unlink(outnam);
/* write cookies out */
outnam = tmpnam(NULL);
urldb_save_cookies(outnam);
/* remove test output */
unlink(outnam);
/* finalise options */
res = nsoption_finalise(NULL, NULL);
ck_assert_int_eq(res, NSERROR_OK);
@ -448,6 +415,11 @@ START_TEST(urldb_session_test)
}
END_TEST
/**
* Session more extensive test case
*
* The databases are loaded and saved with a host and paths added
*/
START_TEST(urldb_session_add_test)
{
nserror res;
@ -467,15 +439,12 @@ START_TEST(urldb_session_add_test)
urldb_load_cookies(test_cookies_path);
/* add something to db */
h = urldb_add_host("tree.example.com");
ck_assert_msg(h != NULL, "failed adding host");
url = make_url("http://tree.example.com/");
scheme = nsurl_get_component(url, NSURL_SCHEME);
p = urldb_add_path(scheme, 0, h, strdup("/"), NULL, url);
ck_assert_msg(p != NULL, "failed adding path");
urldb_add_url(url);
res = urldb_update_url_visit_data(url);
ck_assert_int_eq(res, NSERROR_OK);
lwc_string_unref(scheme);
nsurl_unref(url);
/* write database out */
@ -487,8 +456,12 @@ START_TEST(urldb_session_add_test)
unlink(outnam);
/* write cookies out */
outnam = tmpnam(NULL);
urldb_save_cookies(outnam);
/* remove test output */
unlink(outnam);
/* finalise options */
res = nsoption_finalise(NULL, NULL);
ck_assert_int_eq(res, NSERROR_OK);
@ -496,7 +469,12 @@ START_TEST(urldb_session_add_test)
}
END_TEST
/**
* Test case to check entire session
*
* These tests define a session as loading a url database and cookie
* database and then saving them back to disc.
*/
static TCase *urldb_session_case_create(void)
{
TCase *tc;
@ -818,19 +796,18 @@ static TCase *urldb_cookie_case_create(void)
}
/**
* Test urldb_add_host asserting on NULL.
* Test urldb_add_url asserting on NULL.
*/
START_TEST(urldb_api_add_host_assert_test)
START_TEST(urldb_api_add_url_assert_test)
{
struct host_part *res;
res = urldb_add_host(NULL);
ck_assert(res == NULL);
bool res;
res = urldb_add_url(NULL);
ck_assert(res == true);
}
END_TEST
/**
* test url database finalisation without initialisation.
*/
@ -850,12 +827,11 @@ static TCase *urldb_api_case_create(void)
tc = tcase_create("API_checks");
tcase_add_test_raise_signal(tc,
urldb_api_add_host_assert_test,
urldb_api_add_url_assert_test,
6);
tcase_add_test(tc, urldb_api_destroy_no_init_test);
return tc;
}