WebSurf/content/urldb.c

/*
 * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/** \file
 * Unified URL information database (implementation)
 *
 * URLs are stored in a tree-based structure as follows:
 *
 * The host component is extracted from each URL and, if a FQDN, split on
 * every '.'. The tree is constructed by inserting each FQDN segment in
 * reverse order. Duplicate nodes are merged.
 *
 * If the host part of an URL is an IP address, then this is added to the
 * tree verbatim (as if it were a TLD).
 *
 * This provides something looking like:
 *
 * 			      root (a sentinel)
 * 				|
 * 	-------------------------------------------------
 * 	|	|	|	|	|	|	|
 *     com     edu     gov  127.0.0.1  net     org     uk	TLDs
 * 	|	|	|		|	|	|
 *    google   ...     ...             ...     ...     co	2LDs
 * 	|						|
 *     www					       bbc  Hosts/Subdomains
 *							|
 *						       www	...
 *
 * Each of the nodes in this tree is a struct host_part. This stores the
 * FQDN segment (or IP address) with which the node is concerned. Each node
 * may contain further information about paths on a host (struct path_data)
 * or SSL certificate processing on a host-wide basis
 * (host_part::permit_invalid_certs).
 *
 * Path data is concerned with storing various metadata about the path in
 * question. This includes global history data, HTTP authentication details
 * and any associated HTTP cookies. This is stored as a tree of path segments
 * hanging off the relevant host_part node.
 *
 * Therefore, to find the last visited time of the URL
 * http://www.example.com/path/to/resource.html, the FQDN tree would be
 * traversed in the order root -> "com" -> "example" -> "www". The "www"
 * node would have attached to it a tree of struct path_data:
 *
 *			    (sentinel)
 *				|
 * 			       path
 * 				|
 * 			       to
 * 				|
 * 			   resource.html
 *
 * This represents the absolute path "/path/to/resource.html". The leaf node
 * "resource.html" contains the last visited time of the resource.
 *
 * The mechanism described above is, however, not particularly conducive to
 * fast searching of the database for a given URL (or URLs beginning with a
 * given prefix). Therefore, an ancillary data structure is used to enable
 * fast searching. This structure simply reflects the contents of the
 * database, with entries being added/removed at the same time as for the
 * core database. In order to ensure that degenerate cases are kept to a
 * minimum, we use an AAtree. This is an approximation of a Red-Black tree
 * with similar performance characteristics, but with a significantly
 * simpler implementation. Entries in this tree comprise pointers to the
 * leaf nodes of the host tree described above.
 */

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/select.h>
#include <curl/curl.h>

#include "image/bitmap.h"
#include "content/content.h"
#include "content/urldb.h"
#include "desktop/cookies.h"
#include "desktop/options.h"
#ifdef riscos
/** \todo lose this */
#include "riscos/bitmap.h"
#endif
#include "utils/log.h"
#include "utils/filename.h"
#include "utils/url.h"
#include "utils/utils.h"

/**
 * Internal representation of one HTTP cookie.
 *
 * Cookies are chained into a doubly-linked list through ::prev and ::next;
 * struct path_data::cookies holds a pointer into such a list.
 */
struct cookie_internal_data {
	char *name;		/**< Cookie name */
	char *value;		/**< Cookie value */
	char *comment;		/**< Cookie comment */
	bool domain_from_set;	/**< Domain came from Set-Cookie: header */
	char *domain;		/**< Domain */
	bool path_from_set;	/**< Path came from Set-Cookie: header */
	char *path;		/**< Path */
	time_t expires;		/**< Expiry timestamp, or 1 for session */
	time_t last_used;	/**< Last used time */
	bool secure;		/**< Only send for HTTPS requests */
	cookie_version version;	/**< Specification compliance */
	bool no_destroy;	/**< Never destroy this cookie,
				 * unless it's expired */

	struct cookie_internal_data *prev;	/**< Previous in list */
	struct cookie_internal_data *next;	/**< Next in list */
};

/**
 * HTTP authentication details attached to a path node.
 *
 * Both strings are heap copies owned by the node: urldb_set_auth_details()
 * strdup()s the new values and free()s the ones being replaced.
 */
struct auth_data {
	char *realm;		/**< Protection realm */
	char *auth;		/**< Authentication details in form
				 * username:password */
};

/**
 * Cache bookkeeping for a resource.
 *
 * NOTE(review): 12 bytes matches an 11 character 'XX.XX.XX.XX' style name
 * plus terminator, as used for thumbnails in urldb_load() -- confirm that
 * the cache uses the same naming scheme.
 */
struct cache_internal_data {
	char filename[12];	/**< Cached filename, or first byte 0 for none */
};

/**
 * Global history data for a resource.
 *
 * urldb_get_url_data() hands a pointer to this structure to callers cast to
 * (struct url_data *), so the two layouts presumably have to stay
 * compatible -- verify against the declaration of struct url_data before
 * changing any member here.
 */
struct url_internal_data {
	char *title;		/**< Resource title */
	unsigned int visits;	/**< Visit count */
	time_t last_visit;	/**< Last visit time */
	content_type type;	/**< Type of resource */
};

/**
 * One node in the tree of path segments hanging off a host.
 *
 * Siblings are linked through ::next / ::prev, and a node's children are
 * reached through ::children. The save code treats a node without children
 * as a leaf and only writes history data for leaves.
 */
struct path_data {
	char *url;		/**< Full URL */
	char *scheme;		/**< URL scheme for data */
	unsigned int port;	/**< Port number for data */
	char *segment;		/**< Path segment for this node */
	unsigned int frag_cnt;	/**< Number of entries in ::fragment */
	char **fragment;	/**< Array of fragments */
	bool persistent;	/**< This entry should persist */

	struct bitmap *thumb;	/**< Thumbnail image of resource */
	struct url_internal_data urld;	/**< URL data for resource */
	struct cache_internal_data cache;	/**< Cache data for resource */
	struct auth_data auth;	/**< Authentication data for resource */
	struct cookie_internal_data *cookies;	/**< Cookies associated with resource */

	struct path_data *next;	/**< Next sibling */
	struct path_data *prev;	/**< Previous sibling */
	struct path_data *parent;	/**< Parent path segment */
	struct path_data *children;	/**< Child path segments */
	struct path_data *last;		/**< Last child */
};

/**
 * One node of the host tree: a single FQDN segment (or a whole IP address).
 *
 * The embedded ::paths member doubles as the sentinel root of this host's
 * path tree; code in this file walks path_data::parent links up to that
 * sentinel and then casts the pointer back to struct host_part.
 */
struct host_part {
	/** Known paths on this host. This _must_ be first so that
	 * struct host_part *h = (struct host_part *)mypath; works */
	struct path_data paths;
	bool permit_invalid_certs;	/**< Allow access to SSL protected
					 * resources on this host without
					 * verifying certificate authenticity
					 */

	char *part;		/**< Part of host string */

	struct host_part *next;	/**< Next sibling */
	struct host_part *prev;	/**< Previous sibling */
	struct host_part *parent;	/**< Parent host part */
	struct host_part *children;	/**< Child host parts */
};

/**
 * Node of the balanced search tree used for fast host lookup.
 *
 * Each node refers to an entry of the host tree rather than owning a copy.
 * Empty subtrees are represented by a shared sentinel node, not by NULL
 * (the traversal code compares against the address of that sentinel).
 */
struct search_node {
	const struct host_part *data;	/**< Host tree entry */

	unsigned int level;		/**< Node level */

	struct search_node *left;	/**< Left subtree */
	struct search_node *right;	/**< Right subtree */
};

/*
 * Forward declarations for the file-local (static) helpers, grouped by role.
 */

/* Destruction */
static void urldb_destroy_host_tree(struct host_part *root);
static void urldb_destroy_path_tree(struct path_data *root);
static void urldb_destroy_path_node_content(struct path_data *node);
static void urldb_destroy_cookie(struct cookie_internal_data *c);
static void urldb_destroy_search_tree(struct search_node *root);

/* Saving (note: the definition of urldb_save_search_tree names its first
 * parameter "parent" rather than "root") */
static void urldb_save_search_tree(struct search_node *root, FILE *fp);
static void urldb_count_urls(const struct path_data *root, time_t expiry,
		unsigned int *count);
static void urldb_write_paths(const struct path_data *parent,
		const char *host, FILE *fp, char **path, int *path_alloc,
		int *path_used, time_t expiry);

/* Iteration (callbacks return true to continue, false to stop) */
static bool urldb_iterate_partial_host(struct search_node *root,
		const char *prefix, bool (*callback)(const char *url,
		const struct url_data *data));
static bool urldb_iterate_partial_path(const struct path_data *parent,
		const char *prefix, bool (*callback)(const char *url,
		const struct url_data *data));
static bool urldb_iterate_entries_host(struct search_node *parent,
		bool (*url_callback)(const char *url, const struct url_data *data),
		bool (*cookie_callback)(const char *domain, const struct cookie_data *data));
static bool urldb_iterate_entries_path(const struct path_data *parent,
		bool (*url_callback)(const char *url, const struct url_data *data),
		bool (*cookie_callback)(const char *domain, const struct cookie_data *data));

/* Insertion */
static struct host_part *urldb_add_host_node(const char *part,
		struct host_part *parent);
static struct host_part *urldb_add_host(const char *host);
static struct path_data *urldb_add_path_node(const char *scheme,
		unsigned int port, const char *segment, const char *fragment,
		struct path_data *parent);
static struct path_data *urldb_add_path(const char *scheme,
		unsigned int port, const struct host_part *host,
		const char *path, const char *query, const char *fragment,
		const char *url);
static int urldb_add_path_fragment_cmp(const void *a, const void *b);
static struct path_data *urldb_add_path_fragment(struct path_data *segment,
		const char *fragment);

/* Lookup */
static struct path_data *urldb_find_url(const char *url);
static struct path_data *urldb_match_path(const struct path_data *parent,
		const char *path, const char *scheme, unsigned short port);
static struct search_node **urldb_get_search_tree_direct(const char *host);
static struct search_node *urldb_get_search_tree(const char *host);

/* Dump */
static void urldb_dump_hosts(struct host_part *parent);
static void urldb_dump_paths(struct path_data *parent);
static void urldb_dump_search(struct search_node *parent, int depth);

/* Search tree */
static struct search_node *urldb_search_insert(struct search_node *root,
		const struct host_part *data);
static struct search_node *urldb_search_insert_internal(
		struct search_node *root, struct search_node *n);
static struct search_node *urldb_search_remove(struct search_node *root,
		const struct host_part *data);
static const struct host_part *urldb_search_find(struct search_node *root,
		const char *host);
static struct search_node *urldb_search_skew(struct search_node *root);
static struct search_node *urldb_search_split(struct search_node *root);
static int urldb_search_match_host(const struct host_part *a,
		const struct host_part *b);
static int urldb_search_match_string(const struct host_part *a,
		const char *b);
static int urldb_search_match_prefix(const struct host_part *a,
		const char *b);

/* Cookies */
static struct cookie_internal_data *urldb_parse_cookie(const char *url,
		const char **cookie);
static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v);
static bool urldb_insert_cookie(struct cookie_internal_data *c, const char *scheme,
		const char *url);
static void urldb_free_cookie(struct cookie_internal_data *c);
static bool urldb_concat_cookie(struct cookie_internal_data *c, int *used,
		int *alloc, char **buf);
static void urldb_delete_cookie_hosts(const char *domain, const char *path, const char *name, struct host_part *parent);
static void urldb_delete_cookie_paths(const char *domain, const char *path, const char *name, struct path_data *parent);
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);

/** Root database handle (sentinel at the top of the host tree) */
static struct host_part db_root;

/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
#define NUM_SEARCH_TREES 28
/* Indices into search_trees[]: the tree for a host starting with letter c
 * is addressed as ST_DN + c - 'a' (see the "www." lookups in
 * urldb_iterate_partial). */
#define ST_IP 0
#define ST_EE 1
#define ST_DN 2
/* Shared sentinel standing in for "no subtree": level 0, with both child
 * links pointing back at itself. Traversals stop when they reach &empty. */
static struct search_node empty = { 0, 0, &empty, &empty };
/* Every tree starts out as just the sentinel. */
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
	&empty, &empty, &empty, &empty
};

/* On-disk format versions. urldb_save() writes URL_FILE_VERSION and
 * urldb_load() accepts files from version 105 up to URL_FILE_VERSION. */
#define COOKIE_FILE_VERSION 100
#define URL_FILE_VERSION 106

/**
 * Import an URL database from file
 *
 * The file starts with a version line (105 up to URL_FILE_VERSION are
 * accepted). It is followed by one group per host: the host name, the
 * number of URLs, then a fixed number of lines per URL (6 for version 105,
 * 8 otherwise).
 *
 * Entries are inserted into the in-memory database as they are read;
 * nothing already present is removed by this function. Reading stops
 * quietly at the first short read. Failure to allocate a host or path
 * node is fatal (die()).
 *
 * \param filename Name of file containing data
 */
void urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
	char s[MAXIMUM_URL_LENGTH];
	char host[256];
	struct host_part *h;
	int urls;
	int i;
	int version;
	int length;
	FILE *fp;

	assert(filename);

	LOG(("Loading URL file"));

	fp = fopen(filename, "r");
	if (!fp) {
		LOG(("Failed to open file '%s' for reading", filename));
		return;
	}

	/* Every early exit below must close the stream again */
	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
		fclose(fp);
		return;
	}
	version = atoi(s);
	if (version < 105) {
		LOG(("Unsupported URL file version."));
		fclose(fp);
		return;
	}
	if (version > URL_FILE_VERSION) {
		LOG(("Unknown URL file version."));
		fclose(fp);
		return;
	}

	while (fgets(host, sizeof host, fp)) {
		/* get the hostname; cut at the newline if there is one.
		 * strcspn() copes with an empty buffer and with an
		 * over-long or final line that has no newline at all. */
		length = (int) strcspn(host, "\n");
		host[length] = '\0';

		/* skip data that has ended up with a host of '' */
		if (length == 0) {
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			urls = atoi(s);
			for (i = 0; i < ((version == 105 ? 6 : 8) * urls);
					i++)
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
			continue;
		}

		if (version == 105) {
			/* file:/ -> localhost */
			if (strcasecmp(host, "file:/") == 0)
				snprintf(host, sizeof host, "localhost");
			else {
				/* strip any port number */
				char *colon = strrchr(host, ':');
				if (colon)
					*colon = '\0';
			}
		}

		/* read number of URLs */
		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
			break;
		urls = atoi(s);

		/* no URLs => try next host */
		if (urls == 0) {
			LOG(("No URLs for '%s'", host));
			continue;
		}

		h = urldb_add_host(host);
		if (!h) {
			LOG(("Failed adding host: '%s'", host));
			die("Memory exhausted whilst loading URL file");
		}

		/* load the non-corrupt data */
		for (i = 0; i < urls; i++) {
			struct path_data *p = NULL;

			if (version == 105) {
				/* v105 stores one complete URL per entry */
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
				length = (int) strcspn(s, "\n");
				s[length] = '\0';

				if (strncasecmp(s, "file:", 5) == 0) {
					/* local file, so fudge insertion */
					char url[7 + 4096];

					snprintf(url, sizeof url,
							"file://%s", s + 5);

					p = urldb_add_path("file", 0, h,
							s + 5, NULL, NULL, url);
					if (!p) {
						LOG(("Failed inserting '%s'",
								url));
						die("Memory exhausted "
							"whilst loading "
							"URL file");
					}
				} else {
					if (!urldb_add_url(s)) {
						LOG(("Failed inserting '%s'",
								s));
					}
					p = urldb_find_url(s);
				}
			} else {
				/* later versions store scheme, port and
				 * path on separate lines */
				char scheme[64], ports[10];
				char url[64 + 3 + 256 + 6 + 4096 + 1];
				unsigned int port;
				bool is_file = false;

				if (!fgets(scheme, sizeof scheme, fp))
					break;
				length = (int) strcspn(scheme, "\n");
				scheme[length] = '\0';

				if (!fgets(ports, sizeof ports, fp))
					break;
				length = (int) strcspn(ports, "\n");
				ports[length] = '\0';
				port = atoi(ports);

				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
					break;
				length = (int) strcspn(s, "\n");
				s[length] = '\0';

				if (!strcasecmp(host, "localhost") &&
						!strcasecmp(scheme, "file"))
					is_file = true;

				snprintf(url, sizeof url, "%s://%s%s%s%s",
						scheme,
						/* file URLs have no host */
						(is_file ? "" : host),
						(port ? ":" : ""),
						(port ? ports : ""),
						s);

				p = urldb_add_path(scheme, port, h, s, NULL, NULL,
						url);
				if (!p) {
					LOG(("Failed inserting '%s'", url));
					die("Memory exhausted whilst loading "
							"URL file");
				}
			}

			/* The remaining lines are always consumed, even when
			 * there is no node (p == NULL) to store them in, so
			 * that the reader stays in step with the file. */

			/* visit count */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.visits = (unsigned int)atoi(s);

			/* last visit time */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.last_visit = (time_t)atoi(s);

			/* content type */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			if (p)
				p->urld.type = (content_type)atoi(s);

			/* thumbnail filename (only used on RISC OS) */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
#ifdef riscos
			if (p && strlen(s) == 12) {
				/* ensure filename is 'XX.XX.XX.XX' */
				if ((s[2] == '.') && (s[5] == '.') &&
						(s[8] == '.')) {
					s[2] = '/';
					s[5] = '/';
					s[8] = '/';
					s[11] = '\0';
					p->thumb = bitmap_create_file(s);
				} else if ((s[2] == '/') && (s[5] == '/') &&
						(s[8] == '/')) {
					s[11] = '\0';
					p->thumb = bitmap_create_file(s);
				}
			}
#endif

			/* title (an empty line means "no title") */
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
				break;
			length = (int) strcspn(s, "\n");
			if (p && length > 0) {
				s[length] = '\0';
				p->urld.title = malloc(length + 1);
				if (p->urld.title)
					memcpy(p->urld.title, s, length + 1);
			}
		}
	}

	fclose(fp);
	LOG(("Successfully loaded URL file"));
#undef MAXIMUM_URL_LENGTH
}

/**
 * Export the current database to file
 *
 * \param filename Name of file to export to
 */
void urldb_save(const char *filename)
{
	FILE *fp;
	int i;

	assert(filename);

	fp = fopen(filename, "w");
	if (!fp) {
		LOG(("Failed to open file '%s' for writing", filename));
		return;
	}

	/* file format version number */
	fprintf(fp, "%d\n", URL_FILE_VERSION);

	for (i = 0; i != NUM_SEARCH_TREES; i++) {
		urldb_save_search_tree(search_trees[i], fp);
	}

	fclose(fp);
}

/**
 * Save a search (sub)tree
 *
 * Performs an in-order walk of the tree. For each node the host name is
 * rebuilt by following host_part::parent links up to the database root,
 * and the host's unexpired (or persistent) URLs are written after it.
 * Hosts with nothing to save produce no output.
 *
 * A host whose name does not fit the local buffer is skipped (and logged)
 * rather than written truncated, since a truncated name would be read back
 * as a different host.
 *
 * \param parent Root of (sub)tree to save
 * \param fp File to write to
 */
void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
	char host[256];
	const struct host_part *h;
	unsigned int path_count = 0;
	char *path, *p, *end;
	int path_alloc = 64, path_used = 2;
	time_t expiry = time(NULL) - (60 * 60 * 24) * option_expire_url;
	bool host_ok = true;

	if (parent == &empty)
		return;

	urldb_save_search_tree(parent->left, fp);

	/* Build "leaf.….tld" by walking from this node towards the root.
	 * Start from an empty string so host is well-defined even if the
	 * loop body never runs. */
	host[0] = '\0';
	for (h = parent->data, p = host, end = host + sizeof host;
			h && h != &db_root; h = h->parent) {
		int written = snprintf(p, end - p, "%s%s", h->part,
				(h->parent && h->parent->parent) ? "." : "");
		/* snprintf reports the length it wanted to write; anything
		 * not smaller than the space left means truncation */
		if (written < 0 || written >= end - p) {
			host_ok = false;
			break;
		}
		p += written;
	}

	if (host_ok)
		urldb_count_urls(&parent->data->paths, expiry, &path_count);
	else
		LOG(("Host name too long to save; skipping"));

	if (path_count > 0) {
		/* Scratch buffer holding the path being written; it is
		 * grown on demand by urldb_write_paths() */
		path = malloc(path_alloc);
		if (!path)
			return;

		path[0] = '/';
		path[1] = '\0';

		fprintf(fp, "%s\n%u\n", host, path_count);

		urldb_write_paths(&parent->data->paths, host, fp,
				&path, &path_alloc, &path_used, expiry);

		free(path);
	}

	urldb_save_search_tree(parent->right, fp);
}

/**
 * Count number of URLs associated with a host
 *
 * Only leaf nodes (those without children) are counted, and only when they
 * are marked persistent or have been visited at least once since \a expiry.
 * The count is accumulated into \a count, which is not reset here.
 *
 * \param root Root of path data tree
 * \param expiry Expiry time for URLs
 * \param count Pointer to count
 */
void urldb_count_urls(const struct path_data *root, time_t expiry,
		unsigned int *count)
{
	const struct path_data *child = root->children;

	if (child == NULL) {
		/* leaf: decide whether it is worth keeping */
		bool recently_visited = (root->urld.last_visit > expiry) &&
				(root->urld.visits > 0);

		if (root->persistent || recently_visited)
			*count += 1;
		return;
	}

	/* interior node: the answer is the sum over its subtrees */
	while (child != NULL) {
		urldb_count_urls(child, expiry, count);
		child = child->next;
	}
}

/**
 * Write paths associated with a host
 *
 * Walks the path tree depth-first, keeping the textual path of the current
 * node in \a *path. Leaf nodes that are persistent, or were visited after
 * \a expiry, are written as eight lines: scheme, port (blank for none),
 * path, visit count, last visit time, content type, thumbnail filename
 * (blank unless built for RISC OS) and title (blank for none).
 *
 * The title is cleaned in place before writing: control characters become
 * spaces and trailing spaces are removed, so that it occupies one line.
 *
 * \param parent Root of (sub)tree to write
 * \param host Current host name
 * \param fp File to write to
 * \param path Current path string
 * \param path_alloc Allocated size of path
 * \param path_used Used size of path
 * \param expiry Expiry time of URLs
 */
void urldb_write_paths(const struct path_data *parent, const char *host,
		FILE *fp, char **path, int *path_alloc, int *path_used,
		time_t expiry)
{
	const struct path_data *p;
	int i;
	int pused = *path_used;

	if (!parent->children) {
		/* leaf node */
		if (!(parent->persistent ||
				((parent->urld.last_visit > expiry) &&
				(parent->urld.visits > 0))))
			/* expired */
			return;

		fprintf(fp, "%s\n", parent->scheme);

		if (parent->port)
			fprintf(fp, "%u\n", parent->port);
		else
			fprintf(fp, "\n");

		fprintf(fp, "%s\n", *path);

		/** \todo handle fragments? */

		fprintf(fp, "%u\n%i\n%i\n", parent->urld.visits,
				(int)parent->urld.last_visit,
				(int)parent->urld.type);

#ifdef riscos
		if (parent->thumb)
			fprintf(fp, "%s\n", parent->thumb->filename);
		else
			fprintf(fp, "\n");
#else
		fprintf(fp, "\n");
#endif

		if (parent->urld.title) {
			/* Inspect the bytes as unsigned: with a signed plain
			 * char every byte >= 0x80 would compare below 32
			 * and non-ASCII titles would be blanked out. */
			unsigned char *s =
					(unsigned char *)parent->urld.title;
			for (i = 0; s[i] != '\0'; i++)
				if (s[i] < 32)
					s[i] = ' ';
			for (--i; ((i > 0) && (s[i] == ' ')); i--)
				s[i] = '\0';
			fprintf(fp, "%s\n", parent->urld.title);
		} else
			fprintf(fp, "\n");
	}

	for (p = parent->children; p; p = p->next) {
		/* room for the segment plus a trailing '/' */
		int len = *path_used + strlen(p->segment) + 1;
		if (*path_alloc < len) {
			char *temp = realloc(*path,
				(len > 64) ? len : *path_alloc + 64);
			if (!temp)
				return;
			*path = temp;
			*path_alloc = (len > 64) ? len : *path_alloc + 64;
		}

		strcat(*path, p->segment);
		if (p->children) {
			strcat(*path, "/");
		} else {
			/* leaf: no separator was appended */
			len -= 1;
		}

		*path_used = len;

		urldb_write_paths(p, host, fp, path, path_alloc, path_used,
				expiry);

		/* restore path to its state on entry to this function */
		*path_used = pused;
		(*path)[pused - 1] = '\0';
	}
}

/**
 * Set the cross-session persistence of the entry for an URL
 *
 * Does nothing if the URL is not in the database.
 *
 * \param url Absolute URL to persist
 * \param persist True to persist, false otherwise
 */
void urldb_set_url_persistence(const char *url, bool persist)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL)
		entry->persistent = persist;
}

/**
 * Insert an URL into the database
 *
 * The URL is split into components; the host (authority minus any
 * "user@" prefix and ":port" suffix) selects or creates the host entry and
 * the path, query and fragment select or create the path entry. URLs using
 * the "file" scheme are always filed under the host "localhost".
 *
 * NOTE(review): the port is taken from the last ':' in the host, so a
 * bracketed IPv6 literal without a port would be split inside the
 * brackets -- confirm whether such URLs can reach this function.
 *
 * \param url Absolute URL to insert
 * \return true on success, false otherwise
 */
bool urldb_add_url(const char *url)
{
	struct url_components components;
	struct host_part *host_node;
	struct path_data *path_node;
	const char *host;
	char *port_sep;
	unsigned short port = 0;

	assert(url);

	/* extract url components */
	if (url_get_components(url, &components) != URL_FUNC_OK)
		return false;

	/* A usable URL needs both a scheme and an authority */
	if (components.scheme == NULL || components.authority == NULL) {
		url_destroy_components(&components);
		return false;
	}

	/* Skip over any userinfo in the authority */
	host = strchr(components.authority, '@');
	host = (host != NULL) ? host + 1 : components.authority;

	/* Split off the port, terminating the host at the separator */
	port_sep = strrchr(host, ':');
	if (port_sep != NULL) {
		*port_sep = '\0';
		port = atoi(port_sep + 1);
	}

	/* Get host entry */
	if (strcasecmp(components.scheme, "file") == 0)
		host_node = urldb_add_host("localhost");
	else
		host_node = urldb_add_host(host);

	if (host_node == NULL) {
		url_destroy_components(&components);
		return false;
	}

	/* Get path entry; a missing path is treated as empty */
	path_node = urldb_add_path(components.scheme, port, host_node,
			components.path ? components.path : "",
			components.query, components.fragment, url);

	url_destroy_components(&components);

	return path_node != NULL;
}

/**
 * Set an URL's title string, replacing any existing one
 *
 * The existing title is kept if the URL is unknown or the copy cannot be
 * allocated.
 *
 * \param url The URL to look for
 * \param title The title string to use (copied)
 */
void urldb_set_url_title(const char *url, const char *title)
{
	struct path_data *entry;
	char *copy;

	assert(url && title);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return;

	/* Only drop the old title once the new one is safely in hand */
	copy = strdup(title);
	if (copy != NULL) {
		free(entry->urld.title);
		entry->urld.title = copy;
	}
}

/**
 * Set an URL's content type
 *
 * Does nothing if the URL is not in the database.
 *
 * \param url The URL to look for
 * \param type The type to set
 */
void urldb_set_url_content_type(const char *url, content_type type)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL)
		entry->urld.type = type;
}

/**
 * Update an URL's visit data
 *
 * Stamps the entry with the current time and bumps its visit count.
 * Does nothing if the URL is not in the database.
 *
 * \param url The URL to update
 */
void urldb_update_url_visit_data(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.last_visit = time(NULL);
		entry->urld.visits++;
	}
}

/**
 * Reset an URL's visit statistics
 *
 * Clears both the last visit time and the visit count. Does nothing if the
 * URL is not in the database.
 *
 * \param url The URL to reset
 */
void urldb_reset_url_visit_data(const char *url)
{
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);
	if (entry != NULL) {
		entry->urld.last_visit = (time_t)0;
		entry->urld.visits = 0;
	}
}


/**
 * Find data for an URL.
 *
 * \param url Absolute URL to look for
 * \return Pointer to result struct, or NULL
 */
const struct url_data *urldb_get_url_data(const char *url)
{
	struct path_data *p;

	assert(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	return (struct url_data *)&p->urld;
}

/**
 * Extract an URL from the db
 *
 * \param url URL to extract
 * \return Pointer to database's copy of URL or NULL if not found
 */
const char *urldb_get_url(const char *url)
{
	struct path_data *p;

	assert(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	return p->url;
}

/**
 * Look up authentication details in database
 *
 * The URL is first added to the database (a side effect of this lookup),
 * so that there is a node to start searching from. Details stored on the
 * URL's own node win; otherwise each ancestor is checked in turn for a
 * "directory" entry carrying details. Details only count when both realm
 * and credentials are present.
 *
 * \param url Absolute URL to search for
 * \return Pointer to authentication details, or NULL if not found
 */
const char *urldb_get_auth_details(const char *url)
{
	struct path_data *p, *q;

	assert(url);

	/* add to the db, so our lookup will work */
	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	/* Check for any auth details attached to this node */
	if (p->auth.realm && p->auth.auth)
		return p->auth.auth;

	/* Now consider ancestors */
	for (; p; p = p->parent) {
		/* The parent path entry is stored hung off the
		 * parent entry with an empty (not NULL) segment string.
		 * We look for this here.
		 */
		for (q = p->children; q; q = q->next) {
			if (q->segment[0] == '\0')
				break;
		}

		/* Return straight from the loop so that a candidate which
		 * failed this check can never be returned afterwards */
		if (q && q->auth.realm && q->auth.auth)
			return q->auth.auth;
	}

	return NULL;
}

/**
 * Retrieve certificate verification permissions from database
 *
 * \param url Absolute URL to search for
 * \return true to permit connections to hosts with invalid certificates,
 * false otherwise.
 */
bool urldb_get_cert_permissions(const char *url)
{
	struct path_data *p;
	struct host_part *h;

	assert(url);

	p = urldb_find_url(url);
	if (!p)
		return false;

	for (; p && p->parent; p = p->parent)
		/* do nothing */;

	h = (struct host_part *)p;

	return h->permit_invalid_certs;
}

/**
 * Set authentication data for an URL
 *
 * The details are attached to the "directory" containing the URL: the
 * leafname (and any query or fragment) is removed first, and the resulting
 * URL is added to the database if it is not already present. Existing
 * details on that entry are replaced; nothing changes if memory runs out.
 *
 * \param url The URL to consider
 * \param realm The authentication realm
 * \param auth The authentication details (in form username:password)
 */
void urldb_set_auth_details(const char *url, const char *realm,
		const char *auth)
{
	struct path_data *p;
	char *urlt, *sep, *path, *cut, *t1, *t2;

	assert(url && realm && auth);

	urlt = strdup(url);
	if (!urlt)
		return;

	/* strip leafname from URL.
	 * Only slashes after the scheme separator and before any query or
	 * fragment are considered: the "//" of "scheme://host" is not a
	 * path separator, and neither is a '/' inside "?next=/home". */
	sep = strstr(urlt, "://");
	path = sep ? sep + 3 : urlt;
	cut = path + strcspn(path, "?#");
	while (cut > path && cut[-1] != '/')
		cut--;
	if (cut > path)
		/* cut is just past the last path slash */
		*cut = '\0';

	/* add url, in case it's missing */
	urldb_add_url(urlt);

	p = urldb_find_url(urlt);

	free(urlt);

	if (!p)
		return;

	/** \todo search subtree for same realm/auth details
	 * and remove them (as the lookup routine searches up the tree) */

	t1 = strdup(realm);
	t2 = strdup(auth);

	if (!t1 || !t2) {
		free(t1);
		free(t2);
		return;
	}

	free(p->auth.realm);
	free(p->auth.auth);

	p->auth.realm = t1;
	p->auth.auth = t2;
}

/**
 * Set certificate verification permissions
 *
 * The URL is added to the database if necessary, and the flag is then
 * recorded on the host that owns it (so it applies host-wide).
 *
 * \param url URL to consider
 * \param permit Set to true to allow invalid certificates
 */
void urldb_set_cert_permissions(const char *url, bool permit)
{
	struct path_data *node;

	assert(url);

	/* add url, in case it's missing */
	urldb_add_url(url);

	node = urldb_find_url(url);
	if (node == NULL)
		return;

	/* climb to the sentinel at the root of this host's path tree,
	 * which sits at the start of the owning host_part */
	while (node->parent != NULL)
		node = node->parent;

	((struct host_part *)node)->permit_invalid_certs = permit;
}

/**
 * Set thumbnail for url, replacing any existing thumbnail
 *
 * A previously stored thumbnail is destroyed unless it is the very bitmap
 * being set. Does nothing if the URL is not in the database.
 *
 * \param url Absolute URL to consider
 * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
 */
void urldb_set_thumbnail(const char *url, struct bitmap *bitmap)
{
	struct path_data *entry;
	struct bitmap *previous;

	assert(url);

	entry = urldb_find_url(url);
	if (entry == NULL)
		return;

	previous = entry->thumb;
	if (previous != NULL && previous != bitmap)
		bitmap_destroy(previous);

	entry->thumb = bitmap;
}

/**
 * Retrieve thumbnail data for given URL
 *
 * \param url Absolute URL to search for
 * \return Pointer to thumbnail data, or NULL if not found.
 */
const struct bitmap *urldb_get_thumbnail(const char *url)
{
	struct path_data *p;

	assert(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	return p->thumb;
}

/**
 * Decide whether a "www." variant of a host prefix is worth trying
 *
 * \param host Host prefix as typed
 * \param len Length of the host prefix
 * \return true if the prefix is non-empty and is not itself the beginning
 *         of (or does not itself begin with) "www", compared without
 *         regard to case
 */
static bool urldb_prefix_needs_www(const char *host, size_t len)
{
	if (len == 0)
		return false;

	return strncasecmp(host, "www", (len < 3) ? len : 3) != 0;
}

/**
 * Iterate over entries in the database which match the given prefix
 *
 * Any "scheme://" at the start of the prefix is ignored. If the remainder
 * contains a '/', the text before it is treated as a complete host name and
 * the rest as a path prefix on that host; otherwise the whole prefix is
 * matched against host names. In both cases, when the prefix does not look
 * like the start of "www", the same search is also tried with "www."
 * prepended.
 *
 * \param prefix Prefix to match
 * \param callback Callback function
 */
void urldb_iterate_partial(const char *prefix,
		bool (*callback)(const char *url,
		const struct url_data *data))
{
	char host[256];
	char buf[260]; /* max domain + "www." */
	const char *slash, *scheme_sep;
	struct search_node *tree;
	const struct host_part *h;

	assert(prefix && callback);

	/* strip scheme */
	scheme_sep = strstr(prefix, "://");
	if (scheme_sep)
		prefix = scheme_sep + 3;

	slash = strchr(prefix, '/');
	tree = urldb_get_search_tree(prefix);

	if (slash) {
		/* if there's a slash in the input, then we can
		 * assume that we're looking for a path */
		size_t len = slash - prefix;

		snprintf(host, sizeof host, "%.*s", (int) len, prefix);

		h = urldb_search_find(tree, host);
		if (!h) {
			/* unknown host: fall back to www.<host> */
			if (!urldb_prefix_needs_www(host, len))
				return;

			snprintf(buf, sizeof buf, "www.%s", host);
			h = urldb_search_find(
				search_trees[ST_DN + 'w' - 'a'],
				buf);
			if (!h)
				return;
		}

		if (h->paths.children) {
			/* Have paths, iterate them */
			urldb_iterate_partial_path(&h->paths, slash + 1,
					callback);
		}

	} else {
		size_t len = strlen(prefix);

		/* looking for hosts */
		if (!urldb_iterate_partial_host(tree, prefix, callback))
			return;

		if (urldb_prefix_needs_www(prefix, len)) {
			/* now look for www.prefix */
			snprintf(buf, sizeof buf, "www.%s", prefix);
			if (!urldb_iterate_partial_host(
					search_trees[ST_DN + 'w' - 'a'],
					buf, callback))
				return;
		}
	}
}

/**
 * Partial host iterator (internal)
 *
 * Descends the search tree towards the hosts matching \a prefix. Where a
 * node matches, its left subtree, the node's own paths and its right
 * subtree are visited in that order.
 *
 * \param root Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
		bool (*callback)(const char *url,
		const struct url_data *data))
{
	int order;

	assert(root && prefix && callback);

	if (root == &empty)
		return true;

	order = urldb_search_match_prefix(root->data, prefix);

	/* No match => any matches lie entirely to one side */
	if (order > 0)
		return urldb_iterate_partial_host(root->left, prefix,
				callback);
	if (order < 0)
		return urldb_iterate_partial_host(root->right, prefix,
				callback);

	/* Match => matches may lie on both sides as well as here */
	if (!urldb_iterate_partial_host(root->left, prefix, callback))
		return false;

	/* extract all paths attached to this host, if it has any */
	if (root->data->paths.children &&
			!urldb_iterate_entries_path(&root->data->paths,
					callback, NULL))
		return false;

	return urldb_iterate_partial_host(root->right, prefix, callback);
}

/**
 * Partial path iterator (internal)
 *
 * Consumes one '/'-delimited component of \a prefix per level of the path
 * tree. Components followed by more prefix must equal a child's segment
 * (ignoring case); the final component only has to be a leading part of
 * the segment, and every entry beneath such a child is reported.
 *
 * \param parent Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_partial_path(const struct path_data *parent,
		const char *prefix, bool (*callback)(const char *url,
		const struct url_data *data))
{
	const struct path_data *p;
	const char *slash, *end = prefix + strlen(prefix);
	size_t seg_len;

	slash = strchr(prefix, '/');
	if (!slash)
		slash = end;

	if (slash == prefix && *prefix == '/')
		/* Ignore "//" */
		return true;

	seg_len = slash - prefix;

	for (p = parent->children; p; p = p->next) {
		if (strncasecmp(p->segment, prefix, seg_len) != 0)
			/* didn't match, but may be more */
			continue;

		/* prefix matches so far */
		if (slash == end) {
			/* we've run out of prefix, so all
			 * paths below this one match */
			if (!urldb_iterate_entries_path(p, callback, NULL))
				return false;
		} else {
			/* A component that is followed by more prefix is
			 * complete, so the segment must end here too:
			 * "foo/bar" must not descend into "foobar". */
			if (p->segment[seg_len] != '\0')
				continue;

			/* more prefix to go => recurse */
			if (!urldb_iterate_partial_path(p, slash + 1,
					callback))
				return false;
		}
	}

	return true;
}

/**
 * Iterate over all entries in database
 *
 * The search trees are visited in index order; iteration ends early as
 * soon as a traversal reports that it should not continue.
 *
 * \param callback Function to callback for each entry
 */
void urldb_iterate_entries(bool (*callback)(const char *url,
		const struct url_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES &&
			urldb_iterate_entries_host(search_trees[tree],
					callback, NULL))
		tree++;
}

/**
 * Iterate over all cookies in database
 *
 * The search trees are walked one after another, passing only a cookie
 * callback to the host iterator. The walk ends early as soon as the host
 * iterator reports that iteration should stop.
 *
 * \param callback Function to callback for each entry
 */
void urldb_iterate_cookies(bool (*callback)(const char *domain, const struct cookie_data *data))
{
	int tree = 0;

	assert(callback);

	while (tree < NUM_SEARCH_TREES &&
			urldb_iterate_entries_host(search_trees[tree],
					NULL, callback))
		tree++;
}

/**
 * Host data iterator (internal)
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_entries_host(struct search_node *parent,
		bool (*url_callback)(const char *url,
				const struct url_data *data),
		bool (*cookie_callback)(const char *domain,
				const struct cookie_data *data))
{
	if (parent == &empty)
		return true;

	if (!urldb_iterate_entries_host(parent->left,
			url_callback, cookie_callback))
		return false;

	if ((parent->data->paths.children) || ((cookie_callback) &&
			(parent->data->paths.cookies))) {
		/* We have paths (or domain cookies), so iterate them */
		if (!urldb_iterate_entries_path(&parent->data->paths,
				url_callback, cookie_callback)) {
			return false;
		}
	}

	if (!urldb_iterate_entries_host(parent->right,
			url_callback, cookie_callback))
		return false;

	return true;
}

/**
 * Path data iterator (internal)
 *
 * Callbacks are only invoked for leaf nodes (those without children).
 * When a URL callback is supplied it takes precedence and the cookie
 * callback is not consulted for that leaf.
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
bool urldb_iterate_entries_path(const struct path_data *parent,
		bool (*url_callback)(const char *url,
				const struct url_data *data),
		bool (*cookie_callback)(const char *domain,
				const struct cookie_data *data))
{
	const struct path_data *child;

	if (parent->children == NULL) {
		/* Leaf node. Every leaf is expected to carry an URL or
		 * cookies; anything else points at a bug in the loader or
		 * the insertion code, hence the assertions. */
		assert(url_callback || cookie_callback);

		/** \todo handle fragments? */
		if (url_callback != NULL) {
			assert(parent->url);
			return url_callback(parent->url,
					(const struct url_data *) &parent->urld);
		}

		if (parent->cookies != NULL)
			return cookie_callback(parent->cookies->domain,
					(const struct cookie_data *)
					parent->cookies);

		return true;
	}

	/* Interior node => visit each child in list order */
	for (child = parent->children; child != NULL; child = child->next) {
		if (!urldb_iterate_entries_path(child,
				url_callback, cookie_callback))
			return false;
	}

	return true;
}

/**
 * Add a host node to the tree
 *
 * The new node is linked in at the head of the parent's child list.
 *
 * \param part Host segment to add (or whole IP address) (copied)
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
struct host_part *urldb_add_host_node(const char *part,
		struct host_part *parent)
{
	struct host_part *node;

	assert(part && parent);

	node = calloc(1, sizeof *node);
	if (node == NULL)
		return NULL;

	node->part = strdup(part);
	if (node->part == NULL) {
		free(node);
		return NULL;
	}

	/* Push onto the front of the sibling list */
	node->parent = parent;
	node->next = parent->children;
	if (node->next != NULL)
		node->next->prev = node;
	parent->children = node;

	return node;
}

/**
 * Add a host to the database, creating any intermediate entries
 *
 * IP addresses are stored verbatim as a direct child of the root. Any other
 * host is split on '.' and inserted segment by segment, last segment first,
 * reusing nodes that already exist (compared ignoring case). The leaf node
 * is also inserted into the appropriate search tree.
 *
 * Host names longer than the internal 256 byte buffer are truncated.
 *
 * \param host Hostname to add
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct host_part *urldb_add_host(const char *host)
{
	struct host_part *d = (struct host_part *) &db_root, *e;
	struct search_node *s;
	char buf[256]; /* 256 bytes is sufficient - domain names are
			* limited to 255 chars. */
	char *part;

	assert(host);

	if (url_host_is_ip_address(host)) {
		/* Host is an IP, so simply add as TLD */

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(host, e->part) == 0)
				/* found => return it */
				return e;

		d = urldb_add_host_node(host, d);
		if (!d)
			/* Memory exhausted: there is nothing to hand to
			 * the search tree, which requires valid data */
			return NULL;

		s = urldb_search_insert(search_trees[ST_IP], d);
		if (!s) {
			/* failed */
			d = NULL;
		} else {
			search_trees[ST_IP] = s;
		}

		return d;
	}

	/* Copy host string, so we can corrupt it */
	strncpy(buf, host, sizeof buf);
	buf[sizeof buf - 1] = '\0';

	/* Process FQDN segments backwards */
	do {
		part = strrchr(buf, '.');
		if (!part) {
			/* last segment */
			/* Check for existing entry */
			for (e = d->children; e; e = e->next)
				if (strcasecmp(buf, e->part) == 0)
					break;

			if (e) {
				d = e;
			} else {
				d = urldb_add_host_node(buf, d);
			}

			/* And insert into search tree */
			if (d) {
				struct search_node **r;

				/* buf now holds only the first segment of
				 * the host, so it selects the same tree as
				 * the full host name would */
				r = urldb_get_search_tree_direct(buf);
				s = urldb_search_insert(*r, d);
				if (!s) {
					/* failed */
					d = NULL;
				} else {
					*r = s;
				}
			}
			break;
		}

		/* Check for existing entry */
		for (e = d->children; e; e = e->next)
			if (strcasecmp(part + 1, e->part) == 0)
				break;

		d = e ? e : urldb_add_host_node(part + 1, d);
		if (!d)
			break;

		/* Chop the processed segment off the end of the buffer */
		*part = '\0';
	} while (1);

	return d;
}

/**
 * Add a path node to the tree
 *
 * The new node is inserted into the parent's child list so that segments
 * remain in ascending strcmp() order; a node whose segment compares equal
 * to existing ones is placed after them.
 *
 * \param scheme URL scheme associated with path (copied)
 * \param port Port number on host associated with path
 * \param segment Path segment to add (copied)
 * \param fragment URL fragment (copied), or NULL
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
struct path_data *urldb_add_path_node(const char *scheme, unsigned int port,
		const char *segment, const char *fragment,
		struct path_data *parent)
{
	struct path_data *d, *e;

	assert(scheme && segment && parent);

	d = calloc(1, sizeof(struct path_data));
	if (!d)
		return NULL;

	d->scheme = strdup(scheme);
	if (!d->scheme) {
		free(d);
		return NULL;
	}

	d->port = port;

	d->segment = strdup(segment);
	if (!d->segment) {
		free(d->scheme);
		free(d);
		return NULL;
	}

	if (fragment) {
		if (!urldb_add_path_fragment(d, fragment)) {
			/* The fragment array may already have been
			 * allocated even though copying the fragment
			 * itself failed, so release it too. It holds no
			 * strings yet, as the count was never bumped. */
			free(d->fragment);
			free(d->segment);
			free(d->scheme);
			free(d);
			return NULL;
		}
	}

	/* Find the first sibling which sorts after the new segment */
	for (e = parent->children; e; e = e->next)
		if (strcmp(e->segment, d->segment) > 0)
			break;

	if (e) {
		/* Insert immediately before e */
		d->prev = e->prev;
		d->next = e;
		if (e->prev)
			e->prev->next = d;
		else
			parent->children = d;
		e->prev = d;
	} else if (!parent->children) {
		/* First child of this parent */
		d->prev = d->next = NULL;
		parent->children = parent->last = d;
	} else {
		/* Sorts after everything => append */
		d->next = NULL;
		d->prev = parent->last;
		parent->last->next = d;
		parent->last = d;
	}
	d->parent = parent;

	return d;
}

/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path and query are joined (with a '?' in front of the query) and
 * split on '/'. Each segment is looked up among the current node's
 * children by segment (case sensitive), scheme (case insensitive) and
 * port, and created when missing. The fragment is attached to the final
 * segment only. The first time a leaf is reached, a copy of \a url with
 * any fragment removed is stored on it.
 *
 * \param scheme URL scheme associated with path
 * \param port Port number on host associated with path
 * \param host Host tree node to attach to
 * \param path Absolute path to add
 * \param query Path query to add
 * \param fragment URL fragment, or NULL
 * \param url URL (fragment ignored)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
struct path_data *urldb_add_path(const char *scheme, unsigned int port,
		const struct host_part *host, const char *path,
		const char *query, const char *fragment, const char *url)
{
	struct path_data *node, *match;
	char *joined, *cursor;
	char *seg, *sep, *hash;
	size_t path_len = 0, query_len = 0;

	assert(scheme && host && url);
	assert(path || query);

	node = (struct path_data *) &host->paths;

	/* Build a private, writable "path?query" string */
	if (path)
		path_len = strlen(path);
	if (query)
		query_len = strlen(query);

	joined = malloc(path_len + (query ? query_len + 1 : 0) + 1);
	if (joined == NULL)
		return NULL;

	cursor = joined;
	if (path) {
		memcpy(cursor, path, path_len);
		cursor += path_len;
	}
	if (query) {
		*cursor++ = '?';
		memcpy(cursor, query, query_len);
		cursor += query_len;
	}
	*cursor = '\0';

	/* A leading '/' does not introduce a segment of its own */
	seg = (*joined == '/') ? joined + 1 : joined;

	for (;;) {
		sep = strchr(seg, '/');
		if (sep != NULL)
			*sep = '\0';

		/* Is this segment already known for this scheme/port? */
		for (match = node->children; match; match = match->next) {
			if (strcmp(seg, match->segment) == 0 &&
					strcasecmp(scheme,
					match->scheme) == 0 &&
					match->port == port)
				break;
		}

		if (sep == NULL) {
			/* Final segment: this is where the fragment goes */
			node = match ? urldb_add_path_fragment(match,
					fragment) :
					urldb_add_path_node(scheme, port,
					seg, fragment, node);
			break;
		}

		node = match ? match : urldb_add_path_node(scheme, port,
				seg, NULL, node);
		if (node == NULL)
			break;

		seg = sep + 1;
	}

	free(joined);

	if (node != NULL && node->url == NULL) {
		/* Leaf has no URL yet => record it, minus any fragment */
		node->url = strdup(url);
		if (node->url == NULL)
			return NULL;

		hash = strrchr(node->url, '#');
		if (hash != NULL)
			*hash = '\0';
	}

	return node;
}

/**
 * Fragment comparator callback for qsort
 *
 * \param a Pointer to the first string pointer
 * \param b Pointer to the second string pointer
 * \return Case-insensitive ordering of the two strings, as strcasecmp()
 */
int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcasecmp(*lhs, *rhs);
}

/**
 * Add a fragment to a path segment
 *
 * The fragment array is grown by one slot, the fragment is copied into
 * it, and the array is then re-sorted case-insensitively. No check is made
 * for the fragment already being present.
 *
 * \param segment Path segment to add to
 * \param fragment Fragment to add (copied), or NULL
 * \return segment or NULL on memory exhaustion
 */
struct path_data *urldb_add_path_fragment(struct path_data *segment,
		const char *fragment)
{
	char **grown;
	char *copy;

	assert(segment);

	/* A missing fragment is deliberately accepted and ignored, which
	 * spares every caller from having to test for it */
	if (fragment == NULL)
		return segment;

	grown = realloc(segment->fragment,
			(segment->frag_cnt + 1) * sizeof *grown);
	if (grown == NULL)
		return NULL;

	/* The enlarged array belongs to the segment from here on, even if
	 * the copy below fails */
	segment->fragment = grown;

	copy = strdup(fragment);
	grown[segment->frag_cnt] = copy;
	if (copy == NULL)
		return NULL;

	segment->frag_cnt++;

	/* Keep fragments in alphabetical order. Inserting in order might
	 * turn out to be cheaper than sorting every time. */
	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
			urldb_add_path_fragment_cmp);

	return segment;
}

/**
 * Find an URL in the database
 *
 * The URL is split into components; the host (with any "user@" prefix and
 * ":port" suffix removed) is looked up in the search trees, and the joined
 * "path?query" string is then matched against that host's path tree.
 * URLs with the "file" scheme are looked up under the host "localhost".
 *
 * An URL with neither a path nor a query is reported as not found: such an
 * URL can never have been stored, as urldb_add_path() requires one of the
 * two.
 *
 * \param url Absolute URL to find
 * \return Pointer to path data, or NULL if not found
 */
struct path_data *urldb_find_url(const char *url)
{
	const struct host_part *h;
	struct path_data *p = NULL;
	struct search_node *tree;
	char *plq = NULL, *copy, *colon;
	const char *host;
	unsigned short port = 0;
	struct url_components components;
	size_t path_len, query_len;

	assert(url);

	/* Extract url components */
	if (url_get_components(url, &components) != URL_FUNC_OK)
		return NULL;

	/* Ensure scheme and authority exist */
	if (!components.scheme || !components.authority)
		goto out;

	/* Without a path or a query there is nothing to match, and the
	 * buffer built below would otherwise be left uninitialised */
	if (!components.path && !components.query)
		goto out;

	/* Extract host part from authority */
	host = strchr(components.authority, '@');
	host = host ? host + 1 : components.authority;

	/* get port and remove from host */
	colon = strrchr(host, ':');
	if (colon) {
		*colon = '\0';
		port = atoi(colon + 1);
	}

	/* file urls have no host, so manufacture one */
	if (strcasecmp(components.scheme, "file") == 0)
		host = "localhost";

	tree = urldb_get_search_tree(host);
	h = urldb_search_find(tree, host);
	if (!h)
		goto out;

	/* generate plq ("path?query") */
	path_len = components.path ? strlen(components.path) : 0;
	query_len = components.query ? strlen(components.query) : 0;

	/* +1 for the '?' separator, +1 for the terminator */
	plq = malloc(path_len + query_len + 2);
	if (!plq)
		goto out;

	copy = plq;
	if (components.path) {
		memcpy(copy, components.path, path_len);
		copy += path_len;
	}
	if (components.query) {
		*copy++ = '?';
		memcpy(copy, components.query, query_len);
		copy += query_len;
	}
	*copy = '\0';

	p = urldb_match_path(&h->paths, plq, components.scheme, port);

out:
	url_destroy_components(&components);
	free(plq);

	return p;
}

/**
 * Match a path string
 *
 * The path is consumed one segment at a time. Each segment must equal a
 * child's segment exactly (case sensitive, same length), and that child
 * must have the same scheme (compared ignoring case, as when paths are
 * added) and port. An empty path matches \a parent itself.
 *
 * The first character of each remaining piece of path is skipped as the
 * separator, so \a path is expected to begin with '/'.
 *
 * \param parent Path (sub)tree to look in
 * \param path The path to search for
 * \param scheme The URL scheme associated with the path
 * \param port The port associated with the path
 * \return Pointer to path data or NULL if not found.
 */
struct path_data *urldb_match_path(const struct path_data *parent,
		const char *path, const char *scheme, unsigned short port)
{
	const struct path_data *p;
	const char *seg, *slash;
	size_t seg_len;

	while (*path != '\0') {
		/* Skip the separator which introduces this segment */
		seg = path + 1;

		slash = strchr(seg, '/');
		if (!slash)
			slash = path + strlen(path);
		seg_len = (size_t) (slash - seg);

		for (p = parent->children; p; p = p->next) {
			/* The stored segment must end exactly where the
			 * requested one does; comparing only the first
			 * seg_len characters would let "foobar" satisfy
			 * a request for "foo" */
			if (strncmp(p->segment, seg, seg_len) == 0 &&
					p->segment[seg_len] == '\0' &&
					strcasecmp(p->scheme, scheme) == 0 &&
					p->port == port)
				break;
		}

		if (!p)
			return NULL;

		/* Matched so far => descend and carry on after this
		 * segment */
		parent = p;
		path = slash;
	}

	return (struct path_data *) parent;
}

/**
 * Get the search tree slot for a particular host
 *
 * IP addresses share one tree. Other hosts are distributed by their first
 * character: one tree per ASCII letter (case folded), and a final tree for
 * everything else.
 *
 * The letter test is done on explicit ASCII ranges rather than with
 * isalpha()/tolower(): those are locale dependent and undefined for
 * negative char values, so a non-ASCII first byte could otherwise yield an
 * index outside the per-letter trees.
 *
 * \param host  the host to lookup
 * \return pointer to the slot holding the root of the corresponding
 *         search tree
 */
struct search_node **urldb_get_search_tree_direct(const char *host) {
	unsigned char first;

	assert(host);

	if (url_host_is_ip_address(host))
		return &search_trees[ST_IP];

	first = (unsigned char) *host;
	if (first >= 'A' && first <= 'Z')
		first = (unsigned char) (first - 'A' + 'a');

	if (first >= 'a' && first <= 'z')
		return &search_trees[ST_DN + first - 'a'];

	return &search_trees[ST_EE];
}

/**
 * Get the search tree for a particular host
 *
 * Convenience wrapper which dereferences the slot returned by
 * urldb_get_search_tree_direct().
 *
 * \param host  the host to lookup
 * \return the current root of the corresponding search tree
 */
struct search_node *urldb_get_search_tree(const char *host) {
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}

/**
 * Dump URL database
 *
 * The host tree is dumped first, starting from the root, followed by each
 * of the search trees in turn.
 */
void urldb_dump(void)
{
	int tree;

	urldb_dump_hosts(&db_root);

	for (tree = 0; tree < NUM_SEARCH_TREES; tree++) {
		urldb_dump_search(search_trees[tree], 0);
	}
}

/**
 * Dump URL database hosts
 *
 * Logs the node's segment and certificate policy (skipped for nodes with
 * no segment), then its paths, and finally recurses into each child host.
 *
 * \param parent Parent node of tree to dump
 */
void urldb_dump_hosts(struct host_part *parent)
{
	struct host_part *child;

	if (parent->part) {
		const char *policy = parent->permit_invalid_certs ?
				"Permits" : "Denies";

		LOG(("%s", parent->part));

		LOG(("\t%s invalid SSL certs", policy));
	}

	/* Paths attached to this host */
	urldb_dump_paths(&parent->paths);

	/* Then everything below it */
	child = parent->children;
	while (child) {
		urldb_dump_hosts(child);
		child = child->next;
	}
}

/**
 * Dump URL database paths
 *
 * Logs the scheme, port, segment and fragments of a node (skipped for
 * nodes with no segment), then recurses into each child path.
 *
 * \param parent Parent node of tree to dump
 */
void urldb_dump_paths(struct path_data *parent)
{
	struct path_data *child;
	unsigned int frag;

	if (parent->segment) {
		LOG(("\t%s : %u", parent->scheme, parent->port));

		LOG(("\t\t'%s'", parent->segment));

		for (frag = 0; frag < parent->frag_cnt; frag++) {
			LOG(("\t\t\t#%s", parent->fragment[frag]));
		}
	}

	/* Descend into every child */
	child = parent->children;
	while (child) {
		urldb_dump_paths(child);
		child = child->next;
	}
}

/**
 * Dump search tree to stderr
 *
 * The tree is written in order (left subtree, node, right subtree), one
 * host per line, indented by one space per level of depth. Each host is
 * rebuilt by walking from the node's host part up through its parents.
 *
 * \param parent Parent node of tree to dump
 * \param depth Tree depth
 */
void urldb_dump_search(struct search_node *parent, int depth)
{
	const struct host_part *h;
	int i;

	if (parent == &empty)
		return;

	urldb_dump_search(parent->left, depth + 1);

	for (i = 0; i != depth; i++)
		fputc(' ', stderr);

	for (h = parent->data; h; h = h->parent) {
		/* The walk ends on a node without a parent, which may have
		 * no segment of its own; passing NULL for %s is undefined */
		if (h->part)
			fprintf(stderr, "%s", h->part);

		/* Separate segments, but not after the last real one */
		if (h->parent && h->parent->parent)
			fputc('.', stderr);
	}

	fputc('\n', stderr);

	urldb_dump_search(parent->right, depth + 1);
}

/**
 * Insert a node into the search tree
 *
 * A fresh level 1 node with no children is allocated for \a data and
 * handed to the internal insertion routine.
 *
 * \param root Root of tree to insert into
 * \param data User data to insert
 * \return Pointer to updated root, or NULL if failed
 */
struct search_node *urldb_search_insert(struct search_node *root,
		const struct host_part *data)
{
	struct search_node *node;

	assert(root && data);

	node = malloc(sizeof *node);
	if (node == NULL)
		return NULL;

	node->data = data;
	node->level = 1;
	node->left = &empty;
	node->right = &empty;

	return urldb_search_insert_internal(root, node);
}

/**
 * Insert node into search tree
 *
 * Descends left or right according to the host comparison, then applies
 * skew and split to each node on the way back up. If an equal host is
 * already present, \a n is freed and the tree is left as it was.
 *
 * \param root Root of (sub)tree to insert into
 * \param n Node to insert
 * \return Pointer to updated root
 */
struct search_node *urldb_search_insert_internal(struct search_node *root,
		struct search_node *n)
{
	int order;

	assert(root && n);

	/* Fell off the bottom => the new node goes here */
	if (root == &empty)
		return n;

	order = urldb_search_match_host(root->data, n->data);
	if (order == 0) {
		/* exact match: nothing to insert */
		free(n);
		return root;
	}

	if (order > 0)
		root->left = urldb_search_insert_internal(root->left, n);
	else
		root->right = urldb_search_insert_internal(root->right, n);

	/* Rebalance this level */
	return urldb_search_split(urldb_search_skew(root));
}

/**
 * Delete a node from a search tree (internal worker)
 *
 * \param root Root of (sub)tree to remove from
 * \param data Data to delete
 * \param last Updated to the most recently visited node; once the
 *             descent finishes this is the bottom node of the search path
 * \param deleted Updated to the most recent node at which the search did
 *                not go left; this is the candidate holding \a data
 * \return Updated root of (sub)tree
 */
static struct search_node *urldb_search_remove_internal(
		struct search_node *root, const struct host_part *data,
		struct search_node **last, struct search_node **deleted)
{
	if (root == &empty)
		return root;

	/* Search down the tree, remembering where we have been */
	*last = root;
	if (urldb_search_match_host(root->data, data) > 0) {
		root->left = urldb_search_remove_internal(root->left,
				data, last, deleted);
	} else {
		*deleted = root;
		root->right = urldb_search_remove_internal(root->right,
				data, last, deleted);
	}

	if (root == *last) {
		/* Bottom of the search path: if the candidate really holds
		 * the data, move this node's data into it and unlink this
		 * node instead */
		if (*deleted != &empty &&
				urldb_search_match_host((*deleted)->data,
						data) == 0) {
			struct search_node *victim = *last;

			(*deleted)->data = victim->data;
			*deleted = &empty;
			root = root->right;
			/* Don't leave a pointer to freed memory behind for
			 * the callers above to compare against */
			*last = NULL;
			free(victim);
		}
	} else {
		/* On the way back up: restore the level invariants */
		if (root->left->level < root->level - 1 ||
				root->right->level < root->level - 1) {
			if (root->right->level > --root->level)
				root->right->level = root->level;

			root = urldb_search_skew(root);
			root->right = urldb_search_skew(root->right);
			root->right->right =
				urldb_search_skew(root->right->right);
			root = urldb_search_split(root);
			root->right = urldb_search_split(root->right);
		}
	}

	return root;
}

/**
 * Delete a node from a search tree
 *
 * The bookkeeping for the removal starts out pointing at the empty
 * sentinel on every call, so a removal which finds nothing cannot leave a
 * stale candidate behind for the next one.
 *
 * \param root Tree to remove from
 * \param data Data to delete
 * \return Updated root of tree
 */
struct search_node *urldb_search_remove(struct search_node *root,
		const struct host_part *data)
{
	struct search_node *last = &empty, *deleted = &empty;

	assert(root && data);

	return urldb_search_remove_internal(root, data, &last, &deleted);
}

/**
 * Find a node in a search tree
 *
 * Walks down from the root, going left or right according to how each
 * node's host compares with \a host, until a match or the sentinel is
 * reached.
 *
 * \param root Tree to look in
 * \param host Host to find
 * \return Pointer to host tree node, or NULL if not found
 */
const struct host_part *urldb_search_find(struct search_node *root,
		const char *host)
{
	int order;

	assert(root && host);

	while (root != &empty) {
		order = urldb_search_match_string(root->data, host);
		if (order == 0)
			return root->data;

		root = (order > 0) ? root->left : root->right;
		assert(root);
	}

	return NULL;
}

/**
 * Compare a pair of host_parts
 *
 * Both hosts are walked upwards towards the root in step, comparing
 * segments case-insensitively. The first differing segment decides the
 * result; if one host runs out of segments first, the longer one is the
 * greater.
 *
 * \param a First host
 * \param b Second host
 * \return 0 if match, non-zero, otherwise
 */
int urldb_search_match_host(const struct host_part *a,
		const struct host_part *b)
{
	bool a_remains, b_remains;
	int diff;

	assert(a && b);

	for (;;) {
		a_remains = (a != NULL && a != &db_root);
		b_remains = (b != NULL && b != &db_root);
		if (!a_remains || !b_remains)
			break;

		diff = strcasecmp(a->part, b->part);
		if (diff != 0)
			/* They differ => return the difference here */
			return diff;

		a = a->parent;
		b = b->parent;
	}

	/* At most one side still has segments at this point */
	if (a_remains)
		/* len(a) > len(b) */
		return 1;
	if (b_remains)
		/* len(a) < len(b) */
		return -1;

	/* identical */
	return 0;
}

/**
 * Compare host_part with a string
 *
 * An IP address is compared with the node's segment as a whole. Otherwise
 * the string is consumed one '.'-separated segment at a time, left to
 * right, while the host is walked upwards through its parents; segments
 * must agree in content (ignoring case) and in length.
 *
 * \param a Host to compare
 * \param b Host name (or IP address) string to compare against
 * \return 0 if match, non-zero, otherwise
 */
int urldb_search_match_string(const struct host_part *a,
		const char *b)
{
	const char *limit, *seg_end;
	size_t seg_len, part_len;
	int diff;

	assert(a && a != &db_root && b);

	if (url_host_is_ip_address(b))
		/* IP address */
		return strcasecmp(a->part, b);

	/* One past the terminator: this is where b ends up once the
	 * final segment has been consumed */
	limit = b + strlen(b) + 1;

	for (; b < limit && a && a != &db_root;
			b = seg_end + 1, a = a->parent) {
		seg_end = strchr(b, '.');
		if (seg_end == NULL)
			/* last segment */
			seg_end = limit - 1;
		seg_len = (size_t) (seg_end - b);

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_len);
		if (diff != 0)
			/* didn't match => return difference */
			return diff;

		/* The strings matched, now check that the lengths do, too */
		part_len = strlen(a->part);
		if (part_len > seg_len)
			/* len(a) > len(b) */
			return 1;
		if (part_len < seg_len)
			/* len(a) < len(b) */
			return -1;
	}

	/* Either one side has segments left over, or they are identical */
	if (a && a != &db_root && b >= limit)
		/* len(a) > len(b) */
		return 1;
	if ((!a || a == &db_root) && b < limit)
		/* len(a) < len(b) */
		return -1;

	/* Identical */
	return 0;
}

/**
 * Compare host_part with prefix
 *
 * Works like the full string comparison, except that the final segment of
 * the prefix only has to match the start of the corresponding host
 * segment, and a host with more segments than the prefix still matches.
 *
 * \param a Host to compare
 * \param b Prefix string to compare against
 * \return 0 if match, non-zero, otherwise
 */
int urldb_search_match_prefix(const struct host_part *a,
		const char *b)
{
	const char *limit, *seg_end;
	size_t seg_len, part_len;
	int diff;

	assert(a && a != &db_root && b);

	if (url_host_is_ip_address(b))
		/* IP address */
		return strncasecmp(a->part, b, strlen(b));

	/* One past the terminator: this is where b ends up once the
	 * final segment has been consumed */
	limit = b + strlen(b) + 1;

	for (; b < limit && a && a != &db_root;
			b = seg_end + 1, a = a->parent) {
		seg_end = strchr(b, '.');
		if (seg_end == NULL)
			/* last segment */
			seg_end = limit - 1;
		seg_len = (size_t) (seg_end - b);

		/* Compare strings (length limited) */
		diff = strncasecmp(a->part, b, seg_len);
		if (diff != 0)
			/* didn't match => return difference */
			return diff;

		/* Segment lengths only matter for segments which are
		 * followed by a '.', i.e. all but the last in the prefix */
		if (seg_end < limit - 1) {
			part_len = strlen(a->part);
			if (part_len > seg_len)
				/* len(a) > len(b) */
				return 1;
			if (part_len < seg_len)
				/* len(a) < len(b) */
				return -1;
		}
	}

	/* The only remaining mismatch is a prefix with more segments
	 * than the host has; a longer host, or an identical one, matches */
	if ((!a || a == &db_root) && b < limit)
		return -1;

	return 0;
}

/**
 * Rotate a subtree right
 *
 * The rotation only happens when the left child is on the same level as
 * the root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return new root of subtree
 */
struct search_node *urldb_search_skew(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	if (root->left->level != root->level)
		return root;

	/* Left child becomes the root; old root becomes its right child */
	pivot = root->left;
	root->left = pivot->right;
	pivot->right = root;

	return pivot;
}

/**
 * Rotate a node left, increasing the parent's level
 *
 * The rotation only happens when the right child's right child is on the
 * same level as the root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return New root of subtree
 */
struct search_node *urldb_search_split(struct search_node *root)
{
	struct search_node *pivot;

	assert(root);

	if (root->right->right->level != root->level)
		return root;

	/* Right child becomes the root, one level up; old root becomes
	 * its left child */
	pivot = root->right;
	root->right = pivot->left;
	pivot->left = root;
	pivot->level++;

	return pivot;
}

/**
 * Retrieve cookies for an URL
 *
 * \param url URL being fetched
 * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
 */
char *urldb_get_cookie(const char *url)
{
	const struct path_data *p, *q;
	const struct host_part *h;
	struct cookie_internal_data *c;
	int count = 0, version = COOKIE_RFC2965;
	int ret_alloc = 4096, ret_used = 1;
	char *path;
	char *ret;
	char *scheme;
	time_t now;
	url_func_result res;

	assert(url);

//	LOG(("%s", url));

	urldb_add_url(url);

	p = urldb_find_url(url);
	if (!p)
		return NULL;

	scheme = p->scheme;

	ret = malloc(ret_alloc);
	if (!ret)
		return NULL;

	ret[0] = '\0';

	res = url_path(url, &path);
	if (res != URL_FUNC_OK) {
		free(ret);
		return NULL;
	}

	now = time(NULL);

	if (*(p->segment) != '\0') {
		/* Match exact path, unless directory, when prefix matching
		 * will handle this case for us. */
		for (q = p->parent->children; q; q = q->next) {
			if (strcmp(q->segment, p->segment))
				continue;

			/* Consider all cookies associated with
			 * this exact path */
			for (c = q->cookies; c; c = c->next) {
				if (c->expires != 1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && strcasecmp(
						q->scheme, "https"))
					/* secure cookie for insecure host.
					 * ignore */
					continue;

				if (!urldb_concat_cookie(c, &ret_used,
						&ret_alloc, &ret)) {
					free(path);
					free(ret);
					return NULL;
				}

				if (c->version < (unsigned int)version)
					version = c->version;

				c->last_used = now;
				cookies_update(c->domain, (struct cookie_data *)c);

				count++;
			}
		}
	}

//	LOG(("%s", ret));

	/* Now consider cookies whose paths prefix-match ours */
	for (p = p->parent; p; p = p->parent) {
		/* Find directory's path entry(ies) */
		/* There are potentially multiple due to differing schemes */
		for (q = p->children; q; q = q->next) {
			if (*(q->segment) != '\0')
				continue;

			for (c = q->cookies; c; c = c->next) {
//				LOG(("%p: %s=%s", c, c->name, c->value));
				if (c->expires != 1 && c->expires < now)
					/* cookie has expired => ignore */
					continue;

				if (c->secure && strcasecmp(
						q->scheme, "https"))
					/* Secure cookie for insecure server
					 * => ignore */
					continue;

				if (!urldb_concat_cookie(c, &ret_used,
						&ret_alloc, &ret)) {
					free(path);
					free(ret);
					return NULL;
				}

				if (c->version < (unsigned int) version)
					version = c->version;

				c->last_used = now;
				cookies_update(c->domain,
						(struct cookie_data *)c);
				count++;
			}
		}

		if (!p->parent) {
			/* No parent, so bail here. This can't go in
			 * the loop exit condition as we also want to
			 * process the top-level node.
                         *
                         * If p->parent is NULL then p->cookies are
                         * the domain cookies and thus we don't even
                         * try matching against them.
                         */
			break;
		}

		/* Consider p itself - may be the result of Path=/foo */
		for (c = p->cookies; c; c = c->next) {
			if (c->expires != 1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && strcasecmp(p->scheme, "https"))
				/* Secure cookie for insecure server
				 * => ignore */
				continue;

			if (!urldb_concat_cookie(c, &ret_used,
					&ret_alloc, &ret)) {
				free(path);
				free(ret);
				return NULL;
			}

			if (c->version < (unsigned int) version)
				version = c->version;

			c->last_used = now;
			cookies_update(c->domain, (struct cookie_data *)c);
			count++;
		}

	}

//	LOG(("%s", ret));

	/* Finally consider domain cookies for hosts which domain match ours */
	for (h = (const struct host_part *)p; h && h != &db_root;
			h = h->parent) {
		for (c = h->paths.cookies; c; c = c->next) {
			if (c->expires != 1 && c->expires < now)
				/* cookie has expired => ignore */
				continue;

			/* Ensure cookie path is a prefix of the resource */
			if (strncmp(c->path, path, strlen(c->path)) != 0)
				/* paths don't match => ignore */
				continue;

			if (c->secure && strcasecmp(scheme, "https"))
				/* secure cookie for insecure host. ignore */
				continue;

			if (!urldb_concat_cookie(c, &ret_used, &ret_alloc,
					&ret)) {
				free(path);
				free(ret);
				return NULL;
			}

			if (c->version < (unsigned int)version)
				version = c->version;

			c->last_used = now;
			cookies_update(c->domain, (struct cookie_data *)c);

			count++;
		}
	}

//	LOG(("%s", ret));

	if (count == 0) {
		/* No cookies found */
		free(path);
		free(ret);
		return NULL;
	}

	/* and build output string */
	{
		char *temp;
		if (version > 0)
			temp = malloc(12 + ret_used);
		else
			temp = malloc(ret_used);
		if (!temp) {
			free(path);
			free(ret);
			return NULL;
		}

		if (version > 0)
			sprintf(temp, "$Version=%d%s", version, ret);
		else {
			/* Old-style cookies => no version & skip "; " */
			sprintf(temp, "%s", ret + 2);
		}

		free(path);
		free(ret);
		ret = temp;
	}

	return ret;
}

/**
 * Parse Set-Cookie header and insert cookie(s) into database
 *
 * When a referer is given, the host of \a url must first domain-match the
 * referer's host. Each cookie parsed from the header is then validated
 * (name and value present, path is a prefix of the URL's path, domain has
 * an embedded dot and matches the fetch host) before being inserted.
 * Processing stops at the first cookie which fails to parse, validate or
 * insert; cookies inserted before that point are left in place.
 *
 * \param header Header to parse, with Set-Cookie: stripped
 * \param url URL being fetched
 * \param referer Referring resource, or 0 for verifiable transaction
 * \return true on success, false otherwise
 */
bool urldb_set_cookie(const char *header, const char *url,
		const char *referer)
{
	const char *cur = header, *end;
	char *path, *host, *scheme, *urlt;
	url_func_result res;

	assert(url && header);

//	LOG(("'%s' : '%s'", url, header));

	/* strip fragment: urlt is a private copy of the URL, cut short at
	 * the first '#', which is what gets passed on for insertion */
	urlt = strdup(url);
	if (!urlt)
		return false;

	/* scheme is only borrowed as a scratch pointer here; it is
	 * overwritten by url_scheme() just below */
	scheme = strchr(urlt, '#');
	if (scheme)
		*scheme = '\0';

	/* Extract the scheme, path and host of the URL being fetched. Each
	 * failure releases whatever was obtained before it. */
	res = url_scheme(url, &scheme);
	if (res != URL_FUNC_OK) {
		free(urlt);
		return false;
	}

	res = url_path(url, &path);
	if (res != URL_FUNC_OK) {
		free(scheme);
		free(urlt);
		return false;
	}

	res = url_host(url, &host);
	if (res != URL_FUNC_OK) {
		free(path);
		free(scheme);
		free(urlt);
		return false;
	}

	if (referer) {
		char *rhost;

		/* Ensure that url's host name domain matches
		 * referer's (4.3.5) */
		res = url_host(referer, &rhost);
		if (res != URL_FUNC_OK) {
			goto error;
		}

		/* Domain match host names */
		if (strcasecmp(host, rhost) != 0) {
			/* Not exact match, so try the following:
			 *
			 * 1) host = A.B; rhost = B (i.e. strip first
			 *    segment from host and compare against rhost)
			 * 2) host = A.B; rhost = C.B (i.e. strip first
			 *    segment off both hosts and compare) */
			const char *dot = strchr(host, '.');
			const char *rdot = strchr(rhost, '.');

			/* Both hosts need a first segment to strip */
			if (!dot || !rdot) {
				free(rhost);
				goto error;
			}

			/* 1 */
			if (strcasecmp(dot + 1, rhost) != 0) {
				/* B must contain embedded dots */
				if (strchr(rdot + 1, '.') == NULL) {
					free(rhost);
					goto error;
				}

				/* 2 (both sides compared from their first
				 * dot onwards) */
				if (strcasecmp(dot, rdot) != 0) {
					free(rhost);
					goto error;
				}
			}
		}

		free(rhost);
	}

	/* Stop two characters short of the end of the header, to allow for
	 * the trailing CRLF */
	end = cur + strlen(cur) - 2 /* Trailing CRLF */;

	/* The parser advances cur past each cookie it consumes; at least
	 * one cookie is always attempted */
	do {
		struct cookie_internal_data *c;
		char *dot;

		c = urldb_parse_cookie(url, &cur);
		if (!c) {
			/* failed => stop parsing */
			goto error;
		}

		/* validate cookie */

		/* 4.2.2:i Cookie must have NAME and VALUE */
		if (!c->name || !c->value) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:i Cookie path must be a prefix of URL path */
		if (strncmp(c->path, path, strlen(c->path)) != 0 ||
				strlen(c->path) > strlen(path)) {
			urldb_free_cookie(c);
			goto error;
		}

		/* 4.3.2:ii Cookie domain must contain embedded dots.
		 * The search starts at the second character, so a leading
		 * dot does not count, and neither does a trailing one. */
		dot = strchr(c->domain + 1, '.');
		if (!dot || *(dot + 1) == '\0') {
			/* no embedded dots */
			urldb_free_cookie(c);
			goto error;
		}

		/* Domain match fetch host with cookie domain */
		if (strcasecmp(host, c->domain) != 0) {
			int hlen, dlen;
			char *domain = c->domain;

			/* 4.3.2:iii */
			if (url_host_is_ip_address(host)) {
				/* IP address, so no partial match */
				urldb_free_cookie(c);
				goto error;
			}

			hlen = strlen(host);
			dlen = strlen(c->domain);

			/* The host must be longer than the domain, with the
			 * one exception of being exactly one character
			 * shorter (handled just below) */
			if (hlen <= dlen && hlen != dlen - 1) {
				/* Partial match not possible */
				urldb_free_cookie(c);
				goto error;
			}

			if (hlen == dlen - 1) {
				/* Relax matching to allow
				 * host a.com to match .a.com */
				domain++;
				dlen--;
			}

			/* The host must end with the cookie's domain */
			if (strcasecmp(host + (hlen - dlen), domain)) {
				urldb_free_cookie(c);
				goto error;
			}

			/* 4.3.2:iv Ensure H contains no dots, where H is
			 * the part of the host in front of the domain */
			for (int i = 0; i < (hlen - dlen); i++)
				if (host[i] == '.') {
					urldb_free_cookie(c);
					goto error;
				}
		}

		/* Now insert into database */
		/* NOTE(review): c is not freed here when insertion fails,
		 * and is still used after it succeeds - presumably
		 * urldb_insert_cookie() owns c either way; confirm. */
		if (!urldb_insert_cookie(c, scheme, urlt))
			goto error;
		cookies_update(c->domain, (struct cookie_data *)c);
	} while (cur < end);

	free(host);
	free(path);
	free(scheme);
	free(urlt);

	return true;

error:
	/* Common failure exit once host, path, scheme and urlt all exist */
	free(host);
	free(path);
	free(scheme);
	free(urlt);

	return false;
}

/**
 * Parse a cookie
 *
 * Reads one cookie (its NAME=VALUE pair plus any attributes that follow)
 * from the input. Parsing stops at a NUL, CR or LF, or at a comma that is
 * judged to separate this cookie from the next one in the same header.
 *
 * \param url URL being fetched (supplies the default domain and path)
 * \param cookie Pointer to cookie string (updated on exit; left untouched
 *               if NULL is returned)
 * \return Pointer to cookie structure (on heap, caller frees) or NULL
 */
struct cookie_internal_data *urldb_parse_cookie(const char *url,
		const char **cookie)
{
	struct cookie_internal_data *c;
	const char *cur;
	char name[1024], value[4096];
	char *n = name, *v = value;
	bool had_equals = false;
	bool quoted = false;
	url_func_result res;

	assert(url && cookie && *cookie);

	c = calloc(1, sizeof(struct cookie_internal_data));
	if (!c)
		return NULL;

	/* -1 => no Expires/Max-Age seen yet (fixed up below) */
	c->expires = -1;

	name[0] = '\0';
	value[0] = '\0';

	for (cur = *cookie; *cur && *cur != '\r' && *cur != '\n'; cur++) {
		if (had_equals && (*cur == '"' || *cur == '\'')) {
			/* Only values may be quoted; the quote characters
			 * themselves are not copied into the value */
			quoted = !quoted;
			continue;
		}

		if (!quoted && !had_equals && *cur == '=') {
			/* First equals => attr-value separator */
			had_equals = true;
			continue;
		}

		if (!quoted && *cur == ';') {
			/* Semicolon => end of current avpair */

			/* NUL-terminate tokens */
			*n = '\0';
			*v = '\0';

			if (!urldb_parse_avpair(c, name, value)) {
				/* Memory exhausted */
				urldb_free_cookie(c);
				return NULL;
			}

			/* And reset to start */
			n = name;
			v = value;
			had_equals = false;
			continue;
		}

		/* And now handle commas. These are a pain as they may mean
		 * any of the following:
		 *
		 * + End of cookie
		 * + Day separator in Expires avpair
		 * + (Invalid) comma in unquoted value
		 *
		 * Therefore, in order to handle all 3 cases (2 and 3 are
		 * identical, the difference being that 2 is in the spec and
		 * 3 isn't), we need to determine where the comma actually
		 * lies. We use the following heuristic:
		 *
		 *   Given a comma at the current input position, find the
		 *   immediately following semicolon (or end of the header
		 *   line if none found). Then, consider the input characters
		 *   between these two positions. If any of these characters
		 *   is an '=', we must assume that the comma signified the
		 *   end of the current cookie.
		 *
		 * This holds as the first avpair of any cookie must be
		 * NAME=VALUE, so the '=' is guaranteed to appear in the
		 * case where the comma marks the end of a cookie.
		 *
		 * This will fail, however, in the case where '=' appears in
		 * the value of the current avpair after the comma or the
		 * subsequent cookie does not start with NAME=VALUE. Neither
		 * of these is particularly likely and if they do occur, the
		 * website is more broken than we can be bothered to handle.
		 */
		if (!quoted && *cur == ',') {
			const char *p;
			/* End of the span to inspect: the next semicolon or,
			 * failing that, the end of the header line. The line
			 * may end in CR, LF or a bare NUL, so do not assume
			 * that a trailing CRLF is present. */
			const char *semi = cur + 1 + strcspn(cur + 1, ";\r\n");

			/* Look for equals sign between comma and semi */
			for (p = cur + 1; p < semi; p++)
				if (*p == '=')
					break;

			if (p != semi) {
				/* found one => comma marks end of cookie;
				 * step over it so the next parse starts on
				 * the following cookie */
				cur++;
				break;
			}

			/* none found => comma is internal to the value and
			 * is accumulated like any other character */
		}

		/* Accumulate into buffers, always leaving space for a NUL.
		 * Over-long names and values are silently truncated. */
		if (!had_equals) {
			if (n < name + 1023)
				*n++ = *cur;
		} else {
			if (v < value + 4095)
				*v++ = *cur;
		}
	}

	/* Parse final avpair */
	*n = '\0';
	*v = '\0';

	if (!urldb_parse_avpair(c, name, value)) {
		/* Memory exhausted */
		urldb_free_cookie(c);
		return NULL;
	}

	/* Now fix-up default values */
	if (!c->domain) {
		res = url_host(url, &c->domain);
		if (res != URL_FUNC_OK) {
			urldb_free_cookie(c);
			return NULL;
		}
	}

	if (!c->path) {
		res = url_path(url, &c->path);
		if (res != URL_FUNC_OK) {
			urldb_free_cookie(c);
			return NULL;
		}
	}

	/* No expiry was specified: store 1 rather than -1
	 * (presumably denoting a session cookie -- TODO confirm) */
	if (c->expires == -1)
		c->expires = 1;

	/* Write back current position */
	*cookie = cur;

	return c;
}

/**
 * Strip leading and trailing blanks (spaces and tabs) from a string
 *
 * \param s String to trim; trailing blanks are overwritten with NULs
 * \return Pointer to the first non-blank character within s
 */
static char *urldb_trim_blanks(char *s)
{
	size_t len;

	while (*s == ' ' || *s == '\t')
		s++;

	/* Examine the last real character (len - 1), not the terminator */
	for (len = strlen(s); len > 0; len--) {
		if (s[len - 1] != ' ' && s[len - 1] != '\t')
			break;
		s[len - 1] = '\0';
	}

	return s;
}

/**
 * Parse a cookie avpair
 *
 * Surrounding spaces and tabs are removed from both components. The
 * attribute name is then matched case-insensitively against the known
 * attributes. The first avpair that matches none of them becomes the
 * cookie's NAME=VALUE; any later unmatched avpairs are ignored.
 *
 * \param c Cookie struct to populate
 * \param n Name component (modified in place)
 * \param v Value component (modified in place)
 * \return true on success, false on memory exhaustion
 */
bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v)
{
	assert(c && n && v);

	n = urldb_trim_blanks(n);
	v = urldb_trim_blanks(v);

	if (!c->comment && strcasecmp(n, "Comment") == 0) {
		c->comment = strdup(v);
		if (!c->comment)
			return false;
	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
		if (v[0] == '.') {
			/* Domain must start with a dot; anything else is
			 * ignored, leaving the domain unset */
			c->domain_from_set = true;
			c->domain = strdup(v);
			if (!c->domain)
				return false;
		}
	} else if (strcasecmp(n, "Max-Age") == 0) {
		int temp = atoi(v);
		if (temp == 0)
			/* Special case - 0 means delete */
			c->expires = 0;
		else
			c->expires = time(NULL) + temp;
	} else if (!c->path && strcasecmp(n, "Path") == 0) {
		c->path_from_set = true;
		c->path = strdup(v);
		if (!c->path)
			return false;
	} else if (strcasecmp(n, "Version") == 0) {
		c->version = atoi(v);
	} else if (strcasecmp(n, "Expires") == 0) {
		char *datenoday;
		time_t expires;

		/* Strip dayname from date (these are hugely
		 * variable and liable to break the parser.
		 * They also serve no useful purpose).
		 * The cast keeps isdigit() defined for bytes >= 0x80. */
		for (datenoday = v; *datenoday &&
				!isdigit((unsigned char) *datenoday);
				datenoday++)
			; /* do nothing */

		expires = curl_getdate(datenoday, NULL);
		if (expires == -1) {
			/* assume we have an unrepresentable
			 * date => force it to the maximum
			 * possible value of a 32bit time_t
			 * (this may break in 2038. We'll
			 * deal with that once we come to
			 * it) */
			expires = (time_t)0x7fffffff;
		}
		c->expires = expires;
	} else if (strcasecmp(n, "Secure") == 0) {
		c->secure = true;
	} else if (!c->name) {
		/* First unrecognised avpair => the cookie's NAME=VALUE */
		c->name = strdup(n);
		c->value = strdup(v);
		if (!c->name || !c->value)
			return false;
	}

	return true;
}

/**
 * Insert a cookie into the database
 *
 * A cookie whose domain starts with '.' is attached to the root path of
 * the host named by the rest of the domain; any other cookie is attached
 * to the path node for its own path. An existing cookie with the same
 * domain, path and name is replaced, or removed if the new cookie's
 * expiry is 0.
 *
 * Note that in the removal case the new cookie is freed as well, even
 * though true is returned, so callers must not rely on c being valid
 * after this call.
 *
 * \param c The cookie to insert
 * \param scheme URL scheme associated with cookie path
 * \param url URL (sans fragment) associated with cookie
 * \return true on success, false on memory exhaustion (c will be freed)
 */
bool urldb_insert_cookie(struct cookie_internal_data *c, const char *scheme,
		const char *url)
{
	struct cookie_internal_data *match;
	const struct host_part *host;
	struct path_data *node;
	const char *hostname;
	bool domain_cookie;

	assert(c && scheme && url);

	/* Domain cookies are filed under the host without the leading dot */
	domain_cookie = (c->domain[0] == '.');
	hostname = domain_cookie ? c->domain + 1 : c->domain;

	/* Locate the host, creating it if need be */
	host = urldb_search_find(urldb_get_search_tree(hostname), hostname);
	if (!host)
		host = urldb_add_host(hostname);
	if (!host) {
		urldb_free_cookie(c);
		return false;
	}

	if (domain_cookie) {
		node = &host->paths;
	} else {
		/* find path */
		node = urldb_add_path(scheme, 0, host,
				c->path, NULL, NULL, url);
		if (!node) {
			urldb_free_cookie(c);
			return false;
		}
	}

	/* Look for an existing cookie with the same identity */
	match = node->cookies;
	while (match) {
		if (strcmp(match->domain, c->domain) == 0 &&
				strcmp(match->path, c->path) == 0 &&
				strcmp(match->name, c->name) == 0)
			break;
		match = match->next;
	}

	if (!match) {
		/* Brand new cookie: push onto the head of the list */
		c->prev = NULL;
		c->next = node->cookies;
		if (node->cookies)
			node->cookies->prev = c;
		node->cookies = c;
		return true;
	}

	if (c->expires == 0) {
		/* Deletion request: unlink the old cookie, discard both */
		if (match->next)
			match->next->prev = match->prev;
		if (match->prev)
			match->prev->next = match->next;
		else
			node->cookies = match->next;
		urldb_free_cookie(match);
		urldb_free_cookie(c);
		return true;
	}

	/* Replacement: c takes over the old cookie's list position */
	c->prev = match->prev;
	c->next = match->next;
	if (c->next)
		c->next->prev = c;
	if (c->prev)
		c->prev->next = c;
	else
		node->cookies = c;
	urldb_free_cookie(match);

	return true;
}

/**
 * Free a cookie
 *
 * Releases every string owned by the cookie and then the cookie itself.
 * The cookie is not unlinked from any list it may be on.
 *
 * \param c The cookie to free (must not be NULL)
 */
void urldb_free_cookie(struct cookie_internal_data *c)
{
	assert(c);

	/* Owned strings first... */
	free(c->name);
	free(c->value);
	free(c->domain);
	free(c->path);
	free(c->comment);

	/* ...then the structure */
	free(c);
}

/**
 * Concatenate a cookie into the provided buffer
 *
 * Appends "; NAME=VALUE", followed by "; $Path=..." and "; $Domain=..."
 * when those attributes came from the Set-Cookie header. Output starts
 * at offset *used - 1, so *used is expected to be at least 1
 * (presumably because it counts the existing NUL terminator -- confirm
 * against callers).
 *
 * \param c Cookie to concatenate
 * \param used Pointer to amount of buffer used (updated)
 * \param alloc Pointer to allocated size of buffer (updated)
 * \param buf Pointer to Pointer to buffer (updated)
 * \return true on success, false on memory exhaustion
 */
bool urldb_concat_cookie(struct cookie_internal_data *c, int *used,
		int *alloc, char **buf)
{
	int clen;

	assert(c && used && alloc && buf && *buf);

	/* "; " + name + "=" + value
	 * [+ "; $Path=" (8) + path] [+ "; $Domain=" (10) + domain] */
	clen = 2 + strlen(c->name) + 1 + strlen(c->value) +
			(c->path_from_set ?
				8 + strlen(c->path) : 0) +
			(c->domain_from_set ?
				10 + strlen(c->domain) : 0);

	if (*used + clen >= *alloc) {
		int new_alloc = *alloc;
		char *temp;

		/* A single 4096 byte step may not be enough for a large
		 * cookie, so keep stepping until the cookie fits */
		do {
			new_alloc += 4096;
		} while (*used + clen >= new_alloc);

		temp = realloc(*buf, new_alloc);
		if (!temp) {
			return false;
		}
		*buf = temp;
		*alloc = new_alloc;
	}

	/** \todo Quote value strings iff version > 0 */
	snprintf(*buf + *used - 1, (size_t) (*alloc - *used + 1),
			"; %s=%s%s%s%s%s",
			c->name, c->value,
			(c->path_from_set ? "; $Path=" : "" ),
			(c->path_from_set ? c->path : "" ),
			(c->domain_from_set ? "; $Domain=" : ""),
			(c->domain_from_set ? c->domain : "")
		);
	*used += clen;

	return true;
}

/**
 * Load a cookie file into the database
 *
 * The file holds one tab-separated cookie per line. Lines beginning with
 * '#' and blank lines are skipped, and everything before the
 * "Version:" line is ignored. Loading stops at the first memory failure
 * or if the file version is not COOKIE_FILE_VERSION. A file that cannot
 * be opened is silently ignored.
 *
 * \param filename File to load
 */
void urldb_load_cookies(const char *filename)
{
	FILE *fp;
	char s[16*1024];
	int file_version = 0;

	assert(filename);

	fp = fopen(filename, "r");
	if (!fp)
		return;

/* Advance p to the next tab and replace it with a NUL, leaving p on the
 * character after it. If the line ends first, the macro `continue's the
 * enclosing while loop, i.e. the whole line is abandoned. */
#define FIND_T {							\
		for (; *p && *p != '\t'; p++)				\
			; /* do nothing */				\
		if (p >= end) {						\
			LOG(("Overran input"));				\
			continue;					\
		}							\
		*p++ = '\0';						\
}

/* Skip any run of tabs at p. As with FIND_T, reaching the end of the
 * line abandons the line via `continue'. */
#define SKIP_T {							\
		for (; *p && *p == '\t'; p++)				\
			; /* do nothing */				\
		if (p >= end) {						\
			LOG(("Overran input"));				\
			continue;					\
		}							\
}

	while (fgets(s, sizeof s, fp)) {
		char *p = s, *end = NULL,
			*domain, *path, *name, *value, *scheme, *url,
			*comment;
		int version, domain_specified, path_specified,
			secure, no_destroy;
		time_t expires, last_used;
		size_t len;

		/* Lose the terminating newline, if there is one. The final
		 * line of a file (or an over-long line) may lack it, in
		 * which case no character must be dropped. */
		len = strlen(s);
		if (len > 0 && s[len - 1] == '\n')
			s[--len] = '\0';
		end = s + len;

		if (s[0] == 0 || s[0] == '#')
			/* Skip blank lines or comments */
			continue;

		/* Look for file version first
		 * (all input is ignored until this is read)
		 */
		if (strncasecmp(s, "Version:", 8) == 0) {
			FIND_T; SKIP_T; file_version = atoi(p);

			if (file_version != COOKIE_FILE_VERSION) {
				LOG(("Unknown Cookie file version"));
				break;
			}

			continue;
		} else if (file_version == 0) {
			/* Haven't yet seen version; skip this input */
			continue;
		}

		/* One cookie/line */

		/* Parse input: each field is terminated in place by FIND_T
		 * and the tabs preceding the next field are skipped */
		FIND_T; version = atoi(s);
		SKIP_T; domain = p; FIND_T;
		SKIP_T; domain_specified = atoi(p); FIND_T;
		SKIP_T; path = p; FIND_T;
		SKIP_T; path_specified = atoi(p); FIND_T;
		SKIP_T; secure = atoi(p); FIND_T;
		SKIP_T; expires = (time_t)atoi(p); FIND_T;
		SKIP_T; last_used = (time_t)atoi(p); FIND_T;
		SKIP_T; no_destroy = atoi(p); FIND_T;
		SKIP_T; name = p; FIND_T;
		SKIP_T; value = p; FIND_T;
		SKIP_T; scheme = p; FIND_T;
		SKIP_T; url = p; FIND_T;

		/* Comment may have no content, so don't
		 * use macros as they'll break */
		for (; *p && *p == '\t'; p++)
			; /* do nothing */
		comment = p;

		assert(p <= end);

		/* Now create cookie. Zero-fill it so that any member not
		 * assigned below (e.g. the list links) starts out clear. */
		struct cookie_internal_data *c = calloc(1, sizeof *c);
		if (!c)
			break;

		c->name = strdup(name);
		c->value = strdup(value);
		c->comment = strdup(comment);
		c->domain_from_set = domain_specified;
		c->domain = strdup(domain);
		c->path_from_set = path_specified;
		c->path = strdup(path);
		c->expires = expires;
		c->last_used = last_used;
		c->secure = secure;
		c->version = version;
		c->no_destroy = no_destroy;

		if (!(c->name && c->value && c->comment &&
				c->domain && c->path)) {
			urldb_free_cookie(c);
			break;
		}

		/* And insert it into database */
		if (!urldb_insert_cookie(c, scheme, url)) {
			/* Cookie freed for us */
			break;
		}
	}

#undef SKIP_T
#undef FIND_T

	fclose(fp);
}

/**
 * Delete a cookie
 *
 * \param domain The cookie's domain
 * \param path The cookie's path
 * \param name The cookie's name
 */
void urldb_delete_cookie(const char *domain, const char *path,
		const char *name)
{
	urldb_delete_cookie_hosts(domain, path, name, &db_root);
}

/**
 * Delete a cookie from a host subtree
 *
 * Checks the path tree of the given host, then recurses into each of
 * its child hosts.
 *
 * \param domain The cookie's domain
 * \param path The cookie's path
 * \param name The cookie's name
 * \param parent Host whose subtree is to be searched
 */
void urldb_delete_cookie_hosts(const char *domain, const char *path,
		const char *name, struct host_part *parent)
{
	struct host_part *child;

	assert(parent);

	/* This host's own paths first */
	urldb_delete_cookie_paths(domain, path, name, &parent->paths);

	/* Then every child host */
	child = parent->children;
	while (child) {
		urldb_delete_cookie_hosts(domain, path, name, child);
		child = child->next;
	}
}

/**
 * Delete a cookie from a path subtree
 *
 * If the given path node holds a matching cookie, it is unlinked and
 * freed and the search of this subtree ends; otherwise each child path
 * is searched in turn. cookies_update() is told when removing the
 * cookie leaves the node's cookie list empty.
 *
 * \param domain The cookie's domain
 * \param path The cookie's path
 * \param name The cookie's name
 * \param parent Path node whose subtree is to be searched
 */
void urldb_delete_cookie_paths(const char *domain, const char *path,
		const char *name, struct path_data *parent)
{
	struct cookie_internal_data *c;
	struct path_data *child;

	assert(parent);

	for (c = parent->cookies; c; c = c->next) {
		if (strcmp(c->domain, domain) != 0 ||
				strcmp(c->path, path) != 0 ||
				strcmp(c->name, name) != 0)
			continue;

		/* Found it: unlink from the doubly-linked list */
		if (c->prev)
			c->prev->next = c->next;
		else
			parent->cookies = c->next;
		if (c->next)
			c->next->prev = c->prev;

		/* That was the last cookie on this node */
		if (!parent->cookies)
			cookies_update(domain, NULL);

		urldb_free_cookie(c);
		return;
	}

	/* Nothing here; try the child paths */
	child = parent->children;
	while (child) {
		urldb_delete_cookie_paths(domain, path, name, child);
		child = child->next;
	}
}

/**
 * Save persistent cookies to file
 *
 * Writes a commented header and the "Version:" line, then one line per
 * cookie for the whole database. Nothing is written if the file cannot
 * be opened.
 *
 * \param filename Path to save to
 */
void urldb_save_cookies(const char *filename)
{
	FILE *fp;

	assert(filename);

	fp = fopen(filename, "w");
	if (fp != NULL) {
		/* Human-readable preamble describing the format */
		fprintf(fp, "# >%s\n", filename);
		fprintf(fp, "# NetSurf cookies file.\n"
			    "#\n"
			    "# Lines starting with a '#' are comments, "
						"blank lines are ignored.\n"
			    "#\n"
			    "# All lines prior to \"Version:\t%d\" are discarded.\n"
			    "#\n"
			    "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
				"Path from Set-Cookie\tSecure\tExpires\tLast used\t"
				"No destroy\tName\tValue\tScheme\tURL\tComment\n",
				COOKIE_FILE_VERSION);

		/* Version marker: the loader ignores everything before it */
		fprintf(fp, "Version:\t%d\n", COOKIE_FILE_VERSION);

		/* Cookie data, starting from the root of the host tree */
		urldb_save_cookie_hosts(fp, &db_root);

		fclose(fp);
	}
}

/**
 * Save a host subtree's cookies
 *
 * Writes the cookies held on the given host's paths, then recurses into
 * each child host.
 *
 * \param fp File pointer to write to
 * \param parent Parent host
 */
void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
{
	struct host_part *child;

	assert(fp && parent);

	urldb_save_cookie_paths(fp, &parent->paths);

	child = parent->children;
	while (child) {
		urldb_save_cookie_hosts(fp, child);
		child = child->next;
	}
}

/**
 * Save a path subtree's cookies
 *
 * Emits one tab-separated line for each cookie on the given path node
 * whose expiry time has not yet passed, then recurses into the child
 * paths. A missing scheme or URL is written as "unused" and a missing
 * comment as an empty field.
 *
 * \param fp File pointer to write to
 * \param parent Parent path
 */
void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
{
	time_t now = time(NULL);
	struct cookie_internal_data *c;
	struct path_data *child;

	assert(fp && parent);

	for (c = parent->cookies; c; c = c->next) {
		/* Only cookies that are still live get written */
		if (c->expires >= now) {
			fprintf(fp, "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t"
					"%s\t%s\t%s\t%s\t%s\n",
					c->version, c->domain,
					c->domain_from_set, c->path,
					c->path_from_set, c->secure,
					(int)c->expires, (int)c->last_used,
					c->no_destroy, c->name, c->value,
					parent->scheme  ? parent->scheme
							: "unused",
					parent->url ? parent->url : "unused",
					c->comment ? c->comment : "");
		}
	}

	child = parent->children;
	while (child) {
		urldb_save_cookie_paths(fp, child);
		child = child->next;
	}
}


/**
 * Sets the content data associated with a particular URL
 *
 * Currently this only reserves a cache filename for the URL if it does
 * not already have one; the content itself is not yet saved.
 *
 * \param url the URL to associate content with
 * \param content the content to associate
 * \return true on success, false otherwise
 */
bool urldb_set_cache_data(const char *url, const struct content *content) {
	struct path_data *p;
	const char *filename;

	assert(url && content);

	p = urldb_find_url(url);
	if (!p)
		return false;

	/* new filename needed */
	if (p->cache.filename[0] == 0) {
		filename = filename_request();
		if (!filename)
			return false;
		/* Copy via "%s": the filename is data, not a format string,
		 * so any '%' it contains must not be interpreted */
		sprintf(p->cache.filename, "%s", filename);
	}

	/* todo: save content, set cache data etc */
	return true;
}


/**
 * Gets a file:// URL for the cached data associated with a URL
 *
 * \param url the URL to get the associated content for
 * \return a local URL allocated on heap, or NULL if the URL is unknown
 *         or has no cache file recorded
 */
char *urldb_get_cache_data(const char *url) {
	struct path_data *entry;

	assert(url);

	entry = urldb_find_url(url);

	/* Unknown URL, or no file cache for it */
	if (!entry || entry->cache.filename[0] == 0)
		return NULL;

	/* todo: handle cache expiry etc */
	return filename_as_url(entry->cache.filename);
}

/**
 * Destroy urldb
 *
 * Frees every non-empty search tree and every host tree hanging off the
 * database root. The root's child pointer and the search tree table are
 * not reset afterwards.
 */
void urldb_destroy(void)
{
	struct host_part *host, *following;
	int tree;

	/* Clean up search trees */
	for (tree = 0; tree < NUM_SEARCH_TREES; tree++) {
		if (search_trees[tree] == &empty)
			continue;
		urldb_destroy_search_tree(search_trees[tree]);
	}

	/* And database */
	host = db_root.children;
	while (host) {
		/* Remember the sibling before this node is freed */
		following = host->next;
		urldb_destroy_host_tree(host);
		host = following;
	}
}

/**
 * Destroy a host tree
 *
 * Recursively frees all child hosts, then this host's path tree, the
 * contents of its root path node and finally the host node itself.
 *
 * \param root Root node of tree to destroy
 */
void urldb_destroy_host_tree(struct host_part *root)
{
	struct host_part *child;
	struct path_data *path;

	/* Destroy children */
	child = root->children;
	while (child) {
		struct host_part *next_child = child->next;

		urldb_destroy_host_tree(child);
		child = next_child;
	}

	/* Now clean up paths */
	path = root->paths.children;
	while (path) {
		struct path_data *next_path = path->next;

		urldb_destroy_path_tree(path);
		path = next_path;
	}

	/* Root path is embedded in the host, so only its contents go */
	urldb_destroy_path_node_content(&root->paths);

	/* And ourselves */
	free(root->part);
	free(root);
}

/**
 * Destroy a path tree
 *
 * Recursively frees all child paths, then the node's contents and the
 * node itself.
 *
 * \param root Root node of tree to destroy
 */
void urldb_destroy_path_tree(struct path_data *root)
{
	struct path_data *child = root->children;

	/* Destroy children */
	while (child) {
		struct path_data *next_child = child->next;

		urldb_destroy_path_tree(child);
		child = next_child;
	}

	/* And ourselves */
	urldb_destroy_path_node_content(root);
	free(root);
}

/**
 * Destroy the contents of a path node
 *
 * Frees the node's strings, fragment list, thumbnail (if any), title,
 * authentication details and cookies.
 *
 * \param node Node to destroy contents of (does not destroy node)
 */
void urldb_destroy_path_node_content(struct path_data *node)
{
	struct cookie_internal_data *cookie, *following;
	unsigned int frag;

	free(node->url);
	free(node->scheme);
	free(node->segment);

	/* Each fragment string, then the array that held them */
	frag = 0;
	while (frag < node->frag_cnt) {
		free(node->fragment[frag]);
		frag++;
	}
	free(node->fragment);

	if (node->thumb)
		bitmap_destroy(node->thumb);

	free(node->urld.title);
	free(node->auth.realm);
	free(node->auth.auth);

	/* Cookie list: fetch the successor before freeing each entry */
	cookie = node->cookies;
	while (cookie) {
		following = cookie->next;
		urldb_destroy_cookie(cookie);
		cookie = following;
	}
}

/**
 * Destroy a cookie node
 *
 * Frees the cookie's strings and then the cookie structure. The cookie
 * is not unlinked from its list.
 *
 * \param c Cookie to destroy
 */
void urldb_destroy_cookie(struct cookie_internal_data *c)
{
	/* Identity */
	free(c->domain);
	free(c->path);
	free(c->name);

	/* Payload */
	free(c->value);
	free(c->comment);

	free(c);
}

/**
 * Destroy a search tree
 *
 * Recursively frees both subtrees and then the node itself. The shared
 * `empty' sentinel node is never freed.
 *
 * \param root Root node of tree to destroy
 */
void urldb_destroy_search_tree(struct search_node *root)
{
	struct search_node *left = root->left;
	struct search_node *right = root->right;

	/* Destroy children, skipping the sentinel */
	if (left != &empty)
		urldb_destroy_search_tree(left);
	if (right != &empty)
		urldb_destroy_search_tree(right);

	/* And destroy ourselves */
	free(root);
}


#ifdef TEST_URLDB
int option_expire_url = 0;

/* Test stub: accepts any update and always reports success. */
bool cookies_update(const char *domain, const struct cookie_data *data)
{
	(void) domain;
	(void) data;

	return true;
}

/* Test stub: report a fatal error on stdout and exit with status 1. */
void die(const char *error)
{
	fprintf(stdout, "die: %s\n", error);
	exit(1);
}


/* Test stub: print the warning and its detail on stdout. */
void warn_user(const char *warning, const char *detail)
{
	fprintf(stdout, "WARNING: %s %s\n", warning, detail);
}

/* Test stub: the test build has no bitmaps, so there is nothing to do. */
void bitmap_destroy(struct bitmap *bitmap)
{
	(void) bitmap;
}

/**
 * Test stub: turn a path into a URL by prefixing it with "file://"
 *
 * \param path Path to convert
 * \return URL on heap (caller frees), or NULL on memory exhaustion
 */
char *path_to_url(const char *path)
{
	static const char prefix[] = "file://";
	const size_t prefix_len = sizeof prefix - 1;
	const size_t path_len = strlen(path);
	char *r = malloc(prefix_len + path_len + 1);

	/* Do not write through a failed allocation */
	if (!r)
		return NULL;

	memcpy(r, prefix, prefix_len);
	/* + 1 copies the path's terminating NUL as well */
	memcpy(r + prefix_len, path, path_len + 1);

	return r;
}

/**
 * Test harness entry point (only built when TEST_URLDB is defined)
 *
 * Adds a couple of hosts and paths, feeds a selection of awkward cookie
 * headers through urldb_set_cookie(), replays a few URL regression cases
 * (the numbers in the comments are presumably bug tracker IDs) and then
 * dumps the database.
 *
 * \return 0 on success, 1 if a host or path could not be added
 */
int main(void)
{
	struct host_part *h;
	struct path_data *p;

	url_init();

	h = urldb_add_host("127.0.0.1");
	if (!h) {
		LOG(("failed adding host"));
		return 1;
	}

	/* Get host entry */
	h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk");
	if (!h) {
		LOG(("failed adding host"));
		return 1;
	}

	/* Get path entry (same path three times, differing in fragment) */
	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "zz",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "aa",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	p = urldb_add_path("http", 80, h, "/path/to/resource.htm", "a=b", "yy",
			"http://netsurf.strcprstskrzkrk.co.uk/path/to/resource.htm?a=b");
	if (!p) {
		LOG(("failed adding path"));
		return 1;
	}

	urldb_set_cookie("mmblah=foo; path=/; expires=Thur, 31-Dec-2099 00:00:00 GMT\r\n", "http://www.minimarcos.org.uk/cgi-bin/forum/Blah.pl?,v=login,p=2");

	urldb_set_cookie("BlahPW=bar; path=/; expires=Thur, 31-Dec-2099 00:00:00 GMT\r\n", "http://www.minimarcos.org.uk/cgi-bin/forum/Blah.pl?,v=login,p=2");

	urldb_set_cookie("details=foo|bar|Sun, 03-Jun-2007;expires=Mon, 24-Jul-2006 09:53:45 GMT", "http://ccdb.cropcircleresearch.com/");

	urldb_set_cookie("PREF=ID=a:TM=b:LM=c:S=d; path=/; domain=.google.com", "http://www.google.com/");

	urldb_set_cookie("test=foo, bar, baz; path=/, quux=blah; path=/", "http://www.bbc.co.uk/");

//	urldb_set_cookie("a=b; path=/; domain=.a.com", "http://a.com/");

	urldb_set_cookie("foo=bar;Path=/blah;Secure", "https://www.foo.com/blah/moose");

	urldb_get_cookie("https://www.foo.com/blah/wxyzabc", "https://www.foo.com/blah/moose");

	/* The assertions below wrap the calls under test, so this harness
	 * must not be built with NDEBUG defined. */

	/* 1563546 */
	assert(urldb_add_url("http:moodle.org") == false);
	assert(urldb_get_url("http:moodle.org") == NULL);

	/* also 1563546 */
	assert(urldb_add_url("http://a_a/"));
	assert(urldb_get_url("http://a_a/"));

	/* 1597646 */
	if (urldb_add_url("http://foo@moose.com/")) {
		LOG(("added http://foo@moose.com/"));
		assert(urldb_get_url("http://foo@moose.com/") != NULL);
	}

	/* 1535120 */
	assert(urldb_add_url("http://www2.2checkout.com/"));
	assert(urldb_get_url("http://www2.2checkout.com/"));

	urldb_dump();

	return 0;
}
#endif