[project @ 2004-03-02 18:02:17 by bursa]

Add new url functions and modify to use them.

svn path=/import/netsurf/; revision=578
This commit is contained in:
James Bursa 2004-03-02 18:02:41 +00:00
parent 2d816dda23
commit 1c85bf0429
21 changed files with 551 additions and 296 deletions

View File

@ -26,7 +26,6 @@
#include <strings.h>
#include <time.h>
#include "curl/curl.h"
#include "libxml/uri.h"
#include "netsurf/utils/config.h"
#include "netsurf/content/fetch.h"
#ifdef riscos
@ -41,6 +40,7 @@
#endif
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
@ -174,19 +174,12 @@ struct fetch * fetch_start(char *url, char *referer,
struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch;
CURLcode code;
CURLMcode codem;
xmlURI *uri;
#ifdef WITH_AUTH
struct login *li;
#endif
LOG(("fetch %p, url '%s'", fetch, url));
uri = xmlParseURI(url);
if (uri == 0) {
LOG(("warning: failed to parse url"));
return 0;
}
/* construct a new fetch structure */
fetch->callback = callback;
fetch->had_headers = false;
@ -199,9 +192,7 @@ struct fetch * fetch_start(char *url, char *referer,
fetch->referer = xstrdup(referer);
fetch->p = p;
fetch->headers = 0;
fetch->host = 0;
if (uri->server != 0)
fetch->host = xstrdup(uri->server);
fetch->host = url_host(url);
fetch->content_length = 0;
#ifdef WITH_POST
fetch->post_urlenc = 0;
@ -216,8 +207,6 @@ struct fetch * fetch_start(char *url, char *referer,
fetch->prev = 0;
fetch->next = 0;
xmlFreeURI(uri);
/* look for a fetch from the same host */
if (fetch->host != 0) {
for (host_fetch = fetch_list;

View File

@ -24,6 +24,7 @@
#include "netsurf/content/fetch.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"

View File

@ -21,6 +21,7 @@
#include "netsurf/desktop/gui.h"
#endif
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
/**

View File

@ -30,6 +30,7 @@
#include "netsurf/render/layout.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
@ -152,12 +153,16 @@ void browser_window_go_post(struct browser_window *bw, const char *url,
browser_window_set_status(bw, messages_get("Loading"));
bw->history_add = history_add;
bw->time0 = clock();
c = fetchcache(url, 0,
browser_window_callback, bw, 0,
gui_window_get_width(bw->window), 0,
false,
post_urlenc, post_multipart,
true);
if (strncmp(url, "about:", 6) == 0)
c = about_create(url, browser_window_callback, bw, 0,
gui_window_get_width(bw->window), 0);
else
c = fetchcache(url, 0,
browser_window_callback, bw, 0,
gui_window_get_width(bw->window), 0,
false,
post_urlenc, post_multipart,
true);
if (!c) {
browser_window_set_status(bw, messages_get("FetchFailed"));
return;
@ -1743,6 +1748,8 @@ void browser_form_submit(struct browser_window *bw, struct form *form,
case method_POST_MULTIPART:
url = url_join(form->action, base);
if (!url)
break;
browser_window_go_post(bw, url, 0, success, true);
break;

View File

@ -119,4 +119,10 @@ void history_destroy(struct history *history);
void history_back(struct browser_window *bw, struct history *history);
void history_forward(struct browser_window *bw, struct history *history);
/* In platform specific about.c. */
struct content *about_create(const char *url,
void (*callback)(content_msg msg, struct content *c, void *p1,
void *p2, const char *error),
void *p1, void *p2, unsigned long width, unsigned long height);
#endif

View File

@ -12,6 +12,7 @@
#include "netsurf/utils/config.h"
#include "netsurf/desktop/401login.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
#ifdef WITH_AUTH
@ -30,7 +31,7 @@ static struct login *loginlist = &login;
void login_list_add(char *host, char* logindets) {
struct login *nli = xcalloc(1, sizeof(*nli));
char *temp = get_host_from_url(host);
char *temp = url_host(host);
char *i;
assert(temp);
@ -81,7 +82,7 @@ struct login *login_list_get(char *host) {
(strncasecmp(host, "https://", 8) != 0))
return NULL;
temphost = get_host_from_url(host);
temphost = url_host(host);
assert(temphost);
temp = xstrdup(host);
@ -89,7 +90,7 @@ struct login *login_list_get(char *host) {
* So make sure we've got that at least
*/
if (strlen(temphost) > strlen(temp)) {
temp = get_host_from_url(host);
temp = url_host(host);
assert(temp);
}

View File

@ -17,6 +17,7 @@
#include "netsurf/desktop/browser.h"
#include "netsurf/desktop/gui.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
bool netsurf_quit = false;
@ -63,6 +64,7 @@ void netsurf_init(int argc, char** argv)
#ifdef WITH_GIF
nsgif_init();
#endif
url_init();
}

View File

@ -9,7 +9,7 @@ CC_DEBUG = gcc
OBJECTS_COMMON = cache.o content.o fetch.o fetchcache.o other.o \
css.o css_enum.o parser.o ruleset.o scanner.o \
box.o form.o html.o layout.o textplain.o \
messages.o utils.o translit.o pool.o
messages.o utils.o translit.o pool.o url.o
OBJECTS = $(OBJECTS_COMMON) \
browser.o loginlist.o netsurf.o options.o \
htmlinstance.o htmlredraw.o \
@ -17,7 +17,7 @@ OBJECTS = $(OBJECTS_COMMON) \
menus.o mouseactions.o \
textselection.o theme.o window.o \
draw.o gif.o jpeg.o plugin.o png.o sprite.o \
about.o filetype.o font.o uri.o url.o history.o \
about.o filetype.o font.o uri.o url_protocol.o history.o \
version.o save_draw.o save_complete.o thumbnail.o save.o
OBJECTS_DEBUG = $(OBJECTS_COMMON) \
netsurfd.o \
@ -39,8 +39,8 @@ CFLAGS = -std=c9x -D_BSD_SOURCE -Driscos -DBOOL_DEFINED -O $(WARNFLAGS) -I.. \
-mpoke-function-name
CFLAGS_DEBUG = -std=c9x -D_BSD_SOURCE $(WARNFLAGS) -I.. -I/usr/include/libxml2 -g
LDFLAGS = -L/riscos/lib -lxml2 -lz -lcurl -lssl -lcrypto -lares -lanim -lpng \
-lifc -loslib -luri -ljpeg
LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl -luri
-lifc -loslib -ljpeg
LDFLAGS_DEBUG = -L/usr/lib -lxml2 -lz -lm -lcurl -lssl -lcrypto -ldl
OBJDIR = $(shell $(CC) -dumpmachine)
SOURCES=$(OBJECTS:.o=.c)

View File

@ -32,6 +32,7 @@
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/pool.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"

View File

@ -22,9 +22,10 @@
#endif
#include "netsurf/render/html.h"
#include "netsurf/render/layout.h"
#include "netsurf/utils/utils.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
#define CHUNK 4096
@ -220,7 +221,7 @@ void html_head(struct content *c, xmlNode *head)
} else if (strcmp(node->name, "base") == 0) {
char *href = (char *) xmlGetProp(node, (const xmlChar *) "href");
if (href) {
char *url = url_join(href, 0);
char *url = url_normalize(href);
if (url) {
free(c->data.html.base_url);
c->data.html.base_url = url;

View File

@ -17,6 +17,7 @@
#include "netsurf/riscos/gui.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
#ifdef WITH_AUTH
@ -65,7 +66,7 @@ void gui_401login_open(struct browser_window *bw, struct content *c, char *realm
char *murl, *host;
murl = c->url;
host = get_host_from_url(murl);
host = url_host(murl);
assert(host);
bwin = bw;

View File

@ -16,20 +16,18 @@
#include <stdio.h>
#include <time.h>
#include <unixlib/local.h> /* for __unixify */
#include "netsurf/utils/config.h"
#include "netsurf/desktop/netsurf.h"
#include "netsurf/riscos/about.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/utils.h"
#include "oslib/fileswitch.h"
#include "oslib/osargs.h"
#include "oslib/osfile.h"
#include "oslib/osfind.h"
#include "oslib/osfscontrol.h"
#include "oslib/osgbpb.h"
#include "netsurf/utils/config.h"
#include "netsurf/desktop/browser.h"
#include "netsurf/desktop/netsurf.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/utils.h"
#ifdef WITH_ABOUT
@ -41,81 +39,53 @@ static const char *paboutpl3 = "<tr valign=\"top\"><td width=\"30%%\"><font size
static const char *pabtplgft = "</table>"; /**< Plugin table footer */
static const char *paboutftr = "</div></body></html>"; /**< Page footer */
/** The about page */
struct about_page {
char *header; /**< page header */
char *browser; /**< browser details */
char *plghead; /**< plugin header */
struct plugd *plugd; /**< plugin details list */
char *plgfoot; /**< plugin footer */
char *footer; /**< page footer */
};
/** A set of plugin details */
struct plugd {
char *details; /**< plugin details */
struct plugd *next; /**< next plugin details */
};
struct plugd *new_plugin(struct plugd *pd, char* details);
/**
* Adds a plugin's details to the head of the linked list of plugin details
* Returns the new head of the list
* Create the browser about page.
*
* \param url requested url (about:...)
* \param callback content callback function, for content_add_user()
* \param p1 user parameter for callback
* \param p2 user parameter for callback
* \param width available width
* \param height available height
* \return a new content containing the about page
*/
struct plugd *new_plugin(struct plugd *pd, char* details) {
struct plugd *np = xcalloc(1, sizeof(*np));
np->details = 0;
np->details = details;
np->next = pd;
return np;
}
/**
* Creates the about page and stores it in <Wimp$ScrapDir>.WWW.Netsurf
*/
void about_create(void) {
struct about_page *abt;
struct plugd *temp;
struct content *about_create(const char *url,
void (*callback)(content_msg msg, struct content *c, void *p1,
void *p2, const char *error),
void *p1, void *p2, unsigned long width, unsigned long height)
{
struct content *c = 0;
FILE *fp;
char *buf, *val, var[20], *ptype, *pdetails, *fname, *furl;
int i, nofiles, j, w, h, size;
fileswitch_object_type fot;
os_error *e;
const char *params[] = { 0 };
abt = (struct about_page*)xcalloc(1, sizeof(*abt));
abt->plugd = 0;
c = content_create(url);
c->width = width;
c->height = height;
content_add_user(c, callback, p1, p2);
content_set_type(c, CONTENT_HTML, "text/html", params);
/* Page header */
buf = xcalloc(strlen(pabouthdr) + 50, sizeof(char));
snprintf(buf, strlen(pabouthdr) + 50, pabouthdr, "About NetSurf",
netsurf_version);
abt->header = xstrdup(buf);
xfree(buf);
content_process_data(c, buf, strlen(buf));
free(buf);
/* browser details */
xosfile_read_stamped_no_path("<NetSurf$Dir>.About.About",0,0,0,&i,0,0);
fp = fopen("<NetSurf$Dir>.About.About", "r");
buf = xcalloc((unsigned int)i + 10, sizeof(char));
fread(buf, sizeof(char), (unsigned int)i, fp);
fclose(fp);
abt->browser = xstrdup(buf);
xfree(buf);
buf = load("<NetSurf$Dir>.About.About");
content_process_data(c, buf, strlen(buf));
free(buf);
/* plugin header */
abt->plghead = xstrdup(pabtplghd);
/* plugin footer */
abt->plgfoot = xstrdup(pabtplgft);
/* Page footer */
abt->footer = xstrdup(paboutftr);
content_process_data(c, pabtplghd, strlen(pabtplghd));
/* plugins registered */
for (i=0; i!=4096; i++) {
@ -172,7 +142,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl1) + strlen(ptype) + strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl1, ptype, pdetails);
LOG(("furl: %s", furl));
abt->plugd = new_plugin(abt->plugd, furl);
content_process_data(c, furl, strlen(furl));
xfree(pdetails);
continue;
}
@ -214,7 +184,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl3) + strlen(ptype) + strlen(buf) +
strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl3, ptype, buf, ptype, w, h, pdetails);
abt->plugd = new_plugin(abt->plugd, furl);
content_process_data(c, furl, strlen(furl));
xfree(pdetails);
continue;
}
@ -230,7 +200,7 @@ void about_create(void) {
furl = xcalloc(strlen(paboutpl2) + strlen(ptype) + strlen(fname) + strlen(pdetails) + 10, sizeof(char));
sprintf(furl, paboutpl2, ptype, fname, ptype, pdetails);
abt->plugd = new_plugin(abt->plugd, furl);
content_process_data(c, furl, strlen(furl));
xfree(fname);
xfree(pdetails);
}
@ -241,29 +211,15 @@ void about_create(void) {
}
}
/* write file */
xosfile_create_dir("<Wimp$ScrapDir>.WWW", 77);
xosfile_create_dir("<Wimp$ScrapDir>.WWW.NetSurf", 77);
/* plugin footer */
content_process_data(c, pabtplgft, strlen(pabtplgft));
fp = fopen("<Wimp$ScrapDir>.WWW.Netsurf.About", "w+");
fprintf(fp, "%s", abt->header);
fprintf(fp, "%s", abt->browser);
fprintf(fp, "%s", abt->plghead);
while (abt->plugd != 0) {
fprintf(fp, "%s", abt->plugd->details);
temp = abt->plugd;
abt->plugd = abt->plugd->next;
xfree(temp);
}
fprintf(fp, "%s", abt->plgfoot);
fprintf(fp, "%s", abt->footer);
fclose(fp);
/* Page footer */
content_process_data(c, paboutftr, strlen(paboutftr));
xosfile_set_type("<Wimp$ScrapDir>.WWW.NetSurf.About", 0xfaf);
content_convert(c, c->width, c->height);
xfree(abt);
return;
return c;
}
#ifdef WITH_COOKIES

View File

@ -29,9 +29,6 @@
#include "netsurf/render/font.h"
#include "netsurf/render/form.h"
#include "netsurf/render/html.h"
#ifdef WITH_ABOUT
#include "netsurf/riscos/about.h"
#endif
#include "netsurf/riscos/constdata.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/options.h"
@ -43,7 +40,7 @@
#include "netsurf/riscos/uri.h"
#endif
#ifdef WITH_URL
#include "netsurf/riscos/url.h"
#include "netsurf/riscos/url_protocol.h"
#endif
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
@ -257,9 +254,6 @@ void ro_gui_icon_bar_create(void)
void gui_quit(void)
{
#ifdef WITH_ABOUT
about_quit();
#endif
ro_gui_history_quit();
wimp_close_down(task_handle);
xhourglass_off();

View File

@ -6,13 +6,8 @@
*/
#include <string.h>
#include <unixlib/local.h> /* for __riscosify */
#include <uri.h> /* possibly just have accessor methods in utils.c */
#include "oslib/osfile.h"
#include "netsurf/utils/config.h"
#include "netsurf/content/content.h"
#include "netsurf/css/css.h"
@ -30,7 +25,6 @@
*/
void save_imported_sheets(struct content *c, int parent, int level, char *p, char* fn);
char* get_filename(char * url);
/* this is temporary. */
const char * const SAVE_PATH = "<NetSurf$Dir>.savetest.";
@ -46,7 +40,7 @@ void save_complete(struct content *c) {
return;
}
fname = get_filename(c->data.html.base_url);
fname = "test"; /*get_filename(c->data.html.base_url);*/
if (!fname) { /* no path -> exit */
return;
@ -135,46 +129,4 @@ void save_imported_sheets(struct content *c, int parent, int level, char *p, cha
}
}
char* get_filename(char * url) {
char *ret = 0, *offs;
uri_t *uri;
uri = uri_alloc(url, (int)strlen(url));
if (!uri) {
return 0;
}
if (uri->path) {
/* Two possible cases here:
* a) no page name given (eg http://www.blah.com/) -> index.html
* b) page name given
*/
/* case a */
if (strlen(uri->path) == 0) {
ret = xstrdup("index.html");
}
/* case b */
else {
offs = strrchr(uri->path, '/');
if (!offs) {
ret = xstrdup(uri->path);
}
else {
ret = xstrdup(offs+1);
}
}
}
uri_free(uri);
offs = xcalloc(strlen(ret)+1, sizeof(char));
__riscosify(ret, 0, 0, offs, strlen(ret)+1, 0);
xfree(ret);
return offs;
}
#endif

View File

@ -15,7 +15,7 @@
#include "netsurf/riscos/theme.h"
#include "netsurf/desktop/gui.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/url.h"
#include "netsurf/riscos/url_protocol.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/utils.h"

View File

@ -27,6 +27,7 @@
#include "netsurf/riscos/theme.h"
#include "netsurf/riscos/thumbnail.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
gui_window *window_list = 0;
@ -676,6 +677,7 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar)
struct content *content = g->data.browser.bw->current_content;
wimp_window_state state;
int y;
char *url;
assert(g->type == GUI_BROWSER_WINDOW);
@ -744,22 +746,11 @@ bool ro_gui_window_keypress(gui_window *g, int key, bool toolbar)
case wimp_KEY_RETURN:
if (!toolbar)
break;
else {
char *url = xcalloc(1, 10 + strlen(g->url));
char *url2;
if (g->url[strspn(g->url, "abcdefghijklmnopqrstuvwxyz")] != ':') {
strcpy(url, "http://");
strcpy(url + 7, g->url);
} else {
strcpy(url, g->url);
}
url2 = url_join(url, 0);
free(url);
if (url2) {
gui_window_set_url(g, url2);
browser_window_go(g->data.browser.bw, url2);
free(url2);
}
url = url_normalize(g->url);
if (url) {
gui_window_set_url(g, url);
browser_window_go(g->data.browser.bw, url);
free(url);
}
return true;

437
utils/url.c Normal file
View File

@ -0,0 +1,437 @@
/*
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
/** \file
* URL parsing and joining (implementation).
*/
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include "netsurf/utils/log.h"
#include "netsurf/utils/url.h"
#include "netsurf/utils/utils.h"
regex_t url_re, url_up_re;
/**
 * Initialise URL routines.
 *
 * Compiles the regular expressions used by url_normalize(), url_join() and
 * url_host() into the file-level url_re and url_up_re objects. It must be
 * called before any of those functions is used.
 */

void url_init(void)
{
	/* regex from RFC 2396 appendix B; the sub-matches used here are
	 * 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment */
	regcomp_wrapper(&url_re, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)"
			"(\\?([^#]*))?(#(.*))?$", REG_EXTENDED);

	/* Matches "/<segment>/.." followed by '/' or the end of the path,
	 * where <segment> (sub-match 1) is any path segment except "..":
	 *   [^/]?          empty or one character
	 *   [.][^./]       two characters, only the first a dot
	 *   [^./][^/]      two characters, the first not a dot
	 *   [^/][^/][^/]+  three or more characters
	 * The two-character cases previously missed segments without any
	 * dot (e.g. "/ab/.."), and the empty case was written as an empty
	 * alternative, which POSIX leaves undefined in an ERE. */
	regcomp_wrapper(&url_up_re,
			"/([^/]?|[.][^./]|[^./][^/]|[^/][^/][^/]+)"
			"/[.][.](/|$)",
			REG_EXTENDED);
}
/**
 * Normalize a URL.
 *
 * \param  url  an absolute URL
 * \return  cleaned up url, allocated on the heap, or 0 on failure
 *
 * If there is no scheme, http:// is added. The scheme and host are
 * lower-cased. Default ports are removed (http only). An empty path is
 * replaced with "/". Characters are unescaped if safe.
 *
 * url_init() must have been called first. The result is allocated with
 * malloc() and must be released by the caller with free().
 */

char *url_normalize(const char *url)
{
	char c;
	char *res = 0;
	int m;
	int i;
	int len;
	bool http = false;
	regmatch_t match[10];

	m = regexec(&url_re, url, 10, match, 0);
	if (m) {
		LOG(("url '%s' failed to match regex", url));
		return 0;
	}

	len = strlen(url);

	if (match[1].rm_so == -1) {
		/* scheme missing: add http:// and reparse */
		LOG(("scheme missing: using http"));
		/* room for "http://", an inserted "/" and the terminator */
		res = malloc(len + 13);
		if (!res) {
			LOG(("malloc failed"));
			return 0;
		}
		strcpy(res, "http://");
		strcpy(res + 7, url);
		m = regexec(&url_re, res, 10, match, 0);
		if (m) {
			LOG(("url '%s' failed to match regex", res));
			free(res);
			return 0;
		}
		len += 7;
	} else {
		/* room for an inserted "/" and the terminator */
		res = malloc(len + 6);
		if (!res) {
			LOG(("malloc failed"));
			return 0;
		}
		strcpy(res, url);
	}

	/* see RFC 2616 section 3.2.3 */
	/* make scheme lower-case */
	if (match[2].rm_so != -1) {
		/* the cast keeps tolower() defined for bytes >= 0x80 where
		 * plain char is signed */
		for (i = match[2].rm_so; i != match[2].rm_eo; i++)
			res[i] = tolower((unsigned char) res[i]);
		if (match[2].rm_eo == 4 && res[0] == 'h' && res[1] == 't' &&
				res[2] == 't' && res[3] == 'p')
			http = true;
	}

	/* make empty path into "/" */
	if (match[5].rm_so != -1 && match[5].rm_so == match[5].rm_eo) {
		/* shift the tail (including the terminator) up by one; the
		 * authority offsets in match[4] lie before the path and stay
		 * valid */
		memmove(res + match[5].rm_so + 1, res + match[5].rm_so,
				len - match[5].rm_so + 1);
		res[match[5].rm_so] = '/';
		len++;
	}

	/* make host lower-case */
	if (match[4].rm_so != -1) {
		for (i = match[4].rm_so; i != match[4].rm_eo; i++) {
			if (res[i] == ':') {
				if (http && res[i + 1] == '8' &&
						res[i + 2] == '0' &&
						i + 3 == match[4].rm_eo) {
					/* drop the default ":80" */
					memmove(res + i, res + i + 3,
							len - match[4].rm_eo);
					len -= 3;
					res[len] = '\0';
				} else if (i + 1 == match[4].rm_eo) {
					/* drop a trailing ":" with no port */
					memmove(res + i, res + i + 1,
							len - match[4].rm_eo);
					len--;
					res[len] = '\0';
				}
				break;
			}
			res[i] = tolower((unsigned char) res[i]);
		}
	}

	/* unescape non-"reserved" escaped characters */
	for (i = 0; i != len; i++) {
		if (res[i] != '%')
			continue;
		/* a truncated escape hits the terminator here and is skipped */
		c = tolower((unsigned char) res[i + 1]);
		if ('0' <= c && c <= '9')
			m = 16 * (c - '0');
		else if ('a' <= c && c <= 'f')
			m = 16 * (c - 'a' + 10);
		else
			continue;
		c = tolower((unsigned char) res[i + 2]);
		if ('0' <= c && c <= '9')
			m += c - '0';
		else if ('a' <= c && c <= 'f')
			m += c - 'a' + 10;
		else
			continue;

		/* keep the escape for controls, space, DEL, bytes outside
		 * ASCII, and reserved / delimiter / unwise characters */
		if (m <= 0x20 || 0x7f <= m ||
				strchr(";/?:@&=+$," "<>#%\""
				"{}|\\^[]`", m)) {
			i += 2;
			continue;
		}

		res[i] = m;
		/* close the two-byte gap, moving the terminator as well */
		memmove(res + i + 1, res + i + 3, len - i - 2);
		len -= 2;
	}

	return res;
}
/**
 * Resolve a relative URL to absolute form.
 *
 * \param  rel   relative URL
 * \param  base  base URL, must be absolute and cleaned as by url_normalize()
 * \return  an absolute URL, allocated on the heap, or 0 on failure
 *
 * Follows the algorithm of RFC 2396 section 5.2; the numbered comments
 * below refer to its steps. A reference to the current document (empty
 * path, no scheme, authority or query) yields the base URL including its
 * query, with the fragment taken from \a rel. url_init() must have been
 * called first. The result must be released by the caller with free().
 */

char *url_join(const char *rel, const char *base)
{
	int m;
	int i, j;
	int rel_path_len;
	char *buf = 0;
	char *res;
	const char *scheme = 0, *authority = 0, *path = 0, *query = 0,
			*fragment = 0;
	int scheme_len = 0, authority_len = 0, path_len = 0, query_len = 0,
			fragment_len = 0;
	regmatch_t base_match[10];
	regmatch_t rel_match[10];
	regmatch_t up_match[3];

	/* see RFC 2396 section 5.2 */
	m = regexec(&url_re, base, 10, base_match, 0);
	if (m) {
		LOG(("base url '%s' failed to match regex", base));
		return 0;
	}
	if (base_match[2].rm_so == -1) {
		LOG(("base url '%s' is not absolute", base));
		return 0;
	}
	scheme = base + base_match[2].rm_so;
	scheme_len = base_match[2].rm_eo - base_match[2].rm_so;
	if (base_match[4].rm_so != -1) {
		authority = base + base_match[4].rm_so;
		authority_len = base_match[4].rm_eo - base_match[4].rm_so;
	}
	path = base + base_match[5].rm_so;
	path_len = base_match[5].rm_eo - base_match[5].rm_so;

	/* 1) */
	m = regexec(&url_re, rel, 10, rel_match, 0);
	if (m) {
		LOG(("relative url '%s' failed to match regex", rel));
		return 0;
	}
	rel_path_len = rel_match[5].rm_eo - rel_match[5].rm_so;

	/* the fragment always comes from the reference, never the base */
	if (rel_match[9].rm_so != -1) {
		fragment = rel + rel_match[9].rm_so;
		fragment_len = rel_match[9].rm_eo - rel_match[9].rm_so;
	}

	/* 2) */
	if (rel_path_len == 0 &&
			rel_match[2].rm_so == -1 &&
			rel_match[4].rm_so == -1 &&
			rel_match[6].rm_so == -1) {
		/* reference to the current document: keep the base query so
		 * that "#frag" against "http://h/p?q" gives
		 * "http://h/p?q#frag" */
		if (base_match[7].rm_so != -1) {
			query = base + base_match[7].rm_so;
			query_len = base_match[7].rm_eo - base_match[7].rm_so;
		}
		goto step7;
	}
	if (rel_match[7].rm_so != -1) {
		query = rel + rel_match[7].rm_so;
		query_len = rel_match[7].rm_eo - rel_match[7].rm_so;
	}

	/* 3) */
	if (rel_match[2].rm_so != -1) {
		scheme = rel + rel_match[2].rm_so;
		scheme_len = rel_match[2].rm_eo - rel_match[2].rm_so;
		authority = 0;
		authority_len = 0;
		if (rel_match[4].rm_so != -1) {
			authority = rel + rel_match[4].rm_so;
			authority_len = rel_match[4].rm_eo - rel_match[4].rm_so;
		}
		path = rel + rel_match[5].rm_so;
		path_len = rel_path_len;
		goto step7;
	}

	/* 4) */
	if (rel_match[4].rm_so != -1) {
		authority = rel + rel_match[4].rm_so;
		authority_len = rel_match[4].rm_eo - rel_match[4].rm_so;
		path = rel + rel_match[5].rm_so;
		path_len = rel_path_len;
		goto step7;
	}

	/* 5) */
	if (rel[rel_match[5].rm_so] == '/') {
		path = rel + rel_match[5].rm_so;
		path_len = rel_path_len;
		goto step7;
	}

	/* 6) */
	/* the slack covers the terminator and the "/" that may be inserted
	 * in step a) */
	buf = malloc(path_len + rel_path_len + 10);
	if (!buf) {
		LOG(("malloc failed"));
		return 0;
	}
	/* a) base path up to and including its last '/' */
	memcpy(buf, path, path_len);
	for (; path_len != 0 && buf[path_len - 1] != '/'; path_len--)
		;
	if (path_len == 0 && authority) {
		/* a base with an authority but an empty path merges as "/",
		 * otherwise the result would run host and path together */
		buf[0] = '/';
		path_len = 1;
	}
	/* b) */
	memcpy(buf + path_len, rel + rel_match[5].rm_so, rel_path_len);
	path_len += rel_path_len;
	/* c) remove every "./" that is a complete path segment */
	buf[path_len] = 0;
	for (i = j = 0; j != path_len; ) {
		if (j && buf[j - 1] == '/' && buf[j] == '.' &&
				buf[j + 1] == '/')
			j += 2;
		else
			buf[i++] = buf[j++];
	}
	path_len = i;
	/* d) remove a trailing "." segment; the length check stops the
	 * test reading before the start of the buffer */
	if (2 <= path_len && buf[path_len - 2] == '/' &&
			buf[path_len - 1] == '.')
		path_len--;
	/* e) and f) repeatedly remove the leftmost "<segment>/.." */
	while (1) {
		buf[path_len] = 0;
		m = regexec(&url_up_re, buf, 3, up_match, 0);
		if (m)
			break;
		if (up_match[1].rm_eo + 4 <= path_len) {
			/* "<segment>/../" inside the path */
			memmove(buf + up_match[1].rm_so,
					buf + up_match[1].rm_eo + 4,
					path_len - up_match[1].rm_eo - 4);
			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 4;
		} else {
			/* "<segment>/.." at the end of the path */
			path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3;
		}
	}
	buf[path_len] = 0;
	path = buf;

step7:	/* 7) */
	res = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 +
			query_len + 1 + fragment_len + 1);
	if (!res) {
		LOG(("malloc failed"));
		free(buf);
		return 0;
	}

	memcpy(res, scheme, scheme_len);
	res[scheme_len] = ':';
	i = scheme_len + 1;
	if (authority) {
		res[i++] = '/';
		res[i++] = '/';
		memcpy(res + i, authority, authority_len);
		i += authority_len;
	}
	memcpy(res + i, path, path_len);
	i += path_len;
	if (query) {
		res[i++] = '?';
		memcpy(res + i, query, query_len);
		i += query_len;
	}
	if (fragment) {
		res[i++] = '#';
		memcpy(res + i, fragment, fragment_len);
		i += fragment_len;
	}
	res[i] = 0;

	free(buf);

	return res;
}
/**
 * Return the host name from an URL.
 *
 * The returned text is the whole authority component as matched by the
 * RFC 2396 expression (everything between "//" and the next '/', '?' or
 * '#'), copied unchanged.
 *
 * \param  url  an absolute URL
 * \returns  host name allocated on heap, or 0 on failure (the URL did not
 *           match, it has no authority component, or memory ran out)
 */

char *url_host(const char *url)
{
	regmatch_t parts[10];
	const regmatch_t *authority = &parts[4];
	size_t length;
	char *copy;

	if (regexec(&url_re, url, 10, parts, 0) != 0) {
		LOG(("url '%s' failed to match regex", url));
		return 0;
	}

	/* no "//authority" part in this URL */
	if (authority->rm_so == -1)
		return 0;

	length = authority->rm_eo - authority->rm_so;
	copy = malloc(length + 1);
	if (!copy) {
		LOG(("malloc failed"));
		return 0;
	}

	memcpy(copy, url + authority->rm_so, length);
	copy[length] = 0;

	return copy;
}
#ifdef TEST
/**
 * Command line test harness, built only when TEST is defined.
 *
 * argv[1] is taken as the base URL; every following argument is joined to
 * it with url_join() and each successful result is printed.
 */

int main(int argc, char *argv[])
{
	int i;
	char *s;

	url_init();

	/* argv[1] is the base, so the relative URLs start at argv[2] */
	for (i = 2; i < argc; i++) {
		s = url_join(argv[i], argv[1]);
		if (s) {
			printf("'%s' + '%s' \t= '%s'\n", argv[1],
					argv[i], s);
			/* url_join() returns heap memory owned by the caller */
			free(s);
		}
	}

	return 0;
}
/**
 * Compile a regular expression, exiting the program if it is invalid.
 *
 * \param  preg    destination for the compiled expression
 * \param  regex   pattern source text
 * \param  cflags  flags passed through to regcomp()
 *
 * On failure the pattern and the regerror() message are written to stderr
 * and the process exits with status 1.
 */

void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	fprintf(stderr, "error: %s\n", message);
	exit(1);
}
#endif

20
utils/url.h Normal file
View File

@ -0,0 +1,20 @@
/*
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
/** \file
* URL parsing and joining (interface).
*/
#ifndef _NETSURF_UTILS_URL_H_
#define _NETSURF_UTILS_URL_H_
/* Compile the regular expressions used by the other url_ functions. */
void url_init(void);
/* Return a cleaned-up heap copy of an absolute URL, or 0 on failure. */
char *url_normalize(const char *url);
/* Resolve rel against the absolute URL base; returns a heap-allocated
 * absolute URL, or 0 on failure. */
char *url_join(const char *rel, const char *base);
/* Return a heap copy of the authority ("host") part of url, or 0 if the
 * URL has none or on failure. */
char *url_host(const char *url);
#endif

View File

@ -2,7 +2,7 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2003 James Bursa <bursa@users.sourceforge.net>
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk>
*/
@ -12,17 +12,11 @@
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <uri.h>
#include <sys/types.h>
#include <regex.h>
#include <time.h>
#include "libxml/encoding.h"
#include "libxml/uri.h"
#include "netsurf/utils/config.h"
#ifdef riscos
#include "netsurf/riscos/about.h"
#include "netsurf/riscos/constdata.h"
#endif
#define NDEBUG
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
@ -189,103 +183,6 @@ char *squash_tolat1(xmlChar *s)
}
/**
* Calculate an URL from a relative and base URL.
*
* base may be 0 for a new URL, in which case the URL is canonicalized and
* returned. Returns 0 in case of error.
*/
char *url_join(char *rel_url, char *base_url)
{
char *res;
uri_t *base = 0, *rel = 0, *abs;
LOG(("rel_url = %s, base_url = %s", rel_url, base_url));
#ifdef riscos
/* hacky, hacky, hacky...
* It is, however, best to do this here as it avoids
* duplicating code for clicking links and url bar handling.
* It simplifies the code it the other places too (they just
* call this as usual, then we handle it here).
*/
#ifdef WITH_ABOUT
if (strcasecmp(rel_url, "about:") == 0) {
about_create();
return xstrdup(ABOUT_URL);
}
#ifdef WITH_COOKIES
if (strcasecmp(rel_url, "about:cookies") == 0) {
cookie_create();
return xstrdup(COOKIE_URL);
}
#endif
#endif
#endif
if (!base_url) {
res = uri_cannonicalize_string(rel_url,
(int)(strlen(rel_url)),
URI_STRING_URI_STYLE);
LOG(("res = %s", res));
if (res)
return xstrdup(res);
return 0;
}
base = uri_alloc(base_url, (int)(strlen(base_url)));
rel = uri_alloc(rel_url, (int)(strlen(rel_url)));
if (!base || !rel)
goto fail;
if (!base->scheme)
goto fail;
abs = uri_abs_1(base, rel);
res = xstrdup(uri_uri(abs));
uri_free(base);
uri_free(rel);
LOG(("res = %s", res));
return res;
fail:
if (base)
uri_free(base);
if (rel)
uri_free(rel);
LOG(("error"));
return 0;
}
/**
* Extract the host name from a url.
*
* \param url an absolute URL
* \return a new string, or 0 in case of error
*/
char *get_host_from_url(char *url)
{
char *host = 0;
uri_t *uri;
uri = uri_alloc(url, (int)(strlen(url)));
if (!uri)
return 0;
if (uri->host)
host = xstrdup(uri->host);
uri_free(uri);
return host;
}
/**
* Check if a directory exists.
*/

View File

@ -2,7 +2,7 @@
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2003 James Bursa <bursa@users.sourceforge.net>
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*/
#ifndef _NETSURF_UTILS_UTILS_H_
@ -26,8 +26,6 @@ char * squash_whitespace(const char * s);
char * tolat1(xmlChar * s);
char * tolat1_pre(xmlChar * s);
char *squash_tolat1(xmlChar *s);
char *url_join(char *rel_url, char *base_url);
char *get_host_from_url(char* url);
bool is_dir(const char *path);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void clean_cookiejar(void);