[project @ 2005-07-23 20:43:37 by bursa]

Rewrite and simplify url_nice() to improve suggested filenames. Add option to keep extensions (no UI currently).

svn path=/import/netsurf/; revision=1814
This commit is contained in:
James Bursa 2005-07-23 20:43:37 +00:00
parent 4e9457bee1
commit 253c199d1f
6 changed files with 192 additions and 124 deletions

View File

@ -32,6 +32,7 @@
#include "oslib/wimpspriteop.h"
#include "netsurf/content/fetch.h"
#include "netsurf/desktop/gui.h"
#include "netsurf/riscos/options.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/query.h"
#include "netsurf/riscos/wimp.h"
@ -177,6 +178,7 @@ struct gui_download_window *gui_download_window_create(const char *url,
url_func_result res;
char *local_path;
utf8_convert_ret err;
size_t i;
dw = malloc(sizeof *dw);
if (!dw) {
@ -242,8 +244,16 @@ struct gui_download_window *gui_download_window_create(const char *url,
download_template->icons[ICON_DOWNLOAD_ICON].data.indirected_sprite.id =
(osspriteop_id) dw->sprite_name;
if ((res = url_nice(url, &nice)) == URL_FUNC_OK) {
strcpy(dw->path, nice);
if ((res = url_nice(url, &nice, option_strip_extensions)) ==
URL_FUNC_OK) {
for (i = 0; nice[i]; i++) {
if (nice[i] == '.')
nice[i] = '/';
else if (nice[i] <= ' ' ||
strchr(":*#$&@^%\\", nice[i]))
nice[i] = '_';
}
strncpy(dw->path, nice, sizeof dw->path);
free(nice);
}
else
@ -296,7 +306,8 @@ struct gui_download_window *gui_download_window_create(const char *url,
ro_gui_dialog_open(dw->window);
/* issue the warning now, so that it appears in front of the download window! */
/* issue the warning now, so that it appears in front of the download
* window! */
if (space_warning)
warn_user("DownloadWarn", messages_get("NoDiscSpace"));

View File

@ -61,6 +61,7 @@ extern bool option_block_popups;
extern bool option_url_suggestion;
extern int option_image_memory_direct; /* -1 means auto-detect */
extern int option_image_memory_compressed; /* -1 means auto-detect */
extern bool option_strip_extensions;
#define EXTRA_OPTION_DEFINE \
bool option_use_mouse_gestures = false;\
@ -102,7 +103,8 @@ int option_font_default = CSS_FONT_FAMILY_SANS_SERIF; \
bool option_block_popups = false; \
bool option_url_suggestion = true; \
int option_image_memory_direct = -1; \
int option_image_memory_compressed = -1;
int option_image_memory_compressed = -1; \
bool option_strip_extensions = true;
#define EXTRA_OPTION_TABLE \
{ "use_mouse_gestures", OPTION_BOOL, &option_use_mouse_gestures },\
@ -144,6 +146,7 @@ int option_image_memory_compressed = -1;
{ "block_popups", OPTION_BOOL, &option_block_popups }, \
{ "url_suggestion", OPTION_BOOL, &option_url_suggestion }, \
{ "image_memory_direct", OPTION_INTEGER, &option_image_memory_direct }, \
{ "image_memory_compressed",OPTION_INTEGER, &option_image_memory_compressed }
{ "image_memory_compressed",OPTION_INTEGER, &option_image_memory_compressed }, \
{ "strip_extensions", OPTION_BOOL, &option_strip_extensions }
#endif

View File

@ -29,6 +29,7 @@
#include "netsurf/image/bitmap.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/menus.h"
#include "netsurf/riscos/options.h"
#include "netsurf/riscos/save_complete.h"
#include "netsurf/riscos/save_draw.h"
#include "netsurf/riscos/thumbnail.h"
@ -50,7 +51,8 @@ static bool using_dragasprite = true;
static bool saving_from_dialog = true;
static osspriteop_area *saveas_area = NULL;
static wimp_w gui_save_sourcew = (wimp_w)-1;
static char save_leafname[32];
#define LEAFNAME_MAX 200
static char save_leafname[LEAFNAME_MAX];
typedef enum { LINK_ACORN, LINK_ANT, LINK_TEXT } link_format;
@ -58,7 +60,8 @@ static bool ro_gui_save_complete(struct content *c, char *path);
static bool ro_gui_save_content(struct content *c, char *path);
static void ro_gui_save_object_native(struct content *c, char *path);
static bool ro_gui_save_link(struct content *c, link_format format, char *path);
static void ro_gui_save_set_state(struct content *c, gui_save_type save_type, char *leaf_buf, char *icon_buf);
static void ro_gui_save_set_state(struct content *c, gui_save_type save_type,
char *leaf_buf, char *icon_buf);
static bool ro_gui_save_create_thumbnail(struct content *c, const char *name);
@ -182,7 +185,7 @@ void ro_gui_saveas_quit(void)
void ro_gui_save_prepare(gui_save_type save_type, struct content *c)
{
char name_buf[64];
char name_buf[LEAFNAME_MAX];
char icon_buf[20];
assert((save_type == GUI_SAVE_HOTLIST_EXPORT_HTML) ||
@ -329,7 +332,8 @@ void gui_drag_save_selection(struct selection *s, struct gui_window *g)
gui_save_selection = s;
ro_gui_save_set_state(NULL, GUI_SAVE_TEXT_SELECTION, save_leafname, icon_buf);
ro_gui_save_set_state(NULL, GUI_SAVE_TEXT_SELECTION, save_leafname,
icon_buf);
gui_current_drag_type = GUI_DRAG_SAVE;
@ -873,19 +877,22 @@ bool ro_gui_save_link(struct content *c, link_format format, char *path)
*
* \param c content being saved
* \param save_type type of save operation being performed
* \param leaf_buf buffer to receive suggested leafname
* \param icon_buf buffer to receive sprite name
* \param leaf_buf buffer to receive suggested leafname, length at least
* LEAFNAME_MAX
* \param icon_buf buffer to receive sprite name, length at least 13
*/
void ro_gui_save_set_state(struct content *c, gui_save_type save_type, char *leaf_buf, char *icon_buf)
void ro_gui_save_set_state(struct content *c, gui_save_type save_type,
char *leaf_buf, char *icon_buf)
{
/* filename */
const char *name = gui_save_table[save_type].name;
url_func_result res;
bool done = false;
char *nice = NULL;
char *nice;
utf8_convert_ret err;
char *local_name;
size_t i;
/* parameters that we need to remember */
gui_save_current_type = save_type;
@ -897,13 +904,23 @@ void ro_gui_save_set_state(struct content *c, gui_save_type save_type, char *lea
gui_save_filetype = ro_content_filetype(c);
/* leafname */
if (c && (res = url_nice(c->url, (char **)&nice)) == URL_FUNC_OK)
if (c && (res = url_nice(c->url, &nice, option_strip_extensions)) ==
URL_FUNC_OK) {
for (i = 0; nice[i]; i++) {
if (nice[i] == '.')
nice[i] = '/';
else if (nice[i] <= ' ' ||
strchr(":*#$&@^%\\", nice[i]))
nice[i] = '_';
}
name = nice;
else
} else {
name = messages_get(name);
}
/* filename is utf8 */
strcpy(leaf_buf, name);
strncpy(leaf_buf, name, LEAFNAME_MAX);
leaf_buf[LEAFNAME_MAX - 1] = 0;
err = utf8_to_local_encoding(name, 0, &local_name);
if (err != UTF8_CONVERT_OK) {
@ -944,15 +961,17 @@ void ro_gui_save_set_state(struct content *c, gui_save_type save_type, char *lea
error = ro_gui_wimp_get_sprite(icon_buf, &sprite);
if (error) {
LOG(("ro_gui_wimp_get_sprite: 0x%x: %s",
error->errnum, error->errmess));
error->errnum, error->errmess));
warn_user("MiscError", error->errmess);
} else {
/* the sprite area should always be large enough for file_xxx sprites */
assert(sprite->size <= saveas_area->size - saveas_area->first);
/* the sprite area should always be large enough for
* file_xxx sprites */
assert(sprite->size <= saveas_area->size -
saveas_area->first);
memcpy((byte*)saveas_area + saveas_area->first,
sprite,
sprite->size);
sprite,
sprite->size);
saveas_area->sprite_count = 1;
saveas_area->used = saveas_area->first + sprite->size;

View File

@ -203,7 +203,7 @@ void theme_install_install(bool apply)
assert(theme_install_content);
if (url_nice(theme_install_descriptor.name, &theme_file) !=
if (url_nice(theme_install_descriptor.name, &theme_file, true) !=
URL_FUNC_OK) {
warn_user("ThemeInstallErr", 0);
theme_install_close();

View File

@ -15,6 +15,7 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/types.h>
#include <regex.h>
#include "netsurf/utils/log.h"
@ -22,7 +23,7 @@
#include "netsurf/utils/utils.h"
regex_t url_re, url_up_re, url_nice_re;
regex_t url_re, url_up_re;
/**
* Initialise URL routines.
@ -49,18 +50,15 @@ void url_init(void)
"/([^/]|[.][^./]|[^./][.]|[^./][^./]|[^/][^/][^/]+)?"
"/[.][.](/|$)",
REG_EXTENDED);
regcomp_wrapper(&url_nice_re,
"^([^.]{0,4}[.])?([^.][^.][.])?([^/?&;.=]*)"
"(=[^/?&;.]*)?[/?&;.]",
REG_EXTENDED);
}
/**
* Normalize a URL.
*
* \param url an absolute URL
* \return cleaned up url, allocated on the heap, or 0 on failure
* \param url an absolute URL
* \param result pointer to pointer to buffer to hold cleaned up url
* \return URL_FUNC_OK on success
*
* If there is no scheme, http:// is added. The scheme and host are
* lower-cased. Default ports are removed (http only). An empty path is
@ -208,9 +206,10 @@ url_func_result url_normalize(const char *url, char **result)
/**
* Resolve a relative URL to absolute form.
*
* \param rel relative URL
* \param base base URL, must be absolute and cleaned as by url_normalize()
* \return an absolute URL, allocated on the heap, or 0 on failure
* \param rel relative URL
* \param base base URL, must be absolute and cleaned as by url_normalize()
* \param result pointer to pointer to buffer to hold absolute url
* \return URL_FUNC_OK on success
*/
url_func_result url_join(const char *rel, const char *base, char **result)
@ -433,8 +432,9 @@ step7: /* 7) */
/**
* Return the host name from an URL.
*
* \param url an absolute URL
* \returns host name allocated on heap, or 0 on failure
* \param url an absolute URL
* \param result pointer to pointer to buffer to hold host name
* \return URL_FUNC_OK on success
*/
url_func_result url_host(const char *url, char **result)
@ -459,20 +459,23 @@ url_func_result url_host(const char *url, char **result)
return URL_FUNC_NOMEM;
}
strncpy((*result), url + match[URL_RE_AUTHORITY].rm_so,
match[URL_RE_AUTHORITY].rm_eo - match[4].rm_so);
match[URL_RE_AUTHORITY].rm_eo -
match[URL_RE_AUTHORITY].rm_so);
(*result)[match[URL_RE_AUTHORITY].rm_eo -
match[URL_RE_AUTHORITY].rm_so] = 0;
return URL_FUNC_OK;
}
/**
* Return the scheme name from an URL
* Return the scheme name from an URL.
*
* \param url an absolute URL
* \param result pointer to pointer to buffer to hold scheme name
* \return URL_FUNC_OK on success
* \param url an absolute URL
* \param result pointer to pointer to buffer to hold scheme name
* \return URL_FUNC_OK on success
*/
url_func_result url_scheme(const char *url, char **result)
{
int m;
@ -494,7 +497,6 @@ url_func_result url_scheme(const char *url, char **result)
LOG(("malloc failed"));
return URL_FUNC_NOMEM;
}
strncpy((*result), url + match[URL_RE_SCHEME].rm_so,
match[URL_RE_SCHEME].rm_eo -
match[URL_RE_SCHEME].rm_so);
@ -503,106 +505,126 @@ url_func_result url_scheme(const char *url, char **result)
return URL_FUNC_OK;
}
/**
* Attempt to find a nice filename for a URL.
*
* \param url an absolute URL
* \returns filename allocated on heap, or 0 on memory exhaustion
* \param url an absolute URL
* \param result pointer to pointer to buffer to hold filename
* \param remove_extensions remove any extensions from the filename
* \return URL_FUNC_OK on success
*/
url_func_result url_nice(const char *url, char **result)
url_func_result url_nice(const char *url, char **result,
bool remove_extensions)
{
unsigned int i, j, k = 0, so;
unsigned int len;
const char *colon;
char buf[40];
char *rurl;
int m;
regmatch_t match[10];
regoff_t start, end;
size_t i;
char *dot;
/* just in case */
(*result) = 0;
*result = 0;
(*result) = malloc(40);
if (!(*result))
return URL_FUNC_NOMEM;
len = strlen(url);
assert(len != 0);
rurl = malloc(len + 1);
if (!rurl) {
free((*result));
return URL_FUNC_NOMEM;
m = regexec(&url_re, url, 10, match, 0);
if (m) {
LOG(("url '%s' failed to match regex", url));
return URL_FUNC_FAILED;
}
/* reverse url into rurl */
for (i = 0, j = len - 1; i != len; i++, j--)
rurl[i] = url[j];
rurl[len] = 0;
/* extract the last component of the path, if possible */
if (match[URL_RE_PATH].rm_so == -1 || match[URL_RE_PATH].rm_so ==
match[URL_RE_PATH].rm_eo)
goto no_path; /* no path, or empty */
for (end = match[URL_RE_PATH].rm_eo - 1;
end != match[URL_RE_PATH].rm_so && url[end] == '/';
end--)
;
if (end == match[URL_RE_PATH].rm_so)
goto no_path; /* path is a string of '/' */
end++;
for (start = end - 1;
start != match[URL_RE_PATH].rm_so && url[start] != '/';
start--)
;
if (url[start] == '/')
start++;
/* prepare a fallback: always succeeds */
colon = strchr(url, ':');
if (colon)
url = colon + 1;
strncpy((*result), url, 15);
(*result)[15] = 0;
for (i = 0; (*result)[i]; i++)
if (!isalnum((*result)[i]))
(*result)[i] = '_';
/* append nice pieces */
j = 0;
do {
m = regexec(&url_nice_re, rurl + j, 10, match, 0);
if (m)
break;
if (match[3].rm_so != match[3].rm_eo) {
so = match[3].rm_so;
i = match[3].rm_eo - so;
if (15 < i) {
so = match[3].rm_eo - 15;
i = 15;
}
if (15 < k + i)
break;
if (k)
k+=2;
strncpy(buf + k, rurl + j + so, i);
k += i;
buf[k] = 160; /* nbsp */
buf[k+1] = 0xc2; /* as UTF-8 */
}
j += match[0].rm_eo;
} while (j != len);
if (k == 0) {
free(rurl);
return URL_FUNC_OK;
if (!strncasecmp(url + start, "index.", 6) ||
!strncasecmp(url + start, "default.", 8)) {
/* try again */
if (start == match[URL_RE_PATH].rm_so)
goto no_path;
for (end = start - 1;
end != match[URL_RE_PATH].rm_so &&
url[end] == '/';
end--)
;
if (end == match[URL_RE_PATH].rm_so)
goto no_path;
end++;
for (start = end - 1;
start != match[URL_RE_PATH].rm_so &&
url[start] != '/';
start--)
;
if (url[start] == '/')
start++;
}
/* reverse back */
for (i = 0, j = k - 1; i != k; i++, j--)
(*result)[i] = buf[j];
(*result)[k] = 0;
*result = malloc(end - start + 1);
if (!*result) {
LOG(("malloc failed"));
return URL_FUNC_NOMEM;
}
strncpy(*result, url + start, end - start);
(*result)[end - start] = 0;
for (i = 0; i != k; i++)
if ((*result)[i] != (char) 0xa0 && !isalnum((*result)[i]))
(*result)[i] = '_';
free(rurl);
if (remove_extensions) {
dot = strchr(*result, '.');
if (dot && dot != *result)
*dot = 0;
}
return URL_FUNC_OK;
no_path:
/* otherwise, use the host name, with '.' replaced by '_' */
if (match[URL_RE_AUTHORITY].rm_so != -1 &&
match[URL_RE_AUTHORITY].rm_so !=
match[URL_RE_AUTHORITY].rm_eo) {
*result = malloc(match[URL_RE_AUTHORITY].rm_eo -
match[URL_RE_AUTHORITY].rm_so + 1);
if (!*result) {
LOG(("malloc failed"));
return URL_FUNC_NOMEM;
}
strncpy(*result, url + match[URL_RE_AUTHORITY].rm_so,
match[URL_RE_AUTHORITY].rm_eo -
match[URL_RE_AUTHORITY].rm_so);
(*result)[match[URL_RE_AUTHORITY].rm_eo -
match[URL_RE_AUTHORITY].rm_so] = 0;
for (i = 0; (*result)[i]; i++)
if ((*result)[i] == '.')
(*result)[i] = '_';
return URL_FUNC_OK;
}
return URL_FUNC_FAILED;
}
/**
* Escape a string suitable for inclusion in an URI
* Escape a string suitable for inclusion in an URL.
*
* \param unescaped The unescaped string
* \param result Pointer to location to store escaped string
* \return URL_FUNC_OK on success
* \param unescaped the unescaped string
* \param result pointer to pointer to buffer to hold escaped string
* \return URL_FUNC_OK on success
*/
url_func_result url_escape(const char *unescaped, char **result)
{
int len;
@ -649,6 +671,7 @@ url_func_result url_escape(const char *unescaped, char **result)
return URL_FUNC_OK;
}
#ifdef TEST
int main(int argc, char *argv[])
@ -670,19 +693,30 @@ int main(int argc, char *argv[])
printf("<== '%s'\n", s);
free(s);
}*/
if (1 != i) {
/* if (1 != i) {
res = url_join(argv[i], argv[1], &s);
if (res == URL_FUNC_OK) {
printf("'%s' + '%s' \t= '%s'\n", argv[1],
argv[i], s);
free(s);
}
}
/* res = url_nice(argv[i], &s);
if (res == URL_FUNC_OK) {
printf("'%s'\n", s);
free(s);
}*/
printf("'%s' => ", argv[i]);
res = url_nice(argv[i], &s, true);
if (res == URL_FUNC_OK) {
printf("'%s', ", s);
free(s);
} else {
printf("failed %u, ", res);
}
res = url_nice(argv[i], &s, false);
if (res == URL_FUNC_OK) {
printf("'%s', ", s);
free(s);
} else {
printf("failed %u, ", res);
}
printf("\n");
}
return 0;
}

View File

@ -24,7 +24,8 @@ url_func_result url_normalize(const char *url, char **result);
url_func_result url_join(const char *rel, const char *base, char **result);
url_func_result url_host(const char *url, char **result);
url_func_result url_scheme(const char *url, char **result);
url_func_result url_nice(const char *url, char **result);
url_func_result url_nice(const char *url, char **result,
bool remove_extensions);
url_func_result url_escape(const char *unescaped, char **result);
#endif