Core part of search improvement:

+ Split up browser window and content related elements of html & text search.
+ Associate search contexts with contents, rather than browser windows.
+ Remove highlighting of search terms dependency on current_redraw_browser.
+ Fixes issues with search contexts being applied to the wrong content.


svn path=/trunk/netsurf/; revision=12646
This commit is contained in:
Michael Drake 2011-08-24 12:29:30 +00:00
parent 85ec83636d
commit 63c07ee28d
13 changed files with 934 additions and 684 deletions

View File

@ -13,8 +13,8 @@ S_CSS := css.c dump.c internal.c select.c utils.c
S_RENDER := box.c box_construct.c box_normalise.c \
font.c form.c html.c html_interaction.c html_redraw.c \
hubbub_binding.c imagemap.c layout.c list.c table.c textinput.c \
textplain.c
hubbub_binding.c imagemap.c layout.c list.c search.c table.c \
textinput.c textplain.c
S_UTILS := base64.c filename.c hashtable.c locale.c messages.c \
talloc.c url.c utf8.c utils.c useragent.c filepath.c log.c

View File

@ -486,6 +486,7 @@ void browser_window_initialise_common(struct browser_window *bw,
/* window characteristics */
bw->cur_sel = NULL;
bw->cur_search = NULL;
bw->refresh_interval = -1;
bw->reformat_pending = false;

View File

@ -187,12 +187,12 @@ struct browser_window {
/** Last time a link was followed in this window */
unsigned int last_action;
/** search context for free text search */
struct search_context *search_context;
/** Content with current selection, or NULL if none */
/** Current selection, or NULL if none */
struct selection *cur_sel;
/** Current context for free text search, or NULL if none */
struct search_context *cur_search;
/** cache of the currently displayed status text. */
char *status_text; /**< Current status bar text. */
int status_text_len; /**< Length of the ::status_text buffer. */

View File

@ -34,6 +34,7 @@
#include "desktop/selection.h"
#include "render/box.h"
#include "render/html.h"
#include "render/search.h"
#include "render/textplain.h"
#include "utils/config.h"
#include "utils/log.h"
@ -42,102 +43,39 @@
#include "utils/utils.h"
#ifndef NOF_ELEMENTS
#define NOF_ELEMENTS(array) (sizeof(array)/sizeof(*(array)))
#endif
struct list_entry {
unsigned start_idx; /* start position of match */
unsigned end_idx; /* end of match */
struct box *start_box; /* used only for html contents */
struct box *end_box;
struct selection *sel;
struct list_entry *prev;
struct list_entry *next;
};
struct search_context {
struct browser_window *bw;
char *string;
bool prev_case_sens;
bool newsearch;
bool insert;
void *p; /* front-specific data */
struct search_callbacks *callbacks;
struct list_entry *found;
struct list_entry *current; /* first for select all */
};
static void search_text(const char *string, int string_len,
struct search_context *context, search_flags_t flags);
static const char *find_pattern(const char *string, int s_len,
const char *pattern, int p_len, bool case_sens,
unsigned int *m_len);
static bool find_occurrences_html(const char *pattern, int p_len,
struct box *cur, bool case_sens,
struct search_context *context);
static bool find_occurrences_text(const char *pattern, int p_len,
struct content *c, bool case_sens,
struct search_context *context);
static struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
struct search_context *context);
static void free_matches(struct search_context *context);
/**
* create a search_context
* \param bw the browser_window the search_context is connected to
* \param callbacks the callbacks to modify appearance according to results
* \param p the pointer to send to the callbacks
* \return true for success
*/
bool search_create_context(struct browser_window *bw,
bool browser_window_search_create_context(struct browser_window *bw,
struct search_callbacks *callbacks, void *p)
{
struct search_context *context;
struct list_entry *search_head;
context = malloc(sizeof(struct search_context));
if (context == NULL) {
warn_user("NoMemory", 0);
return false;
}
search_head = malloc(sizeof(struct list_entry));
if (search_head == NULL) {
warn_user("NoMemory", 0);
free(context);
return false;
}
if (bw->search_context != NULL)
search_destroy_context(bw->search_context);
assert(bw != NULL);
if (bw->cur_search != NULL)
search_destroy_context(bw->cur_search);
bw->cur_search = NULL;
if (!bw->current_content)
return false;
bw->cur_search = search_create_context(bw->current_content,
callbacks, p);
if (bw->cur_search == NULL)
return false;
search_head->start_idx = 0;
search_head->end_idx = 0;
search_head->start_box = NULL;
search_head->end_box = NULL;
search_head->sel = NULL;
search_head->prev = NULL;
search_head->next = NULL;
context->found = search_head;
context->current = NULL;
context->string = NULL;
context->prev_case_sens = false;
context->newsearch = true;
context->insert = true;
context->bw = bw;
context->callbacks = callbacks;
context->p = p;
bw->search_context = context;
return true;
}
/**
 * Release the search context currently attached to a browser window.
 *
 * Does nothing beyond clearing the pointer when the window has no search
 * context.
 *
 * \param bw  browser window whose search context is to be destroyed
 */
void browser_window_search_destroy_context(struct browser_window *bw)
{
	struct search_context *old;

	assert(bw != NULL);

	old = bw->cur_search;
	if (old != NULL)
		search_destroy_context(old);

	/* the window no longer refers to any search */
	bw->cur_search = NULL;
}
/**
* to simplify calls to search_step(); checks that the browser_window is
* non-NULL, creates a new search_context in case of a new search
@ -146,579 +84,33 @@ bool search_create_context(struct browser_window *bw,
* \param p a pointer returned to the callbacks
* \return true for success
*/
bool search_verify_new(struct browser_window *bw,
bool browser_window_search_verify_new(struct browser_window *bw,
struct search_callbacks *callbacks, void *p)
{
if (bw == NULL)
return false;
if (bw->search_context == NULL)
return search_create_context(bw, callbacks, p);
return true;
}
/**
* Begins/continues the search process
* Note that this may be called many times for a single search.
*
* \param bw the browser_window to search in
* \param flags the flags forward/back etc
* \param string the string to match
*/
void search_step(struct search_context *context, search_flags_t flags,
const char *string)
{
int string_len;
int i = 0;
if ((context == NULL) || (context->callbacks == NULL)) {
warn_user("SearchError", 0);
return;
}
if (context->callbacks->add_recent != NULL)
context->callbacks->add_recent(string, context->p);
string_len = strlen(string);
for(i = 0; i < string_len; i++)
if (string[i] != '#' && string[i] != '*') break;
if (i >= string_len) {
free_matches(context);
if (context->callbacks->status != NULL)
context->callbacks->status(true, context->p);
if (context->callbacks->back_state != NULL)
context->callbacks->back_state(false, context->p);
if (context->callbacks->forward_state != NULL)
context->callbacks->forward_state(false, context->p);
browser_window_set_scroll(context->bw, 0, 0);
return;
}
search_text(string, string_len, context, flags);
}
/**
* Release the memory used by the list of matches,
* deleting selection objects too
*/
void free_matches(struct search_context *context)
{
struct list_entry *a;
struct list_entry *b;
a = context->found->next;
/* empty the list before clearing and deleting the
selections because the the clearing updates the
screen immediately, causing nested accesses to the list */
context->found->prev = NULL;
context->found->next = NULL;
for (; a; a = b) {
b = a->next;
if (a->sel) {
selection_clear(a->sel, true);
selection_destroy(a->sel);
}
free(a);
}
}
/**
* Search for a string in the box tree
*
* \param string the string to search for
* \param string_len length of search string
*/
void search_text(const char *string, int string_len,
struct search_context *context, search_flags_t flags)
{
struct rect bounds;
hlcache_handle *h;
struct box *box = NULL;
bool case_sensitive, forwards, showall;
case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ?
true : false;
forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false;
showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? true : false;
if (context->bw == NULL)
return;
h = context->bw->current_content;
/* only handle html contents */
if ((!h) || (content_get_type(h) != CONTENT_HTML &&
content_get_type(h) != CONTENT_TEXTPLAIN))
return;
if (content_get_type(h) == CONTENT_HTML) {
box = html_get_box_tree(h);
if (!box)
return;
}
/* LOG(("do_search '%s' - '%s' (%p, %p) %p (%d, %d) %d",
search_data.string, string, search_data.content, c, search_data.found->next,
search_data.prev_case_sens, case_sens, forwards)); */
/* check if we need to start a new search or continue an old one */
if (context->newsearch) {
bool res;
if (context->string != NULL)
free(context->string);
context->current = NULL;
free_matches(context);
context->string = malloc(string_len + 1);
if (context->string != NULL) {
memcpy(context->string, string, string_len);
context->string[string_len] = '\0';
}
if ((context->callbacks != NULL) &&
(context->callbacks->hourglass != NULL))
context->callbacks->hourglass(true, context->p);
if (content_get_type(h) == CONTENT_HTML)
res = find_occurrences_html(string, string_len,
box, case_sensitive, context);
else {
assert(content_get_type(h) == CONTENT_TEXTPLAIN);
res = find_occurrences_text(string, string_len,
hlcache_handle_get_content(h),
case_sensitive, context);
}
if (!res) {
free_matches(context);
if ((context->callbacks != NULL) &&
(context->callbacks->hourglass !=
NULL))
context->callbacks->hourglass(false,
context->p);
return;
}
if ((context->callbacks != NULL) &&
(context->callbacks->hourglass != NULL))
context->callbacks->hourglass(false, context->p);
context->prev_case_sens = case_sensitive;
/* LOG(("%d %p %p (%p, %p)", new, search_data.found->next, search_data.current,
search_data.current->prev, search_data.current->next)); */
/* new search, beginning at the top of the page */
context->current = context->found->next;
context->newsearch = false;
}
else if (context->current != NULL) {
/* continued search in the direction specified */
if (forwards) {
if (context->current->next)
context->current = context->current->next;
}
else {
if (context->current->prev)
context->current = context->current->prev;
}
}
if (context->callbacks == NULL)
return;
if (context->callbacks->status != NULL)
context->callbacks->status((context->current != NULL),
context->p);
search_show_all(showall, context);
if (context->callbacks->back_state != NULL)
context->callbacks->back_state((context->current != NULL) &&
(context->current->prev != NULL),
context->p);
if (context->callbacks->forward_state != NULL)
context->callbacks->forward_state((context->current != NULL) &&
(context->current->next != NULL), context->p);
if (context->current == NULL)
return;
switch (content_get_type(h)) {
case CONTENT_HTML:
/* get box position and jump to it */
box_coords(context->current->start_box,
&bounds.x0, &bounds.y0);
/* \todo: move x0 in by correct idx */
box_coords(context->current->end_box,
&bounds.x1, &bounds.y1);
/* \todo: move x1 in by correct idx */
bounds.x1 += context->current->end_box->width;
bounds.y1 += context->current->end_box->height;
break;
default:
assert(content_get_type(h) == CONTENT_TEXTPLAIN);
textplain_coords_from_range(
hlcache_handle_get_content(h),
context->current->start_idx,
context->current->end_idx, &bounds);
break;
}
browser_window_scroll_visible(context->bw, &bounds);
}
/**
 * Find the first occurrence of a wildcard pattern in a string.
 *
 * In the pattern, '*' matches any run of bytes (including none) and '#'
 * matches exactly one arbitrary byte; every other byte matches itself.
 * The pattern behaves as if it were preceded by a '*', so the match may
 * begin anywhere in the string.
 *
 * \param string     the string to be searched (unterminated)
 * \param s_len      length of the string to be searched
 * \param pattern    the pattern for which we are searching (unterminated)
 * \param p_len      length of pattern
 * \param case_sens  true iff case sensitive match required
 * \param m_len      updated with the length of the match in bytes (never
 *                   less than 1); not written when there is no match
 * \return pointer to the start of the first match, NULL if none
 */
const char *find_pattern(const char *string, int s_len, const char *pattern,
		int p_len, bool case_sens, unsigned int *m_len)
{
	/* backtracking stack: one saved state per '*' that may be retried */
	struct { const char *ss, *s; int p; bool first; } context[16];
	const int max_top = (int)(sizeof(context) / sizeof(context[0]));
	const char *es = string + s_len;
	const char *ss = string;	/* start of the candidate match */
	const char *s = string;		/* next byte of string to examine */
	/* pattern index; -1 stands for a virtual '*' before the pattern
	 * (an index is used so no pointer before the array is formed) */
	int p = -1;
	bool first = true;	/* no non-'*' pattern byte matched yet */
	int top = 0;
	int span;

	while (p < p_len) {
		bool matches;

		if (p < 0 || pattern[p] == '*') {
			int ch;

			/* skip any further asterisks; one is the same as many */
			do {
				p++;
			} while (p < p_len && pattern[p] == '*');

			/* if we're at the end of the pattern, yes, it matches */
			if (p >= p_len)
				break;

			/* anything matches a # so continue matching from
			   here, and stack a context that will try to match
			   the wildcard against the next character */
			/* go through unsigned char: <ctype.h> functions are
			   undefined for negative values other than EOF */
			ch = (unsigned char)pattern[p];
			if (ch != '#') {
				/* scan forwards until we find a match for
				   this char */
				if (!case_sens)
					ch = toupper(ch);
				while (s < es) {
					int sc = (unsigned char)*s;

					if (!case_sens)
						sc = toupper(sc);
					if (sc == ch)
						break;
					s++;
				}
			}

			if (s < es) {
				/* remember where we are in case the match
				   fails; we may then resume */
				if (top < max_top) {
					context[top].ss = ss;
					context[top].s = s + 1;
					/* index of last asterisk */
					context[top].p = p - 1;
					context[top].first = first;
					top++;
				}

				if (first) {
					/* remember first non-'*' char */
					ss = s;
					first = false;
				}

				matches = true;
			} else {
				matches = false;
			}
		} else if (s < es) {
			int ch = (unsigned char)pattern[p];

			if (ch == '#') {
				matches = true;
			} else {
				int sc = (unsigned char)*s;

				if (case_sens)
					matches = (sc == ch);
				else
					matches = (toupper(sc) == toupper(ch));
			}

			if (matches && first) {
				ss = s;	/* remember first non-'*' char */
				first = false;
			}
		} else {
			matches = false;
		}

		if (matches) {
			p++;
			s++;
		} else {
			/* doesn't match, resume with stacked context if we
			   have one */
			if (--top < 0)
				return NULL;	/* no match, give up */

			ss = context[top].ss;
			s = context[top].s;
			p = context[top].p;
			first = context[top].first;
		}
	}

	/* end of pattern reached; report at least one byte so callers that
	   advance by the match length always make progress */
	span = (int)(s - ss);
	*m_len = (span > 1) ? (unsigned int)span : 1;
	return ss;
}
/**
* Finds all occurrences of a given string in the html box tree
*
* \param pattern the string pattern to search for
* \param p_len pattern length
* \param cur pointer to the current box
* \param case_sens whether to perform a case sensitive search
* \return true on success, false on memory allocation failure
*/
bool find_occurrences_html(const char *pattern, int p_len, struct box *cur,
bool case_sens, struct search_context *context)
{
struct box *a;
/* ignore this box, if there's no visible text */
if (!cur->object && cur->text) {
const char *text = cur->text;
unsigned length = cur->length;
while (length > 0) {
struct list_entry *entry;
unsigned match_length;
unsigned match_offset;
const char *new_text;
const char *pos = find_pattern(text, length,
pattern, p_len, case_sens,
&match_length);
if (!pos) break;
/* found string in box => add to list */
match_offset = pos - cur->text;
entry = add_entry(cur->byte_offset + match_offset,
cur->byte_offset +
match_offset +
match_length, context);
if (!entry)
return false;
entry->start_box = cur;
entry->end_box = cur;
new_text = pos + match_length;
length -= (new_text - text);
text = new_text;
}
}
/* and recurse */
for (a = cur->children; a; a = a->next) {
if (!find_occurrences_html(pattern, p_len, a, case_sens,
context))
return false;
}
if (bw->cur_search == NULL)
return browser_window_search_create_context(bw, callbacks, p);
return true;
}
/**
* Finds all occurrences of a given string in a textplain content
*
* \param pattern the string pattern to search for
* \param p_len pattern length
* \param c the content to be searched
* \param case_sens wheteher to perform a case sensitive search
* \return true on success, false on memory allocation failure
*/
bool find_occurrences_text(const char *pattern, int p_len,
struct content *c, bool case_sens,
struct search_context *context)
void browser_window_search_step(struct browser_window *bw,
search_flags_t flags, const char *string)
{
int nlines = textplain_line_count(c);
int line;
assert(bw != NULL);
for(line = 0; line < nlines; line++) {
size_t offset, length;
const char *text = textplain_get_line(c, line,
&offset, &length);
if (text) {
while (length > 0) {
struct list_entry *entry;
unsigned match_length;
size_t start_idx;
const char *new_text;
const char *pos = find_pattern(text, length,
pattern, p_len, case_sens,
&match_length);
if (!pos) break;
/* found string in line => add to list */
start_idx = offset + (pos - text);
entry = add_entry(start_idx, start_idx +
match_length, context);
if (!entry)
return false;
new_text = pos + match_length;
offset += (new_text - text);
length -= (new_text - text);
text = new_text;
}
}
}
return true;
if (bw->cur_search != NULL)
search_step(bw->cur_search, flags, string);
}
/**
* Determines whether any portion of the given text box should be
* selected because it matches the current search string.
*
* \param bw browser window
* \param start_offset byte offset within text of string to be checked
* \param end_offset byte offset within text
* \param start_idx byte offset within string of highlight start
* \param end_idx byte offset of highlight end
* \return true iff part of the box should be highlighted
*/
bool search_term_highlighted(struct browser_window *bw,
unsigned start_offset, unsigned end_offset,
unsigned *start_idx, unsigned *end_idx,
struct search_context *context)
void browser_window_search_show_all(bool all, struct browser_window *bw)
{
if (bw == context->bw) {
struct list_entry *a;
for(a = context->found->next; a; a = a->next)
if (a->sel && selection_defined(a->sel) &&
selection_highlighted(a->sel,
start_offset, end_offset,
start_idx, end_idx))
return true;
}
assert(bw != NULL);
return false;
}
/**
 * Choose between highlighting every match and highlighting only the
 * current one.
 *
 * Matches that should not be highlighted have their selection cleared and
 * destroyed; matches that should be highlighted and have no selection yet
 * get one created covering the match's start and end offsets.
 *
 * \param all      true to highlight all matches, false for just the current
 * \param context  search context whose match list is updated
 */
void search_show_all(bool all, struct search_context *context)
{
	struct list_entry *entry;

	for (entry = context->found->next; entry != NULL;
			entry = entry->next) {
		hlcache_handle *h;
		struct content *c;

		if (!all && entry != context->current) {
			/* this match must not be shown: drop its highlight */
			if (entry->sel != NULL) {
				selection_clear(entry->sel, true);
				selection_destroy(entry->sel);
				entry->sel = NULL;
			}
			continue;
		}

		/* already highlighted, nothing to do */
		if (entry->sel != NULL)
			continue;

		h = context->bw->current_content;
		c = hlcache_handle_get_content(h);

		if (content_get_type(h) == CONTENT_HTML) {
			entry->sel = selection_create(c, true);
			if (entry->sel == NULL)
				continue;
			selection_init(entry->sel, html_get_box_tree(h));
		} else {
			assert(content_get_type(h) == CONTENT_TEXTPLAIN);
			entry->sel = selection_create(c, false);
			if (entry->sel == NULL)
				continue;
			selection_init(entry->sel, NULL);
		}

		selection_set_start(entry->sel, entry->start_idx);
		selection_set_end(entry->sel, entry->end_idx);
	}
}
/**
 * Append a new match to the end of a search context's list of matches.
 *
 * The list head's prev pointer doubles as the tail pointer, so appending
 * does not need to walk the list.
 *
 * \param start_idx  offset of match start within textual representation
 * \param end_idx    offset of match end
 * \param context    search context that owns the list
 * \return pointer to added entry, NULL iff failed
 */
struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
		struct search_context *context)
{
	struct list_entry *head;
	struct list_entry *tail;
	struct list_entry *match = calloc(1, sizeof(*match));

	if (match == NULL) {
		warn_user("NoMemory", 0);
		return NULL;
	}

	head = context->found;
	tail = head->prev;

	match->start_idx = start_idx;
	match->end_idx = end_idx;
	match->sel = NULL;
	match->next = NULL;
	match->prev = tail;

	/* link after the current tail, or as first entry of an empty list */
	if (tail != NULL)
		tail->next = match;
	else
		head->next = match;

	head->prev = match;

	return match;
}
/**
* Ends the search process, invalidating all state
* freeing the list of found boxes
*/
void search_destroy_context(struct search_context *context)
{
if (context->bw != NULL)
context->bw->search_context = NULL;
if ((context->string != NULL) && (context->callbacks != NULL) &&
(context->callbacks->add_recent != NULL)) {
context->callbacks->add_recent(context->string, context->p);
free(context->string);
}
free_matches(context);
free(context);
if (bw->cur_search != NULL)
search_show_all(all, bw->cur_search);
}

View File

@ -29,11 +29,6 @@ typedef enum {
SEARCH_FLAG_FORWARDS = (1 << 1),
SEARCH_FLAG_SHOWALL = (1 << 2)
} search_flags_t;
/**
* called to clear the context; 'renews' the search too
*/
void search_destroy_context(struct search_context *context);
/**
* Change the displayed search status.
@ -80,17 +75,14 @@ struct search_callbacks {
search_add_recent_callback add_recent;
};
bool search_verify_new(struct browser_window *bw,
struct search_callbacks *callbacks, void *p);
void search_step(struct search_context *context, search_flags_t flags,
const char * string);
bool search_create_context(struct browser_window *bw,
struct search_callbacks *callbacks, void *p);
void search_show_all(bool all, struct search_context *context);
bool search_term_highlighted(struct browser_window *bw,
unsigned start_offset, unsigned end_offset,
unsigned *start_idx, unsigned *end_idx,
struct search_context *context);
bool browser_window_search_create_context(struct browser_window *bw,
struct search_callbacks *callbacks, void *p);
void browser_window_search_destroy_context(struct browser_window *bw);
bool browser_window_search_verify_new(struct browser_window *bw,
struct search_callbacks *callbacks, void *p);
void browser_window_search_step(struct browser_window *bw,
search_flags_t flags, const char *string);
void browser_window_search_show_all(bool all, struct browser_window *bw);
#endif

View File

@ -42,6 +42,7 @@
#include "render/html_internal.h"
#include "render/imagemap.h"
#include "render/layout.h"
#include "render/search.h"
#include "utils/http.h"
#include "utils/log.h"
#include "utils/messages.h"
@ -75,6 +76,7 @@ static void html_open(struct content *c, struct browser_window *bw,
struct object_params *params);
static void html_close(struct content *c);
struct selection *html_get_selection(struct content *c);
struct search_context *html_get_search(struct content *c);
static nserror html_clone(const struct content *old, struct content **newc);
static content_type html_content_type(lwc_string *mime_type);
@ -2077,6 +2079,9 @@ void html_close(struct content *c)
html_content *html = (html_content *) c;
struct content_html_object *object, *next;
if (html->search != NULL)
search_destroy_context(html->search);
html->bw = NULL;
for (object = html->object_list; object != NULL; object = next) {
@ -2107,6 +2112,37 @@ struct selection *html_get_selection(struct content *c)
return &html->sel;
}
/**
 * Record the search context associated with an HTML content
 *
 * \param c  content of type html
 * \param s  search context to store, or NULL to record that there is none
 */
void html_set_search(struct content *c, struct search_context *s)
{
	((html_content *) c)->search = s;
}
/**
 * Fetch the search context associated with an HTML content
 *
 * \param c  content of type html
 * \return the stored search context, or NULL if none
 */
struct search_context *html_get_search(struct content *c)
{
	const html_content *doc = (const html_content *) c;

	return doc->search;
}
#if ALWAYS_DUMP_FRAMESET
/**
* Print a frameset tree to stderr.

View File

@ -159,7 +159,9 @@ bool text_redraw(const char *utf8_text, size_t utf8_len,
int height,
float scale,
bool excluded,
struct content *c,
const struct selection *sel,
struct search_context *search,
const struct redraw_context *ctx);
xmlDoc *html_get_document(struct hlcache_handle *h);

View File

@ -99,6 +99,9 @@ typedef struct html_content {
/** Selection state */
struct selection sel;
/** Context for free text search, or NULL if none */
struct search_context *search;
} html_content;
@ -112,6 +115,7 @@ void html_set_status(html_content *c, const char *extra);
void html__redraw_a_box(struct content *c, struct box *box);
struct browser_window *html_get_browser_window(struct content *c);
void html_set_search(struct content *c, struct search_context *s);
/* in render/html_redraw.c */
bool html_redraw(struct content *c, struct content_redraw_data *data,

View File

@ -39,7 +39,6 @@
#include "desktop/selection.h"
#include "desktop/options.h"
#include "desktop/print.h"
#include "desktop/search.h"
#include "desktop/scrollbar.h"
#include "image/bitmap.h"
#include "render/box.h"
@ -47,6 +46,7 @@
#include "render/form.h"
#include "render/html_internal.h"
#include "render/layout.h"
#include "render/search.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/utils.h"
@ -831,7 +831,9 @@ bool html_redraw_text_box(const html_content *html, struct box *box,
if (!text_redraw(box->text, box->length, box->byte_offset,
box->space, &fstyle, x, y,
clip, box->height, scale, excluded, &html->sel, ctx))
clip, box->height, scale, excluded,
(struct content *)html, &html->sel,
html->search, ctx))
return false;
return true;
@ -859,7 +861,8 @@ bool html_redraw_text_box(const html_content *html, struct box *box,
bool text_redraw(const char *utf8_text, size_t utf8_len,
size_t offset, int space, const plot_font_style_t *fstyle,
int x, int y, const struct rect *clip, int height,
float scale, bool excluded, const struct selection *sel,
float scale, bool excluded, struct content *c,
const struct selection *sel, struct search_context *search,
const struct redraw_context *ctx)
{
const struct plotter_table *plot = ctx->plot;
@ -883,14 +886,11 @@ bool text_redraw(const char *utf8_text, size_t utf8_len,
}
/* what about the current search operation, if any? */
if (!highlighted && (current_redraw_browser->search_context
!= NULL) &&
search_term_highlighted(
current_redraw_browser,
if (!highlighted && (search != NULL) &&
search_term_highlighted(c,
offset, offset + len,
&start_idx, &end_idx,
current_redraw_browser->
search_context)) {
search)) {
highlighted = true;
}

739
render/search.c Normal file
View File

@ -0,0 +1,739 @@
/*
* Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2005 Adrian Lees <adrianl@users.sourceforge.net>
* Copyright 2009 Mark Benjamin <netsurf-browser.org.MarkBenjamin@dfgh.net>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Free text search (core)
*/
#include "utils/config.h"
#include <ctype.h>
#include <string.h>
#include "content/content.h"
#include "content/hlcache.h"
#include "desktop/browser.h"
#include "desktop/gui.h"
#include "desktop/options.h"
#include "desktop/selection.h"
#include "render/box.h"
#include "render/html.h"
#include "render/html_internal.h"
#include "render/search.h"
#include "render/textplain.h"
#include "utils/config.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/url.h"
#include "utils/utils.h"
#ifndef NOF_ELEMENTS
#define NOF_ELEMENTS(array) (sizeof(array)/sizeof(*(array)))
#endif
/**
 * One match found by a search, held in a doubly linked list.
 *
 * The list is anchored by a head entry (search_context::found) that
 * carries no match of its own; matches start at the head's next pointer.
 */
struct list_entry {
unsigned start_idx; /* start position of match */
unsigned end_idx; /* end of match */
struct box *start_box; /* used only for html contents */
struct box *end_box; /* box in which the match ends; NULL on the list head */
struct selection *sel; /* selection object attached to this match, or NULL */
struct list_entry *prev; /* previous entry, or NULL */
struct list_entry *next; /* next entry, or NULL at the end of the list */
};
/**
 * State of a free text search over one content.
 */
struct search_context {
struct content *c; /* content being searched */
char *string; /* copy of the search string, or NULL if none is held */
bool prev_case_sens; /* case sensitivity used when the matches were found */
bool newsearch; /* set on creation; cleared once a search has built the match list */
bool is_html; /* true if c is of type CONTENT_HTML, false otherwise */
void *p; /* front-specific data */
struct search_callbacks *callbacks; /* front end callbacks; each is passed p */
struct list_entry *found; /* head of the list of matches (holds no match itself) */
struct list_entry *current; /* first for select all */
};
static void search_text(const char *string, int string_len,
struct search_context *context, search_flags_t flags);
static const char *find_pattern(const char *string, int s_len,
const char *pattern, int p_len, bool case_sens,
unsigned int *m_len);
static bool find_occurrences_html(const char *pattern, int p_len,
struct box *cur, bool case_sens,
struct search_context *context);
static bool find_occurrences_text(const char *pattern, int p_len,
struct content *c, bool case_sens,
struct search_context *context);
static struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
struct search_context *context);
static void free_matches(struct search_context *context);
/**
 * Look up the browser window showing the content that a search belongs to
 *
 * The lookup is delegated to the HTML or plain text content handler,
 * depending on the kind of content the search was created for.
 *
 * \param search  search context
 * \return the browser window, or NULL if the search has no content or the
 *         content handler reports none
 */
static struct browser_window *search_get_browser_window(
		struct search_context *search)
{
	if (search->c == NULL)
		return NULL;

	return search->is_html ?
			html_get_browser_window(search->c) :
			textplain_get_browser_window(search->c);
}
/**
 * Create a search context for a content
 *
 * On success the new context is also registered with the content, through
 * html_set_search() or textplain_set_search() as appropriate.
 *
 * \param h          handle of the content to be searched; only HTML and
 *                   plain text contents are supported
 * \param callbacks  the callbacks to modify appearance according to results
 * \param p          the pointer to send to the callbacks
 * \return the new search context, or NULL if h is NULL, h has no content,
 *         the content type is unsupported, or memory is exhausted
 */
struct search_context * search_create_context(hlcache_handle *h,
		struct search_callbacks *callbacks, void *p)
{
	struct search_context *context;
	struct list_entry *search_head;
	struct content *c;
	bool is_html;

	/* the handle must be validated before anything dereferences it */
	if (h == NULL)
		return NULL;

	c = hlcache_handle_get_content(h);
	if (c == NULL)
		return NULL;

	is_html = (content_get_type(h) == CONTENT_HTML);
	if (!is_html && content_get_type(h) != CONTENT_TEXTPLAIN)
		return NULL;

	context = malloc(sizeof(*context));
	if (context == NULL) {
		warn_user("NoMemory", 0);
		return NULL;
	}

	search_head = malloc(sizeof(*search_head));
	if (search_head == NULL) {
		warn_user("NoMemory", 0);
		free(context);
		return NULL;
	}

	/* the list head carries no match; it only anchors the list */
	search_head->start_idx = 0;
	search_head->end_idx = 0;
	search_head->start_box = NULL;
	search_head->end_box = NULL;
	search_head->sel = NULL;
	search_head->prev = NULL;
	search_head->next = NULL;

	context->found = search_head;
	context->current = NULL;
	context->string = NULL;
	context->prev_case_sens = false;
	context->newsearch = true;
	context->c = c;
	context->is_html = is_html;
	context->callbacks = callbacks;
	context->p = p;

	/* let the content know about its search context */
	if (is_html)
		html_set_search(c, context);
	else
		textplain_set_search(c, context);

	return context;
}
/**
 * Begins/continues the search process
 * Note that this may be called many times for a single search.
 *
 * A string that is empty or made up only of the wildcard characters '#'
 * and '*' is not searched for: the existing matches are freed, the status,
 * back and forward callbacks are reset, and the window showing the content
 * (if any) is scrolled back to the top.
 *
 * \param context  the search context to step; must have callbacks set
 * \param flags    the flags forward/back etc
 * \param string   the string to match
 */
void search_step(struct search_context *context, search_flags_t flags,
		const char *string)
{
	if ((context == NULL) || (context->callbacks == NULL) ||
			(string == NULL)) {
		warn_user("SearchError", 0);
		return;
	}

	if (context->callbacks->add_recent != NULL)
		context->callbacks->add_recent(string, context->p);

	/* nothing but wildcards (or nothing at all): clear the search */
	if (string[strspn(string, "#*")] == '\0') {
		struct browser_window *bw;

		free_matches(context);

		if (context->callbacks->status != NULL)
			context->callbacks->status(true, context->p);
		if (context->callbacks->back_state != NULL)
			context->callbacks->back_state(false, context->p);
		if (context->callbacks->forward_state != NULL)
			context->callbacks->forward_state(false, context->p);

		bw = search_get_browser_window(context);
		if (bw != NULL)
			browser_window_set_scroll(bw, 0, 0);
		return;
	}

	search_text(string, (int)strlen(string), context, flags);
}
/**
 * Free every entry in a search context's list of matches, together with
 * any selection object attached to an entry.  The list head itself is kept
 * and left describing an empty list.
 *
 * \param context  search context whose matches are released
 */
void free_matches(struct search_context *context)
{
	struct list_entry *head = context->found;
	struct list_entry *entry = head->next;

	/* Detach the chain from the head first: clearing a selection
	   updates the screen immediately, which causes nested accesses to
	   the list while it is being torn down. */
	head->prev = NULL;
	head->next = NULL;

	while (entry != NULL) {
		struct list_entry *following = entry->next;

		if (entry->sel != NULL) {
			selection_clear(entry->sel, true);
			selection_destroy(entry->sel);
		}
		free(entry);

		entry = following;
	}
}
/**
 * Search for a string in the context's content and show the result
 *
 * When context->newsearch is set, the match list is rebuilt from scratch
 * and the first match becomes current; otherwise the current match is
 * stepped one entry in the requested direction.  The front end callbacks
 * are then updated and the current match is scrolled into view.
 *
 * \param string      the string to search for
 * \param string_len  length of search string
 * \param context     search context holding the content and match list
 * \param flags       SEARCH_FLAG_* bits (case sensitive, forwards, show all)
 */
void search_text(const char *string, int string_len,
		struct search_context *context, search_flags_t flags)
{
	const bool case_sensitive = (flags & SEARCH_FLAG_CASE_SENSITIVE) != 0;
	const bool forwards = (flags & SEARCH_FLAG_FORWARDS) != 0;
	const bool showall = (flags & SEARCH_FLAG_SHOWALL) != 0;
	struct browser_window *bw;
	struct box *box = NULL;
	struct rect bounds;

	if (context->c == NULL)
		return;

	if (context->is_html == true) {
		/* HTML is searched through its box tree; no tree, no search */
		box = ((html_content *)context->c)->layout;
		if (!box)
			return;
	}

	/* check if we need to start a new search or continue an old one */
	if (context->newsearch) {
		bool res;

		/* forget everything from the previous search */
		free(context->string);
		context->current = NULL;
		free_matches(context);

		/* keep a private, terminated copy of the search string */
		context->string = malloc(string_len + 1);
		if (context->string != NULL) {
			memcpy(context->string, string, string_len);
			context->string[string_len] = '\0';
		}

		if ((context->callbacks != NULL) &&
				(context->callbacks->hourglass != NULL))
			context->callbacks->hourglass(true, context->p);

		if (context->is_html == true)
			res = find_occurrences_html(string, string_len,
					box, case_sensitive, context);
		else
			res = find_occurrences_text(string, string_len,
					context->c, case_sensitive, context);

		/* on failure the partial list is discarded before the
		 * hourglass is switched off again */
		if (!res)
			free_matches(context);

		if ((context->callbacks != NULL) &&
				(context->callbacks->hourglass != NULL))
			context->callbacks->hourglass(false, context->p);

		if (!res)
			return;

		context->prev_case_sens = case_sensitive;

		/* new search, beginning at the top of the page */
		context->current = context->found->next;
		context->newsearch = false;

	} else if (context->current != NULL) {
		/* continued search in the direction specified; stay put
		 * when there is no further match that way */
		struct list_entry *step = forwards ?
				context->current->next :
				context->current->prev;

		if (step)
			context->current = step;
	}

	if (context->callbacks == NULL)
		return;

	if (context->callbacks->status != NULL)
		context->callbacks->status((context->current != NULL),
				context->p);

	search_show_all(showall, context);

	if (context->callbacks->back_state != NULL)
		context->callbacks->back_state(
				(context->current != NULL) &&
				(context->current->prev != NULL),
				context->p);

	if (context->callbacks->forward_state != NULL)
		context->callbacks->forward_state(
				(context->current != NULL) &&
				(context->current->next != NULL),
				context->p);

	if (context->current == NULL)
		return;

	if (context->is_html == true) {
		/* get box position and jump to it */
		box_coords(context->current->start_box,
				&bounds.x0, &bounds.y0);
		/* \todo: move x0 in by correct idx */
		box_coords(context->current->end_box,
				&bounds.x1, &bounds.y1);
		/* \todo: move x1 in by correct idx */
		bounds.x1 += context->current->end_box->width;
		bounds.y1 += context->current->end_box->height;
	} else {
		textplain_coords_from_range(context->c,
				context->current->start_idx,
				context->current->end_idx, &bounds);
	}

	bw = search_get_browser_window(context);
	if (bw != NULL)
		browser_window_scroll_visible(bw, &bounds);
}
/**
 * Find the first occurrence of 'pattern' in 'string' and return a pointer
 * to it
 *
 * The pattern may contain two wildcards: '#' matches any single byte and
 * '*' matches any run of bytes (including none).  The pattern is treated
 * as if it were preceded by a '*', so it may match anywhere in the string.
 *
 * \param  string     the string to be searched (unterminated)
 * \param  s_len      length of the string to be searched
 * \param  pattern    the pattern for which we are searching (unterminated)
 * \param  p_len      length of pattern
 * \param  case_sens  true iff case sensitive match required
 * \param  m_len      accepts length of match in bytes (at least 1);
 *                    only written when a match is found
 * \return pointer to first match, NULL if none
 */
const char *find_pattern(const char *string, int s_len, const char *pattern,
		int p_len, bool case_sens, unsigned int *m_len)
{
	/* maximum number of backtracking points remembered at once */
	enum { MAX_BACKTRACK = 16 };

	struct {
		const char *ss;
		const char *s;
		int p;
		bool first;
	} context[MAX_BACKTRACK];

	const char *es = string + s_len;
	const char *ss = string;	/* start of the candidate match */
	const char *s = string;		/* current position in the string */
	int p = -1;	/* pattern index; -1 is a virtual '*' before the
			 * pattern (an index avoids forming pattern - 1) */
	bool first = true;
	int top = 0;

	while (p < p_len) {
		bool matches;

		if (p < 0 || pattern[p] == '*') {
			int ch;

			/* skip any further asterisks; one is the same as many
			 */
			do {
				p++;
			} while (p < p_len && pattern[p] == '*');

			/* if we're at the end of the pattern, yes, it matches
			 */
			if (p >= p_len)
				break;

			/* anything matches a # so continue matching from
			 * here, and stack a context that will try to match
			 * the wildcard against the next character */

			/* bytes go through unsigned char: toupper() is only
			 * defined for those values (and EOF), and comparing
			 * as int keeps bytes >= 0x80 equal to themselves */
			ch = (unsigned char)pattern[p];
			if (ch != '#') {
				/* scan forwards until we find a match for
				 * this char */
				if (!case_sens)
					ch = toupper(ch);

				while (s < es) {
					int sc = (unsigned char)*s;

					if (!case_sens)
						sc = toupper(sc);
					if (sc == ch)
						break;
					s++;
				}
			}

			if (s < es) {
				/* remember where we are in case the match
				 * fails; we may then resume */
				if (top < MAX_BACKTRACK) {
					context[top].ss = ss;
					context[top].s = s + 1;
					/* index of the last asterisk */
					context[top].p = p - 1;
					context[top].first = first;
					top++;
				}

				if (first) {
					/* remember first non-'*' char */
					ss = s;
					first = false;
				}

				matches = true;
			} else {
				matches = false;
			}

		} else if (s < es) {
			int pc = (unsigned char)pattern[p];

			if (pc == '#') {
				matches = true;
			} else {
				int sc = (unsigned char)*s;

				if (!case_sens) {
					pc = toupper(pc);
					sc = toupper(sc);
				}
				matches = (sc == pc);
			}

			if (matches && first) {
				/* remember first non-'*' char */
				ss = s;
				first = false;
			}

		} else {
			matches = false;
		}

		if (matches) {
			p++;
			s++;
		} else {
			/* doesn't match, resume with stacked context if we
			 * have one */
			if (--top < 0)
				return NULL;	/* no match, give up */

			ss = context[top].ss;
			s = context[top].s;
			p = context[top].p;
			first = context[top].first;
		}
	}

	/* end of pattern reached; never report an empty match */
	*m_len = (s - ss > 1) ? (unsigned int)(s - ss) : 1;
	return ss;
}
/**
 * Collect every occurrence of a pattern in an html box tree
 *
 * Matches found in the text of 'cur' are appended to the context's match
 * list, then each child box is searched recursively.
 *
 * \param pattern    the string pattern to search for
 * \param p_len      pattern length
 * \param cur        pointer to the current box
 * \param case_sens  whether to perform a case sensitive search
 * \param context    search context receiving the matches
 * \return true on success, false on memory allocation failure
 */
bool find_occurrences_html(const char *pattern, int p_len, struct box *cur,
		bool case_sens, struct search_context *context)
{
	struct box *child;

	/* ignore this box, if there's no visible text */
	if (!cur->object && cur->text) {
		const char *text = cur->text;
		unsigned remaining = cur->length;

		while (remaining > 0) {
			struct list_entry *entry;
			unsigned match_length;
			unsigned match_offset;
			const char *resume;
			const char *hit;

			hit = find_pattern(text, remaining, pattern, p_len,
					case_sens, &match_length);
			if (!hit)
				break;

			/* found string in box => add to list */
			match_offset = hit - cur->text;

			entry = add_entry(cur->byte_offset + match_offset,
					cur->byte_offset +
						match_offset +
						match_length, context);
			if (!entry)
				return false;

			/* the match starts and ends in this one box */
			entry->start_box = cur;
			entry->end_box = cur;

			/* carry on just past the end of this match */
			resume = hit + match_length;
			remaining -= (resume - text);
			text = resume;
		}
	}

	/* and recurse */
	child = cur->children;
	while (child) {
		if (!find_occurrences_html(pattern, p_len, child, case_sens,
				context))
			return false;
		child = child->next;
	}

	return true;
}
/**
 * Collect every occurrence of a pattern in a textplain content
 *
 * The content is searched line by line; each match is appended to the
 * context's match list.
 *
 * \param pattern    the string pattern to search for
 * \param p_len      pattern length
 * \param c          the content to be searched
 * \param case_sens  whether to perform a case sensitive search
 * \param context    search context receiving the matches
 * \return true on success, false on memory allocation failure
 */
bool find_occurrences_text(const char *pattern, int p_len,
		struct content *c, bool case_sens,
		struct search_context *context)
{
	int nlines = textplain_line_count(c);
	int line = 0;

	while (line < nlines) {
		size_t offset, length;
		const char *text = textplain_get_line(c, line,
				&offset, &length);

		/* lines that cannot be fetched are skipped */
		while (text && length > 0) {
			struct list_entry *entry;
			unsigned match_length;
			size_t start_idx;
			size_t consumed;
			const char *hit;

			hit = find_pattern(text, length, pattern, p_len,
					case_sens, &match_length);
			if (!hit)
				break;

			/* found string in line => add to list */
			start_idx = offset + (hit - text);

			entry = add_entry(start_idx, start_idx +
					match_length, context);
			if (!entry)
				return false;

			/* carry on just past the end of this match */
			consumed = (hit + match_length) - text;
			offset += consumed;
			length -= consumed;
			text += consumed;
		}

		line++;
	}

	return true;
}
/**
 * Determines whether any portion of the given text box should be
 * selected because it matches the current search string.
 *
 * \param  c             content the text belongs to
 * \param  start_offset  byte offset within text of string to be checked
 * \param  end_offset    byte offset within text
 * \param  start_idx     byte offset within string of highlight start
 * \param  end_idx       byte offset of highlight end
 * \param  context       search context holding the list of matches
 * \return true iff part of the box should be highlighted
 */
bool search_term_highlighted(struct content *c,
		unsigned start_offset, unsigned end_offset,
		unsigned *start_idx, unsigned *end_idx,
		struct search_context *context)
{
	struct list_entry *entry;

	/* matches held by this context only apply to its own content */
	if (c != context->c)
		return false;

	for (entry = context->found->next; entry; entry = entry->next) {
		/* only entries that currently carry a defined selection
		 * can highlight anything */
		if (!entry->sel)
			continue;
		if (!selection_defined(entry->sel))
			continue;

		if (selection_highlighted(entry->sel,
				start_offset, end_offset,
				start_idx, end_idx))
			return true;
	}

	return false;
}
/**
 * Specifies whether all matches or just the current match should
 * be highlighted in the search text.
 *
 * \param all      true to highlight every match, false for the current
 *                 match only
 * \param context  search context holding the list of matches
 */
void search_show_all(bool all, struct search_context *context)
{
	struct list_entry *entry;

	for (entry = context->found->next; entry; entry = entry->next) {
		bool wanted = all || entry == context->current;
		bool is_html;

		if (!wanted) {
			/* this match must not be highlighted: drop any
			 * selection it still carries */
			if (entry->sel) {
				selection_clear(entry->sel, true);
				selection_destroy(entry->sel);
				entry->sel = NULL;
			}
			continue;
		}

		if (entry->sel)
			continue;	/* already highlighted */

		is_html = (context->is_html == true);

		entry->sel = selection_create(context->c, is_html);
		if (!entry->sel)
			continue;

		/* HTML selections are rooted at the layout box tree;
		 * other content has no root */
		if (is_html)
			selection_init(entry->sel,
				((html_content *)context->c)->layout);
		else
			selection_init(entry->sel, NULL);

		selection_set_start(entry->sel, entry->start_idx);
		selection_set_end(entry->sel, entry->end_idx);
	}
}
/**
 * Append a new entry to the end of the context's list of matches
 *
 * \param  start_idx  offset of match start within textual representation
 * \param  end_idx    offset of match end
 * \param  context    search context owning the list
 * \return pointer to added entry, NULL iff failed
 */
struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
		struct search_context *context)
{
	/* the list head's prev pointer tracks the current last entry */
	struct list_entry *tail = context->found->prev;
	struct list_entry *entry = calloc(1, sizeof(*entry));

	if (entry == NULL) {
		warn_user("NoMemory", 0);
		return NULL;
	}

	entry->start_idx = start_idx;
	entry->end_idx = end_idx;
	entry->sel = NULL;
	entry->next = NULL;
	entry->prev = tail;

	/* link in after the old tail, or as the first entry */
	if (tail != NULL)
		tail->next = entry;
	else
		context->found->next = entry;

	context->found->prev = entry;

	return entry;
}
/**
 * Ends the search process, invalidating all state
 * freeing the list of found boxes
 *
 * The context is detached from its content (and from the browser window
 * showing it, if that window currently refers to this context), the
 * search string is passed to the add_recent callback when one is set,
 * and the string, the match entries and the context itself are freed.
 *
 * \param context  search context to destroy; must not be NULL and must
 *                 not be used after this call
 */
void search_destroy_context(struct search_context *context)
{
	assert(context != NULL);

	if (context->c != NULL) {
		struct browser_window *bw = search_get_browser_window(context);

		/* TODO: don't poke inside bw */
		/* the other callers in this file treat a NULL window as
		 * possible, so don't dereference it blindly here */
		if (bw != NULL && bw->cur_search == context)
			bw->cur_search = NULL;

		if (context->is_html)
			html_set_search(context->c, NULL);
		else
			textplain_set_search(context->c, NULL);
	}

	if ((context->string != NULL) && (context->callbacks != NULL) &&
			(context->callbacks->add_recent != NULL))
		context->callbacks->add_recent(context->string, context->p);

	/* the string belongs to the context, so release it whether or not
	 * a callback was there to record it */
	free(context->string);

	/* NOTE(review): the list head context->found is not released here;
	 * its allocation is not visible in this part of the file, so
	 * confirm whether it needs freeing too. */
	free_matches(context);
	free(context);
}

45
render/search.h Normal file
View File

@ -0,0 +1,45 @@
/*
* Copyright 2009 Mark Benjamin <netsurf-browser.org.MarkBenjamin@dfgh.net>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NETSURF_RENDER_SEARCH_H_
#define _NETSURF_RENDER_SEARCH_H_
#include <ctype.h>
#include <string.h>
#include "desktop/search.h"
/* Only declared here, not defined: users of this header handle search
 * contexts through pointers. */
struct search_context;
/**
 * Create a search context
 *
 * NOTE(review): the definition is not part of this header; presumably 'h'
 * is the content to search, 'callbacks' the front end callback table and
 * 'p' the pointer handed back to those callbacks -- confirm against the
 * implementation.
 */
struct search_context * search_create_context(struct hlcache_handle *h,
struct search_callbacks *callbacks, void *p);
/**
 * Ends the search process, invalidating all state and freeing the list
 * of matches
 *
 * \param context  search context to destroy
 */
void search_destroy_context(struct search_context *context);
/**
 * Begins/continues the search process
 * Note that this may be called many times for a single search.
 *
 * \param context  the search context to operate on
 * \param flags    the flags forward/back etc
 * \param string   the string to match
 */
void search_step(struct search_context *context, search_flags_t flags,
const char * string);
/**
 * Specifies whether all matches or just the current match should
 * be highlighted in the search text.
 *
 * \param all      true to highlight all matches
 * \param context  search context holding the matches
 */
void search_show_all(bool all, struct search_context *context);
/**
 * Determines whether any portion of the given text box should be
 * selected because it matches the current search string.
 *
 * \param  c             content the text belongs to
 * \param  start_offset  byte offset within text of string to be checked
 * \param  end_offset    byte offset within text
 * \param  start_idx     byte offset within string of highlight start
 * \param  end_idx       byte offset of highlight end
 * \param  context       search context holding the matches
 * \return true iff part of the box should be highlighted
 */
bool search_term_highlighted(struct content *c,
unsigned start_offset, unsigned end_offset,
unsigned *start_idx, unsigned *end_idx,
struct search_context *context);
#endif

View File

@ -42,6 +42,7 @@
#include "desktop/selection.h"
#include "render/box.h"
#include "render/font.h"
#include "render/search.h"
#include "render/textplain.h"
#include "utils/http.h"
#include "utils/log.h"
@ -69,6 +70,9 @@ typedef struct textplain_content {
struct browser_window *bw;
struct selection sel; /** Selection state */
/** Context for free text search, or NULL if none */
struct search_context *search;
} textplain_content;
@ -112,6 +116,7 @@ static void textplain_open(struct content *c, struct browser_window *bw,
struct object_params *params);
void textplain_close(struct content *c);
struct selection *textplain_get_selection(struct content *c);
struct search_context *textplain_get_search(struct content *c);
static nserror textplain_clone(const struct content *old,
struct content **newc);
static content_type textplain_content_type(lwc_string *mime_type);
@ -793,7 +798,8 @@ bool textplain_redraw(struct content *c, struct content_redraw_data *data,
&textplain_style,
tx, y + (lineno * scaled_line_height),
clip, line_height, data->scale, false,
&text->sel, ctx))
(struct content *)text, &text->sel,
text->search, ctx))
return false;
if (next_offset >= length)
@ -823,13 +829,12 @@ bool textplain_redraw(struct content *c, struct content_redraw_data *data,
highlighted = true;
}
if (!highlighted && (bw->search_context
!= NULL)) {
if (!highlighted && (text->search != NULL)) {
unsigned start_idx, end_idx;
if (search_term_highlighted(bw,
if (search_term_highlighted(c,
tab_ofst, tab_ofst + 1,
&start_idx, &end_idx,
bw->search_context))
text->search))
highlighted = true;
}
@ -876,6 +881,9 @@ void textplain_close(struct content *c)
{
textplain_content *text = (textplain_content *) c;
if (text->search != NULL)
search_destroy_context(text->search);
text->bw = NULL;
}
@ -891,6 +899,36 @@ struct selection *textplain_get_selection(struct content *c)
return &text->sel;
}
/**
 * Set a TEXTPLAIN content's search context
 *
 * \param c  content of type text
 * \param s  search context, or NULL if none
 */
void textplain_set_search(struct content *c, struct search_context *s)
{
	/* the content is a textplain_content behind the generic handle */
	((textplain_content *) c)->search = s;
}
/**
 * Return a TEXTPLAIN content's search context
 *
 * \param c  content of type text
 * \return content's search context, or NULL if none
 */
struct search_context *textplain_get_search(struct content *c)
{
	/* the content is a textplain_content behind the generic handle */
	return ((textplain_content *) c)->search;
}
/**
* Retrieve number of lines in content
*

View File

@ -48,5 +48,6 @@ int textplain_find_line(struct content *c, unsigned offset);
char *textplain_get_raw_data(struct content *c,
unsigned start, unsigned end, size_t *plen);
struct browser_window *textplain_get_browser_window(struct content *c);
void textplain_set_search(struct content *c, struct search_context *s);
#endif