summary refs log tree commit diff stats
diff options
context:
space:
mode:
author RichardHitt <rbh00@netcom.com> 2013-01-02 14:50:26 -0800
committer RichardHitt <rbh00@netcom.com> 2013-01-02 14:50:26 -0800
commit 4af624627eafdd5db9e0200bfd05c59aa60292b9 (patch)
tree 602d296c8e34a36396979ea0b82cb7bb471af7da
parent 7f2846a5bddbfaf78e99b303db4c9152460bb4d3 (diff)
overhauling of URL detection, including channel, nick, etc 'words'
-rw-r--r-- src/common/tree.c 2
-rw-r--r-- src/common/tree.h 2
-rw-r--r-- src/common/url.c 459
-rw-r--r-- src/common/url.h 3
-rw-r--r-- src/common/userlist.c 2
-rw-r--r-- src/common/userlist.h 2
-rw-r--r-- src/fe-gtk/fe-gtk.c 2
-rw-r--r-- src/fe-gtk/maingui.c 67
-rw-r--r-- src/fe-gtk/xtext.c 125
-rw-r--r-- src/fe-gtk/xtext.h 4
10 files changed, 395 insertions, 273 deletions
diff --git a/src/common/tree.c b/src/common/tree.c
index 33fe1d41..715b0e56 100644
--- a/src/common/tree.c
+++ b/src/common/tree.c
@@ -142,7 +142,7 @@ mybsearch (const void *key, void **array, size_t nmemb,
 }
 
 void *
-tree_find (tree *t, void *key, tree_cmp_func *cmp, void *data, int *pos)
+tree_find (tree *t, const void *key, tree_cmp_func *cmp, void *data, int *pos)
 {
 	if (!t || !t->array)
 		return NULL;
diff --git a/src/common/tree.h b/src/common/tree.h
index 4a158052..ced8e425 100644
--- a/src/common/tree.h
+++ b/src/common/tree.h
@@ -8,7 +8,7 @@ typedef int (tree_traverse_func) (const void *key, void *data);
 
 tree *tree_new (tree_cmp_func *cmp, void *data);
 void tree_destroy (tree *t);
-void *tree_find (tree *t, void *key, tree_cmp_func *cmp, void *data, int *pos);
+void *tree_find (tree *t, const void *key, tree_cmp_func *cmp, void *data, int *pos);
 int tree_remove (tree *t, void *key, int *pos);
 void *tree_remove_at_pos (tree *t, int pos);
 void tree_foreach (tree *t, tree_traverse_func *func, void *data);
diff --git a/src/common/url.c b/src/common/url.c
index c5335859..98293635 100644
--- a/src/common/url.c
+++ b/src/common/url.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */
 
 #include <stdio.h>
@@ -32,6 +32,13 @@
 
 void *url_tree = NULL;
 GTree *url_btree = NULL;
+static int do_an_re (const char *word, int *start, int *end, int *type);
+static GRegex *re_url (void);
+static GRegex *re_host (void);
+static GRegex *re_email (void);
+static GRegex *re_nick (void);
+static GRegex *re_channel (void);
+static GRegex *re_path (void);
 
 
 static int
@@ -177,158 +184,38 @@ url_add (char *urltext, int len)
    keep it FAST! This new version was found to be almost 3x faster than
    2.4.4 release. */
 
+static int laststart = 0;
+static int lastend = 0;
+static int lasttype = 0;
+
 int
-url_check_word (const char *word, int len)
+url_check_word (const char *word)	/* returns a WORD_* type or 0; the matched span is kept for url_last() */
 {
-#define D(x) (x), ((sizeof (x)) - 1)
-	static const struct {
-		const char *s;
-		int len;
-	}
-	prefix[] = {
-		{ D("irc.") },
-		{ D("ftp.") },
-		{ D("www.") },
-		{ D("irc://") },
-		{ D("ftp://") },
-		{ D("http://") },
-		{ D("https://") },
-		{ D("file://") },
-		{ D("rtsp://") },
-		{ D("ut2004://") },
-	},
-	suffix[] = {
-		{ D(".org") },
-		{ D(".net") },
-		{ D(".com") },
-		{ D(".edu") },
-		{ D(".html") },
-		{ D(".info") },
-		{ D(".name") },
-		/* Some extra common suffixes.
-		foo.blah/baz.php etc should work now, rather than
-		needing  http:// at the beginning. */
-		{ D(".php") },
-		{ D(".htm") },
-		{ D(".aero") },
-		{ D(".asia") },
-		{ D(".biz") },
-		{ D(".cat") },
-		{ D(".coop") },
-		{ D(".int") },
-		{ D(".jobs") },
-		{ D(".mobi") },
-		{ D(".museum") },
-		{ D(".pro") },
-		{ D(".tel") },
-		{ D(".travel") },
-		{ D(".xxx") },
-		{ D(".asp") },
-		{ D(".aspx") },
-		{ D(".shtml") },
-		{ D(".xml") },
-	};
-#undef D
-	const char *at, *dot;
-	int i, dots;
-
-	/* this is pretty much the same as in logmask_is_fullpath() except with length checks and .\ for portable mode */
-#ifdef WIN32
-	if ((len > 1 && word[0] == '\\') ||
-		(len > 2 && word[0] == '.' && word[1] == '\\') ||
-		(len > 2 && (((word[0] >= 'A' && word[0] <= 'Z') || (word[0] >= 'a' && word[0] <= 'z')) && word[1] == ':')))
-#else
-	if (len > 1 && word[0] == '/')
-#endif
-	{
-		return WORD_PATH;
-	}
-
-	if (len > 1 && word[1] == '#' && strchr("@+^%*#", word[0]))
-		return WORD_CHANNEL;
-
-	if ((word[0] == '#' || word[0] == '&') && word[1] != '#' && word[1] != 0)
-		return WORD_CHANNEL;
-
-	for (i = 0; i < G_N_ELEMENTS(prefix); i++)
-	{
-		int l;
-
-		l = prefix[i].len;
-		if (len > l)
-		{
-			int j;
-
-			/* This is pretty much g_ascii_strncasecmp(). */
-			for (j = 0; j < l; j++)
-			{
-				unsigned char c = word[j];
-				if (tolower(c) != prefix[i].s[j])
-					break;
-			}
-			if (j == l)
-				return WORD_URL;
-		}
-	}
-
-	at = strchr (word, '@');	  /* check for email addy */
-	dot = strrchr (word, '.');
-	if (at && dot)
-	{
-		if (at < dot)
-		{
-			if (strchr (word, '*'))
-				return WORD_HOST;
-			else
-				return WORD_EMAIL;
-		}
-	}
- 
-	/* check if it's an IP number */
-	dots = 0;
-	for (i = 0; i < len; i++)
-	{
-		if (word[i] == '.' && i > 0)
-			dots++;	/* allow 127.0.0.1:80 */
-		else if (!isdigit ((unsigned char) word[i]) && word[i] != ':')
-		{
-			dots = 0;
-			break;
-		}
-	}
-	if (dots == 3)
-		return WORD_HOST;
-
-	if (len > 5)
+	laststart = lastend = lasttype = 0;	/* forget the previous word's result */
+	if (do_an_re (word, &laststart, &lastend, &lasttype))
 	{
-		for (i = 0; i < G_N_ELEMENTS(suffix); i++)
+		switch (lasttype)
 		{
-			int l;
-
-			l = suffix[i].len;
-			if (len > l)
-			{
-				const unsigned char *p = &word[len - l];
-				int j;
-
-				/* This is pretty much g_ascii_strncasecmp(). */
-				for (j = 0; j < l; j++)
-				{
-					if (tolower(p[j]) != suffix[i].s[j])
-						break;
-				}
-				if (j == l)
-					return WORD_HOST;
-			}
+			case WORD_NICK:
+				if (!isalnum ((unsigned char) word[laststart]))	/* cast: plain char may be negative */
+					laststart++;	/* step over a leading mode prefix */
+				if (!userlist_find (current_sess, &word[laststart]))
+					lasttype = 0;	/* looks like a nick but is not in the user list */
+				return lasttype;
+			case WORD_EMAIL:
+				if (!isalnum ((unsigned char) word[laststart]))
+					laststart++;
+				/* Fall through */
+			case WORD_URL:
+			case WORD_HOST:
+			case WORD_CHANNEL:
+			case WORD_PATH:	/* do_an_re() reports paths too; fe_open_url_locale() tests for WORD_PATH */
+				return lasttype;
+			default:
+				return 0;	/* Should not occur */
 		}
-
-		if (word[len - 3] == '.' &&
-			 isalpha ((unsigned char) word[len - 2]) &&
-				isalpha ((unsigned char) word[len - 1]))
-			return WORD_HOST;
 	}
-
-	return 0;
+	return 0;	/* no regex matched */
 }
 
 /* List of IRC commands for which contents (and thus possible URLs)
@@ -346,9 +233,10 @@ static char *commands[] = {
 void
 url_check_line (char *buf, int len)
 {
+	GRegex *re(void);
+	GMatchInfo *gmi;
 	char *po = buf;
-	char *start;
-	int i, wlen;
+	int i;
 
 	/* Skip over message prefix */
 	if (*po == ':')
@@ -379,50 +267,243 @@ url_check_line (char *buf, int len)
 		return;
 	po++;
 
-	if (buf[0] == ':' && buf[1] != 0)
-		po++;
+	g_regex_match(re_url(), po, 0, &gmi);
+	while (g_match_info_matches(gmi))
+	{
+		int start, end;
+
+		g_match_info_fetch_pos(gmi, 0, &start, &end);
+		if (po[end - 1] == '\r')
+			po[--end] = 0;
+		if (g_strstr_len (po + start, end - start, "://"))
+			url_add(po + start, end - start);
+		g_match_info_next(gmi, NULL);
+	}
+	g_match_info_free(gmi);
+}
+
+int
+url_last (int *lstart, int *lend)
+{	/* Hand back the span and type recorded by the most recent url_check_word() call. */
+	*lstart = laststart;	/* start offset of the match within the checked word */
+	*lend = lastend;	/* end offset as reported by g_match_info_fetch_pos() */
+	return lasttype;	/* WORD_* value, or 0 if the word was not recognised */
+}
+
+static int
+do_an_re(const char *word,int *start, int *end, int *type)
+{	/* Try each matcher in table order; the first one that matches sets *start, *end and *type. */
+	typedef struct func_s {
+		GRegex *(*fn)(void);	/* accessor returning the compiled regex */
+		int type;	/* WORD_* value reported when that regex matches */
+	} func_t;
+	func_t funcs[] =	/* order matters: the first matching entry decides the type */
+	{
+		{ re_email, WORD_EMAIL },
+		{ re_url, WORD_URL },
+		{ re_host, WORD_HOST },
+		{ re_channel, WORD_CHANNEL },
+		{ re_path, WORD_PATH },
+		{ re_nick, WORD_NICK }
+	};
 
-	start = po;
+	GMatchInfo *gmi;
+	int k;
 
-	/* check each "word" (space separated) */
-	while (1)
+	for (k = 0; k < sizeof funcs / sizeof (func_t); k++)
 	{
-		switch (po[0])
+		g_regex_match (funcs[k].fn(), word, 0, &gmi);
+		if (!g_match_info_matches (gmi))
 		{
-		case 0:
-		case ' ':
-		case '\r':
-
-			wlen = po - start;
-			if (wlen > 2)
-			{
-				/* HACK! :( */
-				/* This is to work around not being able to detect URLs that are at
-				   the start of messages. */
-				if (start[0] == ':')
-				{
-					start++;
-					wlen--;
-				}
-				if (start[0] == '+' || start[0] == '-')
-				{
-					start++;
-					wlen--;
-				}
-
-				if (wlen > 2 && url_check_word (start, wlen) == WORD_URL)
-				{
-					url_add (start, wlen);
-				}
-			}
-			if (po[0] == 0)
-				return;
-			po++;
-			start = po;
-			break;
-
-		default:
-			po++;
+			g_match_info_free (gmi);	/* no hit: release and try the next matcher */
+			continue;
+		}
+		while (g_match_info_matches (gmi))	/* walk every match; *start / *end keep the last one */
+		{
+			g_match_info_fetch_pos (gmi, 0, start, end);
+			g_match_info_next (gmi, NULL);
 		}
+		g_match_info_free (gmi);
+		*type = funcs[k].type;
+		return TRUE;
 	}
+
+	return FALSE;	/* none of the regexes matched; outputs are left untouched */
+}
+
+/*	Miscellaneous description --- */
+#define DOMAIN "[-a-z0-9]+(\\.[-a-z0-9]+)*\\.[a-z]+"
+#define IPADDR "[0-9]+(\\.[0-9]+){3}"
+#define HOST "(" DOMAIN "|" IPADDR ")"
+#define OPT_PORT "(:[1-9][0-9]{0,4})?"
+/* Compile grist (caseless, optimized) and free it; returns what g_regex_new() gave. type is unused. */
+GRegex *
+make_re(char *grist, char *type)
+{
+	GError *err = NULL;
+	GRegex *ret = g_regex_new (grist, G_REGEX_CASELESS | G_REGEX_OPTIMIZE, 0, &err);
+
+	g_clear_error (&err);	/* a GError set on failure used to be leaked; no-op when err is NULL */
+	g_free (grist);
+	return ret;
+}
+
+/*	HOST description --- */
+/* (see miscellaneous above) */
+static GRegex *
+re_host (void)
+{
+	static GRegex *host_ret;
+	char *grist;
+
+	/* Reuse the compiled regex; without this guard every call recompiled and leaked one. */
+	if (host_ret) return host_ret;
+
+	grist = g_strdup_printf ("(" HOST OPT_PORT ")");	/* HOST */
+	host_ret = make_re (grist, "re_host");
+	return host_ret;
+}
+
+/*	URL description --- */
+#define SCHEME "(%s)"
+#define LPAR "\\("
+#define RPAR "\\)"
+#define NOPARENS "[^() \t]*"
+
+char *prefix[] = {
+	"irc\\.",
+	"ftp\\.",
+	"www\\.",
+	"irc://",
+	"ftp://",
+	"http://",
+	"https://",
+	"file://",
+	"rtsp://",
+	NULL
+};
+
+static GRegex *
+re_url (void)
+{	/* Return the URL matcher, building it on the first call. */
+	static GRegex *url_ret;	/* kept across calls */
+	char *grist;
+	char *scheme;
+
+	if (url_ret) return url_ret;
+
+	scheme = g_strjoinv ("|", prefix);	/* "a|b|c" alternation of prefix[]; fills the %s in SCHEME */
+	grist = g_strdup_printf (
+		"("	/* URL or HOST */
+			SCHEME HOST OPT_PORT
+			"("	/* Optional "/path?query_string#fragment_id" */
+				"/"	/* Must start with slash */
+				"("	
+					"(" LPAR NOPARENS RPAR ")"
+					"|"
+					"(" NOPARENS ")"
+				")*"	/* Zero or more occurrences of either of these */
+				"(?<![.,?!\\]])"	/* Not allowed to end with these */
+			")?"	/* Zero or one of this /path?query_string#fragment_id thing */
+
+		")"
+	, scheme
+	);
+	url_ret = make_re (grist, "re_url");	/* make_re() frees grist */
+	g_free (scheme);
+	return url_ret;
+}
+
+/*	EMAIL description --- */
+#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST ")"
+
+static GRegex *
+re_email (void)
+{
+	/* Built on the first call and handed back unchanged afterwards. */
+	static GRegex *compiled;
+
+	if (!compiled)
+	{
+		/* The whole address sits inside one capture group. */
+		char *pattern = g_strdup_printf ("(" EMAIL ")");
+
+		/* make_re() releases pattern once it has been compiled. */
+		compiled = make_re (pattern, "re_email");
+	}
+	return compiled;
+}
+
+/*	NICK description --- */
+#define NICKPRE "~+!@%%&"
+#define NICKHYP	"-"
+#define NICKLET "a-z"
+#define NICKDIG "0-9"
+/*	Note for NICKSPE:  \\\\ boils down to a single \ */
+#define NICKSPE	"\\[\\]\\\\`_^{|}"
+#define NICK0 "[" NICKPRE "]?[" NICKLET NICKDIG "]"
+#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]+"
+#define NICK	NICK0 NICK1
+
+static GRegex *
+re_nick (void)
+{
+	/* Built on the first call and handed back unchanged afterwards. */
+	static GRegex *compiled;
+
+	if (!compiled)
+	{
+		/* Goes through printf formatting, so the "%%" in NICKPRE ends up as one '%'. */
+		char *pattern = g_strdup_printf ("(" NICK ")");
+
+		/* make_re() releases pattern once it has been compiled. */
+		compiled = make_re (pattern, "re_nick");
+	}
+	return compiled;
+}
+
+/*	CHANNEL description --- */
+#define CHANNEL "#[^ \t\a,:]+"
+
+static GRegex *
+re_channel (void)
+{
+	/* Built on the first call and handed back unchanged afterwards. */
+	static GRegex *compiled;
+
+	if (!compiled)
+	{
+		/* The whole channel name sits inside one capture group. */
+		char *pattern = g_strdup_printf ("(" CHANNEL ")");
+
+		/* make_re() releases pattern once it has been compiled. */
+		compiled = make_re (pattern, "re_channel");
+	}
+	return compiled;
+}
+
+/*	PATH description --- */
+#ifdef WIN32
+/* Windows path can be \ or .\ or ..\ or e.g. C: etc */
+#define PATH "^(\\\\|\\.{1,2}\\\\|[a-z]:).*"
+#else
+/* Linux path can be / or ./ or ../ etc */
+#define PATH "^(/|\\./|\\.\\./).*"
+#endif
+
+static GRegex *
+re_path (void)
+{
+	/* Built on the first call and handed back unchanged afterwards. */
+	static GRegex *compiled;
+
+	if (!compiled)
+	{
+		/* PATH is chosen per platform by the #ifdef WIN32 above. */
+		char *pattern = g_strdup_printf ("(" PATH ")");
+
+		/* make_re() releases pattern once it has been compiled. */
+		compiled = make_re (pattern, "re_path");
+	}
+	return compiled;
 }
diff --git a/src/common/url.h b/src/common/url.h
index 9a815fe1..b8e5c848 100644
--- a/src/common/url.h
+++ b/src/common/url.h
@@ -14,7 +14,8 @@ extern void *url_tree;
 
 void url_clear (void);
 void url_save_tree (const char *fname, const char *mode, gboolean fullpath);
-int url_check_word (const char *word, int len);
+int url_last (int *, int *);
+int url_check_word (const char *word);
 void url_check_line (char *buf, int len);
 
 #endif
diff --git a/src/common/userlist.c b/src/common/userlist.c
index f6a091a4..868f8a38 100644
--- a/src/common/userlist.c
+++ b/src/common/userlist.c
@@ -192,7 +192,7 @@ find_cmp (const char *name, struct User *user, server *serv)
 }
 
 struct User *
-userlist_find (struct session *sess, char *name)
+userlist_find (struct session *sess, const char *name)
 {
 	int pos;
 
diff --git a/src/common/userlist.h b/src/common/userlist.h
index 74ab4029..d0d79da8 100644
--- a/src/common/userlist.h
+++ b/src/common/userlist.h
@@ -26,7 +26,7 @@ int userlist_add_hostname (session *sess, char *nick,
 									char *hostname, char *realname,
 									char *servername, unsigned int away);
 void userlist_set_away (session *sess, char *nick, unsigned int away);
-struct User *userlist_find (session *sess, char *name);
+struct User *userlist_find (session *sess, const char *name);
 struct User *userlist_find_global (server *serv, char *name);
 void userlist_clear (session *sess);
 void userlist_free (session *sess);
diff --git a/src/fe-gtk/fe-gtk.c b/src/fe-gtk/fe-gtk.c
index 57ffa2f6..204b7570 100644
--- a/src/fe-gtk/fe-gtk.c
+++ b/src/fe-gtk/fe-gtk.c
@@ -1058,7 +1058,7 @@ static void
 fe_open_url_locale (const char *url)
 {
 	/* the http:// part's missing, prepend it, otherwise it won't always work */
-	if (strchr (url, ':') == NULL && url_check_word (url, strlen (url)) != WORD_PATH)
+	if (strchr (url, ':') == NULL && url_check_word (url) != WORD_PATH)
 	{
 		url = g_strdup_printf ("http://%s", url);
 		fe_open_url_inner (url);
diff --git a/src/fe-gtk/maingui.c b/src/fe-gtk/maingui.c
index a52aa9d4..b6fdbdfa 100644
--- a/src/fe-gtk/maingui.c
+++ b/src/fe-gtk/maingui.c
@@ -2242,20 +2242,14 @@ mg_create_topicbar (session *sess, GtkWidget *box)
 /* check if a word is clickable */
 
 static int
-mg_word_check (GtkWidget * xtext, char *word, int len)
+mg_word_check (GtkWidget * xtext, char *word)
 {
 	session *sess = current_sess;
 	int ret;
 
-	ret = url_check_word (word, len);	/* common/url.c */
-	if (ret == 0)
-	{
-		if (( (word[0]=='@' || word[0]=='+' || word[0]=='%') && userlist_find (sess, word+1)) || userlist_find (sess, word))
-			return WORD_NICK;
-
-		if (sess->type == SESS_DIALOG)
-			return WORD_DIALOG;
-	}
+	ret = url_check_word (word);
+	if (ret == 0 && sess->type == SESS_DIALOG)
+		return WORD_DIALOG;
 
 	return ret;
 }
@@ -2266,23 +2260,28 @@ static void
 mg_word_clicked (GtkWidget *xtext, char *word, GdkEventButton *even)
 {
 	session *sess = current_sess;
+	int word_type, start, end;
+	char *tmp;
 
-	if (even->button == 1)			/* left button */
+	if (word == NULL)
 	{
-		if (word == NULL)
-		{
+		if (even->button == 1)		/* left button */
 			mg_focus (sess);
-			return;
-		}
+		return;
+	}
+
+	word_type = mg_word_check (xtext, word);
+	url_last (&start, &end);
 
-		if ((even->state & 13) == prefs.hex_gui_url_mod)
+	if (even->button == 1 && (even->state & 13) == prefs.hex_gui_url_mod)
+	{
+		switch (word_type)
 		{
-			switch (mg_word_check (xtext, word, strlen (word)))
-			{
-			case WORD_URL:
-			case WORD_HOST:
-				fe_open_url (word);
-			}
+		case WORD_URL:
+		case WORD_HOST:
+			word[end] = 0;
+			word += start;
+			fe_open_url (word);
 		}
 		return;
 	}
@@ -2296,7 +2295,7 @@ mg_word_clicked (GtkWidget *xtext, char *word, GdkEventButton *even)
 		return;
 	}
 
-	switch (mg_word_check (xtext, word, strlen (word)))
+	switch (word_type)
 	{
 	case 0:
 	case WORD_PATH:
@@ -2304,26 +2303,22 @@ mg_word_clicked (GtkWidget *xtext, char *word, GdkEventButton *even)
 		break;
 	case WORD_URL:
 	case WORD_HOST:
+		word[end] = 0;
+		word += start;
 		menu_urlmenu (even, word);
 		break;
 	case WORD_NICK:
-		menu_nickmenu (sess, even, (word[0]=='@' || word[0]=='+' || word[0]=='%') ?
-			word+1 : word, FALSE);
+		menu_nickmenu (sess, even, word + (ispunct (*word)? 1: 0), FALSE);
 		break;
 	case WORD_CHANNEL:
-		if (*word == '@' || *word == '+' || *word=='^' || *word=='%' || *word=='*')
-			word++;
-		menu_chanmenu (sess, even, word);
+		menu_chanmenu (sess, even, word + (ispunct (*word)? 1: 0));
 		break;
 	case WORD_EMAIL:
-		{
-			char *newword = malloc (strlen (word) + 10);
-			if (*word == '~')
-				word++;
-			sprintf (newword, "mailto:%s", word);
-			menu_urlmenu (even, newword);
-			free (newword);
-		}
+		word[end] = 0;
+		word += start;
+		tmp = g_strdup_printf("mailto:%s", word + (ispunct (*word)? 1: 0));
+		menu_urlmenu (even, tmp);
+		g_free (tmp);
 		break;
 	case WORD_DIALOG:
 		menu_nickmenu (sess, even, sess->channel, FALSE);
diff --git a/src/fe-gtk/xtext.c b/src/fe-gtk/xtext.c
index e8419b5a..4d2a8a69 100644
--- a/src/fe-gtk/xtext.c
+++ b/src/fe-gtk/xtext.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  * =========================================================================
  *
  * xtext, the text widget used by X-Chat.
@@ -73,6 +73,7 @@
 #include "../common/fe.h"
 #include "../common/util.h"
 #include "../common/hexchatc.h"
+#include "../common/url.h"
 #include "fe-gtk.h"
 #include "xtext.h"
 #include "fkeys.h"
@@ -1901,7 +1902,7 @@ gtk_xtext_selection_update (GtkXText * xtext, GdkEventMotion * event, int p_y, g
 
 static char *
 gtk_xtext_get_word (GtkXText * xtext, int x, int y, textentry ** ret_ent,
-						  int *ret_off, int *ret_len)
+						  int *ret_off, int *ret_len, GSList **slp)
 {
 	textentry *ent;
 	int offset;
@@ -1950,9 +1951,9 @@ gtk_xtext_get_word (GtkXText * xtext, int x, int y, textentry ** ret_ent,
 	if (ret_off)
 		*ret_off = word - ent->str;
 	if (ret_len)
-		*ret_len = str - word;
+		*ret_len = len;		/* Length before stripping */
 
-	return gtk_xtext_strip_color (word, len, xtext->scratch_buffer, NULL, NULL, NULL, FALSE);
+	return gtk_xtext_strip_color (word, len, xtext->scratch_buffer, NULL, NULL, slp, FALSE);
 }
 
 #ifdef MOTION_MONITOR
@@ -2028,14 +2029,62 @@ gtk_xtext_check_mark_stamp (GtkXText *xtext, GdkModifierType mask)
 	return redraw;
 }
 
+static int
+gtk_xtext_get_word_adjust (GtkXText *xtext, int x, int y, textentry **word_ent, int *offset, int *len)
+{	/* Classify the word at (x, y) with urlcheck_function and narrow *offset / *len to the span from url_last(). */
+	GSList *slp = NULL;	/* filled through gtk_xtext_get_word(); freed before returning */
+	unsigned char *word;
+	int word_type = 0;	/* stays 0 when there is no word under the pointer */
+
+	word = gtk_xtext_get_word (xtext, x, y, word_ent, offset, len, &slp);
+	if (word)
+	{
+		int laststart, lastend;
+
+		word_type = xtext->urlcheck_function (GTK_WIDGET (xtext), word);
+		if (word_type > 0)
+		{
+			if (url_last (&laststart, &lastend))	/* nonzero: a match span was recorded */
+			{
+				int cumlen, startadj = 0, endadj = 0;
+				offlen_t o;	/* each list node packs an (off, len) pair into its pointer value */
+				GSList *sl;
+
+				for (sl = slp, cumlen = 0; sl; sl = g_slist_next (sl))	/* find the entry covering laststart */
+				{
+					o.u = GPOINTER_TO_UINT (sl->data);
+					startadj = o.o.off - cumlen;	/* NOTE(review): presumably bytes removed by gtk_xtext_strip_color before this run - confirm */
+					cumlen += o.o.len;
+					if (laststart < cumlen)
+						break;
+				}
+				for (sl = slp, cumlen = 0; sl; sl = g_slist_next (sl))	/* same walk for lastend */
+				{
+					o.u = GPOINTER_TO_UINT (sl->data);
+					endadj = o.o.off - cumlen;
+					cumlen += o.o.len;
+					if (lastend < cumlen)
+						break;
+				}
+				laststart += startadj;
+				*offset += laststart;
+				*len = lastend + endadj - laststart;	/* laststart already includes startadj here */
+			}
+		}
+	}
+	g_slist_free (slp);
+
+	return word_type;	/* whatever urlcheck_function returned, or 0 */
+}
+
 static gboolean
 gtk_xtext_motion_notify (GtkWidget * widget, GdkEventMotion * event)
 {
 	GtkXText *xtext = GTK_XTEXT (widget);
 	GdkModifierType mask;
 	int redraw, tmp, x, y, offset, len, line_x;
-	unsigned char *word;
 	textentry *word_ent;
+	int word_type;
 
 	gdk_window_get_pointer (widget->window, &x, &y, &mask);
 
@@ -2104,43 +2153,40 @@ gtk_xtext_motion_notify (GtkWidget * widget, GdkEventMotion * event)
 	if (xtext->urlcheck_function == NULL)
 		return FALSE;
 
-	word = gtk_xtext_get_word (xtext, x, y, &word_ent, &offset, &len);
-	if (word)
+	word_type = gtk_xtext_get_word_adjust (xtext, x, y, &word_ent, &offset, &len);
+	if (word_type > 0)
 	{
-		if (xtext->urlcheck_function (GTK_WIDGET (xtext), word, len) > 0)
+		if (!xtext->cursor_hand ||
+			 xtext->hilight_ent != word_ent ||
+			 xtext->hilight_start != offset ||
+			 xtext->hilight_end != offset + len)
 		{
-			if (!xtext->cursor_hand ||
-				 xtext->hilight_ent != word_ent ||
-				 xtext->hilight_start != offset ||
-				 xtext->hilight_end != offset + len)
+			if (!xtext->cursor_hand)
 			{
-				if (!xtext->cursor_hand)
-				{
-					gdk_window_set_cursor (GTK_WIDGET (xtext)->window,
-											  		xtext->hand_cursor);
-					xtext->cursor_hand = TRUE;
-				}
+				gdk_window_set_cursor (GTK_WIDGET (xtext)->window,
+										  		xtext->hand_cursor);
+				xtext->cursor_hand = TRUE;
+			}
 
-				/* un-render the old hilight */
-				if (xtext->hilight_ent)
-					gtk_xtext_unrender_hilight (xtext);
+			/* un-render the old hilight */
+			if (xtext->hilight_ent)
+				gtk_xtext_unrender_hilight (xtext);
 
-				xtext->hilight_ent = word_ent;
-				xtext->hilight_start = offset;
-				xtext->hilight_end = offset + len;
+			xtext->hilight_ent = word_ent;
+			xtext->hilight_start = offset;
+			xtext->hilight_end = offset + len;
 
-				xtext->skip_border_fills = TRUE;
-				xtext->render_hilights_only = TRUE;
-				xtext->skip_stamp = TRUE;
+			xtext->skip_border_fills = TRUE;
+			xtext->render_hilights_only = TRUE;
+			xtext->skip_stamp = TRUE;
 
-				gtk_xtext_render_ents (xtext, word_ent, NULL);
+			gtk_xtext_render_ents (xtext, word_ent, NULL);
 
-				xtext->skip_border_fills = FALSE;
-				xtext->render_hilights_only = FALSE;
-				xtext->skip_stamp = FALSE;
-			}
-			return FALSE;
+			xtext->skip_border_fills = FALSE;
+			xtext->render_hilights_only = FALSE;
+			xtext->skip_stamp = FALSE;
 		}
+		return FALSE;
 	}
 
 	gtk_xtext_leave_notify (widget, NULL);
@@ -2280,7 +2326,7 @@ gtk_xtext_button_release (GtkWidget * widget, GdkEventButton * event)
 
 		if (!xtext->hilighting)
 		{
-			word = gtk_xtext_get_word (xtext, event->x, event->y, 0, 0, 0);
+			word = gtk_xtext_get_word (xtext, event->x, event->y, 0, 0, 0, 0);
 			g_signal_emit (G_OBJECT (xtext), xtext_signals[WORD_CLICK], 0, word ? word : NULL, event);
 		} else
 		{
@@ -2288,7 +2334,6 @@ gtk_xtext_button_release (GtkWidget * widget, GdkEventButton * event)
 		}
 	}
 
-
 	return FALSE;
 }
 
@@ -2305,7 +2350,7 @@ gtk_xtext_button_press (GtkWidget * widget, GdkEventButton * event)
 
 	if (event->button == 3 || event->button == 2) /* right/middle click */
 	{
-		word = gtk_xtext_get_word (xtext, x, y, 0, 0, 0);
+		word = gtk_xtext_get_word (xtext, x, y, 0, 0, 0, 0);
 		if (word)
 		{
 			g_signal_emit (G_OBJECT (xtext), xtext_signals[WORD_CLICK], 0,
@@ -2322,7 +2367,7 @@ gtk_xtext_button_press (GtkWidget * widget, GdkEventButton * event)
 	if (event->type == GDK_2BUTTON_PRESS)	/* WORD select */
 	{
 		gtk_xtext_check_mark_stamp (xtext, mask);
-		if (gtk_xtext_get_word (xtext, x, y, &ent, &offset, &len))
+		if (gtk_xtext_get_word (xtext, x, y, &ent, &offset, &len, 0))
 		{
 			if (len == 0)
 				return FALSE;
@@ -2343,7 +2388,7 @@ gtk_xtext_button_press (GtkWidget * widget, GdkEventButton * event)
 	if (event->type == GDK_3BUTTON_PRESS)	/* LINE select */
 	{
 		gtk_xtext_check_mark_stamp (xtext, mask);
-		if (gtk_xtext_get_word (xtext, x, y, &ent, 0, 0))
+		if (gtk_xtext_get_word (xtext, x, y, &ent, 0, 0, 0))
 		{
 			gtk_xtext_selection_clear (xtext->buffer);
 			ent->mark_start = 0;
@@ -2852,7 +2897,7 @@ gtk_xtext_render_flush (GtkXText * xtext, int x, int y, unsigned char *str,
 {
 	int str_width, dofill;
 	GdkDrawable *pix = NULL;
-	int dest_x, dest_y;
+	int dest_x = 0, dest_y = 0;
 
 	if (xtext->dont_render || len < 1 || xtext->hidden)
 		return 0;
@@ -5904,7 +5949,7 @@ gtk_xtext_set_tint (GtkXText *xtext, int tint_red, int tint_green, int tint_blue
 }
 
 void
-gtk_xtext_set_urlcheck_function (GtkXText *xtext, int (*urlcheck_function) (GtkWidget *, char *, int))
+gtk_xtext_set_urlcheck_function (GtkXText *xtext, int (*urlcheck_function) (GtkWidget *, char *))
 {
 	xtext->urlcheck_function = urlcheck_function;
 }
diff --git a/src/fe-gtk/xtext.h b/src/fe-gtk/xtext.h
index 48c71d0c..cc6bbebb 100644
--- a/src/fe-gtk/xtext.h
+++ b/src/fe-gtk/xtext.h
@@ -179,7 +179,7 @@ struct _GtkXText
 	unsigned char scratch_buffer[4096];
 
 	void (*error_function) (int type);
-	int (*urlcheck_function) (GtkWidget * xtext, char *word, int len);
+	int (*urlcheck_function) (GtkWidget * xtext, char *word);
 
 	int jump_out_offset;	/* point at which to stop rendering */
 	int jump_in_offset;	/* "" start rendering */
@@ -274,7 +274,7 @@ void gtk_xtext_set_show_separator (GtkXText *xtext, gboolean show_separator);
 void gtk_xtext_set_thin_separator (GtkXText *xtext, gboolean thin_separator);
 void gtk_xtext_set_time_stamp (xtext_buffer *buf, gboolean timestamp);
 void gtk_xtext_set_tint (GtkXText *xtext, int tint_red, int tint_green, int tint_blue);
-void gtk_xtext_set_urlcheck_function (GtkXText *xtext, int (*urlcheck_function) (GtkWidget *, char *, int));
+void gtk_xtext_set_urlcheck_function (GtkXText *xtext, int (*urlcheck_function) (GtkWidget *, char *));
 void gtk_xtext_set_wordwrap (GtkXText *xtext, gboolean word_wrap);
 
 xtext_buffer *gtk_xtext_buffer_new (GtkXText *xtext);