From 6bc05a8bc835d38e7e245bb70ee748809f5277b2 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Sun, 16 Jun 2013 02:57:38 +0100 Subject: Improved host matching in url: * Refined ipv4 addr * Added ipv6 addr --- src/common/url.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index 6c429ed8..84c112d1 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -349,8 +349,9 @@ do_an_re(const char *word,int *start, int *end, int *type) /* Miscellaneous description --- */ #define DOMAIN "[a-z0-9][-a-z0-9]*(\\.[-a-z0-9]+)*\\." #define TLD "[a-z][-a-z0-9]*[a-z]" -#define IPADDR "[0-9]+(\\.[0-9]+){3}" -#define HOST "(" DOMAIN TLD "|" IPADDR ")" +#define IPADDR "[0-9]{1,3}(\\.[0-9]{1,3}){3}" +#define IPV6ADDR "([0-9a-f]{0,4}(:[0-9a-f]{0,4})*:){2}[0-9a-f]{0,4}(:[0-9a-f]{0,4})*" +#define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" GRegex * -- cgit 1.4.1 From 805b33552bcf0a70abe57e7d508731353ec8e385 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Sun, 16 Jun 2013 15:36:52 +0100 Subject: Rework url matching to make it easier to add schemes. The new way allows great control of what a url is composed of. Added a bunch of new schemes. --- src/common/url.c | 161 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 112 insertions(+), 49 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index 84c112d1..def77628 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -314,8 +314,8 @@ do_an_re(const char *word,int *start, int *end, int *type) } func_t; func_t funcs[] = { - { re_email, WORD_EMAIL }, { re_url, WORD_URL }, + { re_email, WORD_EMAIL }, { re_channel, WORD_CHANNEL }, { re_host, WORD_HOST }, { re_path, WORD_PATH }, @@ -360,7 +360,7 @@ make_re(char *grist, char *type) GRegex *ret; GError *err = NULL; - ret = g_regex_new (grist, G_REGEX_CASELESS + G_REGEX_OPTIMIZE, 0, &err); + ret = g_regex_new (grist, G_REGEX_CASELESS | G_REGEX_OPTIMIZE, 0, &err); g_free (grist); return ret; } @@ -389,60 +389,123 @@ re_host (void) #define LPAR "\\(" #define RPAR "\\)" #define NOPARENS "[^() \t]*" +#define PATH \ + "(" \ + "(" LPAR NOPARENS RPAR ")" \ + "|" \ + "(" NOPARENS ")" \ + ")*" /* Zero or more occurrences of either of these */ \ + "(? Date: Sun, 16 Jun 2013 19:09:50 +0100 Subject: Fixed file:// url matching. --- src/common/url.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index def77628..2fda8f6b 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -418,7 +418,7 @@ struct { const char *scheme; /* scheme name. e.g. http */ const char *path_sep; /* string that begins the path */ - int flags; /* see above (flag definitions) */ + int flags; /* see above (flag macros) */ } uri[] = { { "irc", "/", URI_AUTHORITY | URI_PATH }, { "ircs", "/", URI_AUTHORITY | URI_PATH }, @@ -437,7 +437,6 @@ struct { "mumble", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "ventrilo", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "xmpp", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, - { "file", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "h323", ";", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "imap", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "pop", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, @@ -451,7 +450,8 @@ struct { "bitcoin", "", URI_PATH }, { "gtalk", "", URI_PATH }, { "steam", "", URI_PATH }, - { NULL, '\0', 0} + { "file", "/", URI_PATH }, + { NULL, "", 0} }; static GRegex * -- cgit 1.4.1 From 2870586cf9c241877dc49890f26a8496d13d25e4 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Sun, 16 Jun 2013 19:11:26 +0100 Subject: Fixed irc:// and ircs:// url matching: irc://freenode was not working because "freenode" doesn't match a hostname. --- src/common/url.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index 2fda8f6b..ef6e3fdb 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -420,8 +420,8 @@ struct const char *path_sep; /* string that begins the path */ int flags; /* see above (flag macros) */ } uri[] = { - { "irc", "/", URI_AUTHORITY | URI_PATH }, - { "ircs", "/", URI_AUTHORITY | URI_PATH }, + { "irc", "/", URI_PATH }, + { "ircs", "/", URI_PATH }, { "rtsp", "/", URI_AUTHORITY | URI_PATH }, { "feed", "/", URI_AUTHORITY | URI_PATH }, { "teamspeak", "?", URI_AUTHORITY | URI_PATH }, -- cgit 1.4.1 From 32dee82c584776e4a5a1cbefb195ae6d0a8fe752 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Sun, 16 Jun 2013 20:00:35 +0100 Subject: Added support for passwords in userinfo of urls. --- src/common/url.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index ef6e3fdb..b57f9fc3 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -396,7 +396,7 @@ re_host (void) "(" NOPARENS ")" \ ")*" /* Zero or more occurrences of either of these */ \ "(? Date: Sun, 16 Jun 2013 20:16:58 +0100 Subject: Refined IPv6 address matching in url.c. --- src/common/url.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index b57f9fc3..3d38083c 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -350,7 +350,9 @@ do_an_re(const char *word,int *start, int *end, int *type) #define DOMAIN "[a-z0-9][-a-z0-9]*(\\.[-a-z0-9]+)*\\." #define TLD "[a-z][-a-z0-9]*[a-z]" #define IPADDR "[0-9]{1,3}(\\.[0-9]{1,3}){3}" -#define IPV6ADDR "([0-9a-f]{0,4}(:[0-9a-f]{0,4})*:){2}[0-9a-f]{0,4}(:[0-9a-f]{0,4})*" +#define IPV6GROUP "([0-9a-f]{0,4})" +#define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \ + "|(" IPV6GROUP "(:" IPV6GROUP ")*:(:" IPV6GROUP ")+))" /* with :: compression */ #define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" -- cgit 1.4.1 From 55734baed84e7006e919dc02ac8d352020bfebc0 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Mon, 17 Jun 2013 22:43:18 +0100 Subject: Fix IPv6 addr matching in url: must be enclosed in []. --- src/common/url.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index 3d38083c..d3456f5f 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -306,7 +306,7 @@ url_last (int *lstart, int *lend) } static int -do_an_re(const char *word,int *start, int *end, int *type) +do_an_re(const char *word, int *start, int *end, int *type) { typedef struct func_s { GRegex *(*fn)(void); @@ -354,6 +354,8 @@ do_an_re(const char *word,int *start, int *end, int *type) #define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \ "|(" IPV6GROUP "(:" IPV6GROUP ")*:(:" IPV6GROUP ")+))" /* with :: compression */ #define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")" +/* In urls the IPv6 must be enclosed in square brackets */ +#define HOST_URL "(" DOMAIN TLD "|" IPADDR "|" "\\[" IPV6ADDR "\\]" ")" #define OPT_PORT "(:[1-9][0-9]{0,4})?" GRegex * @@ -453,7 +455,7 @@ struct { "gtalk", "", URI_PATH }, { "steam", "", URI_PATH }, { "file", "/", URI_PATH }, - { NULL, "", 0} + { NULL, "", 0} }; static GRegex * @@ -469,7 +471,7 @@ re_url (void) grist_gstr = g_string_new (NULL); /* Add regex "host/path", representing a "schemeless" url */ - g_string_append (grist_gstr, "(" HOST OPT_PORT "/" "(" PATH ")?" ")"); + g_string_append (grist_gstr, "(" HOST_URL OPT_PORT "/" "(" PATH ")?" ")"); for (i = 0; uri[i].scheme; i++) { @@ -485,7 +487,7 @@ re_url (void) g_string_append (grist_gstr, USERINFO "?"); if (uri[i].flags & URI_AUTHORITY) - g_string_append (grist_gstr, HOST OPT_PORT); + g_string_append (grist_gstr, HOST_URL OPT_PORT); if (uri[i].flags & URI_PATH) { @@ -500,7 +502,6 @@ re_url (void) g_free(sep_escaped); } - g_string_append(grist_gstr, ")"); } @@ -512,7 +513,7 @@ re_url (void) } /* EMAIL description --- */ -#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST ")" +#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST_URL ")" static GRegex * re_email (void) -- cgit 1.4.1 From 8ce8a359c18578870bcbabdb10feb5902b424105 Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Tue, 18 Jun 2013 12:01:39 +0100 Subject: Now url_check_word() returns WORD_HOST6 when a ipv6 address is matched. --- src/common/url.c | 37 ++++++++++++++++++++++++++++++------- src/common/url.h | 3 ++- 2 files changed, 32 insertions(+), 8 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index d3456f5f..dabee601 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -35,6 +35,7 @@ GTree *url_btree = NULL; static int do_an_re (const char *word, int *start, int *end, int *type); static GRegex *re_url (void); static GRegex *re_host (void); +static GRegex *re_host6 (void); static GRegex *re_email (void); static GRegex *re_nick (void); static GRegex *re_channel (void); @@ -222,6 +223,7 @@ url_check_word (const char *word) /* Fall through */ case WORD_URL: case WORD_HOST: + case WORD_HOST6: case WORD_CHANNEL: case WORD_PATH: return lasttype; @@ -317,6 +319,7 @@ do_an_re(const char *word, int *start, int *end, int *type) { re_url, WORD_URL }, { re_email, WORD_EMAIL }, { re_channel, WORD_CHANNEL }, + { re_host6, WORD_HOST6 }, { re_host, WORD_HOST }, { re_path, WORD_PATH }, { re_nick, WORD_NICK } @@ -356,7 +359,8 @@ do_an_re(const char *word, int *start, int *end, int *type) #define HOST "(" DOMAIN TLD "|" IPADDR "|" IPV6ADDR ")" /* In urls the IPv6 must be enclosed in square brackets */ #define HOST_URL "(" DOMAIN TLD "|" IPADDR "|" "\\[" IPV6ADDR "\\]" ")" -#define OPT_PORT "(:[1-9][0-9]{0,4})?" +#define PORT "(:[1-9][0-9]{0,4})" +#define OPT_PORT "(" PORT ")?" GRegex * make_re(char *grist, char *type) @@ -380,14 +384,33 @@ re_host (void) if (host_ret) return host_ret; grist = g_strdup_printf ( - "(" /* HOST */ - HOST OPT_PORT + "(" + "(" HOST_URL PORT ")|(" HOST ")" ")" ); host_ret = make_re (grist, "re_host"); return host_ret; } +static GRegex * +re_host6 (void) +{ + static GRegex *host6_ret; + char *grist; + + if (host6_ret) return host6_ret; + + grist = g_strdup_printf ( + "(" + "(" IPV6ADDR ")|(" "\\[" IPV6ADDR "\\]" PORT ")" + ")" + ); + + host6_ret = make_re (grist, "re_host6"); + + return host6_ret; +} + /* URL description --- */ #define SCHEME "(%s)" #define LPAR "\\(" @@ -524,7 +547,7 @@ re_email (void) if (email_ret) return email_ret; grist = g_strdup_printf ( - "(" /* EMAIL */ + "(" EMAIL ")" ); @@ -560,7 +583,7 @@ re_nick (void) if (nick_ret) return nick_ret; grist = g_strdup_printf ( - "(" /* NICK */ + "(" NICK ")" ); @@ -580,7 +603,7 @@ re_channel (void) if (channel_ret) return channel_ret; grist = g_strdup_printf ( - "(" /* CHANNEL */ + "(" CHANNEL ")" ); @@ -606,7 +629,7 @@ re_path (void) if (path_ret) return path_ret; grist = g_strdup_printf ( - "(" /* FS_PATH */ + "(" FS_PATH ")" ); diff --git a/src/common/url.h b/src/common/url.h index 96d9f946..2085c73e 100644 --- a/src/common/url.h +++ b/src/common/url.h @@ -26,7 +26,8 @@ extern void *url_tree; #define WORD_NICK 2 #define WORD_CHANNEL 3 #define WORD_HOST 4 -#define WORD_EMAIL 5 +#define WORD_HOST6 5 +#define WORD_EMAIL 6 /* anything >0 will be displayed as a link by gtk_xtext_motion_notify() */ #define WORD_DIALOG -1 #define WORD_PATH -2 -- cgit 1.4.1 From 8b217981eda423927380e2eda51ee3e43c28ea0d Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Tue, 18 Jun 2013 12:07:46 +0100 Subject: Removed unused argument of make_re(). --- src/common/url.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index dabee601..f65d5575 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -363,7 +363,7 @@ do_an_re(const char *word, int *start, int *end, int *type) #define OPT_PORT "(" PORT ")?" GRegex * -make_re(char *grist, char *type) +make_re (char *grist) { GRegex *ret; GError *err = NULL; @@ -388,7 +388,7 @@ re_host (void) "(" HOST_URL PORT ")|(" HOST ")" ")" ); - host_ret = make_re (grist, "re_host"); + host_ret = make_re (grist); return host_ret; } @@ -406,7 +406,7 @@ re_host6 (void) ")" ); - host6_ret = make_re (grist, "re_host6"); + host6_ret = make_re (grist); return host6_ret; } @@ -530,7 +530,7 @@ re_url (void) grist = g_string_free (grist_gstr, FALSE); - url_ret = make_re (grist, "re_url"); + url_ret = make_re (grist); return url_ret; } @@ -551,7 +551,7 @@ re_email (void) EMAIL ")" ); - email_ret = make_re (grist, "re_email"); + email_ret = make_re (grist); return email_ret; } @@ -587,7 +587,7 @@ re_nick (void) NICK ")" ); - nick_ret = make_re (grist, "re_nick"); + nick_ret = make_re (grist); return nick_ret; } @@ -607,7 +607,7 @@ re_channel (void) CHANNEL ")" ); - channel_ret = make_re (grist, "re_channel"); + channel_ret = make_re (grist); return channel_ret; } @@ -633,6 +633,6 @@ re_path (void) FS_PATH ")" ); - path_ret = make_re (grist, "re_path"); + path_ret = make_re (grist); return path_ret; } -- cgit 1.4.1 From d88c4ad411f8e64f8a0e3174cd2d48ee9dfc692e Mon Sep 17 00:00:00 2001 From: Diogo Sousa Date: Tue, 18 Jun 2013 12:32:55 +0100 Subject: Added a few more useful url schemes. --- src/common/url.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/common') diff --git a/src/common/url.c b/src/common/url.c index f65d5575..351aeb03 100644 --- a/src/common/url.c +++ b/src/common/url.c @@ -460,6 +460,7 @@ struct { "cvs", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "svn", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "git", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, + { "bzr", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "rsync", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "mumble", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, { "ventrilo", "/", URI_AUTHORITY | URI_OPT_USERINFO | URI_PATH }, @@ -478,6 +479,9 @@ struct { "gtalk", "", URI_PATH }, { "steam", "", URI_PATH }, { "file", "/", URI_PATH }, + { "callto", "", URI_PATH }, + { "skype", "", URI_PATH }, + { "geo", "", URI_PATH }, { NULL, "", 0} }; -- cgit 1.4.1