diff options
author | TingPing <tingping@tingping.se> | 2014-09-07 19:39:30 -0400 |
---|---|---|
committer | TingPing <tingping@tingping.se> | 2014-09-07 19:51:59 -0400 |
commit | 80bdd9ce114af6a6e42a20bef725bdc3dae52cf9 (patch) | |
tree | 9be5dd1644462d8dfb6f3aafe6012d3bc5d14639 | |
parent | 5f99d34c3b38324d5c92803884ae811a6550ad20 (diff) |
Detect utf8 urls
They might not be valid, but like many things they are still used
-rw-r--r-- | src/common/url.c | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/src/common/url.c b/src/common/url.c
index a3922345..1321374f 100644
--- a/src/common/url.c
+++ b/src/common/url.c
@@ -415,8 +415,8 @@ regex_match (const GRegex *re, const char *word, int *start, int *end)
 }
 
 /* Miscellaneous description --- */
-#define DOMAIN "[_a-z0-9][-_a-z0-9]*(\\.[-_a-z0-9]+)*"
-#define TLD "\\.[a-z][-a-z0-9]*[a-z]"
+#define DOMAIN "[_\\pL\\pN][-_\\pL\\pN]*(\\.[-_\\pL\\pN]+)*"
+#define TLD "\\.[\\pL][-\\pL\\pN]*[\\pL]"
 #define IPADDR "[0-9]{1,3}(\\.[0-9]{1,3}){3}"
 #define IPV6GROUP "([0-9a-f]{0,4})"
 #define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \