Server line text-encoding-related fixes.

- Handle server lines that contain sequences which are invalid in the server encoding. Previously, these would cause the whole line to be interpreted in ISO-8859-1, but now they're simply replaced with an appropriate replacement character.
- Remove prefs.utf8_locale.
- Change the default server encoding from the system locale to UTF-8.
- Always populate server->encoding with a non-null value (UTF-8 by default).

Fixes #1198
author: Arnavion <arnavion@gmail.com> 2015-01-18 02:10:04 -0800
committer: Arnavion <arnavion@gmail.com> 2015-01-18 02:10:04 -0800
commit: 5749c53484369515c4e6df4a4730b1f34fab82b4 (patch)
tree: 8ba88feef1e6086981f8add539572d6cd0b142c2 /src/common/server.c
parent: 5569205d1578f46a4ba4a2b23d8e48a933acd6ce (diff)
1 files changed, 20 insertions, 128 deletions
diff --git a/src/common/server.c b/src/common/server.c
index 75192dfa..f9ca809e 100644
--- a/src/common/server.c
+++ b/src/common/server.c
@@ -89,48 +89,18 @@ int
 tcp_send_real (void *ssl, int sok, char *encoding, char *buf, int len)
 {
 	int ret;
-	char *locale;
-	gsize loc_len;
 
-	if (encoding == NULL)	/* system */
-	{
-		locale = NULL;
-		if (!prefs.utf8_locale)
-		{
-			const gchar *charset;
-
-			g_get_charset (&charset);
-			locale = g_convert_with_fallback (buf, len, charset, "UTF-8", "?", 0, &loc_len, 0);
-		}
-	}
-	else
-	{
-		locale = g_convert_with_fallback (buf, len, encoding, "UTF-8", "?", 0, &loc_len, 0);
-	}
-
-	if (locale)
-	{
-		len = loc_len;
-#ifdef USE_OPENSSL
-		if (!ssl)
-			ret = send (sok, locale, len, 0);
-		else
-			ret = _SSL_send (ssl, locale, len);
-#else
-		ret = send (sok, locale, len, 0);
-#endif
-		g_free (locale);
-	} else
-	{
+	gsize buf_encoded_len;
+	gchar *buf_encoded = text_invalid_utf8_to_encoding (buf, len, encoding, &buf_encoded_len);
 #ifdef USE_OPENSSL
-		if (!ssl)
-			ret = send (sok, buf, len, 0);
-		else
-			ret = _SSL_send (ssl, buf, len);
+	if (!ssl)
+		ret = send (sok, buf_encoded, buf_encoded_len, 0);
+	else
+		ret = _SSL_send (ssl, buf_encoded, buf_encoded_len);
 #else
-		ret = send (sok, buf, len, 0);
+	ret = send (sok, buf_encoded, buf_encoded_len, 0);
 #endif
-	}
+	g_free (buf_encoded);
 
 	return ret;
 }
@@ -287,94 +257,15 @@ close_socket (int sok)
 static void
 server_inline (server *serv, char *line, gssize len)
 {
-	char *utf_line_allocated = NULL;
-
-	/* Checks whether we're set to use UTF-8 charset */
-	if ((serv->encoding == NULL && prefs.utf8_locale) /* Using system default - UTF-8 */ ||
-		g_ascii_strcasecmp (serv->encoding, "UTF8") == 0 ||
-		g_ascii_strcasecmp (serv->encoding, "UTF-8") == 0
-	)
-	{
-		utf_line_allocated = text_validate (&line, &len);
-	}
-	else
-	{
-		/* Since the user has an explicit charset set, either
-		via /charset command or from his non-UTF8 locale,
-		we don't fallback to ISO-8859-1 and instead try to remove
-		errnoeous octets till the string is convertable in the
-		said charset. */
+	gsize len_utf8;
+	line = text_invalid_encoding_to_utf8 (line, len, serv->encoding, &len_utf8);
 
-		const char *encoding = NULL;
-
-		if (serv->encoding != NULL)
-			encoding = serv->encoding;
-		else
-			g_get_charset (&encoding);
-
-		if (encoding != NULL)
-		{
-			char *conv_line; /* holds a copy of the original string */
-			gsize conv_len; /* tells g_convert how much of line to convert */
-			gsize utf_len;
-			gsize read_len;
-			GError *err;
-			gboolean retry;
-
-			conv_line = g_malloc (len + 1);
-			memcpy (conv_line, line, len);
-			conv_line[len] = 0;
-			conv_len = len;
-
-			/* if CP1255, convert it with the NUL terminator.
-				Works around SF bug #1122089 */
-			if (serv->using_cp1255)
-				conv_len++;
-
-			do
-			{
-				err = NULL;
-				retry = FALSE;
-				utf_line_allocated = g_convert_with_fallback (conv_line, conv_len, "UTF-8", encoding, "?", &read_len, &utf_len, &err);
-				if (err != NULL)
-				{
-					if (err->code == G_CONVERT_ERROR_ILLEGAL_SEQUENCE && conv_len > (read_len + 1))
-					{
-						/* Make our best bet by removing the erroneous char.
-						   This will work for casual 8-bit strings with non-standard chars. */
-						memmove (conv_line + read_len, conv_line + read_len + 1, conv_len - read_len -1);
-						conv_len--;
-						retry = TRUE;
-					}
-					g_error_free (err);
-				}
-			} while (retry);
-
-			g_free (conv_line);
-
-			/* If any conversion has occured at all. Conversion might fail
-			due to errors other than invalid sequences, e.g. unknown charset. */
-			if (utf_line_allocated != NULL)
-			{
-				line = utf_line_allocated;
-				len = utf_len;
-				if (serv->using_cp1255 && len > 0)
-					len--;
-			}
-			else
-			{
-				/* If all fails, treat as UTF-8 with fallback to ISO-8859-1. */
-				utf_line_allocated = text_validate (&line, &len);
-			}
-		}
-	}
-
-	fe_add_rawlog (serv, line, len, FALSE);
+	fe_add_rawlog (serv, line, len_utf8, FALSE);
 
 	/* let proto-irc.c handle it */
-	serv->p_inline (serv, line, len);
+	serv->p_inline (serv, line, len_utf8);
 
-	g_free (utf_line_allocated);
+	g_free (line);
 }
 
 /* read data from socket */
@@ -1749,12 +1640,7 @@ server_set_encoding (server *serv, char *new_encoding)
 {
 	char *space;
 
-	if (serv->encoding)
-	{
-		g_free (serv->encoding);
-		/* can be left as NULL to indicate system encoding */
-		serv->encoding = NULL;
-	}
+	g_free (serv->encoding);
 
 	if (new_encoding)
 	{
@@ -1772,6 +1658,10 @@ server_set_encoding (server *serv, char *new_encoding)
 			serv->encoding = g_strdup ("UTF-8");
 		}
 	}
+	else
+	{
+		serv->encoding = g_strdup ("UTF-8");
+	}
 }
 
 server *
@@ -1816,6 +1706,8 @@ server_set_defaults (server *serv)
 	serv->nick_prefixes = g_strdup ("@%+");
 	serv->nick_modes = g_strdup ("ohv");
 
+	server_set_encoding (serv, "UTF-8");
+
 	serv->nickcount = 1;
 	serv->end_of_motd = FALSE;
 	serv->is_away = FALSE;
author	Arnavion <arnavion@gmail.com>	2015-01-18 02:10:04 -0800
committer	Arnavion <arnavion@gmail.com>	2015-01-18 02:10:04 -0800
commit	5749c53484369515c4e6df4a4730b1f34fab82b4 (patch)
tree	8ba88feef1e6086981f8add539572d6cd0b142c2 /src/common/server.c
parent	5569205d1578f46a4ba4a2b23d8e48a933acd6ce (diff)