/***************************************************************************
                          misc_strings.c  -  description
                             -------------------
    begin                : Fri Nov 29 2002
    copyright            : (C) 2002 by Tim-Philipp Müller
    email                : t.i.m@orange.net
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "global.h"
#include "misc_strings.h"

#include <string.h>
#include <stdio.h>

/* misc_strings_unescape_url
 *
 * Some browsers pass ed2k-links to the clipboard and escape
 *  the '|' char and others as '%xx'. This routine unescapes
 *  the given string in place.
 *
 * Only a '%' followed by exactly two hexadecimal digits is
 *  decoded. A literal "%%" pair and any malformed or truncated
 *  escape (e.g. "%zz", or "%4" at the end of the string) are
 *  left unchanged. Every escape is decoded exactly once, so
 *  "%2541" becomes "%41" and not "A".
 *
 * Note that "%00" decodes to a NUL byte, which terminates the
 *  string at that position.
 *
 */

/* Returns the value (0-15) of the hexadecimal digit c, or -1 if
 *  c is not a hexadecimal digit. */
static int
misc_strings_hex_digit_value (gchar c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

void
misc_strings_unescape_url (gchar *s)
{
	gchar	*src, *dst;

	g_return_if_fail (s != NULL);

	/* The decoded string is never longer than the input, so we can
	 *  decode in place with a read and a write cursor. */
	src = s;
	dst = s;

	while (*src != '\0')
	{
		if (src[0] == '%')
		{
			int hi, lo;

			/* "%%" is kept as it is */
			if (src[1] == '%')
			{
				*dst++ = '%';
				*dst++ = '%';
				src += 2;
				continue;
			}

			/* src[2] is only looked at if src[1] is a hex digit
			 *  (and thus not the terminator) */
			hi = misc_strings_hex_digit_value (src[1]);
			lo = (hi >= 0) ? misc_strings_hex_digit_value (src[2]) : -1;

			if (lo >= 0)
			{
				*dst++ = (gchar) ((hi << 4) | lo);
				src += 3;
				continue;
			}
		}

		*dst++ = *src++;
	}

	*dst = '\0';
}



/******************************************************************************
 *
 *   misc_string_convert_to_utf8
 *
 *   Takes a string and returns a new UTF-8 string
 *    which the caller has to free when done.
 *
 *   If the string is valid UTF-8 already, a copy is returned.
 *    Otherwise the following conversions are tried in turn:
 *
 *     1. from the locale charset
 *     2. from ISO-8859-15 (western europe)
 *     3. from the locale charset again via g_convert_with_fallback(),
 *        after printing an error message to the console
 *     4. if even that yields nothing: a copy of the input in which
 *        every byte outside the ASCII range is replaced by '_'
 *
 *   Because of the last step the returned string is never NULL.
 *
 *   If string is NULL, a new "[FIXME]" string is returned.
 *
 ***/

gchar *
misc_string_convert_to_utf8 (const gchar *string)
{
	gchar               *utf8 = NULL;
	GError              *error = NULL;
	gsize                bytes_read = 0;
	static const gchar  *charset = NULL;

	g_return_val_if_fail (string!=NULL, g_strdup("[FIXME]"));

	if ( g_utf8_validate(string, -1, NULL) == TRUE )
		return g_strdup(string);

	if (!charset)
		(void) g_get_charset(&charset);

	/* FIRST: try locale into UTF8 */

	utf8 = g_locale_to_utf8 (string, -1, &bytes_read, NULL, &error);

	/* did everything work out? */
	if (error == NULL)
		return utf8;

	G_FREE(utf8);
	g_error_free(error);
	error = NULL;

	/* NEXT: try western european into UTF8 */

	utf8 = g_convert (string, -1, "UTF-8", "ISO-8859-15", &bytes_read, NULL, &error);

	/* did everything work out? */
	if (error == NULL)
		return utf8;

	G_FREE(utf8);

	/* LAST: if all that doesn't work, report the problem and convert
	 *       from locale with a fallback character                     */

	g_print("FIXME: in %s: charset conversion error in g_convert().\n"
	        "\tError message: %s\n\tFirst problematic character: %s\n",
	        __FUNCTION__, error->message, string + bytes_read);

	g_error_free(error);

	utf8 = g_convert_with_fallback (string, -1, "UTF-8", charset, "_", NULL, NULL, NULL);

	/* The fallback string only stands in for characters missing from
	 *  the target encoding; input bytes that are invalid in the source
	 *  charset still make the conversion fail. Do not hand a NULL to
	 *  the caller in that case: keep the ASCII part of the string and
	 *  replace all other bytes, which always gives valid UTF-8. */
	if (utf8 == NULL)
	{
		gchar *p;

		utf8 = g_strdup(string);

		for (p = utf8;  *p != '\0';  p++)
		{
			if ((unsigned char) *p >= 0x80)
				*p = '_';
		}
	}

	return utf8;
}




/******************************************************************************
 *
 *   misc_string_markup
 *
 *   Wraps the given string into a <span color='...'> element,
 *    optionally nested inside <b> and/or <i> elements.
 *
 *   If color is NULL, "black" is used.
 *
 *   The text is passed through UTF8_SHORT_PRINTF() before it is
 *    inserted; this function itself does not escape markup
 *    characters in the text.
 *
 *   Returns a pointer to a static buffer of 1024 bytes (longer
 *    results are truncated) which is overwritten by the next call
 *    and must not be freed. Returns "[FIXME]" if str_locale is NULL.
 *
 ***/

const gchar *
misc_string_markup ( const gchar *str_locale,
                     const gchar *color,
                     gboolean bold,
                     gboolean italic )
{
	static gchar buf[1024];

	g_return_val_if_fail ( str_locale != NULL, "[FIXME]");

	/* Elements must be closed in the reverse order in which they were
	 *  opened (<b><i>text</i></b>), otherwise the markup is not well
	 *  formed when both bold and italic are requested. */
	g_snprintf ( buf, sizeof(buf),
	             "<span color='%s'>%s%s%s%s%s</span>",
	             (color != NULL) ? color  : "black",
	             (bold)          ? "<b>"  : "",
	             (italic)        ? "<i>"  : "",
	             UTF8_SHORT_PRINTF("%s", str_locale),
	             (italic)        ? "</i>" : "",
	             (bold)          ? "</b>" : "" );

	return buf;
}



/******************************************************************************
 *
 *   misc_string_utf8_printf
 *
 *   printf()-style formatting into a newly allocated string, which
 *    is then run through misc_string_convert_to_utf8().
 *
 *   Returns the converted string, which the caller has to free when
 *    done, or NULL if format is NULL or formatting yields no string.
 *
 ***/

gchar *
misc_string_utf8_printf (const gchar *format, ...)
{
	va_list   args;
	gchar    *formatted;
	gchar    *converted = NULL;

	g_return_val_if_fail ( format != NULL, NULL );

	va_start(args, format);
	formatted = g_strdup_vprintf(format, args);
	va_end(args);

	if (formatted != NULL)
	{
		converted = misc_string_convert_to_utf8(formatted);

		/* the intermediate string is ours, only the converted
		 *  copy is handed to the caller */
		g_free(formatted);
	}

	return converted;
}



const gchar *
misc_string_utf8_short_printf (const gchar *format, ...)
{
	static gchar  buf[1024], utf8[4096];
	const gchar  *charset = NULL;
	GIConv        converter;
	gsize         inleft = sizeof(buf);
	gsize         outleft = sizeof(utf8);
	gchar        *inbuf = buf;
	gchar        *outbuf = utf8;
	size_t        ret;
	va_list	      ap;

	g_return_val_if_fail ( format != NULL , NULL );

	/* we need to check this in order to avoid double recursions of
	 *  UTF8_SHORT_PRINTF() (=this function) */
	if ( strcmp (format, "%s") == 0 )
	{
		const gchar *first_argument;

		va_start(ap, format);
		first_argument = va_arg(ap, gchar *);
		g_return_val_if_fail (  first_argument != buf, first_argument );
		va_end(ap);
	}

	va_start(ap, format);
	g_vsnprintf(buf, (gulong) (sizeof(buf)/sizeof(gchar)), format, ap);
	va_end(ap);

	if (g_utf8_validate(buf, -1, NULL))
		return buf;

	g_get_charset(&charset);

	g_return_val_if_fail ( charset != NULL, buf );

	converter = g_iconv_open ( "UTF-8", charset );

	g_return_val_if_fail ( converter != (GIConv)-1 , buf );

	ret = g_iconv (converter, &inbuf, &inleft, &outbuf, &outleft);

	g_iconv_close(converter);

	if ( ret != -1 )
		return utf8;

	/* locale -> utf8 failed? try iso-8859-15 -> utf8 */

	inleft = sizeof(buf);
	outleft = sizeof(utf8);
	inbuf = buf;
	outbuf = utf8;

	converter = g_iconv_open ( "UTF-8", "ISO-8859-15" );

	g_return_val_if_fail ( converter != (GIConv)-1 , buf );

	ret = g_iconv (converter, &inbuf, &inleft, &outbuf, &outleft);

	g_iconv_close(converter);


	g_return_val_if_fail ( ret != -1, buf );

	return utf8;
}



/******************************************************************************
 *
 *   misc_strings_utf8_to_locale
 *
 *   Converts an UTF-8 encoded string into the charset of the
 *    current locale by means of g_convert_with_fallback(), with
 *    "_" as the fallback string for characters which the locale
 *    charset cannot represent.
 *
 *   The result of g_convert_with_fallback() is returned as is and
 *    must be freed by the caller if no longer needed.
 *
 *   If the argument is NULL or not valid UTF-8, a new "[FIXME]"
 *    string is returned instead.
 *
 ***/

gchar *
misc_strings_utf8_to_locale (const gchar *utf8)
{
	/* looked up on the first call only and remembered afterwards */
	static const gchar  *locale_charset = NULL;
	gchar               *in_locale;

	g_return_val_if_fail ( utf8 != NULL, g_strdup("[FIXME]"));

	g_return_val_if_fail ( g_utf8_validate(utf8, -1, NULL) == TRUE, g_strdup("[FIXME]"));

	if (locale_charset == NULL)
		(void) g_get_charset(&locale_charset);

	in_locale = g_convert_with_fallback (utf8, -1, locale_charset, "UTF-8",
	                                     "_", NULL, NULL, NULL);

	return in_locale;
}




/******************************************************************************
 *
 *   misc_strings_unescape_text
 *
 *   Replaces the entities &amp; &apos; &lt; &gt; and &quot; in the
 *    given UTF-8 string by the characters they stand for. Everything
 *    else, including a '&' which does not start one of these
 *    entities, is copied unchanged.
 *
 *   returns new string which must be freed by caller when no longer
 *    needed, or NULL if text_utf8 is NULL
 *
 ***/

gchar *
misc_strings_unescape_text (const gchar *text_utf8)
{
	static const struct
	{
		const gchar *entity;
		size_t       len;
		gchar        replacement;
	}
	entities[] =
	{
		{ "&amp;",  5, '&'  },
		{ "&apos;", 6, '\'' },
		{ "&lt;",   4, '<'  },
		{ "&gt;",   4, '>'  },
		{ "&quot;", 6, '"'  }
	};

	const gchar *pos;
	gchar       *newstr, *posnew;

	g_return_val_if_fail ( text_utf8 != NULL, NULL );

	/* the result is never longer than the input */
	newstr = g_new0(gchar, strlen(text_utf8)+1);

	pos    = text_utf8;
	posnew = newstr;

	/* We can safely work byte by byte here: in UTF-8 the byte value
	 *  of '&' never occurs inside a multi-byte sequence, and copying
	 *  all other bytes one at a time keeps such sequences intact
	 *  without ever stepping over the terminator. */
	while (*pos)
	{
		size_t skip = 0;

		if ( *pos == '&' )
		{
			size_t i;

			for (i = 0;  i < sizeof(entities)/sizeof(entities[0]);  i++)
			{
				if ( strncmp(pos, entities[i].entity, entities[i].len) == 0 )
				{
					*posnew = entities[i].replacement;
					posnew++;
					/* skip exactly the entity, nothing more */
					skip = entities[i].len;
					break;
				}
			}
		}

		if (skip > 0)
		{
			pos += skip;
		}
		else
		{
			*posnew = *pos;
			posnew++;
			pos++;
		}
	}

	*posnew = '\0';

	return newstr;
}




/******************************************************************************
 *
 *   misc_strings_escape_ascii_url
 *
 *   Convert a ASCII string into an URL string compliant with
 *   URIs character encoding rules (see 'man url' for details)
 *
 *   Returned string must be freed by caller.
 *
 *   This function has been written by Elouen Lanoe
 *     ( elolan at users dot sf dot net ), patch #693928
 *
 *   XXX - what about international chars like  or  whose
 *         character code depeneds on the locale? [Tim]
 ***/

gchar *
misc_strings_escape_ascii_url (const gchar *str)
{
	/* list of unescaped characters (also include upper
	 * and lower case English letters and decimal digits */
	gchar        *valid_char = "-_.!~*'()";
	gchar        *url_string,  *pos2;
	const gchar  *pos;

	g_return_val_if_fail ( str != NULL, NULL );

	/* lets suppose that all characters will be escaped %XX */
	url_string = g_new0( gchar,  (strlen(str)*3) + 1 );

	/* note: we do not need to check for glib mem alloc return values,
	 * if memory allocation fails, the program will stop */

	pos2 = url_string;
	for ( pos = str;  *pos != 0x00;  pos++ )
	{
		if ( ( *pos >= 'a'  &&  *pos <= 'z' )
		  || ( *pos >= 'A'  &&  *pos <= 'Z' ) 
		  || ( *pos >= '0'  &&  *pos <= '9' )
		  || strchr(valid_char, *pos) != NULL )
		{
			*pos2 = *pos;
			 pos2++;  /* AND NOT *pos2++ */
		}
		else
		{
			g_snprintf( pos2, 4*sizeof(gchar), "%%%02X", (guchar) *pos );
			pos2 += 3;
		}
	}

	*pos2 = 0x00;

	return url_string;
}



