/*
 * Copyright (c) 2003-2008 Hypertriton, Inc. 
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Dynamically-allocated text buffer designed to perform automatic character
 * set conversion when appending text of a different encoding.
 */
#include "cgi.h"
#include 
#include 
#include 
#include 
/* Extra bytes added on top of the required size whenever a buffer is grown. */
#define TEXT_BUFFER_GROW 1024
static size_t	utf8_to_latin1(TEXT *, const char *);
/*
 * Table of built-in converters for UTF-8 input. TEXT_CatS_UTF8() compares
 * each name against the buffer's encoding (case-insensitively) and calls the
 * conv_fn of the first entry that matches.
 */
static const struct {
	char	 *name;					/* Encoding name to match */
	size_t	(*conv_fn)(TEXT *, const char *);	/* Converting append routine */
} convs[] = {
	{ "ISO-8859-1",	utf8_to_latin1 }
};
/* Number of entries in convs[]. */
static const int nconvs = sizeof(convs) / sizeof(convs[0]);
/*
 * Prepare a text buffer for use: remember the encoding name, reserve len
 * bytes of storage and mark the buffer as holding no text yet.
 */
void
TEXT_Init(TEXT *te, size_t len, const char *encoding)
{
	Strlcpy(te->encoding, encoding, sizeof(te->encoding));

	te->len = 0;
	te->buf_len = len;
	te->buf = Malloc(te->buf_len);
}
/*
 * Release the storage held by a text buffer.
 *
 * Only te->buf is freed; the TEXT structure itself is left to the caller.
 * The pointer and both length fields are reset afterwards so that no
 * dangling pointer remains and a repeated TEXT_Destroy() is harmless.
 */
void
TEXT_Destroy(TEXT *te)
{
	free(te->buf);
	te->buf = NULL;
	te->buf_len = 0;
	te->len = 0;
}
/*
 * Append the NUL-terminated string s to a text buffer verbatim, with no
 * character set conversion. The terminating NUL is not copied.
 * Returns the number of bytes appended.
 */
size_t
TEXT_CatS(TEXT *te, const char *s)
{
	const size_t nbytes = strlen(s);
	const size_t start = te->len;

	te->len = start + nbytes;
	if (te->buf_len < te->len) {
		/* Leave TEXT_BUFFER_GROW bytes of headroom past the new end. */
		te->buf_len = te->len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}
	memcpy(te->buf + start, s, nbytes);
	return (nbytes);
}
/*
 * Append printf-style formatted text to a text buffer without converting.
 *
 * The text is formatted into a temporary heap string by vasprintf() and then
 * handed to TEXT_CatS(). Returns the number of bytes appended, or 0 if the
 * formatting step failed, in which case the buffer is left untouched.
 */
size_t
TEXT_Cat(TEXT *te, const char *fmt, ...)
{
	va_list ap;
	char *s = NULL;
	size_t rv;
	int n;

	va_start(ap, fmt);
	n = vasprintf(&s, fmt, ap);
	va_end(ap);

	/* On failure the contents of s are undefined and must not be used. */
	if (n < 0)
		return (0);

	rv = TEXT_CatS(te, s);
	free(s);
	return (rv);
}
/*
 * Built-in UTF-8 to LATIN1 conversion.
 *
 * Decodes the NUL-terminated UTF-8 string s and appends exactly one byte to
 * te for every sequence decoded:
 *   - code points up to 0xff are stored as-is;
 *   - code points above 0xff are stored as '?';
 *   - a stray continuation byte (0x80-0xbf) is stored as '!';
 *   - an invalid lead byte (0xf8-0xff) is stored as '?';
 *   - a sequence interrupted by a non-continuation byte is stored as 'X'.
 *
 * Returns the length of s in bytes (the input consumed), which can exceed
 * the number of bytes actually appended.
 */
static size_t
utf8_to_latin1(TEXT *te, const char *s)
{
	const u_char *sp;
	size_t len = strlen(s);
	u_int32_t c;

	/*
	 * Every output byte consumes at least one input byte, so len more
	 * bytes is an upper bound on what is appended. The new size must
	 * account for the text already stored in the buffer.
	 */
	if (te->len+len > te->buf_len) {
		te->buf_len = te->len + len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}

	for (sp = (const u_char *)&s[0]; *sp != '\0'; ) {
		int ntrail = 0;

		/* Classify the lead byte and note how many trail bytes follow. */
		if (*sp < 0x80) {
			c = *(sp++);
		} else if (*sp < 0xc0) {
			c = '!';
			sp++;
		} else if (*sp < 0xe0) {
			c = *(sp++) & 0x1f;
			ntrail = 1;
		} else if (*sp < 0xf0) {
			c = *(sp++) & 0x0f;
			ntrail = 2;
		} else if (*sp < 0xf8) {
			c = *(sp++) & 0x07;
			ntrail = 3;
		} else {
			c = '?';
			sp++;
		}

		for (; ntrail > 0; ntrail--) {
			if ((*sp & 0xc0) != 0x80) {
				/*
				 * Do not consume the offending byte: it may be
				 * the terminating NUL (stepping over it would
				 * run off the end of s) or the start of the
				 * next sequence, which the outer loop handles.
				 */
				c = 'X';
				break;
			}
			c <<= 6;
			c |= *(sp++) & 0x3f;
		}

		if (c <= 0xff) {
			te->buf[te->len++] = c;
		} else {
			te->buf[te->len++] = '?';
		}
	}
	return (len);
}
/*
 * Append the UTF-8 string s to a text buffer. When a built-in converter is
 * listed for the buffer's encoding (names compared case-insensitively), the
 * text is passed through it; otherwise it is appended unchanged.
 */
size_t
TEXT_CatS_UTF8(TEXT *te, const char *s)
{
	int idx = 0;

	/* Stop at the first converter whose name matches the encoding. */
	while (idx < nconvs &&
	    strcasecmp(convs[idx].name, te->encoding) != 0)
		idx++;

	if (idx < nconvs)
		return (convs[idx].conv_fn(te, s));
	return TEXT_CatS(te, s);
}
/*
 * Append one character to a text buffer verbatim, with no character set
 * conversion. Always returns 1, the number of bytes appended.
 */
size_t
TEXT_CatC(TEXT *te, char c)
{
	const size_t pos = te->len;

	te->len = pos + 1;
	if (te->buf_len < te->len) {
		/* Leave TEXT_BUFFER_GROW bytes of headroom past the new end. */
		te->buf_len = te->len + TEXT_BUFFER_GROW;
		te->buf = Realloc(te->buf, te->buf_len);
	}
	te->buf[pos] = c;
	return (1);
}