/*
** Copyright 2000-2005 Double Precision, Inc.  See COPYING for
** distribution information.
*/

#include	"config.h"
#include	"rfc2045/rfc2646.h"
#include	"rfc2646html.h"
#include	<stdlib.h>
#include	<string.h>
#include	<stdio.h>

static const char rcsid[]="$Id: rfc2646html.c,v 1.10 2005/12/26 18:44:57 mrsam Exp $";

/*
** Flowed-text format processing (see RFC2646).
**
** This module defines two callback methods used by the rfc2646 module
** (in rfc2045).
**
** rfc2646tohtml_handler:
**
** Convert flowed-text format text to HTML, for display purposes.  This
** function takes RFC2646 content from an external source and converts it
** to viewable HTML.
**
** rfc2646towiki_handler:
**
** This is an enhanced version of rfc2646tohtml_handler that does some extra
** processing.  This is used for our own flowed text format content that we've
** created ourselves.  We can impose additional formatting rules for value-
** added formatting.  Some examples are:
**
** ''italic text'' - text between two apostrophes is put inside <i> tags.
** '''bold text''' - text between three apostrophes is put inside <b> tags.
** _underlined_ - text between two underscores is put inside <u> tags.
**
** Plus more.
*/

/*
** Defined outside this file.  fmtwikiline() calls it with a pointer just
** past an "http://" or "https://" prefix plus a pointer to the end of the
** text, and treats the returned pointer as the end of the URL.
** NOTE(review): the exact rules for where a URL stops are not visible
** here -- confirm against the function's definition.
*/
extern const char *skip_text_url(const char *start, const char *end);

/*
** Allocate our data structure.
*/

/*
** f - output callback; it is stored and later invoked with a chunk of
**     generated HTML, the chunk's length, and the opaque pointer a.
** a - opaque pointer handed back to f on every call.
**
** Returns the zero-filled, initialized converter state, or a null pointer
** if the allocation fails.  rfc2646tohtml_free() releases it.
*/
struct rfc2646tohtml *rfc2646tohtml_alloc( int (*f)(const char *, int, void *),
					   void *a)
{
	struct rfc2646tohtml *state=calloc(1, sizeof(*state));

	if (state)
	{
		state->handler=f;
		state->voidarg=a;

		/* The very first line must not be preceded by a <br />. */
		state->prev_was_flowed=1;

		/* An empty string means that no list is open. */
		state->listindent[0]=0;
	}
	return (state);
}

/* Forward declarations: both are called before their definitions below. */
static int endquote(struct rfc2646tohtml *);
static int endlist(struct rfc2646tohtml *);

/*
** Emit closing tags for any lists that are currently open.  This is used
** when changing quote levels, because lists nest inside quotes.  Sorry.
*/

/*
** Returns 0 once no list is open, or the first non-zero value that
** endlist() reports.
*/
static int endlist_all(struct rfc2646tohtml *p)
{
	int rc=0;

	/* listindent holds one character per open list; empty means none. */
	while (rc == 0 && p->listindent[0])
		rc=endlist(p);

	return rc;
}

/*
** End of RFC2646 text.
*/

/*
** Closes whatever quotes and lists are still open, then releases p.
** p is freed even when emitting a closing tag fails.
**
** Returns 0, or the first non-zero value reported while closing tags.
*/
int rfc2646tohtml_free(struct rfc2646tohtml *p)
{
	int status=0;

	/* Close every quote level that is still open. */
	while (status == 0 && p->current_quote_depth)
		status=endquote(p);

	/* Then close any lists left open outside of a quote. */
	if (status == 0)
		status=endlist_all(p);

	free(p);
	return (status);
}

/*
** Close the innermost open list.
*/
/*
** The last character of p->listindent names the innermost list: '*' is
** closed with </ul>, '#' with </ol>.  Any other character is dropped
** without emitting a tag.
**
** Returns 0 when there was nothing to emit, otherwise the output
** handler's return value.
*/
static int endlist(struct rfc2646tohtml *p)
{
	size_t depth=strlen(p->listindent);
	char kind;

	if (depth == 0)
		return 0;	/* No list is open */

	/* Pop the innermost list before emitting its closing tag. */
	kind=p->listindent[depth-1];
	p->listindent[depth-1]=0;

	if (kind == '*')
		return (*p->handler)("</ul>", 5, p->voidarg);
	if (kind == '#')
		return (*p->handler)("</ol>", 5, p->voidarg);
	return 0;
}

/*
** Close the most recently opened quoted content.
*/

/*
** Lists nested in the quote are closed first.  If that fails, the quote
** depth is left untouched and the failure code is returned.
**
** Returns 0 on success, otherwise the first non-zero value reported.
*/
static int endquote(struct rfc2646tohtml *p)
{
	static const char closing[]="</div></blockquote>\n";
	int rc=endlist_all(p);

	if (rc == 0)
	{
		--p->current_quote_depth;

		/* The line that follows needs neither <br /> nor <p>. */
		p->prev_was_flowed=1;
		p->prev_was_0length=0;

		rc=(*p->handler)(closing, sizeof(closing)-1, p->voidarg);
	}
	return rc;
}

/*
** Received a new flowed format physical line from the rfc2646 module.
**
** Check the quote nesting level, as returned by the rfc2646 module, compare
** it to the most recent nesting level, and issue any HTML needed to open or
** close the corresponding number of quoting levels.
**
** This function returns an HTML tag that should be emitted before this line
** of text, such as "<p>" if this is a new paragraph, or "<br />" if there's
** a line break here (or "" if no breaks are needed).  A null pointer is
** returned if the output handler reports an error.
*/

/*
** p           - parser state for the current line; p->quote_depth and
**               p->line are read, and p->line is edited in place when a
**               list marker is stripped from it.
** isflowed    - saved as r->prev_was_flowed for the benefit of the next
**               call.
** r           - converter state: quote depth, open lists, and flags that
**               describe the previous line.
** check_lists - non-zero to recognize leading '*' and '#' list markers.
**
** Returns the tag to emit ahead of the line's text ("", "<p>" or
** "<br />"), or NULL when the output handler reported an error.
*/
static const char *rfc2646_getpfix(struct rfc2646parser *p, int isflowed,
				   struct rfc2646tohtml *r,
				   int check_lists)
{
	const char *str;
	const char *pfix;

	/*
	** If the rfc2646 quoting level is less than the current quoting
	** level, close the surplus levels.  endquote() closes any lists
	** nested inside the quote before closing the quote itself.
	*/

	while (r->current_quote_depth > p->quote_depth)
	{
		if (endquote(r) != 0)
			return (NULL);
	}

	/*
	** If the rfc2646 quoting level is greater than the current quoting
	** level, open as many quoting levels as necessary.
	*/

	while (r->current_quote_depth < p->quote_depth)
	{
		char tag[160];

		if (endlist_all(r) != 0)
			return NULL;

		/* The class name cycles through cite0, cite1 and cite2. */
		sprintf(tag, "\n<blockquote type=\"cite\" class=\"cite%d\">"
			"<div class=\"quotedtext\">",
			r->current_quote_depth % 3);

		/*
		** The output handler is always paired with its own opaque
		** argument, r->voidarg, never with the parser's.
		*/
		if ((*r->handler)(tag, strlen(tag), r->voidarg) != 0)
			return (NULL);

		++r->current_quote_depth;
		r->prev_was_flowed=1;	/* Prevent <br /> below */
		r->prev_was_0length=0;
	}

	str=p->line;

	/*
	** A line break is only needed when the previous line was not
	** flowed; an empty previous line turns it into a paragraph break.
	*/

	pfix="";

	if (!r->prev_was_flowed)
		pfix= r->prev_was_0length ? "<p>":"<br />";

	if (check_lists &&  /* Enhanced formatting */
	    (p->line[0] == '*' || p->line[0] == '#'))
	{
		size_t i;
		size_t j;
		const char *li;

		/* Count the list markers; i is the requested nesting depth. */

		for (i=0; p->line[i] == '*' || p->line[i] == '#'; ++i)
			;

		/*
		** Close lists which are no longer open.
		**
		** If the list types change unexpectedly, keep closing
		** the most recently opened lists until the types of the
		** currently opened lists agree with the leading prefix
		** of p->line.
		*/

		while (i < strlen(r->listindent) ||
		       strncmp(r->listindent, p->line,
			       strlen(r->listindent)))
		{
			if (endlist(r))
				return NULL;
		}

		/*
		** Open additional lists until the requested depth is reached.
		** Every list opened inside another list is wrapped in its
		** own <li>.
		*/

		li="";

		while (i < sizeof(r->listindent)-1 &&
		       i > strlen(r->listindent))
		{
			char buf[2];
			int rc;

			buf[0]=p->line[strlen(r->listindent)];
			buf[1]=0;

			if (*li &&
			    (*r->handler)(li, strlen(li), r->voidarg))
				return NULL;

			rc= buf[0] == '#'
				? (*r->handler)("<ol>", 4, r->voidarg)
				: (*r->handler)("<ul>", 4, r->voidarg);

			if (rc)
				return NULL;
			strcat(r->listindent, buf);
			li="<li>";
		}
		if ((*r->handler)("<li>", 4, r->voidarg))
			return NULL;

		/* A list item supplies its own break. */
		pfix="";

		/*
		** Strip the markers, and the spaces that follow them, by
		** shifting the rest of the line to the front of the buffer.
		*/
		j=0;

		while (p->line[i] == ' ')
			++i;
		while ((p->line[j]=p->line[i]) != 0)
		{
			++i;
			++j;
		}
		p->line[j]=0;
	}
	else
	{
		/*
		** Close all open lists when we encounter a fresh paragraph
		** break.
		*/

		if (*str && (!r->prev_was_flowed || r->prev_was_0length))
			if (endlist_all(r))
				return NULL;
	}

	r->prev_was_flowed=isflowed;
	r->prev_was_0length= *str == 0;

	return pfix;
}

/*
** Output plain text content.
**
** Escape & < and > characters.
**
** Convert spaces to &nbsp; as dictated by the mode argument
**
** Convert tabs to requisite number of spaces
*/

/*
** Values for the mode argument of emit_html(), selecting how a space
** character is written:
**
** NOSPC       - every space is written as &nbsp;
** ALLOWSPC    - a space is written literally unless it is followed by
**               another space, in which case it is written as &nbsp;
** ALLOWALLSPC - every space is written literally
*/
#define NOSPC 0
#define ALLOWSPC 1
#define ALLOWALLSPC 2

/*
** r      - converter state; supplies the output handler and its argument.
** str, n - the text to write; exactly n bytes are examined.
** colcnt - running count of columns written so far, used to find the next
**          tab stop (every 8 columns).  It is advanced by every character
**          consumed, including literal text at the end of the range, so
**          that the count stays right when several calls share it.
** mode   - NOSPC, ALLOWSPC or ALLOWALLSPC.
**
** Returns 0 on success, or the first non-zero value returned by the
** output handler.
*/
static int emit_html(struct rfc2646tohtml *r,
		     const char *str, size_t n, unsigned *colcnt, int mode)
{
	int rc;
	size_t i=0;

	while (i < n)
	{
		const char *repl=NULL;
		char c=str[i];

		switch (c) {
		case '&':
			repl="&amp;";
			break;
		case '<':
			repl="&lt;";
			break;
		case '>':
			repl="&gt;";
			break;
		case ' ':
			/*
			** Look at the next character only when it lies inside
			** the n-byte range.  The last space of the range
			** counts as not being followed by another space.
			*/
			if (mode == ALLOWALLSPC ||
			    (mode == ALLOWSPC &&
			     ( i+1 >= n || str[i+1] != ' ')))
				repl=" ";
			else
				repl="&nbsp;";
			break;
		case '\t':
			break;
		default:
			/* Ordinary character: keep accumulating. */
			++i;
			continue;
		}

		/* Flush the literal text gathered ahead of this character. */
		rc= i ? (*r->handler)(str, i, r->voidarg):0;

		if (c == '\t')
		{
			/* Pad with &nbsp; up to the next multiple of 8. */
			*colcnt += i;

			do
			{
				if (rc == 0)
					rc=(*r->handler)("&nbsp;", 6,
							 r->voidarg);
				++*colcnt;
			} while ( (*colcnt % 8) != 0);
		}
		else
		{
			if (rc == 0)
				rc=(*r->handler)(repl, strlen(repl),
						 r->voidarg);
			*colcnt += i+1;
		}

		if (rc)
			return (rc);

		/* Restart the scan right after the special character. */
		str += i+1;
		n -= i+1;
		i=0;
	}

	if (i == 0)
		return 0;

	/* Trailing literal text occupies columns too. */
	*colcnt += i;
	return (*r->handler)(str, i, r->voidarg);
}

/*
** Parse arbitrary flowed text-formatted line, convert it to html.
*/

/*
** p        - parser state holding the current line in p->line.
** isflowed - passed through to rfc2646_getpfix().
** vp       - the struct rfc2646tohtml obtained from rfc2646tohtml_alloc().
**
** Writes the line's prefix tag (if any), the escaped text, and a newline.
** Returns 0 on success, -1 if rfc2646_getpfix() fails, otherwise the
** non-zero value returned by the output handler.
*/
int rfc2646tohtml_handler(struct rfc2646parser *p, int isflowed, void *vp)
{
	struct rfc2646tohtml *state=(struct rfc2646tohtml *)vp;
	unsigned column=0;
	int status=0;
	const char *lead=rfc2646_getpfix(p, isflowed, state, 0);

	if (lead == NULL)
		return -1;

	if (*lead)
		status=(*state->handler)(lead, strlen(lead), state->voidarg);

	if (status == 0)
		status=emit_html(state, p->line, strlen(p->line),
				 &column, ALLOWSPC);

	if (status == 0)
		status=(*state->handler)("\n", 1, state->voidarg);

	return (status);
}

/*
** Enhanced formatting.
*/

/*
** Write len bytes of escaped text between an opening and a closing tag.
*/
static int wiki_emit_tagged(struct rfc2646tohtml *r,
			    const char *open_tag, const char *close_tag,
			    const char *text, size_t len, unsigned *colcnt)
{
	int rc=(*r->handler)(open_tag, strlen(open_tag), r->voidarg);

	if (rc == 0)
		rc=emit_html(r, text, len, colcnt, ALLOWSPC);
	if (rc == 0)
		rc=(*r->handler)(close_tag, strlen(close_tag), r->voidarg);
	return rc;
}

/*
** Write a hyperlink whose target and visible text are both the len-byte
** URL.
**
** NOTE(review): emit_html() escapes & < and > only.  Whether a double
** quote can appear inside the URL depends on skip_text_url(), which is
** not visible here -- confirm that it cannot, since it would end the
** href attribute early.
*/
static int wiki_emit_link(struct rfc2646tohtml *r,
			  const char *url, size_t len, unsigned *colcnt)
{
	/*
	** The href attribute is not visible text, so it is written against
	** a scratch copy of the column counter.
	*/
	unsigned hrefcol= *colcnt;
	int rc=(*r->handler)("<a href=\"", 9, r->voidarg);

	if (rc == 0)
		rc=emit_html(r, url, len, &hrefcol, ALLOWALLSPC);
	if (rc == 0)
		rc=(*r->handler)("\">", 2, r->voidarg);
	if (rc == 0)
		rc=emit_html(r, url, len, colcnt, ALLOWSPC);
	if (rc == 0)
		rc=(*r->handler)("</a>", 4, r->voidarg);
	return rc;
}

/*
** Write cnt bytes of str as HTML, applying the inline markup:
**
** '''text'''  - <b>text</b>
** ''text''    - <i>text</i>
** _some_text_ - <u>some text</u>; the word must start the text or follow
**               a space, and its inner underscores become spaces
** http://...  - <a href="URL">URL</a>, likewise for https://
**
** Text that starts with a space, or that contains a tab, is wrapped in
** <tt>, with its leading spaces written as &nbsp;.
**
** Scanning stops at the first apostrophe or underscore that does not
** form valid markup; everything from there on is written as plain text.
**
** Returns 0 on success, -1 if memory cannot be allocated, otherwise the
** first non-zero value returned by the output handler.  Nothing further
** is written once an error has been reported.
*/
static int fmtwikiline(struct rfc2646tohtml *r, const char *str,
		       size_t cnt)
{
	int rc=0;
	int isfixed=0;
	unsigned colcnt=0;
	size_t i, j;

	if (cnt == 0)
		return 0;

	/*
	** Text that starts with a space, or which contains tabs, will
	** be wrapped inside a <tt>.
	*/

	if (*str == ' ' || memchr(str, '\t', cnt) != NULL)
		isfixed=1;

	if (isfixed)
	{
		rc=(*r->handler)("<tt>", 4, r->voidarg);
		if (rc)
			return rc;

		while (cnt && *str == ' ')
		{
			++str;
			--cnt;

			rc=(*r->handler)("&nbsp;", 6, r->voidarg);
			if (rc)
				return rc;
		}
	}

	while (cnt && rc == 0)
	{
		size_t scheme=0; /* Length of the URL scheme ending at i */

		/*
		** Stop at a possible markup character, or right after a
		** "http://" or "https://" prefix.
		*/

		for (i=0; i<cnt; i++)
		{
			if (str[i] == '\'' ||
			    str[i] == '_')
				break;

			if (i >= 7 && strncmp(str + i - 7,
					      "http://", 7) == 0)
				break;

			if (i >= 8 && strncmp(str + i - 8,
					      "https://", 8) == 0)
				break;
		}

		if (i+1<cnt && str[i] == '\'' && str[i+1] == '\'')
		{
			/* Three apostrophes mean bold, two mean italic. */
			size_t nq=(i+2 < cnt && str[i+2] == '\'') ? 3:2;

			/* Find the next apostrophe after the opening run. */
			for (j=i+nq; j<cnt; j++)
				if (str[j] == '\'')
					break;

			/* It must begin a closing run of the same length. */
			if (cnt-j >= nq && strncmp(str+j, "'''", nq) == 0)
			{
				if (i > 0)
					rc=emit_html(r, str, i,
						     &colcnt, ALLOWSPC);
				if (rc == 0)
					rc=wiki_emit_tagged(r,
							    nq == 3
							    ? "<b>":"<i>",
							    nq == 3
							    ? "</b>":"</i>",
							    str+i+nq, j-i-nq,
							    &colcnt);
				str += j+nq;
				cnt -= j+nq;
				continue;
			}
		}

		if (i < cnt && str[i] == '_' && (i == 0 || str[i-1] == ' '))
		{
			/* The underlined word extends up to the next space. */
			for (j=i; j<cnt; j++)
				if (str[j] == ' ')
					break;

			if (str[j-1] == '_' && j - i > 2)
			{
				size_t len=j-i-2; /* Text between the '_' */
				size_t k;
				char *copy=malloc(len+1);

				if (!copy)
					return -1;

				/* Inner underscores stand for spaces. */
				memcpy(copy, str + i+1, len);
				copy[len]=0;
				for (k=0; k<len; ++k)
					if (copy[k] == '_')
						copy[k]=' ';

				if (i > 0)
					rc=emit_html(r, str, i,
						     &colcnt, ALLOWSPC);
				if (rc == 0)
					rc=wiki_emit_tagged(r, "<u>", "</u>",
							    copy, len,
							    &colcnt);
				free(copy);

				str += j;
				cnt -= j;
				continue;
			}
		}

		if (i >= 7 && strncmp(str + i - 7, "http://", 7) == 0)
			scheme=7;
		else if (i >= 8 && strncmp(str + i - 8, "https://", 8) == 0)
			scheme=8;

		if (scheme)
		{
			const char *q=skip_text_url(str+i, str+cnt);
			size_t start=i-scheme;	/* Where the URL begins */
			size_t consumed=q-str;	/* Where the URL ends */

			if (start > 0)
				rc=emit_html(r, str, start,
					     &colcnt, ALLOWSPC);

			/*
			** The link is written even when the URL is the very
			** first thing in the text (start == 0).
			*/
			if (rc == 0)
				rc=wiki_emit_link(r, str+start,
						  consumed-start, &colcnt);

			str += consumed;
			cnt -= consumed;
			continue;
		}

		break;
	}

	/* Whatever is left is plain text, unless an error occurred. */
	if (rc == 0 && cnt)
		rc=emit_html(r, str, cnt, &colcnt, ALLOWSPC);
	if (rc == 0 && isfixed)
		rc=(*r->handler)("</tt>", 5, r->voidarg);
	return rc;
}

/*
** Special lines.
*/

/*
** r    - converter state.
** pfix - tag to write ahead of an ordinary line; may be empty.
** str  - the NUL-terminated line.
**
** A line of the form "== Title ==", with one to five leading '='
** characters, is written as a heading, <hN>Title</hN>, where N is the
** number of leading '=' characters.  The trailing run may hold at most
** that many '=' characters and may only be followed by spaces.  pfix is
** not written for a heading, and r->prev_was_flowed is set.
**
** Any other line is written as pfix followed by the formatted text.
**
** Returns 0 on success, otherwise the first non-zero value reported.
*/
static int fmtwiki(struct rfc2646tohtml *r,
		   const char *pfix,
		   const char *str)
{
	int rc=0;
	size_t level=0;
	const char *tail=NULL;

	/* Count the leading '=' characters. */
	while (str[level] == '=')
		++level;

	/* Look for the start of the trailing run of '=' characters. */
	if (level)
		tail=strchr(str+level, '=');

	if (tail && level < 6)
	{
		size_t left=level;

		/* Skip at most "level" trailing '=' characters... */
		while (left && *tail == '=')
		{
			++tail;
			--left;
		}

		/* ...and any spaces after them. */
		while (*tail == ' ')
			++tail;

		if (*tail == 0)	/* Nothing else may follow */
		{
			char digits[100];
			const char *title=str+level;
			const char *title_end;

			sprintf(digits, "%d", (int)level);

			rc=(*r->handler)("<h", 2, r->voidarg);
			if (rc == 0)
				rc=(*r->handler)(digits, strlen(digits),
						 r->voidarg);
			if (rc == 0)
				rc=(*r->handler)(">", 1, r->voidarg);

			/* Trim the spaces that surround the title. */
			while (*title == ' ')
				++title;

			title_end=strchr(title, '='); /* Must exist */
			while (title_end > title && title_end[-1] == ' ')
				--title_end;

			if (rc == 0)
				rc=fmtwikiline(r, title, title_end-title);
			if (rc == 0)
				rc=(*r->handler)("</h", 3, r->voidarg);
			if (rc == 0)
				rc=(*r->handler)(digits, strlen(digits),
						 r->voidarg);
			if (rc == 0)
				rc=(*r->handler)(">", 1, r->voidarg);

			r->prev_was_flowed=1;
			return rc;
		}
	}

	/* An ordinary line. */
	if (*pfix)
		rc=(*r->handler)(pfix, strlen(pfix), r->voidarg);

	if (rc == 0)
		rc=fmtwikiline(r, str, strlen(str));
	return rc;
}

/*
** Enhanced formatting of rfc2646 content.
*/

/*
** p        - parser state holding the current line in p->line.
** isflowed - passed through to rfc2646_getpfix().
** vp       - the struct rfc2646tohtml obtained from rfc2646tohtml_alloc().
**
** Same as rfc2646tohtml_handler(), except that list markers are
** recognized and the line goes through fmtwiki().
** Returns 0 on success, -1 if rfc2646_getpfix() fails, otherwise the
** first non-zero value reported.
*/
int rfc2646towiki_handler(struct rfc2646parser *p, int isflowed, void *vp)
{
	struct rfc2646tohtml *state=(struct rfc2646tohtml *)vp;
	const char *lead=rfc2646_getpfix(p, isflowed, state, 1);
	int status;

	if (lead == NULL)
		return -1;

	status=fmtwiki(state, lead, p->line);

	/* Each physical line ends with a newline in the output. */
	if (status == 0)
		status=(*state->handler)("\n", 1, state->voidarg);

	return (status);
}

/*
** Close every quote and list that is still open, without releasing p.
**
** Returns -1 if closing a quote fails, otherwise whatever endlist_all()
** returns (0 on success).
*/
int rfc2646tohtml_cleanup(struct rfc2646tohtml *p)
{
	for (;;)
	{
		/* No quotes left: finish with the lists outside of them. */
		if (!p->current_quote_depth)
			return endlist_all(p);

		if (endquote(p) != 0)
			return -1;
	}
}

/*
** Cleanup for the enhanced format; it does exactly what
** rfc2646tohtml_cleanup() does and returns its result.
*/
int rfc2646towiki_cleanup(struct rfc2646tohtml *p)
{
	int status=rfc2646tohtml_cleanup(p);

	return status;
}
