/*
** dkim-arparse.c - written in milano by vesely on 6mar2013
** collected mail parsing utilities
*/
/*
* zdkimfilter - Sign outgoing, verify incoming mail messages

Copyright (C) 2013-2022 Alessandro Vesely

This file is part of zdkimfilter

zdkimfilter is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

zdkimfilter is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License version 3
along with zdkimfilter.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPLv3 section 7:

If you modify zdkimfilter, or any covered part of it, by linking or combining
it with OpenSSL, OpenDKIM, Sendmail, or any software developed by The Trusted
Domain Project or Sendmail Inc., containing parts covered by the applicable
licence, the licensor of zdkimfilter grants you additional permission to convey
the resulting work.
*/

#include <config.h>
#if !ZDKIMFILTER_DEBUG
#define NDEBUG
#endif

#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <ctype.h>
#include "dkim-arparse.h"

char *skip_comment(char const *s)
/*
* Skip a parenthesized comment, honoring nested comments and quoted pairs.
*
* On entry s must point to the opening '('.  Return a pointer to the matching
* closing ')', or NULL if the string ends before the comment is closed.
*/
{
	assert(s && *s == '(');

	int comment = 1; // nesting depth
	for (;;)
	{
		switch (*(unsigned char const*)++s)
		{
			case 0:
				return NULL;

			case '(':
				++comment;
				break;

			case ')':
				if (--comment <= 0)
					return (char*)s;
				break;

			case '\\': // quoted pair: the next char is taken literally
				// A trailing backslash must not step over the terminator,
				// or the loop would go on reading past the end of the string.
				if (*++s == 0)
					return NULL;
				break;

			default:
				break;
		}
	}
}

char *skip_cfws(char const *s)
/*
* Skip any run of white space and parenthesized comments.
*
* Return a pointer to the first character that is neither, or NULL if s is
* NULL, the string ends first, or a comment is not closed.
*/
{
	if (s == NULL)
		return NULL;

	unsigned char const *cur = (unsigned char const*)s;
	for (;;)
	{
		while (isspace(*cur))
			++cur;

		if (*cur == 0)
			return NULL; // nothing but CFWS up to the end

		if (*cur != '(')
			return (char*)cur;

		char const *closing = skip_comment((char const*)cur);
		if (closing == NULL)
			return NULL; // unterminated comment

		assert(*closing == ')');
		cur = (unsigned char const*)closing + 1;
	}
}

static char *skip_digits(char const *s)
/*
* Skip leading decimal digits.
*
* Return a pointer to the first non-digit character, or NULL if s is NULL or
* the string ends right after the digits.
*/
{
	if (s == NULL)
		return NULL;

	unsigned char const *cur = (unsigned char const*)s;
	while (isdigit(*cur))
		++cur;

	return *cur != 0? (char*)cur: NULL;
}


/*
* Scanner state shared by the a_r_scan* functions.
*/
typedef struct token
{
	char const *p;     // input cursor, NULL once the input is exhausted
	char const *end;   // one past the end of the output buffer
	char *q;           // output cursor, where the next word gets written
	int end_delimiter; // delimiter that followed the last word, 0 at the end
} token;

static char* a_r_scan(token *tok, int joint)
/*
* Scan one word made of alphanumerics, '-' and bytes >= 0x80.  Words joined
* by '.', possibly surrounded by white space or comments, are copied as a
* single dotted word.  The 0-terminated result is written at tok->q.
*
* joint selects which optional trailing version gets discarded:
*   0    digits following the word (authserv-id version);
*   '/'  a '/' followed by digits (method version);
*   any other value: nothing is discarded.
*
* On return tok->p is past the word and its delimiter (NULL or at the
* terminator if the input is exhausted), tok->q is past the terminating 0,
* and tok->end_delimiter is ';' or '=' if that character followed and was
* consumed, ' ' if any other character follows (it is not consumed), 0 at
* the end of the input.
*
* Return the entry value of tok->q, that is the scanned word, which may be
* empty.  Return NULL, leaving tok untouched, if fewer than 4 bytes are left
* in the output buffer.
*/
{
	assert(tok && tok->p && tok->q);

	char const *p = skip_cfws(tok->p);
	char *q = tok->q, *entry = q;

	int ch = 0;

	while (p)
	{
		if (q + 4 > tok->end)
			return NULL;

		// copy a run of word characters
		while (p && (ch = *(unsigned char*)p) != 0)
		{
			if (!isalnum(ch) && ch != '-' && ch < 0x80)
				break;

			*q++ = ch;
			++p;
		}

		if (isspace(ch) || ch == '(')
		{
			p = skip_cfws(p);
			ch = p? *(unsigned char*)p: 0;
		}

		if (ch == '.')
		{
			// a dot joins the next word to this one
			*q++ = ch;
			ch = *(unsigned char*)++p;
			if (isspace(ch) || ch == '(')
			{
				p = skip_cfws(p);
				// Refresh ch like the other call sites do.  Otherwise, when
				// the input ends here, p is NULL while ch is still the stale
				// space or '(', which trips the assertion below.
				ch = p? *(unsigned char*)p: 0;
			}
		}
		else
			break;
	}

	if (joint == 0) // authserv-id
	{
		if (isdigit(ch))  // discard version
		{
			p = skip_digits(p);
			ch = p? *(unsigned char*)p: 0;
			if (isspace(ch) || ch == '(')
			{
				p = skip_cfws(p);
				ch = p? *(unsigned char*)p: 0;
			}
		}
	}
	else if (joint == '/')
	{
		if (ch == '/') // skip method version
		{
			ch = *(unsigned char*)++p;
			if (isspace(ch) || ch == '(')
			{
				p = skip_cfws(p);
				ch = p? *(unsigned char*)p: 0;
			}
			if (isdigit(ch))  // discard version
			{
				p = skip_digits(p);
				ch = p? *(unsigned char*)p: 0;
				if (isspace(ch) || ch == '(')
				{
					p = skip_cfws(p);
					ch = p? *(unsigned char*)p: 0;
				}
			}
		}
	}

	/*
	* Consume delimiter
	*/
	assert(p || ch == 0);
	if (ch)
	{
		if (ch == ';' || ch == '=')
			++p;
		else
			ch = ' '; // anything else is left in place and reported as space
	}

	*q++ = 0;
	tok->q = q;
	tok->p = p;
	tok->end_delimiter = p? ch: 0;

	return entry;
}

int must_be_quoted(char const *s)
/*
* value := token / quoted-string
* token := 1*<any (US-ASCII) CHAR except SPACE, CTLs, or tspecials>
*
* Return 1 if s contains a character that is not allowed in a token, so that
* s has to be written as a quoted-string; return 0 otherwise.
*/
{
	assert(s);

	static char const tspecials[] = "()<>@,;:\\\"/[]?=";

	for (unsigned char const *cur = (unsigned char const*)s; *cur != 0; ++cur)
	{
		int const c = *cur;
		if (isspace(c) || iscntrl(c))
			return 1;
		if (strchr(tspecials, c) != NULL)
			return 1;
	}

	return 0;
}

static char* a_r_scan_value(token *tok)
/*
* Value has more possibilities, defined in RFC2045:
*
*   value := token / quoted-string
*
*   token := 1*<any (US-ASCII) CHAR except SPACE, CTLs,
*               or tspecials>
*
*   tspecials :=  "(" / ")" / "<" / ">" / "@" /
*                 "," / ";" / ":" / "\" / <">
*                 "/" / "[" / "]" / "?" / "="
*                 ; Must be in quoted-string,
*                 ; to use within parameter values
*
* Same rules as a_r_scan.
*
* The value is copied to the output buffer with its quotes removed and its
* quoted pairs resolved, except that an escaped backslash or double quote
* keeps its backslash.  If the result must_be_quoted(), it is wrapped in
* double quotes.
*
* On return tok->p, tok->q and tok->end_delimiter are updated;
* end_delimiter is ';' if that followed (it is consumed), ' ' if white space
* or a comment was skipped before any other character, 0 at the end of the
* input, otherwise the character that stopped the scan.
*
* Return the 0-terminated value, possibly empty, or NULL if the output buffer
* is nearly full or the input ends inside a quoted string or right after
* a backslash.
*/
{
	assert(tok && tok->p && tok->q);

	char const *p = skip_cfws(tok->p);
	char *q = tok->q + 1, *entry = q; // save a position, if must be quoted

	int quot = 0, escape = 0, ch = 0;

	while (p && (ch = *(unsigned char*)p++) != 0)
	{
		if (q + 4 > tok->end)
			return NULL;

		// character following a backslash: copy it whatever it is
		if (escape)
		{
			if (ch == '\\' || ch == '"')
				*q++ = '\\';
			*q++ = ch;
			escape = 0;
			continue;
		}

		// inside a quoted string everything is copied until the closing quote
		if (quot)
		{
			switch (ch)
			{
				case '\\':
					escape = 1;
					continue;

				case '"':
					quot = 0;
					continue;

				default:
					*q++ = ch;
					continue;
			}
		}

		switch (ch)
		{
			case '\\':
				escape = 1;
				continue;

			case '"':
				quot = 1;
				continue;

			case '-': // valid in keywords
				*q++ = ch;
				continue;

			default:
				break;
		}

		if (isalnum(ch))
		{
			*q++ = ch; // valid in keyword
			continue;
		}

		if (!isspace(ch) && !iscntrl(ch) &&
			strchr("()<>,;:/[]?=", ch) == NULL)
		// tspecials except '@' which is valid, '\\' and '"'
		{
			*q++ = ch;
			continue;
		}

		if ((ch & 0x80) != 0) // assume utf-8 and accept
		{
			*q++ = ch; // valid in value
			continue;
		}

		// white space, control or unquoted tspecial: the value ends here
		break;
	}

	// the loop read one char ahead, step back onto the char held in ch
	if (p)
		--p;
	assert(p == NULL || ch == *(unsigned char*)p);

	int seen_space = 0;
	if (isspace(ch) || ch == '(')
	{
		seen_space = 1;
		p = skip_cfws(p);
		ch = p? *(unsigned char*)p: 0;
	}

	/*
	* If the next value is a delimiter, use it.
	* Otherwise space or zero is the delimiter.
	*/
	if (ch)
	{
		if (ch == ';')
			++p;
		else if (seen_space)
			ch = ' ';
	}

	// input ended inside a quoted string or right after a backslash
	if (escape || quot)
		return NULL;

	*q = 0;
	if (must_be_quoted(entry))
	{
		// use the position saved on entry for the opening quote
		*--entry = '"';
		*q = '"';
		*++q = 0;
	}
	tok->q = q + 1;
	tok->p = p;
	tok->end_delimiter = p? ch: 0;

	return entry;
}

static int advance_past_semicolon(token *tok)
/*
* If an error occurred, try recover further resinfo's
*
* Look for the next ';' that lies outside quoted strings and comments and is
* not escaped by a backslash.  If found, set tok->p just past it and return 0.
* Return -1, leaving tok untouched, if there is none or if fewer than 8 bytes
* are left in the output buffer.
*/
{
	assert(tok && tok->p && tok->q);

	if (tok->q + 8 > tok->end)
		return -1;

	int in_quotes = 0, escaped = 0;
	char const *cur = skip_cfws(tok->p);

	while (cur != NULL)
	{
		int const c = *(unsigned char const*)cur++;
		if (c == 0)
			break;

		if (escaped)
			escaped = 0; // this char is taken literally
		else if (c == '\\')
			escaped = 1;
		else if (c == '"')
			in_quotes = !in_quotes;
		else if (in_quotes)
			continue;
		else if (isspace(c) || c == '(')
			cur = skip_cfws(cur - 1); // may yield NULL, which ends the loop
		else if (c == ';')
		{
			tok->p = cur;
			return 0;
		}
	}

	return -1;
}

/*
* Messages for the error codes that a_r_parse() passes to the last callback,
* indexed by the negated code.  The last entry is the catch-all.
*/
static char const *const a_r_error_msg[] =
{
	"No error",
	"Scan error",
	"Name not followed by '='",
	"Resinfo or authserv-id not followed by ';' or not last",
	"Too many resinfo stanzas",
	"Name not starting with alpha",
	"Unknown error"
};

char const *a_r_error(int err)
/*
* Return a static, human readable message for err, which should be 0 or one
* of the negative codes given by a_r_parse().  Any other value, positive ones
* included, yields "Unknown error".
*/
{
	// index of the catch-all entry, derived from the table so that the two
	// cannot get out of step
	int const unknown =
		(int)(sizeof a_r_error_msg / sizeof a_r_error_msg[0]) - 1;

	if (err > 0 || err <= -unknown)
		return a_r_error_msg[unknown];

	return a_r_error_msg[-err];
}


int
a_r_parse(char const *a_r, int (*cb)(void*, int, name_val*, size_t), void *cbv)
/*
* Parse a_r, which should start with the authserv-id, and call back cb with
* arguments:
*
* 1 (void*) the cbv given on entry,
* 2 (int) -1 for authserv-id, 0, 1, ... for "resinfo" stanzas, rtc for last call
* 3 (name_val*) an array of name=value pairs, and
* 4 (size_t) the number of elements in the array.
*
* The version, if given in authserv-id and in name, is ignored.
* The value is null for authserv-id.  The array itself is null on the last call.
* On error, the last call gives -1 (scan error), -2 (name not followed by '='),
* -3 (resinfo not followed by ';' or not last), -4 (too many resinfo) or
* -5 (name not starting with alpha).
*
* After an error, parsing resumes past the next ';' if there is one; the
* error code is still reported on the last call.  A non-zero return from cb
* stops the parsing and is passed to the last call.
*
* The strings passed to cb live in a work buffer that is freed before
* returning.  Return what cb returns on the last call, or -1, without
* calling cb at all, if the work buffer cannot be allocated.
*/
{
	assert(a_r);
	assert(cb);

	size_t sz = 2*strlen(a_r);
	if (sz < 32)
		sz = 32;
	char *s = malloc(sz);

	name_val resinfo[16];
	int rtc = 0, hold_rtc = 0, final_rtc = 0;

	if (s == NULL) return -1;

	enum a_r_state {
		a_r_server,
		a_r_method,
		a_r_name,
		a_r_value
	} state = a_r_server;

	// Some of the elements that expect an '=' have an optional joint.
	static const int joint[] = {
		0,   // a_r_server, [CFWS] authserv-id [ CFWS authres-version ]
		'/', // a_r_method, Keyword [ [CFWS] "/" [CFWS] method-version ]
		'.', // a_r_name, ptype [CFWS] "." [CFWS] property [CFWS]
		'@'  //a_r_value, ( value / [ [ local-part ] "@" ] domain-name )
	};

	int count = 0; // call number
	size_t n = 0;  // name=value pairs collected for the current stanza
	token tok;
	memset(&tok, 0, sizeof tok);
	tok.p = a_r;
	tok.q = s;
	tok.end = s + sz;

	do
	{
		char *r = state == a_r_value? a_r_scan_value(&tok):
			a_r_scan(&tok, joint[state]);
		if (r == NULL || *r == 0)
		{
			hold_rtc = -1;
		}
		else switch(state)
		{
			case a_r_server:
				resinfo[0].name = r;
				resinfo[0].value = NULL;
				rtc = (*cb)(cbv, -1, resinfo, 1);
				if (tok.end_delimiter == ';')
					state = a_r_method;
				else
					hold_rtc = -3;
				break;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough="
			case a_r_method:
				n = 0; // a new stanza starts here
				if (count == 0 && strcasecmp(r, "none") == 0)
					break;

				/* else thru */

			case a_r_name:
				resinfo[n].name = r;
				state = a_r_value;
				if (tok.end_delimiter != '=')
					hold_rtc = -2;
				else if (!isalpha(*(unsigned char*)r))
					hold_rtc = -5;
				break;
#pragma GCC diagnostic pop

			case a_r_value:
				resinfo[n].value = r;
				++n;

				if (tok.end_delimiter == ' ')
					state = a_r_name;

				else if (tok.end_delimiter == ';' || tok.end_delimiter == 0)
				{
					// stanza complete, deliver it
					state = a_r_method;
					rtc = (*cb)(cbv, count, resinfo, n);
					++count;
				}
				else
					hold_rtc = -3;
				break;

			default:
				assert(0);
				break;
		}

		if (hold_rtc)
		{
			final_rtc = hold_rtc;

			// try to resume with the next stanza, or give up
			if (tok.p && advance_past_semicolon(&tok) == 0)
			{
				hold_rtc = 0;
				state = a_r_method;
			}
			else
				rtc = hold_rtc;
		}

		/*
		* A full array only matters in the middle of a stanza: n is reset
		* when the next stanza starts, so a stanza that exactly fills the
		* array and has been delivered must not stop the parsing.
		*/
	} while (rtc == 0 && tok.end_delimiter != 0 &&
		(state == a_r_method || n < sizeof resinfo / sizeof resinfo[0]));

	if (rtc == 0 && tok.end_delimiter != 0) final_rtc = -4;
	rtc = (*cb)(cbv, rtc? rtc: final_rtc, NULL, 0); // last call

	free(s);
	return rtc;
}

#if defined TEST_MAIN
#include <stdio.h>
#include <errno.h>

static char *hdrval(const char *a, const char *b)
// Check whether header line a is the field named b, ignoring case.
// b must be without trailing ':'
// White space is allowed between the name and the colon.
// return pointer after colon if headers match, NULL otherwise
{
	assert(a && b && strchr(b, ':') == NULL);

	unsigned char const *hdr = (unsigned char const*)a;
	unsigned char const *name = (unsigned char const*)b;

	// the whole of b must be a case-insensitive prefix of a
	for (; *name != 0; ++hdr, ++name)
		if (*hdr == 0 || tolower(*hdr) != tolower(*name))
			return NULL;

	// then only white space may precede the colon
	for (;; ++hdr)
	{
		int const c = *hdr;
		if (c == ':')
			return (char*)(hdr + 1);
		if (!isspace(c)) // end of string included
			return NULL;
	}
}

/*
* Kind of header field being examined; the first three index the counters
* of a_r_parm.
*/
typedef enum a_r_type
{
	plain_a_r = 0, // Authentication-Results
	old_a_r = 1,   // Old-Authentication-Results
	arc_a_r = 2,   // ARC-Authentication-Results
	not_a_r = 3    // anything else, not parsed
} a_r_type;

/*
* Callback parameter of my_ar: counters of dkim results, indexed by the
* type of the header they were found in.
*/
typedef struct a_r_parm
{
	a_r_type type; // kind of the header being parsed
	int passed[3]; // dkim=pass
	int failed[3]; // dkim=fail
	int other[3];  // dkim with any other result
} a_r_parm;

static int my_ar(void* parm, int which, name_val* nv, size_t count)
// a_r_parse callback: tally the result of each dkim stanza in the a_r_parm
// that parm points to, under the header type stored there.
// Return which on the last call (nv is NULL), 0 otherwise.
{
	if (nv == NULL) // last call
		return which;

	if (which < 0 || count == 0) // authserv-id, or nothing to look at
		return 0;

	a_r_parm *ar = parm;
	assert(ar->type < not_a_r);

	if (strcasecmp(nv->name, "dkim") != 0)
		return 0;

	// pick the counter set according to the result of the method
	int *tally = ar->other;
	if (strcasecmp(nv->value, "pass") == 0)
		tally = ar->passed;
	else if (strcasecmp(nv->value, "fail") == 0)
		tally = ar->failed;

	tally[ar->type] += 1;
	return 0;
}

/*
* Test driver: each argument is a file read as a mail header.  Every
* Authentication-Results, Old-Authentication-Results and
* ARC-Authentication-Results field is unfolded and parsed, and the number
* of dkim results that passed, failed or had another value is printed per
* kind of field.  Reading stops at the first empty line or at end of file.
* Always return 0.
*/
int main(int argc, char *argv[])
{
	for (int i = 1; i < argc; ++i)
	{
		FILE *fp = fopen(argv[i], "r");
		if (fp == NULL)
		{
			printf("Cannot open %s: %s\n", argv[i], strerror(errno));
			continue;
		}

		char buf[8192];
		int lineno = 0;
		unsigned int keep = 0; // bytes already in buf that belong to this field

		a_r_parm arp;
		memset(&arp, 0, sizeof arp);

		for (;;)
		{
			// append the next physical line after what is kept
			char *p = fgets(&buf[keep], sizeof buf - keep, fp);
			char *eol = p? strchr(p, '\n'): NULL;
			++lineno;
			if (eol == NULL)
			{
				// read error, end of file, or line not fitting in the buffer
				printf("skip %s at line %d\n", argv[i], lineno);
				break;
			}


			// Reading algorithm: the next char is read ahead,
			// thus the value to keep in the buffer is positive
			// except after reading the very first line.
			int const next = eol >= p? fgetc(fp): '\n';
			int const cont = next != EOF && next != '\n';
			char *const start = buf;

			if (cont && isspace(next)) // wrapped
			{
				// keep the newline, store the char read ahead after it
				// and read the continuation line right behind
				*++eol = next;
				keep = eol + 1 - start;
				continue;
			}

			/*
			* full header, including trailing \n, is in buffer
			* process it.
			*/
			*eol = 0;
			char *s;

			if ((s = hdrval(start, "Authentication-Results")) != NULL)
			{
				arp.type = plain_a_r;
			}

			else if ((s = hdrval(start, "Old-Authentication-Results")) != NULL)
			{
				arp.type = old_a_r;
			}

			else if ((s = hdrval(start, "ARC-Authentication-Results")) != NULL)
			{
				// skip the leading instance tag, made of white space, digits,
				// 'i' and '=', which must be followed by ';'
				int ch;
				while (isspace(ch = *(unsigned char*)s) || isdigit(ch) ||
					ch == 'i' || ch == '=')
						++s;
				if (ch == ';')
				{
					arp.type = arc_a_r;
					++s;
				}
				else
					arp.type = not_a_r;
			}

			else
				arp.type = not_a_r;

			if (arp.type < not_a_r)
			{
				int rtc = a_r_parse(s, &my_ar, &arp);
				if (rtc)
				{
					printf("a_r_parse returned %d in %s line %d\n",
						rtc, argv[i], lineno);
				}
			}

			// end of file or empty line: the header is over
			if (!cont)
				break;

			// the char read ahead is the first one of the next field
			start[0] = next;
			keep = 1;
		}

		fclose(fp);
		// columns are: passed, failed, other
		printf("%s\n\tplain: %2d %2d %2d\n\told:   %2d %2d %2d\n\tarc:   %2d %2d %2d\n\n",
			argv[i],
			arp.passed[plain_a_r], arp.failed[plain_a_r], arp.other[plain_a_r],
			arp.passed[old_a_r], arp.failed[old_a_r], arp.other[old_a_r],
			arp.passed[arc_a_r], arp.failed[arc_a_r], arp.other[arc_a_r]);
	}

	return 0;
}
#elif defined TEST_UTIL
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

// verbosity level, incremented by each 'v' given in an option argument
static int verbose = 0;

static int my_cb(void *v, int step, name_val* nv, size_t nv_count)
// a_r_parse callback: print the authserv-id followed by ';', then each
// stanza on a line of its own as a list of name=value pairs.
// Return step on the last call (nv is NULL), 0 otherwise.
{
	assert(v == NULL);
	(void)v;

	if (verbose)
	{
		char const *plural = nv_count > 1? "s": "";
		printf("%3zu value%s at step %d\n", nv_count, plural, step);
	}

	if (nv == NULL) // last call
		return step;

	if (step < 0) // authserv-id
	{
		printf("%s;\n", nv[0].name);
		return 0;
	}

	size_t i = 0;
	while (i < nv_count)
	{
		if (verbose)
			printf(" name/value [%zu]: ", i);
		printf(" %s=%s", nv[i].name, nv[i].value);
		if (verbose)
			putchar('\n');
		++i;
	}
	putchar('\n');

	return 0;
}

/*
* Test utility: parse the content of the file (or fifo) given as argument
* as the value of an Authentication-Results field and print what a_r_parse
* calls back.  Each 'v' in an option argument increases verbosity.
*
* Return 0 if the arguments were valid and a_r_parse returned 0, 1 otherwise.
*/
int main(int argc, char *argv[])
{
	char *fname = NULL;
	int i, errs = 0, rtc = 1;

	for (i = 1; i < argc; ++i)
	{
		char *arg = argv[i];

		if (arg[0] == '-' && arg[1])
		{
			int ch;
			while ((ch = *++arg) != 0)
			{
				switch (ch)
				{
					case 'v':
						++verbose;
						break;

					default:
						fprintf(stderr, "Invalid arg[%d]: %s\n", i, argv[i]);
						++errs;
						break;
				}
			}
		}
		else if (fname == NULL)
			fname = arg;
		else
		{
			fprintf(stderr, "Unexpected arg[%d]: %s\n", i, argv[i]);
			++errs;
		}
	}

	// without a file name there is nothing to open; passing NULL to fopen
	// is not allowed
	if (errs == 0 && fname == NULL)
	{
		fputs("Missing file name\n", stderr);
		++errs;
	}

	if (errs == 0)
	{
		FILE *fp = fopen(fname, "r");
		if (fp == NULL)
			perror(fname);
		else
		{
			struct stat st;
			char *buf = NULL;
			size_t stin = 0;

			/*
			* A regular file is read whole; from a fifo at most one block
			* is read.  Either way one more byte is allocated for the
			* terminating 0.
			*/
			if (fstat(fileno(fp), &st) == 0 &&
				st.st_size < 65535 &&
				(
					(S_ISREG(st.st_mode) &&
					(buf = malloc(st.st_size + 1)) != NULL &&
					fread(buf, stin = st.st_size, 1, fp) == 1)
					||
					(S_ISFIFO(st.st_mode) &&
					(buf = malloc(st.st_blksize + 1)) != NULL &&
					(stin = fread(buf, 1, st.st_blksize, fp)) > 0)
				))
			{
				buf[stin] = 0;
				if (verbose)
					printf("scanning %s\n-----\n", buf);

				rtc = a_r_parse(buf, &my_cb, NULL);
				if (verbose)
					printf("-----\na_r_parse returned %d\n", rtc);
			}

			free(buf);
			fclose(fp);
		}
	}

	return errs || rtc;
}
#endif
