#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>

#include "json.h"
#include "util.h"

/* expands a string literal into two arguments: the literal itself and its
   length without the terminating NUL; not referenced in the visible code */
#define STRP(s) s,sizeof(s)-1

/* a tweet: one node of the singly linked list built while parsing.
   Nodes are zero-initialized by calloc(), so unset strings are empty. */
struct tweet {
	char fullname[1024];     /* [].user.name */
	int  ispinned;           /* set to 1 in main() when itemid is a pinned id */
	char itemusername[1024]; /* [].retweeted_status.user.screen_name */
	char itemfullname[1024]; /* [].retweeted_status.user.name */
	char full_text[4096];    /* retweeted text if present, else [].full_text */
	char username[1024];     /* [].user.screen_name */
	time_t timestamp;        /* parsed [].created_at, -1 when not set */
	char datatime[16];       /* not referenced in the visible code */
	char itemid[64];         /* [].id_str */
	char retweetid[64];      /* [].retweeted_status.id_str */

	struct tweet *next;      /* next tweet in input order, NULL at the tail */
};

/* a text substitution applied to the tweet text by printexpand():
   occurrences of `search` are printed as `replace` */
struct replacement {
	char search[256];   /* text to look for (short URL or HTML entity) */
	size_t search_len;  /* strlen(search), cached when the entry is added */
	char replace[1024]; /* text printed instead of `search` */

	struct replacement *next; /* next entry, NULL at the tail */
};

/* list of parsed tweets: head and current (last added) node */
static struct tweet *tweets, *tc;
/* list of replacements: head and current (last added) node */
static struct replacement *reps, *rc;
/* scratch buffers holding the fields of the URL / media object that is
   currently being parsed; cleared once a replacement has been registered */
static char expanded_url[1024], media_url[1024], url[256];

/* capacity of the pinned tweet id table */
#define MAX_PINNED 5
/* ids collected from [].user.pinned_tweet_ids[] */
static char pinnedids[MAX_PINNED][64];
/* number of entries of pinnedids in use */
static size_t npinned;

/*
 * Convert a broken-down UTC date and time to seconds since the Unix epoch.
 *
 * year: years since 1900 (70 is 1970)
 * mon:  month with 0 = January; values outside 0..11 are normalized by
 *       carrying whole years into `year`
 * day:  day of the month, starting at 1
 * hour, min, sec: time of day; added as plain offsets without range checks
 *
 * Returns the timestamp. No time zone or leap second handling is done.
 */
long long
datetounix(long long year, int mon, int day, int hour, int min, int sec)
{
	/* seconds elapsed before the start of each month in a non-leap year */
	static const int secs_through_month[] = {
		0, 31 * 86400, 59 * 86400, 90 * 86400,
		120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
		243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
	int is_leap = 0, cycles, centuries = 0, leaps = 0, rem, adj;
	long long t;

	/* bring the month into 0..11 so the table lookup below cannot read
	   outside of secs_through_month */
	if (mon < 0 || mon >= 12) {
		adj = mon / 12;
		mon %= 12;
		if (mon < 0) {
			adj--;
			mon += 12;
		}
		year += adj;
	}

	if (year - 2ULL <= 136) {
		/* fast path for 1902..2038: every 4th year is a leap year */
		leaps = (year - 68) >> 2;
		if (!((year - 68) & 3)) {
			/* the leap day of the current year is added per month below */
			leaps--;
			is_leap = 1;
		} else {
			is_leap = 0;
		}
		t = 31536000 * (year - 70) + 86400 * leaps;
	} else {
		/* general path: count leap days relative to the year 2000 using
		   400-year cycles, centuries and 4-year groups */
		cycles = (year - 100) / 400;
		rem = (year - 100) % 400;
		if (rem < 0) {
			cycles--;
			rem += 400;
		}
		if (!rem) {
			is_leap = 1;
		} else {
			if (rem >= 300)
				centuries = 3, rem -= 300;
			else if (rem >= 200)
				centuries = 2, rem -= 200;
			else if (rem >= 100)
				centuries = 1, rem -= 100;
			if (rem) {
				leaps = rem / 4U;
				rem %= 4U;
				is_leap = !rem;
			}
		}
		leaps += 97 * cycles + 24 * centuries - is_leap;
		t = (year - 100) * 31536000LL + leaps * 86400LL + 946684800 + 86400;
	}
	t += secs_through_month[mon];
	/* from March on a leap year is one day further along */
	if (is_leap && mon >= 2)
		t += 86400;
	t += 86400LL * (day - 1);
	t += 3600LL * hour;
	t += 60LL * min;
	t += sec;

	return t;
}

/* parse a timestamp of the form "Wed May 27 04:12:34 +0000 2020".
   The weekday and the time zone field are read but not interpreted: the
   offset is assumed to be "+0000".
   On success the Unix time is stored in *tp (when tp is not NULL) and 0 is
   returned; on malformed or out of range input -1 is returned and *tp is
   left untouched. */
static int
parsetime(const char *s, time_t *tp)
{
	static const char *const names[12] = {
		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
	};
	char wday[4], month[4], zone[6];
	int day, hh, mm, ss, yyyy;
	int idx, monthno = 0;

	/* skip leading whitespace */
	while (isspace((unsigned char)*s))
		s++;

	if (sscanf(s, "%3s %3s %02d %02d:%02d:%02d %5s %4d",
	           wday, month, &day, &hh, &mm, &ss, zone, &yyyy) != 8)
		return -1;

	/* translate the month abbreviation to 1..12, 0 means unknown */
	for (idx = 0; idx < 12; idx++) {
		if (strcmp(names[idx], month) == 0) {
			monthno = idx + 1;
			break;
		}
	}
	if (!monthno)
		return -1;

	/* reject fields outside their valid range */
	if (yyyy < 0 || yyyy > 9999)
		return -1;
	if (monthno < 1 || monthno > 12)
		return -1;
	if (day < 1 || day > 31)
		return -1;
	if (hh < 0 || hh > 23)
		return -1;
	if (mm < 0 || mm > 59)
		return -1;
	if (ss < 0 || ss > 59)
		return -1;

	if (tp != NULL)
		*tp = datetounix(yyyy - 1900, monthno - 1, day, hh, mm, ss);

	return 0;
}

/* write s to stdout, dropping every control character */
static void
printescape(const char *s)
{
	unsigned char c;

	while ((c = (unsigned char)*s++) != '\0') {
		if (iscntrl(c))
			continue;
		putchar(c);
	}
}

/* print text to stdout and expand urls: whitespace is printed as a single
   space character, other control characters are dropped, and at every
   position the first entry of the replacement list whose search text
   matches is printed (via printescape()) instead of the matched text */
static void
printexpand(const char *s)
{
	struct replacement *r;

	for (; *s; s++) {
		if (isspace((unsigned char)*s)) {
			putchar(' ');
			continue;
		} else if (iscntrl((unsigned char)*s)) {
			continue;
		}
		for (r = reps; r; r = r->next) {
			/* an empty pattern compares equal at every position and
			   `search_len - 1` would move the cursor backwards, so
			   the loop would never terminate: ignore such entries */
			if (!r->search_len)
				continue;
			if (!strncmp(s, r->search, r->search_len)) {
				/* the loop increment skips the last matched byte */
				s += r->search_len - 1;
				printescape(r->replace);
				break;
			}
		}
		/* no replacement matched: print the byte as-is */
		if (!r)
			putchar(*s);
	}
}

/* write one tweet to stdout as a single line of TAB-separated fields:
   timestamp (empty when unset), username, full name, expanded text,
   item id, item username, item full name, retweet id, pinned flag.
   The item username / full name fall back to the tweet's own values when
   no retweeted user was recorded. */
static void
printtweet(struct tweet *t)
{
	const char *itemuser, *itemname;

	itemuser = t->itemusername[0] ? t->itemusername : t->username;
	itemname = t->itemfullname[0] ? t->itemfullname : t->fullname;

	if (t->timestamp != -1)
		printf("%lld", (long long)t->timestamp);
	putchar('\t');

	printescape(t->username);
	putchar('\t');

	printescape(t->fullname);
	putchar('\t');

	printexpand(t->full_text);
	putchar('\t');

	printescape(t->itemid);
	putchar('\t');

	printescape(itemuser);
	putchar('\t');

	printescape(itemname);
	putchar('\t');

	printescape(t->retweetid);
	putchar('\t');

	printf("%d", t->ispinned);
	putchar('\n');
}

/* remember the id of a pinned tweet.
   str is copied (truncated to the slot size if needed) into the next free
   slot of pinnedids; once all MAX_PINNED slots are in use further ids are
   silently ignored. */
void
addpinned(const char *str)
{
	/* table full: npinned is the index of the next free slot, so every
	   slot up to and including MAX_PINNED - 1 may be used */
	if (npinned >= MAX_PINNED)
		return;
	strlcpy(pinnedids[npinned], str, sizeof(pinnedids[0]));
	npinned++;
}

void
addtweet(void)
{
	struct tweet *t;

	if (!(t = calloc(1, sizeof(*t))))
		err(1, "calloc");
	t->timestamp = -1;
	if (tweets)
		tc = tc->next = t;
	else
		tweets = tc = t;
}

/* register a text replacement for printexpand(): occurrences of `search`
   are printed as `replace`. Both strings are copied and truncated to the
   size of their buffers.
   The request is ignored when `search` is empty, or when an existing entry's
   search text is a prefix of (or equal to) `search`: printexpand() uses the
   first matching entry, so such a new entry could never be reached.
   Exits the program when the allocation fails. */
void
addreplacement(const char *search, const char *replace)
{
	struct replacement *r;

	/* an empty pattern would be stored with search_len == 0: it would then
	   match at every position in printexpand() and make the prefix check
	   below reject every later replacement */
	if (!search[0])
		return;

	for (r = reps; r; r = r->next) {
		if (!strncmp(search, r->search, r->search_len))
			return;
	}

	if (!(r = calloc(1, sizeof(*r))))
		err(1, "calloc");
	strlcpy(r->search, search, sizeof(r->search));
	r->search_len = strlen(r->search);
	strlcpy(r->replace, replace, sizeof(r->replace));

	/* append at the tail to keep the entries in insertion order */
	if (reps)
		rc = rc->next = r;
	else
		reps = rc = r;
}

/* callback handed to parsejson() in main(): looks at the path to the
   current JSON node and copies the values of interest into the tweet that
   is being built (tc), the pinned id table or the replacement list.
   nodes: path from the root to the current node; the type and name of
          nodes[0] .. nodes[depth - 1] are inspected
   depth: number of path entries
   str:   text of the current value (presumably provided by the parser;
          its form for non-string values is not visible here) */
void
processnodes(struct json_node *nodes, size_t depth, const char *str)
{
	/* [] -> {}: an object directly inside the top-level array starts a
	   new tweet */
	if (depth == 2 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT) {
		addtweet();
	}

	/* fields stored in the current tweet, only once one exists */
	if (tc) {
		/* [].created_at, [].id_str, [].full_text */
		if (depth == 3 &&
		    nodes[0].type == JSON_TYPE_ARRAY &&
		    nodes[1].type == JSON_TYPE_OBJECT &&
		    nodes[2].type == JSON_TYPE_STRING) {
			if (!strcmp(nodes[2].name, "created_at")) {
				/* on a parse error the timestamp keeps its value */
				parsetime(str, &tc->timestamp);
			} else if (!strcmp(nodes[2].name, "id_str")) {
				strlcpy(tc->itemid, str, sizeof(tc->itemid));
			} else if (!strcmp(nodes[2].name, "full_text")) {
				/* if set by retweet text don't override */
				if (!tc->full_text[0])
					strlcpy(tc->full_text, str, sizeof(tc->full_text));
			}
		}
		/* [].user.name, [].user.screen_name */
		if (depth == 4 &&
		    nodes[0].type == JSON_TYPE_ARRAY &&
		    nodes[1].type == JSON_TYPE_OBJECT &&
		    nodes[2].type == JSON_TYPE_OBJECT &&
		    !strcmp(nodes[2].name, "user")) {
			if (nodes[3].type == JSON_TYPE_STRING) {
				if (!strcmp(nodes[3].name, "name")) {
					strlcpy(tc->fullname, str, sizeof(tc->fullname));
				} else if (!strcmp(nodes[3].name, "screen_name")) {
					strlcpy(tc->username, str, sizeof(tc->username));
				}
			}
		}

		/* [].retweeted_status.id_str, [].retweeted_status.full_text */
		if (depth == 4 &&
		    nodes[0].type == JSON_TYPE_ARRAY &&
		    nodes[1].type == JSON_TYPE_OBJECT &&
		    nodes[2].type == JSON_TYPE_OBJECT &&
		    nodes[3].type == JSON_TYPE_STRING &&
		    !strcmp(nodes[2].name, "retweeted_status")) {
			if (!strcmp(nodes[3].name, "id_str")) {
				strlcpy(tc->retweetid, str, sizeof(tc->retweetid));
			} else if (!strcmp(nodes[3].name, "full_text")) {
				/* the retweeted text always replaces the tweet text */
				strlcpy(tc->full_text, str, sizeof(tc->full_text));
			}
		}

		/* [].retweeted_status.user.name,
		   [].retweeted_status.user.screen_name */
		if (depth == 5 &&
		    nodes[0].type == JSON_TYPE_ARRAY &&
		    nodes[1].type == JSON_TYPE_OBJECT &&
		    nodes[2].type == JSON_TYPE_OBJECT &&
		    nodes[3].type == JSON_TYPE_OBJECT &&
		    nodes[4].type == JSON_TYPE_STRING &&
		    !strcmp(nodes[2].name, "retweeted_status") &&
		    !strcmp(nodes[3].name, "user")) {
			if (!strcmp(nodes[4].name, "name")) {
				strlcpy(tc->itemfullname, str, sizeof(tc->itemfullname));
			} else if (!strcmp(nodes[4].name, "screen_name")) {
				strlcpy(tc->itemusername, str, sizeof(tc->itemusername));
			}
		}
	}

	/* [].user.pinned_tweet_ids[]: numeric ids of pinned tweets */
	if (depth == 5 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT &&
	    nodes[2].type == JSON_TYPE_OBJECT &&
	    !strcmp(nodes[2].name, "user")) {
		if (nodes[3].type == JSON_TYPE_ARRAY &&
		    !strcmp(nodes[3].name, "pinned_tweet_ids")) {
			if (nodes[4].type == JSON_TYPE_NUMBER) {
				addpinned(str);
			}
		}
	}

	/* [].entities.urls[].url, [].entities.urls[].expanded_url */
	if (depth == 6 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT &&
	    nodes[2].type == JSON_TYPE_OBJECT &&
	    nodes[3].type == JSON_TYPE_ARRAY &&
	    nodes[4].type == JSON_TYPE_OBJECT &&
	    nodes[5].type == JSON_TYPE_STRING &&
	    !strcmp(nodes[2].name, "entities") &&
	    !strcmp(nodes[3].name, "urls")) {
		if (!strcmp(nodes[5].name, "url")) {
			strlcpy(url, str, sizeof(url));
		} else if (!strcmp(nodes[5].name, "expanded_url")) {
			/* assumes "expanded_url" is specified after "url" */
			addreplacement(url, str);
			url[0] = '\0';
		}
	}

	/* [].extended_entities.media[]: media_url_https, url, expanded_url
	   and type; the replacement is registered when "type" is seen, so
	   the other fields are expected to come before it */
	if (depth == 6 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT &&
	    nodes[2].type == JSON_TYPE_OBJECT &&
	    nodes[3].type == JSON_TYPE_ARRAY &&
	    nodes[4].type == JSON_TYPE_OBJECT &&
	    nodes[5].type == JSON_TYPE_STRING &&
	    !strcmp(nodes[2].name, "extended_entities") &&
	    !strcmp(nodes[3].name, "media")) {
		if (!strcmp(nodes[5].name, "media_url_https")) {
			strlcpy(media_url, str, sizeof(media_url));
		} else if (!strcmp(nodes[5].name, "url")) {
			strlcpy(url, str, sizeof(url));
		} else if (!strcmp(nodes[5].name, "expanded_url")) {
			strlcpy(expanded_url, str, sizeof(expanded_url));
		} else if (!strcmp(nodes[5].name, "type")) {
			/* photos expand to the media URL, any other type to the
			   expanded URL */
			if (!strcmp(str, "photo")) {
				addreplacement(url, media_url);
			} else {
				addreplacement(url, expanded_url);
			}
			/* reset the scratch buffers for the next media object */
			media_url[0] = url[0] = expanded_url[0] = '\0';
		}
	}

	/* [].retweeted_status.entities.urls[].url and .expanded_url;
	   as above "expanded_url" is expected after "url" */
	if (depth == 7 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT &&
	    nodes[2].type == JSON_TYPE_OBJECT &&
	    nodes[3].type == JSON_TYPE_OBJECT &&
	    nodes[4].type == JSON_TYPE_ARRAY &&
	    nodes[5].type == JSON_TYPE_OBJECT &&
	    nodes[6].type == JSON_TYPE_STRING &&
	    !strcmp(nodes[2].name, "retweeted_status") &&
	    !strcmp(nodes[3].name, "entities") &&
	    !strcmp(nodes[4].name, "urls")) {
		if (!strcmp(nodes[6].name, "url")) {
			strlcpy(url, str, sizeof(url));
		} else if (!strcmp(nodes[6].name, "expanded_url")) {
			addreplacement(url, str);
			url[0] = '\0';
		}
	}

	/* [].retweeted_status.extended_entities.media[]: same handling as
	   for the media objects of the tweet itself */
	if (depth == 7 &&
	    nodes[0].type == JSON_TYPE_ARRAY &&
	    nodes[1].type == JSON_TYPE_OBJECT &&
	    nodes[2].type == JSON_TYPE_OBJECT &&
	    nodes[3].type == JSON_TYPE_OBJECT &&
	    nodes[4].type == JSON_TYPE_ARRAY &&
	    nodes[5].type == JSON_TYPE_OBJECT &&
	    nodes[6].type == JSON_TYPE_STRING &&
	    !strcmp(nodes[2].name, "retweeted_status") &&
	    !strcmp(nodes[3].name, "extended_entities") &&
	    !strcmp(nodes[4].name, "media")) {
		if (!strcmp(nodes[6].name, "media_url_https")) {
			strlcpy(media_url, str, sizeof(media_url));
		} else if (!strcmp(nodes[6].name, "url")) {
			strlcpy(url, str, sizeof(url));
		} else if (!strcmp(nodes[6].name, "expanded_url")) {
			strlcpy(expanded_url, str, sizeof(expanded_url));
		} else if (!strcmp(nodes[6].name, "type")) {
			if (!strcmp(str, "photo")) {
				addreplacement(url, media_url);
			} else {
				addreplacement(url, expanded_url);
			}
			media_url[0] = url[0] = expanded_url[0] = '\0';
		}
	}
}

/* parse the JSON document with processnodes() as callback, then print
   every collected tweet as one TAB-separated line.
   Exits with status 1 when pledge() fails and 2 on invalid JSON. */
int
main(void)
{
	struct tweet *cur;
	size_t idx;

	if (pledge("stdio", NULL) == -1)
		err(1, "pledge");

	if (parsejson(processnodes) != 0)
		errx(2, "invalid JSON");

	/* replace some HTML entities */
	addreplacement("&lt;", "<");
	addreplacement("&gt;", ">");
	addreplacement("&amp;", "&");

	cur = tweets;
	while (cur != NULL) {
		/* look for the tweet id in the pinned id table */
		idx = 0;
		while (idx < npinned && strcmp(cur->itemid, pinnedids[idx]) != 0)
			idx++;
		if (idx < npinned)
			cur->ispinned = 1;

		printtweet(cur);
		cur = cur->next;
	}

	return 0;
}
