/*****************************************************************************
 *  ENTROPY - emerging network to reduce orwellian potency yield
 *
 *  Copyright (C) 2002 Juergen Buchmueller <pullmoll@stop1984.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software Foundation,
 *  Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 *
 *	$Id: sock.c,v 1.19 2005/10/06 12:17:06 pullmoll Exp $
 *****************************************************************************/
#include "osd.h"
#include "config.h"
#include "memalloc.h"
#include "sock.h"
#include "peer.h"
#include "logger.h"

/* Take / release the semaphore g_peer->sem_bwlimit around bandwidth counter access */
#define	BWLIMIT_LOCK() osd_sem_wait(&g_peer->sem_bwlimit)
#define	BWLIMIT_UNLOCK() osd_sem_post(&g_peer->sem_bwlimit)
/*
 * Argument for osd_usleep() while waiting for bandwidth: three ticks,
 * presumably expressed in microseconds (TODO confirm osd_usleep() units).
 * Only usable when TICKS_PER_SEC is non-zero.
 */
#define	SOCK_USLEEP	((int64_t)1000000*3/TICKS_PER_SEC)

/* Clamp range applied to conn->minsize after querying the socket */
#define	MINSIZE_MIN	16
#define	MINSIZE_MAX	64
/* Clamp range applied to conn->maxsize after querying the socket */
#define	MAXSIZE_MIN	1024
#define	MAXSIZE_MAX	65536

/* Error return value of sock_acquire_in() / sock_acquire_out() */
#define	INVLIMIT	((uint32_t)-1)

/**
 * @brief Tell if an IP address is from a local network
 *
 * Test the IP address addr against the local, reserved and
 * otherwise non-routable network ranges, and compare it against
 * my own address.
 * If it is either in one of those ranges or my own address,
 * return 1, otherwise return 0.
 *
 * Both addresses are expected in host byte order.
 *
 * @param addr internet address to test
 * @param myaddr my own internet address
 *
 * @result zero if not local IP, 1 if local IP
 */
int is_local_ip(uint32_t addr, uint32_t myaddr)
{
	static const struct {
		uint32_t mask;
		uint32_t net;
	} ranges[] = {
		{ 0xff000000, 0x00000000 },	/* 0.0.0.0/8 */
		{ 0xff000000, 0x7f000000 },	/* 127.0.0.0/8 */
		{ 0xffff0000, 0xa9fe0000 },	/* 169.254.0.0/16 (169 == 0xa9) */
		{ 0xffffff00, 0xc0000200 },	/* 192.0.2.0/24 */
		{ 0xf0000000, 0xe0000000 },	/* 224.0.0.0/4 */
		{ 0xf0000000, 0xf0000000 },	/* 240.0.0.0/4 */
		{ 0xff000000, 0x0a000000 },	/* 10.0.0.0/8 */
		{ 0xfff00000, 0xac100000 },	/* 172.16.0.0/12 */
		{ 0xffff0000, 0xc0a80000 },	/* 192.168.0.0/16 */
		{ 0xffffffff, 0xffffffff }	/* 255.255.255.255/32 */
	};
	unsigned i;

	/* my own address/32 */
	if (addr == myaddr)
		return 1;

	for (i = 0; i < sizeof(ranges) / sizeof(ranges[0]); i++) {
		if ((addr & ranges[i].mask) == ranges[i].net)
			return 1;
	}

	return 0;
}

/**
 * @brief Convert ASCII IP address and port to struct sockaddr_in fields
 *
 * Convert an ASCII IP address and port of the format "a.b.c.d:p"
 * to struct sockaddr_in fields sin_addr.s_addr and sin_port.
 * Each of a, b, c and d must not exceed 255, p must not exceed 65535.
 * The string must contain all four octets and the ':' separator;
 * a string that ends earlier is rejected as a format error.
 *
 * dst is cleared and its sin_family set to AF_INET before parsing,
 * so it is modified even when the function fails.
 *
 * @param dst pointer to struct sockaddr_in to receive the result
 * @param src pointer to NUL terminated string with ASCII IP address
 *
 * @result zero on success, -1 and errno = EINVAL on format error
 */
int sock_aton(struct sockaddr_in *dst, const char *src)
{
	const char *s;
	uint32_t addr = 0;
	uint32_t val = 0;
	int state = 0;	/* 0..3: collecting octet number 'state', 4: collecting port */
	FUN("sock_aton");

	memset(dst, 0, sizeof(*dst));
	dst->sin_family = AF_INET;

	for (s = src; '\0' != *s; s++) {
		if (*s >= '0' && *s <= '9') {
			/* range check after every digit, so val can never overflow */
			val = val * 10 + (uint32_t)(*s - '0');
			if (state < 4 && val > 255) {
				LOGS(L_SOCK,L_ERROR,("invalid addr%d in '%s'\n", state, src));
				errno = EINVAL;
				return -1;
			}
			if (val > 65535) {
				LOGS(L_SOCK,L_ERROR,("invalid port in '%s'\n", src));
				errno = EINVAL;
				return -1;
			}
			continue;
		}

		/* octets 0..2 end with a dot, octet 3 ends with a colon */
		if (state < 4 && *s == (state < 3 ? '.' : ':')) {
			LOGS(L_SOCK,L_DEBUG,("addr%d: %#02x\n", state, val));
			addr |= val << (8 * (3 - state));
			val = 0;
			state++;
			continue;
		}

		if (state < 4) {
			LOGS(L_SOCK,L_ERROR,("invalid char after addr%d '%s'\n", state, src));
		} else {
			LOGS(L_SOCK,L_ERROR,("invalid char after port '%s'\n", src));
		}
		errno = EINVAL;
		return -1;
	}

	/*
	 * The string ended before the port part was reached: val still
	 * holds an octet (or nothing at all) and must not become the port.
	 */
	if (4 != state) {
		LOGS(L_SOCK,L_ERROR,("incomplete address '%s'\n", src));
		errno = EINVAL;
		return -1;
	}

	LOGS(L_SOCK,L_DEBUG,("port: %#04x\n", val));
	dst->sin_addr.s_addr = htonl(addr);
	dst->sin_port = htons((uint16_t)val);
	return 0;
}

/**
 * @brief Format a struct sockaddr_in as "a.b.c.d:p"
 *
 * Produce the dotted quad IP address followed by a colon and the
 * port number for the address found in src.
 *
 * The result lives in one of NUMBUF rotating static slots: the
 * string returned by a call is freed again NUMBUF calls later, so
 * callers must copy it if they need it for longer.
 *
 * @param src pointer to a struct sockaddr_in to convert
 *
 * @result pointer to the formatted string, NULL if formatting failed
 */
const char *sock_ntoa(const struct sockaddr_in *src)
{
#undef	NUMBUF
#define	NUMBUF	32
	/* static storage, so every slot starts out as NULL */
	static char *buff[NUMBUF];
	static int which = 0;
	const uint32_t addr = ntohl(src->sin_addr.s_addr);
	const uint16_t port = ntohs(src->sin_port);
	const uint32_t octet0 = (addr >> 24) & 0xff;
	const uint32_t octet1 = (addr >> 16) & 0xff;
	const uint32_t octet2 = (addr >> 8) & 0xff;
	const uint32_t octet3 = addr & 0xff;
	FUN("sock_ntoa");

	/* advance to the next slot and drop whatever it held before */
	which = (which + 1) % NUMBUF;
	xfree(buff[which]);

	if (pm_asprintf(&buff[which], "%u.%u.%u.%u:%u",
		octet0, octet1, octet2, octet3, port) > 0)
		return buff[which];

	LOGS(L_SOCK,L_ERROR,("number conversion failed (%s)\n",
		strerror(errno)));
	return NULL;
}

const char *conn_status_str(int status)
{
	switch (status) {
	case CONN_CLOSED:
		return "closed";
	case CONN_DNSLOOKUP:
		return "DNS lookup";
	case CONN_CONNECTING:
		return "connecting";
	case CONN_ACCEPTING:
		return "accepting";
	case CONN_CONNECTED:
		return "connected";
	case CONN_SELECTERROR:
		return "select error";
	case CONN_EXCEPTION:
		return "exception";
	case CONN_READERROR:
		return "read error";
	case CONN_WRITEERROR:
		return "write error";
	case CONN_EOF:
		return "end of file";
	case CONN_SHUTDOWN:
		return "shutdown";
	case CONN_CLOSING:
		return "closing";
	}
	return "unknown";
}

/**
 * @brief Set a new status for a connection and log the transition
 *
 * @param conn pointer to the connection context
 * @param status new CONN_* status value
 */
void conn_status_change(conn_t *conn, int status)
{
	const char *before;
	const char *after;
	FUN("conn_status_change");

	before = conn_status_str(conn->status);
	after = conn_status_str(status);

	/* log first, while conn->status still holds the old value */
	LOGS(L_SOCK,L_MINOR,("sk %d status %s -> %s\n",
		conn->socket, before, after));

	conn->status = status;
}

/**
 * @brief Open an outgoing TCP connection
 *
 * Resolve hostname (dotted quad or host name), create a stream socket,
 * connect() it and then switch it to non-blocking mode. On success the
 * connection context is filled in: address, peername and peeraddr
 * strings, socket, maxsize/minsize (clamped to the MAXSIZE_ and MINSIZE_
 * ranges), limited flag, I/O counters and status CONN_CONNECTED.
 *
 * On every failure after the port check conn->socket is set to -1, so
 * that a later sock_shutdown(conn) can not close a stale descriptor.
 * The status is left at the stage that failed (CONN_DNSLOOKUP or
 * CONN_CONNECTING).
 *
 * @param conn pointer to the connection context to fill in
 * @param hostname dotted quad IP address or host name of the peer
 * @param port port number, 1 to 65535
 * @param limited value for conn->limited; forced to 0 for addresses
 *        that is_local_ip() reports as local
 *
 * @result zero on success, -1 on error
 */
int sock_outgoing(conn_t *conn, const char *hostname, int port, int limited)
{
	struct in_addr addr;
	int sk, connected;
	struct hostent *host;
	struct linger li;
	osd_socklen_t len;
	int e, rc = 0;
	FUN("sock_outgoing");

	if (port < 1 || port > 65535) {
		LOGS(L_SOCK,L_ERROR,("invalid port %d for %s\n",
			port, hostname));
		errno = EINVAL;
		return -1;
	}

	conn_status_change(conn, CONN_DNSLOOKUP);
	addr.s_addr = inet_addr(hostname);
	if (addr.s_addr == (uint32_t)-1) {
		LOGS(L_SOCK,L_DEBUG,("'%s' is a hostname?\n", hostname));
		/* inet_addr failed, probably a hostname */
		host = gethostbyname(hostname);
		if (host == NULL) {
			LOGS(L_SOCK,L_ERROR,("hostname '%s' not found (%s)\n",
				hostname, hstrerror(h_errno)));
			conn->socket = -1;
			return -1;
		}
		/* don't dereference an empty or non-IPv4 address list */
		if (AF_INET != host->h_addrtype ||
			NULL == host->h_addr_list ||
			NULL == host->h_addr_list[0]) {
			LOGS(L_SOCK,L_ERROR,("hostname '%s' has no IPv4 address\n",
				hostname));
			conn->socket = -1;
			errno = EINVAL;
			return -1;
		}
		memcpy(&addr.s_addr, host->h_addr_list[0], sizeof(addr.s_addr));
		LOGS(L_SOCK,L_DEBUG,("'%s' is %s\n",
			hostname, inet_ntoa(addr)));
	}

	memset(&conn->address, 0, sizeof(conn->address));
	conn->address.sin_family = AF_INET;
	conn->address.sin_port = htons(port);
	conn->address.sin_addr.s_addr = addr.s_addr;
	pm_snprintf(conn->peername, sizeof(conn->peername), "%s",
		sock_ntoa(&conn->address));

	sk = osd_socket(AF_INET, SOCK_STREAM, PF_UNSPEC);
	if (sk < 0) {
		LOGS(L_SOCK,L_ERROR,("osd_socket(AF_INET,SOCK_STREAM,PF_UNSPEC) failed (%s)\n",
			strerror(errno)));
		conn->socket = -1;
		return -1;
	}
	LOGS(L_SOCK,L_DEBUG,("sk %d\n", sk));

	/* the socket is not yet non-blocking at this point */
	conn_status_change(conn, CONN_CONNECTING);
	connected = connect(sk,
		(struct sockaddr*) &conn->address, sizeof(conn->address));
	if (connected < 0) {
		e = errno;
		LOGS(L_SOCK,L_MINOR,("connect(%s) failed (%s)\n", 
			sock_ntoa(&conn->address),
			strerror(e)));
		osd_closesocket(sk);
		/* same as for a failed osd_socket(): no descriptor to shut down */
		conn->socket = -1;
		errno = e;
		return -1;
	}

	/* for logging purposes buffer the dotted quad IP address and port */
	pm_snprintf(conn->peeraddr, sizeof(conn->peeraddr), "%s",
		sock_ntoa(&conn->address));

	/* failures of the following option calls are logged, but not fatal */
	rc = osd_fcntl3(sk, F_SETFL, O_NONBLOCK);
	if (-1 == rc) {
		LOGS(L_SOCK,L_ERROR,("osd_fcntl3(%d,...) call failed (%s)\n", 
			sk, strerror(errno)));
	}

	li.l_onoff = 0;
	li.l_linger = 0;
	rc = setsockopt(sk, SOL_SOCKET, SO_LINGER, (void *)&li, sizeof(li));
	if (-1 == rc) {
		LOGS(L_SOCK,L_ERROR,("setsockopt(%d,...,SO_LINGER) call failed (%s)\n",
			sk, strerror(errno)));
	}

#ifdef	__CYGWIN__
	len = 0;
	conn->maxsize = MAXSIZE_MAX;
	conn->minsize = MINSIZE_MIN;
#else
#ifdef	SO_SNDBUF
	len = sizeof(conn->maxsize);
	rc = getsockopt(sk, SOL_SOCKET, SO_SNDBUF,
		(void *)&conn->maxsize, &len);
	if (-1 == rc || 0 == len) {
		LOGS(L_SOCK,L_DEBUG,("getsockopt(%d,...,SO_SNDBUF) call failed (%s)\n",
			sk, strerror(errno)));
		conn->maxsize = MAXSIZE_MAX;
		rc = 0;
	}
#else
	conn->maxsize = MAXSIZE_MAX;
#endif

#ifdef	SO_SNDLOWAT
	len = sizeof(conn->minsize);
	rc = getsockopt(sk, SOL_SOCKET, SO_SNDLOWAT,
		(void *)&conn->minsize, &len);
	if (-1 == rc || 0 == len) {
		LOGS(L_SOCK,L_DEBUG,("getsockopt(%d,...,SO_SNDLOWAT) call failed (%s)\n",
			sk, strerror(errno)));
		conn->minsize = MINSIZE_MIN;
		rc = 0;
	}
#else
	conn->minsize = MINSIZE_MIN;
#endif
#endif
	/* use sane value in case getsockopt() failed */
	if (conn->maxsize < MAXSIZE_MIN)
		conn->maxsize = MAXSIZE_MIN;
	if (conn->maxsize > MAXSIZE_MAX)
		conn->maxsize = MAXSIZE_MAX;
	/* reduce bwlimit overhead: at least MINSIZE_MIN bytes */
	if (conn->minsize < MINSIZE_MIN)
		conn->minsize = MINSIZE_MIN;
	if (conn->minsize > MINSIZE_MAX)
		conn->minsize = MINSIZE_MAX;

	LOGS(L_SOCK,L_MINOR,("socket sendspace maxsize %d, minsize %d\n",
		conn->maxsize, conn->minsize));

	conn->socket = sk;
	conn->last_io = time(NULL);
#if	TICKS_PER_SEC
	/* never limit the bandwidth of connections to local addresses */
	conn->limited =
		is_local_ip(
			ntohl(addr.s_addr),
			ntohl(g_conf->node.sin_addr.s_addr)) ?
			0 : limited;
#else
	(void)limited;
	conn->limited = 0;
#endif
	conn->in_total = 0;
	conn->out_total = 0;
	conn_status_change(conn, CONN_CONNECTED);
	LOGS(L_SOCK,L_DEBUG,("connected to %s:%d\n", hostname, port));

	return 0;
}

/**
 * @brief Listen on an address and serve each connection in a forked child
 *
 * Resolve hostname (dotted quad or host name), bind a listening stream
 * socket to hostname:port and then accept connections in an endless
 * loop. For every accepted connection osd_fork2() is called:
 *
 * - the child sets up a connection context (socket, status, non-blocking
 *   mode, maxsize/minsize clamped to the MAXSIZE_ and MINSIZE_ ranges,
 *   limited flag), calls child(conn) and then osd_exit();
 * - the parent closes its copy of the accepted socket and calls
 *   post_accept(), if given; a non-zero result makes the parent close
 *   the listening socket and call osd_exit(0);
 * - if forking fails, both sockets are closed and osd_exit(-1) is called.
 *
 * The function only returns for errors during the setup of the
 * listening socket.
 *
 * @param hostname dotted quad IP address or host name to listen on
 * @param port port number, 1 to 65535
 * @param limited value for conn->limited of accepted connections; forced
 *        to 0 for peers that is_local_ip() reports as local
 * @param child function called in the forked child for the connection
 * @param post_accept optional (may be NULL) function called in the
 *        parent after each accepted connection
 * @param niceness passed on to osd_fork2()
 * @param listener name used as prefix of the info string for osd_fork2()
 *
 * @result -1 on setup errors (bad port, resolver, socket, bind, listen)
 */
int sock_incoming(const char *hostname, int port, int limited,
	void (*child)(conn_t *conn), int (*post_accept)(void),
	int niceness, const char *listener)
{
	char forkinfo[MAXPATHLEN];
	struct in_addr addr;
	int sk, sk2;
	struct hostent *host;
	int reuse_addr = 1;
	struct sockaddr_in listening;
	osd_socklen_t len;
	int e, rc = 0;
	FUN("sock_incoming");

	LOGS(L_SOCK,L_DEBUG,("%s:%d\n", hostname, port));
	if (port < 1 || port > 65535) {
		LOGS(L_SOCK,L_ERROR,("invalid port %d\n", port));
		errno = EINVAL;
		return -1;
	}

	addr.s_addr = inet_addr(hostname);
	if (addr.s_addr == (uint32_t)-1) {
		LOGS(L_SOCK,L_DEBUG,("'%s' is a hostname?\n", hostname));
		/* inet_addr failed, probably a hostname */
		host = gethostbyname(hostname);
		if (host == NULL) {
			LOGS(L_SOCK,L_ERROR,("hostname '%s' not found (%s)\n",
				hostname, hstrerror(h_errno)));
			return -1;
		}
		/* don't dereference an empty or non-IPv4 address list */
		if (AF_INET != host->h_addrtype ||
			NULL == host->h_addr_list ||
			NULL == host->h_addr_list[0]) {
			LOGS(L_SOCK,L_ERROR,("hostname '%s' has no IPv4 address\n",
				hostname));
			errno = EINVAL;
			return -1;
		}
		memcpy(&addr.s_addr, host->h_addr_list[0], sizeof(addr.s_addr));
		LOGS(L_SOCK,L_DEBUG,("'%s' is %s\n",
			hostname, inet_ntoa(addr)));
	}

	memset(&listening, 0, sizeof(listening));
	listening.sin_family = AF_INET;
	listening.sin_port = htons(port);
	listening.sin_addr.s_addr = addr.s_addr;

	/* sk2 is the listening socket, sk the accepted connection */
	sk2 = osd_socket(AF_INET, SOCK_STREAM, PF_UNSPEC);
	if (sk2 < 0) {
		LOGS(L_SOCK,L_ERROR,("osd_socket(AF_INET, SOCK_STREAM, PF_UNSPEC) call failed (%s)\n",
			strerror(errno)));
		return -1;
	}
	LOGS(L_SOCK,L_DEBUG,("sk2 %d\n", sk2));

	/* not fatal if this fails */
	if (-1 == setsockopt(sk2, SOL_SOCKET, SO_REUSEADDR,
		(void *)&reuse_addr, sizeof(reuse_addr))) {
		LOGS(L_SOCK,L_ERROR,("setsockopt(%d, SOL_SOCKET, SO_REUSEADDR, %p, %u) call failed (%s)\n",
			sk2, &reuse_addr, (unsigned)sizeof(reuse_addr),
			strerror(errno)));
	}

	if (bind(sk2, (struct sockaddr *)&listening,
		sizeof(listening)) < 0) {
		e = errno;
		LOGS(L_SOCK,L_ERROR,("bind(%d/%s) call failed (%s)\n",
			listening.sin_family,
			sock_ntoa(&listening),
			strerror(e)));
		osd_closesocket(sk2);
		errno = e;
		return -1;
	}

	/* logged at error level although it is not an error */
	LOGS(L_SOCK,L_ERROR,("listening on %s\n",
		sock_ntoa(&listening)));

	if (-1 == listen(sk2, g_conf->backlog)) {
		e = errno;
		LOGS(L_SOCK,L_ERROR,("listen(%d,%d) call failed (%s)\n",
			sk2, g_conf->backlog, strerror(e)));
		osd_closesocket(sk2);
		errno = e;
		return -1;
	}

	for (;;) {
		conn_t connection, *conn = &connection;
		struct linger li;

		memset(conn, 0, sizeof(*conn));
		conn_status_change(conn, CONN_ACCEPTING);
		len = sizeof(conn->address);
		sk = accept(sk2, (struct sockaddr *)&conn->address, &len);
		if (sk < 0) {
			LOGS(L_SOCK,L_ERROR,("accept() call failed (%s)\n",
				strerror(errno)));
			continue;
		}
		/*
		 * Remember the accepted socket right away: the fork failure
		 * path below hands conn to sock_shutdown(), which would close
		 * descriptor 0 (left over from the memset) otherwise.
		 */
		conn->socket = sk;

		/* for logging purposes buffer the dotted quad IP address and port */
		pm_snprintf(conn->peeraddr, sizeof(conn->peeraddr), "%s",
			sock_ntoa(&conn->address));
		LOGS(L_SOCK,L_DEBUG,("accepted connection on sk %d from %s\n",
			sk, conn->peeraddr));

		pm_snprintf(forkinfo, sizeof(forkinfo), "%s %s",
			listener, conn->peeraddr);
		switch (osd_fork2(forkinfo, niceness, sk)) {
		case -1:
			/* log the niceness that was actually passed to osd_fork2() */
			LOGS(L_SOCK,L_ERROR,("osd_fork2('%s',%d) call failed (%s)\n",
				forkinfo, niceness, strerror(errno)));
			osd_closesocket(sk2);
			sock_shutdown(conn);
			osd_exit(-1);
		case 0:
			LOGS(L_SOCK,L_DEBUG,(">>>> '%s' child is here; sk:%d\n",
				forkinfo, sk));
			conn->last_io = time(NULL);
			conn->socket = sk;
			conn_status_change(conn, CONN_CONNECTED);

			/* failures of the following option calls are not fatal */
			rc = osd_fcntl3(sk, F_SETFL, O_NONBLOCK);
			if (-1 == rc) {
				LOGS(L_SOCK,L_ERROR,("osd_fcntl3(%d,...) call failed (%s)\n", 
					sk, strerror(errno)));
			}
			li.l_onoff = 0;
			li.l_linger = 0;

			rc = setsockopt(sk, SOL_SOCKET, SO_LINGER,
				(void *)&li, sizeof(li));
			if (-1 == rc) {
				LOGS(L_SOCK,L_ERROR,("setsockopt(%d,...,SO_LINGER) call failed (%s)\n",
					sk, strerror(errno)));
			}

#ifdef	__CYGWIN__
			len = 0;
			conn->maxsize = MAXSIZE_MAX;
			conn->minsize = MINSIZE_MIN;
#else

#ifdef	SO_RCVBUF
			len = sizeof(conn->maxsize);
			rc = getsockopt(sk, SOL_SOCKET, SO_RCVBUF,
				(void *)&conn->maxsize, &len);
			if (-1 == rc || 0 == len) {
				LOGS(L_SOCK,L_DEBUG,("getsockopt(%d,...,SO_RCVBUF) call failed (%s)\n",
					sk, strerror(errno)));
				conn->maxsize = MAXSIZE_MAX;
				rc = 0;
			}
#else
			conn->maxsize = MAXSIZE_MAX;
#endif

#ifdef	SO_RCVLOWAT
			len = sizeof(conn->minsize);
			rc = getsockopt(sk, SOL_SOCKET, SO_RCVLOWAT,
				(void *)&conn->minsize, &len);
			if (-1 == rc || 0 == len) {
				LOGS(L_SOCK,L_DEBUG,("getsockopt(%d,...,SO_RCVLOWAT) call failed (%s)\n",
					sk, strerror(errno)));
				rc = 0;
				conn->minsize = MINSIZE_MIN;
			}
#else
			conn->minsize = MINSIZE_MIN;
#endif
#endif
			/* use sane value in case getsockopt() failed */
			if (conn->maxsize < MAXSIZE_MIN)
				conn->maxsize = MAXSIZE_MIN;
			if (conn->maxsize > MAXSIZE_MAX)
				conn->maxsize = MAXSIZE_MAX;
			if (conn->minsize < MINSIZE_MIN)
				conn->minsize = MINSIZE_MIN;
			if (conn->minsize > MINSIZE_MAX)
				conn->minsize = MINSIZE_MAX;
			LOGS(L_SOCK,L_MINOR,("socket recvspace maxsize %d, minsize %d\n",
				conn->maxsize, conn->minsize));

#if	TICKS_PER_SEC
			/* never limit the bandwidth of connections from local addresses */
			conn->limited = is_local_ip(
				ntohl(conn->address.sin_addr.s_addr),
				ntohl(g_conf->node.sin_addr.s_addr)) ?
					0 : limited;
#else
			(void)limited;
			conn->limited = 0;
#endif
			LOGS(L_SOCK,L_DEBUG,(">>>> '%s' calling child\n",
				forkinfo));
			(void)(*child)(conn);
			LOGS(L_SOCK,L_DEBUG,(">>>> '%s' child is back? - did not exit\n",
				forkinfo));
			osd_exit(rc);
		default:
			LOGS(L_SOCK,L_DEBUG,(">>>> '%s' parent is back\n", forkinfo));
			/* the connection now belongs to the child */
			osd_closesocket(sk);
			rc = 0;
			/* If the caller wants to sleep (or whatever) after each accept */
			if (NULL != post_accept)
				rc = (*post_accept)();
			if (0 != rc) {
				LOGS(L_SOCK,L_NORMAL,("post_accept() returned %d\n", rc));
				osd_closesocket(sk2);
				osd_exit(0);
			}
		}
	}

	/* not reached: the loop above has no exit */
	osd_closesocket(sk2);
	return 0;
}

#if	TICKS_PER_SEC
/**
 * @brief Reserve inbound bandwidth for one receive call
 *
 * Take up to limit bytes out of the shared inbound budget
 * g_peer->in.limit. The request is first capped to
 * g_conf->bwlimit_in and conn->maxsize. The call then waits,
 * sleeping SOCK_USLEEP between polls, until the budget holds at
 * least conn->minsize bytes, and grants only a share of the budget
 * so that other connections get their turn as well.
 *
 * If there is no g_peer or the connection is not limited, limit
 * is returned unchanged.
 *
 * @param conn pointer to the connection context
 * @param limit number of bytes we want to receive
 *
 * @result >0 on success, =0 no bandwidth, INVLIMIT on error (errno = reason)
 */
uint32_t sock_acquire_in(conn_t *conn, uint32_t limit)
{
	uint32_t threshold;
	uint32_t share;
	FUN("sock_acquire_in");

	if (NULL == g_peer || 0 == conn->limited)
		return limit;

	/* cap the request */
	if (limit > g_conf->bwlimit_in)
		limit = g_conf->bwlimit_in;
	if (limit > conn->maxsize)
		limit = conn->maxsize;

	threshold = conn->minsize;
	LOGS(L_SOCK,L_DEBUG,("avail:%u limit:%u clip:%u\n",
		g_peer->in.limit, limit, threshold));

	/* poll until the budget reaches the threshold; leaves with the lock held */
	for (;;) {
		if (0 != BWLIMIT_LOCK()) {
			errno = EINVAL;
			return INVLIMIT;
		}
		if (g_peer->in.limit >= threshold)
			break;
		BWLIMIT_UNLOCK();

		/* the socket may have been shut down while we were waiting */
		if (-1 == conn->socket || CONN_CONNECTED != conn->status) {
			LOGS(L_SOCK,L_ERROR,("socket was shut down\n"));
			errno = ENOTSOCK;
			return INVLIMIT;
		}
		osd_usleep(SOCK_USLEEP);
	}

	if (g_peer->in.limit > conn->minsize) {
		/* a 16th, so that 16 sockets can be served at least a little */
		share = (uint32_t)(g_peer->in.limit / 16);
	} else {
		/* half of what is left */
		share = g_peer->in.limit / 2;
	}

	/* a share below 16 bytes is replaced by the whole budget, if that is 16 or more */
	if (share < 16 && g_peer->in.limit >= 16)
		share = (uint32_t)g_peer->in.limit;

	/* never grant more than the share */
	if (limit > share)
		limit = share;

	if (0 == limit) {
		/* nothing to grant: back off and let the caller retry */
		BWLIMIT_UNLOCK();
		osd_usleep(SOCK_USLEEP);
		return 0;
	}

	LOGS(L_SOCK,L_DEBUG,("avail:%u use:%u\n",
		g_peer->in.limit, limit));
	g_peer->in.limit -= limit;
	BWLIMIT_UNLOCK();

	return limit;
}

/**
 * @brief Give back the unused amount of inbound bandwidth
 *
 * After a call to osd_recv(), the amount of data actually
 * read might be smaller than what we would have allowed.
 * Give back the remaining bandwidth to the g_peer->in.limit
 * for a bandwidth limited connection, and account the bytes
 * received in g_peer->in.used[59].
 * Make sure we don't exceed the per tick limit, though.
 *
 * Nothing is done if there is no g_peer or the connection is
 * not limited, i.e. in the cases where sock_acquire_in() did
 * not take anything out of the budget either.
 *
 * @param conn pointer to the connection context
 * @param count number of bytes actually received (negative is treated as 0)
 * @param limit number of bytes that were allowed
 *
 * @result zero on success, -1 and errno = EINVAL if locking failed
 */
int sock_give_back_in(conn_t *conn, ssize_t count, uint32_t limit)
{
	FUN("sock_give_back_in");

	/* BWLIMIT_LOCK() dereferences g_peer, so check it first */
	if (NULL == g_peer || 0 == conn->limited)
		return 0;

	/* a failed receive must not be accounted as negative traffic */
	if (count < 0)
		count = 0;

	if (0 != BWLIMIT_LOCK()) {
		errno = EINVAL;
		return -1;
	}
	/* NOTE(review): slot 59 presumably is the most recent entry - confirm */
	g_peer->in.used[59] += count;
	if (count < (ssize_t)limit) {
		/* give back unused bandwidth */
		g_peer->in.limit += limit - count;
		/* but don't exceed the per tick limit */
		if (g_peer->in.limit > g_conf->bwlimit_in / TICKS_PER_SEC)
			g_peer->in.limit = g_conf->bwlimit_in / TICKS_PER_SEC;
	}
	BWLIMIT_UNLOCK();

	return 0;
}

/**
 * @brief Reserve outbound bandwidth for one send call
 *
 * Take up to limit bytes out of the shared outbound budget
 * g_peer->out.limit. The request is first capped to
 * g_conf->bwlimit_out and conn->maxsize. The call then waits,
 * sleeping SOCK_USLEEP between polls, until the budget holds at
 * least conn->minsize bytes, and grants only a share of the budget
 * so that other connections get their turn as well.
 *
 * If there is no g_peer or the connection is not limited, limit
 * is returned unchanged.
 *
 * @param conn pointer to the connection context
 * @param limit number of bytes we want to send
 *
 * @result >0 on success, =0 no bandwidth, INVLIMIT on error (errno = reason)
 */
uint32_t sock_acquire_out(conn_t *conn, uint32_t limit)
{
	uint32_t threshold;
	uint32_t share;
	FUN("sock_acquire_out");

	if (NULL == g_peer || 0 == conn->limited)
		return limit;

	/* cap the request */
	if (limit > g_conf->bwlimit_out)
		limit = g_conf->bwlimit_out;
	if (limit > conn->maxsize)
		limit = conn->maxsize;

	threshold = conn->minsize;
	LOGS(L_SOCK,L_DEBUG,("avail:%u limit:%u clip:%u\n",
		g_peer->out.limit, limit, threshold));

	/* poll until the budget reaches the threshold; leaves with the lock held */
	for (;;) {
		if (0 != BWLIMIT_LOCK()) {
			errno = EINVAL;
			return INVLIMIT;
		}
		if (g_peer->out.limit >= threshold)
			break;
		BWLIMIT_UNLOCK();

		/* the socket may have been shut down while we were waiting */
		if (-1 == conn->socket || CONN_CONNECTED != conn->status) {
			LOGS(L_SOCK,L_ERROR,("socket was shut down\n"));
			errno = ENOTSOCK;
			return INVLIMIT;
		}
		osd_usleep(SOCK_USLEEP);
	}

	if (g_peer->out.limit > conn->minsize) {
		/* a 16th, so that 16 sockets can be served at least a little */
		share = (uint32_t)(g_peer->out.limit / 16);
	} else {
		/* half of what is left */
		share = g_peer->out.limit / 2;
	}

	/* a share below 16 bytes is replaced by the whole budget, if that is 16 or more */
	if (share < 16 && g_peer->out.limit >= 16)
		share = (uint32_t)g_peer->out.limit;

	/* never grant more than the share */
	if (limit > share)
		limit = share;

	if (0 == limit) {
		/* nothing to grant: back off and let the caller retry */
		BWLIMIT_UNLOCK();
		osd_usleep(SOCK_USLEEP);
		return 0;
	}

	LOGS(L_SOCK,L_DEBUG,("avail:%u use:%u\n",
		g_peer->out.limit, limit));
	g_peer->out.limit -= limit;
	BWLIMIT_UNLOCK();

	return limit;
}

/**
 * @brief Give back the unused amount of outbound bandwidth
 *
 * After a call to osd_send(), the amount of data actually
 * written might be smaller than what we would have allowed.
 * Give back the remaining bandwidth to the g_peer->out.limit
 * for a bandwidth limited connection, and account the bytes
 * sent in g_peer->out.used[59].
 * Make sure we don't exceed the per tick limit, though.
 *
 * Nothing is done if there is no g_peer or the connection is
 * not limited, i.e. in the cases where sock_acquire_out() did
 * not take anything out of the budget either.
 *
 * @param conn pointer to the connection context
 * @param count number of bytes actually sent (negative is treated as 0)
 * @param limit number of bytes that were allowed
 *
 * @result zero on success, -1 and errno = EINVAL if locking failed
 */
int sock_give_back_out(conn_t *conn, ssize_t count, uint32_t limit)
{
	FUN("sock_give_back_out");

	/* BWLIMIT_LOCK() dereferences g_peer, so check it first */
	if (NULL == g_peer || 0 == conn->limited)
		return 0;

	/* a failed send must not be accounted as negative traffic */
	if (count < 0)
		count = 0;

	if (0 != BWLIMIT_LOCK()) {
		errno = EINVAL;
		return -1;
	}
	/* NOTE(review): slot 59 presumably is the most recent entry - confirm */
	g_peer->out.used[59] += count;
	if (count < (ssize_t)limit) {
		/* give back unused bandwidth */
		g_peer->out.limit += limit - count;
		/* but don't exceed the per tick limit */
		if (g_peer->out.limit > g_conf->bwlimit_out / TICKS_PER_SEC)
			g_peer->out.limit = g_conf->bwlimit_out / TICKS_PER_SEC;
	}
	BWLIMIT_UNLOCK();

	return 0;
}

#else	/* !TICKS_PER_SEC */

/**
 * @brief Dummy if bandwidth limiting is disabled
 *
 * Grants whatever was asked for.
 *
 * NOTE(review): this variant uses int32_t where the TICKS_PER_SEC
 * variant uses uint32_t - verify against the prototype in sock.h.
 *
 * @param conn pointer to the connection context (unused)
 * @param limit number of bytes we want to receive
 *
 * @result limit, unchanged
 */
int32_t sock_acquire_in(conn_t *conn, int32_t limit)
{
	(void)conn;
	return limit;
}

/**
 * @brief Dummy if bandwidth limiting is disabled
 *
 * There is no inbound budget to give anything back to.
 *
 * @param conn pointer to the connection context (unused)
 * @param count number of bytes actually received (unused)
 * @param limit number of bytes that were allowed (unused)
 *
 * @result always zero
 */
int sock_give_back_in(conn_t *conn, ssize_t count, uint32_t limit)
{
	(void)conn;
	(void)count;
	(void)limit;
	return 0;
}

/**
 * @brief Dummy if bandwidth limiting is disabled
 *
 * Grants whatever was asked for.
 *
 * NOTE(review): this variant uses int32_t where the TICKS_PER_SEC
 * variant uses uint32_t - verify against the prototype in sock.h.
 *
 * @param conn pointer to the connection context (unused)
 * @param limit number of bytes we want to send
 *
 * @result limit, unchanged
 */
int32_t sock_acquire_out(conn_t *conn, int32_t limit)
{
	(void)conn;
	return limit;
}

/**
 * @brief Dummy if bandwidth limiting is disabled
 *
 * There is no outbound budget to give anything back to.
 *
 * @param conn pointer to the connection context (unused)
 * @param count number of bytes actually sent (unused)
 * @param limit number of bytes that were allowed (unused)
 *
 * @result always zero
 */
int sock_give_back_out(conn_t *conn, ssize_t count, uint32_t limit)
{
	(void)conn;
	(void)count;
	(void)limit;
	return 0;
}
#endif	/* !TICKS_PER_SEC */

/**
 * @brief Read exactly size bytes from a connection
 *
 * Receive into buffer until size bytes have arrived. For every
 * chunk, inbound bandwidth is acquired with sock_acquire_in() and
 * the unused part given back with sock_give_back_in(). If the
 * socket would block, select() waits up to 60 seconds for it to
 * become readable and the read is retried (also after a timeout).
 *
 * On failure the connection status is changed to CONN_SELECTERROR,
 * CONN_EXCEPTION, CONN_EOF (peer closed before the first byte) or
 * CONN_READERROR (receive error, or peer closed in mid-read).
 *
 * @param conn pointer to the connection context
 * @param buffer pointer to the buffer to fill
 * @param size number of bytes to read
 *
 * @result zero on success, -1 on error (errno = reason; EIO if the
 *         peer closed the connection, ENOTSOCK if the socket is shut down)
 */
int sock_readall(conn_t *conn, void *buffer, size_t size)
{
	struct timeval t0, t1;
	char *buff = (char *)buffer;
	size_t offs = 0;
	int e;
	FUN("sock_readall");

	LOGS(L_SOCK,L_DEBUG,("reading %#x bytes\n",
		(unsigned)size));

	gettimeofday(&t0, NULL);
	while (offs < size) {
		ssize_t count;
		size_t want;
		uint32_t limit;
		int sk;

		sk = conn->socket;
		if (-1 == sk || CONN_CONNECTED != conn->status) {
			LOGS(L_SOCK,L_ERROR,("socket was shut down\n"));
			errno = ENOTSOCK;
			return -1;
		}

		/*
		 * Acquire some of the inbound bandwidth. Clamp the request
		 * first: a plain size_t to uint32_t conversion could wrap to
		 * 0 (endless loop) or to INVLIMIT (bogus error).
		 */
		want = size - offs;
		if (want > (size_t)0x7fffffff)
			want = (size_t)0x7fffffff;
		limit = sock_acquire_in(conn, (uint32_t)want);
		if (INVLIMIT == limit) {
			e = errno;
			LOGS(L_SOCK,L_ERROR,("error acquiring bandwidth (%s)\n",
				strerror(e)));
			errno = e;
			return -1;
		}
		if (0 == limit)
			continue;

		/* re-read: the socket may have changed while waiting for bandwidth */
		sk = conn->socket;
		LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%#x)\n",
			sk, (unsigned)offs, (unsigned)limit));
		count = osd_recv(sk, &buff[offs], limit);
		if (-1 == count && EAGAIN == errno) {
			struct timeval tv;
			fd_set rdfds, exfds;
			int rc;

			e = errno;
			sock_give_back_in(conn, 0, limit);
			LOGS(L_SOCK,L_DEBUGX,("osd_recv(%d,@%#x,%#x) would block (%s)\n",
				sk, (unsigned)offs, (unsigned)limit, strerror(e)));
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = 60;
			tv.tv_usec = 0;
			FD_ZERO(&rdfds);
			FD_ZERO(&exfds);
			FD_SET(sk, &rdfds);
			FD_SET(sk, &exfds);
			LOGS(L_SOCK,L_DEBUG,("select(%d,...,%us) @%#x/%#x\n",
				sk, (unsigned)tv.tv_sec, (unsigned)offs, (unsigned)size));
			rc = select(sk+1, &rdfds, NULL, &exfds, &tv);
			if (-1 == rc) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%d) select (%s)\n",
					sk, (unsigned)offs, (int)limit, strerror(e)));
				conn_status_change(conn, CONN_SELECTERROR);
				errno = e;
				return -1;
			}
			if (FD_ISSET(sk, &exfds)) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%d) exfds (%s)\n",
					sk, (unsigned)offs, (int)limit, strerror(e)));
				conn_status_change(conn, CONN_EXCEPTION);
				errno = e;
				return -1;
			}
			/* readable or timed out: try again */
			continue;
		}
		if (count < 1) {
			/* save errno before sock_give_back_in() can change it */
			e = errno;
			sock_give_back_in(conn, 0, limit);
			if (0 == count) {
				/*
				 * The peer closed the connection. errno was not set by
				 * osd_recv() in this case, so it must not be looked at.
				 */
				if (0 == offs) {
					LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%#x) EOF\n",
						sk, (unsigned)offs, (unsigned)limit));
					conn_status_change(conn, CONN_EOF);
				} else {
					LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) EOF\n",
						sk, (unsigned)offs, (unsigned)limit));
					conn_status_change(conn, CONN_READERROR);
				}
				errno = EIO;
				return -1;
			}
			if (EINTR == e) {
				LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) EINTR (%s)\n",
					sk, (unsigned)offs, (unsigned)limit, strerror(e)));
				continue;
			}
			if (0 != e) {
				LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) failed (%s)\n",
					sk, (unsigned)offs, (unsigned)limit, strerror(e)));
			}
			conn_status_change(conn, CONN_READERROR);
			errno = e;
			return -1;
		}
		offs += count;
		conn->in_total += count;
		sock_give_back_in(conn, count, limit);
		conn->last_io = time(NULL);
	}

	gettimeofday(&t1, NULL);
	LOGS(L_SOCK,L_MINOR,("read %#x bytes, %s\n",
		(unsigned)size, usec_str(tv_diff(&t1,&t0))));
	return 0;
}

/**
 * @brief Write exactly size bytes to a connection
 *
 * Send from buffer until size bytes have been written. For every
 * chunk, outbound bandwidth is acquired with sock_acquire_out() and
 * the unused part given back with sock_give_back_out(). If the
 * socket would block, select() waits up to 60 seconds for it to
 * become writable and the write is retried (also after a timeout).
 *
 * On failure the connection status is changed to CONN_SELECTERROR,
 * CONN_EXCEPTION or CONN_WRITEERROR.
 *
 * @param conn pointer to the connection context
 * @param buffer pointer to the data to send
 * @param size number of bytes to send
 *
 * @result zero on success, -1 on error (errno = reason; ENOTSOCK if
 *         the socket is shut down)
 */
int sock_writeall(conn_t *conn, const void *buffer, size_t size)
{
	struct timeval t0, t1;
	const char *buff = (const char *)buffer;
	size_t offs = 0;
	int e;
	FUN("sock_writeall");

	LOGS(L_SOCK,L_DEBUG,("sending %#x bytes\n",
		(unsigned)size));

	gettimeofday(&t0, NULL);
	while (offs < size) {
		ssize_t count;
		size_t want;
		uint32_t limit;
		int sk;

		sk = conn->socket;
		if (-1 == sk || CONN_CONNECTED != conn->status) {
			LOGS(L_SOCK,L_ERROR,("socket was shut down\n"));
			errno = ENOTSOCK;
			return -1;
		}

		/*
		 * Acquire some of the outbound bandwidth. Clamp the request
		 * first: a plain size_t to uint32_t conversion could wrap to
		 * 0 (endless loop) or to INVLIMIT (bogus error).
		 */
		want = size - offs;
		if (want > (size_t)0x7fffffff)
			want = (size_t)0x7fffffff;
		limit = sock_acquire_out(conn, (uint32_t)want);
		if (INVLIMIT == limit) {
			e = errno;
			LOGS(L_SOCK,L_ERROR,("error acquiring bandwidth (%s)\n",
				strerror(e)));
			errno = e;
			return -1;
		}
		if (0 == limit)
			continue;

		/* re-read: the socket may have changed while waiting for bandwidth */
		sk = conn->socket;
		LOGS(L_SOCK,L_DEBUG,("osd_send(%d,@%#x,%#x)\n",
			sk, (unsigned)offs, (unsigned)limit));
		count = osd_send(sk, &buff[offs], limit);
		if (-1 == count && EAGAIN == errno) {
			struct timeval tv;
			fd_set wrfds, exfds;
			int rc;

			e = errno;
			sock_give_back_out(conn, 0, limit);
			LOGS(L_SOCK,L_DEBUGX,("osd_send(%d,@%#x,%#x) would block (%s)\n",
				sk, (unsigned)offs, (unsigned)limit, strerror(e)));
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = 60;
			tv.tv_usec = 0;
			FD_ZERO(&wrfds);
			FD_ZERO(&exfds);
			FD_SET(sk, &wrfds);
			FD_SET(sk, &exfds);
			LOGS(L_SOCK,L_DEBUG,("select(%d,...,%us) @%#x/%#x\n",
				sk, (unsigned)tv.tv_sec, (unsigned)offs, (unsigned)size));
			rc = select(sk+1, NULL, &wrfds, &exfds, &tv);
			if (-1 == rc) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_send(%d,@%#x,%#x) select (%s)\n",
					sk, (unsigned)offs, (unsigned)limit,
					strerror(e)));
				conn_status_change(conn, CONN_SELECTERROR);
				errno = e;
				return -1;
			}
			if (FD_ISSET(sk, &exfds)) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_send(%d,@%#x,%#x) exfds (%s)\n",
					sk, (unsigned)offs, (unsigned)limit,
					strerror(e)));
				conn_status_change(conn, CONN_EXCEPTION);
				errno = e;
				return -1;
			}
			/* writable or timed out: try again */
			continue;
		}
		if (count < 1) {
			/* save errno before sock_give_back_out() can change it */
			e = errno;
			sock_give_back_out(conn, 0, limit);
			/* errno is only meaningful if the call actually failed */
			if (-1 == count && EINTR == e) {
				LOGS(L_SOCK,L_ERROR,("osd_send(%d,@%#x,%#x) EINTR (%s)\n",
					sk, (unsigned)offs, (unsigned)limit, strerror(e)));
				continue;
			}
			/* nothing was sent, but no error reported: errno is stale */
			if (0 == count)
				e = EIO;
			LOGS(L_SOCK,L_ERROR,("osd_send(%d,@%#x,%#x) failed (%s)\n",
				sk, (unsigned)offs, (unsigned)limit, strerror(e)));
			conn_status_change(conn, CONN_WRITEERROR);
			errno = e;
			return -1;
		}
		offs += count;
		conn->out_total += count;
		sock_give_back_out(conn, count, limit);
		conn->last_io = time(NULL);
	}

	gettimeofday(&t1, NULL);
	LOGS(L_SOCK,L_MINOR,("sent %#x bytes, %s\n",
		(unsigned)size, usec_str(tv_diff(&t1,&t0))));
	return 0;
}

/**
 * @brief Read a line from a connection into a fixed size buffer
 *
 * Receive single characters until a newline was stored or size - 1
 * characters were read. The newline is kept and the result is always
 * NUL terminated. No bandwidth limiting is applied. If the socket
 * would block, select() waits up to 60 seconds for it to become
 * readable and the read is retried (also after a timeout).
 *
 * On failure dst holds the NUL terminated part of the line read so
 * far and the connection status is changed to CONN_SELECTERROR,
 * CONN_EXCEPTION, CONN_EOF (peer closed before the first character)
 * or CONN_READERROR (receive error, or peer closed in mid-line).
 *
 * @param conn pointer to the connection context
 * @param dst pointer to the buffer to receive the line
 * @param size size of the buffer in bytes, including the NUL; must not be 0
 *
 * @result zero on success, -1 on error (errno = reason; EINVAL for a
 *         zero size, EIO if the peer closed the connection, ENOTSOCK
 *         if the socket is shut down)
 */
int sock_gets(conn_t *conn, char *dst, size_t size)
{
	char ch;
	size_t offs;
	int e;
	FUN("sock_gets");

	/* size - 1 below would wrap around and overrun dst */
	if (0 == size) {
		errno = EINVAL;
		return -1;
	}

	for (offs = 0; offs < size - 1; /* */) {
		ssize_t count;
		int sk = conn->socket;

		/* keep dst terminated, whatever happens next */
		dst[offs] = '\0';

		if (-1 == sk || CONN_CONNECTED != conn->status) {
			LOGS(L_SOCK,L_DEBUG,("socket was shut down\n"));
			errno = ENOTSOCK;
			return -1;
		}

		count = osd_recv(sk, &ch, 1);
		if (-1 == count && EAGAIN == errno) {
			struct timeval tv;
			fd_set rdfds, exfds;
			int rc;

			e = errno;
			conn->last_io = time(NULL);
			LOGS(L_SOCK,L_DEBUGX,("osd_recv(%d,@%#x,%#x) would block (%s)\n",
				sk, (unsigned)offs, (unsigned)1, strerror(e)));
			memset(&tv, 0, sizeof(tv));
			tv.tv_sec = 60;
			tv.tv_usec = 0;
			FD_ZERO(&rdfds);
			FD_ZERO(&exfds);
			FD_SET(sk, &rdfds);
			FD_SET(sk, &exfds);
			LOGS(L_SOCK,L_DEBUG,("select(%d,...,%us) @%#x/%#x\n",
				sk, (unsigned)tv.tv_sec, (unsigned)offs, (unsigned)size));
			rc = select(sk+1, &rdfds, NULL, &exfds, &tv);
			if (-1 == rc) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%#x) select (%s)\n",
					sk, (unsigned)offs, (unsigned)1,
					strerror(e)));
				conn_status_change(conn, CONN_SELECTERROR);
				errno = e;
				return -1;
			}
			if (FD_ISSET(sk, &exfds)) {
				e = errno;
				LOGS(L_SOCK,L_DEBUG,("osd_recv(%d,@%#x,%#x) exfds (%s)\n",
					sk, (unsigned)offs, (unsigned)1,
					strerror(e)));
				conn_status_change(conn, CONN_EXCEPTION);
				errno = e;
				return -1;
			}
			/* readable or timed out: try again */
			continue;
		}
		if (count < 1) {
			e = errno;
			if (0 == count) {
				/*
				 * The peer closed the connection. errno was not set by
				 * osd_recv() in this case, so it must not be looked at.
				 */
				if (0 == offs) {
					LOGS(L_SOCK,L_DEBUG,("EOF\n"));
					conn_status_change(conn, CONN_EOF);
				} else {
					LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) EOF\n",
						sk, (unsigned)offs, (unsigned)1));
					conn_status_change(conn, CONN_READERROR);
				}
				errno = EIO;
				return -1;
			}
			if (EINTR == e) {
				LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) EINTR (%s)\n",
					sk, (unsigned)offs, (unsigned)1, strerror(e)));
				continue;
			}
			if (0 != e) {
				LOGS(L_SOCK,L_ERROR,("osd_recv(%d,@%#x,%#x) failed (%s)\n",
					sk, (unsigned)offs, (unsigned)1, strerror(e)));
			}
			conn_status_change(conn, CONN_READERROR);
			errno = e;
			return -1;
		}
		/* set last_io time only for first char */
		if (0 == offs) {
			conn->last_io = time(NULL);
		}
		dst[offs++] = ch;
		conn->in_total += 1;
		if ('\n' == ch)
			break;
	}
	/* set last_io afterwards */
	conn->last_io = time(NULL);

	dst[offs] = '\0';
	return 0;
}

/**
 * @brief Read a line of any length from a connection
 *
 * Read a line with sock_gets() into an allocated buffer, doubling the
 * buffer size as required until the line contains a newline.
 *
 * On success *pdst receives the buffer, which the caller has to
 * release with xfree(). On failure the buffer is released here and
 * *pdst is NULL.
 *
 * NOTE(review): the results of xcalloc() and xrealloc() are used
 * unchecked - presumably they never return NULL; confirm in memalloc.h.
 *
 * @param conn pointer to the connection context
 * @param pdst pointer to a char pointer to receive the line
 *
 * @result zero on success, otherwise the result of the failed sock_gets()
 */
int sock_agets(conn_t *conn, char **pdst)
{
	size_t linesize = 256;
	char *line = xcalloc(linesize, sizeof(char));
	int rc;
	FUN("sock_agets");

	*pdst = NULL;

	rc = sock_gets(conn, line, linesize);
	/* if there's no newline in the buffer, continue to read */
	while (0 == rc && NULL == strchr(line, '\n')) {
		size_t offs = strlen(line);
		linesize *= 2;
		line = xrealloc(line, linesize);
		rc = sock_gets(conn, &line[offs], linesize - offs);
	}

	if (0 != rc) {
		/* the caller never gets to see the buffer, so release it here */
		xfree(line);
		return rc;
	}

	*pdst = line;
	return 0;
}

/**
 * @brief Send printf style formatted text to a connection
 *
 * Format the arguments into an allocated string with pm_vasprintf()
 * and send it with sock_writeall().
 *
 * @param conn pointer to the connection context
 * @param fmt printf style format string, followed by its arguments
 *
 * @result result of sock_writeall(), or -1 if formatting failed or
 *         produced an empty string
 */
int sock_printf(conn_t *conn, const char *fmt, ...)
{
	va_list ap;
	char *buff = NULL;
	int len, rc;
	FUN("sock_printf");

	va_start(ap, fmt);
	len = pm_vasprintf(&buff, fmt, ap);
	va_end(ap);

	if (len <= 0) {
		/*
		 * An empty result may still have been allocated; release it.
		 * NOTE(review): buff is left alone for len < 0, as its state
		 * after a failed pm_vasprintf() is not known here.
		 */
		if (0 == len)
			xfree(buff);
		return -1;
	}

	rc = sock_writeall(conn, buff, len);
	xfree(buff);

	return rc;
}

/**
 * @brief Shut down and close the socket of a connection
 *
 * Nothing is done if the connection status is CONN_CLOSED,
 * CONN_CLOSING or CONN_SHUTDOWN. Otherwise conn->socket is set to -1
 * and the socket goes through shutdown() and osd_closesocket(), with
 * the status changing to CONN_SHUTDOWN, CONN_CLOSING and finally
 * CONN_CLOSED. Closing is attempted even if shutdown() failed.
 *
 * @param conn pointer to the connection context
 *
 * @result zero on success or if there was nothing to do, -1 and
 *         errno = EINVAL if conn is NULL, -1 and errno = EIO if
 *         shutdown() or osd_closesocket() failed
 */
int sock_shutdown(conn_t *conn)
{
	int fd;
	int result = 0;
	FUN("sock_shutdown");

	if (NULL == conn) {
		LOGS(L_SOCK,L_ERROR,("conn is NULL\n"));
		errno = EINVAL;
		return -1;
	}

	fd = conn->socket;

	switch (conn->status) {
	/* already closed or on its way there: nothing to do */
	case CONN_CLOSED:
		LOGS(L_SOCK,L_ERROR,("sk %d already closed\n", fd));
		return 0;
	case CONN_CLOSING:
		LOGS(L_SOCK,L_ERROR,("sk %d already in close()\n", fd));
		return 0;
	case CONN_SHUTDOWN:
		LOGS(L_SOCK,L_ERROR,("sk %d already in shutdown()\n", fd));
		return 0;

	/* everything else is only logged */
	case CONN_DNSLOOKUP:
		LOGS(L_SOCK,L_DEBUG,("sk %d in gethostbyname()\n", fd));
		break;
	case CONN_CONNECTING:
		LOGS(L_SOCK,L_DEBUG,("sk %d during connect()\n", fd));
		break;
	case CONN_ACCEPTING:
		LOGS(L_SOCK,L_DEBUG,("sk %d during accept()\n", fd));
		break;
	case CONN_CONNECTED:
		LOGS(L_SOCK,L_DEBUG,("sk %d while connected\n", fd));
		break;
	case CONN_SELECTERROR:
		LOGS(L_SOCK,L_DEBUG,("sk %d after select() failed\n", fd));
		break;
	case CONN_EXCEPTION:
		LOGS(L_SOCK,L_DEBUG,("sk %d after exception FD_ISSET(sk,exfds)\n", fd));
		break;
	case CONN_READERROR:
		LOGS(L_SOCK,L_DEBUG,("sk %d after read() error\n", fd));
		break;
	case CONN_WRITEERROR:
		LOGS(L_SOCK,L_DEBUG,("sk %d after write() error\n", fd));
		break;
	case CONN_EOF:
		LOGS(L_SOCK,L_DEBUG,("sk %d at EOF\n", fd));
		break;
	default:
		break;
	}

	/* detach the descriptor from the context before touching it */
	conn->socket = -1;

	if (-1 == fd) {
		/* there never was a socket */
		LOGS(L_SOCK,L_DEBUG,("sk %d (invalid)\n", fd));
		conn_status_change(conn, CONN_CLOSED);
		return 0;
	}

	conn_status_change(conn, CONN_SHUTDOWN);
	if (0 != shutdown(fd, SHUT_RDWR)) {
		LOGS(L_SOCK,L_DEBUG,("shutdown(%d, SHUT_RDWR) call failed (%s)\n",
			fd, strerror(errno)));
		errno = EIO;
		result = -1;
		/* close it nevertheless */
	}

	conn_status_change(conn, CONN_CLOSING);
	if (0 != osd_closesocket(fd)) {
		LOGS(L_SOCK,L_DEBUG,("osd_closesocket(%d) call failed (%s)\n",
			fd, strerror(errno)));
		errno = EIO;
		result = -1;
		/* finish the bookkeeping nevertheless */
	}

	conn_status_change(conn, CONN_CLOSED);
	LOGS(L_SOCK,L_MINOR,("sock %d closed after %llu in, %llu out\n",
		fd,
		(unsigned long long)conn->in_total,
		(unsigned long long)conn->out_total));

	return result;
}
