Blob Blame History Raw
/*
 * Copyright (c) 2009-2012 Niels Provos and Nick Mathewson
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "event2/event-config.h"

#ifdef _EVENT_HAVE_SYS_TIME_H
#include <sys/time.h>
#endif

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _EVENT_HAVE_STDARG_H
#include <stdarg.h>
#endif
#ifdef _EVENT_HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#endif

#include <sys/queue.h>

#include "event2/util.h"
#include "event2/bufferevent.h"
#include "event2/buffer.h"
#include "event2/bufferevent_struct.h"
#include "event2/event.h"
#include "event2/util.h"
#include "event-internal.h"
#include "log-internal.h"
#include "mm-internal.h"
#include "bufferevent-internal.h"
#include "util-internal.h"
#include "iocp-internal.h"

#ifndef SO_UPDATE_CONNECT_CONTEXT
/* Mingw is sometimes missing this */
#define SO_UPDATE_CONNECT_CONTEXT 0x7010
#endif

/* prototypes */
static int be_async_enable(struct bufferevent *, short);
static int be_async_disable(struct bufferevent *, short);
static void be_async_destruct(struct bufferevent *);
static int be_async_flush(struct bufferevent *, short, enum bufferevent_flush_mode);
static int be_async_ctrl(struct bufferevent *, enum bufferevent_ctrl_op, union bufferevent_ctrl_data *);

struct bufferevent_async {
	struct bufferevent_private bev;
	struct event_overlapped connect_overlapped;
	struct event_overlapped read_overlapped;
	struct event_overlapped write_overlapped;
	size_t read_in_progress;
	size_t write_in_progress;
	unsigned ok : 1;
	unsigned read_added : 1;
	unsigned write_added : 1;
};

const struct bufferevent_ops bufferevent_ops_async = {
	"socket_async",
	evutil_offsetof(struct bufferevent_async, bev.bev),
	be_async_enable,
	be_async_disable,
	be_async_destruct,
	_bufferevent_generic_adj_timeouts,
	be_async_flush,
	be_async_ctrl,
};

static inline struct bufferevent_async *
upcast(struct bufferevent *bev)
{
	struct bufferevent_async *bev_a;
	if (bev->be_ops != &bufferevent_ops_async)
		return NULL;
	bev_a = EVUTIL_UPCAST(bev, struct bufferevent_async, bev.bev);
	return bev_a;
}

static inline struct bufferevent_async *
upcast_connect(struct event_overlapped *eo)
{
	struct bufferevent_async *bev_a;
	bev_a = EVUTIL_UPCAST(eo, struct bufferevent_async, connect_overlapped);
	EVUTIL_ASSERT(BEV_IS_ASYNC(&bev_a->bev.bev));
	return bev_a;
}

static inline struct bufferevent_async *
upcast_read(struct event_overlapped *eo)
{
	struct bufferevent_async *bev_a;
	bev_a = EVUTIL_UPCAST(eo, struct bufferevent_async, read_overlapped);
	EVUTIL_ASSERT(BEV_IS_ASYNC(&bev_a->bev.bev));
	return bev_a;
}

static inline struct bufferevent_async *
upcast_write(struct event_overlapped *eo)
{
	struct bufferevent_async *bev_a;
	bev_a = EVUTIL_UPCAST(eo, struct bufferevent_async, write_overlapped);
	EVUTIL_ASSERT(BEV_IS_ASYNC(&bev_a->bev.bev));
	return bev_a;
}

static void
bev_async_del_write(struct bufferevent_async *beva)
{
	struct bufferevent *bev = &beva->bev.bev;

	if (beva->write_added) {
		beva->write_added = 0;
		event_base_del_virtual(bev->ev_base);
	}
}

static void
bev_async_del_read(struct bufferevent_async *beva)
{
	struct bufferevent *bev = &beva->bev.bev;

	if (beva->read_added) {
		beva->read_added = 0;
		event_base_del_virtual(bev->ev_base);
	}
}

static void
bev_async_add_write(struct bufferevent_async *beva)
{
	struct bufferevent *bev = &beva->bev.bev;

	if (!beva->write_added) {
		beva->write_added = 1;
		event_base_add_virtual(bev->ev_base);
	}
}

static void
bev_async_add_read(struct bufferevent_async *beva)
{
	struct bufferevent *bev = &beva->bev.bev;

	if (!beva->read_added) {
		beva->read_added = 1;
		event_base_add_virtual(bev->ev_base);
	}
}

static void
bev_async_consider_writing(struct bufferevent_async *beva)
{
	size_t at_most;
	int limit;
	struct bufferevent *bev = &beva->bev.bev;

	/* Don't write if there's a write in progress, or we do not
	 * want to write, or when there's nothing left to write. */
	if (beva->write_in_progress || beva->bev.connecting)
		return;
	if (!beva->ok || !(bev->enabled&EV_WRITE) ||
	    !evbuffer_get_length(bev->output)) {
		bev_async_del_write(beva);
		return;
	}

	at_most = evbuffer_get_length(bev->output);

	/* This is safe so long as bufferevent_get_write_max never returns
	 * more than INT_MAX.  That's true for now. XXXX */
	limit = (int)_bufferevent_get_write_max(&beva->bev);
	if (at_most >= (size_t)limit && limit >= 0)
		at_most = limit;

	if (beva->bev.write_suspended) {
		bev_async_del_write(beva);
		return;
	}

	/*  XXXX doesn't respect low-water mark very well. */
	bufferevent_incref(bev);
	if (evbuffer_launch_write(bev->output, at_most,
	    &beva->write_overlapped)) {
		bufferevent_decref(bev);
		beva->ok = 0;
		_bufferevent_run_eventcb(bev, BEV_EVENT_ERROR);
	} else {
		beva->write_in_progress = at_most;
		_bufferevent_decrement_write_buckets(&beva->bev, at_most);
		bev_async_add_write(beva);
	}
}

static void
bev_async_consider_reading(struct bufferevent_async *beva)
{
	size_t cur_size;
	size_t read_high;
	size_t at_most;
	int limit;
	struct bufferevent *bev = &beva->bev.bev;

	/* Don't read if there is a read in progress, or we do not
	 * want to read. */
	if (beva->read_in_progress || beva->bev.connecting)
		return;
	if (!beva->ok || !(bev->enabled&EV_READ)) {
		bev_async_del_read(beva);
		return;
	}

	/* Don't read if we're full */
	cur_size = evbuffer_get_length(bev->input);
	read_high = bev->wm_read.high;
	if (read_high) {
		if (cur_size >= read_high) {
			bev_async_del_read(beva);
			return;
		}
		at_most = read_high - cur_size;
	} else {
		at_most = 16384; /* FIXME totally magic. */
	}

	/* XXXX This over-commits. */
	/* XXXX see also not above on cast on _bufferevent_get_write_max() */
	limit = (int)_bufferevent_get_read_max(&beva->bev);
	if (at_most >= (size_t)limit && limit >= 0)
		at_most = limit;

	if (beva->bev.read_suspended) {
		bev_async_del_read(beva);
		return;
	}

	bufferevent_incref(bev);
	if (evbuffer_launch_read(bev->input, at_most, &beva->read_overlapped)) {
		beva->ok = 0;
		_bufferevent_run_eventcb(bev, BEV_EVENT_ERROR);
		bufferevent_decref(bev);
	} else {
		beva->read_in_progress = at_most;
		_bufferevent_decrement_read_buckets(&beva->bev, at_most);
		bev_async_add_read(beva);
	}

	return;
}

static void
be_async_outbuf_callback(struct evbuffer *buf,
    const struct evbuffer_cb_info *cbinfo,
    void *arg)
{
	struct bufferevent *bev = arg;
	struct bufferevent_async *bev_async = upcast(bev);

	/* If we added data to the outbuf and were not writing before,
	 * we may want to write now. */

	_bufferevent_incref_and_lock(bev);

	if (cbinfo->n_added)
		bev_async_consider_writing(bev_async);

	_bufferevent_decref_and_unlock(bev);
}

static void
be_async_inbuf_callback(struct evbuffer *buf,
    const struct evbuffer_cb_info *cbinfo,
    void *arg)
{
	struct bufferevent *bev = arg;
	struct bufferevent_async *bev_async = upcast(bev);

	/* If we drained data from the inbuf and were not reading before,
	 * we may want to read now */

	_bufferevent_incref_and_lock(bev);

	if (cbinfo->n_deleted)
		bev_async_consider_reading(bev_async);

	_bufferevent_decref_and_unlock(bev);
}

static int
be_async_enable(struct bufferevent *buf, short what)
{
	struct bufferevent_async *bev_async = upcast(buf);

	if (!bev_async->ok)
		return -1;

	if (bev_async->bev.connecting) {
		/* Don't launch anything during connection attempts. */
		return 0;
	}

	if (what & EV_READ)
		BEV_RESET_GENERIC_READ_TIMEOUT(buf);
	if (what & EV_WRITE)
		BEV_RESET_GENERIC_WRITE_TIMEOUT(buf);

	/* If we newly enable reading or writing, and we aren't reading or
	   writing already, consider launching a new read or write. */

	if (what & EV_READ)
		bev_async_consider_reading(bev_async);
	if (what & EV_WRITE)
		bev_async_consider_writing(bev_async);
	return 0;
}

static int
be_async_disable(struct bufferevent *bev, short what)
{
	struct bufferevent_async *bev_async = upcast(bev);
	/* XXXX If we disable reading or writing, we may want to consider
	 * canceling any in-progress read or write operation, though it might
	 * not work. */

	if (what & EV_READ) {
		BEV_DEL_GENERIC_READ_TIMEOUT(bev);
		bev_async_del_read(bev_async);
	}
	if (what & EV_WRITE) {
		BEV_DEL_GENERIC_WRITE_TIMEOUT(bev);
		bev_async_del_write(bev_async);
	}

	return 0;
}

static void
be_async_destruct(struct bufferevent *bev)
{
	struct bufferevent_async *bev_async = upcast(bev);
	struct bufferevent_private *bev_p = BEV_UPCAST(bev);
	evutil_socket_t fd;

	EVUTIL_ASSERT(!upcast(bev)->write_in_progress &&
			!upcast(bev)->read_in_progress);

	bev_async_del_read(bev_async);
	bev_async_del_write(bev_async);

	fd = _evbuffer_overlapped_get_fd(bev->input);
	if (bev_p->options & BEV_OPT_CLOSE_ON_FREE) {
		/* XXXX possible double-close */
		evutil_closesocket(fd);
	}
	/* delete this in case non-blocking connect was used */
	if (event_initialized(&bev->ev_write)) {
		event_del(&bev->ev_write);
		_bufferevent_del_generic_timeout_cbs(bev);
	}
}

/* GetQueuedCompletionStatus doesn't reliably yield WSA error codes, so
 * we use WSAGetOverlappedResult to translate. */
static void
bev_async_set_wsa_error(struct bufferevent *bev, struct event_overlapped *eo)
{
	DWORD bytes, flags;
	evutil_socket_t fd;

	fd = _evbuffer_overlapped_get_fd(bev->input);
	WSAGetOverlappedResult(fd, &eo->overlapped, &bytes, FALSE, &flags);
}

static int
be_async_flush(struct bufferevent *bev, short what,
    enum bufferevent_flush_mode mode)
{
	return 0;
}

static void
connect_complete(struct event_overlapped *eo, ev_uintptr_t key,
    ev_ssize_t nbytes, int ok)
{
	struct bufferevent_async *bev_a = upcast_connect(eo);
	struct bufferevent *bev = &bev_a->bev.bev;
	evutil_socket_t sock;

	BEV_LOCK(bev);

	EVUTIL_ASSERT(bev_a->bev.connecting);
	bev_a->bev.connecting = 0;
	sock = _evbuffer_overlapped_get_fd(bev_a->bev.bev.input);
	/* XXXX Handle error? */
	setsockopt(sock, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0);

	if (ok)
		bufferevent_async_set_connected(bev);
	else
		bev_async_set_wsa_error(bev, eo);

	_bufferevent_run_eventcb(bev,
			ok? BEV_EVENT_CONNECTED : BEV_EVENT_ERROR);

	event_base_del_virtual(bev->ev_base);

	_bufferevent_decref_and_unlock(bev);
}

static void
read_complete(struct event_overlapped *eo, ev_uintptr_t key,
    ev_ssize_t nbytes, int ok)
{
	struct bufferevent_async *bev_a = upcast_read(eo);
	struct bufferevent *bev = &bev_a->bev.bev;
	short what = BEV_EVENT_READING;
	ev_ssize_t amount_unread;
	BEV_LOCK(bev);
	EVUTIL_ASSERT(bev_a->read_in_progress);

	amount_unread = bev_a->read_in_progress - nbytes;
	evbuffer_commit_read(bev->input, nbytes);
	bev_a->read_in_progress = 0;
	if (amount_unread)
		_bufferevent_decrement_read_buckets(&bev_a->bev, -amount_unread);

	if (!ok)
		bev_async_set_wsa_error(bev, eo);

	if (bev_a->ok) {
		if (ok && nbytes) {
			BEV_RESET_GENERIC_READ_TIMEOUT(bev);
			if (evbuffer_get_length(bev->input) >= bev->wm_read.low)
				_bufferevent_run_readcb(bev);
			bev_async_consider_reading(bev_a);
		} else if (!ok) {
			what |= BEV_EVENT_ERROR;
			bev_a->ok = 0;
			_bufferevent_run_eventcb(bev, what);
		} else if (!nbytes) {
			what |= BEV_EVENT_EOF;
			bev_a->ok = 0;
			_bufferevent_run_eventcb(bev, what);
		}
	}

	_bufferevent_decref_and_unlock(bev);
}

static void
write_complete(struct event_overlapped *eo, ev_uintptr_t key,
    ev_ssize_t nbytes, int ok)
{
	struct bufferevent_async *bev_a = upcast_write(eo);
	struct bufferevent *bev = &bev_a->bev.bev;
	short what = BEV_EVENT_WRITING;
	ev_ssize_t amount_unwritten;

	BEV_LOCK(bev);
	EVUTIL_ASSERT(bev_a->write_in_progress);

	amount_unwritten = bev_a->write_in_progress - nbytes;
	evbuffer_commit_write(bev->output, nbytes);
	bev_a->write_in_progress = 0;

	if (amount_unwritten)
		_bufferevent_decrement_write_buckets(&bev_a->bev,
		                                     -amount_unwritten);


	if (!ok)
		bev_async_set_wsa_error(bev, eo);

	if (bev_a->ok) {
		if (ok && nbytes) {
			BEV_RESET_GENERIC_WRITE_TIMEOUT(bev);
			if (evbuffer_get_length(bev->output) <=
			    bev->wm_write.low)
				_bufferevent_run_writecb(bev);
			bev_async_consider_writing(bev_a);
		} else if (!ok) {
			what |= BEV_EVENT_ERROR;
			bev_a->ok = 0;
			_bufferevent_run_eventcb(bev, what);
		} else if (!nbytes) {
			what |= BEV_EVENT_EOF;
			bev_a->ok = 0;
			_bufferevent_run_eventcb(bev, what);
		}
	}

	_bufferevent_decref_and_unlock(bev);
}

struct bufferevent *
bufferevent_async_new(struct event_base *base,
    evutil_socket_t fd, int options)
{
	struct bufferevent_async *bev_a;
	struct bufferevent *bev;
	struct event_iocp_port *iocp;

	options |= BEV_OPT_THREADSAFE;

	if (!(iocp = event_base_get_iocp(base)))
		return NULL;

	if (fd >= 0 && event_iocp_port_associate(iocp, fd, 1)<0) {
		int err = GetLastError();
		/* We may have alrady associated this fd with a port.
		 * Let's hope it's this port, and that the error code
		 * for doing this neer changes. */
		if (err != ERROR_INVALID_PARAMETER)
			return NULL;
	}

	if (!(bev_a = mm_calloc(1, sizeof(struct bufferevent_async))))
		return NULL;

	bev = &bev_a->bev.bev;
	if (!(bev->input = evbuffer_overlapped_new(fd))) {
		mm_free(bev_a);
		return NULL;
	}
	if (!(bev->output = evbuffer_overlapped_new(fd))) {
		evbuffer_free(bev->input);
		mm_free(bev_a);
		return NULL;
	}

	if (bufferevent_init_common(&bev_a->bev, base, &bufferevent_ops_async,
		options)<0)
		goto err;

	evbuffer_add_cb(bev->input, be_async_inbuf_callback, bev);
	evbuffer_add_cb(bev->output, be_async_outbuf_callback, bev);

	event_overlapped_init(&bev_a->connect_overlapped, connect_complete);
	event_overlapped_init(&bev_a->read_overlapped, read_complete);
	event_overlapped_init(&bev_a->write_overlapped, write_complete);

	bev_a->ok = fd >= 0;
	if (bev_a->ok)
		_bufferevent_init_generic_timeout_cbs(bev);

	return bev;
err:
	bufferevent_free(&bev_a->bev.bev);
	return NULL;
}

void
bufferevent_async_set_connected(struct bufferevent *bev)
{
	struct bufferevent_async *bev_async = upcast(bev);
	bev_async->ok = 1;
	_bufferevent_init_generic_timeout_cbs(bev);
	/* Now's a good time to consider reading/writing */
	be_async_enable(bev, bev->enabled);
}

int
bufferevent_async_can_connect(struct bufferevent *bev)
{
	const struct win32_extension_fns *ext =
	    event_get_win32_extension_fns();

	if (BEV_IS_ASYNC(bev) &&
	    event_base_get_iocp(bev->ev_base) &&
	    ext && ext->ConnectEx)
		return 1;

	return 0;
}

int
bufferevent_async_connect(struct bufferevent *bev, evutil_socket_t fd,
	const struct sockaddr *sa, int socklen)
{
	BOOL rc;
	struct bufferevent_async *bev_async = upcast(bev);
	struct sockaddr_storage ss;
	const struct win32_extension_fns *ext =
	    event_get_win32_extension_fns();

	EVUTIL_ASSERT(ext && ext->ConnectEx && fd >= 0 && sa != NULL);

	/* ConnectEx() requires that the socket be bound to an address
	 * with bind() before using, otherwise it will fail. We attempt
	 * to issue a bind() here, taking into account that the error
	 * code is set to WSAEINVAL when the socket is already bound. */
	memset(&ss, 0, sizeof(ss));
	if (sa->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
	} else if (sa->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = in6addr_any;
	} else {
		/* Well, the user will have to bind() */
		return -1;
	}
	if (bind(fd, (struct sockaddr *)&ss, sizeof(ss)) < 0 &&
	    WSAGetLastError() != WSAEINVAL)
		return -1;

	event_base_add_virtual(bev->ev_base);
	bufferevent_incref(bev);
	rc = ext->ConnectEx(fd, sa, socklen, NULL, 0, NULL,
			    &bev_async->connect_overlapped.overlapped);
	if (rc || WSAGetLastError() == ERROR_IO_PENDING)
		return 0;

	event_base_del_virtual(bev->ev_base);
	bufferevent_decref(bev);

	return -1;
}

static int
be_async_ctrl(struct bufferevent *bev, enum bufferevent_ctrl_op op,
    union bufferevent_ctrl_data *data)
{
	switch (op) {
	case BEV_CTRL_GET_FD:
		data->fd = _evbuffer_overlapped_get_fd(bev->input);
		return 0;
	case BEV_CTRL_SET_FD: {
		struct event_iocp_port *iocp;

		if (data->fd == _evbuffer_overlapped_get_fd(bev->input))
			return 0;
		if (!(iocp = event_base_get_iocp(bev->ev_base)))
			return -1;
		if (event_iocp_port_associate(iocp, data->fd, 1) < 0)
			return -1;
		_evbuffer_overlapped_set_fd(bev->input, data->fd);
		_evbuffer_overlapped_set_fd(bev->output, data->fd);
		return 0;
	}
	case BEV_CTRL_CANCEL_ALL: {
		struct bufferevent_async *bev_a = upcast(bev);
		evutil_socket_t fd = _evbuffer_overlapped_get_fd(bev->input);
		if (fd != (evutil_socket_t)INVALID_SOCKET &&
		    (bev_a->bev.options & BEV_OPT_CLOSE_ON_FREE)) {
			closesocket(fd);
		}
		bev_a->ok = 0;
		return 0;
	}
	case BEV_CTRL_GET_UNDERLYING:
	default:
		return -1;
	}
}