/* Copyright StrongLoop, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "defs.h" #include #include #include /* A connection is modeled as an abstraction on top of two simple state * machines, one for reading and one for writing. Either state machine * is, when active, in one of three states: busy, done or stop; the fourth * and final state, dead, is an end state and only relevant when shutting * down the connection. A short overview: * * busy done stop * ----------|---------------------------|--------------------|------| * readable | waiting for incoming data | have incoming data | idle | * writable | busy writing out data | completed write | idle | * * We could remove the done state from the writable state machine. For our * purposes, it's functionally equivalent to the stop state. * * When the connection with upstream has been established, the client_ctx * moves into a state where incoming data from the client is sent upstream * and vice versa, incoming data from upstream is sent to the client. In * other words, we're just piping data back and forth. See conn_cycle() * for details. * * An interesting deviation from libuv's I/O model is that reads are discrete * rather than continuous events. In layman's terms, when a read operation * completes, the connection stops reading until further notice. * * The rationale for this approach is that we have to wait until the data * has been sent out again before we can reuse the read buffer. * * It also pleasingly unifies with the request model that libuv uses for * writes and everything else; libuv may switch to a request model for * reads in the future. */ enum conn_state { c_busy, /* Busy; waiting for incoming data or for a write to complete. */ c_done, /* Done; read incoming data or write finished. */ c_stop, /* Stopped. */ c_dead }; /* Session states. */ enum sess_state { s_handshake, /* Wait for client handshake. */ s_handshake_auth, /* Wait for client authentication data. */ s_req_start, /* Start waiting for request data. */ s_req_parse, /* Wait for request data. */ s_req_lookup, /* Wait for upstream hostname DNS lookup to complete. */ s_req_connect, /* Wait for uv_tcp_connect() to complete. */ s_proxy_start, /* Connected. Start piping data. */ s_proxy, /* Connected. Pipe data back and forth. */ s_kill, /* Tear down session. */ s_almost_dead_0, /* Waiting for finalizers to complete. */ s_almost_dead_1, /* Waiting for finalizers to complete. */ s_almost_dead_2, /* Waiting for finalizers to complete. */ s_almost_dead_3, /* Waiting for finalizers to complete. */ s_almost_dead_4, /* Waiting for finalizers to complete. */ s_dead /* Dead. Safe to free now. */ }; static void do_next(client_ctx *cx); static int do_handshake(client_ctx *cx); static int do_handshake_auth(client_ctx *cx); static int do_req_start(client_ctx *cx); static int do_req_parse(client_ctx *cx); static int do_req_lookup(client_ctx *cx); static int do_req_connect_start(client_ctx *cx); static int do_req_connect(client_ctx *cx); static int do_proxy_start(client_ctx *cx); static int do_proxy(client_ctx *cx); static int do_kill(client_ctx *cx); static int do_almost_dead(client_ctx *cx); static int conn_cycle(const char *who, conn *a, conn *b); static void conn_timer_reset(conn *c); static void conn_timer_expire(uv_timer_t *handle); static void conn_getaddrinfo(conn *c, const char *hostname); static void conn_getaddrinfo_done(uv_getaddrinfo_t *req, int status, struct addrinfo *ai); static int conn_connect(conn *c); static void conn_connect_done(uv_connect_t *req, int status); static void conn_read(conn *c); static void conn_read_done(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf); static void conn_alloc(uv_handle_t *handle, size_t size, uv_buf_t *buf); static void conn_write(conn *c, const void *data, unsigned int len); static void conn_write_done(uv_write_t *req, int status); static void conn_close(conn *c); static void conn_close_done(uv_handle_t *handle); /* |incoming| has been initialized by server.c when this is called. */ void client_finish_init(server_ctx *sx, client_ctx *cx) { conn *incoming; conn *outgoing; cx->sx = sx; cx->state = s_handshake; s5_init(&cx->parser); incoming = &cx->incoming; incoming->client = cx; incoming->result = 0; incoming->rdstate = c_stop; incoming->wrstate = c_stop; incoming->idle_timeout = sx->idle_timeout; CHECK(0 == uv_timer_init(sx->loop, &incoming->timer_handle)); outgoing = &cx->outgoing; outgoing->client = cx; outgoing->result = 0; outgoing->rdstate = c_stop; outgoing->wrstate = c_stop; outgoing->idle_timeout = sx->idle_timeout; CHECK(0 == uv_tcp_init(cx->sx->loop, &outgoing->handle.tcp)); CHECK(0 == uv_timer_init(cx->sx->loop, &outgoing->timer_handle)); /* Wait for the initial packet. */ conn_read(incoming); } /* This is the core state machine that drives the client <-> upstream proxy. * We move through the initial handshake and authentication steps first and * end up (if all goes well) in the proxy state where we're just proxying * data between the client and upstream. */ static void do_next(client_ctx *cx) { int new_state; ASSERT(cx->state != s_dead); switch (cx->state) { case s_handshake: new_state = do_handshake(cx); break; case s_handshake_auth: new_state = do_handshake_auth(cx); break; case s_req_start: new_state = do_req_start(cx); break; case s_req_parse: new_state = do_req_parse(cx); break; case s_req_lookup: new_state = do_req_lookup(cx); break; case s_req_connect: new_state = do_req_connect(cx); break; case s_proxy_start: new_state = do_proxy_start(cx); break; case s_proxy: new_state = do_proxy(cx); break; case s_kill: new_state = do_kill(cx); break; case s_almost_dead_0: case s_almost_dead_1: case s_almost_dead_2: case s_almost_dead_3: case s_almost_dead_4: new_state = do_almost_dead(cx); break; default: UNREACHABLE(); } cx->state = new_state; if (cx->state == s_dead) { if (DEBUG_CHECKS) { memset(cx, -1, sizeof(*cx)); } free(cx); } } static int do_handshake(client_ctx *cx) { unsigned int methods; conn *incoming; s5_ctx *parser; uint8_t *data; size_t size; int err; parser = &cx->parser; incoming = &cx->incoming; ASSERT(incoming->rdstate == c_done); ASSERT(incoming->wrstate == c_stop); incoming->rdstate = c_stop; if (incoming->result < 0) { pr_err("read error: %s", uv_strerror(incoming->result)); return do_kill(cx); } data = (uint8_t *) incoming->t.buf; size = (size_t) incoming->result; err = s5_parse(parser, &data, &size); if (err == s5_ok) { conn_read(incoming); return s_handshake; /* Need more data. */ } if (size != 0) { /* Could allow a round-trip saving shortcut here if the requested auth * method is S5_AUTH_NONE (provided unauthenticated traffic is allowed.) * Requires client support however. */ pr_err("junk in handshake"); return do_kill(cx); } if (err != s5_auth_select) { pr_err("handshake error: %s", s5_strerror(err)); return do_kill(cx); } methods = s5_auth_methods(parser); if ((methods & S5_AUTH_NONE) && can_auth_none(cx->sx, cx)) { s5_select_auth(parser, S5_AUTH_NONE); conn_write(incoming, "\5\0", 2); /* No auth required. */ return s_req_start; } if ((methods & S5_AUTH_PASSWD) && can_auth_passwd(cx->sx, cx)) { /* TODO(bnoordhuis) Implement username/password auth. */ } conn_write(incoming, "\5\377", 2); /* No acceptable auth. */ return s_kill; } /* TODO(bnoordhuis) Implement username/password auth. */ static int do_handshake_auth(client_ctx *cx) { UNREACHABLE(); return do_kill(cx); } static int do_req_start(client_ctx *cx) { conn *incoming; incoming = &cx->incoming; ASSERT(incoming->rdstate == c_stop); ASSERT(incoming->wrstate == c_done); incoming->wrstate = c_stop; if (incoming->result < 0) { pr_err("write error: %s", uv_strerror(incoming->result)); return do_kill(cx); } conn_read(incoming); return s_req_parse; } static int do_req_parse(client_ctx *cx) { conn *incoming; conn *outgoing; s5_ctx *parser; uint8_t *data; size_t size; int err; parser = &cx->parser; incoming = &cx->incoming; outgoing = &cx->outgoing; ASSERT(incoming->rdstate == c_done); ASSERT(incoming->wrstate == c_stop); ASSERT(outgoing->rdstate == c_stop); ASSERT(outgoing->wrstate == c_stop); incoming->rdstate = c_stop; if (incoming->result < 0) { pr_err("read error: %s", uv_strerror(incoming->result)); return do_kill(cx); } data = (uint8_t *) incoming->t.buf; size = (size_t) incoming->result; err = s5_parse(parser, &data, &size); if (err == s5_ok) { conn_read(incoming); return s_req_parse; /* Need more data. */ } if (size != 0) { pr_err("junk in request %u", (unsigned) size); return do_kill(cx); } if (err != s5_exec_cmd) { pr_err("request error: %s", s5_strerror(err)); return do_kill(cx); } if (parser->cmd == s5_cmd_tcp_bind) { /* Not supported but relatively straightforward to implement. */ pr_warn("BIND requests are not supported."); return do_kill(cx); } if (parser->cmd == s5_cmd_udp_assoc) { /* Not supported. Might be hard to implement because libuv has no * functionality for detecting the MTU size which the RFC mandates. */ pr_warn("UDP ASSOC requests are not supported."); return do_kill(cx); } ASSERT(parser->cmd == s5_cmd_tcp_connect); if (parser->atyp == s5_atyp_host) { conn_getaddrinfo(outgoing, (const char *) parser->daddr); return s_req_lookup; } if (parser->atyp == s5_atyp_ipv4) { memset(&outgoing->t.addr4, 0, sizeof(outgoing->t.addr4)); outgoing->t.addr4.sin_family = AF_INET; outgoing->t.addr4.sin_port = htons(parser->dport); memcpy(&outgoing->t.addr4.sin_addr, parser->daddr, sizeof(outgoing->t.addr4.sin_addr)); } else if (parser->atyp == s5_atyp_ipv6) { memset(&outgoing->t.addr6, 0, sizeof(outgoing->t.addr6)); outgoing->t.addr6.sin6_family = AF_INET6; outgoing->t.addr6.sin6_port = htons(parser->dport); memcpy(&outgoing->t.addr6.sin6_addr, parser->daddr, sizeof(outgoing->t.addr6.sin6_addr)); } else { UNREACHABLE(); } return do_req_connect_start(cx); } static int do_req_lookup(client_ctx *cx) { s5_ctx *parser; conn *incoming; conn *outgoing; parser = &cx->parser; incoming = &cx->incoming; outgoing = &cx->outgoing; ASSERT(incoming->rdstate == c_stop); ASSERT(incoming->wrstate == c_stop); ASSERT(outgoing->rdstate == c_stop); ASSERT(outgoing->wrstate == c_stop); if (outgoing->result < 0) { /* TODO(bnoordhuis) Escape control characters in parser->daddr. */ pr_err("lookup error for \"%s\": %s", parser->daddr, uv_strerror(outgoing->result)); /* Send back a 'Host unreachable' reply. */ conn_write(incoming, "\5\4\0\1\0\0\0\0\0\0", 10); return s_kill; } /* Don't make assumptions about the offset of sin_port/sin6_port. */ switch (outgoing->t.addr.sa_family) { case AF_INET: outgoing->t.addr4.sin_port = htons(parser->dport); break; case AF_INET6: outgoing->t.addr6.sin6_port = htons(parser->dport); break; default: UNREACHABLE(); } return do_req_connect_start(cx); } /* Assumes that cx->outgoing.t.sa contains a valid AF_INET/AF_INET6 address. */ static int do_req_connect_start(client_ctx *cx) { conn *incoming; conn *outgoing; int err; incoming = &cx->incoming; outgoing = &cx->outgoing; ASSERT(incoming->rdstate == c_stop); ASSERT(incoming->wrstate == c_stop); ASSERT(outgoing->rdstate == c_stop); ASSERT(outgoing->wrstate == c_stop); if (!can_access(cx->sx, cx, &outgoing->t.addr)) { pr_warn("connection not allowed by ruleset"); /* Send a 'Connection not allowed by ruleset' reply. */ conn_write(incoming, "\5\2\0\1\0\0\0\0\0\0", 10); return s_kill; } err = conn_connect(outgoing); if (err != 0) { pr_err("connect error: %s\n", uv_strerror(err)); return do_kill(cx); } return s_req_connect; } static int do_req_connect(client_ctx *cx) { const struct sockaddr_in6 *in6; const struct sockaddr_in *in; char addr_storage[sizeof(*in6)]; conn *incoming; conn *outgoing; uint8_t *buf; int addrlen; incoming = &cx->incoming; outgoing = &cx->outgoing; ASSERT(incoming->rdstate == c_stop); ASSERT(incoming->wrstate == c_stop); ASSERT(outgoing->rdstate == c_stop); ASSERT(outgoing->wrstate == c_stop); /* Build and send the reply. Not very pretty but gets the job done. */ buf = (uint8_t *) incoming->t.buf; if (outgoing->result == 0) { /* The RFC mandates that the SOCKS server must include the local port * and address in the reply. So that's what we do. */ addrlen = sizeof(addr_storage); CHECK(0 == uv_tcp_getsockname(&outgoing->handle.tcp, (struct sockaddr *) addr_storage, &addrlen)); buf[0] = 5; /* Version. */ buf[1] = 0; /* Success. */ buf[2] = 0; /* Reserved. */ if (addrlen == sizeof(*in)) { buf[3] = 1; /* IPv4. */ in = (const struct sockaddr_in *) &addr_storage; memcpy(buf + 4, &in->sin_addr, 4); memcpy(buf + 8, &in->sin_port, 2); conn_write(incoming, buf, 10); } else if (addrlen == sizeof(*in6)) { buf[3] = 4; /* IPv6. */ in6 = (const struct sockaddr_in6 *) &addr_storage; memcpy(buf + 4, &in6->sin6_addr, 16); memcpy(buf + 20, &in6->sin6_port, 2); conn_write(incoming, buf, 22); } else { UNREACHABLE(); } return s_proxy_start; } else { pr_err("upstream connection error: %s\n", uv_strerror(outgoing->result)); /* Send a 'Connection refused' reply. */ conn_write(incoming, "\5\5\0\1\0\0\0\0\0\0", 10); return s_kill; } UNREACHABLE(); return s_kill; } static int do_proxy_start(client_ctx *cx) { conn *incoming; conn *outgoing; incoming = &cx->incoming; outgoing = &cx->outgoing; ASSERT(incoming->rdstate == c_stop); ASSERT(incoming->wrstate == c_done); ASSERT(outgoing->rdstate == c_stop); ASSERT(outgoing->wrstate == c_stop); incoming->wrstate = c_stop; if (incoming->result < 0) { pr_err("write error: %s", uv_strerror(incoming->result)); return do_kill(cx); } conn_read(incoming); conn_read(outgoing); return s_proxy; } /* Proxy incoming data back and forth. */ static int do_proxy(client_ctx *cx) { if (conn_cycle("client", &cx->incoming, &cx->outgoing)) { return do_kill(cx); } if (conn_cycle("upstream", &cx->outgoing, &cx->incoming)) { return do_kill(cx); } return s_proxy; } static int do_kill(client_ctx *cx) { int new_state; if (cx->state >= s_almost_dead_0) { return cx->state; } /* Try to cancel the request. The callback still runs but if the * cancellation succeeded, it gets called with status=UV_ECANCELED. */ new_state = s_almost_dead_1; if (cx->state == s_req_lookup) { new_state = s_almost_dead_0; uv_cancel(&cx->outgoing.t.req); } conn_close(&cx->incoming); conn_close(&cx->outgoing); return new_state; } static int do_almost_dead(client_ctx *cx) { ASSERT(cx->state >= s_almost_dead_0); return cx->state + 1; /* Another finalizer completed. */ } static int conn_cycle(const char *who, conn *a, conn *b) { if (a->result < 0) { if (a->result != UV_EOF) { pr_err("%s error: %s", who, uv_strerror(a->result)); } return -1; } if (b->result < 0) { return -1; } if (a->wrstate == c_done) { a->wrstate = c_stop; } /* The logic is as follows: read when we don't write and write when we don't * read. That gives us back-pressure handling for free because if the peer * sends data faster than we consume it, TCP congestion control kicks in. */ if (a->wrstate == c_stop) { if (b->rdstate == c_stop) { conn_read(b); } else if (b->rdstate == c_done) { conn_write(a, b->t.buf, b->result); b->rdstate = c_stop; /* Triggers the call to conn_read() above. */ } } return 0; } static void conn_timer_reset(conn *c) { CHECK(0 == uv_timer_start(&c->timer_handle, conn_timer_expire, c->idle_timeout, 0)); } static void conn_timer_expire(uv_timer_t *handle) { conn *c; c = CONTAINER_OF(handle, conn, timer_handle); c->result = UV_ETIMEDOUT; do_next(c->client); } static void conn_getaddrinfo(conn *c, const char *hostname) { struct addrinfo hints; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; CHECK(0 == uv_getaddrinfo(c->client->sx->loop, &c->t.addrinfo_req, conn_getaddrinfo_done, hostname, NULL, &hints)); conn_timer_reset(c); } static void conn_getaddrinfo_done(uv_getaddrinfo_t *req, int status, struct addrinfo *ai) { conn *c; c = CONTAINER_OF(req, conn, t.addrinfo_req); c->result = status; if (status == 0) { /* FIXME(bnoordhuis) Should try all addresses. */ if (ai->ai_family == AF_INET) { c->t.addr4 = *(const struct sockaddr_in *) ai->ai_addr; } else if (ai->ai_family == AF_INET6) { c->t.addr6 = *(const struct sockaddr_in6 *) ai->ai_addr; } else { UNREACHABLE(); } } uv_freeaddrinfo(ai); do_next(c->client); } /* Assumes that c->t.sa contains a valid AF_INET or AF_INET6 address. */ static int conn_connect(conn *c) { ASSERT(c->t.addr.sa_family == AF_INET || c->t.addr.sa_family == AF_INET6); conn_timer_reset(c); return uv_tcp_connect(&c->t.connect_req, &c->handle.tcp, &c->t.addr, conn_connect_done); } static void conn_connect_done(uv_connect_t *req, int status) { conn *c; if (status == UV_ECANCELED) { return; /* Handle has been closed. */ } c = CONTAINER_OF(req, conn, t.connect_req); c->result = status; do_next(c->client); } static void conn_read(conn *c) { ASSERT(c->rdstate == c_stop); CHECK(0 == uv_read_start(&c->handle.stream, conn_alloc, conn_read_done)); c->rdstate = c_busy; conn_timer_reset(c); } static void conn_read_done(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf) { conn *c; c = CONTAINER_OF(handle, conn, handle); ASSERT(c->t.buf == buf->base); ASSERT(c->rdstate == c_busy); c->rdstate = c_done; c->result = nread; uv_read_stop(&c->handle.stream); do_next(c->client); } static void conn_alloc(uv_handle_t *handle, size_t size, uv_buf_t *buf) { conn *c; c = CONTAINER_OF(handle, conn, handle); ASSERT(c->rdstate == c_busy); buf->base = c->t.buf; buf->len = sizeof(c->t.buf); } static void conn_write(conn *c, const void *data, unsigned int len) { uv_buf_t buf; ASSERT(c->wrstate == c_stop || c->wrstate == c_done); c->wrstate = c_busy; /* It's okay to cast away constness here, uv_write() won't modify the * memory. */ buf.base = (char *) data; buf.len = len; CHECK(0 == uv_write(&c->write_req, &c->handle.stream, &buf, 1, conn_write_done)); conn_timer_reset(c); } static void conn_write_done(uv_write_t *req, int status) { conn *c; if (status == UV_ECANCELED) { return; /* Handle has been closed. */ } c = CONTAINER_OF(req, conn, write_req); ASSERT(c->wrstate == c_busy); c->wrstate = c_done; c->result = status; do_next(c->client); } static void conn_close(conn *c) { ASSERT(c->rdstate != c_dead); ASSERT(c->wrstate != c_dead); c->rdstate = c_dead; c->wrstate = c_dead; c->timer_handle.data = c; c->handle.handle.data = c; uv_close(&c->handle.handle, conn_close_done); uv_close((uv_handle_t *) &c->timer_handle, conn_close_done); } static void conn_close_done(uv_handle_t *handle) { conn *c; c = handle->data; do_next(c->client); }