Blob Blame History Raw
/*
  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
  This file is part of GlusterFS.

  This file is licensed to you under your choice of the GNU Lesser
  General Public License, version 3 or any later version (LGPLv3 or
  later), or the GNU General Public License, version 2 (GPLv2), in all
  cases as published by the Free Software Foundation.
*/

#ifndef _SOCKET_H
#define _SOCKET_H

#include <openssl/ssl.h>
#include <openssl/err.h>
#include <openssl/x509v3.h>
#ifdef HAVE_OPENSSL_DH_H
#include <openssl/dh.h>
#endif
#ifdef HAVE_OPENSSL_ECDH_H
#include <openssl/ecdh.h>
#endif

#include <glusterfs/gf-event.h>
#include "rpc-transport.h"
#include <glusterfs/logging.h>
#include <glusterfs/dict.h>
#include <glusterfs/mem-pool.h>
#include <glusterfs/globals.h>
#include <glusterfs/refcount.h>

#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif /* MAX_IOVEC */

#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT

#define RPC_MAX_FRAGMENT_SIZE 0x7fffffff

/* The default window size will be 0, indicating not to set
 * it to any size. Default size of Linux is found to be
 * performance friendly.
 * Linux allows us to over-ride the max values for the system.
 * Should we over-ride them? Because if we set a value larger than the default
 * setsockopt will fail. Having larger values might be beneficial for
 * IB links.
 */
#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
#define GF_MIN_SOCKET_WINDOW_SIZE (0)
#define GF_USE_DEFAULT_KEEPALIVE (-1)

#define GF_KEEPALIVE_TIME (20)
#define GF_KEEPALIVE_INTERVAL (2)
#define GF_KEEPALIVE_COUNT (9)

typedef enum {
    SP_STATE_NADA = 0,
    SP_STATE_COMPLETE,
    SP_STATE_READING_FRAGHDR,
    SP_STATE_READ_FRAGHDR,
    SP_STATE_READING_FRAG,
} sp_rpcrecord_state_t;

typedef enum {
    SP_STATE_RPCFRAG_INIT,
    SP_STATE_READING_MSGTYPE,
    SP_STATE_READ_MSGTYPE,
    SP_STATE_NOTIFYING_XID
} sp_rpcfrag_state_t;

typedef enum {
    SP_STATE_SIMPLE_MSG_INIT,
    SP_STATE_READING_SIMPLE_MSG,
} sp_rpcfrag_simple_msg_state_t;

typedef enum {
    SP_STATE_VECTORED_REQUEST_INIT,
    SP_STATE_READING_CREDBYTES,
    SP_STATE_READ_CREDBYTES, /* read credential data. */
    SP_STATE_READING_VERFBYTES,
    SP_STATE_READ_VERFBYTES, /* read verifier data */
    SP_STATE_READING_PROGHDR,
    SP_STATE_READ_PROGHDR,
    SP_STATE_READING_PROGHDR_XDATA,
    SP_STATE_READ_PROGHDR_XDATA, /* It's a bad "name" in the generic
                                    RPC state machine, but greatly
                                    aids code review (and xdata is
                                    the only "consumer" of this state)
                                 */
    SP_STATE_READING_PROG,
} sp_rpcfrag_vectored_request_state_t;

typedef enum {
    SP_STATE_REQUEST_HEADER_INIT,
    SP_STATE_READING_RPCHDR1,
    SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
                            * including credlen
                            */
} sp_rpcfrag_request_header_state_t;

struct ioq {
    union {
        struct list_head list;
        struct {
            struct ioq *next;
            struct ioq *prev;
        };
    };

    struct iovec vector[MAX_IOVEC];
    int count;
    struct iovec *pending_vector;
    int pending_count;
    struct iobref *iobref;
    uint32_t fraghdr;
};

typedef struct {
    sp_rpcfrag_request_header_state_t header_state;
    sp_rpcfrag_vectored_request_state_t vector_state;
    int vector_sizer_state;
} sp_rpcfrag_request_state_t;

typedef enum {
    SP_STATE_VECTORED_REPLY_STATUS_INIT,
    SP_STATE_READING_REPLY_STATUS,
    SP_STATE_READ_REPLY_STATUS,
} sp_rpcfrag_vectored_reply_status_state_t;

typedef enum {
    SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
    SP_STATE_READING_PROC_HEADER,
    SP_STATE_READING_PROC_OPAQUE,
    SP_STATE_READ_PROC_OPAQUE,
    SP_STATE_READ_PROC_HEADER,
} sp_rpcfrag_vectored_reply_accepted_success_state_t;

typedef enum {
    SP_STATE_ACCEPTED_REPLY_INIT,
    SP_STATE_READING_REPLY_VERFLEN,
    SP_STATE_READ_REPLY_VERFLEN,
    SP_STATE_READING_REPLY_VERFBYTES,
    SP_STATE_READ_REPLY_VERFBYTES,
} sp_rpcfrag_vectored_reply_accepted_state_t;

typedef struct {
    uint32_t accept_status;
    sp_rpcfrag_vectored_reply_status_state_t status_state;
    sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
    sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
} sp_rpcfrag_vectored_reply_state_t;

struct gf_sock_incoming_frag {
    char *fragcurrent;
    uint32_t bytes_read;
    uint32_t remaining_size;
    struct iovec vector;
    struct iovec *pending_vector;
    union {
        sp_rpcfrag_request_state_t request;
        sp_rpcfrag_vectored_reply_state_t reply;
    } call_body;

    sp_rpcfrag_simple_msg_state_t simple_state;
    sp_rpcfrag_state_t state;
};

#define GF_SOCKET_RA_MAX 1024

struct gf_sock_incoming {
    char *proghdr_base_addr;
    struct iobuf *iobuf;
    size_t iobuf_size;
    int count;
    struct gf_sock_incoming_frag frag;
    struct iovec vector[2];
    struct iovec payload_vector;
    struct iobref *iobref;
    rpc_request_info_t *request_info;
    struct iovec *pending_vector;
    int pending_count;
    size_t total_bytes_read;

    size_t ra_read;
    size_t ra_max;
    size_t ra_served;
    char *ra_buf;
    uint32_t fraghdr;
    char complete_record;
    msg_type_t msg_type;
    sp_rpcrecord_state_t record_state;
};

typedef enum {
    OT_IDLE,       /* Uninitialized or termination complete. */
    OT_SPAWNING,   /* Past pthread_create but not in thread yet. */
    OT_RUNNING,    /* Poller thread running normally. */
    OT_CALLBACK,   /* Poller thread in the middle of a callback. */
    OT_PLEASE_DIE, /* Poller termination requested. */
} ot_state_t;

typedef struct {
    union {
        struct list_head ioq;
        struct {
            struct ioq *ioq_next;
            struct ioq *ioq_prev;
        };
    };
    pthread_mutex_t out_lock;
    pthread_mutex_t cond_lock;
    pthread_cond_t cond;
    pthread_t thread;
    int windowsize;
    int keepalive;
    int keepaliveidle;
    int keepaliveintvl;
    int keepalivecnt;
    int timeout;
    int log_ctr;
    /* ssl_error_required is used only during the SSL connection setup
     * phase.
     * It holds the error code returned by SSL_get_error() and is used to
     * arm the epoll event set for the required event for the specific fd.
     */
    int ssl_error_required;

    GF_REF_DECL; /* refcount to keep track of socket_poller
                    threads */
    struct {
        pthread_mutex_t lock;
        pthread_cond_t cond;
        uint64_t in_progress;
    } notify;
    int32_t sock;
    int32_t idx;
    int32_t gen;
    uint32_t backlog;
    SSL_METHOD *ssl_meth;
    SSL_CTX *ssl_ctx;
    int ssl_session_id;
    BIO *ssl_sbio;
    SSL *ssl_ssl;
    char *ssl_own_cert;
    char *ssl_private_key;
    char *ssl_ca_list;
    int pipe[2];
    struct gf_sock_incoming incoming;
    /* -1 = not connected. 0 = in progress. 1 = connected */
    char connected;
    /* 1 = connect failed for reasons other than EINPROGRESS/ENOENT
    see socket_connect for details */
    char connect_failed;
    char bio;
    char connect_finish_log;
    char submit_log;
    char lowlat;
    char nodelay;
    mgmt_ssl_t srvr_ssl;
    gf_boolean_t read_fail_log;
    gf_boolean_t ssl_enabled; /* outbound I/O */
    gf_boolean_t mgmt_ssl;    /* outbound mgmt */
    gf_boolean_t is_server;
    gf_boolean_t use_ssl;
    gf_boolean_t ssl_accepted;  /* To indicate SSL_accept() */
    gf_boolean_t ssl_connected; /* or SSL_connect() has been
                                 * been completed on this socket.
                                 * These are valid only when
                                 * use_ssl is true.
                                 */
    /* SSL_CTX is created for each transport. Since we are now using non-
     * blocking mechanism for SSL_accept() and SSL_connect(), the SSL
     * context is created on the first EPOLLIN event which may lead to
     * SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE and may not complete the
     * SSL connection at the first attempt.
     * ssl_context_created is a flag to note that we've created the SSL
     * context for the connection so that we don't blindly create any more
     * while !ssl_accepted or !ssl_connected.
     */
    gf_boolean_t ssl_context_created;
    gf_boolean_t accepted; /* explicit flag to be set in
                            * socket_event_handler() for
                            * newly accepted socket
                            */

} socket_private_t;

#endif