/* include/net/sock.h */


struct sock
{
    /* Socket demultiplex comparisons on incoming packets. */
    __u32                   daddr;          /* Foreign IPv4 addr*/
    __u32                   rcv_saddr;      /* Bound local IPv4 addr*/
    __u16                   dport;          /* Destination port*/
    unsigned short          num;            /* Local port*/
    int                     bound_dev_if;   /* Bound device index if != 0*/

    /* Main hash linkage for various protocol lookup tables. */
    struct sock             *next;
    struct sock             **pprev;
    struct sock             *bind_next;
    struct sock             **bind_pprev;

    volatile unsigned char  state,          /* Connection state*/
                            zapped;         /* In ax25 & ipx means not linked       */
    __u16                   sport;          /* Source port*/

    unsigned short          family;         /* Address family*/
    unsigned char           reuse;          /* SO_REUSEADDR setting*/
    unsigned char           shutdown;
    atomic_t                refcnt;         /* Reference count*/

    socket_lock_t           lock;           /* Synchronizer...*/
    int                     rcvbuf;         /* Size of receive buffer in bytes      */

    wait_queue_head_t       *sleep;         /* Sock wait queue*/
    struct dst_entry        *dst_cache;     /* Destination cache*/
    rwlock_t                dst_lock;
    atomic_t                rmem_alloc;     /* Receive queue bytes committed        */
    struct sk_buff_head     receive_queue;  /* Incoming packets*/
    atomic_t                wmem_alloc;     /* Transmit queue bytes committed       */
    struct sk_buff_head     write_queue;    /* Packet sending queue*/
    atomic_t                omem_alloc;     /* "o" is "option" or "other" */        
    int                     wmem_queued;    /* Persistent queue size */
    int                     forward_alloc;  /* Space allocated forward. */
    __u32                   saddr;          /* Sending source*/
    unsigned int            allocation;     /* Allocation mode*/
    int                     sndbuf;         /* Size of send buffer in bytes*/
    struct sock             *prev;

    /* Not all are volatile, but some are, so we might as well
     * say they all are.
     * XXX Make this a flag word -DaveM
     */
    volatile char           dead,
                            done,
                            urginline,
                            keepopen,
                            linger,
                            destroy,
                            no_check,
                            broadcast,
                            bsdism;
    unsigned char           debug;
    unsigned char           rcvtstamp;
    unsigned char           userlocks;
    int                     proc;
    unsigned long           lingertime;

    int                     hashent;
    struct sock             *pair;

    /* The backlog queue is special, it is always used with
     * the per-socket spinlock held and requires low latency
     * access.  Therefore we special case its implementation
     * (see the delivery sketch after this structure).
     */
    struct
    {
        struct sk_buff *head;
        struct sk_buff *tail;
    }
    backlog;

    rwlock_t                callback_lock;

    /* Error queue, rarely used. */
    struct sk_buff_head     error_queue;

    struct proto            *prot;

    #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
        union
        {
            struct ipv6_pinfo       af_inet6;
        }
        net_pinfo;
    #endif

    union
    {
        struct tcp_opt          af_tcp;
        #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
            struct raw_opt          tp_raw4;
        #endif
        #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
            struct raw6_opt         tp_raw;
        #endif /* CONFIG_IPV6 */
        #if defined(CONFIG_SPX) || defined (CONFIG_SPX_MODULE)
            struct spx_opt          af_spx;
        #endif /* CONFIG_SPX */

    }
    tp_pinfo;

    int                     err, err_soft;  /* Soft holds errors that don't
                                                       cause failure but are the cause
                                                       of a persistent failure not just
                                                       'timed out' */
    unsigned short          ack_backlog;
    unsigned short          max_ack_backlog;
    __u32                   priority;
    unsigned short          type;
    unsigned char           localroute;     /* Route locally only */
    unsigned char           protocol;
    struct ucred            peercred;
    int                     rcvlowat;
    long                    rcvtimeo;
    long                    sndtimeo;

    #ifdef CONFIG_FILTER
        /* Socket Filtering Instructions */
        struct sk_filter        *filter;
    #endif /* CONFIG_FILTER */

    /* This is where all the private (optional) areas that don't
     * overlap will eventually live.
     */
    union
    {
        void *destruct_hook;
        struct unix_opt af_unix;
        #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
            struct inet_opt af_inet;
        #endif
        #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
            struct atalk_sock       af_at;
        #endif
        #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
            struct ipx_opt          af_ipx;
        #endif
        #if defined (CONFIG_DECNET) || defined(CONFIG_DECNET_MODULE)
            struct dn_scp           dn;
        #endif
        #if defined (CONFIG_PACKET) || defined(CONFIG_PACKET_MODULE)
            struct packet_opt       *af_packet;
        #endif
        #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
            x25_cb                  *x25;
        #endif
        #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
            ax25_cb                 *ax25;
        #endif
        #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
            nr_cb                   *nr;
        #endif
        #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
            rose_cb                 *rose;
        #endif
        #if defined(CONFIG_PPPOE) || defined(CONFIG_PPPOE_MODULE)
            struct pppox_opt        *pppox;
        #endif
        #ifdef CONFIG_NETLINK
            struct netlink_opt      *af_netlink;
        #endif
        #if defined(CONFIG_ECONET) || defined(CONFIG_ECONET_MODULE)
            struct econet_opt       *af_econet;
        #endif
        #if defined(CONFIG_ATM) || defined(CONFIG_ATM_MODULE)
            struct atm_vcc          *af_atm;
        #endif
        #if defined(CONFIG_IRDA) || defined(CONFIG_IRDA_MODULE)
            struct irda_sock        *irda;
        #endif
    }
    protinfo;


    /* This part is used for the timeout functions. */
    struct timer_list       timer;          /* This is the sock cleanup timer. */
    struct timeval          stamp;

    /* Identd and reporting IO signals */
    struct socket           *socket;

    /* RPC layer private data */
    void                    *user_data;

    /* Callbacks */
    void                    (*state_change)(struct sock *sk);
    void                    (*data_ready)(struct sock *sk,int bytes);
    void                    (*write_space)(struct sock *sk);
    void                    (*error_report)(struct sock *sk);

    int                     (*backlog_rcv) (struct sock *sk,
                                            struct sk_buff *skb);
    void                    (*destruct)(struct sock *sk);
};
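
/*
 * Illustration only (not kernel source): how the lock, backlog and
 * backlog_rcv members above cooperate on the receive path.  When no
 * process context owns the socket, the packet is handled at once via
 * backlog_rcv(); otherwise it is chained onto sk->backlog, which
 * release_sock() later drains under the socket lock.  The function
 * name example_deliver() is hypothetical.
 */
static int example_deliver(struct sock *sk, struct sk_buff *skb)
{
    int rc = 0;

    bh_lock_sock(sk);                       /* takes sk->lock.slock */
    if (!sk->lock.users) {
        rc = sk->backlog_rcv(sk, skb);      /* e.g. tcp_v4_do_rcv() for TCP */
    } else {
        /* Owner is busy: append to the low-latency backlog queue. */
        skb->next = NULL;
        if (sk->backlog.tail == NULL)
            sk->backlog.head = skb;
        else
            sk->backlog.tail->next = skb;
        sk->backlog.tail = skb;
    }
    bh_unlock_sock(sk);
    return rc;
}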


struct tcp_opt
{
    int tcp_header_len; /* Bytes of tcp header to send      */

    /*
     *  Header prediction flags
     *  0x5?10 << 16 + snd_wnd in net byte order
     */
    __u32   pred_flags;

    /*
     *  RFC793 variables by their proper names. This means you can
     *  read the code and the spec side by side (and laugh ...)
     *  See RFC793 and RFC1122. The RFC writes these in capitals.
     */
    __u32   rcv_nxt;    /* What we want to receive next     */
    __u32   snd_nxt;    /* Next sequence we send        */

    __u32   snd_una;    /* First byte we want an ack for    */
    __u32   snd_sml;    /* Last byte of the most recently transmitted small packet */
    __u32   rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
    __u32   lsndtime;   /* timestamp of last sent data packet (for restart window) */

    /* Delayed ACK control data */
    struct
    {
        __u8    pending;    /* ACK is pending */
        __u8    quick;      /* Scheduled number of quick acks   */
        __u8    pingpong;   /* The session is interactive       */
        __u8    blocked;    /* Delayed ACK was blocked by socket lock*/
        __u32   ato;        /* Predicted tick of soft clock     */
        unsigned long timeout;  /* Currently scheduled timeout      */
        __u32   lrcvtime;   /* timestamp of last received data packet*/
        __u16   last_seg_size;  /* Size of last incoming segment    */
        __u16   rcv_mss;    /* MSS used for delayed ACK decisions   */
    }
    ack;

    /* Data for direct copy to user */
    struct
    {
        struct sk_buff_head prequeue;
        int         memory;
        struct task_struct  *task;
        struct iovec        *iov;
        int         len;
    }
    ucopy;

    __u32   snd_wl1;    /* Sequence for window update       */
    __u32   snd_wnd;    /* The window we expect to receive  */
    __u32   max_window; /* Maximal window ever seen from peer   */
    __u32   pmtu_cookie;    /* Last pmtu seen by socket     */
    __u16   mss_cache;  /* Cached effective mss, not including SACKS */
    __u16   mss_clamp;  /* Maximal mss, negotiated at connection setup */
    __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
    __u8    ca_state;   /* State of fast-retransmit machine     */
    __u8    retransmits;    /* Number of unrecovered RTO timeouts.  */

    __u8    reordering; /* Packet reordering metric.        */
    __u8    queue_shrunk;   /* Write queue has been shrunk recently.*/
    __u8    defer_accept;   /* User waits for some data after accept() */

    /* RTT measurement (see the estimator sketch after this structure) */
    __u8    backoff;    /* backoff              */
    __u32   srtt;       /* smoothed round trip time << 3    */
    __u32   mdev;       /* mean deviation           */
    __u32   mdev_max;   /* maximal mdev for the last rtt period */
    __u32   rttvar;     /* smoothed mdev_max            */
    __u32   rtt_seq;    /* sequence number to update rttvar */
    __u32   rto;        /* retransmit timeout           */

    __u32   packets_out;    /* Packets which are "in flight"    */
    __u32   left_out;   /* Packets which have left the network  */
    __u32   retrans_out;    /* Retransmitted packets out        */


    /*
     *  Slow start and congestion control (see also Nagle, and Karn & Partridge)
     */
    __u32   snd_ssthresh;   /* Slow start size threshold        */
    __u32   snd_cwnd;   /* Sending congestion window        */
    __u16   snd_cwnd_cnt;   /* Linear increase counter      */
    __u16   snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
    __u32   snd_cwnd_used;
    __u32   snd_cwnd_stamp;

    /* Two commonly used timers in both sender and receiver paths. */
    unsigned long       timeout;
    struct timer_list   retransmit_timer;   /* Resend (no ack)  */
    struct timer_list   delack_timer;       /* Ack delay        */

    struct sk_buff_head out_of_order_queue; /* Out of order segments go here */

    struct tcp_func     *af_specific;   /* Operations which are AF_INET{4,6} specific   */
    struct sk_buff      *send_head; /* Front of stuff to transmit           */

    __u32   rcv_wnd;    /* Current receiver window      */
    __u32   rcv_wup;    /* rcv_nxt on last window update sent   */
    __u32   write_seq;  /* Tail(+1) of data held in tcp send buffer */
    __u32   pushed_seq; /* Last pushed seq, required to talk to Windows */
    __u32   copied_seq; /* Head of yet unread data      */
    /*
     *      Options received (usually on last packet, some only on SYN packets).
     */
    char    tstamp_ok,  /* TIMESTAMP seen on SYN packet     */
            wscale_ok,  /* Wscale seen on SYN packet        */
            sack_ok;    /* SACK seen on SYN packet      */
    char    saw_tstamp; /* Saw TIMESTAMP on last packet     */
    __u8    snd_wscale; /* Window scaling received from sender  */
    __u8    rcv_wscale; /* Window scaling to send to receiver   */
    __u8    nonagle;    /* Disable Nagle algorithm?             */
    __u8    keepalive_probes; /* num of allowed keep alive probes   */

    /*  PAWS/RTTM data  */
    __u32   rcv_tsval;  /* Time stamp value                 */
    __u32   rcv_tsecr;  /* Time stamp echo reply            */
    __u32   ts_recent;  /* Time stamp to echo next      */
    long    ts_recent_stamp;/* Time we stored ts_recent (for aging) */

    /*  SACKs data  */
    __u16   user_mss;   /* mss requested by user in ioctl */
    __u8    dsack;      /* D-SACK is scheduled          */
    __u8    eff_sacks;  /* Size of SACK array to send with next packet */
    struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
    struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/

    __u32   window_clamp;   /* Maximal window to advertise      */
    __u32   rcv_ssthresh;   /* Current window clamp         */
    __u8    probes_out; /* unanswered 0 window probes       */
    __u8    num_sacks;  /* Number of SACK blocks        */
    __u16   advmss;     /* Advertised MSS           */

    __u8    syn_retries;    /* num of allowed syn retries */
    __u8    ecn_flags;  /* ECN status bits.         */
    __u16   prior_ssthresh; /* ssthresh saved at recovery start */
    __u32   lost_out;   /* Lost packets             */
    __u32   sacked_out; /* SACK'd packets           */
    __u32   fackets_out;    /* FACK'd packets           */
    __u32   high_seq;   /* snd_nxt at onset of congestion   */

    __u32   retrans_stamp;  /* Timestamp of the last retransmit,
                     * also used in SYN-SENT to remember stamp of
                     * the first SYN. */
    __u32   undo_marker;    /* tracking retrans started here. */
    int undo_retrans;   /* number of undoable retransmissions. */
    __u32   syn_seq;    /* Seq of received SYN. */
    __u32   fin_seq;    /* Seq of received FIN. */
    __u32   urg_seq;    /* Seq of received urgent pointer */
    __u16   urg_data;   /* Saved octet of OOB data and control flags */
    __u8    pending;    /* Scheduled timer event    */
    __u8    urg_mode;   /* In urgent mode       */
    __u32   snd_up;     /* Urgent pointer       */

    /* The syn_wait_lock is necessary only to avoid tcp_get_info having
     * to grab the main sock lock while browsing the listening hash
     * (otherwise it's deadlock prone).
     * This lock is acquired in read mode only from tcp_get_info() and
     * it's acquired in write mode _only_ from code that is actively
     * changing the syn_wait_queue. All readers that are holding
     * the master sock lock don't need to grab this lock in read mode
     * too as the syn_wait_queue writes are always protected from
     * the main sock lock.
     */
    rwlock_t        syn_wait_lock;
    struct tcp_listen_opt   *listen_opt;

    /* FIFO of established children */
    struct open_request *accept_queue;
    struct open_request *accept_queue_tail;

    int         write_pending;  /* A write to socket waits to start. */

    unsigned int        keepalive_time;   /* time before keep alive takes place */
    unsigned int        keepalive_intvl;  /* time interval between keep alive probes */
    int         linger2;
};
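
/*
 * Simplified sketch of the Jacobson/Karels estimator behind the
 * srtt/mdev/rto fields above; the kernel's tcp_rtt_estimator() adds
 * further refinements (mdev_max/rttvar aging, clamping of rto).  As
 * the field comments note, srtt is stored scaled by 8 and mdev by 4;
 * "m" is a fresh RTT sample.  example_rtt_update() is hypothetical.
 */
static void example_rtt_update(struct tcp_opt *tp, long m)
{
    if (tp->srtt != 0) {
        m -= (tp->srtt >> 3);           /* error against srtt/8          */
        tp->srtt += m;                  /* srtt = 7/8*srtt + 1/8*sample  */
        if (m < 0)
            m = -m;
        m -= (tp->mdev >> 2);           /* error against mdev/4          */
        tp->mdev += m;                  /* mdev = 3/4*mdev + 1/4*|error| */
    } else {
        tp->srtt = m << 3;              /* first sample seeds the state  */
        tp->mdev = m << 1;              /* i.e. initial rttvar = sample/2 */
    }
    /* RTO = SRTT + 4*MDEV once the scale factors are removed. */
    tp->rto = (tp->srtt >> 3) + tp->mdev;
}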


struct proto
{
    void            (*close)(struct sock *sk,
                             long timeout);
    int         (*connect)(struct sock *sk,
                           struct sockaddr *uaddr,
                           int addr_len);
    int         (*disconnect)(struct sock *sk, int flags);

    struct sock *       (*accept) (struct sock *sk, int flags, int *err);

    int         (*ioctl)(struct sock *sk, int cmd,
                         unsigned long arg);
    int         (*init)(struct sock *sk);
    int         (*destroy)(struct sock *sk);
    void            (*shutdown)(struct sock *sk, int how);
    int         (*setsockopt)(struct sock *sk, int level,
                              int optname, char *optval, int optlen);
    int         (*getsockopt)(struct sock *sk, int level,
                              int optname, char *optval,
                              int *option);
    int         (*sendmsg)(struct sock *sk, struct msghdr *msg,
                           int len);
    int         (*recvmsg)(struct sock *sk, struct msghdr *msg,
                           int len, int noblock, int flags,
                           int *addr_len);
    int         (*bind)(struct sock *sk,
                        struct sockaddr *uaddr, int addr_len);

    int         (*backlog_rcv) (struct sock *sk,
                                struct sk_buff *skb);

    /* Keeping track of sk's, looking them up, and port selection methods. */
    void            (*hash)(struct sock *sk);
    void            (*unhash)(struct sock *sk);
    int         (*get_port)(struct sock *sk, unsigned short snum);

    char            name[32];

    struct
    {
        int inuse;
        u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
    }
    stats[NR_CPUS];
};
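
/*
 * Illustration only: a transport protocol exports its entry points by
 * filling in a struct proto table, and the generic socket code then
 * dispatches through sk->prot (e.g. inet_sendmsg() calls
 * sk->prot->sendmsg()).  The real TCP table is tcp_prot in
 * net/ipv4/tcp_ipv4.c; the example_* handlers and the "EXAMPLE" name
 * below are hypothetical, and any slot left out simply stays NULL.
 */
static int example_init(struct sock *sk)
{
    return 0;                           /* per-socket initialisation */
}

static void example_close(struct sock *sk, long timeout)
{
    /* tear down protocol state, honouring the linger timeout */
}

struct proto example_prot = {
    name:   "EXAMPLE",
    init:   example_init,
    close:  example_close,
};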