From 1744ef90110e99efc5b3c10953ff664b7276517c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 4 Oct 2011 11:25:10 +1100 Subject: [PATCH] Autoreconnect: autoreconnect was completely broken. Reimplement it so that it reconnects properly on session failure. --- include/libnfs-private.h | 7 +++ lib/libnfs-sync.c | 13 ----- lib/libnfs.c | 4 +- lib/socket.c | 118 ++++++++++++++++++++++++++++----------- 4 files changed, 95 insertions(+), 47 deletions(-) diff --git a/include/libnfs-private.h b/include/libnfs-private.h index 4c8594c..85cc8f9 100644 --- a/include/libnfs-private.h +++ b/include/libnfs-private.h @@ -45,6 +45,10 @@ struct rpc_context { int is_udp; struct sockaddr *udp_dest; int is_broadcast; + + /* track the address we connect to so we can auto-reconnect on session failure */ + struct sockaddr_storage s; + int auto_reconnect; }; struct rpc_pdu { @@ -85,3 +89,6 @@ int rpc_set_udp_destination(struct rpc_context *rpc, char *addr, int port, int i struct rpc_context *rpc_init_udp_context(void); struct sockaddr *rpc_get_recv_sockaddr(struct rpc_context *rpc); +void rpc_set_autoreconnect(struct rpc_context *rpc); +void rpc_unset_autoreconnect(struct rpc_context *rpc); + diff --git a/lib/libnfs-sync.c b/lib/libnfs-sync.c index 97d5a61..0340eda 100644 --- a/lib/libnfs-sync.c +++ b/lib/libnfs-sync.c @@ -105,19 +105,6 @@ static void wait_for_nfs_reply(struct nfs_context *nfs, struct sync_cb_data *cb_ cb_data->status = -EIO; break; } - if (nfs_get_fd(nfs) == -1) { - char *server = strdup(nfs_get_server(nfs)); - char *export = strdup(nfs_get_export(nfs)); - - if (nfs_mount(nfs, server, export) != 0) { - nfs_set_error(nfs, "Failed to reconnect to nfs server %s", nfs_get_error(nfs)); - free(server); - free(export); - break; - } - free(server); - free(export); - } } } diff --git a/lib/libnfs.c b/lib/libnfs.c index c1b4962..2f7ce09 100644 --- a/lib/libnfs.c +++ b/lib/libnfs.c @@ -352,6 +352,8 @@ static void nfs_mount_6_cb(struct rpc_context *rpc, int status, void *command_da free_nfs_cb_data(data); return; } + /* NFS TCP connections we want to autoreconnect after sessions are torn down (due to inactivity or error) */ + rpc_set_autoreconnect(rpc); } @@ -499,12 +501,10 @@ int nfs_mount_async(struct nfs_context *nfs, const char *server, const char *exp new_export = strdup(export); if (nfs->server != NULL) { free(nfs->server); - nfs->server = NULL; } nfs->server = new_server; if (nfs->export != NULL) { free(nfs->export); - nfs->export = NULL; } nfs->export = new_export; data->nfs = nfs; diff --git a/lib/socket.c b/lib/socket.c index bc101e7..00c65dd 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -53,7 +53,8 @@ #endif -static int rpc_disconnect_requeue(struct rpc_context *rpc); +static int rpc_reconnect_requeue(struct rpc_context *rpc); +static int rpc_connect_sockaddr_async(struct rpc_context *rpc, struct sockaddr_storage *s); static void set_nonblocking(int fd) { @@ -310,7 +311,7 @@ int rpc_service(struct rpc_context *rpc, int revents) if (revents & POLLIN) { if (rpc_read_from_socket(rpc) != 0) { - rpc_disconnect_requeue(rpc); + rpc_reconnect_requeue(rpc); return 0; } } @@ -325,37 +326,28 @@ int rpc_service(struct rpc_context *rpc, int revents) return 0; } -int rpc_connect_async(struct rpc_context *rpc, const char *server, int port, rpc_cb cb, void *private_data) +void rpc_set_autoreconnect(struct rpc_context *rpc) { - struct sockaddr_storage s; - struct sockaddr_in *sin = (struct sockaddr_in *)&s; - int socksize; - - if (rpc->fd != -1) { - rpc_set_error(rpc, "Trying to connect while already connected"); - return -1; - } + rpc->auto_reconnect = 1; +} - if (rpc->is_udp != 0) { - rpc_set_error(rpc, "Trying to connect on UDP socket"); - return -1; - } +void rpc_unset_autoreconnect(struct rpc_context *rpc) +{ + rpc->auto_reconnect = 0; +} - sin->sin_family = AF_INET; - sin->sin_port = htons(port); - if (inet_pton(AF_INET, server, &sin->sin_addr) != 1) { - rpc_set_error(rpc, "Not a valid server ip address"); - return -1; - } +static int rpc_connect_sockaddr_async(struct rpc_context *rpc, struct sockaddr_storage *s) +{ + int socksize; - switch (s.ss_family) { + switch (s->ss_family) { case AF_INET: socksize = sizeof(struct sockaddr_in); -#ifdef HAVE_SOCKADDR_LEN - sin->sin_len = socksize; -#endif rpc->fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); break; + default: + rpc_set_error(rpc, "Can not handle AF_FAMILY:%d", s->ss_family); + return -1; } if (rpc->fd == -1) { @@ -363,9 +355,6 @@ int rpc_connect_async(struct rpc_context *rpc, const char *server, int port, rpc return -1; } - rpc->connect_cb = cb; - rpc->connect_data = private_data; - #if !defined(WIN32) /* Some systems allow you to set capabilities on an executable @@ -409,9 +398,9 @@ int rpc_connect_async(struct rpc_context *rpc, const char *server, int port, rpc set_nonblocking(rpc->fd); #if defined(WIN32) - if (connect(rpc->fd, (struct sockaddr *)&s, socksize) == 0 && errno != EINPROGRESS ) + if (connect(rpc->fd, (struct sockaddr *)s, socksize) == 0 && errno != EINPROGRESS ) #else - if (connect(rpc->fd, (struct sockaddr *)&s, socksize) != 0 && errno != EINPROGRESS) + if (connect(rpc->fd, (struct sockaddr *)s, socksize) != 0 && errno != EINPROGRESS) #endif { rpc_set_error(rpc, "connect() to server failed"); @@ -421,8 +410,52 @@ int rpc_connect_async(struct rpc_context *rpc, const char *server, int port, rpc return 0; } +int rpc_connect_async(struct rpc_context *rpc, const char *server, int port, rpc_cb cb, void *private_data) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&rpc->s; + + if (rpc->fd != -1) { + rpc_set_error(rpc, "Trying to connect while already connected"); + return -1; + } + + if (rpc->is_udp != 0) { + rpc_set_error(rpc, "Trying to connect on UDP socket"); + return -1; + } + + rpc->auto_reconnect = 0; + + sin->sin_family = AF_INET; + sin->sin_port = htons(port); + if (inet_pton(AF_INET, server, &sin->sin_addr) != 1) { + rpc_set_error(rpc, "Not a valid server ip address"); + return -1; + } + + + switch (rpc->s.ss_family) { + case AF_INET: +#ifdef HAVE_SOCKADDR_LEN + sin->sin_len = sizeof(struct sockaddr_in); +#endif + break; + } + + rpc->connect_cb = cb; + rpc->connect_data = private_data; + + if (rpc_connect_sockaddr_async(rpc, &rpc->s) != 0) { + return -1; + } + + return 0; +} + int rpc_disconnect(struct rpc_context *rpc, char *error) { + rpc_unset_autoreconnect(rpc); + if (rpc->fd != -1) { #if defined(WIN32) closesocket(rpc->fd); @@ -439,8 +472,18 @@ int rpc_disconnect(struct rpc_context *rpc, char *error) return 0; } -/* disconnect but do not error all PDUs, just move pdus in-flight back to the outqueue */ -static int rpc_disconnect_requeue(struct rpc_context *rpc) +static void reconnect_cb(struct rpc_context *rpc, int status, void *data _U_, void *private_data) +{ + if (status != RPC_STATUS_SUCCESS) { + rpc_error_all_pdus(rpc, "RPC ERROR: Failed to reconnect async"); + return; + } + + rpc->is_connected = 1; +} + +/* disconnect but do not error all PDUs, just move pdus in-flight back to the outqueue and reconnect */ +static int rpc_reconnect_requeue(struct rpc_context *rpc) { struct rpc_pdu *pdu; @@ -461,6 +504,17 @@ static int rpc_disconnect_requeue(struct rpc_context *rpc) for (pdu=rpc->waitpdu; pdu; pdu=pdu->next) { SLIST_REMOVE(&rpc->waitpdu, pdu); SLIST_ADD(&rpc->outqueue, pdu); + /* we have to re-send the whole pdu again */ + pdu->written = 0; + } + + if (rpc->auto_reconnect != 0) { + rpc->connect_cb = reconnect_cb; + + if (rpc_connect_sockaddr_async(rpc, &rpc->s) != 0) { + rpc_error_all_pdus(rpc, "RPC ERROR: Failed to reconnect async"); + return -1; + } } return 0; -- 2.34.1