Merge pull request #82 from plieven/readahead
[deb_libnfs.git] / lib / libnfs.c
index 830e28e4e2df75ec2d08e9e92641d683cb9cf9f5..41b8a2b35f01ea0b9a15cd0a575b7009476fa09d 100644 (file)
@@ -69,6 +69,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include "libnfs-zdr.h"
+#include "slist.h"
 #include "libnfs.h"
 #include "libnfs-raw.h"
 #include "libnfs-raw-mount.h"
 #include "libnfs-raw-portmap.h"
 #include "libnfs-private.h"
 
+#define MAX_DIR_CACHE 1024
+
 struct nfsdir {
+       struct nfs_fh3 fh;      /* copy of the directory's file handle; compared by nfs_dircache_find() */
+       fattr3 attr;            /* directory attributes from the READDIR/READDIRPLUS reply; mtime is checked on cache hits */
+       struct nfsdir *next;    /* link used while the listing sits on nfs_context.dircache */
+
        struct nfsdirent *entries;
        struct nfsdirent *current;
 };
 
+struct nfs_readahead {
+       uint64_t fh_offset;     /* file offset just past the most recently served read */
+       uint64_t last_offset;   /* start offset of the previous pread request */
+       uint64_t buf_offset;    /* file offset of the first byte held in buf */
+       uint64_t buf_count;     /* number of bytes held in buf */
+       time_t buf_ts;          /* time(NULL) when buf was last filled or invalidated */
+       void *buf;              /* cached read data; freed on invalidate and on close */
+       uint32_t cur_ra;        /* current readahead size in bytes; doubled on sequential reads, reset to NFS_BLKSIZE otherwise */
+};
+
 struct nfsfh {
        struct nfs_fh3 fh;
        int is_sync;
        int is_append;
        uint64_t offset;
+       struct nfs_readahead ra;        /* per-open-file readahead state and cached data */
 };
 
 struct nfs_context {
@@ -96,6 +114,7 @@ struct nfs_context {
        uint64_t readmax;
        uint64_t writemax;
        char *cwd;
+       struct nfsdir *dircache;
 };
 
 void nfs_free_nfsdir(struct nfsdir *nfsdir)
@@ -108,9 +127,39 @@ void nfs_free_nfsdir(struct nfsdir *nfsdir)
                free(nfsdir->entries);
                nfsdir->entries = dirent;
        }
+       free(nfsdir->fh.data.data_val);
        free(nfsdir);
 }
 
+/* Park nfsdir in nfs->dircache for reuse by a later opendir; evicts one entry so at most MAX_DIR_CACHE stay cached. */
+static void nfs_dircache_add(struct nfs_context *nfs, struct nfsdir *nfsdir)
+{
+       int i;
+       LIBNFS_LIST_ADD(&nfs->dircache, nfsdir);
+       for (nfsdir = nfs->dircache, i = 0; nfsdir; nfsdir = nfsdir->next, i++) {
+               if (i >= MAX_DIR_CACHE) { /* index MAX_DIR_CACHE is entry number MAX_DIR_CACHE + 1: one too many */
+                       LIBNFS_LIST_REMOVE(&nfs->dircache, nfsdir);
+                       nfs_free_nfsdir(nfsdir);
+                       break;
+               }
+       }
+}
+
+/* Search nfs->dircache for fh; a match is unlinked from the cache and returned, otherwise NULL. */
+static struct nfsdir *nfs_dircache_find(struct nfs_context *nfs, struct nfs_fh3 *fh)
+{
+       struct nfsdir *entry = nfs->dircache;
+       while (entry != NULL) {
+               if (entry->fh.data.data_len == fh->data.data_len &&
+                   memcmp(entry->fh.data.data_val, fh->data.data_val, fh->data.data_len) == 0) {
+                       LIBNFS_LIST_REMOVE(&nfs->dircache, entry);
+                       break;
+               }
+               entry = entry->next;
+       }
+       return entry;
+}
+
 struct nfs_cb_data;
 typedef int (*continue_func)(struct nfs_context *nfs, fattr3 *attr,
                             struct nfs_cb_data *data);
@@ -135,9 +184,8 @@ struct nfs_cb_data {
        int cancel;
        int oom;
        int num_calls;
-       uint64_t start_offset, max_offset, count;
+       uint64_t offset, count, max_offset, org_offset, org_count;
        char *buffer;
-       size_t request_size;
        char *usrbuf;
 };
 
@@ -182,12 +230,14 @@ char *nfs_get_error(struct nfs_context *nfs)
 
 static int nfs_set_context_args(struct nfs_context *nfs, char *arg, char *val)
 {
-       if (!strncmp(arg, "tcp-syncnt", 10)) {
+       if (!strcmp(arg, "tcp-syncnt")) {
                rpc_set_tcp_syncnt(nfs_get_rpc_context(nfs), atoi(val));
-       } else if (!strncmp(arg, "uid", 3)) {
+       } else if (!strcmp(arg, "uid")) {
                rpc_set_uid(nfs_get_rpc_context(nfs), atoi(val));
-       } else if (!strncmp(arg, "gid", 3)) {
+       } else if (!strcmp(arg, "gid")) {
                rpc_set_gid(nfs_get_rpc_context(nfs), atoi(val));
+       } else if (!strcmp(arg, "readahaed")) {
+               rpc_set_readahead(nfs_get_rpc_context(nfs), atoi(val));
        }
        return 0;
 }
@@ -379,6 +429,12 @@ void nfs_destroy_context(struct nfs_context *nfs)
                nfs->rootfh.data.data_val = NULL;
        }
 
+       while (nfs->dircache) {
+               struct nfsdir *nfsdir = nfs->dircache;
+               LIBNFS_LIST_REMOVE(&nfs->dircache, nfsdir);
+               nfs_free_nfsdir(nfsdir);
+       }
+
        free(nfs);
 }
 
@@ -1193,8 +1249,8 @@ static void nfs_stat_1_cb(struct rpc_context *rpc, int status, void *command_dat
         st.st_rdev    = 0;
         st.st_size    = res->GETATTR3res_u.resok.obj_attributes.size;
 #ifndef WIN32
-        st.st_blksize = 4096;
-        st.st_blocks  = res->GETATTR3res_u.resok.obj_attributes.size / 4096;
+        st.st_blksize = NFS_BLKSIZE;
+        st.st_blocks  = res->GETATTR3res_u.resok.obj_attributes.size / NFS_BLKSIZE;
 #endif//WIN32
         st.st_atime   = res->GETATTR3res_u.resok.obj_attributes.atime.seconds;
         st.st_mtime   = res->GETATTR3res_u.resok.obj_attributes.mtime.seconds;
@@ -1577,9 +1633,9 @@ static void nfs_pread_mcb(struct rpc_context *rpc, int status, void *command_dat
                        /* if we have more than one call or we have received a short read we need a reassembly buffer */
                        if (data->num_calls || (count < mdata->count && !res->READ3res_u.resok.eof)) {
                                if (data->buffer == NULL) {
-                                       data->buffer =  malloc(data->request_size);
+                                       data->buffer =  malloc(data->count);
                                        if (data->buffer == NULL) {
-                                               rpc_set_error(nfs->rpc, "Out-Of-Memory: Failed to allocate reassembly buffer for %d bytes", (int)data->request_size);
+                                               rpc_set_error(nfs->rpc, "Out-Of-Memory: Failed to allocate reassembly buffer for %d bytes", (int)data->count);
                                                data->oom = 1;
                                        }
                                }
@@ -1588,7 +1644,7 @@ static void nfs_pread_mcb(struct rpc_context *rpc, int status, void *command_dat
                                if (count <= mdata->count) {
                                        /* copy data into reassembly buffer if we have one */
                                        if (data->buffer != NULL) {
-                                               memcpy(&data->buffer[mdata->offset - data->start_offset], res->READ3res_u.resok.data.data_val, count);
+                                               memcpy(&data->buffer[mdata->offset - data->offset], res->READ3res_u.resok.data.data_val, count);
                                        }
                                        if (data->max_offset < mdata->offset + count) {
                                                data->max_offset = mdata->offset + count;
@@ -1644,14 +1700,35 @@ static void nfs_pread_mcb(struct rpc_context *rpc, int status, void *command_dat
        }
 
        if (data->buffer) {
-               data->cb(data->max_offset - data->start_offset, nfs, data->buffer, data->private_data);
+               if (data->max_offset > data->org_offset + data->org_count) {
+                       data->max_offset = data->org_offset + data->org_count;
+               }
+               data->cb(data->max_offset - data->org_offset, nfs, data->buffer + (data->org_offset - data->offset), data->private_data);
        } else {
                data->cb(res->READ3res_u.resok.count, nfs, res->READ3res_u.resok.data.data_val, data->private_data);
        }
 
+       data->nfsfh->ra.fh_offset = data->max_offset;
+       if (data->nfsfh->ra.cur_ra) {
+               free(data->nfsfh->ra.buf);
+               data->nfsfh->ra.buf = data->buffer;
+               data->nfsfh->ra.buf_offset = data->offset;
+               data->nfsfh->ra.buf_count = data->count;
+               data->nfsfh->ra.buf_ts = time(NULL);
+               data->buffer = NULL;
+       }
        free_nfs_cb_data(data);
 }
 
+/* Discard the handle's cached readahead data, stamp the time, and restart readahead at NFS_BLKSIZE. */
+static void nfs_ra_invalidate(struct nfsfh *nfsfh) {
+       free(nfsfh->ra.buf);
+       nfsfh->ra.buf = NULL;
+       nfsfh->ra.buf_offset = nfsfh->ra.buf_count = 0;
+       nfsfh->ra.buf_ts = time(NULL);
+       nfsfh->ra.cur_ra = NFS_BLKSIZE;
+}
+
 static int nfs_pread_async_internal(struct nfs_context *nfs, struct nfsfh *nfsfh, uint64_t offset, uint64_t count, nfs_cb cb, void *private_data, int update_pos)
 {
        struct nfs_cb_data *data;
@@ -1666,16 +1743,78 @@ static int nfs_pread_async_internal(struct nfs_context *nfs, struct nfsfh *nfsfh
        data->cb           = cb;
        data->private_data = private_data;
        data->nfsfh        = nfsfh;
-       data->request_size = count;
+       data->org_offset   = offset;
+       data->org_count    = count;
 
        assert(data->num_calls == 0);
 
+       if (nfs->rpc->readahead && time(NULL) - nfsfh->ra.buf_ts > NFS_RA_TIMEOUT) {
+               /* readahead cache timeout */
+               nfs_ra_invalidate(nfsfh);
+       }
+
+       if (nfs->rpc->readahead) {
+               if (offset >= nfsfh->ra.last_offset &&
+                       offset - NFS_BLKSIZE <= nfsfh->ra.fh_offset + nfsfh->ra.cur_ra) {
+                       if (nfs->rpc->readahead > nfsfh->ra.cur_ra) {
+                               nfsfh->ra.cur_ra <<= 1;
+                       }
+               } else {
+                       nfsfh->ra.cur_ra = NFS_BLKSIZE;
+               }
+
+               nfsfh->ra.last_offset = offset;
+
+               if (nfsfh->ra.buf_offset <= offset &&
+                       nfsfh->ra.buf_offset + nfsfh->ra.buf_count >= offset + count) {
+                       /* serve request completely from cache */
+                       data->buffer = malloc(count);
+                       if (data->buffer == NULL) {
+                               free_nfs_cb_data(data);
+                               return -ENOMEM;
+                       }
+                       memcpy(data->buffer, nfsfh->ra.buf + (offset - nfsfh->ra.buf_offset), count);
+                       data->cb(count, nfs, data->buffer, data->private_data);
+                       nfsfh->ra.fh_offset = offset + count;
+                       free_nfs_cb_data(data);
+                       return 0;
+               }
+
+               /* align start offset to blocksize */
+               count += offset & (NFS_BLKSIZE - 1);
+               offset &= ~(NFS_BLKSIZE - 1);
+
+               /* align end offset to blocksize and add readahead */
+               count += nfsfh->ra.cur_ra - 1;
+               count &= ~(NFS_BLKSIZE - 1);
+
+               data->buffer = malloc(count);
+               if (data->buffer == NULL) {
+                       free_nfs_cb_data(data);
+                       return -ENOMEM;
+               }
+               data->offset = offset;
+               data->count = count;
+
+               if (nfsfh->ra.buf_count && nfsfh->ra.buf_offset <= offset &&
+                       nfsfh->ra.buf_offset + nfsfh->ra.buf_count >= offset) {
+                       /* serve request partially from cache */
+                       size_t overlap = (nfsfh->ra.buf_offset + nfsfh->ra.buf_count) - offset;
+                       if (overlap > count) count = overlap;
+                       memcpy(data->buffer, nfsfh->ra.buf + (offset - nfsfh->ra.buf_offset), overlap);
+                       offset += overlap;
+                       count -= overlap;
+               }
+       } else {
+               data->offset = offset;
+               data->count = count;
+       }
+
+       data->max_offset = offset;
+
        /* chop requests into chunks of at most READMAX bytes if necessary.
         * we send all reads in parallel so that performance is still good.
         */
-       data->max_offset = offset;
-       data->start_offset = offset;
-
        do {
                uint64_t readcount = count;
                struct nfs_mcb_data *mdata;
@@ -1794,7 +1933,7 @@ static void nfs_pwrite_mcb(struct rpc_context *rpc, int status, void *command_da
                                        mdata->count -= count;
 
                                        nfs_fill_WRITE3args(&args, data->nfsfh, mdata->offset, mdata->count,
-                                                                               &data->usrbuf[mdata->offset - data->start_offset]);
+                                                                               &data->usrbuf[mdata->offset - data->offset]);
                                        if (rpc_nfs3_write_async(nfs->rpc, nfs_pwrite_mcb, &args, mdata) == 0) {
                                                data->num_calls++;
                                                return;
@@ -1834,7 +1973,7 @@ static void nfs_pwrite_mcb(struct rpc_context *rpc, int status, void *command_da
                return;
        }
 
-       data->cb(data->max_offset - data->start_offset, nfs, NULL, data->private_data);
+       data->cb(data->max_offset - data->offset, nfs, NULL, data->private_data);
 
        free_nfs_cb_data(data);
 }
@@ -1863,7 +2002,7 @@ static int nfs_pwrite_async_internal(struct nfs_context *nfs, struct nfsfh *nfsf
         * we send all writes in parallel so that performance is still good.
         */
        data->max_offset = offset;
-       data->start_offset = offset;
+       data->offset = offset;
 
        do {
                uint64_t writecount = count;
@@ -1890,7 +2029,7 @@ static int nfs_pwrite_async_internal(struct nfs_context *nfs, struct nfsfh *nfsf
                mdata->count  = writecount;
                mdata->update_pos = update_pos;
 
-               nfs_fill_WRITE3args(&args, nfsfh, offset, writecount, &buf[offset - data->start_offset]);
+               nfs_fill_WRITE3args(&args, nfsfh, offset, writecount, &buf[offset - data->offset]);
 
                if (rpc_nfs3_write_async(nfs->rpc, nfs_pwrite_mcb, &args, mdata) != 0) {
                        rpc_set_error(nfs->rpc, "RPC error: Failed to send WRITE call for %s", data->path);
@@ -1956,6 +2095,7 @@ static void nfs_write_append_cb(struct rpc_context *rpc, int status, void *comma
 
 int nfs_write_async(struct nfs_context *nfs, struct nfsfh *nfsfh, uint64_t count, char *buf, nfs_cb cb, void *private_data)
 {
+       nfs_ra_invalidate(nfsfh);
        if (nfsfh->is_append) {
                struct GETATTR3args args;
                struct nfs_cb_data *data;
@@ -1999,6 +2139,7 @@ int nfs_close_async(struct nfs_context *nfs, struct nfsfh *nfsfh, nfs_cb cb, voi
                free(nfsfh->fh.data.data_val);
                nfsfh->fh.data.data_val = NULL;
        }
+       free(nfsfh->ra.buf);
        free(nfsfh);
 
        cb(0, nfs, NULL, private_data);
@@ -2817,6 +2958,7 @@ static void nfs_opendir3_cb(struct rpc_context *rpc, int status, void *command_d
                        nfsdirent->ctime.tv_usec = attributes->ctime.nseconds/1000;
                        nfsdirent->uid = attributes->uid;
                        nfsdirent->gid = attributes->gid;
+                       nfsdirent->nlink = attributes->nlink;
                }
        }
 
@@ -2923,6 +3065,9 @@ static void nfs_opendir2_cb(struct rpc_context *rpc, int status, void *command_d
                return;
        }
 
+       if (res->READDIR3res_u.resok.dir_attributes.attributes_follow)
+               nfsdir->attr = res->READDIR3res_u.resok.dir_attributes.post_op_attr_u.attributes;
+
        /* steal the dirhandle */
        nfsdir->current = nfsdir->entries;
 
@@ -3054,6 +3199,7 @@ static void nfs_opendir_cb(struct rpc_context *rpc, int status, void *command_da
                        nfsdirent->ctime.tv_usec = entry->name_attributes.post_op_attr_u.attributes.ctime.nseconds/1000;
                        nfsdirent->uid = entry->name_attributes.post_op_attr_u.attributes.uid;
                        nfsdirent->gid = entry->name_attributes.post_op_attr_u.attributes.gid;
+                       nfsdirent->nlink = entry->name_attributes.post_op_attr_u.attributes.nlink;
                }
 
                nfsdirent->next  = nfsdir->entries;
@@ -3083,6 +3229,9 @@ static void nfs_opendir_cb(struct rpc_context *rpc, int status, void *command_da
                return;
        }
 
+       if (res->READDIRPLUS3res_u.resok.dir_attributes.attributes_follow)
+               nfsdir->attr = res->READDIRPLUS3res_u.resok.dir_attributes.post_op_attr_u.attributes;
+
        /* steal the dirhandle */
        data->continue_data = NULL;
        nfsdir->current = nfsdir->entries;
@@ -3094,6 +3243,31 @@ static void nfs_opendir_cb(struct rpc_context *rpc, int status, void *command_da
 static int nfs_opendir_continue_internal(struct nfs_context *nfs, fattr3 *attr _U_, struct nfs_cb_data *data)
 {
        READDIRPLUS3args args;
+       struct nfsdir *nfsdir = data->continue_data;;
+       struct nfsdir *cached;
+
+       cached = nfs_dircache_find(nfs, &data->fh);
+       if (cached) {
+               if (attr && attr->mtime.seconds == cached->attr.mtime.seconds) {
+                       cached->current = cached->entries;
+                       data->cb(0, nfs, cached, data->private_data);
+                       free_nfs_cb_data(data);
+                       return 0;
+               } else {
+                       /* cache must be stale */
+                       nfs_free_nfsdir(cached);
+               }
+       }
+
+       nfsdir->fh.data.data_len  = data->fh.data.data_len;
+       nfsdir->fh.data.data_val = malloc(nfsdir->fh.data.data_len);
+       if (nfsdir->fh.data.data_val == NULL) {
+               rpc_set_error(nfs->rpc, "OOM when allocating fh for nfsdir");
+               data->cb(-ENOMEM, nfs, rpc_get_error(nfs->rpc), data->private_data);
+               free_nfs_cb_data(data);
+               return -1;
+       }
+       memcpy(nfsdir->fh.data.data_val, data->fh.data.data_val, data->fh.data.data_len);
 
        args.dir = data->fh;
        args.cookie = 0;
@@ -3143,9 +3317,9 @@ struct nfsdirent *nfs_readdir(struct nfs_context *nfs _U_, struct nfsdir *nfsdir
 /*
  * closedir()
  */
-void nfs_closedir(struct nfs_context *nfs _U_, struct nfsdir *nfsdir)
+void nfs_closedir(struct nfs_context *nfs, struct nfsdir *nfsdir)
 {
-       nfs_free_nfsdir(nfsdir);
+       nfs_dircache_add(nfs, nfsdir); /* listing is no longer freed here; the context's dir cache takes it over */
 }
 
 
@@ -3294,11 +3468,11 @@ static void nfs_statvfs_1_cb(struct rpc_context *rpc, int status, void *command_
                return;
        }
 
-       svfs.f_bsize   = 4096;
-       svfs.f_frsize  = 4096;
-       svfs.f_blocks  = res->FSSTAT3res_u.resok.tbytes/4096;
-       svfs.f_bfree   = res->FSSTAT3res_u.resok.fbytes/4096;
-       svfs.f_bavail  = res->FSSTAT3res_u.resok.abytes/4096;
+       svfs.f_bsize   = NFS_BLKSIZE;
+       svfs.f_frsize  = NFS_BLKSIZE;
+       svfs.f_blocks  = res->FSSTAT3res_u.resok.tbytes/NFS_BLKSIZE;
+       svfs.f_bfree   = res->FSSTAT3res_u.resok.fbytes/NFS_BLKSIZE;
+       svfs.f_bavail  = res->FSSTAT3res_u.resok.abytes/NFS_BLKSIZE;
        svfs.f_files   = res->FSSTAT3res_u.resok.tfiles;
        svfs.f_ffree   = res->FSSTAT3res_u.resok.ffiles;
 #if !defined(ANDROID)
@@ -4336,6 +4510,10 @@ void nfs_set_gid(struct nfs_context *nfs, int gid) {
        rpc_set_gid(nfs->rpc, gid);
 }
 
+void nfs_set_readahead(struct nfs_context *nfs, uint32_t v) { /* passes v to rpc_set_readahead() on the context's rpc handle */
+       rpc_set_readahead(nfs->rpc, v);
+}
+
 void nfs_set_error(struct nfs_context *nfs, char *error_string, ...)
 {
         va_list ap;