root/branches/orange-next/src/kernel/linux-2.6/pvfs2-utils.c @ 8994

Revision 8994, 74.6 KB (checked in by dcypher, 22 months ago)

replaced %llu->%s for (PVFS|TROVE)_handle

Line 
1/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6#define  __PINT_PROTO_ENCODE_OPAQUE_HANDLE
7#include "pvfs2-kernel.h"
8#include "pvfs2-types.h"
9#include "pint-dev-shared.h"
10#include "pvfs2-dev-proto.h"
11#include "pvfs2-bufmap.h"
12#include "pvfs2-internal.h"
13#include "../../common/misc/pvfs2-handle-to-str.h"
14
15int pvfs2_gen_credentials(
16    PVFS_credentials *credentials)
17{
18    int ret = -1;
19
20    if (credentials)
21    {
22        memset(credentials, 0, sizeof(PVFS_credentials));
23#ifdef HAVE_CURRENT_FSUID
24        credentials->uid = current_fsuid();
25        credentials->gid = current_fsgid();
26#else
27        credentials->uid = current->fsuid;
28        credentials->gid = current->fsgid;
29#endif
30
31        ret = 0;
32    }
33    return ret;
34}
35
36PVFS_fs_id fsid_of_op(pvfs2_kernel_op_t *op)
37{
38    PVFS_fs_id fsid = PVFS_FS_ID_NULL;
39    if (op)
40    {
41        switch (op->upcall.type)
42        {
43            case PVFS2_VFS_OP_FILE_IO:
44                fsid = op->upcall.req.io.refn.fs_id;
45                break;
46            case PVFS2_VFS_OP_LOOKUP:
47                fsid = op->upcall.req.lookup.parent_refn.fs_id;
48                break;
49            case PVFS2_VFS_OP_CREATE:
50                fsid = op->upcall.req.create.parent_refn.fs_id;
51                break;
52            case PVFS2_VFS_OP_GETATTR:
53                fsid = op->upcall.req.getattr.refn.fs_id;
54                break;
55            case PVFS2_VFS_OP_REMOVE:
56                fsid = op->upcall.req.remove.parent_refn.fs_id;
57                break;
58            case PVFS2_VFS_OP_MKDIR:
59                fsid = op->upcall.req.mkdir.parent_refn.fs_id;
60                break;
61            case PVFS2_VFS_OP_READDIR:
62                fsid = op->upcall.req.readdir.refn.fs_id;
63                break;
64            case PVFS2_VFS_OP_SETATTR:
65                fsid = op->upcall.req.setattr.refn.fs_id;
66                break;
67            case PVFS2_VFS_OP_SYMLINK:
68                fsid = op->upcall.req.sym.parent_refn.fs_id;
69                break;
70            case PVFS2_VFS_OP_RENAME:
71                fsid = op->upcall.req.rename.old_parent_refn.fs_id;
72                break;
73            case PVFS2_VFS_OP_STATFS:
74                fsid = op->upcall.req.statfs.fs_id;
75                break;
76            case PVFS2_VFS_OP_TRUNCATE:
77                fsid = op->upcall.req.truncate.refn.fs_id;
78                break;
79            case PVFS2_VFS_OP_MMAP_RA_FLUSH:
80                fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
81                break;
82            case PVFS2_VFS_OP_FS_UMOUNT:
83                fsid = op->upcall.req.fs_umount.fs_id;
84                break;
85            case PVFS2_VFS_OP_GETXATTR:
86                fsid = op->upcall.req.getxattr.refn.fs_id;
87                break;
88            case PVFS2_VFS_OP_SETXATTR:
89                fsid = op->upcall.req.setxattr.refn.fs_id;
90                break;
91            case PVFS2_VFS_OP_LISTXATTR:
92                fsid = op->upcall.req.listxattr.refn.fs_id;
93                break;
94            case PVFS2_VFS_OP_REMOVEXATTR:
95                fsid = op->upcall.req.removexattr.refn.fs_id;
96                break;
97            case PVFS2_VFS_OP_FSYNC:
98                fsid = op->upcall.req.fsync.refn.fs_id;
99                break;
100            default:
101                break;
102        }
103    }
104    return fsid;
105}
106
107static void pvfs2_set_inode_flags(struct inode *inode,
108        PVFS_sys_attr *attrs)
109{
110    if (attrs->flags & PVFS_IMMUTABLE_FL) {
111        inode->i_flags |= S_IMMUTABLE;
112    }
113    else {
114        inode->i_flags &= ~S_IMMUTABLE;
115    }
116    if (attrs->flags & PVFS_APPEND_FL) {
117        inode->i_flags |= S_APPEND;
118    }
119    else {
120        inode->i_flags &= ~S_APPEND;
121    }
122    if (attrs->flags & PVFS_NOATIME_FL) {
123        inode->i_flags |= S_NOATIME;
124    }
125    else {
126        inode->i_flags &= ~S_NOATIME;
127    }
128    return;
129}
130
131/* NOTE: symname is ignored unless the inode is a sym link */
132int copy_attributes_to_inode(
133    struct inode *inode,
134    PVFS_sys_attr *attrs,
135    char *symname)
136{
137    int ret = -1;
138    int perm_mode = 0;
139    pvfs2_inode_t *pvfs2_inode = NULL;
140    loff_t inode_size = 0, rounded_up_size = 0;
141
142    if (inode && attrs)
143    {
144        pvfs2_inode = PVFS2_I(inode);
145
146        /*
147          arbitrarily set the inode block size; FIXME: we need to
148          resolve the difference between the reported inode blocksize
149          and the PAGE_CACHE_SIZE, since our block count will always
150          be wrong.
151
152          For now, we're setting the block count to be the proper
153          number assuming the block size is 512 bytes, and the size is
154          rounded up to the nearest 4K.  This is apparently required
155          to get proper size reports from the 'du' shell utility.
156
157          changing the inode->i_blkbits to something other than
158          PAGE_CACHE_SHIFT breaks mmap/execution as we depend on that.
159        */
160#ifdef HAVE_I_BLKSIZE_IN_STRUCT_INODE
161        inode->i_blksize = pvfs_bufmap_size_query();
162#endif
163        inode->i_blkbits = PAGE_CACHE_SHIFT;
164        gossip_debug(GOSSIP_UTILS_DEBUG, "attrs->mask = %x (objtype = %s)\n",
165                attrs->mask,
166                attrs->objtype == PVFS_TYPE_METAFILE ? "file" :
167                attrs->objtype == PVFS_TYPE_DIRECTORY ? "directory" :
168                attrs->objtype == PVFS_TYPE_SYMLINK ? "symlink" :
169                 "invalid/unknown");
170               
171        if (attrs->objtype == PVFS_TYPE_METAFILE)
172        {
173            pvfs2_set_inode_flags(inode, attrs);
174            if (attrs->mask & PVFS_ATTR_SYS_SIZE)
175            {
176                inode_size = (loff_t)attrs->size;
177                rounded_up_size =
178                    (inode_size + (4096 - (inode_size % 4096)));
179
180                pvfs2_lock_inode(inode);
181#ifdef PVFS2_LINUX_KERNEL_2_4
182#if (PVFS2_LINUX_KERNEL_2_4_MINOR_VER > 21)
183                inode->i_bytes = inode_size;
184#endif
185#else
186                /* this is always ok for 2.6.x */
187                inode->i_bytes = inode_size;
188#endif
189                inode->i_blocks = (unsigned long)(rounded_up_size / 512);
190                pvfs2_unlock_inode(inode);
191
192                /*
193                  NOTE: make sure all the places we're called from have
194                  the inode->i_sem lock.  we're fine in 99% of the cases
195                  since we're mostly called from a lookup.
196                */
197                inode->i_size = inode_size;
198            }
199        }
200        else if ((attrs->objtype == PVFS_TYPE_SYMLINK) &&
201                 (symname != NULL))
202        {
203            inode->i_size = (loff_t)strlen(symname);
204        }
205        else
206        {
207            pvfs2_lock_inode(inode);
208#ifdef PVFS2_LINUX_KERNEL_2_4
209#if (PVFS2_LINUX_KERNEL_2_4_MINOR_VER > 21)
210            inode->i_bytes = PAGE_CACHE_SIZE;
211#endif
212#else
213            /* always ok for 2.6.x */
214            inode->i_bytes = PAGE_CACHE_SIZE;
215#endif
216            inode->i_blocks = (unsigned long)(PAGE_CACHE_SIZE / 512);
217            pvfs2_unlock_inode(inode);
218
219            inode->i_size = PAGE_CACHE_SIZE;
220        }
221
222        inode->i_uid = attrs->owner;
223        inode->i_gid = attrs->group;
224#ifdef PVFS2_LINUX_KERNEL_2_4
225        inode->i_atime = (time_t)attrs->atime;
226        inode->i_mtime = (time_t)attrs->mtime;
227        inode->i_ctime = (time_t)attrs->ctime;
228#else
229        inode->i_atime.tv_sec = (time_t)attrs->atime;
230        inode->i_mtime.tv_sec = (time_t)attrs->mtime;
231        inode->i_ctime.tv_sec = (time_t)attrs->ctime;
232        inode->i_atime.tv_nsec = 0;
233        inode->i_mtime.tv_nsec = 0;
234        inode->i_ctime.tv_nsec = 0;
235#endif
236        if (attrs->perms & PVFS_O_EXECUTE)
237            perm_mode |= S_IXOTH;
238        if (attrs->perms & PVFS_O_WRITE)
239            perm_mode |= S_IWOTH;
240        if (attrs->perms & PVFS_O_READ)
241            perm_mode |= S_IROTH;
242
243        if (attrs->perms & PVFS_G_EXECUTE)
244            perm_mode |= S_IXGRP;
245        if (attrs->perms & PVFS_G_WRITE)
246            perm_mode |= S_IWGRP;
247        if (attrs->perms & PVFS_G_READ)
248            perm_mode |= S_IRGRP;
249
250        if (attrs->perms & PVFS_U_EXECUTE)
251            perm_mode |= S_IXUSR;
252        if (attrs->perms & PVFS_U_WRITE)
253            perm_mode |= S_IWUSR;
254        if (attrs->perms & PVFS_U_READ)
255            perm_mode |= S_IRUSR;
256
257        if (attrs->perms & PVFS_G_SGID)
258            perm_mode |= S_ISGID;
259        /* Should we honor the suid bit of the file? */
260        if (get_suid_flag(inode) == 1 && (attrs->perms & PVFS_U_SUID))
261            perm_mode |= S_ISUID;
262
263        inode->i_mode = perm_mode;
264
265        if (is_root_handle(inode))
266        {
267            /* special case: mark the root inode as sticky */
268            inode->i_mode |= S_ISVTX;
269            gossip_debug(GOSSIP_UTILS_DEBUG, "Marking inode %s as sticky\n",
270                    PVFS_handle_to_str(get_handle_from_ino(inode)));
271        }
272
273        switch (attrs->objtype)
274        {
275            case PVFS_TYPE_METAFILE:
276                inode->i_mode |= S_IFREG;
277                inode->i_op = &pvfs2_file_inode_operations;
278                inode->i_fop = &pvfs2_file_operations;
279                ret = 0;
280                break;
281            case PVFS_TYPE_DIRECTORY:
282                inode->i_mode |= S_IFDIR;
283                inode->i_op = &pvfs2_dir_inode_operations;
284                inode->i_fop = &pvfs2_dir_operations;
285                /* NOTE: we have no good way to keep nlink consistent for
286                 * directories across clients; keep constant at 1.  Why 1?  If
287                 * we go with 2, then find(1) gets confused and won't work
288                 * properly withouth the -noleaf option */
289                inode->i_nlink = 1;
290                ret = 0;
291                break;
292            case PVFS_TYPE_SYMLINK:
293                inode->i_mode |= S_IFLNK;
294                inode->i_op = &pvfs2_symlink_inode_operations;
295                inode->i_fop = NULL;
296
297                /* copy link target to inode private data */
298                if (pvfs2_inode && symname)
299                {
300                    strncpy(pvfs2_inode->link_target, symname, PVFS_NAME_MAX);
301                    gossip_debug(GOSSIP_UTILS_DEBUG, "Copied attr link target %s\n",
302                                pvfs2_inode->link_target);
303                }
304                gossip_debug(GOSSIP_UTILS_DEBUG, "symlink mode %o\n", inode->i_mode);
305                ret = 0;
306                break;
307            default:
308                gossip_err("pvfs2:copy_attributes_to_inode: got invalid "
309                            "attribute type %x\n", attrs->objtype);
310        }
311        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2: copy_attributes_to_inode: setting i_mode to %o, i_size to %lu\n",
312                inode->i_mode, (unsigned long)pvfs2_i_size_read(inode));
313    }
314    return ret;
315}
316
317static inline void convert_attribute_mode_to_pvfs_sys_attr(
318    int mode,
319    PVFS_sys_attr *attrs,
320    int suid)
321{
322    attrs->perms = PVFS_util_translate_mode(mode, suid);
323    attrs->mask |= PVFS_ATTR_SYS_PERM;
324
325    gossip_debug(GOSSIP_UTILS_DEBUG, "mode is %o | translated perms is %o\n", mode,
326                attrs->perms);
327
328    /* NOTE: this function only called during setattr.  Setattr must not mess
329     * with object type */
330}
331
332/*
333  NOTE: in kernel land, we never use the sys_attr->link_target for
334  anything, so don't bother copying it into the sys_attr object here.
335*/
336static inline int copy_attributes_from_inode(
337    struct inode *inode,
338    PVFS_sys_attr *attrs,
339    struct iattr *iattr)
340{
341    umode_t tmp_mode;
342
343    if (!iattr || !inode || !attrs)
344    {
345        gossip_err("NULL iattr (%p), inode (%p), attrs (%p) in copy_attributes_from_inode!\n",
346                iattr, inode, attrs);
347        return -EINVAL;
348    }
349    /*
350      we need to be careful
351      to only copy the attributes out of the iattr object that we
352      know are valid
353    */
354    attrs->mask = 0;
355    if (iattr->ia_valid & ATTR_UID)
356    {
357        attrs->owner = iattr->ia_uid;
358        attrs->mask |= PVFS_ATTR_SYS_UID;
359        gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
360    }
361    if (iattr->ia_valid & ATTR_GID)
362    {
363        attrs->group = iattr->ia_gid;
364        attrs->mask |= PVFS_ATTR_SYS_GID;
365        gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
366    }
367
368    if (iattr->ia_valid & ATTR_ATIME)
369    {
370        attrs->mask |= PVFS_ATTR_SYS_ATIME;
371        if (iattr->ia_valid & ATTR_ATIME_SET)
372        {
373            attrs->atime = pvfs2_convert_time_field((void *)&iattr->ia_atime);
374            attrs->mask |= PVFS_ATTR_SYS_ATIME_SET;
375        }
376    }
377    if (iattr->ia_valid & ATTR_MTIME)
378    {
379        attrs->mask |= PVFS_ATTR_SYS_MTIME;
380        if (iattr->ia_valid & ATTR_MTIME_SET)
381        {
382            attrs->mtime = pvfs2_convert_time_field((void *)&iattr->ia_mtime);
383            attrs->mask |= PVFS_ATTR_SYS_MTIME_SET;
384        }
385    }
386    if (iattr->ia_valid & ATTR_CTIME)
387    {
388        attrs->mask |= PVFS_ATTR_SYS_CTIME;
389    }
390    /* PVFS2 cannot set size with a setattr operation.  Probably not likely
391     * to be requested through the VFS, but just in case, don't worry about
392     * ATTR_SIZE */
393
394    if (iattr->ia_valid & ATTR_MODE)
395    {
396        tmp_mode = iattr->ia_mode;
397        if (tmp_mode & (S_ISVTX))
398        {
399            if (is_root_handle(inode))
400            {
401                /* allow sticky bit to be set on root (since it shows up that
402                 * way by default anyhow), but don't show it to
403                 * the server
404                 */
405                tmp_mode -= S_ISVTX;
406            }
407            else
408            {
409                gossip_debug(GOSSIP_UTILS_DEBUG, "User attempted to set sticky bit"
410                        "on non-root directory; returning EINVAL.\n");
411                return(-EINVAL);
412            }
413        }
414
415        if (tmp_mode & (S_ISUID))
416        {
417            gossip_debug(GOSSIP_UTILS_DEBUG, "Attempting to set setuid bit "
418                    "(not supported); returning EINVAL.\n");
419            return(-EINVAL);
420        }
421
422        convert_attribute_mode_to_pvfs_sys_attr(
423            tmp_mode, attrs, get_suid_flag(inode));
424    }
425
426    return 0;
427}
428
429/*
430  issues a pvfs2 getattr request and fills in the appropriate inode
431  attributes if successful.  returns 0 on success; -errno otherwise
432*/
433int pvfs2_inode_getattr(struct inode *inode, uint32_t getattr_mask)
434{
435    int ret = -EINVAL;
436    pvfs2_kernel_op_t *new_op = NULL;
437    pvfs2_inode_t *pvfs2_inode = NULL;
438
439    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_inode_getattr: called on inode %s\n",
440                PVFS_handle_to_str(get_handle_from_ino(inode)));
441
442    if (inode)
443    {
444        pvfs2_inode = PVFS2_I(inode);
445        if (!pvfs2_inode)
446        {
447            gossip_debug(GOSSIP_UTILS_DEBUG, "%s:%s:%d failed to resolve to pvfs2_inode\n", __FILE__, __func__, __LINE__);
448            return ret;
449        }
450
451        /*
452           in the case of being called from s_op->read_inode, the
453           pvfs2_inode private data hasn't been initialized yet, so we
454           need to use the inode number as the handle and query the
455           superblock for the fs_id.  Further, we assign that private
456           data here.
457
458           that call flow looks like:
459           lookup --> iget --> read_inode --> here
460
461           In the case we are doing an iget4 or an iget5_locked, there
462           is no call made to read_inode so we actually have valid fields
463           in pvfs2_inode->refn
464
465           if the inode were already in the inode cache, it looks like:
466           lookup --> revalidate --> here
467        */
468        if (pvfs2_inode->refn.handle == PVFS_HANDLE_NULL)
469        {
470#if defined(HAVE_IGET4_LOCKED) || defined(HAVE_IGET5_LOCKED)
471            gossip_lerr("Critical error: Invalid handle despite using iget4/iget5\n");
472            return -EINVAL;
473#endif
474            pvfs2_inode->refn.handle = get_handle_from_ino(inode);
475        }
476        if (pvfs2_inode->refn.fs_id == PVFS_FS_ID_NULL)
477        {
478#if defined(HAVE_IGET4_LOCKED) || defined(HAVE_IGET5_LOCKED)
479            gossip_lerr("Critical error: Invalid fsid despite using iget4/iget5\n");
480            return -EINVAL;
481#endif
482            pvfs2_inode->refn.fs_id = PVFS2_SB(inode->i_sb)->fs_id;
483        }
484
485        /*
486           post a getattr request here; make dentry valid if getattr
487           passes
488        */
489        new_op = op_alloc(PVFS2_VFS_OP_GETATTR);
490        if (!new_op)
491        {
492            return -ENOMEM;
493        }
494        new_op->upcall.req.getattr.refn = pvfs2_inode->refn;
495        new_op->upcall.req.getattr.mask = getattr_mask;
496
497        ret = service_operation(
498            new_op, "pvfs2_inode_getattr", 
499            get_interruptible_flag(inode));
500
501        /* check what kind of goodies we got */
502        if (ret == 0)
503        {
504            if (copy_attributes_to_inode
505                (inode, &new_op->downcall.resp.getattr.attributes,
506                 new_op->downcall.resp.getattr.link_target))
507            {
508                gossip_err("pvfs2_inode_getattr: failed to copy "
509                            "attributes\n");
510                ret = -ENOENT;
511                goto copy_attr_failure;
512            }
513
514            /* store blksize in pvfs2 specific part of inode structure; we
515             * are only going to use this to report to stat to make sure it
516             * doesn't perturb any inode related code paths
517             */
518            if(new_op->downcall.resp.getattr.attributes.objtype
519                == PVFS_TYPE_METAFILE)
520            {
521                pvfs2_inode->blksize =
522                   new_op->downcall.resp.getattr.attributes.blksize;
523            }
524            else
525            {
526                /* mimic behavior of generic_fillattr() for other types */
527                pvfs2_inode->blksize = (1 << inode->i_blkbits);
528            }
529        }
530
531      copy_attr_failure:
532        gossip_debug(GOSSIP_UTILS_DEBUG, "Getattr on handle %s, fsid %d\n  (inode ct = %d) "
533                    "returned %d\n",
534                    PVFS_handle_to_str(pvfs2_inode->refn.handle), pvfs2_inode->refn.fs_id,
535                    (int)atomic_read(&inode->i_count), ret);
536        /* store error code in the inode so that we can retrieve it later if
537         * needed
538         */
539        if(ret < 0)
540        {
541            pvfs2_inode->error_code = ret;
542        }
543
544        op_release(new_op);
545    }
546    return ret;
547}
548
549/*
550  issues a pvfs2 setattr request to make sure the new attribute values
551  take effect if successful.  returns 0 on success; -errno otherwise
552*/
553int pvfs2_inode_setattr(
554    struct inode *inode,
555    struct iattr *iattr)
556{
557    int ret = -ENOMEM;
558    pvfs2_kernel_op_t *new_op = NULL;
559    pvfs2_inode_t *pvfs2_inode = NULL;
560
561    if (inode)
562    {
563        pvfs2_inode = PVFS2_I(inode);
564
565        new_op = op_alloc(PVFS2_VFS_OP_SETATTR);
566        if (!new_op)
567        {
568            return ret;
569        }
570
571        new_op->upcall.req.setattr.refn = pvfs2_inode->refn;
572        if ((new_op->upcall.req.setattr.refn.handle == PVFS_HANDLE_NULL) &&
573            (new_op->upcall.req.setattr.refn.fs_id == PVFS_FS_ID_NULL))
574        {
575            struct super_block *sb = inode->i_sb;
576            new_op->upcall.req.setattr.refn.handle =
577                PVFS2_SB(sb)->root_handle;
578            new_op->upcall.req.setattr.refn.fs_id =
579                PVFS2_SB(sb)->fs_id;
580        }
581        ret = copy_attributes_from_inode(
582            inode, &new_op->upcall.req.setattr.attributes, iattr);
583        if(ret < 0)
584        {
585            op_release(new_op);
586            return(ret);
587        }
588
589        ret = service_operation(
590            new_op, "pvfs2_inode_setattr",
591            get_interruptible_flag(inode));
592
593        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_inode_setattr: returning %d\n", ret);
594
595        /* when request is serviced properly, free req op struct */
596        op_release(new_op);
597
598        /* successful setattr should clear the atime, mtime and ctime flags */
599        if (ret == 0) {
600            ClearAtimeFlag(pvfs2_inode);
601            ClearMtimeFlag(pvfs2_inode);
602            ClearCtimeFlag(pvfs2_inode);
603            ClearModeFlag(pvfs2_inode);
604        }
605    }
606    return ret;
607}
608
609int pvfs2_flush_inode(struct inode *inode)
610{
611    /*
612     * If it is a dirty inode, this function gets called.
613     * Gather all the information that needs to be setattr'ed
614     * Right now, this will only be used for mode, atime, mtime
615     * and/or ctime.
616     */
617    struct iattr wbattr;
618    int ret;
619    int mtime_flag, ctime_flag, atime_flag, mode_flag;
620    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
621    memset(&wbattr, 0, sizeof(wbattr));
622
623    /* check inode flags up front, and clear them if they are set.  This
624     * will prevent multiple processes from all trying to flush the same
625     * inode if they call close() simultaneously
626     */
627    mtime_flag = MtimeFlag(pvfs2_inode);
628    ClearMtimeFlag(pvfs2_inode);
629    ctime_flag = CtimeFlag(pvfs2_inode);
630    ClearCtimeFlag(pvfs2_inode);
631    atime_flag = AtimeFlag(pvfs2_inode);
632    ClearAtimeFlag(pvfs2_inode);
633    mode_flag = ModeFlag(pvfs2_inode);
634    ClearModeFlag(pvfs2_inode);
635
636    /*  -- Lazy atime,mtime and ctime update --
637     * Note: all times are dictated by server in the new scheme
638     * and not by the clients
639     *
640     * Also mode updates are being handled now..
641     */
642
643    if (mtime_flag)
644        wbattr.ia_valid |= ATTR_MTIME;
645    if (ctime_flag)
646        wbattr.ia_valid |= ATTR_CTIME;
647    /*
648     * We do not need to honor atime flushes if
649     * a) object has a noatime marker
650     * b) object is a directory and has a nodiratime marker on the fs
651     * c) entire file system is mounted with noatime option
652     */
653
654    if (!((inode->i_flags & S_NOATIME)
655            || (inode->i_sb->s_flags & MS_NOATIME)
656            || ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))) && atime_flag)
657    {
658        wbattr.ia_valid |= ATTR_ATIME;
659    }
660    if (mode_flag)
661    {
662        wbattr.ia_mode = inode->i_mode;
663        wbattr.ia_valid |= ATTR_MODE;
664    }
665
666    gossip_debug(GOSSIP_UTILS_DEBUG, "*********** pvfs2_flush_inode: %s "
667            "(ia_valid %d)\n", PVFS_handle_to_str(get_handle_from_ino(inode)), wbattr.ia_valid);
668    if (wbattr.ia_valid == 0)
669    {
670        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_inode skipping setattr()\n");
671        return 0;
672    }
673       
674    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_inode (%s) writing mode %o\n",
675        PVFS_handle_to_str(get_handle_from_ino(inode)), inode->i_mode);
676
677    ret = pvfs2_inode_setattr(inode, &wbattr);
678    return ret;
679}
680
/*
 * Full extended-attribute key names: each one is the "system.pvfs2."
 * prefix concatenated with a *_KEYSTR string constant.
 */
#define DIST_KEY    "system.pvfs2." METAFILE_DIST_KEYSTR    /* metafile distribution */
#define DFILE_KEY   "system.pvfs2." DATAFILE_HANDLES_KEYSTR /* datafile handles */
#define SYMLINK_KEY "system.pvfs2." SYMLINK_TARGET_KEYSTR   /* symlink */
#define ROOT_KEY    "system.pvfs2." ROOT_HANDLE_KEYSTR      /* root handle */
#define DIRENT_KEY  "system.pvfs2." DIRECTORY_ENTRY_KEYSTR  /* directory entry key */
691
692/* Extended attributes helper functions */
693static char *xattr_non_zero_terminated[] = {
694    DFILE_KEY,
695    DIST_KEY,
696    ROOT_KEY,
697};
698
699/* Extended attributes helper functions */
700
701/*
702 * this function returns
703 * 0 if the val corresponding to name is known to be not terminated with an explicit \0
704 * 1 if the val corresponding to name is known to be \0 terminated
705 */
706static int xattr_zero_terminated(const char *name)
707{
708    int i;
709    static int xattr_count = sizeof(xattr_non_zero_terminated)/sizeof(char *);
710    for (i = 0;i < xattr_count; i++)
711    {
712        if (strcmp(name, xattr_non_zero_terminated[i]) == 0)
713            return 0;
714    }
715    return 1;
716}
717
718static char *xattr_resvd_keys[] = {
719    DFILE_KEY,
720    DIST_KEY,
721    DIRENT_KEY,
722    SYMLINK_KEY,
723    ROOT_KEY,
724};
725/*
726 * this function returns
727 * 0 if the key corresponding to name is not meant to be printed as part of a listxattr
728 * 1 if the key corresponding to name is meant to be returned as part of a listxattr.
729 * Currently xattr_resvd_keys[] is the array that holds the reserved entries
730 */
731static int is_reserved_key(const char *key, size_t size)
732{
733    int i;
734    static int resv_count = sizeof(xattr_resvd_keys)/sizeof(char *);
735    for (i = 0; i < resv_count; i++)
736    {
737        if (strncmp(key, xattr_resvd_keys[i], size) == 0)
738            return 1;
739    }
740    return 0;
741}
742
743/*
744 * Tries to get a specified key's attributes of a given
745 * file into a user-specified buffer. Note that the getxattr
746 * interface allows for the users to probe the size of an
747 * extended attribute by passing in a value of 0 to size.
748 * Thus our return value is always the size of the attribute
749 * unless the key does not exist for the file and/or if
750 * there were errors in fetching the attribute value.
751 */
752ssize_t pvfs2_inode_getxattr(struct inode *inode, const char* prefix,
753    const char *name, void *buffer, size_t size)
754{
755    ssize_t ret = -ENOMEM;
756    pvfs2_kernel_op_t *new_op = NULL;
757    pvfs2_inode_t *pvfs2_inode = NULL;
758    ssize_t length = 0;
759    int fsuid, fsgid;
760
761    if (name == NULL || (size > 0 && buffer == NULL))
762    {
763        gossip_err("pvfs2_inode_getxattr: bogus NULL pointers\n");
764        return -EINVAL;
765    }
766    if (size < 0 || (strlen(name)+strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN)
767    {
768        gossip_err("Invalid size (%d) or key length (%d)\n",
769                (int) size, (int)(strlen(name)+strlen(prefix)));
770        return -EINVAL;
771    }
772    if (inode)
773    {
774#ifdef HAVE_CURRENT_FSUID
775        fsuid = current_fsuid();
776        fsgid = current_fsgid();
777#else
778        fsuid = current->fsuid;
779        fsgid = current->fsgid;
780#endif
781
782        gossip_debug(GOSSIP_XATTR_DEBUG, "getxattr on inode %s, name %s (uid %o, gid %o)\n",
783                PVFS_handle_to_str(get_handle_from_ino(inode)), name, fsuid, fsgid);
784        pvfs2_inode = PVFS2_I(inode);
785        /* obtain the xattr semaphore */
786        down_read(&pvfs2_inode->xattr_sem);
787
788        new_op = op_alloc(PVFS2_VFS_OP_GETXATTR);
789        if (!new_op)
790        {
791            up_read(&pvfs2_inode->xattr_sem);
792            return ret;
793        }
794
795        new_op->upcall.req.getxattr.refn = pvfs2_inode->refn;
796        ret = snprintf((char*)new_op->upcall.req.getxattr.key,
797            PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
798        /*
799         * NOTE: Although keys are meant to be NULL terminated textual strings,
800         * I am going to explicitly pass the length just in case we change this
801         * later on...
802         */
803        new_op->upcall.req.getxattr.key_sz = ret + 1;
804
805        ret = service_operation(
806            new_op, "pvfs2_inode_getxattr", 
807            get_interruptible_flag(inode));
808
809        /* Upon success, we need to get the value length
810         * from downcall and return that.
811         * and also copy the value out to the requester
812         */
813        if (ret == 0)
814        {
815            ssize_t new_length;
816            length = new_op->downcall.resp.getxattr.val_sz;
817            /*
818             * if the xattr corresponding to name was not terminated with a \0
819             * then we return the entire response length
820             */
821            if (xattr_zero_terminated(name) == 0)
822            {
823                new_length = length;
824            }
825            /*
826             * if it was terminated by a \0 then we return 1 less for the getfattr
827             * programs to play nicely with displaying it
828             */
829            else {
830                new_length = length - 1;
831            }
832            /* Just return the length of the queried attribute after
833             * subtracting the \0 thingie */
834            if (size == 0)
835            {
836                ret = new_length;
837            }
838            else
839            {
840                /* check to see if key length is > provided buffer size */
841                if (new_length > size)
842                {
843                    ret = -ERANGE;
844                }
845                else
846                {
847                    /* No size problems */
848                    memset(buffer, 0, size);
849                    memcpy(buffer, new_op->downcall.resp.getxattr.val,
850                            new_length);
851                    ret = new_length;
852                    gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_getxattr: inode %s key %s "
853                            " key_sz %d, val_length %d\n",
854                        PVFS_handle_to_str(get_handle_from_ino(inode)),
855                        (char*)new_op->upcall.req.getxattr.key,
856                        (int) new_op->upcall.req.getxattr.key_sz, (int) ret);
857                }
858            }
859        }
860        else if (ret == -ENOENT)
861        {
862            ret = -ENODATA; /* if no such keys exists we set this to be errno */
863            gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_getxattr: inode %s key %s does not exist!\n",
864                    PVFS_handle_to_str(get_handle_from_ino(inode)), (char *) new_op->upcall.req.getxattr.key);
865        }
866
867        /* when request is serviced properly, free req op struct */
868        op_release(new_op);
869        up_read(&pvfs2_inode->xattr_sem);
870    }
871    return ret;
872}
873
874/*
875 * tries to set an attribute for a given key on a file.
876 * Returns a -ve number on error and 0 on success.
877 * Key is text, but value can be binary!
878 */
879int pvfs2_inode_setxattr(struct inode *inode, const char* prefix,
880    const char *name, const void *value, size_t size, int flags)
881{
882    int ret = -ENOMEM;
883    pvfs2_kernel_op_t *new_op = NULL;
884    pvfs2_inode_t *pvfs2_inode = NULL;
885
886    if (size < 0 || size >= PVFS_MAX_XATTR_VALUELEN || flags < 0)
887    {
888        gossip_err("pvfs2_inode_setxattr: bogus values of size(%d), flags(%d)\n",
889                (int) size, flags);
890        return -EINVAL;
891    }
892    if (name == NULL || (size > 0 && value == NULL))
893    {
894        gossip_err("pvfs2_inode_setxattr: bogus NULL pointers!\n");
895        return -EINVAL;
896    }
897
898    if (prefix)
899    {
900        if(strlen(name)+strlen(prefix) >= PVFS_MAX_XATTR_NAMELEN)
901        {
902                gossip_err("pvfs2_inode_setxattr: bogus key size (%d)\n",
903                                (int)(strlen(name)+strlen(prefix)));
904                return -EINVAL;
905        }
906    }
907    else
908    {
909        if(strlen(name) >= PVFS_MAX_XATTR_NAMELEN)
910        {
911                gossip_err("pvfs2_inode_setxattr: bogus key size (%d)\n",
912                           (int)(strlen(name)));
913                return -EINVAL;
914        }
915    }
916
917    /* This is equivalent to a removexattr */
918    if (size == 0 && value == NULL)
919    {
920        gossip_debug(GOSSIP_XATTR_DEBUG, "removing xattr (%s%s)\n", prefix, name);
921        return pvfs2_inode_removexattr(inode, prefix, name, flags);
922    }
923    if (inode)
924    {
925        gossip_debug(GOSSIP_XATTR_DEBUG, "setxattr on inode %s, name %s\n",
926                PVFS_handle_to_str(get_handle_from_ino(inode)), name);
927        if (IS_RDONLY(inode))
928        {
929            gossip_err("pvfs2_inode_setxattr: Read-only file system\n");
930            return -EROFS;
931        }
932        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
933        {
934            gossip_err("pvfs2_inode_setxattr: Immutable inode or append-only "
935                    "inode; operation not permitted\n");
936            return -EPERM;
937        }
938        pvfs2_inode = PVFS2_I(inode);
939
940        down_write(&pvfs2_inode->xattr_sem);
941        new_op = op_alloc(PVFS2_VFS_OP_SETXATTR);
942        if (!new_op)
943        {
944            up_write(&pvfs2_inode->xattr_sem);
945            return ret;
946        }
947
948        new_op->upcall.req.setxattr.refn = pvfs2_inode->refn;
949        new_op->upcall.req.setxattr.flags = flags;
950        /*
951         * NOTE: Although keys are meant to be NULL terminated textual strings,
952         * I am going to explicitly pass the length just in case we change this
953         * later on...
954         */
955        ret = snprintf((char*)new_op->upcall.req.setxattr.keyval.key,
956            PVFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
957        new_op->upcall.req.setxattr.keyval.key_sz =
958            ret + 1;
959        memcpy(new_op->upcall.req.setxattr.keyval.val, value, size);
960        new_op->upcall.req.setxattr.keyval.val[size] = '\0';
961        /* For some reason, val_sz should include the \0 at the end as well */
962        new_op->upcall.req.setxattr.keyval.val_sz = size + 1;
963
964        gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_setxattr: key %s, key_sz %d "
965                " value size %zd\n",
966                 (char*)new_op->upcall.req.setxattr.keyval.key,
967                 (int) new_op->upcall.req.setxattr.keyval.key_sz,
968                 size + 1);
969
970        ret = service_operation(
971            new_op, "pvfs2_inode_setxattr",
972            get_interruptible_flag(inode));
973
974        gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_setxattr: returning %d\n", ret);
975
976        /* when request is serviced properly, free req op struct */
977        op_release(new_op);
978        up_write(&pvfs2_inode->xattr_sem);
979    }
980    return ret;
981}
982
983int pvfs2_inode_removexattr(struct inode *inode, const char* prefix,
984    const char *name, int flags)
985{
986    int ret = -ENOMEM;
987    pvfs2_kernel_op_t *new_op = NULL;
988    pvfs2_inode_t *pvfs2_inode = NULL;
989
990    if(!name)
991    {
992        gossip_err("pvfs2_inode_removexattr: xattr key is NULL\n");
993        return -EINVAL;
994    }
995
996    if (prefix)
997    {
998        if((strlen(name)+strlen(prefix)) >= PVFS_MAX_XATTR_NAMELEN)
999        {
1000                gossip_err("pvfs2_inode_removexattr: Invalid key length(%d)\n",
1001                                (int)(strlen(name)+strlen(prefix)));
1002                return -EINVAL;
1003        }
1004    }
1005    else
1006    {
1007        if(strlen(name) >= PVFS_MAX_XATTR_NAMELEN)
1008        {
1009                gossip_err("pvfs2_inode_removexattr: Invalid key length(%d)\n",
1010                           (int)(strlen(name)));
1011                return -EINVAL;
1012        }
1013    }
1014
1015    if (inode)
1016    {
1017        pvfs2_inode = PVFS2_I(inode);
1018
1019        down_write(&pvfs2_inode->xattr_sem);
1020        new_op = op_alloc(PVFS2_VFS_OP_REMOVEXATTR);
1021        if (!new_op)
1022        {
1023            up_write(&pvfs2_inode->xattr_sem);
1024            return ret;
1025        }
1026
1027        new_op->upcall.req.removexattr.refn = pvfs2_inode->refn;
1028        /*
1029         * NOTE: Although keys are meant to be NULL terminated textual strings,
1030         * I am going to explicitly pass the length just in case we change this
1031         * later on...
1032         */
1033        ret = snprintf((char*)new_op->upcall.req.removexattr.key,
1034            PVFS_MAX_XATTR_NAMELEN, "%s%s",
1035            (prefix ? prefix : ""), name);
1036        new_op->upcall.req.removexattr.key_sz = ret + 1;
1037
1038        gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_removexattr: key %s, key_sz %d\n",
1039                (char*)new_op->upcall.req.removexattr.key,
1040                (int) new_op->upcall.req.removexattr.key_sz);
1041
1042        ret = service_operation(
1043            new_op, "pvfs2_inode_removexattr",
1044            get_interruptible_flag(inode));
1045
1046        if (ret == -ENOENT)
1047        {
1048            /* Request to replace a non-existent attribute is an error */
1049            if (flags & XATTR_REPLACE)
1050                ret = -ENODATA;
1051            else
1052                ret = 0;
1053        }
1054        gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_removexattr: returning %d\n",
1055                ret);
1056
1057        /* when request is serviced properly, free req op struct */
1058        op_release(new_op);
1059        up_write(&pvfs2_inode->xattr_sem);
1060    }
1061    return ret;
1062}
1063
1064/*
1065 * Tries to get a specified object's keys into a user-specified
1066 * buffer of a given size.
1067 * Note that like the previous instances of xattr routines,
1068 * this also allows you to pass in a NULL pointer and 0 size
1069 * to probe the size for subsequent memory allocations.
1070 * Thus our return value is always the size of all the keys
1071 * unless there were errors in fetching the keys!
1072 */
1073int pvfs2_inode_listxattr(struct inode *inode, char *buffer, size_t size)
1074{
1075    ssize_t ret = -ENOMEM, total = 0;
1076    int i = 0, count_keys = 0;
1077    pvfs2_kernel_op_t *new_op = NULL;
1078    pvfs2_inode_t *pvfs2_inode = NULL;
1079    ssize_t length = 0;
1080
1081    if (size > 0 && buffer == NULL)
1082    {
1083        gossip_err("pvfs2_inode_listxattr: bogus NULL pointers\n");
1084        return -EINVAL;
1085    }
1086    if (size < 0)
1087    {
1088        gossip_err("Invalid size (%d)\n", (int) size);
1089        return -EINVAL;
1090    }
1091    if (inode)
1092    {
1093        /* FIX: position stuff changed to use small int and flag */
1094        PVFS_ds_position token = PVFS_ITERATE_START;
1095
1096        pvfs2_inode = PVFS2_I(inode);
1097        /* obtain the xattr semaphore */
1098        down_read(&pvfs2_inode->xattr_sem);
1099
1100        new_op = op_alloc(PVFS2_VFS_OP_LISTXATTR);
1101        if (!new_op)
1102        {
1103            up_read(&pvfs2_inode->xattr_sem);
1104            return ret;
1105        }
1106        if (buffer && size > 0)
1107        {
1108            memset(buffer, 0, size);
1109        }
1110    try_again:
1111        new_op->upcall.req.listxattr.refn = pvfs2_inode->refn;
1112        new_op->upcall.req.listxattr.token = token;
1113        new_op->upcall.req.listxattr.requested_count = (size == 0) ? 0 : PVFS_MAX_XATTR_LISTLEN;
1114        ret = service_operation(
1115                new_op, "pvfs2_inode_listxattr",
1116                get_interruptible_flag(inode));
1117        if (ret == 0)
1118        {
1119            if (size == 0)
1120            {
1121                /*
1122                 * This is a bit of a big upper limit, but I did not want to spend too
1123                 * much time getting this correct, since users end up allocating memory
1124                 * rather than us...
1125                 */
1126                total = new_op->downcall.resp.listxattr.returned_count * PVFS_MAX_XATTR_NAMELEN;
1127                goto done;
1128            }
1129            length = new_op->downcall.resp.listxattr.keylen;
1130            if (length == 0)
1131            {
1132                goto done;
1133            }
1134            else
1135            {
1136                int key_size = 0;
1137                /* check to see how much can be fit in the buffer. fit only whole keys */
1138                for (i = 0; i < new_op->downcall.resp.listxattr.returned_count; i++)
1139                {
1140                    if (total + new_op->downcall.resp.listxattr.lengths[i] <= size)
1141                    {
1142                        /* Since many dumb programs try to setxattr() on our reserved xattrs
1143                         * this is a feeble attempt at defeating those by not listing them
1144                         * in the output of listxattr.. sigh
1145                         */
1146
1147                        if (is_reserved_key(new_op->downcall.resp.listxattr.key + key_size,
1148                                            new_op->downcall.resp.listxattr.lengths[i]) == 0)
1149                        {
1150                            gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
1151                                    i, new_op->downcall.resp.listxattr.key + key_size);
1152                            memcpy(buffer + total, new_op->downcall.resp.listxattr.key + key_size,
1153                                    new_op->downcall.resp.listxattr.lengths[i]);
1154                            total += new_op->downcall.resp.listxattr.lengths[i];
1155                            count_keys++;
1156                        }
1157                        else {
1158                            gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
1159                                    i, new_op->downcall.resp.listxattr.key + key_size);
1160                        }
1161                        key_size += new_op->downcall.resp.listxattr.lengths[i];
1162                    }
1163                    else {
1164                        goto done;
1165                    }
1166                }
1167                /* Since the buffer was large enough, we might have to continue fetching more keys! */
1168                token = new_op->downcall.resp.listxattr.token;
1169                /* FIX: position stuff changed to use small int and flag */
1170                if (token != PVFS_ITERATE_END)
1171                    goto try_again;
1172            }
1173        }
1174    done:
1175        gossip_debug(GOSSIP_XATTR_DEBUG, "pvfs2_inode_listxattr: returning %d [size of buffer %ld] "
1176                "(filled in %d keys)\n",
1177                ret ? (int) ret : (int) total, (long) size, count_keys);
1178        /* when request is serviced properly, free req op struct */
1179        op_release(new_op);
1180        up_read(&pvfs2_inode->xattr_sem);
1181        if (ret == 0)
1182            ret = total;
1183    }
1184    return ret;
1185}
1186
1187static inline struct inode *pvfs2_create_file(
1188    struct inode *dir,
1189    struct dentry *dentry,
1190    int mode,
1191    int *error_code)
1192{
1193    int ret = -1;
1194    pvfs2_kernel_op_t *new_op = NULL;
1195    pvfs2_inode_t *parent = PVFS2_I(dir);
1196    struct inode *inode = NULL;
1197
1198    new_op = op_alloc(PVFS2_VFS_OP_CREATE);
1199    if (!new_op)
1200    {
1201        *error_code = -ENOMEM;
1202        return NULL;
1203    }
1204
1205    if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL)
1206    {
1207        new_op->upcall.req.create.parent_refn = parent->refn;
1208    }
1209    else
1210    {
1211#if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED)
1212        gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n");
1213        *error_code = -EINVAL;
1214        op_release(new_op);
1215        return NULL;
1216#endif
1217        new_op->upcall.req.create.parent_refn.handle =
1218            get_handle_from_ino(dir);
1219        new_op->upcall.req.create.parent_refn.fs_id =
1220            PVFS2_SB(dir->i_sb)->fs_id;
1221    }
1222
1223    /* macro defined in pvfs2-kernel.h */
1224    fill_default_sys_attrs(new_op->upcall.req.create.attributes,
1225                           PVFS_TYPE_METAFILE, mode);
1226
1227    strncpy(new_op->upcall.req.create.d_name,
1228            dentry->d_name.name, PVFS2_NAME_LEN);
1229
1230    ret = service_operation(
1231        new_op, "pvfs2_create_file",
1232        get_interruptible_flag(dir));
1233
1234    gossip_debug(GOSSIP_UTILS_DEBUG, "Create Got PVFS2 handle %s on fsid %d (ret=%d)\n",
1235                PVFS_handle_to_str(new_op->downcall.resp.create.refn.handle),
1236                new_op->downcall.resp.create.refn.fs_id, ret);
1237
1238    if (ret > -1)
1239    {
1240        inode = pvfs2_get_custom_inode(
1241            dir->i_sb, dir, (S_IFREG | mode), 0, new_op->downcall.resp.create.refn);
1242        if (!inode)
1243        {
1244            gossip_err("*** Failed to allocate pvfs2 file inode\n");
1245            op_release(new_op);
1246            *error_code = -ENOMEM;
1247            return NULL;
1248        }
1249
1250        gossip_debug(GOSSIP_UTILS_DEBUG, "Assigned file inode new number of %s\n",
1251                    PVFS_handle_to_str(get_handle_from_ino(inode)));
1252        /* finally, add dentry with this new inode to the dcache */
1253        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_file: Instantiating\n *negative* "
1254                    "dentry %p for %s\n", dentry,
1255                    dentry->d_name.name);
1256
1257        dentry->d_op = &pvfs2_dentry_operations;
1258        d_instantiate(dentry, inode);
1259        gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Regular File) %s -> %s\n",
1260                PVFS_handle_to_str(get_handle_from_ino(inode)), dentry->d_name.name);
1261    }
1262    else
1263    {
1264        *error_code = ret;
1265
1266        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_file: failed with error code %d\n",
1267                    *error_code);
1268    }
1269
1270    op_release(new_op);
1271    return inode;
1272}
1273
1274static inline struct inode *pvfs2_create_dir(
1275    struct inode *dir,
1276    struct dentry *dentry,
1277    int mode,
1278    int *error_code)
1279{
1280    int ret = -1;
1281    pvfs2_kernel_op_t *new_op = NULL;
1282    pvfs2_inode_t *parent = PVFS2_I(dir);
1283    struct inode *inode = NULL;
1284
1285    new_op = op_alloc(PVFS2_VFS_OP_MKDIR);
1286    if (!new_op)
1287    {
1288        *error_code = -ENOMEM;
1289        return NULL;
1290    }
1291
1292    if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL)
1293    {
1294        new_op->upcall.req.mkdir.parent_refn = parent->refn;
1295    }
1296    else
1297    {
1298#if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED)
1299        gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n");
1300        *error_code = -EINVAL;
1301        op_release(new_op);
1302        return NULL;
1303#endif
1304        new_op->upcall.req.mkdir.parent_refn.handle =
1305            get_handle_from_ino(dir);
1306        new_op->upcall.req.mkdir.parent_refn.fs_id =
1307            PVFS2_SB(dir->i_sb)->fs_id;
1308    }
1309
1310    /* macro defined in pvfs2-kernel.h */
1311    fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
1312                           PVFS_TYPE_DIRECTORY, mode);
1313
1314    strncpy(new_op->upcall.req.mkdir.d_name,
1315            dentry->d_name.name, PVFS2_NAME_LEN);
1316
1317    ret = service_operation(
1318        new_op, "pvfs2_create_dir",
1319        get_interruptible_flag(dir));
1320
1321    gossip_debug(GOSSIP_UTILS_DEBUG, "Mkdir Got PVFS2 handle %s on fsid %d\n",
1322                PVFS_handle_to_str(new_op->downcall.resp.mkdir.refn.handle),
1323                new_op->downcall.resp.mkdir.refn.fs_id);
1324
1325    if (ret > -1)
1326    {
1327        inode = pvfs2_get_custom_inode(
1328            dir->i_sb, dir, (S_IFDIR | mode), 0, new_op->downcall.resp.mkdir.refn);
1329        if (!inode)
1330        {
1331            gossip_err("*** Failed to allocate pvfs2 dir inode\n");
1332            op_release(new_op);
1333            *error_code = -ENOMEM;
1334            return NULL;
1335        }
1336
1337        gossip_debug(GOSSIP_UTILS_DEBUG, "Assigned dir inode new number of %s\n",
1338                    PVFS_handle_to_str(get_handle_from_ino(inode)));
1339        /* finally, add dentry with this new inode to the dcache */
1340        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_dir: Instantiating\n  *negative* "
1341                    "dentry %p for %s\n", dentry,
1342                    dentry->d_name.name);
1343
1344        dentry->d_op = &pvfs2_dentry_operations;
1345        d_instantiate(dentry, inode);
1346        gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Directory) %s -> %s\n",
1347                PVFS_handle_to_str(get_handle_from_ino(inode)), dentry->d_name.name);
1348    }
1349    else
1350    {
1351        *error_code = ret;
1352
1353        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_dir: failed with error code %d\n",
1354                    *error_code);
1355    }
1356
1357    op_release(new_op);
1358    return inode;
1359}
1360
1361static inline struct inode *pvfs2_create_symlink(
1362    struct inode *dir,
1363    struct dentry *dentry,
1364    const char *symname,
1365    int mode,
1366    int *error_code)
1367{
1368    int ret = -1;
1369    pvfs2_kernel_op_t *new_op = NULL;
1370    pvfs2_inode_t *parent = PVFS2_I(dir);
1371    struct inode *inode = NULL;
1372
1373    if(!symname)
1374    {
1375        *error_code = -EINVAL;
1376        return NULL;
1377    }
1378
1379    new_op = op_alloc(PVFS2_VFS_OP_SYMLINK);
1380    if (!new_op)
1381    {
1382        *error_code = -ENOMEM;
1383        return NULL;
1384    }
1385
1386    if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL)
1387    {
1388        new_op->upcall.req.sym.parent_refn = parent->refn;
1389    }
1390    else
1391    {
1392#if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED)
1393        gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n");
1394        *error_code = -EINVAL;
1395        op_release(new_op);
1396        return NULL;
1397#endif
1398        new_op->upcall.req.sym.parent_refn.handle =
1399            get_handle_from_ino(dir);
1400        new_op->upcall.req.sym.parent_refn.fs_id =
1401            PVFS2_SB(dir->i_sb)->fs_id;
1402    }
1403
1404    /* macro defined in pvfs2-kernel.h */
1405    fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
1406                           PVFS_TYPE_SYMLINK, mode);
1407
1408    strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name,
1409            PVFS2_NAME_LEN);
1410    strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN);
1411
1412    ret = service_operation(
1413        new_op, "pvfs2_symlink_file",
1414        get_interruptible_flag(dir));
1415
1416    gossip_debug(GOSSIP_UTILS_DEBUG, "Symlink Got PVFS2 handle %s on fsid %d (ret=%d)\n",
1417                PVFS_handle_to_str(new_op->downcall.resp.sym.refn.handle),
1418                new_op->downcall.resp.sym.refn.fs_id, ret);
1419
1420    if (ret > -1)
1421    {
1422        inode = pvfs2_get_custom_inode(
1423            dir->i_sb, dir, (S_IFLNK | mode), 0, new_op->downcall.resp.sym.refn);
1424        if (!inode)
1425        {
1426            gossip_err("*** Failed to allocate pvfs2 symlink inode\n");
1427            op_release(new_op);
1428            *error_code = -ENOMEM;
1429            return NULL;
1430        }
1431
1432        gossip_debug(GOSSIP_UTILS_DEBUG, "Assigned symlink inode new number of %s\n",
1433                    PVFS_handle_to_str(get_handle_from_ino(inode)));
1434
1435        /* finally, add dentry with this new inode to the dcache */
1436        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_symlink: Instantiating\n  "
1437                    "*negative* dentry %p for %s\n", dentry,
1438                    dentry->d_name.name);
1439
1440        dentry->d_op = &pvfs2_dentry_operations;
1441        d_instantiate(dentry, inode);
1442        gossip_debug(GOSSIP_UTILS_DEBUG, "Inode (Symlink) %s -> %s\n",
1443                PVFS_handle_to_str(get_handle_from_ino(inode)), dentry->d_name.name);
1444    }
1445    else
1446    {
1447        *error_code = ret;
1448
1449        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_create_symlink: failed with error code %d\n",
1450                    *error_code);
1451    }
1452
1453    op_release(new_op);
1454    return inode;
1455}
1456
1457/*
1458  create a pvfs2 entry; returns a properly populated inode
1459  pointer on success; NULL on failure.
1460
1461  the required error_code value will contain an error code ONLY if an
1462  error occurs (i.e. NULL is returned) and is set to 0 otherwise.
1463
1464  if op_type is PVFS_VFS_OP_CREATE, a file is created
1465  if op_type is PVFS_VFS_OP_MKDIR, a directory is created
1466  if op_type is PVFS_VFS_OP_SYMLINK, a symlink is created
1467
1468  symname should be null unless mode is PVFS_VFS_OP_SYMLINK
1469*/
1470struct inode *pvfs2_create_entry(
1471    struct inode *dir,
1472    struct dentry *dentry,
1473    const char *symname,
1474    int mode,
1475    int op_type,
1476    int *error_code)
1477{
1478    if (dir && dentry && error_code)
1479    {
1480        if(strlen(dentry->d_name.name) > (PVFS2_NAME_LEN - 1))
1481        {
1482            *error_code = -ENAMETOOLONG;
1483            return(NULL);
1484        }
1485
1486        switch (op_type)
1487        {
1488            case PVFS2_VFS_OP_CREATE:
1489                return pvfs2_create_file(
1490                    dir, dentry, mode, error_code);
1491            case PVFS2_VFS_OP_MKDIR:
1492                return pvfs2_create_dir(
1493                    dir, dentry, mode, error_code);
1494            case PVFS2_VFS_OP_SYMLINK:
1495                return pvfs2_create_symlink(
1496                    dir, dentry, symname, mode, error_code);
1497        }
1498    }
1499
1500    if (error_code)
1501    {
1502        gossip_err("pvfs2_create_entry: invalid op_type %d\n", op_type);
1503        *error_code = -EINVAL;
1504    }
1505    return NULL;
1506}
1507
1508int pvfs2_remove_entry(
1509    struct inode *dir,
1510    struct dentry *dentry)
1511{
1512    int ret = -EINVAL;
1513    pvfs2_kernel_op_t *new_op = NULL;
1514    pvfs2_inode_t *parent = PVFS2_I(dir);
1515    struct inode *inode = dentry->d_inode;
1516
1517    if (inode && parent && dentry)
1518    {
1519        gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_remove_entry: called on %s\n  (inode %s): "
1520                    "Parent is %s | fs_id %d\n", dentry->d_name.name,
1521                    PVFS_handle_to_str(get_handle_from_ino(inode)), PVFS_handle_to_str(parent->refn.handle),
1522                    parent->refn.fs_id);
1523
1524        new_op = op_alloc(PVFS2_VFS_OP_REMOVE);
1525        if (!new_op)
1526        {
1527            return -ENOMEM;
1528        }
1529
1530        if (parent && parent->refn.handle != PVFS_HANDLE_NULL && parent->refn.fs_id != PVFS_FS_ID_NULL)
1531        {
1532            new_op->upcall.req.remove.parent_refn = parent->refn;
1533        }
1534        else
1535        {
1536#if defined(HAVE_IGET5_LOCKED) || defined(HAVE_IGET4_LOCKED)
1537            gossip_lerr("Critical error: i_ino cannot be relied on when using iget4/5\n");
1538            op_release(new_op);
1539            return -ENOMEM;
1540#endif
1541            new_op->upcall.req.remove.parent_refn.handle =
1542                get_handle_from_ino(dir);
1543            new_op->upcall.req.remove.parent_refn.fs_id =
1544                PVFS2_SB(dir->i_sb)->fs_id;
1545        }
1546        strncpy(new_op->upcall.req.remove.d_name,
1547                dentry->d_name.name, PVFS2_NAME_LEN);
1548
1549        ret = service_operation(
1550            new_op, "pvfs2_remove_entry",
1551            get_interruptible_flag(inode));
1552
1553        /* when request is serviced properly, free req op struct */
1554        op_release(new_op);
1555    }
1556    return ret;
1557}
1558
1559int pvfs2_truncate_inode(
1560    struct inode *inode,
1561    loff_t size)
1562{
1563    int ret = -EINVAL;
1564    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
1565    pvfs2_kernel_op_t *new_op = NULL;
1566
1567    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2: pvfs2_truncate_inode %s: "
1568                "Handle is %s | fs_id %d | size is %lu\n",
1569                PVFS_handle_to_str(get_handle_from_ino(inode)), PVFS_handle_to_str(pvfs2_inode->refn.handle),
1570                pvfs2_inode->refn.fs_id, (unsigned long)size);
1571
1572    new_op = op_alloc(PVFS2_VFS_OP_TRUNCATE);
1573    if (!new_op)
1574    {
1575        return -ENOMEM;
1576    }
1577    new_op->upcall.req.truncate.refn = pvfs2_inode->refn;
1578    new_op->upcall.req.truncate.size = (PVFS_size)size;
1579
1580    ret = service_operation(
1581        new_op, "pvfs2_truncate_inode", 
1582        get_interruptible_flag(inode));
1583
1584    /*
1585      the truncate has no downcall members to retrieve, but
1586      the status value tells us if it went through ok or not
1587    */
1588    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2: pvfs2_truncate got return value of %d\n",ret);
1589
1590    op_release(new_op);
1591
1592    return ret;
1593}
1594
1595#ifdef HAVE_FIND_INODE_HANDLE_SUPER_OPERATIONS
1596
/*
 * Bit flags naming the individual sanity checks that
 * perform_handle_checks() can run; they may be OR-ed together.
 */
typedef enum {
    HANDLE_CHECK_LENGTH = (1 << 0),  /* private length matches the opaque handle size */
    HANDLE_CHECK_MAGIC  = (1 << 1),  /* magic number is PVFS2_SUPER_MAGIC */
    HANDLE_CHECK_FSID   = (1 << 2)   /* fsid matches the superblock's fs_id */
} handle_check_t;
1602
1603/* Perform simple sanity checks on the obtained handle */
1604static inline int perform_handle_checks(const struct file_handle *fhandle,
1605        handle_check_t check, void *p)
1606{
1607    if (!fhandle)
1608    {
1609        return -EINVAL;
1610    }
1611    /* okay good. now check if magic_nr matches */
1612    if (check & HANDLE_CHECK_LENGTH)
1613    {
1614        /* Make sure that handle length matches our opaque handle structure */
1615        if (fhandle->fh_private_length != sizeof(pvfs2_opaque_handle_t))
1616        {
1617            gossip_err("perform_handle_checks: length mismatch (%ld) "
1618                    " instead of (%ld)\n", (unsigned long) fhandle->fh_private_length,
1619                    (unsigned long) sizeof(pvfs2_opaque_handle_t));
1620            return 0;
1621        }
1622    }
1623    if (check & HANDLE_CHECK_MAGIC)
1624    {
1625        u32 magic;
1626
1627        get_fh_field(&fhandle->fh_generic, magic, magic);
1628
1629        if (magic != PVFS2_SUPER_MAGIC)
1630        {
1631            gossip_err("perform_handle_checks: mismatched magic number "
1632                    " (%x) instead of (%x)\n",
1633                    magic, PVFS2_SUPER_MAGIC);
1634            return 0;
1635        }
1636    }
1637    if (check & HANDLE_CHECK_FSID)
1638    {
1639        pvfs2_sb_info_t *pvfs2_sbp = NULL;
1640        struct super_block *sb = (struct super_block *) p;
1641        u32 fsid;
1642
1643        if (!sb)
1644            return 0;
1645        pvfs2_sbp = PVFS2_SB(sb);
1646
1647        get_fh_field(&fhandle->fh_generic, fsid, fsid);
1648
1649        if (fsid != pvfs2_sbp->fs_id)
1650        {
1651            gossip_err("perform_handle_checks: FSID did not match "
1652                    " (%d) instead of (%d)\n",
1653                    fsid, pvfs2_sbp->fs_id);
1654            return 0;
1655        }
1656        gossip_debug(GOSSIP_UTILS_DEBUG, "perform_handle_checks : fsid = %d\n", fsid);
1657    }
1658    return 1;
1659}
1660
1661/*
1662 * convert an opaque handle to a PVFS_sys_attr structure so that we could
1663 * call copy_attributes_to_inode() to initialize the VFS inode structure.
1664 */
1665static void convert_opaque_handle_to_sys_attr(
1666        PVFS_sys_attr *dst, pvfs2_opaque_handle_t *src)
1667{
1668    dst->owner = src->owner;
1669    dst->group = src->group;
1670    dst->perms = src->perms;
1671    dst->atime = src->atime;
1672    dst->mtime = src->mtime;
1673    dst->ctime = src->ctime;
1674    dst->size  = src->size;
1675    dst->link_target = NULL;
1676    dst->dfile_count = 0;
1677    dst->dirent_count = 0;
1678    dst->objtype = src->objtype;
1679    dst->mask = src->mask;
1680    return;
1681}
1682
1683static inline void do_decode_opaque_handle(pvfs2_opaque_handle_t *h, char *src)
1684{
1685    char *ptr = src;
1686    char **pptr = &ptr;
1687
1688    memset(h, 0, sizeof(pvfs2_opaque_handle_t));
1689    /* Deserialize the buffer */
1690    decode_pvfs2_opaque_handle_t(pptr, h);
1691    return;
1692}
1693
1694static int get_opaque_handle(struct super_block *sb,
1695        const struct file_handle *fhandle,
1696        pvfs2_opaque_handle_t *opaque_handle)
1697{
1698    /* Make sure that we actually get a valid handle */
1699    if (perform_handle_checks(fhandle,
1700            HANDLE_CHECK_LENGTH | HANDLE_CHECK_MAGIC
1701            | HANDLE_CHECK_FSID, sb) == 0)
1702    {
1703        gossip_err("get_handle: got invalid handle buffer!? "
1704                "Impossible happened\n");
1705        return -EINVAL;
1706    }
1707
1708    do_decode_opaque_handle(opaque_handle, (char *) fhandle->fh_private);
1709    /* make sure that fsid in private buffer also matches */
1710    if (opaque_handle->fsid != PVFS2_SB(sb)->fs_id) {
1711        gossip_err("get_handle: invalid fsid in private buffer "
1712                " (%d) instead of (%d)\n",
1713                opaque_handle->fsid, PVFS2_SB(sb)->fs_id);
1714        return -EINVAL;
1715    }
1716    gossip_debug(GOSSIP_UTILS_DEBUG, "get_handle: decoded fsid %d handle %lu\n",
1717            opaque_handle->fsid, (unsigned long) opaque_handle->handle);
1718    return 0;
1719}
1720
1721/*
1722 * called by openfh() system call.
1723 * Given a handle that ostensibly belongs to this PVFS2 superblock,
1724 * we either find an inode
1725 * in the icache already matching the given handle or we allocate
1726 * and place a new struct inode in the icache and fill it up based on
1727 * the buffer that we obtained from user. Presumably enough checks
1728 * at the upper-level (VFS) has been done to make sure that this is
1729 * indeed a buffer filled upon a successful openg().
1730 * Returns ERR_PTR(-errno) in case of error
1731 *         valid pointer to struct inode in case it was a success
1732 */
1733struct inode *pvfs2_sb_find_inode_handle(struct super_block *sb,
1734        const struct file_handle *fhandle)
1735{
1736    struct inode *inode = NULL;
1737    int err = 0;
1738    pvfs2_opaque_handle_t opaque_handle;
1739    PVFS_sys_attr attrs;
1740    PVFS_object_ref ref;
1741
1742    /* Decode the buffer */
1743    err = get_opaque_handle(sb, fhandle, &opaque_handle);
1744    if (err)
1745        return ERR_PTR(err);
1746
1747    /* and convert the opaque handle structure to the PVFS_sys_attr structure */
1748    convert_opaque_handle_to_sys_attr(&attrs, &opaque_handle);
1749
1750    ref.handle = opaque_handle.handle;
1751    ref.fs_id  = opaque_handle.fsid;
1752    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_sb_find_inode_handle: obtained inode number %s\n",
1753            PVFS_handle_to_str(opaque_handle.handle));
1754    /*
1755     * NOTE: Locate the inode number in the icache if possible.
1756     * If not allocate a new inode that is returned locked and
1757     * hashed. Since, we don't issue a getattr/read_inode() callback
1758     * the pvfs2 specific inode is almost guaranteed to be
1759     * uninitialized or invalid. Therefore, we need to
1760     * fill it up based on the information in opaque_handle!
1761     * Consequently, this approach should scale well since openfh()
1762     * does not require any network messages.
1763     */
1764    inode = pvfs2_iget_locked(sb, &ref);
1765
1766    if (!inode) {
1767        gossip_err("Could not allocate inode\n");
1768        return ERR_PTR(-ENOMEM);
1769    }
1770    else {
1771        if (is_bad_inode(inode)) {
1772            iput(inode);
1773            gossip_err("bad inode obtained from iget_locked\n");
1774            return ERR_PTR(-EINVAL);
1775        }
1776        /* Initialize and/or verify struct inode as well as pvfs2_inode */
1777        if ((err = copy_attributes_to_inode(inode, &attrs, NULL)) < 0) {
1778            gossip_err("copy_attributes_to_inode failed with err %d\n", err);
1779            iput(inode);
1780            return ERR_PTR(err);
1781        }
1782
1783        /* this inode was allocated afresh */
1784        if (inode->i_state & I_NEW) {
1785            pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
1786
1787            pvfs2_inode_initialize(pvfs2_inode);
1788            pvfs2_inode->refn.handle = opaque_handle.handle;
1789            pvfs2_inode->refn.fs_id  = opaque_handle.fsid;
1790            inode->i_mapping->host   = inode;
1791            inode->i_rdev            = 0;
1792            inode->i_bdev            = NULL;
1793            inode->i_cdev            = NULL;
1794            inode->i_mapping->a_ops  = &pvfs2_address_operations;
1795            inode->i_mapping->backing_dev_info = &pvfs2_backing_dev_info;
1796            /* Make sure that we unlock the inode */
1797            unlock_new_inode(inode);
1798        }
1799        return inode;
1800    }
1801}
1802
1803#endif
1804
1805#ifdef HAVE_FILL_HANDLE_INODE_OPERATIONS
1806
1807/*
1808 * dst would be encoded
1809 */
1810static int do_encode_opaque_handle(char *dst, struct inode *inode)
1811{
1812    char *ptr = dst;
1813    char **pptr = &ptr;
1814    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
1815    pvfs2_opaque_handle_t h;
1816
1817    /* only metafile allowed */
1818    if (!S_ISREG(inode->i_mode))
1819        return -EINVAL;
1820    memset(&h, 0, sizeof(h));
1821    h.handle = pvfs2_inode->refn.handle;
1822    h.fsid   = pvfs2_inode->refn.fs_id;
1823    h.owner  = inode->i_uid;
1824    h.group  = inode->i_gid;
1825    h.perms  = PVFS_util_translate_mode(inode->i_mode, 0);
1826    h.mask   |= PVFS_ATTR_SYS_PERM;
1827    h.atime  = pvfs2_convert_time_field(&inode->i_atime);
1828    h.mtime  = pvfs2_convert_time_field(&inode->i_mtime);
1829    h.ctime  = pvfs2_convert_time_field(&inode->i_ctime);
1830    h.size   = pvfs2_i_size_read(inode);
1831    h.mask   |= PVFS_ATTR_SYS_SIZE;
1832    h.objtype = PVFS_TYPE_METAFILE;
1833    /* Serialize into the buffer */
1834    gossip_debug(GOSSIP_UTILS_DEBUG, "encoded fsid %d handle %lu\n",
1835            h.fsid, (unsigned long) h.handle);
1836    encode_pvfs2_opaque_handle_t(pptr, &h);
1837    return 0;
1838}
1839
1840static void *pvfs2_fh_ctor(void)
1841{
1842    void *buf;
1843
1844    buf = kmalloc(sizeof(pvfs2_opaque_handle_t),
1845                  PVFS2_BUFMAP_GFP_FLAGS);
1846    return buf;
1847}
1848
/*
 * Release a buffer obtained from pvfs2_fh_ctor(); a NULL buf is ignored.
 */
static void pvfs2_fh_dtor(void *buf)
{
    if (!buf)
    {
        return;
    }
    kfree(buf);
}
1855
1856/*
1857 * This routine is called by openg() system call.
1858 * Given an inode (which has been looked up previously),
1859 * we fill in the attributes of the inode in an opaque buffer
1860 * and hand it back to user.
1861 * Note: We need to make it a fixed
1862 * endian ordering so that it would work on all homogenous platforms.
1863 * Hence the need to encode the handle buffer.
1864 */
1865int pvfs2_fill_handle(struct inode *inode, struct file_handle *fhandle)
1866{
1867    size_t pvfs2_opaque_handle_size = sizeof(pvfs2_opaque_handle_t);
1868
1869    if (!inode || !fhandle)
1870    {
1871        return -EINVAL;
1872    }
1873    /* querying the size of PVFS2 specific opaque handle buffer */
1874    if (fhandle->fh_private_length == 0)
1875    {
1876        fhandle->fh_private_length = pvfs2_opaque_handle_size;
1877        return 0;
1878    }
1879    else if (fhandle->fh_private_length < pvfs2_opaque_handle_size)
1880    {
1881        return -ERANGE; /* too small a buffer length */
1882    }
1883    else
1884    {
1885        fhandle->fh_private = pvfs2_fh_ctor();
1886        if (fhandle->fh_private == NULL)
1887        {
1888            return -ENOMEM;
1889        }
1890        /* encode the opaque handle information */
1891        if (do_encode_opaque_handle((char *) fhandle->fh_private, inode) < 0)
1892        {
1893            pvfs2_fh_dtor(fhandle->fh_private);
1894            fhandle->fh_private = NULL;
1895            return -EINVAL;
1896        }
1897        /* Set a destructor function for the fh_private */
1898        fhandle->fh_private_dtor = pvfs2_fh_dtor;
1899        /* and the length */
1900        fhandle->fh_private_length = pvfs2_opaque_handle_size;
1901        gossip_debug(GOSSIP_UTILS_DEBUG, "Returning handle length %ld\n",
1902                (unsigned long) pvfs2_opaque_handle_size);
1903        return 0;
1904    }
1905}
1906
1907#endif /* HAVE_FILL_HANDLE_INODE_OPERATIONS */
1908
1909#ifdef USE_MMAP_RA_CACHE
1910int pvfs2_flush_mmap_racache(struct inode *inode)
1911{
1912    int ret = -EINVAL;
1913    pvfs2_inode_t *pvfs2_inode = PVFS2_I(inode);
1914    pvfs2_kernel_op_t *new_op = NULL;
1915
1916    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_mmap_racache %s: Handle is %s "
1917                "| fs_id %d\n", PVFS_handle_to_str(get_handle_from_ino(inode)),
1918                PVFS_handle_to_str(pvfs2_inode->refn.handle), pvfs2_inode->refn.fs_id);
1919
1920    new_op = op_alloc(PVFS2_VFS_OP_MMAP_RA_FLUSH);
1921    if (!new_op)
1922    {
1923        return -ENOMEM;
1924    }
1925    new_op->upcall.req.ra_cache_flush.refn = pvfs2_inode->refn;
1926
1927    ret = service_operation(new_op, "pvfs2_flush_mmap_racache",
1928                      get_interruptible_flag(inode));
1929
1930    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_flush_mmap_racache got return "
1931                "value of %d\n",ret);
1932
1933    op_release(new_op);
1934    return ret;
1935}
1936#endif
1937
1938int pvfs2_unmount_sb(struct super_block *sb)
1939{
1940    int ret = -EINVAL;
1941    pvfs2_kernel_op_t *new_op = NULL;
1942
1943    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_unmount_sb called on sb %p\n", sb);
1944
1945    new_op = op_alloc(PVFS2_VFS_OP_FS_UMOUNT);
1946    if (!new_op)
1947    {
1948        return -ENOMEM;
1949    }
1950    new_op->upcall.req.fs_umount.id = PVFS2_SB(sb)->id;
1951    new_op->upcall.req.fs_umount.fs_id = PVFS2_SB(sb)->fs_id;
1952    strncpy(new_op->upcall.req.fs_umount.pvfs2_config_server,
1953            PVFS2_SB(sb)->devname, PVFS_MAX_SERVER_ADDR_LEN);
1954
1955    gossip_debug(GOSSIP_UTILS_DEBUG, "Attempting PVFS2 Unmount via host %s\n",
1956                new_op->upcall.req.fs_umount.pvfs2_config_server);
1957
1958    ret = service_operation(new_op, "pvfs2_fs_umount", 0);
1959
1960    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_unmount: got return value of %d\n", ret);
1961    if (ret)
1962    {
1963        sb = ERR_PTR(ret);
1964    }
1965    else {
1966        PVFS2_SB(sb)->mount_pending = 1;
1967    }
1968
1969    op_release(new_op);
1970    return ret;
1971}
1972
1973/*
1974  NOTE: on successful cancellation, be sure to return -EINTR, as
1975  that's the return value the caller expects
1976*/
1977int pvfs2_cancel_op_in_progress(unsigned long tag)
1978{
1979    int ret = -EINVAL;
1980    pvfs2_kernel_op_t *new_op = NULL;
1981
1982    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_cancel_op_in_progress called on tag %lu\n", tag);
1983
1984    new_op = op_alloc(PVFS2_VFS_OP_CANCEL);
1985    if (!new_op)
1986    {
1987        return -ENOMEM;
1988    }
1989    new_op->upcall.req.cancel.op_tag = tag;
1990
1991    gossip_debug(GOSSIP_UTILS_DEBUG, "Attempting PVFS2 operation cancellation of tag %llu\n",
1992                llu(new_op->upcall.req.cancel.op_tag));
1993
1994    ret = service_operation(new_op, "pvfs2_cancel", PVFS2_OP_CANCELLATION);
1995
1996    gossip_debug(GOSSIP_UTILS_DEBUG, "pvfs2_cancel_op_in_progress: got return "
1997                "value of %d\n", ret);
1998
1999    op_release(new_op);
2000    return (ret);
2001}
2002
2003/*
2004  We want to clear everything except for rw_semaphore and the vfs_inode
2005*/
2006void pvfs2_inode_initialize(pvfs2_inode_t *pvfs2_inode)
2007{
2008    if (!InitFlag(pvfs2_inode))
2009    {
2010        pvfs2_inode->refn.handle = PVFS_HANDLE_NULL;
2011        pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL;
2012        pvfs2_inode->last_failed_block_index_read = 0;
2013        memset(pvfs2_inode->link_target, 0, sizeof(pvfs2_inode->link_target));
2014        pvfs2_inode->error_code = 0;
2015        pvfs2_inode->revalidate_failed = 0;
2016        pvfs2_inode->pinode_flags = 0;
2017        SetInitFlag(pvfs2_inode);
2018    }
2019}
2020
2021/*
2022  this is called from super:pvfs2_destroy_inode.
2023*/
2024void pvfs2_inode_finalize(pvfs2_inode_t *pvfs2_inode)
2025{
2026    pvfs2_inode->refn.handle = PVFS_HANDLE_NULL;
2027    pvfs2_inode->refn.fs_id = PVFS_FS_ID_NULL;
2028    pvfs2_inode->last_failed_block_index_read = 0;
2029    pvfs2_inode->error_code = 0;
2030}
2031
2032void pvfs2_op_initialize(pvfs2_kernel_op_t *op)
2033{
2034    op->io_completed = 0;
2035
2036    op->upcall.type = PVFS2_VFS_OP_INVALID;
2037    op->downcall.type = PVFS2_VFS_OP_INVALID;
2038    op->downcall.status = -1;
2039
2040    op->op_state = OP_VFS_STATE_UNKNOWN;
2041    op->tag = 0;
2042}
2043
2044void pvfs2_make_bad_inode(struct inode *inode)
2045{
2046    if (is_root_handle(inode))
2047    {
2048        /*
2049          if this occurs, the pvfs2-client-core was killed but we
2050          can't afford to lose the inode operations and such
2051          associated with the root handle in any case
2052        */
2053        gossip_debug(GOSSIP_UTILS_DEBUG, "*** NOT making bad root inode %s\n", PVFS_handle_to_str(get_handle_from_ino(inode)));
2054    }
2055    else
2056    {
2057        gossip_debug(GOSSIP_UTILS_DEBUG, "*** making bad inode %s\n", PVFS_handle_to_str(get_handle_from_ino(inode)));
2058        make_bad_inode(inode);
2059    }
2060}
2061
2062/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigmask */
2063void mask_blocked_signals(sigset_t *orig_sigset)
2064{
2065    unsigned long sigallow = sigmask(SIGKILL);
2066    unsigned long irqflags = 0;
2067    struct k_sigaction *action = pvfs2_current_sigaction;
2068
2069    sigallow |= ((action[SIGINT-1].sa.sa_handler == SIG_DFL) ?
2070                 sigmask(SIGINT) : 0);
2071    sigallow |= ((action[SIGQUIT-1].sa.sa_handler == SIG_DFL) ?
2072                 sigmask(SIGQUIT) : 0);
2073
2074    spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags);
2075    *orig_sigset = current->blocked;
2076    siginitsetinv(&current->blocked, sigallow & ~orig_sigset->sig[0]);
2077    pvfs2_recalc_sigpending();
2078    spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags);
2079}
2080
2081/* this code is based on linux/net/sunrpc/clnt.c:rpc_clnt_sigunmask */
2082void unmask_blocked_signals(sigset_t *orig_sigset)
2083{
2084    unsigned long irqflags = 0;
2085
2086    spin_lock_irqsave(&pvfs2_current_signal_lock, irqflags);
2087    current->blocked = *orig_sigset;
2088    pvfs2_recalc_sigpending();
2089    spin_unlock_irqrestore(&pvfs2_current_signal_lock, irqflags);
2090}
2091
2092PVFS_time pvfs2_convert_time_field(void *time_ptr)
2093{
2094    PVFS_time pvfs2_time;
2095#ifdef PVFS2_LINUX_KERNEL_2_4
2096    pvfs2_time = (PVFS_time)(*(time_t *)time_ptr);
2097#else
2098    struct timespec *tspec = (struct timespec *)time_ptr;
2099    pvfs2_time = (PVFS_time)((time_t)tspec->tv_sec);
2100#endif
2101    return pvfs2_time;
2102}
2103
2104/* macro defined in include/pvfs2-types.h */
2105DECLARE_ERRNO_MAPPING_AND_FN();
2106
2107int pvfs2_normalize_to_errno(PVFS_error error_code)
2108{
2109    if(error_code > 0)
2110    {
2111        gossip_err("pvfs2: error status receieved.\n");
2112        gossip_err("pvfs2: assuming error code is inverted.\n");
2113        error_code = -error_code;
2114    }
2115
2116    /* convert any error codes that are in pvfs2 format */
2117    if(IS_PVFS_NON_ERRNO_ERROR(-error_code))
2118    {
2119        if(PVFS_NON_ERRNO_ERROR_CODE(-error_code) == PVFS_ECANCEL)
2120        {
2121            /* cancellation error codes generally correspond to a timeout
2122             * from the client's perspective
2123             */
2124            error_code = -ETIMEDOUT;
2125        }
2126        else
2127        {
2128            /* assume a default error code */
2129            gossip_err("pvfs2: warning: "
2130                "got error code without errno equivalent: %d.\n", error_code);
2131            error_code = -EINVAL;
2132        }
2133    }
2134    else if(IS_PVFS_ERROR(-error_code))
2135    {
2136        error_code = -PVFS_ERROR_TO_ERRNO(-error_code);
2137    }
2138    return(error_code);
2139}
2140
2141int32_t PVFS_util_translate_mode(int mode, int suid)
2142{
2143    int ret = 0, i = 0;
2144#define NUM_MODES 11
2145    static int modes[NUM_MODES] =
2146    {
2147        S_IXOTH, S_IWOTH, S_IROTH,
2148        S_IXGRP, S_IWGRP, S_IRGRP,
2149        S_IXUSR, S_IWUSR, S_IRUSR,
2150        S_ISGID, S_ISUID
2151    };
2152    static int pvfs2_modes[NUM_MODES] =
2153    {
2154        PVFS_O_EXECUTE, PVFS_O_WRITE, PVFS_O_READ,
2155        PVFS_G_EXECUTE, PVFS_G_WRITE, PVFS_G_READ,
2156        PVFS_U_EXECUTE, PVFS_U_WRITE, PVFS_U_READ,
2157        PVFS_G_SGID,    PVFS_U_SUID
2158    };
2159
2160    for(i = 0; i < NUM_MODES; i++)
2161    {
2162        if (mode & modes[i])
2163        {
2164            ret |= pvfs2_modes[i];
2165        }
2166    }
2167    if (suid == 0 && (ret & PVFS_U_SUID))
2168    {
2169         ret &= ~PVFS_U_SUID;
2170    }
2171    return ret;
2172#undef NUM_MODES
2173}
2174
2175
/*
 * Minimal strtok() replacement.
 *
 * s    - string to start tokenizing, or NULL to continue with the string
 *        supplied by an earlier call.  The string is modified in place:
 *        each delimiter found is overwritten with '\0'.
 * toks - NUL-terminated set of single-character delimiters.
 *
 * Returns a pointer to the next token, or NULL when the remaining input
 * is empty (or when called with s == NULL before any string was
 * supplied).  Unlike strtok(), consecutive delimiters are not collapsed:
 * each one ends a (possibly empty) token.
 *
 * The scan position lives in a static variable, so the function is not
 * reentrant and must not be used on two strings at the same time.
 */
static char * pvfs2_strtok(char *s, const char *toks)
{
   static char *in_string_p;   /* scan position carried between calls */
   char *token_start;          /* start of the token returned by this call */

   if (s)
   {
      /* when s has a value, we are using a new input string */
      in_string_p = s;
   }

   /* continuation requested before any string was supplied */
   if (!in_string_p)
   {
      return(NULL);
   }

   token_start = in_string_p;

   /* advance until a delimiter or the end of the string is reached */
   for (; *in_string_p; in_string_p++)
   {
      if (strchr(toks, *in_string_p))
      {
         /* delimiter found => terminate the token and step past it */
         *in_string_p = 0;
         in_string_p++;
         return(token_start);
      }
   }

   /* end of string: whatever is left is the last token, if non-empty */
   if (*token_start == 0)
   {
      return(NULL);
   }
   return(token_start);
}/*end function pvfs2_strtok*/
2216
2217/*convert 64-bit debug mask into a readable string of keywords*/
2218static int proc_mask_to_debug(__keyword_mask_t *mask_map
2219                             ,int num_mask_map
2220                             ,uint64_t mask
2221                             ,char *debug_string)
2222{
2223   unsigned int index = 0;
2224   unsigned int i;
2225
2226   memset(debug_string,0,PVFS2_MAX_DEBUG_STRING_LEN);
2227
2228   for (i=0; i<num_mask_map; i++)
2229   {
2230      if ( (index + strlen(mask_map[i].keyword)) >= PVFS2_MAX_DEBUG_STRING_LEN )
2231      {
2232         return(0);
2233      }
2234     
2235      switch( mask_map[i].mask_val )
2236      {
2237          case GOSSIP_NO_DEBUG  :
2238          {
2239               if ( mask == GOSSIP_NO_DEBUG )
2240               {
2241                  /* "none" */
2242                  strcpy(debug_string,mask_map[i].keyword);
2243                  return(0);
2244               }
2245               break;
2246          }
2247          case GOSSIP_MAX_DEBUG :
2248          {
2249              if ( mask == GOSSIP_MAX_DEBUG )
2250              {
2251                 /* "all" */
2252                 strcpy(debug_string,mask_map[i].keyword);
2253                 return(0);
2254              }
2255              break;
2256          }
2257          default :
2258          {
2259              if ((mask & mask_map[i].mask_val) != mask_map[i].mask_val)
2260              {   /*mask does NOT contain the mask value*/
2261                  break;
2262              }
2263              if (index != 0)
2264              {   /*add comma for second and subsequent mask keywords*/
2265                  (debug_string[index]) = ',';
2266                  index++;
2267              }
2268
2269              /*add keyword and slide index*/
2270              memcpy(&debug_string[index], mask_map[i].keyword
2271                    ,strlen(mask_map[i].keyword));
2272              index += strlen(mask_map[i].keyword);
2273          }
2274      }/*end switch*/
2275   }/*end for*/
2276
2277   return(0);
2278}/*end function proc_mask_to_debug*/
2279
2280
2281static uint64_t proc_debug_to_mask(__keyword_mask_t *mask_map,
2282        int num_mask_map, const char *event_logging)
2283{
2284    uint64_t mask = 0;
2285    char *s = NULL, *t = NULL;
2286    const char *toks = ", ";
2287    int i = 0, negate = 0, slen = 0;
2288
2289    if (event_logging)
2290    {
2291        /* s = strdup(event_logging); */
2292        slen=strlen(event_logging);
2293        s = kmalloc(slen+1,GFP_KERNEL);
2294        if (!s)
2295        {
2296           return (-ENOMEM);
2297        }
2298        memset(s,0,slen+1);
2299        memcpy(s,event_logging,slen);
2300
2301        /* t = strtok(s, toks); */
2302        t = pvfs2_strtok(s, toks);
2303
2304        while(t)
2305        {
2306            if (*t == '-')
2307            {
2308                negate = 1;
2309                ++t;
2310            }
2311
2312            for(i = 0; i < num_mask_map; i++)
2313            {
2314                if (!strcmp(t, mask_map[i].keyword))
2315                {
2316                    if (negate)
2317                    {
2318                        mask &= ~mask_map[i].mask_val;
2319                    }
2320                    else
2321                    {
2322                        mask |= mask_map[i].mask_val;
2323                    }
2324                    break;
2325                }
2326            }
2327            /* t = strtok(NULL, toks); */
2328            t = pvfs2_strtok(NULL, toks);
2329        }
2330        kfree(s);
2331    }
2332    return mask;
2333}
2334
2335/*
2336 * Based on human readable keywords, translate them into
2337 * a mask value appropriate for the debugging level desired.
2338 * The 'computed' mask is returned; 0 if no keywords are
2339 * present or recognized.  Unrecognized keywords are ignored when
2340 * mixed with recognized keywords.
2341 *
2342 * Prefix a keyword with "-" to turn it off.  All keywords
2343 * processed in specified order.
2344 */
2345uint64_t PVFS_proc_debug_eventlog_to_mask(const char *event_logging)
2346{
2347    return proc_debug_to_mask(s_keyword_mask_map,
2348            num_keyword_mask_map, event_logging);
2349}
2350
2351uint64_t PVFS_proc_kmod_eventlog_to_mask(const char *event_logging)
2352{
2353    return proc_debug_to_mask(s_kmod_keyword_mask_map,
2354            num_kmod_keyword_mask_map, event_logging);
2355}
2356
2357int PVFS_proc_kmod_mask_to_eventlog(uint64_t mask, char *debug_string)
2358{
2359    return( proc_mask_to_debug(s_kmod_keyword_mask_map
2360                              , num_kmod_keyword_mask_map
2361                              ,mask
2362                              ,debug_string) );
2363}/*end function PVFS_proc_kmod_mask_to_eventlog*/
2364
2365int PVFS_proc_mask_to_eventlog(uint64_t mask, char *debug_string)
2366{
2367   
2368    return( proc_mask_to_debug(s_keyword_mask_map
2369                              ,num_keyword_mask_map
2370                              ,mask
2371                              ,debug_string) );
2372}
2373
2374
2375/*
2376 * Local variables:
2377 *  c-indent-level: 4
2378 *  c-basic-offset: 4
2379 * End:
2380 *
2381 * vim: ts=8 sts=4 sw=4 expandtab
2382 */
Note: See TracBrowser for help on using the browser.