Changeset 9336

Show
Ignore:
Timestamp:
06/15/12 12:02:56 (11 months ago)
Author:
ligon
Message:

New Feature: users can now specify at configure time how they want the kernel module to handle the file pointer when an error occurs. The default behavior is to leave the file pointer alone, i.e., the file pointer will always represent the position of the last byte written, even though the user's entire request is not totally satisfied. In this case, the actual number of bytes written is returned to the user. The new feature will change this default behavior such that the file pointer is repositioned to the byte just prior to the user's write request, a -1 is returned to the user's call, and errno is appropriately set. To enable this feature, add --enable-reset-file-pos on the configure command line when your OrangeFS system is configured.

Removed compiler warnings.

M Makefile.in
M configure.in
M ChangeLog
M src/kernel/linux-2.6/Makefile.in
M src/kernel/linux-2.6/file.c
M src/kernel/linux-2.6/pvfs2-kernel.h
M src/io/trove/trove-dbpf/dbpf-bstream-direct.c
M src/common/misc/realpath.c
M src/client/usrint/posix.c
M src/client/usrint/stdio.c
M src/client/sysint/client-state-machine.c

Location:
branches/stable
Files:
11 modified

Legend:

Unmodified
Added
Removed
  • branches/stable/ChangeLog

    r9309 r9336  
    66 
    77Stable 
     8 
     9* New Feature: users can now specify at configure time how they want the 
     10* kernel module to handle the file pointer when an error occurs.  The 
     11* default behavior is to leave the file pointer alone, i.e., the file 
     12* pointer will always represent the position of the last byte written, 
     13* even though the user's entire request is not totally satisfied. In  
     14* this case, the actual number of bytes written is returned to the user. 
     15* The new feature will change this default behavior such that the file pointer 
     16* is repositioned to the byte just prior to the user's write request, a 
     17* -1 is returned to the user's call, and errno is appropriately set.  To 
     18* enable this feature, add --enable-reset-file-pos on the configure 
     19* command line when your OrangeFS system is configured. 
    820 
    921* Bug fix:  modified pvfs2_readdir, which is executed by the kernel module 
  • branches/stable/Makefile.in

    r9323 r9336  
    204204ULIBDEPLIBS := -lpvfs2 
    205205MMAP_RA_CACHE = @MMAP_RA_CACHE@ 
     206RESET_FILE_POS = @RESET_FILE_POS@ 
    206207TRUSTED_CONNECTIONS = @TRUSTED_CONNECTIONS@ 
    207208REDHAT_RELEASE = @REDHAT_RELEASE@ 
     
    433434ifdef MMAP_RA_CACHE 
    434435CFLAGS += @MMAP_RA_CACHE@ 
     436endif 
     437 
     438# reset the file position pointer when a write call encounters errors (kernel only) 
     439# by default, this feature is disabled.  Default behavior is to increment the file 
     440# position pointer as bytes are written. 
     441ifdef RESET_FILE_POS 
     442CFLAGS += @RESET_FILE_POS@ 
    435443endif 
    436444 
  • branches/stable/configure.in

    r9249 r9336  
    298298,) 
    299299AC_SUBST(MMAP_RA_CACHE) 
     300 
     301dnl a mechanism that resets the file position pointer when an error occurs whether 
     302dnl or not any bytes were written (kernel interface only). 
     303RESET_FILE_POS="" 
     304AC_ARG_ENABLE(reset-file-pos, 
     305[  --enable-reset-file-pos Resets file position pointer in kernel interface upon error], 
     306[if test "x$enableval" = "xyes" ; then 
     307RESET_FILE_POS="-DRESET_FILE_POS" 
     308fi] 
     309,) 
     310AC_SUBST(RESET_FILE_POS) 
     311 
    300312 
    301313dnl See if the --enable-trusted-connections  option was given to configure 
     
    15031515fi 
    15041516 
     1517if test "x$RESET_FILE_POS" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then 
     1518   AC_MSG_RESULT([PVFS2 configured for resetting file position      :  no]) 
     1519else 
     1520   AC_MSG_RESULT([PVFS2 configured for resetting file position      : yes]) 
     1521fi 
     1522 
    15051523if test "x$REDHAT_RELEASE" = "x" -a "x$BUILD_BMI_ONLY" != "x1"; then 
    15061524   AC_MSG_RESULT([PVFS2 will use workaround for redhat 2.4 kernels  :  no]) 
  • branches/stable/src/client/sysint/client-state-machine.c

    r9020 r9336  
    531531 
    532532    gossip_debug(GOSSIP_CLIENT_DEBUG, 
     533            "PINT_client_io_cancel id %lld\n",lld(id)); 
     534    gossip_debug(GOSSIP_CANCEL_DEBUG, 
    533535            "PINT_client_io_cancel id %lld\n",lld(id)); 
    534536 
  • branches/stable/src/client/usrint/posix.c

    r9234 r9336  
    1818#include "posix-pvfs.h" 
    1919#include "openfile-util.h" 
     20 
     21/** 
     22 * function prototypes not defined in libc, though it is a linux 
     23 * system call and we define it in the usr lib 
     24 */ 
     25 
     26int getdents(unsigned int, struct dirent *, unsigned int); 
     27int getdents64(unsigned int, struct dirent64 *, unsigned int); 
     28int flock(int, int); 
     29int fadvise64(int, off64_t, off64_t, int); 
    2030 
    2131/* 
     
    13051315} 
    13061316 
     1317 
    13071318/** 
    13081319 * man page calls last arg count but is ambiguous if it is number 
     
    14071418    return rc; 
    14081419} 
     1420 
    14091421 
    14101422int flock(int fd, int op) 
  • branches/stable/src/client/usrint/stdio.c

    r9234 r9336  
    567567#endif 
    568568            /* write data directly */ 
    569             rc = write(stream->_fileno, ptr + rsz_buf, rsz_extra); 
     569            rc = write(stream->_fileno, (char *)ptr + rsz_buf, rsz_extra); 
    570570            if (rc == -1) 
    571571            { 
     
    577577        else 
    578578        { 
    579             memcpy(stream->_IO_write_ptr, ptr + rsz_buf, rsz_extra); 
     579            memcpy(stream->_IO_write_ptr, (char *)ptr + rsz_buf, rsz_extra); 
    580580            stream->_IO_write_ptr += rsz_extra; 
    581581        } 
     
    717717    } 
    718718 
    719     /* if more bytes requested */ 
     719   /* if more bytes requested */ 
    720720    if (rsz_extra) 
    721721    { 
     
    727727            { 
    728728                /* read directly from file for remainder of request */ 
    729                 bytes_read = read(stream->_fileno, ptr+rsz_buf, rsz_extra); 
     729                bytes_read = read(stream->_fileno, (char *)ptr+rsz_buf, rsz_extra); 
    730730                if (bytes_read == -1) 
    731731                { 
  • branches/stable/src/common/misc/realpath.c

    r9323 r9336  
    4343 
    4444#define MAX_READLINKS 32 
    45  
    46 extern int PVFS_util_resolve_absolute( 
    47     const char* local_path, 
    48     PVFS_fs_id* out_fs_id, 
    49     char* out_fs_path, 
    50     int out_fs_path_max); 
    5145 
    5246#ifdef WIN32 
  • branches/stable/src/io/trove/trove-dbpf/dbpf-bstream-direct.c

    r9247 r9336  
    582582#ifdef HAVE_OPEN_O_DIRECT 
    583583    if(!(fcntl(fd, F_GETFL) & O_DIRECT)) 
    584 #elif HAVE_FNCTL_F_NOCACHE 
     584#elif defined(HAVE_FNCTL_F_NOCACHE) 
    585585    if (!(fcntl(fd, F_GETFL) & F_NOCACHE))  
    586586#else 
  • branches/stable/src/kernel/linux-2.6/Makefile.in

    r7495 r9336  
    7979 
    8080EXTRA_CFLAGS += @MMAP_RA_CACHE@ 
     81EXTRA_CFLAGS += @RESET_FILE_POS@ 
    8182EXTRA_CFLAGS += -DPVFS2_VERSION="\"@PVFS2_VERSION@\"" 
    8283 
  • branches/stable/src/kernel/linux-2.6/file.c

    r9298 r9336  
    5050                            unsigned long xtnr_segs, 
    5151                            size_t total_size); 
     52#ifdef RESET_FILE_POS   
     53static ssize_t do_readv_writev_wrapper( struct rw_options *rw); 
     54#endif 
    5255 
    5356#define wake_up_daemon_for_return(op)             \ 
     
    176179        struct { 
    177180            loff_t        *offset; 
     181            loff_t         offset_before_request; 
    178182        } io; 
    179183        /* Non-contiguous file I/O operations use a vector of offsets */ 
     
    451455    { 
    452456        gossip_lerr("invalid parameters (rw: %p, vec: %p, nr_segs: %lu, " 
    453                 "total_size: %zd)\n", rw, vec, nr_segs, total_size); 
     457                    "total_size: %zd)\n", rw, vec, nr_segs, total_size); 
    454458        ret = -EINVAL; 
    455459        goto out; 
     
    477481        goto out; 
    478482    } 
    479     gossip_debug(GOSSIP_FILE_DEBUG, "GET op %p -> buffer_index %d\n", new_op, buffer_index); 
     483    gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): GET op %p -> buffer_index %d\n" 
     484                                  , __func__ 
     485                                  ,rw->fnstr 
     486                                  , llu(rw->pvfs2_inode->refn.handle) 
     487                                  , new_op, buffer_index); 
    480488 
    481489    new_op->uses_shared_memory = 1; 
     
    484492    new_op->upcall.req.io.offset = *(rw->off.io.offset); 
    485493 
    486     gossip_debug(GOSSIP_FILE_DEBUG, "%s: copy_to_user %d nr_segs %lu, " 
    487             "offset: %llu total_size: %zd\n", rw->fnstr, rw->copy_to_user_addresses,  
    488             nr_segs, llu(*(rw->off.io.offset)), total_size); 
     494    gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): copy_to_user %d nr_segs %lu, " 
     495                                    "offset: %llu total_size: %zd\n" 
     496                                   ,__func__ 
     497                                   ,rw->fnstr 
     498                                   ,llu(rw->pvfs2_inode->refn.handle) 
     499                                   ,rw->copy_to_user_addresses 
     500                                   ,nr_segs 
     501                                   ,llu(*(rw->off.io.offset)) 
     502                                   ,total_size); 
     503 
    489504 
    490505    /* Stage 1: copy the buffers into client-core's address space */ 
     506    /* precopy_buffers only pertains to writes.                   */ 
    491507    if ((ret = precopy_buffers(buffer_index, rw, vec, nr_segs, total_size)) < 0)  
    492508    { 
    493509        goto out; 
    494510    } 
     511 
     512    gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Calling post_io_request with tag(%d)\n" 
     513                                  ,__func__ 
     514                                  ,rw->fnstr 
     515                                  ,llu(rw->pvfs2_inode->refn.handle) 
     516                                  ,(int)new_op->tag); 
    495517 
    496518    /* Stage 2: Service the I/O operation */ 
     
    500522    /* If service_operation() returns -EAGAIN #and# the operation was purged from 
    501523     * pvfs2_request_list or htable_ops_in_progress, then we know that the 
    502      * client was restarted, causing the share memory area to be wiped clean.  To restart an  
    503      * I/O operation in this case, we must re-copy the data from the user's iovec  
    504      * to a NEW shared memory location. 
     524     * client was restarted, causing the shared memory area to be wiped clean.  To restart a  
     525     * write operation in this case, we must re-copy the data from the user's iovec  
     526     * to a NEW shared memory location. To restart a read operation, we must get a new 
     527     * shared memory location. 
    505528    */ 
    506529    if ( ret == -EAGAIN && op_state_purged(new_op) ) 
    507530    { 
    508        gossip_debug(GOSSIP_WAIT_DEBUG,"%s:going to populate_shared_memory.\n",__func__); 
     531       gossip_debug(GOSSIP_WAIT_DEBUG,"%s:going to repopulate_shared_memory.\n",__func__); 
    509532       goto populate_shared_memory; 
    510533    } 
     
    540563          goto out; 
    541564    } 
     565 
    542566    /* Stage 3: Post copy buffers from client-core's address space */ 
     567    /* postcopy_buffers only pertains to reads.                    */ 
    543568    if ((ret = postcopy_buffers(buffer_index, rw, vec, nr_segs,  
    544569                    new_op->downcall.resp.io.amt_complete)) < 0) { 
     
    550575        goto out; 
    551576    } 
     577 
     578    gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Amount written as returned by the sys-io call:%d\n" 
     579                                  ,__func__ 
     580                                  ,rw->fnstr 
     581                                  ,llu(rw->pvfs2_inode->refn.handle) 
     582                                  ,(int)new_op->downcall.resp.io.amt_complete); 
     583 
    552584    ret = new_op->downcall.resp.io.amt_complete; 
    553     gossip_debug(GOSSIP_FILE_DEBUG, "wait_for_io returning %ld\n", (long) ret); 
     585     
    554586    /* 
    555587      tell the device file owner waiting on I/O that this read has 
     
    564596    { 
    565597        pvfs_bufmap_put(buffer_index); 
    566         gossip_debug(GOSSIP_FILE_DEBUG, "PUT buffer_index %d\n", buffer_index); 
     598        gossip_debug(GOSSIP_FILE_DEBUG, "%s(%llu): PUT buffer_index %d\n" 
     599                                      , rw->fnstr 
     600                                      , llu(rw->pvfs2_inode->refn.handle) 
     601                                      , buffer_index); 
    567602        buffer_index = -1; 
    568603    } 
     
    12881323        goto out; 
    12891324    } 
     1325 
     1326    gossip_debug(GOSSIP_FILE_DEBUG,"%s-BEGIN/%s(%llu): count(%d) after estimate_max_iovecs.\n" 
     1327                                  ,__func__ 
     1328                                  ,rw->fnstr 
     1329                                  ,llu(pvfs2_inode->refn.handle) 
     1330                                  ,(int)count); 
     1331 
    12901332    if (rw->type == IO_WRITEV) 
    12911333    { 
     
    13121354            goto out; 
    13131355        } 
    1314         gossip_debug(GOSSIP_FILE_DEBUG, "%s: proceeding with offset : %llu, " 
    1315                                         "size %zd\n", 
    1316                                         rw->fnstr, llu(*offset), count); 
    1317     } 
     1356 
     1357        gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): proceeding with offset : %llu, size %d\n" 
     1358                                      ,__func__ 
     1359                                      ,rw->fnstr 
     1360                                      ,llu(pvfs2_inode->refn.handle) 
     1361                                      ,llu(*offset), (int)count); 
     1362    } /*endif IO_WRITEV*/ 
     1363 
    13181364    if (count == 0) 
    13191365    { 
     
    13281374     * such that no iovec description straddles a block size limit 
    13291375     */ 
     1376 
     1377    gossip_debug(GOSSIP_FILE_DEBUG,"%s: pvfs_bufmap_size:%d\n" 
     1378                                  ,rw->fnstr 
     1379                                  ,pvfs_bufmap_size_query()); 
     1380 
    13301381    if (count > pvfs_bufmap_size_query()) 
    13311382    { 
     
    13721423    ptr = iovecptr; 
    13731424 
    1374     gossip_debug(GOSSIP_FILE_DEBUG, "%s %zd@%llu\n",  
    1375             rw->fnstr, count, llu(*offset)); 
    1376     gossip_debug(GOSSIP_FILE_DEBUG, "%s: new_nr_segs: %lu, seg_count: %lu\n",  
    1377             rw->fnstr, new_nr_segs, seg_count); 
     1425    gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu) %d@%llu\n" 
     1426                                  , __func__ 
     1427                                  , rw->fnstr 
     1428                                  , llu(pvfs2_inode->refn.handle) 
     1429                                  , (int)count, llu(*offset)); 
     1430    gossip_debug(GOSSIP_FILE_DEBUG, "%s/%s(%llu): new_nr_segs: %lu, seg_count: %lu\n" 
     1431                                  , __func__ 
     1432                                  , rw->fnstr 
     1433                                  , llu(pvfs2_inode->refn.handle) 
     1434                                  , new_nr_segs, seg_count); 
     1435 
    13781436#ifdef PVFS2_KERNEL_DEBUG 
    13791437    for (seg = 0; seg < new_nr_segs; seg++) 
     
    14141472        //{ 
    14151473            /* push the I/O directly through to storage */ 
    1416      ret = wait_for_direct_io(rw, ptr, seg_array[seg], each_count); 
    1417         //} 
     1474 
     1475        gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): size of each_count(%d)\n" 
     1476                                      ,__func__ 
     1477                                      ,rw->fnstr 
     1478                                      ,llu(pvfs2_inode->refn.handle) 
     1479                                      ,(int)each_count); 
     1480        gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): BEFORE wait_for_io: offset is %d\n" 
     1481                                      ,__func__ 
     1482                                      ,rw->fnstr 
     1483                                      ,llu(pvfs2_inode->refn.handle) 
     1484                                      ,(int)*offset); 
     1485 
     1486        ret = wait_for_direct_io(rw, ptr, seg_array[seg], each_count); 
     1487 
     1488        gossip_debug(GOSSIP_FILE_DEBUG,"%s%s(%llu): return from wait_for_io:%d\n" 
     1489                                      ,__func__ 
     1490                                      ,rw->fnstr 
     1491                                      ,llu(pvfs2_inode->refn.handle) 
     1492                                      ,(int)ret); 
     1493 
    14181494        if (ret < 0) 
    14191495        { 
    14201496            goto out; 
    14211497        } 
     1498 
    14221499        /* advance the iovec pointer */ 
    14231500        ptr += seg_array[seg]; 
     
    14271504        amt_complete = ret; 
    14281505 
     1506        gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): AFTER wait_for_io: offset is %d\n" 
     1507                                      ,__func__ 
     1508                                      ,rw->fnstr 
     1509                                      ,llu(pvfs2_inode->refn.handle) 
     1510                                      ,(int)*offset); 
     1511 
    14291512        /* if we got a short I/O operations, 
    14301513         * fall out and return what we got so far  
     
    14341517            break; 
    14351518        } 
    1436     } 
     1519    }/*end while*/ 
     1520 
    14371521    if (total_count > 0) 
    14381522    { 
     
    14591543        mark_inode_dirty_sync(inode); 
    14601544    } 
     1545 
     1546    gossip_debug(GOSSIP_FILE_DEBUG,"%s/%s(%llu): Value(%d) returned.\n" 
     1547                                      ,__func__ 
     1548                                      ,rw->fnstr 
     1549                                      ,llu(pvfs2_inode->refn.handle) 
     1550                                      ,(int)ret); 
     1551 
    14611552    return ret; 
    14621553} 
     
    15291620    g_pvfs2_stats.reads++; 
    15301621 
     1622#ifdef RESET_FILE_POS 
     1623    return do_readv_writev_wrapper(&rw); 
     1624#else 
    15311625    return do_readv_writev(&rw); 
     1626#endif 
    15321627} 
    15331628 
     
    15601655    rw.off.io.offset = offset; 
    15611656    g_pvfs2_stats.writes++; 
     1657 
     1658#ifdef RESET_FILE_POS 
     1659    return do_readv_writev_wrapper(&rw); 
     1660#else 
    15621661    return do_readv_writev(&rw); 
     1662#endif 
    15631663} 
    15641664 
     
    27012801    if (!rw->async) 
    27022802    { 
     2803 
     2804#ifdef RESET_FILE_POS 
     2805        error = do_readv_writev_wrapper(rw); 
     2806#else 
    27032807        error = do_readv_writev(rw); 
     2808#endif 
     2809 
    27042810        /* not sure this is the correct place or way to update ki_pos but it 
    27052811         * definitely needs to occur somehow. otherwise, a write following  
     
    35533659#endif 
    35543660 
     3661 
     3662#ifdef RESET_FILE_POS 
     3663/* This function wrapper imposes the rule that the user's 
     3664 * request was either entirely fulfilled or it wasn't.  If it wasn't, 
     3665 * then errno will be set appropriately, -1 will be returned as the 
     3666 * request's return value, and the file offset will be repositioned to 
     3667 * the beginning of the request.   If it was successfully completed, then 
     3668 * the amount written/read will be returned and the file offset will be 
     3669 * incremented the appropriate amount. 
     3670 */ 
     3671static ssize_t do_readv_writev_wrapper( struct rw_options *rw) 
     3672{ 
     3673    ssize_t ret; 
     3674     
     3675    gossip_err("Wrapper called.\n"); 
     3676 
     3677    /* Save the file's current offset before issuing this read/write 
     3678     * request. 
     3679     */ 
     3680    rw->off.io.offset_before_request = *(rw->off.io.offset); 
     3681 
     3682    /* If the return code from the request is negative, 
     3683     * restore the offset to it's original value. 
     3684     */ 
     3685    ret = do_readv_writev(rw); 
     3686    if (ret < 0) 
     3687    { 
     3688       *(rw->off.io.offset) = rw->off.io.offset_before_request; 
     3689    } 
     3690    return (ret); 
     3691} 
     3692#endif 
     3693 
     3694 
     3695 
    35553696/* 
    35563697 * Local variables: 
  • branches/stable/src/kernel/linux-2.6/pvfs2-kernel.h

    r9298 r9336  
    385385    /*  buffer and re-populate it.                                       */  
    386386    int uses_shared_memory; 
    387  
    388387 
    389388    pvfs2_upcall_t upcall;