Changeset 7861

Show
Ignore:
Timestamp:
07/07/09 18:06:08 (4 years ago)
Author:
sson
Message:

Fixed bugs that occurred when more than 4 servers performed stripe alignment and allreduce operations.

Location:
branches/as-branch/src/server
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • branches/as-branch/src/server/allreduce.sm

    r7843 r7861  
    175175 
    176176        send_recv_op->u.send_recv.myRank = s_op->u.allreduce.myRank; 
    177         send_recv_op->u.send_recv.mask = 0x1; 
     177        send_recv_op->u.send_recv.mask = s_op->u.allreduce.mask; 
    178178 
    179179        ret = PINT_sm_push_frame(smcb, 0, send_recv_op); 
  • branches/as-branch/src/server/kmeans.sm

    r7853 r7861  
    302302                         s_op->u.kmeans.newClusters[i][j]); 
    303303 
    304     gossip_debug(GOSSIP_IO_DEBUG, "before Allreduce\n"); 
     304    gossip_debug(GOSSIP_IO_DEBUG, "before Allreduce: delta=%f\n", s_op->u.kmeans.delta); 
    305305    for (i=0; i<s_op->u.kmeans.numClusters; i++) 
    306306        for (j=0; j<s_op->u.kmeans.numCoords; j++) 
  • branches/as-branch/src/server/pipeline.sm

    r7853 r7861  
    312312{ 
    313313    struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT); 
    314     js_p->error_code = 0; 
    315314    PVFS_size file_req_offset = s_op->u.pipeline.file_req_offset; 
    316315    PINT_request_file_data fdata = s_op->u.pipeline.file_data; 
     
    319318    PVFS_size count; 
    320319    PVFS_offset strip_boundary; 
    321  
    322320    PVFS_offset loff = fdata.dist->methods->physical_to_logical_offset(fdata.dist->params, &fdata, s_op->u.pipeline.offsets[0]); 
     321    js_p->error_code = 0; 
    323322     
    324323    s_op->u.pipeline.loff = loff; 
     
    346345        count = (PVFS_size)(s_op->u.pipeline.buffer_used)/((*PVFS_FLOAT).ub); 
    347346        if(s_op->u.pipeline.op == 0x5800000f) { /* KMEANS */ 
    348             count = (PVFS_size)(s_op->u.pipeline.buffer_used)/((*PVFS_FLOAT).ub)/18; /* FIXME: 18 is numCoords */ 
    349             s_op->u.pipeline.unaligned_size = 0; /* FIXME */ 
     347            int cunit = ((*PVFS_FLOAT).ub)*18; 
     348            PVFS_size new_size; 
     349            float ctmp = ((float)(s_op->u.pipeline.buffer_used-file_req_offset))/((*PVFS_FLOAT).ub)/18; 
     350 
     351            gossip_debug(GOSSIP_IO_DEBUG, "ctmp=%f\n", ctmp); 
     352 
     353            /* last stripe? */ 
     354            if(s_op->u.pipeline.dfile_index == (s_op->u.pipeline.dfile_count-1)) 
     355                count = floor(ctmp); 
     356            else 
     357                count = ceil(ctmp); 
     358            new_size = count*cunit; 
     359            s_op->u.pipeline.new_buffer_used = new_size; 
     360            s_op->u.pipeline.buffer= realloc(s_op->u.pipeline.buffer, new_size); 
     361            s_op->u.pipeline.unaligned_size = new_size-s_op->u.pipeline.buffer_used; 
    350362            gossip_debug(GOSSIP_IO_DEBUG, "FLOAT: count=%lld\n", lld(count)); 
    351363        } 
    352364        gossip_debug(GOSSIP_IO_DEBUG, "strip_size=%ld, count=%lld\n",  
    353365                     dparam->strip_size, lld(count)); 
    354         //strip_boundary = ((int)(loff/(dparam->strip_size)))*(dparam->strip_size); 
    355         //s_op->u.pipeline.unaligned_size = loff-strip_boundary; 
     366#if 0 
     367        strip_boundary = ((int)(loff/(dparam->strip_size)))*(dparam->strip_size); 
     368        s_op->u.pipeline.unaligned_size = loff-strip_boundary; 
    356369        if(dparam->strip_size > (count*(*PVFS_FLOAT).ub)) { 
    357370            s_op->u.pipeline.unaligned_size = dparam->strip_size -  
     
    361374                    - (count*((*PVFS_FLOAT).ub)*18); 
    362375        } 
     376#endif 
    363377#if 0    
    364378        if (loff == strip_boundary && file_req_offset != 0) { 
     
    367381#endif 
    368382        if (s_op->u.pipeline.unaligned_size != 0 && count != 0) { 
    369             js_p->error_code = UNALIGNED; 
     383            /* if it is the last stripe, no need to bring data */ 
     384            /* FIXME: we assume no round-robin stripes for now,  
     385               but shouldn't it be more general? */ 
     386            if(s_op->u.pipeline.dfile_index != (s_op->u.pipeline.dfile_count-1)) 
     387                js_p->error_code = UNALIGNED; 
    370388            gossip_debug(GOSSIP_IO_DEBUG, "unaligned_size=%lld\n",  
    371389                         lld(s_op->u.pipeline.unaligned_size)); 
     
    405423    int next_server_index; 
    406424    PVFS_handle next_server_handle; 
     425    PINT_request_file_data fdata = s_op->u.pipeline.file_data; 
     426    PVFS_simple_stripe_params *dparam =  
     427        (PVFS_simple_stripe_params*)fdata.dist->params; 
    407428 
    408429    /* init msgpair */ 
     
    426447                                  PVFS_BYTE, &s_op->u.pipeline.file_req); 
    427448     
    428     s_op->u.pipeline.file_req_offset = (((int)(s_op->u.pipeline.loff/262144))+1)*262144; /* FIXME */ 
    429      
     449    /* strip_size from dparam */ 
     450    s_op->u.pipeline.file_req_offset = (((int)(s_op->u.pipeline.loff/dparam->strip_size))+1)*dparam->strip_size; 
     451 
    430452    regions = 1; 
    431453    gossip_debug(GOSSIP_IO_DEBUG, "s_op->u.pipeline.file_req_offset=%lld\n", lld(s_op->u.pipeline.file_req_offset)); 
     
    788810    kmeans_op->u.kmeans.threshold = 0.001; /* FIXME */ 
    789811    kmeans_op->u.kmeans.totalNumObjs = 17695; /* FIXME: should be obtained from allreduce(numObjs, SUM) */ 
    790     kmeans_op->u.kmeans.numObjs = s_op->u.pipeline.buffer_used/sizeof(float)/numCoords; 
     812    kmeans_op->u.kmeans.numObjs = s_op->u.pipeline.new_buffer_used/sizeof(float)/numCoords; 
    791813    gossip_debug(GOSSIP_IO_DEBUG, "numObjs=%d\n", kmeans_op->u.kmeans.numObjs); 
    792814 
  • branches/as-branch/src/server/pvfs2-server.h

    r7851 r7861  
    386386    char *buffer;  
    387387    PVFS_size buffer_size; 
    388     PVFS_size buffer_used;  
     388    PVFS_size buffer_used; 
     389    PVFS_size new_buffer_used; 
    389390    PVFS_size out_size; 
    390391    PINT_segpool_handle_t seg_handle;