Changeset 8304
- Timestamp:
- 04/19/10 16:39:13 (3 years ago)
- Location:
- trunk/src
- Files:
-
- 15 modified
-
common/quickhash/quickhash.h (modified) (1 diff)
-
io/bmi/bmi-method-support.c (modified) (1 diff)
-
io/bmi/bmi-method-support.h (modified) (5 diffs)
-
io/bmi/bmi.c (modified) (9 diffs)
-
io/bmi/bmi.h (modified) (6 diffs)
-
io/bmi/bmi_gm/bmi-gm.c (modified) (17 diffs)
-
io/bmi/bmi_ib/ib.c (modified) (10 diffs)
-
io/bmi/bmi_ib/ib.h (modified) (3 diffs)
-
io/bmi/bmi_mx/mx.c (modified) (19 diffs)
-
io/bmi/bmi_mx/mx.h (modified) (3 diffs)
-
io/bmi/bmi_portals/portals.c (modified) (14 diffs)
-
io/bmi/bmi_tcp/bmi-tcp-addressing.h (modified) (2 diffs)
-
io/bmi/bmi_tcp/bmi-tcp.c (modified) (32 diffs)
-
io/bmi/op-list.c (modified) (1 diff)
-
io/bmi/op-list.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/common/quickhash/quickhash.h
r7468 r8304 342 342 } 343 343 344 /* used for cases where we the key is already in good shape for hashing */ 345 static inline int quickhash_null32_hash(void *k, int table_size) 346 { 347 uint32_t *tmp = k; 348 return(int)(*tmp & (table_size - 1)); 349 } 350 344 351 #endif /* QUICKHASH_H */ 345 352 -
trunk/src/io/bmi/bmi-method-support.c
r6434 r8304 101 101 memset(my_method_addr, 0, (ssize + payload_size)); 102 102 my_method_addr->method_type = method_type; 103 my_method_addr->ref_count = 1; 103 104 104 105 my_method_addr->method_data = (char *) my_method_addr + ssize; -
trunk/src/io/bmi/bmi-method-support.h
r8015 r8304 44 44 { 45 45 int method_type; 46 int ref_count; 46 47 void *method_data; /* area to be used by specific methods */ 47 48 void *parent; /* pointer back to generic BMI address info */ 49 struct bmi_method_addr* primary; 50 struct bmi_method_addr* secondary; 48 51 }; 49 52 typedef struct bmi_method_addr *bmi_method_addr_p; … … 78 81 int (*unexpected_free) (void *); 79 82 80 int (*post_send) (bmi_op_id_t *,81 bmi_method_addr_p,82 const void *,83 bmi_size_t,84 enum bmi_buffer_type,85 bmi_msg_tag_t,86 void *,87 bmi_context_id,88 PVFS_hint hints);89 int (*post_sendunexpected) (bmi_op_id_t *,90 bmi_method_addr_p,91 const void *,92 bmi_size_t,93 enum bmi_buffer_type,94 bmi_msg_tag_t,95 void *,96 bmi_context_id,97 PVFS_hint hints);98 int (*post_recv) (bmi_op_id_t *,99 bmi_method_addr_p,100 void *,101 bmi_size_t,102 bmi_size_t *,103 enum bmi_buffer_type,104 bmi_msg_tag_t,105 void *,106 bmi_context_id,107 PVFS_hint hints);108 83 int (*test) (bmi_op_id_t, 109 84 int *, … … 133 108 int *, 134 109 struct bmi_method_unexpected_info *, 110 uint8_t, 135 111 int); 136 112 bmi_method_addr_p (*method_addr_lookup) (const char *); … … 166 142 enum bmi_buffer_type, 167 143 bmi_msg_tag_t, 144 uint8_t, 168 145 void *, 169 146 bmi_context_id, … … 190 167 void *user_ptr; /* user_ptr associated with this op */ 191 168 bmi_msg_tag_t msg_tag; /* message tag */ 169 uint8_t class; /* message class (if unexpected) */ 192 170 bmi_error_code_t error_code; /* final status of operation */ 193 171 bmi_size_t amt_complete; /* how much is completed */ -
trunk/src/io/bmi/bmi.c
r8211 r8304 666 666 bmi_hint hints) 667 667 { 668 ref_st_p tmp_ref = NULL; 669 int ret = -1; 670 671 gossip_debug(GOSSIP_BMI_DEBUG_OFFSETS, 672 "BMI_post_recv: addr: %ld, offset: 0x%lx, size: %ld, tag: %d\n", 673 (long)src, (long)buffer, (long)expected_size, (int)tag); 674 675 *id = 0; 676 677 gen_mutex_lock(&ref_mutex); 678 tmp_ref = ref_list_search_addr(cur_ref_list, src); 679 if (!tmp_ref) 680 { 681 gen_mutex_unlock(&ref_mutex); 682 return (bmi_errno_to_pvfs(-EPROTO)); 683 } 684 gen_mutex_unlock(&ref_mutex); 685 686 ret = tmp_ref->interface->post_recv( 687 id, tmp_ref->method_addr, buffer, expected_size, actual_size, 688 buffer_type, tag, user_ptr, context_id, (PVFS_hint)hints); 689 return (ret); 668 return(BMI_post_recv_list(id, src, &buffer, &expected_size, 1, 669 expected_size, actual_size, buffer_type, tag, user_ptr, context_id, 670 hints)); 690 671 } 691 672 … … 705 686 bmi_hint hints) 706 687 { 707 ref_st_p tmp_ref = NULL; 708 int ret = -1; 709 710 gossip_debug(GOSSIP_BMI_DEBUG_OFFSETS, 711 "BMI_post_send: addr: %ld, offset: 0x%lx, size: %ld, tag: %d\n", 712 (long)dest, (long)buffer, (long)size, (int)tag); 713 714 *id = 0; 715 716 gen_mutex_lock(&ref_mutex); 717 tmp_ref = ref_list_search_addr(cur_ref_list, dest); 718 if (!tmp_ref) 719 { 720 gen_mutex_unlock(&ref_mutex); 721 return (bmi_errno_to_pvfs(-EPROTO)); 722 } 723 gen_mutex_unlock(&ref_mutex); 724 725 ret = tmp_ref->interface->post_send( 726 id, tmp_ref->method_addr, buffer, size, buffer_type, tag, 727 user_ptr, context_id, (PVFS_hint)hints); 728 return (ret); 688 return(BMI_post_send_list(id, dest, &buffer, &size, 1, size, 689 buffer_type, tag, user_ptr, context_id, hints)); 729 690 } 730 691 … … 734 695 * \return 0 on success, -errno on failure. 735 696 */ 736 int BMI_post_sendunexpected (bmi_op_id_t * id,697 int BMI_post_sendunexpected_class(bmi_op_id_t * id, 737 698 BMI_addr_t dest, 738 699 const void *buffer, … … 740 701 enum bmi_buffer_type buffer_type, 741 702 bmi_msg_tag_t tag, 703 uint8_t class, 742 704 void *user_ptr, 743 705 bmi_context_id context_id, 744 706 bmi_hint hints) 745 707 { 746 ref_st_p tmp_ref = NULL; 747 int ret = -1; 748 749 gossip_debug(GOSSIP_BMI_DEBUG_OFFSETS, 750 "BMI_post_sendunexpected: addr: %ld, offset: 0x%lx, size: %ld, tag: %d\n", 751 (long)dest, (long)buffer, (long)size, (int)tag); 752 753 *id = 0; 754 755 gen_mutex_lock(&ref_mutex); 756 tmp_ref = ref_list_search_addr(cur_ref_list, dest); 757 if (!tmp_ref) 758 { 759 gen_mutex_unlock(&ref_mutex); 760 return (bmi_errno_to_pvfs(-EPROTO)); 761 } 762 gen_mutex_unlock(&ref_mutex); 763 764 ret = tmp_ref->interface->post_sendunexpected( 765 id, tmp_ref->method_addr, buffer, size, buffer_type, tag, 766 user_ptr, context_id, (PVFS_hint)hints); 767 return (ret); 708 return(BMI_post_sendunexpected_list_class(id, dest, &buffer, &size, 1, size, 709 buffer_type, tag, class, user_ptr, context_id, hints)); 768 710 } 769 711 … … 982 924 * \return 0 on success, -errno on failure. 983 925 */ 984 int BMI_testunexpected (int incount,926 int BMI_testunexpected_class(int incount, 985 927 int *outcount, 986 928 struct BMI_unexpected_info *info_array, 929 uint8_t class, 987 930 int max_idle_time_ms) 988 931 { … … 1016 959 ret = active_method_table[i]->testunexpected( 1017 960 (incount - position), &tmp_outcount, 1018 (&(sub_info[position])), max_idle_time_ms);961 (&(sub_info[position])), class, max_idle_time_ms); 1019 962 if (ret < 0) 1020 963 { … … 1841 1784 * -errno on failure. 1842 1785 */ 1843 int BMI_post_sendunexpected_list (bmi_op_id_t * id,1786 int BMI_post_sendunexpected_list_class(bmi_op_id_t * id, 1844 1787 BMI_addr_t dest, 1845 1788 const void *const *buffer_list, … … 1849 1792 enum bmi_buffer_type buffer_type, 1850 1793 bmi_msg_tag_t tag, 1794 uint8_t class, 1851 1795 void *user_ptr, 1852 1796 bmi_context_id context_id, … … 1887 1831 ret = tmp_ref->interface->post_sendunexpected_list( 1888 1832 id, tmp_ref->method_addr, buffer_list, size_list, 1889 list_count, total_size, buffer_type, tag, user_ptr,1833 list_count, total_size, buffer_type, tag, class, user_ptr, 1890 1834 context_id, (PVFS_hint)hints); 1891 1835 -
trunk/src/io/bmi/bmi.h
r7471 r8304 20 20 #ifndef __BMI_H 21 21 #define __BMI_H 22 23 #include <stdint.h> 22 24 23 25 #include "bmi-types.h" … … 53 55 bmi_hint hints); 54 56 55 int BMI_post_sendunexpected (bmi_op_id_t * id,57 int BMI_post_sendunexpected_class(bmi_op_id_t * id, 56 58 BMI_addr_t dest, 57 59 const void *buffer, … … 59 61 enum bmi_buffer_type buffer_type, 60 62 bmi_msg_tag_t tag, 63 uint8_t class, 61 64 void *user_ptr, 62 65 bmi_context_id context_id, 63 66 bmi_hint hints); 67 #define BMI_post_sendunexpected(__id, __dest, __buffer, __size, __buffer_type, __tag, __user_ptr, __context_id, __hints) \ 68 BMI_post_sendunexpected_class(__id, __dest, __buffer, __size, __buffer_type, __tag, 0, __user_ptr, __context_id, __hints) 64 69 65 70 int BMI_post_recv(bmi_op_id_t * id, … … 92 97 bmi_context_id context_id); 93 98 94 int BMI_testunexpected (int incount,99 int BMI_testunexpected_class(int incount, 95 100 int *outcount, 96 101 struct BMI_unexpected_info *info_array, 102 uint8_t class, 97 103 int max_idle_time_ms); 104 #define BMI_testunexpected(__incount, __outcount, __info_array, __max_idle_time_ms) \ 105 BMI_testunexpected_class(__incount, __outcount, __info_array, 0, __max_idle_time_ms) 98 106 99 107 int BMI_testcontext(int incount, … … 165 173 bmi_hint hints); 166 174 167 int BMI_post_sendunexpected_list (bmi_op_id_t * id,175 int BMI_post_sendunexpected_list_class(bmi_op_id_t * id, 168 176 BMI_addr_t dest, 169 177 const void *const *buffer_list, … … 174 182 enum bmi_buffer_type buffer_type, 175 183 bmi_msg_tag_t tag, 184 uint8_t class, 176 185 void *user_ptr, 177 186 bmi_context_id context_id, 178 187 bmi_hint hints); 188 #define BMI_post_sendunexpected_list(__id, __dest, __buffer_list, __size_list, __list_count, __total_size, __buffer_type, __tag, __user_ptr, __context_id, __hints) \ 189 BMI_post_sendunexpected_list_class(__id, __dest, __buffer_list, __size_list, __list_count, __total_size, __buffer_type, __tag, 0, __user_ptr, __context_id, __hints) 179 190 180 191 int BMI_cancel(bmi_op_id_t id, -
trunk/src/io/bmi/bmi_gm/bmi-gm.c
r8015 r8304 57 57 enum bmi_op_type send_recv); 58 58 int BMI_gm_unexpected_free(void *buffer); 59 int BMI_gm_post_send(bmi_op_id_t * id,60 bmi_method_addr_p dest,61 const void *buffer,62 bmi_size_t size,63 enum bmi_buffer_type buffer_type,64 bmi_msg_tag_t tag,65 void *user_ptr,66 bmi_context_id context_id,67 PVFS_hint hints);68 59 int BMI_gm_post_send_list(bmi_op_id_t * id, 69 60 bmi_method_addr_p dest, … … 85 76 enum bmi_buffer_type buffer_type, 86 77 bmi_msg_tag_t tag, 78 uint8_t class, 87 79 void *user_ptr, 88 80 bmi_context_id context_id, 89 81 PVFS_hint hints); 90 int BMI_gm_post_sendunexpected(bmi_op_id_t * id,91 bmi_method_addr_p dest,92 const void *buffer,93 bmi_size_t size,94 enum bmi_buffer_type buffer_type,95 bmi_msg_tag_t tag,96 void *user_ptr,97 bmi_context_id context_id,98 PVFS_hint hints);99 int BMI_gm_post_recv(bmi_op_id_t * id,100 bmi_method_addr_p src,101 void *buffer,102 bmi_size_t expected_size,103 bmi_size_t * actual_size,104 enum bmi_buffer_type buffer_type,105 bmi_msg_tag_t tag,106 void *user_ptr,107 bmi_context_id context_id,108 PVFS_hint hints);109 82 int BMI_gm_post_recv_list(bmi_op_id_t * id, 110 83 bmi_method_addr_p src, … … 146 119 int *outcount, 147 120 struct bmi_method_unexpected_info *info, 121 uint8_t class, 148 122 int max_idle_time_ms); 149 123 bmi_method_addr_p BMI_gm_method_addr_lookup(const char *id_string); … … 166 140 .memfree = BMI_gm_memfree, 167 141 .unexpected_free = BMI_gm_unexpected_free, 168 .post_send = BMI_gm_post_send,169 .post_sendunexpected = BMI_gm_post_sendunexpected,170 .post_recv = BMI_gm_post_recv,171 142 .test = BMI_gm_test, 172 143 .testsome = BMI_gm_testsome, … … 264 235 bmi_msg_tag_t msg_tag; /* message tag */ 265 236 int32_t actual_size; 237 uint8_t class; 266 238 }; 267 239 struct ctrl_put … … 386 358 bmi_msg_tag_t msg_tag, 387 359 bmi_method_addr_p map, 388 void *buffer); 360 void *buffer, 361 uint8_t class); 389 362 static int immed_recv_handler(bmi_size_t actual_size, 390 363 bmi_msg_tag_t msg_tag, … … 899 872 } 900 873 901 902 /* BMI_gm_post_send()903 *904 * Submits send operations.905 *906 * returns 0 on success that requires later poll, returns 1 on instant907 * completion, -errno on failure908 */909 int BMI_gm_post_send(bmi_op_id_t * id,910 bmi_method_addr_p dest,911 const void *buffer,912 bmi_size_t size,913 enum bmi_buffer_type buffer_type,914 bmi_msg_tag_t tag,915 void *user_ptr,916 bmi_context_id context_id,917 PVFS_hint hints)918 {919 int buffer_status = GM_BUF_USER_ALLOC;920 void *new_buffer = NULL;921 struct ctrl_msg *new_ctrl_msg = NULL;922 bmi_size_t buffer_size = 0;923 int ret = -1;924 925 gossip_ldebug(GOSSIP_BMI_DEBUG_GM, "BMI_gm_post_send called.\n");926 927 /* clear id immediately for safety */928 *id = 0;929 930 /* make sure it's not too big */931 if (size > GM_MODE_REND_LIMIT)932 {933 return (bmi_gm_errno_to_pvfs(-EMSGSIZE));934 }935 936 gen_mutex_lock(&interface_mutex);937 if (buffer_type != BMI_PRE_ALLOC)938 {939 if (size <= GM_IMMED_LENGTH)940 {941 /* pad enough room for a ctrl structure */942 buffer_size = sizeof(struct ctrl_msg) + size;943 944 /* create a new buffer and copy */945 new_buffer = (void *) gm_dma_malloc(local_port, (unsigned946 long) buffer_size);947 if (!new_buffer)948 {949 gen_mutex_unlock(&interface_mutex);950 gossip_lerr("Error: gm_dma_malloc failure.\n");951 return (bmi_gm_errno_to_pvfs(-ENOMEM));952 }953 memcpy(new_buffer, buffer, size);954 /* this seems little shady, but we are going to go ahead and forget955 * about the buffer that the user gave us. It serves no purpose956 * here anymore.957 */958 buffer = new_buffer;959 buffer_status = GM_BUF_METH_ALLOC;960 }961 else962 {963 buffer_status = GM_BUF_METH_REG;964 }965 }966 967 if (size <= GM_IMMED_LENGTH)968 {969 /* Immediate mode stuff */970 new_ctrl_msg = (struct ctrl_msg *) (buffer + size);971 new_ctrl_msg->ctrl_type = CTRL_IMMED_TYPE;972 new_ctrl_msg->magic_nr = BMI_MAGIC_NR;973 new_ctrl_msg->u.immed.actual_size = size;974 new_ctrl_msg->u.immed.msg_tag = tag;975 ret = gm_post_send_build_op(id, dest, buffer, size,976 tag,977 GM_MODE_IMMED,978 buffer_status, user_ptr, context_id);979 gen_mutex_unlock(&interface_mutex);980 return(ret);981 }982 else983 {984 /* 3 way rendezvous mode */985 ret = gm_post_send_build_op(id, dest, buffer, size,986 tag, GM_MODE_REND,987 buffer_status, user_ptr, context_id);988 gen_mutex_unlock(&interface_mutex);989 return(ret);990 }991 }992 993 874 /* BMI_gm_post_send_list() 994 875 * … … 1020 901 1021 902 gossip_ldebug(GOSSIP_BMI_DEBUG_GM, "BMI_gm_post_send_list called.\n"); 1022 1023 /* if there is only one buffer in the list, pass it on to the1024 * normal post_send() function because it is more efficient in1025 * the single buffer case1026 */1027 if(list_count == 1)1028 {1029 return(BMI_gm_post_send(id, dest, buffer_list[0], size_list[0],1030 buffer_type, tag, user_ptr, context_id, hints));1031 }1032 903 1033 904 /* TODO: think about this some. For now this is going to be … … 1111 982 enum bmi_buffer_type buffer_type, 1112 983 bmi_msg_tag_t tag, 984 uint8_t class, 1113 985 void *user_ptr, 1114 986 bmi_context_id context_id, … … 1126 998 "BMI_gm_post_sendunexpected_list called.\n"); 1127 999 1128 /* if there is only one buffer in the list, pass it on to the1129 * normal post_sendunexpected() function because it is more1130 * efficient in the single buffer case1131 */1132 if(list_count == 1)1133 {1134 return(BMI_gm_post_sendunexpected(id, dest, buffer_list[0],1135 size_list[0], buffer_type, tag, user_ptr, context_id, hints));1136 }1137 1138 1000 /* TODO: think about this some. For now this is going to be 1139 1001 * lame because we aren't going to take advantage of … … 1179 1041 new_ctrl_msg->u.immed.actual_size = total_size; 1180 1042 new_ctrl_msg->u.immed.msg_tag = tag; 1043 new_ctrl_msg->u.immed.class = class; 1181 1044 ret = gm_post_send_build_op(id, dest, new_buffer, total_size, 1182 1045 tag, … … 1185 1048 gen_mutex_unlock(&interface_mutex); 1186 1049 return(ret); 1187 }1188 1189 1190 /* BMI_gm_post_sendunexpected()1191 *1192 * Submits unexpected send operations.1193 *1194 * returns 0 on success that requires later poll, returns 1 on instant1195 * completion, -errno on failure1196 */1197 int BMI_gm_post_sendunexpected(bmi_op_id_t * id,1198 bmi_method_addr_p dest,1199 const void *buffer,1200 bmi_size_t size,1201 enum bmi_buffer_type buffer_type,1202 bmi_msg_tag_t tag,1203 void *user_ptr,1204 bmi_context_id context_id,1205 PVFS_hint hints)1206 {1207 int buffer_status = GM_BUF_USER_ALLOC;1208 void *new_buffer = NULL;1209 struct ctrl_msg *new_ctrl_msg = NULL;1210 bmi_size_t buffer_size = 0;1211 int ret;1212 1213 gossip_ldebug(GOSSIP_BMI_DEBUG_GM, "BMI_gm_post_sendunexpected called.\n");1214 1215 /* clear id immediately for safety */1216 *id = 0;1217 1218 if (size > GM_MODE_UNEXP_LIMIT)1219 {1220 return (bmi_gm_errno_to_pvfs(-EMSGSIZE));1221 }1222 1223 gen_mutex_lock(&interface_mutex);1224 if (buffer_type != BMI_PRE_ALLOC)1225 {1226 /* pad enough room for a ctrl structure */1227 buffer_size = sizeof(struct ctrl_msg) + size;1228 1229 /* create a new buffer and copy */1230 new_buffer = (void *) gm_dma_malloc(local_port, (unsigned1231 long) buffer_size);1232 if (!new_buffer)1233 {1234 gossip_lerr("Error: gm_dma_malloc failure.\n");1235 gen_mutex_unlock(&interface_mutex);1236 return (bmi_gm_errno_to_pvfs(-ENOMEM));1237 }1238 memcpy(new_buffer, buffer, size);1239 buffer_status = GM_BUF_METH_ALLOC;1240 /* this seems little shady, but we are going to go ahead and forget1241 * about the buffer that the user gave us. It serves no purpose1242 * here anymore.1243 */1244 buffer = new_buffer;1245 }1246 1247 /* Immediate mode stuff */1248 new_ctrl_msg = (struct ctrl_msg *) (buffer + size);1249 new_ctrl_msg->ctrl_type = CTRL_UNEXP_TYPE;1250 new_ctrl_msg->magic_nr = BMI_MAGIC_NR;1251 new_ctrl_msg->u.immed.actual_size = size;1252 new_ctrl_msg->u.immed.msg_tag = tag;1253 1254 ret = gm_post_send_build_op(id, dest, buffer, size,1255 tag, GM_MODE_UNEXP,1256 buffer_status, user_ptr, context_id);1257 gen_mutex_unlock(&interface_mutex);1258 return(ret);1259 }1260 1261 1262 1263 /* BMI_gm_post_recv()1264 *1265 * Submits recv operations.1266 *1267 * returns 0 on success that requires later poll, returns 1 on instant1268 * completion, -errno on failure1269 */1270 int BMI_gm_post_recv(bmi_op_id_t * id,1271 bmi_method_addr_p src,1272 void *buffer,1273 bmi_size_t expected_size,1274 bmi_size_t * actual_size,1275 enum bmi_buffer_type buffer_type,1276 bmi_msg_tag_t tag,1277 void *user_ptr,1278 bmi_context_id context_id,1279 PVFS_hint hints)1280 {1281 method_op_p query_op = NULL;1282 method_op_p new_method_op = NULL;1283 struct op_list_search_key key;1284 struct gm_op *gm_op_data = NULL;1285 struct gm_addr *gm_addr_data = NULL;1286 int ret = -1;1287 int buffer_status = GM_BUF_USER_ALLOC;1288 1289 gossip_ldebug(GOSSIP_BMI_DEBUG_GM, "BMI_gm_post_recv called.\n");1290 1291 /* what happens here ?1292 * see if the operation is already in progress (IND_NEED_RECV_POST)1293 * - if so, match it and poke it to continue1294 * - if not, create an op and queue it up in IND_NEED_CTRL_MATCH1295 */1296 1297 /* clear id immediately for safety */1298 *id = 0;1299 1300 /* make sure it's not too big */1301 if (expected_size > GM_MODE_REND_LIMIT)1302 {1303 return (bmi_gm_errno_to_pvfs(-EMSGSIZE));1304 }1305 1306 /* set flag to indicate if we need to pinn this buffer internally */1307 if(expected_size > GM_IMMED_LENGTH && buffer_type != BMI_PRE_ALLOC)1308 buffer_status = GM_BUF_METH_REG;1309 1310 /* push work first; use this as an opportunity to make sure that the1311 * receive keeps buffers moving as quickly as possible1312 */1313 gen_mutex_lock(&interface_mutex);1314 ret = gm_do_work(0);1315 if (ret < 0)1316 {1317 gen_mutex_unlock(&interface_mutex);1318 return (ret);1319 }1320 1321 /* see if this operation has already begun... */1322 memset(&key, 0, sizeof(struct op_list_search_key));1323 key.method_addr = src;1324 key.method_addr_yes = 1;1325 key.msg_tag = tag;1326 key.msg_tag_yes = 1;1327 1328 query_op = op_list_search(op_list_array[IND_NEED_RECV_POST], &key);1329 if (query_op)1330 {1331 gm_addr_data = query_op->addr->method_data;1332 *id = query_op->op_id;1333 query_op->context_id = context_id;1334 query_op->user_ptr = user_ptr;1335 gm_op_data = query_op->method_data;1336 gm_op_data->buffer_status = buffer_status;1337 1338 if(query_op->actual_size > expected_size)1339 {1340 gossip_lerr("Error: message ordering violation;\n");1341 gossip_lerr("Error: message too large for next buffer.\n");1342 gen_mutex_unlock(&interface_mutex);1343 return(bmi_gm_errno_to_pvfs(-EPROTO));1344 }1345 1346 /* we found the operation in progress. */1347 if (query_op->mode == GM_MODE_REND)1348 {1349 /* post has occurred */1350 op_list_remove(query_op);1351 query_op->buffer = buffer;1352 1353 /* now we need a token to send a ctrl ack */1354 if (gm_alloc_send_token(local_port, GM_HIGH_PRIORITY))1355 {1356 #ifdef ENABLE_GM_BUFPOOL1357 if (!io_buffers_exhausted())1358 {1359 #endif /* ENABLE_GM_BUFPOOL */1360 prepare_for_recv(query_op);1361 ret = 0;1362 #ifdef ENABLE_GM_BUFPOOL1363 }1364 else1365 {1366 gm_free_send_token(local_port, GM_HIGH_PRIORITY);1367 op_list_add(op_list_array[IND_NEED_SEND_TOK_HI_CTRLACK],1368 query_op);1369 ret = 0;1370 }1371 #endif /* ENABLE_GM_BUFPOOL */1372 }1373 else1374 {1375 /* we don't have enough tokens */1376 op_list_add(op_list_array[IND_NEED_SEND_TOK_HI_CTRLACK],1377 query_op);1378 ret = 0;1379 }1380 }1381 else if (query_op->mode == GM_MODE_IMMED)1382 {1383 /* all is done except memory copy- complete instantly */1384 op_list_remove(query_op);1385 gm_op_data = query_op->method_data;1386 memcpy(buffer, query_op->buffer, query_op->actual_size);1387 *actual_size = query_op->actual_size;1388 free(query_op->buffer);1389 *id = 0;1390 dealloc_gm_method_op(query_op);1391 ret = 1;1392 }1393 else1394 {1395 /* we don't have any other modes implemented yet */1396 ret = bmi_gm_errno_to_pvfs(-ENOSYS);1397 }1398 }1399 else1400 {1401 /* we must create the operation and queue it up */1402 new_method_op = alloc_gm_method_op();1403 if (!new_method_op)1404 {1405 gen_mutex_unlock(&interface_mutex);1406 return (bmi_gm_errno_to_pvfs(-ENOMEM));1407 }1408 *id = new_method_op->op_id;1409 new_method_op->user_ptr = user_ptr;1410 new_method_op->send_recv = BMI_RECV;1411 new_method_op->addr = src;1412 new_method_op->buffer = buffer;1413 new_method_op->expected_size = expected_size;1414 new_method_op->actual_size = 0;1415 new_method_op->msg_tag = tag;1416 new_method_op->context_id = context_id;1417 /* TODO: make sure this is ok */1418 new_method_op->mode = 0;1419 gm_op_data = new_method_op->method_data;1420 gm_op_data->buffer_status = buffer_status;1421 1422 /* just for safety; the user should not use this value in this case */1423 *actual_size = 0;1424 1425 op_list_add(op_list_array[IND_NEED_CTRL_MATCH], new_method_op);1426 ret = 0;1427 }1428 1429 gen_mutex_unlock(&interface_mutex);1430 return (ret);1431 1050 } 1432 1051 … … 1464 1083 1465 1084 gossip_ldebug(GOSSIP_BMI_DEBUG_GM, "BMI_gm_post_recv_list called.\n"); 1466 1467 /* if there is only one buffer in the list, pass it on to the1468 * normal post_recv() function because it is more efficient in1469 * the single buffer case1470 */1471 if(list_count == 1)1472 {1473 return(BMI_gm_post_recv(id, src, buffer_list[0], size_list[0],1474 total_actual_size, buffer_type, tag, user_ptr, context_id, hints));1475 }1476 1085 1477 1086 /* what happens here ? … … 1946 1555 int *outcount, 1947 1556 struct bmi_method_unexpected_info *info, 1557 uint8_t class, 1948 1558 int max_idle_time_ms) 1949 1559 { 1950 1560 int ret = -1; 1951 1561 method_op_p query_op = NULL; 1562 struct op_list_search_key key; 1563 1564 memset(&key, 0, sizeof(struct op_list_search_key)); 1565 key.class = class; 1566 key.class_yes = 1; 1952 1567 1953 1568 *outcount = 0; 1954 1569 1955 1570 gen_mutex_lock(&interface_mutex); 1571 1572 if(op_list_empty(op_list_array[IND_COMPLETE_RECV_UNEXP])) 1573 { 1574 /* nothing ready yet, do some work */ 1575 ret = gm_do_work(max_idle_time_ms*1000); 1576 if (ret < 0) 1577 { 1578 gen_mutex_unlock(&interface_mutex); 1579 return (ret); 1580 } 1581 } 1956 1582 1957 1583 while ((*outcount < incount) && 1958 1584 (query_op = 1959 op_list_shownext(op_list_array[IND_COMPLETE_RECV_UNEXP]))) 1960 { 1961 info[*outcount].error_code = query_op->error_code; 1962 info[*outcount].addr = query_op->addr; 1963 info[*outcount].buffer = query_op->buffer; 1964 info[*outcount].size = query_op->actual_size; 1965 info[*outcount].tag = query_op->msg_tag; 1966 op_list_remove(query_op); 1967 dealloc_gm_method_op(query_op); 1968 (*outcount)++; 1969 } 1970 if(*outcount) 1971 { 1972 gen_mutex_unlock(&interface_mutex); 1973 return(0); 1974 } 1975 1976 /* do some ``real work'' here */ 1977 ret = gm_do_work(max_idle_time_ms*1000); 1978 if (ret < 0) 1979 { 1980 gen_mutex_unlock(&interface_mutex); 1981 return (ret); 1982 } 1983 1984 while ((*outcount < incount) && 1985 (query_op = 1986 op_list_shownext(op_list_array[IND_COMPLETE_RECV_UNEXP]))) 1585 op_list_search(op_list_array[IND_COMPLETE_RECV_UNEXP], &key))) 1987 1586 { 1988 1587 info[*outcount].error_code = query_op->error_code; … … 2905 2504 bmi_msg_tag_t msg_tag, 2906 2505 bmi_method_addr_p map, 2907 void *buffer) 2506 void *buffer, 2507 uint8_t class) 2908 2508 { 2909 2509 method_op_p new_method_op = NULL; … … 2924 2524 new_method_op->mode = GM_MODE_UNEXP; 2925 2525 new_method_op->buffer = buffer; 2526 new_method_op->class = class; 2926 2527 gm_op_data = new_method_op->method_data; 2927 2528 … … 3193 2794 ret = 3194 2795 immed_unexp_recv_handler(ctrl_copy.u.immed.actual_size, 3195 ctrl_copy.u.immed.msg_tag, map, tmp_buffer); 2796 ctrl_copy.u.immed.msg_tag, map, tmp_buffer, 2797 ctrl_copy.u.immed.class); 3196 2798 break; 3197 2799 default: -
trunk/src/io/bmi/bmi_ib/ib.c
r8086 r8304 333 333 ? MSG_EAGER_SENDUNEXPECTED : MSG_EAGER_SEND); 334 334 mh_eager.bmi_tag = sq->bmi_tag; 335 mh_eager.class = 0; 336 if(sq->is_unexpected) 337 mh_eager.class += sq->class; 335 338 encode_msg_header_eager_t(&ptr, &mh_eager); 336 339 … … 809 812 int numbufs, const void *const *buffers, const bmi_size_t *sizes, 810 813 bmi_size_t total_size, bmi_msg_tag_t tag, void *user_ptr, 811 bmi_context_id context_id, int is_unexpected )814 bmi_context_id context_id, int is_unexpected, uint8_t class) 812 815 { 813 816 struct ib_work *sq; … … 827 830 sq->type = BMI_SEND; 828 831 sq->state.send = SQ_WAITING_BUFFER; 832 sq->class = class; 829 833 830 834 debug(2, "%s: sq %p len %lld peer %s", __func__, sq, (long long) total_size, … … 893 897 894 898 static int 895 BMI_ib_post_send(bmi_op_id_t *id, struct bmi_method_addr *remote_map,896 const void *buffer, bmi_size_t total_size,897 enum bmi_buffer_type buffer_flag __unused,898 bmi_msg_tag_t tag, void *user_ptr, bmi_context_id899 context_id, PVFS_hint hints __unused)900 {901 return post_send(id, remote_map, 0, &buffer, &total_size,902 total_size, tag, user_ptr, context_id, 0);903 }904 905 static int906 899 BMI_ib_post_send_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 907 900 const void *const *buffers, const bmi_size_t *sizes, int list_count, … … 911 904 { 912 905 return post_send(id, remote_map, list_count, buffers, sizes, 913 total_size, tag, user_ptr, context_id, 0); 914 } 915 916 static int 917 BMI_ib_post_sendunexpected(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 918 const void *buffer, bmi_size_t total_size, 919 enum bmi_buffer_type buffer_flag __unused, 920 bmi_msg_tag_t tag, void *user_ptr, 921 bmi_context_id context_id, PVFS_hint hints 922 __unused) 923 { 924 return post_send(id, remote_map, 0, &buffer, &total_size, 925 total_size, tag, user_ptr, context_id, 1); 906 total_size, tag, user_ptr, context_id, 0, 0); 926 907 } 927 908 … … 932 913 bmi_size_t total_size, 933 914 enum bmi_buffer_type buffer_flag __unused, 934 bmi_msg_tag_t tag, void *user_ptr, 935 bmi_context_id context_id, PVFS_hint hints 936 __unused) 915 bmi_msg_tag_t tag, 916 uint8_t class, 917 void *user_ptr, 918 bmi_context_id context_id, 919 PVFS_hint hints __unused) 937 920 { 938 921 return post_send(id, remote_map, list_count, buffers, sizes, 939 total_size, tag, user_ptr, context_id, 1 );922 total_size, tag, user_ptr, context_id, 1, class); 940 923 } 941 924 … … 1065 1048 gen_mutex_unlock(&interface_mutex); 1066 1049 return ret; 1067 }1068 1069 static int1070 BMI_ib_post_recv(bmi_op_id_t *id, struct bmi_method_addr *remote_map,1071 void *buffer, bmi_size_t expected_len, bmi_size_t *actual_len __unused,1072 enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr,1073 bmi_context_id context_id, PVFS_hint hints __unused)1074 {1075 return post_recv(id, remote_map, 0, &buffer, &expected_len,1076 expected_len, tag, user_ptr, context_id);1077 1050 } 1078 1051 … … 1372 1345 static int 1373 1346 BMI_ib_testunexpected(int incount __unused, int *outcount, 1374 struct bmi_method_unexpected_info *ui, int max_idle_time)1347 struct bmi_method_unexpected_info *ui, uint8_t class, int max_idle_time) 1375 1348 { 1376 1349 struct qlist_head *l; … … 1392 1365 1393 1366 decode_msg_header_eager_t(&ptr, &mh_eager); 1367 if(mh_eager.class != class) 1368 continue; 1394 1369 1395 1370 debug(2, "%s: found waiting testunexpected", __func__); … … 2122 2097 .memfree = BMI_ib_memfree, 2123 2098 .unexpected_free = BMI_ib_unexpected_free, 2124 .post_send = BMI_ib_post_send,2125 .post_sendunexpected = BMI_ib_post_sendunexpected,2126 .post_recv = BMI_ib_post_recv,2127 2099 .test = BMI_ib_test, 2128 2100 .testsome = BMI_ib_testsome, -
trunk/src/io/bmi/bmi_ib/ib.h
r7665 r8304 243 243 u_int64_t rts_mop_id; /* recv: return tag to give to rts sender */ 244 244 bmi_size_t actual_len; /* recv: could be shorter than posted */ 245 uint8_t class; /* only for unexpected messages */ 245 246 }; 246 247 … … 265 266 msg_header_common_t c; 266 267 bmi_msg_tag_t bmi_tag; 267 u _int32_t __pad;268 uint32_t class; 268 269 } msg_header_eager_t; 269 270 endecode_fields_4(msg_header_eager_t, … … 271 272 uint32_t, c.credit, 272 273 int32_t, bmi_tag, 273 uint32_t, __pad);274 uint32_t, class); 274 275 275 276 /* -
trunk/src/io/bmi/bmi_mx/mx.c
r8184 r8304 1411 1411 1412 1412 static void 1413 bmx_parse_match(uint64_t match, uint8_t *type, uint32_t *id, uint32_t *tag) 1413 bmx_parse_match(uint64_t match, uint8_t *type, uint32_t *id, uint32_t *tag, 1414 uint8_t* class) 1414 1415 { 1415 1416 *type = (uint8_t) (match >> BMX_MSG_SHIFT); 1417 *class = (uint8_t) (match >> BMX_CLASS_SHIFT); 1416 1418 *id = (uint32_t) ((match >> BMX_ID_SHIFT) & BMX_MAX_PEER_ID); /* 20 bits */ 1417 1419 *tag = (uint32_t) (match & BMX_MAX_TAG); /* 32 bits */ … … 1425 1427 uint64_t type = (uint64_t) ctx->mxc_msg_type; 1426 1428 uint64_t id = 0ULL; 1429 uint64_t class = 0ULL; 1427 1430 uint64_t tag = (uint64_t) ((uint32_t) ctx->mxc_tag); 1428 1431 … … 1450 1453 } 1451 1454 1452 ctx->mxc_match = (type << BMX_MSG_SHIFT) | (id << BMX_ID_SHIFT) | tag; 1455 class += ctx->mxc_class; 1456 ctx->mxc_match = (type << BMX_MSG_SHIFT) | (id << BMX_ID_SHIFT) | 1457 tag | (class << BMX_CLASS_SHIFT); 1453 1458 1454 1459 return; … … 1563 1568 const bmi_size_t *sizes, bmi_size_t total_size, 1564 1569 bmi_msg_tag_t tag, void *user_ptr, 1565 bmi_context_id context_id, int is_unexpected,1566 PVFS_hint hints)1570 bmi_context_id context_id, uint8_t class, 1571 int is_unexpected, PVFS_hint hints) 1567 1572 { 1568 1573 struct bmx_ctx *tx = NULL; … … 1643 1648 tx->mxc_tag = tag; 1644 1649 tx->mxc_peer = peer; 1650 tx->mxc_class = class; 1645 1651 if (!is_unexpected) { 1646 1652 tx->mxc_msg_type = BMX_MSG_EXPECTED; … … 1683 1689 1684 1690 static int 1685 BMI_mx_post_send(bmi_op_id_t *id, struct bmi_method_addr *remote_map,1686 const void *buffer, bmi_size_t size,1687 enum bmi_buffer_type buffer_flag __unused,1688 bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id,1689 PVFS_hint hints)1690 {1691 int ret = 0;1692 BMX_ENTER;1693 1694 ret = bmx_post_send_common(id, remote_map, 1, &buffer, &size, size,1695 tag, user_ptr, context_id, 0, hints);1696 1697 BMX_EXIT;1698 1699 return ret;1700 }1701 1702 static int1703 1691 BMI_mx_post_send_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 1704 1692 const void *const *buffers, const bmi_size_t *sizes, int list_count, … … 1712 1700 1713 1701 ret = bmx_post_send_common(id, remote_map, list_count, buffers, sizes, 1714 total_size, tag, user_ptr, context_id, 0, 1715 hints); 1716 1717 BMX_EXIT; 1718 1719 return ret; 1720 } 1721 1722 static int 1723 BMI_mx_post_sendunexpected(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 1724 const void *buffer, bmi_size_t size, 1725 enum bmi_buffer_type buffer_flag __unused, 1726 bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id, 1727 PVFS_hint hints) 1728 { 1729 int ret = 0; 1730 1731 BMX_ENTER; 1732 1733 ret = bmx_post_send_common(id, remote_map, 1, &buffer, &size, size, 1734 tag, user_ptr, context_id, 1, hints); 1702 total_size, tag, user_ptr, context_id, 1703 0, 0, hints); 1735 1704 1736 1705 BMX_EXIT; … … 1743 1712 const void *const *buffers, const bmi_size_t *sizes, int list_count, 1744 1713 bmi_size_t total_size, enum bmi_buffer_type buffer_flag __unused, 1745 bmi_msg_tag_t tag, void *user_ptr, bmi_context_id context_id,1714 bmi_msg_tag_t tag, uint8_t class, void *user_ptr, bmi_context_id context_id, 1746 1715 PVFS_hint hints) 1747 1716 { … … 1751 1720 1752 1721 return bmx_post_send_common(id, remote_map, list_count, buffers, sizes, 1753 total_size, tag, user_ptr, context_id, 1,1754 hints);1722 total_size, tag, user_ptr, context_id, 1723 class, 1, hints); 1755 1724 1756 1725 BMX_EXIT; … … 1906 1875 1907 1876 static int 1908 BMI_mx_post_recv(bmi_op_id_t *id, struct bmi_method_addr *remote_map,1909 void *buffer, bmi_size_t expected_len, bmi_size_t *actual_len __unused,1910 enum bmi_buffer_type buffer_flag __unused, bmi_msg_tag_t tag, void *user_ptr,1911 bmi_context_id context_id,1912 PVFS_hint hints)1913 {1914 int ret = 0;1915 1916 BMX_ENTER;1917 1918 ret = bmx_post_recv_common(id, remote_map, 1, &buffer, &expected_len,1919 expected_len, tag, user_ptr, context_id,1920 hints);1921 1922 BMX_EXIT;1923 1924 return ret;1925 }1926 1927 static int1928 1877 BMI_mx_post_recv_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 1929 1878 void *const *buffers, const bmi_size_t *sizes, int list_count, … … 2012 1961 void *peerp = (void *) &peer; 2013 1962 mx_return_t mxret = MX_SUCCESS; 1963 uint8_t class = 0; 2014 1964 2015 1965 BMX_ENTER; 2016 1966 2017 1967 if (id == 0 && tag == 0 && type == 0) { 2018 bmx_parse_match(match, &type, &id, &tag );1968 bmx_parse_match(match, &type, &id, &tag, &class); 2019 1969 } 2020 1970 … … 2077 2027 struct bmx_ctx *rx = NULL; 2078 2028 uint8_t type = 0; 2029 uint8_t class = 0; 2079 2030 uint32_t id = 0; 2080 2031 uint32_t tag = 0; … … 2083 2034 mx_return_t mxret = MX_SUCCESS; 2084 2035 2085 bmx_parse_match(match_value, &type, &id, &tag );2036 bmx_parse_match(match_value, &type, &id, &tag, &class); 2086 2037 2087 2038 switch (type) { … … 2309 2260 if (result) { 2310 2261 uint8_t type = 0; 2262 uint8_t class = 0; 2311 2263 uint32_t id = 0; 2312 2264 uint32_t sid = 0; … … 2337 2289 continue; 2338 2290 } 2339 bmx_parse_match(rx->mxc_match, &type, &id, &version); 2291 bmx_parse_match(rx->mxc_match, &type, &id, &version, 2292 &class); 2340 2293 if (version != BMX_VERSION) { 2341 2294 /* TODO send error conn_ack */ … … 2801 2754 static int 2802 2755 BMI_mx_testunexpected(int incount __unused, int *outcount, 2803 struct bmi_method_unexpected_info *ui, int max_idle_time __unused)2756 struct bmi_method_unexpected_info *ui, uint8_t class, int max_idle_time __unused) 2804 2757 { 2805 2758 uint32_t result = 0; … … 2812 2765 struct bmx_peer *peer = NULL; 2813 2766 int again = 1; 2767 uint64_t class_match = 0; 2814 2768 2815 2769 if (count++ % 1000 == 0) { … … 2821 2775 2822 2776 bmx_get_completion_token(); 2777 2778 /* must match the correct class as well */ 2779 class_match += class; 2780 match |= (class_match << BMX_CLASS_SHIFT); 2823 2781 2824 2782 /* if the unexpected handler cannot get a rx, it does not post a receive. … … 3160 3118 .memfree = BMI_mx_memfree, 3161 3119 .unexpected_free = BMI_mx_unexpected_free, 3162 .post_send = BMI_mx_post_send,3163 .post_sendunexpected = BMI_mx_post_sendunexpected,3164 .post_recv = BMI_mx_post_recv,3165 3120 .test = BMI_mx_test, 3166 3121 .testsome = 0, -
trunk/src/io/bmi/bmi_mx/mx.h
r7897 r8304 104 104 * Bits Description 105 105 * 60-63 Msg type 106 * 52-59 Reserved106 * 52-59 Msg class 107 107 * 32-51 Peer id (of the sender, assigned by receiver) 108 108 * 0-31 bmi_msg_tag_t … … 118 118 119 119 #define BMX_MSG_SHIFT 60 120 #define BMX_CLASS_SHIFT 52 120 121 #define BMX_ID_SHIFT 32 121 122 #define BMX_MASK_ALL (~0ULL) 122 123 #define BMX_MASK_MSG (0xFULL << BMX_MSG_SHIFT) 124 #define BMX_MASK_MSG_AND_CLASS (0xFFFULL << BMX_CLASS_SHIFT) 123 125 124 126 #define BMX_MAX_PEER_ID ((1<<20) - 1) /* 20 bits - actually 1,048,574 peers … … 268 270 struct bmx_peer *mxc_peer; /* owning peer */ 269 271 bmi_msg_tag_t mxc_tag; /* BMI tag (int32_t) */ 272 uint8_t mxc_class; /* BMI unexpected msg class */ 270 273 uint64_t mxc_match; /* match info */ 271 274 -
trunk/src/io/bmi/bmi_portals/portals.c
r8015 r8304 32 32 #include <src/common/misc/pvfs2-internal.h> /* lld */ 33 33 #include <src/common/id-generator/id-generator.h> 34 #include "pint-hint.h" 34 35 35 36 #ifdef HAVE_VALGRIND_H … … 131 132 * to us for the data. That get will use the second set of match bits. 132 133 * 133 * The rest of the 30top bits are used to encode a sequence number per134 * The the next 22 top bits are used to encode a sequence number per 134 135 * peer. As BMI can post multiple sends with the same tag, we have to 135 136 * be careful that if send #2 for a given tag goes to the zero_md, that 136 137 * when he does the get, he grabs from buffer #2, not buffer #1 because 137 138 * the sender was too slow in unlinking it. 139 * 140 * The next 8 bits are used for the class value on unexpected messages. 138 141 */ 139 142 static const uint64_t match_bits_unexpected = 1ULL << 63; /* 8... */ 140 143 static const uint64_t match_bits_long_send = 1ULL << 62; /* 4... */ 141 static const uint32_t match_bits_seqno_max = 1UL << 30;144 static const uint32_t match_bits_seqno_max = 1UL << 22; 142 145 static const int match_bits_seqno_shift = 32; 146 static const int match_bits_class_shift = 54; 143 147 144 148 static uint64_t mb_from_tag_and_seqno(uint32_t tag, uint32_t seqno) … … 310 314 bmi_msg_tag_t bmi_tag; /* recv: unexpected or nonpp tag that arrived */ 311 315 uint64_t match_bits; /* recv: full match bits, including seqno */ 316 uint8_t bmi_class; /* recv (unexp): class of unexpected message */ 312 317 313 318 int is_unexpected; /* send: if user posted this as unexpected */ … … 488 493 rq->actual_len = ev->mlength; 489 494 rq->bmi_tag = ev->match_bits & 0xffffffffULL; /* just 32 bits */ 495 rq->bmi_class = (uint8_t) (ev->match_bits >> 496 match_bits_class_shift); 490 497 rq->mop.addr = addr_from_nidpid(ev->initiator); 491 498 memcpy(rq->unex_buf, (char *) ev->md.start + ev->offset, … … 836 843 static int bmip_testunexpected(int incount, int *outcount, 837 844 struct bmi_method_unexpected_info *ui, 845 uint8_t class, 838 846 int max_idle_time) 839 847 { … … 850 858 if (n == incount) 851 859 break; 860 if(w->bmi_class != class) 861 continue; 852 862 ui[n].error_code = 0; 853 863 ui[n].addr = w->mop.addr; … … 1079 1089 int numbufs, const void *const *buffers, const bmi_size_t *sizes, 1080 1090 bmi_size_t total_size, bmi_msg_tag_t bmi_tag, void *user_ptr, 1081 bmi_context_id context_id, int is_unexpected )1091 bmi_context_id context_id, int is_unexpected, uint8_t class) 1082 1092 { 1083 1093 struct bmip_method_addr *pma = addr->method_data; … … 1120 1130 lld(total_size), pma->peername, bmi_tag); 1121 1131 /* md without any match entry, for sending */ 1122 mb = match_bits_unexpected | bmi_tag; 1132 mb = 0; 1133 mb += class; 1134 mb << match_bits_class_shift; 1135 mb |= match_bits_unexpected; 1136 mb |= bmi_tag; 1123 1137 ret = PtlMDBind(ni, mdesc, PTL_UNLINK, &sq->md); 1124 1138 if (ret) { … … 1172 1186 } 1173 1187 1174 static int bmip_post_send(bmi_op_id_t *id, struct bmi_method_addr *remote_map,1175 const void *buffer, bmi_size_t total_size,1176 enum bmi_buffer_type buffer_flag __unused,1177 bmi_msg_tag_t tag, void *user_ptr,1178 bmi_context_id context_id)1179 {1180 return post_send(id, remote_map, 0, &buffer, &total_size,1181 total_size, tag, user_ptr, context_id, 0);1182 }1183 1184 1188 static int bmip_post_send_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 1185 1189 const void *const *buffers, … … 1188 1192 enum bmi_buffer_type buffer_flag __unused, 1189 1193 bmi_msg_tag_t tag, void *user_ptr, 1190 bmi_context_id context_id) 1194 bmi_context_id context_id, 1195 PVFS_hint hints __unused) 1191 1196 { 1192 1197 return post_send(id, remote_map, list_count, buffers, sizes, 1193 total_size, tag, user_ptr, context_id, 0); 1194 } 1195 1196 static int bmip_post_sendunexpected(bmi_op_id_t *id, 1197 struct bmi_method_addr *remote_map, 1198 const void *buffer, bmi_size_t total_size, 1199 enum bmi_buffer_type bflag __unused, 1200 bmi_msg_tag_t tag, void *user_ptr, 1201 bmi_context_id context_id) 1202 { 1203 return post_send(id, remote_map, 0, &buffer, &total_size, 1204 total_size, tag, user_ptr, context_id, 1); 1198 total_size, tag, user_ptr, context_id, 0, 0); 1205 1199 } 1206 1200 … … 1211 1205 int list_count, bmi_size_t total_size, 1212 1206 enum bmi_buffer_type bflag __unused, 1213 bmi_msg_tag_t tag, void *user_ptr, 1214 bmi_context_id context_id) 1207 bmi_msg_tag_t tag, 1208 uint8_t class, 1209 void *user_ptr, 1210 bmi_context_id context_id, 1211 PVFS_hint hints __unused) 1215 1212 { 1216 1213 return post_send(id, remote_map, list_count, buffers, sizes, 1217 total_size, tag, user_ptr, context_id, 1 );1214 total_size, tag, user_ptr, context_id, 1, class); 1218 1215 } 1219 1216 … … 1506 1503 } 1507 1504 1508 static int bmip_post_recv(bmi_op_id_t *id, struct bmi_method_addr *remote_map,1509 void *buffer, bmi_size_t expected_len,1510 bmi_size_t *actual_len __unused,1511 enum bmi_buffer_type buffer_flag __unused,1512 bmi_msg_tag_t tag, void *user_ptr,1513 bmi_context_id context_id)1514 {1515 return post_recv(id, remote_map, 0, &buffer, &expected_len,1516 expected_len, tag, user_ptr, context_id);1517 }1518 1519 1505 static int bmip_post_recv_list(bmi_op_id_t *id, struct bmi_method_addr *remote_map, 1520 1506 void *const *buffers, const bmi_size_t *sizes, … … 1523 1509 enum bmi_buffer_type buffer_flag __unused, 1524 1510 bmi_msg_tag_t tag, void *user_ptr, 1525 bmi_context_id context_id) 1511 bmi_context_id context_id, PVFS_hint hints 1512 __unused) 1526 1513 { 1527 1514 return post_recv(id, remote_map, list_count, buffers, sizes, … … 2302 2289 .memfree = bmip_memfree, 2303 2290 .unexpected_free = bmip_unexpected_free, 2304 .post_send = bmip_post_send,2305 .post_sendunexpected = bmip_post_sendunexpected,2306 .post_recv = bmip_post_recv,2307 2291 .test = bmip_test, 2308 2292 .testsome = bmip_testsome, -
trunk/src/io/bmi/bmi_tcp/bmi-tcp-addressing.h
r7674 r8304 12 12 #define __BMI_TCP_ADDRESSING_H 13 13 14 #include <netinet/in.h> 14 15 #include "bmi-types.h" 15 #include <netinet/in.h>16 #include "quickhash.h" 16 17 17 18 /***************************************************************** … … 77 78 char* peer; 78 79 int peer_type; 80 uint32_t addr_hash; /* hash of string identifier */ 81 struct qhash_head hash_link; 82 bmi_method_addr_p parent; 79 83 }; 80 84 -
trunk/src/io/bmi/bmi_tcp/bmi-tcp.c
r7819 r8304 49 49 #include "pint-hint.h" 50 50 #include "pint-event.h" 51 #include "quickhash.h" 52 53 #define BMI_TCP_S2S_MAGIC_NR 51912 51 54 52 55 static gen_mutex_t interface_mutex = GEN_MUTEX_INITIALIZER; … … 69 72 enum bmi_op_type send_recv); 70 73 int BMI_tcp_unexpected_free(void *buffer); 71 int BMI_tcp_post_send(bmi_op_id_t * id,72 bmi_method_addr_p dest,73 const void *buffer,74 bmi_size_t size,75 enum bmi_buffer_type buffer_type,76 bmi_msg_tag_t tag,77 void *user_ptr,78 bmi_context_id context_id,79 PVFS_hint hints);80 int BMI_tcp_post_sendunexpected(bmi_op_id_t * id,81 bmi_method_addr_p dest,82 const void *buffer,83 bmi_size_t size,84 enum bmi_buffer_type buffer_type,85 bmi_msg_tag_t tag,86 void *user_ptr,87 bmi_context_id context_id,88 PVFS_hint hints);89 int BMI_tcp_post_recv(bmi_op_id_t * id,90 bmi_method_addr_p src,91 void *buffer,92 bmi_size_t expected_size,93 bmi_size_t * actual_size,94 enum bmi_buffer_type buffer_type,95 bmi_msg_tag_t tag,96 void *user_ptr,97 bmi_context_id context_id,98 PVFS_hint hints);99 74 int BMI_tcp_test(bmi_op_id_t id, 100 75 int *outcount, … … 116 91 int *outcount, 117 92 struct bmi_method_unexpected_info *info, 93 uint8_t class, 118 94 int max_idle_time_ms); 119 95 int BMI_tcp_testcontext(int incount, … … 159 135 enum bmi_buffer_type buffer_type, 160 136 bmi_msg_tag_t tag, 137 uint8_t class, 161 138 void *user_ptr, 162 139 bmi_context_id context_id, … … 169 146 170 147 /* size of encoded message header */ 171 #define TCP_ENC_HDR_SIZE 2 4148 #define TCP_ENC_HDR_SIZE 25 172 149 173 150 /* structure internal to tcp for use as a message header */ … … 178 155 bmi_msg_tag_t tag; /* user specified message tag */ 179 156 bmi_size_t size; /* length of trailing message */ 157 uint32_t src_addr_hash; /* hash of local svr addr (if present) */ 158 uint8_t class; /* class of msg (if unexpected) */ 180 159 char enc_hdr[TCP_ENC_HDR_SIZE]; /* encoded version of header info */ 181 160 }; … … 185 164 *((uint32_t*)&((hdr).enc_hdr[0])) = htobmi32((hdr).magic_nr); \ 186 165 *((uint32_t*)&((hdr).enc_hdr[4])) = htobmi32((hdr).mode); \ 187 *((uint64_t*)&((hdr).enc_hdr[8])) = htobmi64((hdr).tag); \ 166 *((uint32_t*)&((hdr).enc_hdr[8])) = htobmi64((hdr).tag); \ 167 *((uint32_t*)&((hdr).enc_hdr[12])) = htobmi32((hdr).src_addr_hash);\ 188 168 *((uint64_t*)&((hdr).enc_hdr[16])) = htobmi64((hdr).size); \ 169 *((uint8_t*)&((hdr).enc_hdr[24])) = (hdr).class; \ 189 170 } while(0) 190 171 … … 193 174 (hdr).magic_nr = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[0]))); \ 194 175 (hdr).mode = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[4]))); \ 195 (hdr).tag = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[8]))); \ 176 (hdr).tag = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[8]))); \ 177 (hdr).src_addr_hash = bmitoh32(*((uint32_t*)&((hdr).enc_hdr[12])));\ 196 178 (hdr).size = bmitoh64(*((uint64_t*)&((hdr).enc_hdr[16]))); \ 179 (hdr).class = *((uint8_t*)&((hdr).enc_hdr[24])); \ 197 180 } while(0) 198 181 … … 285 268 bmi_size_t* current_index_complete, enum bmi_op_type send_recv, 286 269 char* enc_hdr, bmi_size_t* env_amt_complete); 270 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval); 271 static int addr_hash_compare(void* key, struct qhash_head* link); 287 272 288 273 #if defined(USE_TRUSTED) && defined(__PVFS2_CLIENT__) … … 305 290 .memfree = BMI_tcp_memfree, 306 291 .unexpected_free = BMI_tcp_unexpected_free, 307 .post_send = BMI_tcp_post_send,308 .post_sendunexpected = BMI_tcp_post_sendunexpected,309 .post_recv = BMI_tcp_post_recv,310 292 .test = BMI_tcp_test, 311 293 .testsome = BMI_tcp_testsome, … … 405 387 static pid_t bmi_tcp_pid; 406 388 389 static struct qhash_table* addr_hash_table = NULL; 390 #define ADDR_HASH_TABLE_SIZE 137 391 407 392 /************************************************************************* 408 393 * Visible Interface … … 514 499 "%d%d%d%llu%d%d", 515 500 "%d", &bmi_tcp_recv_event_id); 501 502 /* create a hash table to store method addresses based on addr hash */ 503 addr_hash_table = qhash_init(addr_hash_compare, quickhash_null32_hash, 504 ADDR_HASH_TABLE_SIZE); 505 if(!addr_hash_table) 506 { 507 tmp_errno = bmi_tcp_errno_to_pvfs(-ENOMEM); 508 goto initialize_failure; 509 } 516 510 517 511 gen_mutex_unlock(&interface_mutex); … … 600 594 struct tcp_addr *tcp_addr_data = NULL; 601 595 int ret = -1; 596 uint32_t addr_hash; 597 struct qhash_head* tmp_link; 602 598 603 599 tcp_string = string_key("tcp", id_string); 604 600 if (!tcp_string) 605 601 { 606 /* the string doesn't even have our info */607 return (NULL);602 /* the string doesn't even have our info */ 603 return (NULL); 608 604 } 609 605 … … 612 608 if ((delim = index(tcp_string, ':')) == NULL) 613 609 { 614 gossip_lerr("Error: malformed tcp address.\n"); 615 free(tcp_string); 616 return (NULL); 617 } 618 619 /* looks ok, so let's build the method addr structure */ 620 new_addr = alloc_tcp_method_addr(); 621 if (!new_addr) 622 { 623 free(tcp_string); 624 return (NULL); 625 } 626 tcp_addr_data = new_addr->method_data; 610 gossip_lerr("Error: malformed tcp address.\n"); 611 free(tcp_string); 612 return (NULL); 613 } 614 615 addr_hash = hashlittle(id_string, strlen(id_string), 3334); 616 /* do we already have a connection established from this host? */ 617 if(addr_hash_table && (tmp_link = qhash_search(addr_hash_table, 618 &addr_hash))) 619 { 620 /* we have already received an inbound connection from the host that 621 * we are looking up. Re-use the existing method addr rather than 622 * creating a new one 623 */ 624 tcp_addr_data = qlist_entry(tmp_link, struct 625 tcp_addr, hash_link); 626 new_addr = tcp_addr_data->parent; 627 if(tcp_addr_data->hostname) 628 free(tcp_addr_data->hostname); 629 tcp_addr_data->dont_reconnect = 0; 630 assert(new_addr->ref_count == 1); 631 new_addr->ref_count++; 632 } 633 else 634 { 635 /* looks ok, so let's build the method addr structure */ 636 new_addr = alloc_tcp_method_addr(); 637 if (!new_addr) 638 { 639 free(tcp_string); 640 return (NULL); 641 } 642 tcp_addr_data = new_addr->method_data; 643 } 627 644 628 645 ret = sscanf((delim + 1), "%d", &(tcp_addr_data->port)); 629 646 if (ret != 1) 630 647 { 631 gossip_lerr("Error: malformed tcp address.\n");632 dealloc_tcp_method_addr(new_addr);633 free(tcp_string);634 return (NULL);648 gossip_lerr("Error: malformed tcp address.\n"); 649 dealloc_tcp_method_addr(new_addr); 650 free(tcp_string); 651 return (NULL); 635 652 } 636 653 … … 638 655 if (!hostname) 639 656 { 640 dealloc_tcp_method_addr(new_addr);641 free(tcp_string);642 return (NULL);657 dealloc_tcp_method_addr(new_addr); 658 free(tcp_string); 659 return (NULL); 643 660 } 644 661 strncpy(hostname, tcp_string, (delim - tcp_string)); … … 646 663 647 664 tcp_addr_data->hostname = hostname; 665 tcp_addr_data->addr_hash = addr_hash; 666 /* add entry to hash table so we can find it later */ 667 if(addr_hash_table) 668 { 669 qhash_add(addr_hash_table, &tcp_addr_data->addr_hash, 670 &tcp_addr_data->hash_link); 671 } 672 gossip_debug(GOSSIP_BMI_DEBUG_TCP, 673 "Hashed BMI address %s to %u\n", id_string, 674 tcp_addr_data->addr_hash); 648 675 649 676 free(tcp_string); 677 650 678 return (new_addr); 651 679 } … … 987 1015 } 988 1016 989 990 /* BMI_tcp_post_send()991 *992 * Submits send operations.993 *994 * returns 0 on success that requires later poll, returns 1 on instant995 * completion, -errno on failure996 */997 int BMI_tcp_post_send(bmi_op_id_t * id,998 bmi_method_addr_p dest,999 const void *buffer,1000 bmi_size_t size,1001 enum bmi_buffer_type buffer_type,1002 bmi_msg_tag_t tag,1003 void *user_ptr,1004 bmi_context_id context_id,1005 PVFS_hint hints)1006 {1007 struct tcp_msg_header my_header;1008 int ret = -1;1009 1010 /* clear the id field for safety */1011 *id = 0;1012 1013 /* fill in the TCP-specific message header */1014 if (size > TCP_MODE_REND_LIMIT)1015 {1016 return (bmi_tcp_errno_to_pvfs(-EMSGSIZE));1017 }1018 1019 if (size <= TCP_MODE_EAGER_LIMIT)1020 {1021 my_header.mode = TCP_MODE_EAGER;1022 }1023 else1024 {1025 my_header.mode = TCP_MODE_REND;1026 }1027 my_header.tag = tag;1028 my_header.size = size;1029 my_header.magic_nr = BMI_MAGIC_NR;1030 1031 gen_mutex_lock(&interface_mutex);1032 1033 ret = tcp_post_send_generic(id, dest, &buffer,1034 &size, 1, buffer_type, my_header,1035 user_ptr, context_id, hints);1036 1037 gen_mutex_unlock(&interface_mutex);1038 return(ret);1039 }1040 1041 1042 /* BMI_tcp_post_sendunexpected()1043 *1044 * Submits unexpected send operations.1045 *1046 * returns 0 on success that requires later poll, returns 1 on instant1047 * completion, -errno on failure1048 */1049 int BMI_tcp_post_sendunexpected(bmi_op_id_t * id,1050 bmi_method_addr_p dest,1051 const void *buffer,1052 bmi_size_t size,1053 enum bmi_buffer_type buffer_type,1054 bmi_msg_tag_t tag,1055 void *user_ptr,1056 bmi_context_id context_id,1057 PVFS_hint hints)1058 {1059 struct tcp_msg_header my_header;1060 int ret = -1;1061 1062 /* clear the id field for safety */1063 *id = 0;1064 1065 if (size > TCP_MODE_EAGER_LIMIT)1066 {1067 return (bmi_tcp_errno_to_pvfs(-EMSGSIZE));1068 }1069 1070 my_header.mode = TCP_MODE_UNEXP;1071 my_header.tag = tag;1072 my_header.size = size;1073 my_header.magic_nr = BMI_MAGIC_NR;1074 1075 gen_mutex_lock(&interface_mutex);1076 1077 ret = tcp_post_send_generic(id, dest, &buffer,1078 &size, 1, buffer_type, my_header,1079 user_ptr, context_id, hints);1080 gen_mutex_unlock(&interface_mutex);1081 return(ret);1082 }1083 1084 1085 1086 /* BMI_tcp_post_recv()1087 *1088 * Submits recv operations.1089 *1090 * returns 0 on success that requires later poll, returns 1 on instant1091 * completion, -errno on failure1092 */1093 int BMI_tcp_post_recv(bmi_op_id_t * id,1094 bmi_method_addr_p src,1095 void *buffer,1096 bmi_size_t expected_size,1097 bmi_size_t * actual_size,1098 enum bmi_buffer_type buffer_type,1099 bmi_msg_tag_t tag,1100 void *user_ptr,1101 bmi_context_id context_id,1102 PVFS_hint hints)1103 {1104 int ret = -1;1105 1106 /* A few things could happen here:1107 * a) rendez. recv with sender not ready yet1108 * b) rendez. recv with sender waiting1109 * c) eager recv, data not available yet1110 * d) eager recv, some/all data already here1111 * e) rendez. recv with sender in eager mode1112 *1113 * b or d could lead to completion without polling.1114 * we don't look for unexpected messages here.1115 */1116 1117 if (expected_size > TCP_MODE_REND_LIMIT)1118 {1119 return (bmi_tcp_errno_to_pvfs(-EINVAL));1120 }1121 gen_mutex_lock(&interface_mutex);1122 1123 ret = tcp_post_recv_generic(id, src, &buffer, &expected_size,1124 1, expected_size, actual_size,1125 buffer_type, tag,1126 user_ptr, context_id, hints);1127 1128 gen_mutex_unlock(&interface_mutex);1129 return (ret);1130 }1131 1132 1133 1017 /* BMI_tcp_test() 1134 1018 * … … 1260 1144 int *outcount, 1261 1145 struct bmi_method_unexpected_info *info, 1146 uint8_t class, 1262 1147 int max_idle_time) 1263 1148 { 1264 1149 int ret = -1; 1265 1150 method_op_p query_op = NULL; 1151 struct op_list_search_key key; 1152 1153 memset(&key, 0, sizeof(struct op_list_search_key)); 1154 key.class = class; 1155 key.class_yes = 1; 1266 1156 1267 1157 gen_mutex_lock(&interface_mutex); … … 1283 1173 * stuff and we have room in the info array for it 1284 1174 */ 1285 while ((*outcount < incount) && 1286 (query_op = 1287 op_list_shownext(op_list_array[IND_COMPLETE_RECV_UNEXP]))) 1288 { 1289 info[*outcount].error_code = query_op->error_code; 1290 info[*outcount].addr = query_op->addr; 1175 while(*outcount < incount) 1176 { 1177 query_op = op_list_search(op_list_array[IND_COMPLETE_RECV_UNEXP], 1178 &key); 1179 if(!query_op) 1180 { 1181 break; 1182 } 1183 info[*outcount].error_code = query_op->error_code; 1184 /* always show unexpected messages on primary address */ 1185 if(query_op->addr->primary) 1186 info[*outcount].addr = query_op->addr->primary; 1187 else 1188 info[*outcount].addr = query_op->addr; 1291 1189 info[*outcount].buffer = query_op->buffer; 1292 1190 info[*outcount].size = query_op->actual_size; … … 1296 1194 (*outcount)++; 1297 1195 } 1196 1298 1197 gen_mutex_unlock(&interface_mutex); 1299 1198 return (0); … … 1401 1300 struct tcp_msg_header my_header; 1402 1301 int ret = -1; 1302 struct tcp_addr *tcp_addr_data = NULL; 1403 1303 1404 1304 /* clear the id field for safety */ … … 1422 1322 my_header.tag = tag; 1423 1323 my_header.size = total_size; 1424 my_header.magic_nr = BMI_MAGIC_NR; 1324 my_header.magic_nr = BMI_TCP_S2S_MAGIC_NR; 1325 if(tcp_method_params.method_flags & BMI_INIT_SERVER) 1326 { 1327 /* servers identify themselves to peers with an address hash */ 1328 tcp_addr_data = tcp_method_params.listen_addr->method_data; 1329 my_header.src_addr_hash = tcp_addr_data->addr_hash; 1330 } 1331 else 1332 { 1333 my_header.src_addr_hash = 0; 1334 } 1425 1335 1426 1336 gen_mutex_lock(&interface_mutex); … … 1489 1399 enum bmi_buffer_type buffer_type, 1490 1400 bmi_msg_tag_t tag, 1401 uint8_t class, 1491 1402 void *user_ptr, 1492 1403 bmi_context_id context_id, … … 1495 1406 struct tcp_msg_header my_header; 1496 1407 int ret = -1; 1408 struct tcp_addr *tcp_addr_data = NULL; 1497 1409 1498 1410 /* clear the id field for safety */ … … 1507 1419 my_header.tag = tag; 1508 1420 my_header.size = total_size; 1509 my_header.magic_nr = BMI_MAGIC_NR; 1421 my_header.magic_nr = BMI_TCP_S2S_MAGIC_NR; 1422 my_header.class = class; 1423 if(tcp_method_params.method_flags & BMI_INIT_SERVER) 1424 { 1425 /* servers identify themselves to peers with an address hash */ 1426 tcp_addr_data = tcp_method_params.listen_addr->method_data; 1427 my_header.src_addr_hash = tcp_addr_data->addr_hash; 1428 } 1429 else 1430 { 1431 my_header.src_addr_hash = 0; 1432 } 1510 1433 1511 1434 gen_mutex_lock(&interface_mutex); … … 1899 1822 if (dealloc_flag) 1900 1823 { 1901 dealloc_tcp_method_addr(map); 1824 map->ref_count--; 1825 if(map->ref_count == 0) 1826 { 1827 if(map->secondary) 1828 { 1829 dealloc_tcp_method_addr(map->secondary); 1830 } 1831 if(map->primary) 1832 { 1833 map->primary->secondary = NULL; 1834 } 1835 dealloc_tcp_method_addr(map); 1836 } 1902 1837 } 1903 1838 else 1904 1839 { 1905 /* this will cause the bmi control layer to check to see if 1906 * this address can be completely forgotten 1907 */ 1908 bmi_method_addr_forget_callback(bmi_addr); 1840 if(!map->primary) 1841 { 1842 /* this will cause the bmi control layer to check to see if 1843 * this address can be completely forgotten 1844 */ 1845 bmi_method_addr_forget_callback(bmi_addr); 1846 } 1909 1847 } 1910 1848 return; … … 1945 1883 free(tcp_addr_data->peer); 1946 1884 1885 if (tcp_addr_data->hash_link.next || tcp_addr_data->hash_link.prev) 1886 { 1887 qhash_del(&tcp_addr_data->hash_link); 1888 } 1889 1947 1890 bmi_dealloc_method_addr(map); 1948 1891 … … 1980 1923 tcp_addr_data->map = my_method_addr; 1981 1924 tcp_addr_data->sc_index = -1; 1925 tcp_addr_data->parent = my_method_addr; 1982 1926 1983 1927 return (my_method_addr); … … 2958 2902 */ 2959 2903 tcp_addr_data->dont_reconnect = 1; 2960 /* register this address with the method control layer */ 2961 tcp_addr_data->bmi_addr = bmi_method_addr_reg_callback(new_addr); 2962 if (ret < 0) 2963 { 2964 tcp_shutdown_addr(new_addr); 2965 dealloc_tcp_method_addr(new_addr); 2966 dealloc_tcp_method_addr(map); 2967 return (ret); 2968 } 2904 2969 2905 BMI_socket_collection_add(tcp_socket_collection_p, new_addr); 2970 2906 … … 3134 3070 3135 3071 /* check magic number of message */ 3136 if(new_header.magic_nr != BMI_MAGIC_NR) 3137 { 3138 gossip_err("Error: bad magic in BMI TCP message.\n"); 3072 if(new_header.magic_nr != BMI_TCP_S2S_MAGIC_NR) 3073 { 3074 gossip_err("Error: Bad magic in BMI TCP message.\n"); 3075 gossip_err("Error: This may be due to port scanning or communication between incompatible versions of BMI.\n"); 3139 3076 tcp_forget_addr(map, 0, bmi_tcp_errno_to_pvfs(-EBADMSG)); 3140 3077 return(0); 3078 } 3079 3080 gossip_debug(GOSSIP_BMI_DEBUG_TCP, 3081 "Received a message from hashed address %u\n", 3082 new_header.src_addr_hash); 3083 3084 if(tcp_addr_data->addr_hash == 0) 3085 { 3086 struct qhash_head* tmp_link; 3087 struct tcp_addr* found_tcp_addr_data = NULL; 3088 bmi_method_addr_p found_map = NULL; 3089 3090 /* This is the first incoming message on a new socket */ 3091 if(new_header.src_addr_hash == 0) 3092 { 3093 /* client connection; there is no identifier in the header */ 3094 /* register this address with the method control layer */ 3095 tcp_addr_data->bmi_addr = bmi_method_addr_reg_callback(map); 3096 if (ret < 0) 3097 { 3098 tcp_shutdown_addr(map); 3099 dealloc_tcp_method_addr(map); 3100 return (ret); 3101 } 3102 } 3103 else 3104 { 3105 /* server connection; search to see if we can find an address 3106 * that we already explicitly resolved to this host 3107 */ 3108 tmp_link = qhash_search(addr_hash_table, &new_header.src_addr_hash); 3109 if(tmp_link) 3110 { 3111 /* we found a match; this host has already been looked up 3112 * locally 3113 */ 3114 found_tcp_addr_data = qlist_entry(tmp_link, struct 3115 tcp_addr, hash_link); 3116 found_map = found_tcp_addr_data->parent; 3117 3118 /* link the two addresses together */ 3119 found_map->secondary = map; 3120 map->primary = found_map; 3121 } 3122 else 3123 { 3124 /* No lookups on this host yet. */ 3125 tcp_addr_data->addr_hash = new_header.src_addr_hash; 3126 /* add entry to hash table so we can find it later */ 3127 qhash_add(addr_hash_table, &tcp_addr_data->addr_hash, 3128 &tcp_addr_data->hash_link); 3129 /* register this address with the method control layer */ 3130 tcp_addr_data->bmi_addr = bmi_method_addr_reg_callback(map); 3131 if (ret < 0) 3132 { 3133 tcp_shutdown_addr(map); 3134 dealloc_tcp_method_addr(map); 3135 return (ret); 3136 } 3137 } 3138 } 3141 3139 } 3142 3140 … … 3176 3174 active_method_op->size_list = &(active_method_op->actual_size); 3177 3175 active_method_op->list_count = 1; 3176 active_method_op->class = new_header.class; 3178 3177 tcp_op_data = active_method_op->method_data; 3179 3178 tcp_op_data->tcp_op_state = BMI_TCP_INPROGRESS; … … 4082 4081 4083 4082 /* 4083 * My best guess at if you are big-endian or little-endian. This may 4084 * need adjustment. 4085 */ 4086 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ 4087 __BYTE_ORDER == __LITTLE_ENDIAN) || \ 4088 (defined(i386) || defined(__i386__) || defined(__i486__) || \ 4089 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) 4090 # define HASH_LITTLE_ENDIAN 1 4091 # define HASH_BIG_ENDIAN 0 4092 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ 4093 __BYTE_ORDER == __BIG_ENDIAN) || \ 4094 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) 4095 # define HASH_LITTLE_ENDIAN 0 4096 # define HASH_BIG_ENDIAN 1 4097 #else 4098 # define HASH_LITTLE_ENDIAN 0 4099 # define HASH_BIG_ENDIAN 0 4100 #endif 4101 4102 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 4103 4104 /* 4105 ------------------------------------------------------------------------------- 4106 mix -- mix 3 32-bit values reversibly. 4107 4108 This is reversible, so any information in (a,b,c) before mix() is 4109 still in (a,b,c) after mix(). 4110 4111 If four pairs of (a,b,c) inputs are run through mix(), or through 4112 mix() in reverse, there are at least 32 bits of the output that 4113 are sometimes the same for one pair and different for another pair. 4114 This was tested for: 4115 * pairs that differed by one bit, by two bits, in any combination 4116 of top bits of (a,b,c), or in any combination of bottom bits of 4117 (a,b,c). 4118 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 4119 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 4120 is commonly produced by subtraction) look like a single 1-bit 4121 difference. 4122 * the base values were pseudorandom, all zero but one bit set, or 4123 all zero plus a counter that starts at zero. 4124 4125 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 4126 satisfy this are 4127 4 6 8 16 19 4 4128 9 15 3 18 27 15 4129 14 9 3 7 17 3 4130 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 4131 for "differ" defined as + with a one-bit base and a two-bit delta. I 4132 used http://burtleburtle.net/bob/hash/avalanche.html to choose 4133 the operations, constants, and arrangements of the variables. 4134 4135 This does not achieve avalanche. There are input bits of (a,b,c) 4136 that fail to affect some output bits of (a,b,c), especially of a. The 4137 most thoroughly mixed value is c, but it doesn't really even achieve 4138 avalanche in c. 4139 4140 This allows some parallelism. Read-after-writes are good at doubling 4141 the number of bits affected, so the goal of mixing pulls in the opposite 4142 direction as the goal of parallelism. I did what I could. Rotates 4143 seem to cost as much as shifts on every machine I could lay my hands 4144 on, and rotates are much kinder to the top and bottom bits, so I used 4145 rotates. 4146 ------------------------------------------------------------------------------- 4147 */ 4148 #define mix(a,b,c) \ 4149 { \ 4150 a -= c; a ^= rot(c, 4); c += b; \ 4151 b -= a; b ^= rot(a, 6); a += c; \ 4152 c -= b; c ^= rot(b, 8); b += a; \ 4153 a -= c; a ^= rot(c,16); c += b; \ 4154 b -= a; b ^= rot(a,19); a += c; \ 4155 c -= b; c ^= rot(b, 4); b += a; \ 4156 } 4157 4158 /* 4159 ------------------------------------------------------------------------------- 4160 final -- final mixing of 3 32-bit values (a,b,c) into c 4161 4162 Pairs of (a,b,c) values differing in only a few bits will usually 4163 produce values of c that look totally different. This was tested for 4164 * pairs that differed by one bit, by two bits, in any combination 4165 of top bits of (a,b,c), or in any combination of bottom bits of 4166 (a,b,c). 4167 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 4168 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 4169 is commonly produced by subtraction) look like a single 1-bit 4170 difference. 4171 * the base values were pseudorandom, all zero but one bit set, or 4172 all zero plus a counter that starts at zero. 4173 4174 These constants passed: 4175 14 11 25 16 4 14 24 4176 12 14 25 16 4 14 24 4177 and these came close: 4178 4 8 15 26 3 22 24 4179 10 8 15 26 3 22 24 4180 11 8 15 26 3 22 24 4181 ------------------------------------------------------------------------------- 4182 */ 4183 #define final(a,b,c) \ 4184 { \ 4185 c ^= b; c -= rot(b,14); \ 4186 a ^= c; a -= rot(c,11); \ 4187 b ^= a; b -= rot(a,25); \ 4188 c ^= b; c -= rot(b,16); \ 4189 a ^= c; a -= rot(c,4); \ 4190 b ^= a; b -= rot(a,14); \ 4191 c ^= b; c -= rot(b,24); \ 4192 } 4193 4194 4195 /* 4196 ------------------------------------------------------------------------------- 4197 hashlittle() -- hash a variable-length key into a 32-bit value 4198 k : the key (the unaligned variable-length array of bytes) 4199 length : the length of the key, counting by bytes 4200 initval : can be any 4-byte value 4201 Returns a 32-bit value. Every bit of the key affects every bit of 4202 the return value. Two keys differing by one or two bits will have 4203 totally different hash values. 4204 4205 The best hash table sizes are powers of 2. There is no need to do 4206 mod a prime (mod is sooo slow!). If you need less than 32 bits, 4207 use a bitmask. For example, if you need only 10 bits, do 4208 h = (h & hashmask(10)); 4209 In which case, the hash table should have hashsize(10) elements. 4210 4211 If you are hashing n strings (uint8_t **)k, do it like this: 4212 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); 4213 4214 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 4215 code any way you wish, private, educational, or commercial. It's free. 4216 4217 Use for hash table lookup, or anything where one collision in 2^^32 is 4218 acceptable. Do NOT use for cryptographic purposes. 4219 ------------------------------------------------------------------------------- 4220 */ 4221 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval) 4222 { 4223 uint32_t a,b,c; /* internal state */ 4224 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 4225 4226 /* Set up the internal state */ 4227 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; 4228 4229 u.ptr = key; 4230 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 4231 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 4232 #ifdef VALGRIND 4233 const uint8_t *k8; 4234 #endif 4235 4236 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 4237 while (length > 12) 4238 { 4239 a += k[0]; 4240 b += k[1]; 4241 c += k[2]; 4242 mix(a,b,c); 4243 length -= 12; 4244 k += 3; 4245 } 4246 4247 /*----------------------------- handle the last (probably partial) block */ 4248 /* 4249 * "k[2]&0xffffff" actually reads beyond the end of the string, but 4250 * then masks off the part it's not allowed to read. Because the 4251 * string is aligned, the masked-off tail is in the same word as the 4252 * rest of the string. Every machine with memory protection I've seen 4253 * does it on word boundaries, so is OK with this. But VALGRIND will 4254 * still catch it and complain. The masking trick does make the hash 4255 * noticably faster for short strings (like English words). 4256 */ 4257 #ifndef VALGRIND 4258 4259 switch(length) 4260 { 4261 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 4262 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 4263 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 4264 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 4265 case 8 : b+=k[1]; a+=k[0]; break; 4266 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 4267 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 4268 case 5 : b+=k[1]&0xff; a+=k[0]; break; 4269 case 4 : a+=k[0]; break; 4270 case 3 : a+=k[0]&0xffffff; break; 4271 case 2 : a+=k[0]&0xffff; break; 4272 case 1 : a+=k[0]&0xff; break; 4273 case 0 : return c; /* zero length strings require no mixing */ 4274 } 4275 4276 #else /* make valgrind happy */ 4277 4278 k8 = (const uint8_t *)k; 4279 switch(length) 4280 { 4281 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 4282 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 4283 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 4284 case 9 : c+=k8[8]; /* fall through */ 4285 case 8 : b+=k[1]; a+=k[0]; break; 4286 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 4287 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 4288 case 5 : b+=k8[4]; /* fall through */ 4289 case 4 : a+=k[0]; break; 4290 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 4291 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 4292 case 1 : a+=k8[0]; break; 4293 case 0 : return c; 4294 } 4295 4296 #endif /* !valgrind */ 4297 4298 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 4299 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 4300 const uint8_t *k8; 4301 4302 /*--------------- all but last block: aligned reads and different mixing */ 4303 while (length > 12) 4304 { 4305 a += k[0] + (((uint32_t)k[1])<<16); 4306 b += k[2] + (((uint32_t)k[3])<<16); 4307 c += k[4] + (((uint32_t)k[5])<<16); 4308 mix(a,b,c); 4309 length -= 12; 4310 k += 6; 4311 } 4312 4313 /*----------------------------- handle the last (probably partial) block */ 4314 k8 = (const uint8_t *)k; 4315 switch(length) 4316 { 4317 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 4318 b+=k[2]+(((uint32_t)k[3])<<16); 4319 a+=k[0]+(((uint32_t)k[1])<<16); 4320 break; 4321 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 4322 case 10: c+=k[4]; 4323 b+=k[2]+(((uint32_t)k[3])<<16); 4324 a+=k[0]+(((uint32_t)k[1])<<16); 4325 break; 4326 case 9 : c+=k8[8]; /* fall through */ 4327 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 4328 a+=k[0]+(((uint32_t)k[1])<<16); 4329 break; 4330 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 4331 case 6 : b+=k[2]; 4332 a+=k[0]+(((uint32_t)k[1])<<16); 4333 break; 4334 case 5 : b+=k8[4]; /* fall through */ 4335 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 4336 break; 4337 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 4338 case 2 : a+=k[0]; 4339 break; 4340 case 1 : a+=k8[0]; 4341 break; 4342 case 0 : return c; /* zero length requires no mixing */ 4343 } 4344 4345 } else { /* need to read the key one byte at a time */ 4346 const uint8_t *k = (const uint8_t *)key; 4347 4348 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 4349 while (length > 12) 4350 { 4351 a += k[0]; 4352 a += ((uint32_t)k[1])<<8; 4353 a += ((uint32_t)k[2])<<16; 4354 a += ((uint32_t)k[3])<<24; 4355 b += k[4]; 4356 b += ((uint32_t)k[5])<<8; 4357 b += ((uint32_t)k[6])<<16; 4358 b += ((uint32_t)k[7])<<24; 4359 c += k[8]; 4360 c += ((uint32_t)k[9])<<8; 4361 c += ((uint32_t)k[10])<<16; 4362 c += ((uint32_t)k[11])<<24; 4363 mix(a,b,c); 4364 length -= 12; 4365 k += 12; 4366 } 4367 4368 /*-------------------------------- last block: affect all 32 bits of (c) */ 4369 switch(length) /* all the case statements fall through */ 4370 { 4371 case 12: c+=((uint32_t)k[11])<<24; 4372 case 11: c+=((uint32_t)k[10])<<16; 4373 case 10: c+=((uint32_t)k[9])<<8; 4374 case 9 : c+=k[8]; 4375 case 8 : b+=((uint32_t)k[7])<<24; 4376 case 7 : b+=((uint32_t)k[6])<<16; 4377 case 6 : b+=((uint32_t)k[5])<<8; 4378 case 5 : b+=k[4]; 4379 case 4 : a+=((uint32_t)k[3])<<24; 4380 case 3 : a+=((uint32_t)k[2])<<16; 4381 case 2 : a+=((uint32_t)k[1])<<8; 4382 case 1 : a+=k[0]; 4383 break; 4384 case 0 : return c; 4385 } 4386 } 4387 4388 final(a,b,c); 4389 return c; 4390 } 4391 4392 static int addr_hash_compare(void* key, struct qhash_head* link) 4393 { 4394 uint32_t *addr_hash = key; 4395 struct tcp_addr *tcp_addr_data = NULL; 4396 4397 tcp_addr_data = qhash_entry(link, struct tcp_addr, hash_link); 4398 assert(tcp_addr_data); 4399 4400 if(tcp_addr_data->addr_hash == *addr_hash) 4401 { 4402 return(1); 4403 } 4404 return(0); 4405 } 4406 4407 /* 4084 4408 * Local variables: 4085 4409 * c-indent-level: 4 -
trunk/src/io/bmi/op-list.c
r6434 r8304 217 217 { 218 218 219 if (my_key->m ethod_addr_yes && (my_key->method_addr != my_op->addr))219 if (my_key->msg_tag_yes && (my_key->msg_tag != my_op->msg_tag)) 220 220 { 221 221 return (1); 222 222 } 223 if (my_key-> msg_tag_yes && (my_key->msg_tag != my_op->msg_tag))223 if (my_key->op_id_yes && (my_key->op_id != my_op->op_id)) 224 224 { 225 225 return (1); 226 226 } 227 if (my_key->op_id_yes && (my_key->op_id != my_op->op_id)) 228 { 229 return (1); 227 if (my_key->method_addr_yes) 228 { 229 if(my_key->method_addr == my_op->addr) 230 { 231 /* normal case */ 232 } 233 else if(my_op->addr->primary && 234 my_key->method_addr == my_op->addr->primary) 235 { 236 /* swap address in the op to match addr we are using */ 237 my_op->addr = my_op->addr->primary; 238 } 239 else if(my_op->addr->secondary && 240 my_key->method_addr == my_op->addr->secondary) 241 { 242 /* swap address in the op to match addr we are using */ 243 my_op->addr = my_op->addr->secondary; 244 } 245 else 246 { 247 return(1); 248 } 249 } 250 if (my_key->class_yes && (my_key->class != my_op->class)) 251 { 252 return(1); 230 253 } 231 254 return (0); -
trunk/src/io/bmi/op-list.h
r6434 r8304 33 33 bmi_op_id_t op_id; 34 34 int op_id_yes; 35 uint8_t class; 36 int class_yes; 35 37 }; 36 38
