+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+
|
+
+---------- bulk ----------
+bulk rdma收发文件
+
+mercury/05_bulk/client.c
+./client protocal server_addr filename
+main
+ HG_Set_log_level("debug")
+ state.hg_class = HG_Init(protocol, HG_FALSE)
+ state.hg_context = HG_Context_create(state.hg_class)
+ state.save_rpc_id = MERCURY_REGISTER(state.hg_class, "save", save_in_t, save_out_t, NULL)
+ access(save_op.filename, F_OK) 检查文件是否存在
+ HG_Addr_lookup1(state.hg_context, lookup_callback, &save_op, server_address, HG_OP_ID_IGNORE) -> 查地址, 然后回调
+ lookup_callback
+ save_operation* save_op = (save_operation*)(callback_info->arg); 回调通过指针传递参数
+ FILE* file = fopen(save_op->filename,"r");
+ fseek(file, 0L, SEEK_END)
+ save_op->size = ftell(file) 计算文件位置
+ fseek(file, 0L, SEEK_SET);
+ save_op->buffer = calloc(1, save_op->size);
+ fread(save_op->buffer,1,save_op->size,file) // fread和fwrite的用法详解(以数据块的形式读写文件)
+ HG_Create(state->hg_context, addr, state->save_rpc_id, &handle)
+ HG_Bulk_create(state->hg_class, 1, (void**) &(save_op->buffer), &(save_op->size), HG_BULK_READ_ONLY, &(save_op->bulk_handle));
+ HG_Bulk_create (void**) &(save_op->buffer) 传入文件内容buf指针地址(二级指针) -> 客户端创建BULK
+ hg_buld malloc/memset 0
+ hg_atomic_init32 ref_count 1 初始化引用计数
+ 转分段 注册分段 hg_bulk_register_segments 最大静态段=8, HG_BULK_STATIC_MAX
+ hg_bulk_create_na_mem_descs 单独注册内存描述 individually 分为静态staic和动态数组dynamic
+ hg_bulk_register 注册段 传递索引i
+ NA_Mem_handle_create
+ mem_handler_create -> na_ofi_mem_handler_create
+ NA_UNUSED 编译参数 忽略编译告警
+ calloc
+ na_ofi_mem_desc s[0] 单段, d 多段
+ flags & 0xff 全1
+ NA_Mem_register
+ mem_register na_ofi_mem_register
+ requested_key 自增
+ NA_MEM_READ_ONLY
+ access = FI_REMOTE_READ | FI_WRITE 远程读和本地写
+ fi_mr_regv 注册内存向量
+ rxm_mr_regv
+ rxm_mr_get_msg_access
+ fi_mr_regv
+ vrb_mr_regv
+ vrb_mr_cache_reg
+ ofi_mr_rbt_find 如果有缓存就找红黑树
+ util_mr_cache_create
+ add_region
+ vrb_mr_reg_common
+ ibv_reg_mr 注册内存
+ md->mr_fid.key = md->mr->rkey 远程键
+ md->lkey = md->mr->lkey 本地键
+ vrb_eq_write_event 写完成事件
+ rxm_mr_init
+ fi_mr_key 取回内存key
+ ...desc.info.fi_mr_key = 远程键
+ NA_Mem_handle_get_serialize_size na_ofi_mem_handle_get_serialize_size
+ sizeof iovec info + iovcnt
+ HG_Forward -> 客户端调用RDMA的发送接口, 服务端调用 HG_Bulk_transfer HG_BULK_PULL 拉取数据
+ save_completed
+ while ... do -> 循环
+ ret = HG_Trigger(state.hg_context, 0, 1, &count)
+ HG_Progress(state.hg_context, 100)
+ ret = HG_Context_destroy(state.hg_context)
+ hg_return_t err = HG_Finalize(state.hg_class)
+
+----------
+
+server:
+mercury/05_bulk/server.c
+./server addr
+main
+ HG_Init
+ HG_Addr_self
+ HG_Addr_to_string
+ HG_Context_create
+ HG_Register_data
+ do
+ HG_Trigger
+ HG_Progress
+ save
+ HG_Bulk_transfer(stt->hg_context, save_bulk_completed, my_rpc_state, HG_BULK_PULL, info->addr, in.bulk_handle, 0, my_rpc_state->bulk_handle, 0, my_rpc_state->size, HG_OP_ID_IGNORE) origin and local 对齐 -> 服务端通过RDMA的单边读请求, 从客户端DMA拉取数据
+ HG_Context_destroy
+ HG_Finalize
+---------- bulk ----------
+HG_Forward
+ ...
+ hg_core_handle->ops.forward(hg_core_handle) -> hg_core_forward_na
+ na_ret = NA_Msg_send_unexpected(hg_core_handle->na_class,
+ hg_core_handle->na_context, hg_core_send_input_cb, hg_core_handle,
+ hg_core_handle->core_handle.in_buf, hg_core_handle->in_buf_used,
+ hg_core_handle->in_buf_plugin_data, hg_core_handle->na_addr,
+ hg_core_handle->core_handle.info.context_id, hg_core_handle->tag,
+ hg_core_handle->na_send_op_id);
+ ...
+ na_ofi_msg_send_unexpected(na_class_t *na_class -> na_ofi_msg_send
+ rc = fi_senddata(ep, msg_info->buf.const_ptr, msg_info->buf_size,
+ msg_info->desc, msg_info->tag & NA_OFI_TAG_MASK, msg_info->fi_addr,
+ context);
+
+
+
+ofi+verbs;ofi_rxm://192.169.29.63:55555, class_name:ofi, protocol_name:verbs
+HG_Core_init_opt2
+hg_core_init
+NA_Initialize_opt -> Initialize NA if not provided externally
+ na_plugin_check_protocol
+ na_ofi_check_protocol(const char *protocol_name)
+ uint32_t runtime_version = fi_version()
+ FI_MAJOR(runtime_version), FI_MINOR(runtime_version)
+ type = na_ofi_prov_name_to_type(protocol_name) -> static const char *const na_ofi_prov_name[] = {NA_OFI_PROV_TYPES} -> 网络配置,插件, 协议等
+ na_ofi_getinfo(type, NULL, &providers)
+ // 设置网络参数(提示信息,过滤提供者)
+ hints = fi_allocinfo()
+ hints->mode = FI_ASYNC_IOV -> FI_ASYNC_IOV 模式指示应用程序必须提供 IO 向量所需的缓冲。 设置后,应用程序不得修改长度 > 1 的 IO 向量,包括任何相关的内存描述符数组,直到关联操作完成
+ hints->ep_attr->type = FI_EP_RDM -> 设置端点类型为: 可靠但是连接更少的数据报类型
+ hints->caps = FI_MSG | FI_TAGGED | FI_RMA | FI_DIRECTED_RECV
+ hints->tx_attr->msg_order = FI_ORDER_SAS -> msg_order:保证具有相同标签的消息是有序的。 (FI_ORDER_SAS - 发送后发送(send after send)。如果设置,消息发送操作(包括标记的发送)将按照相对于其他消息发送的提交顺序进行传输。如果未设置,则消息发送可能会不按其提交的顺序进行传输)
+ hints->tx_attr->op_flags = FI_INJECT_COMPLETE -> 当可以安全地重用缓冲区时生成完成事件
+ hints->domain_attr->av_type = FI_AV_MAP
+ hints->domain_attr->resource_mgmt = FI_RM_ENABLED
+ hints->fabric_attr->prov_name = strdup(na_ofi_prov_name[prov_type]) -> 提供者: verbs;ofi_rxm
+ hints->mode |= FI_CONTEXT
+ hints->ep_attr->protocol = (uint32_t) na_ofi_prov_ep_proto[prov_type]
+ hints->caps |= na_ofi_prov_extra_caps[prov_type]
+ hints->domain_attr->control_progress = na_ofi_prov_progress[prov_type]
+ hints->domain_attr->data_progress = na_ofi_prov_progress[prov_type]
+ rc = fi_getinfo(NA_OFI_VERSION, node, service, flags, hints, fi_info_p)
+ fi_freeinfo(providers)
+ ret = na_private_class->na_class.ops->initialize( &na_private_class->na_class, na_info, listen) -> na_ofi_initialize(
+ na_ofi_prov_addr_format(prov_type, na_init_info.addr_format)
+ info.thread_mode = ((na_init_info.thread_mode & NA_THREAD_MODE_SINGLE)
+ na_ofi_class = na_ofi_class_alloc();
+ ret = na_ofi_verify_info
+ na_ofi_class->msg_recv_unexpected == na_ofi_msg_recv
+ na_ofi_class->opt_features |= NA_OPT_MULTI_RECV
+ na_ofi_class->cq_poll = na_ofi_cq_poll_no_source
+ na_ofi_fabric_open(prov_type, na_ofi_class->fi_info->fabric_attr, &na_ofi_class->fabric)
+ na_ofi_domain_open(na_ofi_class->fabric, na_init_info.auth_key,
+ no_wait, na_ofi_prov_flags[prov_type] & NA_OFI_SEP,
+ na_ofi_prov_flags[prov_type] & NA_OFI_DOM_SHARED, na_ofi_class->fi_info,
+ &na_ofi_class->domain)
+ na_ofi_class->context_max = na_init_info.max_contexts
+ na_ofi_class->use_sep
+ na_ofi_endpoint_open(na_ofi_class->fabric, na_ofi_class->domain,
+ na_ofi_class->no_wait, na_ofi_class->use_sep, na_ofi_class->context_max,
+ na_init_info.max_unexpected_size, na_init_info.max_expected_size,
+ na_ofi_class->fi_info, &na_ofi_class->endpoint)
+ pool_chunk_size
+ na_ofi_class->send_pool = hg_mem_pool_create(pool_chunk_size
+ na_ofi_class->recv_pool = hg_mem_pool_create(pool_chunk_size
+ for (i = 0; i < NA_OFI_ADDR_POOL_COUNT; i++) -> 地址池
+ struct na_ofi_addr *na_ofi_addr = na_ofi_addr_alloc(na_ofi_class)
+ HG_QUEUE_PUSH_TAIL(&na_ofi_class->addr_pool.queue, na_ofi_addr, entry)
+ ret = na_ofi_endpoint_get_src_addr(na_ofi_class)
+
+
+
+
+fi_getinfo
+ret = prov->provider->getinfo(version, node, service, flags, hints, &cur); -> static int rxm_getinfo
+ ofi_is_wildcard_listen_addr
+ rxm_validate_atomic_hints(hints)
+ ofix_getinfo(version, node, service, flags, &rxm_util_prov, hints, rxm_info_to_core, rxm_info_to_rxm, info)
+ ...
+ prov_mode = ofi_mr_get_prov_mode(api_version, user_info, prov_info)
+ prov_mode = ofi_cap_mr_mode(user_info->caps, prov_mode)
+
+ rxm_alter_info(hints, *info)
+ofi_set_prov_attr(tail->fabric_attr, prov->provider)
+
+发送数据
+fi_senddata -> rxm_senddata
+ rxm_get_conn(rxm_ep, dest_addr, &rxm_conn)
+ ...
+ rxm_conn_progress(ep)
+ ret = fi_eq_read(ep->msg_eq, &event, &cm_entry -> do loop -> vrb_eq_read(struct fid_eq *eq_fid, uint32_t *event, void *buf, size_t len, uint64_t flags)
+ eq = container_of(eq_fid, struct vrb_eq, eq_fid.fid) -> 事件队列, 参考结构: struct vrb_eq {
+ ret = vrb_eq_read_event(eq, event, buf, len, flags)
+ if (dlistfd_empty(&eq->list_head)) -> 读eq队列
+ rdma_get_cm_event(eq->channel, &cma_event);
+ vrb_eq_cm_process_event(eq, cma_event, event, buf, len) -> VERBS模块, 从事件队列中处理CM事件
+ RDMA_CM_EVENT_ROUTE_RESOLVED -> 客户端路由解析事件
+ ep->state = VRB_CONNECTING
+ rdma_connect(ep->id, &ep->conn_param) -> 主动发起连接请求(建连) -> int rdma_connect (struct rdma_cm_id *id, struct rdma_conn_param *conn_param) ->
+ rdma_ack_cm_event(cma_event)
+ rxm_handle_event(ep, event, &cm_entry, ret)
+ rxm_process_connect(cm_entry)
+ domain->flow_ctrl_ops->enable(conn->msg_ep, conn->ep->msg_info->rx_attr->size / 2) -> 流控 -> static int vrb_enable_ep_flow_ctrl
+ ep->peer_rq_credits = 1 + ep->saved_peer_rq_credits
+ vrb_ep2_domain(ep)->send_credits
+ conn->state = RXM_CM_CONNECTED -> 已建连
+ rxm_send_common -> rxm_send_common(struct rxm_ep *rxm_ep,
+ data_len = ofi_total_iov_len(iov, count)
+ assert(count <= rxm_ep->rxm_info->tx_attr->iov_limit) -> 限制4个iov
+ iface = rxm_mr_desc_to_hmem_iface_dev(desc, count, &device)
+ if (data_len <= rxm_ep->eager_limit) -> 紧急数据(小数据)16KB=16384B, data_len=184
+ rxm_send_eager(rxm_ep, rxm_conn, iov, desc, count,
+ fi_send(rxm_conn->msg_ep, &eager_buf->pkt, total_len -> vrb_msg_ep_send
+ struct ibv_send_wr wr = {
+ .wr_id = VERBS_COMP(ep, (uintptr_t)context),
+ .opcode = IBV_WR_SEND,
+ .send_flags = VERBS_INJECT(ep, len, desc),
+ };
+ vrb_send_buf
+ struct ibv_sge sge = vrb_init_sge(buf, len, desc);
+ wr->sg_list = &sge;
+ wr->num_sge = 1;
+ vrb_post_send(ep, wr, 0)
+ if (!ep->sq_credits || !ep->peer_rq_credits) -> 流控
+ vrb_flush_cq(cq)
+ vrb_poll_cq(cq, &wc)
+ ret = ibv_poll_cq(cq->cq, 1, wc)
+ vrb_report_wc(cq, &wc)
+ (void) ofi_cq_write(&cq->util_cq, (void *) (uintptr_t) wc->wr_id,flags, len, NULL, data, 0)
+ ofi_cirque_commit(cq->cirq) -> 环形链表计数器+1
+ ep->sq_credits--
+ ret = ibv_post_send(ep->ibv_qp, wr, &bad_wr)
+
+buf转sge:
+#define vrb_init_sge(buf, len, desc) (struct ibv_sge) \
+ { .addr = (uintptr_t) buf, \
+ .length = (uint32_t) len, \
+ .lkey = (desc) ? ((struct vrb_mem_desc *) (desc))->lkey : 0 }
+
+
+
+
+
+
+
+
+
+static int rxm_send_connect(struct rxm_conn *conn)
+static int rxm_open_conn
+ ret = fi_endpoint(domain->msg_domain, msg_info, &msg_ep, conn)
+ int vrb_open_ep -> vrb_get_port_space -> return RDMA_PS_TCP
+ int vrb_create_ep
+ rdma_create_id
+ rdma_resolve_addr -> TODO 也将此调用转换为非阻塞(使用事件通道):在运行大型 MPI 作业时,这可能是为了更好地扩展所需要的。 使其成为非阻塞意味着我们无法在 EP 启用时创建 QP。 在使用 rdma_create_qp 创建 QP 之前,我们需要等待 RDMA_CM_EVENT_ADDR_RESOLVED 事件。 它还需要一个SW接收队列来存储启用EP后应用程序发布的recvs
+ ep->util_ep.ep_fid.fid.ops = &vrb_ep_ops
+ ep->util_ep.ep_fid.ops = &vrb_ep_base_ops
+ fi_ep_bind(msg_ep, &ep->msg_eq->fid, 0) -> static int vrb_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
+ ofi_ep_bind_valid(&vrb_prov, bfid, flags)
+ rdma_migrate_id(ep->id, ep->eq->channel)
+ rxm_bind_comp(ep, msg_ep)
+ fi_ep_bind(msg_ep, &ep->msg_cq->fid, FI_TRANSMIT | FI_RECV)
+ ret = ofi_ep_bind_cq(&ep->util_ep, &cq->util_cq, flags)
+ return fid_list_insert(&cq->ep_list
+ fi_enable(msg_ep); -> static int vrb_ep_enable(struct fid_ep *ep_fid)
+ vrb_msg_ep_get_qp_attr(ep, &attr)
+ ret = rdma_create_qp(ep->id, domain->pd, &attr)
+ conn->flow_ctrl = domain->flow_ctrl_ops->available(msg_ep) -> static bool vrb_flow_ctrl_available
+ ret = rxm_prepost_recv(ep, msg_ep) -> ep->msg_info->rx_attr->size = 128
+ rx_buf = ofi_buf_alloc(rxm_ep->rx_pool)
+ if (ofi_bufpool_grow(pool))
+ ret = ofi_bufpool_region_alloc(buf_region);
+ rxm_post_recv(rx_buf)
+ ret = (int) fi_recv(rx_buf->rx_ep, &rx_buf->pkt -> vrb_msg_ep_recv(struct fid_ep *ep_fid
+ struct ibv_recv_wr wr = {
+ .wr_id = (uintptr_t)context,
+ .num_sge = 1,
+ .sg_list = &sge,
+ .next = NULL,
+ };
+ ssize_t vrb_post_recv(struct vrb_ep *ep, struct ibv_recv_wr *wr)
+ ret = ibv_post_recv(ep->ibv_qp, wr, &bad_wr)
+ conn->msg_ep = msg_ep
+ret = rxm_init_connect_data(conn, &cm_data)
+ ret = fi_getopt(&conn->ep->msg_pep->fid, FI_OPT_ENDPOINT
+ cm_data->connect.port = ofi_addr_get_port(&conn->ep->addr.sa)
+ cm_data->connect.client_conn_id = rxm_conn_id(conn->peer->index) -> rx_size=128
+fi_connect(conn->msg_ep, info->dest_addr, &cm_data, sizeof(cm_data)) -> vrb_msg_ep_connect(struct fid_ep *ep_fid,
+ vrb_msg_ep_prepare_cm_data(param, paramlen, cm_hdr)
+ vrb_ep_prepare_rdma_cm_param(&ep->conn_param,
+ conn_param->responder_resources = RDMA_MAX_RESP_RES
+ conn_param->initiator_depth = RDMA_MAX_INIT_DEPTH
+ conn_param->flow_control = 1;
+ conn_param->rnr_retry_count = 7; -> 无限重试
+ ep->conn_param.retry_count = 15
+ rdma_resolve_route(ep->id, VERBS_RESOLVE_TIMEOUT) -> 触发CM事件: RDMA_CM_EVENT_ROUTE_RESOLVED
+
+rxm_ep_settings_init
+
+
+
+
+rdma write, 单边写,
+NA_Put
+na_ofi_put
+na_ofi_rma fi_writemsg = fi_rma_op .writemsg = -> include/rdma/fi_rma.h -> fi_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, uint64_t flags)
+ep->rma->writemsg(ep, msg, flags)
+.writemsg = vrb_msg_ep_rma_writemsg
+vrb_msg_ep_rma_writemsg -> prov/verbs/src/verbs_rma.c
+ struct ibv_send_wr wr
+ wr.opcode = IBV_WR_RDMA_WRITE | wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM 写|立即数写
+ vrb_send_iov(ep, &wr, msg->msg_iov, msg->desc -> ssize_t vrb_send_iovli
+ wr->sg_list = alloca(sizeof(*wr->sg_list) * count)
+ wr->sg_list[i].addr = (uintptr_t) iov[i].iov_base
+ wr->sg_list[i].length = iov[i].iov_len;
+ wr->send_flags = IBV_SEND_INLINE ?IBV_SEND_FENCE
+ wr->sg_list[0]
+ wr->num_sge =
+ vrb_post_send(ep, wr, flags) -> prov/verbs/src/verbs_ep.c -> ssize_t vrb_post_send
+ ibv_post_send(ep->ibv_qp, wr, &bad_wr)
+
+所有标签(Flags), include/rdma/fabric.h -> #define FI_MSG (1ULL << 1) ...
+
+
+服务端执行bulk拉取
+HG_Bulk_transfer(hg_context_t *context
+ hg_bulk_transfer_na(hg_bulk_op_t op
+ case HG_BULK_PULL:
+ na_bulk_op = hg_bulk_na_get
+ na_ret = na_bulk_op(hg_bulk_op_id->na_class -> hg_bulk_na_get -> return NA_Get -> na_class->ops->get -> na_ofi_get(na_class_t *na_class
+ na_ofi_rma_common(NA_OFI_CLASS(na_class), context, NA_CB_GET,
+ callback, arg, fi_readmsg, "fi_readmsg", FI_COMPLETION,
+ (struct na_ofi_mem_handle *) local_mem_handle, local_offset,
+ (struct na_ofi_mem_handle *) remote_mem_handle, remote_offset, length,
+ (struct na_ofi_addr *) remote_addr, remote_id,
+ (struct na_ofi_op_id *) op_id) -> 服务端拉取数据(READ)
+ ..
+na_ofi_rma_common(struct na_ofi_class *na_ofi_class,
+ struct iovec *local_iov = NA_OFI_IOV
+ void *local_desc = fi_mr_desc(na_ofi_mem_handle_local->fi_mr)
+ na_ofi_iov_translate
+ na_ofi_rma_iov_translate
+ na_ofi_rma_post(na_ofi_context->fi_tx, rma_info, &na_ofi_op_id->fi_ctx)
+ rc = rma_info->fi_rma_op -> rxm_ep_readmsg(struct fid_ep *ep_fid,
+ return rxm_ep_rma_common(rxm_ep, msg, flags | rxm_ep->util_ep.tx_msg_flags, fi_readmsg, FI_READ);
+ rxm_ep_rma_reg_iov(rxm_ep, msg_rma.msg_iov, msg_rma.desc, mr_desc, msg_rma.iov_count, comp_flags & (FI_WRITE | FI_READ), rma_buf)
+ fi_mr_desc(((struct rxm_mr *) desc[i])->msg_mr)
+ rma_msg(rxm_conn->msg_ep, &msg_rma, flags) -> vrb_msg_ep_rma_readmsg(struct fid_ep *ep_fid
+ struct ibv_send_wr wr = {
+ .wr_id = VERBS_COMP_READ_FLAGS(ep, flags, (uintptr_t)msg->context),
+ .opcode = IBV_WR_RDMA_READ,
+ .wr.rdma.remote_addr = msg->rma_iov->addr,
+ .wr.rdma.rkey = (uint32_t)msg->rma_iov->key,
+ .num_sge = msg->iov_count,
+ };
+ vrb_post_send(ep, &wr, 0)
+ vrb_flush_cq(cq)
+ ep->sq_credits--
+ ctx->op_queue = VRB_OP_SQ
+ ret = ibv_post_send(ep->ibv_qp, wr, &bad_wr) -> 提交WR,将客户端数据读过来(DMA)
+ wr->wr_id = (uintptr_t) ctx->user_ctx
+ slist_insert_tail(&ctx->entry, &ep->sq_list) -> 触发 vrb_flush_sq 遍历该发送队列 -> while (!slist_empty(&ep->sq_list))
+
+
+下刷发送队列调用栈
+(gdb) bt
+#0 vrb_flush_sq (ep=0xb7a4d0) at prov/verbs/src/verbs_ep.c:456
+#1 0x00007ffff7339f90 in vrb_ep_close (fid=0xb7a4d0) at prov/verbs/src/verbs_ep.c:582
+#2 0x00007ffff7349804 in fi_close (fid=0xb7a4d0) at ./include/rdma/fabric.h:631
+#3 0x00007ffff734a605 in rxm_close_conn (conn=0xb756b8) at prov/rxm/src/rxm_conn.c:88
+#4 0x00007ffff734cd88 in rxm_process_shutdown (conn=0xb756b8) at prov/rxm/src/rxm_conn.c:768
+#5 0x00007ffff734d170 in rxm_handle_event (ep=0x78b570, event=3, cm_entry=0x7fffffffd3d0, len=16) at prov/rxm/src/rxm_conn.c:830
+#6 0x00007ffff734d279 in rxm_conn_progress (ep=0x78b570) at prov/rxm/src/rxm_conn.c:850
+#7 0x00007ffff736256e in rxm_ep_do_progress (util_ep=0x78b570) at prov/rxm/src/rxm_cq.c:1843
+#8 0x00007ffff736268a in rxm_ep_progress (util_ep=0x78b570) at prov/rxm/src/rxm_cq.c:1863
+#9 0x00007ffff72d5b7d in ofi_cq_progress (cq=0x787770) at prov/util/src/util_cq.c:498
+#10 0x00007ffff72d51c0 in ofi_cq_readfrom (cq_fid=0x787770, buf=0x0, count=0, src_addr=0x0) at prov/util/src/util_cq.c:257
+#11 0x00007ffff72d3a56 in fi_cq_readfrom (cq=0x787770, buf=0x0, count=0, src_addr=0x0) at ./include/rdma/fi_eq.h:400
+#12 0x00007ffff72d5207 in ofi_cq_read (cq_fid=0x787770, buf=0x0, count=0) at prov/util/src/util_cq.c:264
+#13 0x00007ffff72dbf4f in fi_cq_read (cq=0x787770, buf=0x0, count=0) at ./include/rdma/fi_eq.h:394
+#14 0x00007ffff72dc4cc in util_poll_run (poll_fid=0x783390, context=0x7fffffffdb78, count=1) at prov/util/src/util_poll.c:95
+#15 0x00007ffff72dc9a0 in fi_poll (pollset=0x783390, context=0x7fffffffdb78, count=1) at ./include/rdma/fi_eq.h:335
+#16 0x00007ffff72de0bf in util_wait_fd_try (wait=0x783290) at prov/util/src/util_wait.c:366
+#17 0x00007ffff72dd456 in ofi_trywait (fabric=0x781b40, fids=0x7fffffffdc40, count=1) at prov/util/src/util_wait.c:72
+#18 0x00000000004266de in fi_trywait (count=1, fids=0x7fffffffdc40, fabric=<optimized out>) at /usr/local/include/rdma/fi_eq.h:323
+#19 na_ofi_poll_try_wait (na_class=<optimized out>, context=<optimized out>) at /home/xb/project/mercury/src/na/na_ofi.c:8229
+#20 0x0000000000417b4a in hg_core_poll_try_wait (context=0x786c80) at /home/xb/project/mercury/src/mercury_core.c:5058
+#21 hg_core_progress (context=context@entry=0x786c80, timeout_ms=100) at /home/xb/project/mercury/src/mercury_core.c:5001
+#22 0x000000000042043e in HG_Core_progress (context=0x786c80, timeout_ms=timeout_ms@entry=100) at /home/xb/project/mercury/src/mercury_core.c:6530
+#23 0x000000000040bf02 in HG_Progress (context=<optimized out>, timeout=timeout@entry=100) at /home/xb/project/mercury/src/mercury.c:2178
+#24 0x0000000000407741 in main (argc=<optimized out>, argv=<optimized out>) at /home/xb/project/mercury/Examples/src/server.c:69
+
+
+
+
+创建RDMA事件通道调用栈
+#0 0x00007ffff6c966d0 in rdma_create_event_channel () from /lib64/librdmacm.so.1
+#1 0x00007ffff6c967c5 in ucma_alloc_id () from /lib64/librdmacm.so.1
+#2 0x00007ffff6c96821 in rdma_create_id2.part.20 () from /lib64/librdmacm.so.1
+#3 0x00007ffff6c96476 in ucma_init () from /lib64/librdmacm.so.1
+#4 0x00007ffff6c9697a in rdma_create_id () from /lib64/librdmacm.so.1
+#5 0x00007ffff7334ebe in vrb_ifa_rdma_info (ifa=0x6784b0, dev_name=0x7fffffffd428, rai=0x7fffffffd420) at prov/verbs/src/verbs_info.c:955
+#6 0x00007ffff73359e6 in vrb_getifaddrs (verbs_devs=0x7ffff776aaa0 <verbs_devs>) at prov/verbs/src/verbs_info.c:1177
+#7 0x00007ffff7336048 in vrb_init_info (all_infos=0x7ffff776a5e8 <vrb_util_prov+8>) at prov/verbs/src/verbs_info.c:1378
+#8 0x00007ffff73379c9 in vrb_getinfo (version=65549, node=0x0, service=0x0, flags=576460752303423488, hints=0x66a4c0, info=0x7fffffffd600) at prov/verbs/src/verbs_info.c:1872
+#9 0x00007ffff729730c in fi_getinfo_ (version=65549, node=0x0, service=0x0, flags=576460752303423488, hints=0x66a4c0, info=0x7fffffffd750) at src/fabric.c:1282
+#10 0x00007ffff72c70c0 in ofi_get_core_info (version=65549, node=0x0, service=0x0, flags=0, util_prov=0x7ffff776b960 <rxm_util_prov>, util_hints=0x6681e0, base_attr=0x7ffff776b8e0 <rxm_verbs_info>, info_to_core=0x7ffff73448f8 <rxm_info_to_core>, core_info=0x7fffffffd750)
+ at prov/util/src/util_attr.c:305
+#11 0x00007ffff72c71da in ofix_getinfo (version=65549, node=0x0, service=0x0, flags=0, util_prov=0x7ffff776b960 <rxm_util_prov>, hints=0x6681e0, info_to_core=0x7ffff73448f8 <rxm_info_to_core>, info_to_util=0x7ffff7344ff7 <rxm_info_to_rxm>, info=0x7fffffffd8c0)
+ at prov/util/src/util_attr.c:328
+#12 0x00007ffff7345d68 in rxm_getinfo (version=65549, node=0x0, service=0x0, flags=0, hints=0x6681e0, info=0x7fffffffd8c0) at prov/rxm/src/rxm_init.c:557
+#13 0x00007ffff729730c in fi_getinfo_ (version=65549, node=0x0, service=0x0, flags=0, hints=0x6681e0, info=0x7fffffffd9c0) at src/fabric.c:1282
+--------------------------
+#14 0x000000000042a22f in na_ofi_getinfo (prov_type=prov_type@entry=NA_OFI_PROV_VERBS_RXM, info=info@entry=0x0, fi_info_p=fi_info_p@entry=0x7fffffffd9c0) at /home/xb/project/mercury/src/na/na_ofi.c:3247
+#15 0x00000000004313ee in na_ofi_check_protocol (protocol_name=protocol_name@entry=0x6681a0 "verbs;ofi_rxm") at /home/xb/project/mercury/src/na/na_ofi.c:6679
+#16 0x00000000004225ce in na_plugin_check_protocol (class_ops=0x44a2e0 <na_plugin_static_g>, ops_p=<synthetic pointer>, protocol_name=<optimized out>, class_name=0x668180 "ofi") at /home/xb/project/mercury/src/na/na.c:451
+#17 NA_Initialize_opt2 (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", listen=<optimized out>, version=version@entry=262144, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:776
+#18 0x00000000004229d1 in NA_Initialize_opt (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", listen=<optimized out>, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:734
+#19 0x0000000000413378 in hg_core_init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0, class_p=class_p@entry=0x7fffffffdcb8)
+ at /home/xb/project/mercury/src/mercury_core.c:1225
+#20 0x000000000041bce1 in HG_Core_init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info=hg_init_info@entry=0x0)
+ at /home/xb/project/mercury/src/mercury_core.c:5620
+#21 0x000000000040967f in HG_Init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0) at /home/xb/project/mercury/src/mercury.c:1100
+#22 0x000000000040987d in HG_Init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://175.16.53.61:55555", na_listen=na_listen@entry=1 '\001') at /home/xb/project/mercury/src/mercury.c:1041
+#23 0x0000000000407681 in main (argc=<optimized out>, argv=<optimized out>) at /home/xb/project/mercury/Examples/src/server.c:42
+
+
+vrb_getifaddrs
+ vrb_ifa_rdma_info
+ rdma_getaddrinfo
+ rdma_bind_addr
+ *dev_name = strdup(ibv_get_device_name(id->verbs->device))
+
+
+
+
+#0 0x00007ffff6c966d0 in rdma_create_event_channel () from /lib64/librdmacm.so.1
+#1 0x00007ffff6c967c5 in ucma_alloc_id () from /lib64/librdmacm.so.1
+#2 0x00007ffff6c96821 in rdma_create_id2.part.20 () from /lib64/librdmacm.so.1
+#3 0x00007ffff7320cc0 in vrb_get_rai_id (node=0x0, service=0x0, flags=576460752303423488, hints=0x787fd0, rai=0x7fffffffd4c8, id=0x7fffffffd4c0) at prov/verbs/src/verbs_init.c:278
+#4 0x00007ffff733745a in vrb_handle_sock_addr (node=0x0, service=0x0, flags=576460752303423488, hints=0x787fd0, info=0x7fffffffd650) at prov/verbs/src/verbs_info.c:1770
+#5 0x00007ffff733764c in vrb_get_match_infos (version=65549, node=0x0, service=0x0, flags=576460752303423488, hints=0x787fd0, raw_info=0x7ffff776a5e8 <vrb_util_prov+8>, info=0x7fffffffd650) at prov/verbs/src/verbs_info.c:1805
+#6 0x00007ffff7337a01 in vrb_getinfo (version=65549, node=0x0, service=0x0, flags=576460752303423488, hints=0x787fd0, info=0x7fffffffd650) at prov/verbs/src/verbs_info.c:1876
+#7 0x00007ffff729730c in fi_getinfo_ (version=65549, node=0x0, service=0x0, flags=576460752303423488, hints=0x787fd0, info=0x78b700) at src/fabric.c:1282
+#8 0x00007ffff72c70c0 in ofi_get_core_info (version=65549, node=0x0, service=0x0, flags=0, util_prov=0x7ffff776b960 <rxm_util_prov>, util_hints=0x784540, base_attr=0x0, info_to_core=0x7ffff73448f8 <rxm_info_to_core>, core_info=0x78b700) at prov/util/src/util_attr.c:305
+#9 0x00007ffff7353f15 in rxm_open_core_res (ep=0x78b560) at prov/rxm/src/rxm_ep.c:1748
+#10 0x00007ffff73545b3 in rxm_endpoint (domain=0x788450, info=0x6683b0, ep_fid=0x670ad0, context=0x0) at prov/rxm/src/rxm_ep.c:1925
+#11 0x0000000000434563 in fi_endpoint (context=0x0, ep=0x670ad0, info=0x6683b0, domain=<optimized out>) at /usr/local/include/rdma/fi_endpoint.h:178
+#12 na_ofi_basic_ep_open (na_ofi_endpoint=0x670ad0, no_wait=false, fi_info=0x6683b0, na_ofi_domain=0x780ca0, na_ofi_fabric=0x7836b0) at /home/xb/project/mercury/src/na/na_ofi.c:4385
+#13 na_ofi_endpoint_open (na_ofi_endpoint_p=0x66a4f0, fi_info=0x6683b0, expected_msg_size_max=0, unexpected_msg_size_max=0, max_contexts=<optimized out>, sep=false, no_wait=false, na_ofi_domain=0x780ca0, na_ofi_fabric=0x7836b0) at /home/xb/project/mercury/src/na/na_ofi.c:4357
+#14 na_ofi_initialize (na_class=<optimized out>, na_info=<optimized out>, listen=<optimized out>) at /home/xb/project/mercury/src/na/na_ofi.c:6867
+#15 0x00000000004227cb in NA_Initialize_opt2 (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", listen=<optimized out>, version=version@entry=262144, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:819
+#16 0x00000000004229d1 in NA_Initialize_opt (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", listen=<optimized out>, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:734
+#17 0x0000000000413378 in hg_core_init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0, class_p=class_p@entry=0x7fffffffdcb8)
+ at /home/xb/project/mercury/src/mercury_core.c:1225
+#18 0x000000000041bce1 in HG_Core_init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info=hg_init_info@entry=0x0)
+ at /home/xb/project/mercury/src/mercury_core.c:5620
+#19 0x000000000040967f in HG_Init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0) at /home/xb/project/mercury/src/mercury.c:1100
+#20 0x000000000040987d in HG_Init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001') at /home/xb/project/mercury/src/mercury.c:1041
+#21 0x0000000000407681 in main (argc=<optimized out>, argv=<optimized out>) at /home/xb/project/mercury/Examples/src/server.c:42
+
+
+
+
+rdma_listen: 服务端RDMA监听, 调用栈
+#0 0x00007ffff6c95dd0 in rdma_listen () from /lib64/librdmacm.so.1
+#1 0x00007ffff7324566 in vrb_pep_listen (pep_fid=0x783dd0) at prov/verbs/src/verbs_cm.c:535
+#2 0x00007ffff7349ea9 in fi_listen (pep=0x783dd0) at ./include/rdma/fi_cm.h:91
+#3 0x00007ffff734dcee in rxm_start_listen (ep=0x78b560) at prov/rxm/src/rxm_conn.c:1009
+#4 0x00007ffff7353959 in rxm_ep_ctrl (fid=0x78b560, command=6, arg=0x0) at prov/rxm/src/rxm_ep.c:1607
+#5 0x00000000004346cf in fi_enable (ep=<optimized out>) at /usr/local/include/rdma/fi_endpoint.h:217
+#6 na_ofi_basic_ep_open (na_ofi_endpoint=0x670ad0, no_wait=false, fi_info=0x6683b0, na_ofi_domain=<optimized out>, na_ofi_fabric=0x7836b0) at /home/xb/project/mercury/src/na/na_ofi.c:4427
+#7 na_ofi_endpoint_open (na_ofi_endpoint_p=0x66a4f0, fi_info=0x6683b0, expected_msg_size_max=0, unexpected_msg_size_max=0, max_contexts=<optimized out>, sep=false, no_wait=false, na_ofi_domain=<optimized out>, na_ofi_fabric=0x7836b0) at /home/xb/project/mercury/src/na/na_ofi.c:4357
+#8 na_ofi_initialize (na_class=<optimized out>, na_info=<optimized out>, listen=<optimized out>) at /home/xb/project/mercury/src/na/na_ofi.c:6867
+#9 0x00000000004227cb in NA_Initialize_opt2 (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", listen=<optimized out>, version=version@entry=262144, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:819
+#10 0x00000000004229d1 in NA_Initialize_opt (info_string=info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", listen=<optimized out>, na_init_info=na_init_info@entry=0x7fffffffdaf0) at /home/xb/project/mercury/src/na/na.c:734
+#11 0x0000000000413378 in hg_core_init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0, class_p=class_p@entry=0x7fffffffdcb8)
+ at /home/xb/project/mercury/src/mercury_core.c:1225
+#12 0x000000000041bce1 in HG_Core_init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info=hg_init_info@entry=0x0)
+ at /home/xb/project/mercury/src/mercury_core.c:5620
+#13 0x000000000040967f in HG_Init_opt2 (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001', version=version@entry=0, hg_init_info_p=hg_init_info_p@entry=0x0) at /home/xb/project/mercury/src/mercury.c:1100
+#14 0x000000000040987d in HG_Init (na_info_string=na_info_string@entry=0x7fffffffe1db "ofi+verbs;ofi_rxm://172.17.29.63:55555", na_listen=na_listen@entry=1 '\001') at /home/xb/project/mercury/src/mercury.c:1041
+#15 0x0000000000407681 in main (argc=<optimized out>, argv=<optimized out>) at /home/xb/project/mercury/Examples/src/server.c:42
+
+
+
+(gdb) 发送端解析地址调用栈
+#0 0x00007ffff6c97780 in rdma_resolve_addr () from /lib64/librdmacm.so.1
+#1 0x00007ffff73211be in vrb_create_ep (ep=0xa42010, ps=RDMA_PS_TCP, id=0xa42200) at prov/verbs/src/verbs_init.c:346
+#2 0x00007ffff733c492 in vrb_open_ep (domain=0x783730, info=0x793e00, ep_fid=0x7fffffffd790, context=0xa3ccb8) at prov/verbs/src/verbs_ep.c:1210
+#3 0x00007ffff7349da7 in fi_endpoint (domain=0x783730, info=0x793e00, ep=0x7fffffffd790, context=0xa3ccb8) at ./include/rdma/fi_endpoint.h:178
+#4 0x00007ffff734ad73 in rxm_open_conn (conn=0xa3ccb8, msg_info=0x793e00) at prov/rxm/src/rxm_conn.c:189
+#5 0x00007ffff734b455 in rxm_send_connect (conn=0xa3ccb8) at prov/rxm/src/rxm_conn.c:289
+#6 0x00007ffff734b5fb in rxm_connect (conn=0xa3ccb8) at prov/rxm/src/rxm_conn.c:321
+#7 0x00007ffff734bcd3 in rxm_get_conn (ep=0x786300, addr=1, conn=0x7fffffffd928) at prov/rxm/src/rxm_conn.c:466
+#8 0x00007ffff7357dba in rxm_senddata (ep_fid=0x786300, buf=0x7ffff7edb030, len=156, desc=0x7934b0, data=1, dest_addr=1, context=0xa3c310) at prov/rxm/src/rxm_msg.c:848
+#9 0x000000000042762c in fi_senddata (context=0xa3c310, dest_addr=<optimized out>, data=<optimized out>, desc=<optimized out>, len=<optimized out>, buf=<optimized out>, ep=0x786300) at /usr/local/include/rdma/fi_endpoint.h:343
+#10 na_ofi_msg_send (ep=0x786300, msg_info=0xa3c138, context=0xa3c310) at /home/xb/project/mercury/src/na/na_ofi.c:5065
+#11 0x000000000042d088 in na_ofi_msg_send_unexpected (na_class=<optimized out>, context=<optimized out>, callback=<optimized out>, arg=0xa3bc50, buf=<optimized out>, buf_size=<optimized out>, plugin_data=<optimized out>, dest_addr=<optimized out>,
+ dest_id=<optimized out>, tag=<optimized out>, op_id=<optimized out>) at /home/xb/project/mercury/src/na/na_ofi.c:7554
+#12 0x0000000000414b8e in NA_Msg_send_unexpected (op_id=<optimized out>, tag=<optimized out>, dest_id=<optimized out>, dest_addr=<optimized out>, plugin_data=<optimized out>, buf_size=<optimized out>, buf=<optimized out>, arg=0xa3bc50,
+ callback=0x41a620 <hg_core_send_input_cb>, context=<optimized out>, na_class=<optimized out>) at /home/xb/project/mercury/src/na/na.h:1140
+#13 hg_core_forward_na (hg_core_handle=0xa3bc50) at /home/xb/project/mercury/src/mercury_core.c:3949
+#14 0x00000000004200d5 in hg_core_forward (payload_size=128, flags=0 '\000', arg=0xa3c6c0, callback=0x408120 <hg_core_forward_cb>, hg_core_handle=0xa3bc50) at /home/xb/project/mercury/src/mercury_core.c:3887
+#15 HG_Core_forward (handle=0xa3bc50, callback=callback@entry=0x408120 <hg_core_forward_cb>, arg=arg@entry=0xa3c6c0, flags=<optimized out>, payload_size=128) at /home/xb/project/mercury/src/mercury_core.c:6432
+#16 0x000000000040bdad in HG_Forward (handle=0xa3c6c0, callback=callback@entry=0x4080a0 <save_completed>, arg=arg@entry=0x7fffffffdda0, in_struct=in_struct@entry=0x7fffffffdbb0) at /home/xb/project/mercury/src/mercury.c:2111
+#17 0x000000000040807b in lookup_callback (callback_info=0x7fffffffdc00) at /home/xb/project/mercury/Examples/src/client.c:132
+#18 0x0000000000408245 in hg_core_addr_lookup_cb (callback_info=<optimized out>) at /home/xb/project/mercury/src/mercury.c:437
+#19 0x00000000004186d5 in hg_core_trigger_lookup_entry (hg_core_op_id=0x786260) at /home/xb/project/mercury/src/mercury_core.c:5387
+#20 hg_core_trigger (context=0x78d750, timeout_ms=timeout_ms@entry=0, max_count=max_count@entry=1, actual_count_p=actual_count_p@entry=0x7fffffffdd7c) at /home/xb/project/mercury/src/mercury_core.c:5339
+#21 0x000000000042072f in HG_Core_trigger (context=<optimized out>, timeout=timeout@entry=0, max_count=max_count@entry=1, actual_count_p=actual_count_p@entry=0x7fffffffdd7c) at /home/xb/project/mercury/src/mercury_core.c:6577
+#22 0x000000000040c162 in HG_Trigger (context=<optimized out>, timeout=timeout@entry=0, max_count=max_count@entry=1, actual_count_p=actual_count_p@entry=0x7fffffffdd7c) at /home/xb/project/mercury/src/mercury.c:2197
+#23 0x0000000000407896 in main (argc=<optimized out>, argv=0x7fffffffdec8) at /home/xb/project/mercury/Examples/src/client.c:78
+
|