Skip to content

Commit

Permalink
NA: add NA_IO return code for generic I/O errors
Browse files Browse the repository at this point in the history
HG: add HG_IO return code and reserve space for additional NA codes

Update OFI and UCX plugins to use new code
  • Loading branch information
soumagne committed Oct 24, 2024
1 parent 5ee197c commit 928034b
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 37 deletions.
63 changes: 32 additions & 31 deletions src/mercury_core_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,39 +119,40 @@ struct hg_init_info {
unsigned int multi_recv_copy_threshold;
};

/* Value at which the return codes that follow the NA-mapped codes resume
 * (used as the explicit value of HG_CHECKSUM_ERROR). The gap below this
 * offset is kept free to leave room for additional NA error codes. */
#define HG_NA_ERRNO_OFFSET 64

/* Error return codes:
* Functions return 0 for success or corresponding return code.
*
* The list is written as an X-macro: every entry is wrapped in X(), so the
* meaning of each entry depends on how X is defined at the point where
* HG_RETURN_VALUES is expanded. Entries carry no explicit values, so when
* expanded as enumerators they are numbered consecutively from 0 in the
* order listed here. */
#define HG_RETURN_VALUES \
X(HG_SUCCESS) /*!< operation succeeded */ \
X(HG_PERMISSION) /*!< operation not permitted */ \
X(HG_NOENTRY) /*!< no such file or directory */ \
X(HG_INTERRUPT) /*!< operation interrupted */ \
X(HG_AGAIN) /*!< operation must be retried */ \
X(HG_NOMEM) /*!< out of memory */ \
X(HG_ACCESS) /*!< permission denied */ \
X(HG_FAULT) /*!< bad address */ \
X(HG_BUSY) /*!< device or resource busy */ \
X(HG_EXIST) /*!< entry already exists */ \
X(HG_NODEV) /*!< no such device */ \
X(HG_INVALID_ARG) /*!< invalid argument */ \
X(HG_PROTOCOL_ERROR) /*!< protocol error */ \
X(HG_OVERFLOW) /*!< value too large */ \
X(HG_MSGSIZE) /*!< message size too long */ \
X(HG_PROTONOSUPPORT) /*!< protocol not supported */ \
X(HG_OPNOTSUPPORTED) /*!< operation not supported on endpoint */ \
X(HG_ADDRINUSE) /*!< address already in use */ \
X(HG_ADDRNOTAVAIL) /*!< cannot assign requested address */ \
X(HG_HOSTUNREACH) /*!< cannot reach host during operation */ \
X(HG_TIMEOUT) /*!< operation reached timeout */ \
X(HG_CANCELED) /*!< operation canceled */ \
X(HG_CHECKSUM_ERROR) /*!< checksum error */ \
X(HG_NA_ERROR) /*!< generic NA error */ \
X(HG_OTHER_ERROR) /*!< generic HG error */ \
X(HG_RETURN_MAX)

/* Expand each X(name) entry into the enumerator "name," to build the enum,
 * then drop X so the helper macro does not leak past this point. */
#define X(a) a,
typedef enum hg_return { HG_RETURN_VALUES } hg_return_t;
#undef X
/* Return codes, written out as an explicit enum.
 *
 * HG_SUCCESS is the first enumerator and therefore 0; the codes through
 * HG_IO follow consecutively. HG_CHECKSUM_ERROR is pinned to
 * HG_NA_ERRNO_OFFSET, so HG_NA_ERROR, HG_OTHER_ERROR and HG_RETURN_MAX
 * continue from that value and the range between HG_IO and the offset stays
 * unused. Because of that gap the values are not contiguous.
 * NOTE(review): the codes before the offset presumably track the NA return
 * codes one-to-one -- confirm against na_types.h before adding entries. */
typedef enum hg_return {
HG_SUCCESS, /*!< operation succeeded */
HG_PERMISSION, /*!< operation not permitted */
HG_NOENTRY, /*!< no such file or directory */
HG_INTERRUPT, /*!< operation interrupted */
HG_AGAIN, /*!< operation must be retried */
HG_NOMEM, /*!< out of memory */
HG_ACCESS, /*!< permission denied */
HG_FAULT, /*!< bad address */
HG_BUSY, /*!< device or resource busy */
HG_EXIST, /*!< entry already exists */
HG_NODEV, /*!< no such device */
HG_INVALID_ARG, /*!< invalid argument */
HG_PROTOCOL_ERROR, /*!< protocol error */
HG_OVERFLOW, /*!< value too large */
HG_MSGSIZE, /*!< message size too long */
HG_PROTONOSUPPORT, /*!< protocol not supported */
HG_OPNOTSUPPORTED, /*!< operation not supported on endpoint */
HG_ADDRINUSE, /*!< address already in use */
HG_ADDRNOTAVAIL, /*!< cannot assign requested address */
HG_HOSTUNREACH, /*!< cannot reach host during operation */
HG_TIMEOUT, /*!< operation reached timeout */
HG_CANCELED, /*!< operation canceled */
HG_IO, /*!< I/O error */
HG_CHECKSUM_ERROR = HG_NA_ERRNO_OFFSET, /*!< checksum error */
HG_NA_ERROR, /*!< generic NA error */
HG_OTHER_ERROR, /*!< generic HG error */
HG_RETURN_MAX
} hg_return_t;

/* Compat return codes */
#define HG_INVALID_PARAM HG_INVALID_ARG
Expand Down
29 changes: 25 additions & 4 deletions src/na/na_ofi.c
Original file line number Diff line number Diff line change
Expand Up @@ -2169,13 +2169,28 @@ na_ofi_errno_to_na(int rc)
case FI_EINTR:
ret = NA_INTERRUPT;
break;
case FI_EIO:
#if !defined(__APPLE__)
case FI_EREMOTEIO:
#endif
ret = NA_IO;
break;
case FI_EAGAIN:
#ifdef _WIN32
case FI_EWOULDBLOCK:
#endif
ret = NA_AGAIN;
break;
case FI_ENOMEM:
case FI_EMFILE:
case FI_ENOSPC:
case FI_ENOBUFS:
ret = NA_NOMEM;
break;
case FI_EACCES:
#if !defined(_WIN32) && !defined(__APPLE__)
case FI_EKEYREJECTED:
#endif
ret = NA_ACCESS;
break;
case FI_EFAULT:
Expand All @@ -2187,6 +2202,8 @@ na_ofi_errno_to_na(int rc)
case FI_ENODEV:
ret = NA_NODEV;
break;
case FI_E2BIG:
case FI_EBADF:
case FI_EINVAL:
ret = NA_INVALID_ARG;
break;
Expand All @@ -2197,6 +2214,7 @@ na_ofi_errno_to_na(int rc)
ret = NA_MSGSIZE;
break;
case FI_ENOPROTOOPT:
case FI_ENOSYS:
ret = NA_PROTONOSUPPORT;
break;
case FI_EOPNOTSUPP:
Expand All @@ -2210,14 +2228,12 @@ na_ofi_errno_to_na(int rc)
break;
case FI_ENETDOWN:
case FI_ENETUNREACH:
case FI_ENOTCONN:
case FI_ECONNABORTED:
case FI_ECONNREFUSED:
case FI_ECONNRESET:
#ifndef _WIN32
case FI_ENOTCONN:
case FI_ESHUTDOWN:
case FI_ECONNREFUSED:
case FI_EHOSTDOWN:
#endif
case FI_EHOSTUNREACH:
ret = NA_HOSTUNREACH;
break;
Expand All @@ -2227,6 +2243,11 @@ na_ofi_errno_to_na(int rc)
case FI_ECANCELED:
ret = NA_CANCELED;
break;
case FI_ENOMSG:
case FI_ENODATA:
case FI_EISCONN:
case FI_EALREADY:
case FI_EINPROGRESS:
default:
ret = NA_PROTOCOL_ERROR;
break;
Expand Down
1 change: 1 addition & 0 deletions src/na/na_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ struct na_protocol_info {
X(NA_HOSTUNREACH) /*!< cannot reach host during operation */ \
X(NA_TIMEOUT) /*!< operation reached timeout */ \
X(NA_CANCELED) /*!< operation canceled */ \
X(NA_IO) /*!< I/O error */ \
X(NA_RETURN_MAX)

#define X(a) a,
Expand Down
7 changes: 5 additions & 2 deletions src/na/na_ucx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,6 @@ na_ucs_status_to_na(ucs_status_t status)
ret = NA_ADDRNOTAVAIL;
break;

case UCS_ERR_SOME_CONNECTS_FAILED:
case UCS_ERR_UNREACHABLE:
case UCS_ERR_CONNECTION_RESET:
case UCS_ERR_NOT_CONNECTED:
Expand All @@ -1062,8 +1061,12 @@ na_ucs_status_to_na(ucs_status_t status)
ret = NA_CANCELED;
break;

case UCS_ERR_NO_MESSAGE:
case UCS_ERR_SOME_CONNECTS_FAILED:
case UCS_ERR_IO_ERROR:
ret = NA_IO;
break;

case UCS_ERR_NO_MESSAGE:
case UCS_ERR_SHMEM_SEGMENT:
default:
ret = NA_PROTOCOL_ERROR;
Expand Down

0 comments on commit 928034b

Please sign in to comment.