|
Lines 4952-4963
List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout)
Link Here
|
| 4952 |
ret_data_info_t *ret_data_info = NULL; |
4952 |
ret_data_info_t *ret_data_info = NULL; |
| 4953 |
ListIterator itr; |
4953 |
ListIterator itr; |
| 4954 |
int i; |
4954 |
int i; |
|
|
4955 |
char addr_str[32]; |
| 4955 |
|
4956 |
|
| 4956 |
slurm_mutex_lock(&conn_lock); |
4957 |
slurm_mutex_lock(&conn_lock); |
| 4957 |
if (conn_timeout == NO_VAL16) |
4958 |
if (conn_timeout == NO_VAL16) |
| 4958 |
conn_timeout = MIN(slurm_get_msg_timeout(), 10); |
4959 |
conn_timeout = MIN(slurm_get_msg_timeout(), 10); |
| 4959 |
slurm_mutex_unlock(&conn_lock); |
4960 |
slurm_mutex_unlock(&conn_lock); |
| 4960 |
|
4961 |
|
|
|
4962 |
if (msg->msg_type == REQUEST_LAUNCH_PROLOG) { |
| 4963 |
slurm_print_slurm_addr(&msg->address, addr_str, |
| 4964 |
sizeof(addr_str)); |
| 4965 |
debug("BUG7928 %s: pid %d going to connect to %s to send %s", |
| 4966 |
__func__, getpid(), addr_str, |
| 4967 |
rpc_num2string(msg->msg_type)); |
| 4968 |
} |
| 4961 |
/* This connect retry logic permits Slurm hierarchical communications |
4969 |
/* This connect retry logic permits Slurm hierarchical communications |
| 4962 |
* to better survive slurmd restarts */ |
4970 |
* to better survive slurmd restarts */ |
| 4963 |
for (i = 0; i <= conn_timeout; i++) { |
4971 |
for (i = 0; i <= conn_timeout; i++) { |
|
Lines 4969-4974
List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout)
Link Here
|
| 4969 |
if (i == 0) |
4977 |
if (i == 0) |
| 4970 |
debug3("connect refused, retrying"); |
4978 |
debug3("connect refused, retrying"); |
| 4971 |
} |
4979 |
} |
|
|
4980 |
if (msg->msg_type == REQUEST_LAUNCH_PROLOG) |
| 4981 |
debug("BUG7928 %s:retries slurm_open_msg_conn=%d, conn_timeout=%d, fd=%d, errno: %s", |
| 4982 |
__func__, i, conn_timeout, fd, slurm_strerror(errno)); |
| 4983 |
|
| 4972 |
if (fd < 0) { |
4984 |
if (fd < 0) { |
| 4973 |
mark_as_failed_forward(&ret_list, name, |
4985 |
mark_as_failed_forward(&ret_list, name, |
| 4974 |
SLURM_COMMUNICATIONS_CONNECTION_ERROR); |
4986 |
SLURM_COMMUNICATIONS_CONNECTION_ERROR); |