Skip to content

Commit

Permalink
Merge branch 'main' into dump_bmp_index
Browse files: browse the repository at this point in the history
  • Loading branch information
vsian authored Dec 13, 2024
2 parents f4b2998 + ba8dc1c commit fe98d28
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 22 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/slow_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ jobs:
env:
old_value: ${{ steps.start_tests_debug.outputs.old_mmmap_rnd_bits }}
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity debug version"
exit 1
Expand Down Expand Up @@ -216,9 +216,9 @@ jobs:
# && !contains(github.event.pull_request.labels.*.name, 'invalid')
id: stop_py_tests
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity release version"
exit 1
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,9 @@ jobs:
if: ${{ !cancelled() }}
id: stop_tests_debug_minio
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity debug version"
exit 1
Expand Down Expand Up @@ -170,9 +170,9 @@ jobs:
if: ${{ !cancelled() }}
id: stop_tests_debug
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-debug/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity debug version"
exit 1
Expand Down Expand Up @@ -307,9 +307,9 @@ jobs:
if: ${{ !cancelled() }}
id: stop_tests_release_minio
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity debug version"
exit 1
Expand Down Expand Up @@ -337,9 +337,9 @@ jobs:
if: ${{ !cancelled() }}
id: stop_tests_release
run: |
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo chmod +x scripts/timeout_kill.sh
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 10 ${pids}"
pids=$(sudo docker exec ${BUILDER_CONTAINER} pgrep -f cmake-build-release/src/infinity | xargs echo)
sudo docker exec ${BUILDER_CONTAINER} bash -c "/infinity/scripts/timeout_kill.sh 15 ${pids}"
if [ $? -ne 0 ]; then
echo "Failed to kill infinity debug version"
exit 1
Expand Down
7 changes: 6 additions & 1 deletion scripts/timeout_kill.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ fi
# Kill all infinity processes
for pid in "${@:2}"; do
# Send SIGTERM
echo "Terminate pid: $pid"
kill -15 $pid
done

Expand All @@ -27,6 +28,7 @@ while true; do
# Check if all processes are still running
all_dead=true
for pid in "${@:2}"; do
echo "Check pid: $pid status"
if ps -p $pid > /dev/null; then
all_dead=false
break
Expand All @@ -43,7 +45,10 @@ while true; do
if [ $current_time -ge $end_time ]; then
echo "Some processes did not terminate in time. Sending SIGKILL."
for pid in "${@:2}"; do
kill -9 $pid
if ps -p $pid > /dev/null; then
echo "Pid: $pid didn't terminate"
kill -9 $pid
fi
done
exit 2 # Return a different value
fi
Expand Down
5 changes: 5 additions & 0 deletions src/main/cluster_manager_leader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ Status ClusterManager::AddNodeInfo(const SharedPtr<NodeInfo> &other_node) {
return Status::DuplicateNode(other_node_name);
}

if (other_node->node_ip() == this_node_->node_ip() and other_node->node_port() == this_node_->node_port()) {
return Status::InvalidServerAddress(
fmt::format("Follower or learner peer server address {}: {} are same as leader", this_node_->node_ip(), this_node_->node_port()));
}

// Add by register
auto iter = other_node_map_.find(other_node_name);
if (iter != other_node_map_.end()) {
Expand Down
2 changes: 1 addition & 1 deletion src/main/cluster_manager_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Status ClusterManager::RegisterToLeaderNoLock() {
Status status = Status::OK();
if (register_peer_task->error_code_ != 0) {
status.code_ = static_cast<ErrorCode>(register_peer_task->error_code_);
status.msg_ = MakeUnique<String>(register_peer_task->error_message_);
status.msg_ = MakeUnique<String>(fmt::format("From leader: {}", register_peer_task->error_message_));
return status;
}
auto now = std::chrono::system_clock::now();
Expand Down
1 change: 1 addition & 0 deletions src/network/peer_server_thrift_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ void PeerServerThriftService::Register(infinity_peer_server::RegisterResponse &r
response.leader_term = leader_node->leader_term();
response.heart_beat_interval = leader_node->heartbeat_interval();
} else {
LOG_ERROR(fmt::format("Node: {} fail to register with leader, error: {}", request.node_name, status.message()));
response.error_code = static_cast<i64>(status.code());
response.error_message = status.message();
}
Expand Down
18 changes: 10 additions & 8 deletions src/network/peer_thrift_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,17 +458,17 @@ void PeerClient::SyncLogs(SyncLogTask *peer_task) {
LOG_ERROR(fmt::format("Sync log to node: {}, error: {}", peer_task->node_name_, peer_task->error_message_));
}
} catch (apache::thrift::transport::TTransportException &thrift_exception) {
peer_task->error_message_ = thrift_exception.what();
peer_task->error_message_ = fmt::format("Sync log to node, transport error: {}, error: {}", peer_task->node_name_, thrift_exception.what());
peer_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectServer);
LOG_ERROR(fmt::format("Sync log to node, transport error: {}, error: {}", peer_task->node_name_, peer_task->error_message_));
LOG_ERROR(peer_task->error_message_);
Status status = InfinityContext::instance().cluster_manager()->UpdateNodeByLeader(peer_task->node_name_, UpdateNodeOp::kLostConnection);
if (!status.ok()) {
LOG_ERROR(status.message());
}
} catch (apache::thrift::TApplicationException &application_exception) {
peer_task->error_message_ = application_exception.what();
peer_task->error_message_ = fmt::format("Sync log to node, application: {}, error: {}", peer_task->node_name_, application_exception.what());
peer_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectServer);
LOG_ERROR(fmt::format("Sync log to node, application: {}, error: {}", peer_task->node_name_, peer_task->error_message_));
LOG_ERROR(peer_task->error_message_);
Status status = InfinityContext::instance().cluster_manager()->UpdateNodeByLeader(peer_task->node_name_, UpdateNodeOp::kLostConnection);
if (!status.ok()) {
LOG_ERROR(status.message());
Expand Down Expand Up @@ -500,18 +500,20 @@ void PeerClient::ChangeRole(ChangeRoleTask *change_role_task) {
LOG_ERROR(fmt::format("Sync log to node: {}, error: {}", change_role_task->node_name_, change_role_task->error_message_));
}
} catch (apache::thrift::transport::TTransportException &thrift_exception) {
change_role_task->error_message_ = thrift_exception.what();
change_role_task->error_message_ =
fmt::format("Sync log to node, transport error: {}, error: {}", change_role_task->node_name_, thrift_exception.what());
change_role_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectServer);
LOG_ERROR(fmt::format("Sync log to node, transport error: {}, error: {}", change_role_task->node_name_, change_role_task->error_message_));
LOG_ERROR(change_role_task->error_message_);
Status status =
InfinityContext::instance().cluster_manager()->UpdateNodeByLeader(change_role_task->node_name_, UpdateNodeOp::kLostConnection);
if (!status.ok()) {
LOG_ERROR(status.message());
}
} catch (apache::thrift::TApplicationException &application_exception) {
change_role_task->error_message_ = application_exception.what();
change_role_task->error_message_ =
fmt::format("Sync log to node, application: {}, error: {}", change_role_task->node_name_, application_exception.what());
change_role_task->error_code_ = static_cast<i64>(ErrorCode::kCantConnectServer);
LOG_ERROR(fmt::format("Sync log to node, application: {}, error: {}", change_role_task->node_name_, change_role_task->error_message_));
LOG_ERROR(change_role_task->error_message_);
Status status =
InfinityContext::instance().cluster_manager()->UpdateNodeByLeader(change_role_task->node_name_, UpdateNodeOp::kLostConnection);
if (!status.ok()) {
Expand Down

0 comments on commit fe98d28

Please sign in to comment.