diff --git a/behaviortree_ros2/CMakeLists.txt b/behaviortree_ros2/CMakeLists.txt index 314edd0..c2e8586 100644 --- a/behaviortree_ros2/CMakeLists.txt +++ b/behaviortree_ros2/CMakeLists.txt @@ -62,4 +62,13 @@ install( ament_export_include_directories(include) ament_export_libraries(${PROJECT_NAME}) +if(BUILD_TESTING) + find_package(ament_cmake_gtest REQUIRED) + + ament_add_gtest(test_cancel_goal_race + test/test_cancel_goal_race.cpp) + target_link_libraries(test_cancel_goal_race + ${PROJECT_NAME}) +endif() + ament_package() diff --git a/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp b/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp index 11bfc96..dad092e 100644 --- a/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp +++ b/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp @@ -23,6 +23,7 @@ #include "behaviortree_cpp/action_node.h" #include "behaviortree_cpp/bt_factory.h" #include "rclcpp_action/rclcpp_action.hpp" +#include "rclcpp_action/exceptions.hpp" #include "behaviortree_ros2/ros_node_params.hpp" @@ -558,21 +559,35 @@ inline void RosActionNode::cancelGoal() auto& action_client = client_instance_->action_client; - auto future_result = action_client->async_get_result(goal_handle_); - auto future_cancel = action_client->async_cancel_goal(goal_handle_); + try + { + auto future_result = action_client->async_get_result(goal_handle_); + auto future_cancel = action_client->async_cancel_goal(goal_handle_); - constexpr auto SUCCESS = rclcpp::FutureReturnCode::SUCCESS; + constexpr auto SUCCESS = rclcpp::FutureReturnCode::SUCCESS; - if(executor.spin_until_future_complete(future_cancel, server_timeout_) != SUCCESS) - { - RCLCPP_ERROR(logger(), "Failed to cancel action server for [%s]", - action_name_.c_str()); - } + if(executor.spin_until_future_complete(future_cancel, server_timeout_) != SUCCESS) + { + RCLCPP_ERROR(logger(), "Failed to cancel action server for [%s]", + action_name_.c_str()); + } - if(executor.spin_until_future_complete(future_result, server_timeout_) != SUCCESS) + if(executor.spin_until_future_complete(future_result, server_timeout_) != SUCCESS) + { + RCLCPP_ERROR(logger(), "Failed to get result call failed :( for [%s]", + action_name_.c_str()); + } + } + catch(const rclcpp_action::exceptions::UnknownGoalHandleError& e) { - RCLCPP_ERROR(logger(), "Failed to get result call failed :( for [%s]", - action_name_.c_str()); + // The action server's terminal result arrived and the rclcpp_action + // client's result callback has already erased the goal from its + // internal registry before we reached async_get_result / + // async_cancel_goal. This is a benign race: the goal has already + // reached a terminal state, which is exactly what cancelGoal is + // trying to achieve. + RCLCPP_DEBUG(logger(), "Goal already terminal at cancel time for [%s]: %s", + action_name_.c_str(), e.what()); } } diff --git a/behaviortree_ros2/package.xml b/behaviortree_ros2/package.xml index a7e6bd4..a6d7018 100644 --- a/behaviortree_ros2/package.xml +++ b/behaviortree_ros2/package.xml @@ -20,6 +20,8 @@ generate_parameter_library btcpp_ros2_interfaces + ament_cmake_gtest + ament_cmake diff --git a/behaviortree_ros2/test/test_cancel_goal_race.cpp b/behaviortree_ros2/test/test_cancel_goal_race.cpp new file mode 100644 index 0000000..bcee685 --- /dev/null +++ b/behaviortree_ros2/test/test_cancel_goal_race.cpp @@ -0,0 +1,221 @@ +// Copyright (c) 2026 Davide Faconti +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Regression test for the fast-cancel race between +// `RosActionNode::cancelGoal()` and the `rclcpp_action` client's result +// callback. +// +// Background: `cancelGoal()` calls `async_get_result()` and +// `async_cancel_goal()` on the action client. If the server produces a +// terminal result (including one accepted via cancel) before those calls +// run on the BT thread, the client's result-callback has already erased +// the goal from its internal registry and both calls throw +// `rclcpp_action::exceptions::UnknownGoalHandleError`. The exception used +// to propagate out of `halt()` and cause `TreeExecutionServer` to abort +// the outer goal instead of canceling it. + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "behaviortree_ros2/bt_action_node.hpp" +#include "btcpp_ros2_interfaces/action/sleep.hpp" + +using namespace std::chrono_literals; +using Sleep = btcpp_ros2_interfaces::action::Sleep; + +namespace +{ + +class SleepLeaf : public BT::RosActionNode +{ +public: + SleepLeaf(const std::string& name, const BT::NodeConfig& conf, + const BT::RosNodeParams& params) + : BT::RosActionNode(name, conf, params) + {} + + bool setGoal(Goal& goal) override + { + goal.msec_timeout = 10000; // deliberately long: server must not finish normally + return true; + } + + BT::NodeStatus onResultReceived(const WrappedResult& /*wr*/) override + { + return BT::NodeStatus::SUCCESS; + } + + BT::NodeStatus onFeedback(const std::shared_ptr /*fb*/) override + { + return BT::NodeStatus::RUNNING; + } + + BT::NodeStatus onFailure(BT::ActionNodeErrorCode /*err*/) override + { + return BT::NodeStatus::FAILURE; + } +}; + +class FastCancelRaceTest : public ::testing::Test +{ +protected: + static void SetUpTestSuite() + { + if(!rclcpp::ok()) + { + rclcpp::init(0, nullptr); + } + } + static void TearDownTestSuite() + { + if(rclcpp::ok()) + { + rclcpp::shutdown(); + } + } + + void SetUp() override + { + bt_node_ = std::make_shared("bt_cancel_race_test"); + server_node_ = std::make_shared("server_cancel_race_test"); + + using GoalHandle = rclcpp_action::ServerGoalHandle; + + action_server_ = rclcpp_action::create_server( + server_node_, "/sleep", + [this](const rclcpp_action::GoalUUID&, std::shared_ptr) { + goal_received_ = true; + return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE; + }, + [this](const std::shared_ptr) { + cancel_received_ = true; + return rclcpp_action::CancelResponse::ACCEPT; + }, + [this](const std::shared_ptr goal_handle) { + // Detach a thread that waits up to ~50 ms for a cancel request, + // then completes the goal synchronously once it sees one. This + // is the shape of the race: the server's `canceled()` call on + // the client side fires the result callback, which erases the + // goal from `goal_handles_` on the rclcpp_action client + // registry. If `halt()` -> `cancelGoal()` reaches + // `async_get_result` / `async_cancel_goal` after that erase, + // both calls used to throw `UnknownGoalHandleError`. + std::thread([goal_handle, this]() { + for(int i = 0; i < 50 && !cancel_received_.load(); ++i) + { + std::this_thread::sleep_for(5ms); + } + auto result = std::make_shared(); + result->done = false; + if(cancel_received_.load()) + { + goal_handle->canceled(result); + } + else + { + result->done = true; + goal_handle->succeed(result); + } + }).detach(); + }); + + exec_ = std::make_unique( + rclcpp::ExecutorOptions(), 2, false, std::chrono::milliseconds(50)); + exec_->add_node(bt_node_); + exec_->add_node(server_node_); + spinner_ = std::thread([this]() { exec_->spin(); }); + } + + void TearDown() override + { + if(exec_) + { + exec_->cancel(); + } + if(spinner_.joinable()) + { + spinner_.join(); + } + exec_.reset(); + } + + BT::BehaviorTreeFactory makeFactory() + { + BT::RosNodeParams params; + params.nh = bt_node_; + params.server_timeout = 1000ms; + params.wait_for_server_timeout = 2000ms; + params.default_port_value = "/sleep"; + + BT::BehaviorTreeFactory factory; + factory.registerNodeType("SleepLeaf", params); + return factory; + } + + std::shared_ptr bt_node_; + std::shared_ptr server_node_; + rclcpp_action::Server::SharedPtr action_server_; + std::unique_ptr exec_; + std::thread spinner_; + + std::atomic goal_received_{ false }; + std::atomic cancel_received_{ false }; +}; + +} // namespace + +// Regression test for the UnknownGoalHandleError fast-cancel race. +// `tree.haltTree()` must not throw when the server completes the cancel +// handshake synchronously from its own thread while the BT thread is +// still reaching the `cancelGoal()` body. +TEST_F(FastCancelRaceTest, HaltDoesNotThrowOnFastCancel) +{ + auto factory = makeFactory(); + auto tree = factory.createTreeFromText(R"( + + + +)"); + + // Tick until the server has accepted the goal. This ensures + // `goal_handle_` is populated inside the BT leaf before we halt. + tree.tickOnce(); + for(int i = 0; i < 100 && !goal_received_.load(); ++i) + { + std::this_thread::sleep_for(5ms); + tree.tickOnce(); + } + ASSERT_TRUE(goal_received_.load()) << "Server never received the goal; precondition " + "for the race " + "reproducer was not met."; + + // The halt must not leak `UnknownGoalHandleError` (or any other + // exception). Before the fix, this throws with + // "Goal handle is not known to this client.". + EXPECT_NO_THROW(tree.haltTree()); + + // The server must have actually received the cancel request — otherwise + // we'd be testing the non-racing path. + EXPECT_TRUE(cancel_received_.load()); +}