test: add default_tools test, adapt tool set

felixocker · felixocker · commit c85f246e43c0 · 2025-02-22T00:10:23.000+01:00
diff --git a/tests/example_tools.py b/tests/example_tools.py
@@ -84,3 +84,13 @@ def slow(duration: int) -> str:
 
     time.sleep(duration)
     return "Done"
+
+
+def speak(text: str) -> str:
+    """
+    Loudly say something to the user via speakers.
+
+    :param text: The text to speak.
+    :return: A confirmation message quoting the text that was spoken.
+    """
+    return f"Successfully said `{text}`."
diff --git a/tests/test_tool_library.py b/tests/test_tool_library.py
@@ -52,6 +52,7 @@ def test_init(self):
                 "multiply",
                 "divide",
                 "slow",
+                "speak",
             },
             "Initializing tool library with functions failed.",
         )
@@ -71,6 +72,7 @@ def test_init_verbose_id(self):
                 "tests__example_tools__multiply",
                 "tests__example_tools__divide",
                 "tests__example_tools__slow",
+                "tests__example_tools__speak",
             },
             "Initializing tool library with functions failed.",
         )
@@ -128,7 +130,7 @@ def test_init_file_unspecified(self):
         functions = tulip.collection.get(include=[])["ids"]
         self.assertEqual(
             len(functions),
-            5,
+            6,
             "Functions from file were not added in first initialization.",
         )
         tulip = ToolLibrary(chroma_sub_dir="test/")
@@ -232,6 +234,7 @@ def test_load_file(self):
                 "multiply",
                 "divide",
                 "slow",
+                "speak",
             },
             "Loading entire file failed.",
         )
@@ -294,6 +297,7 @@ def test_remove_tool(self):
                 "multiply",
                 "divide",
                 "slow",
+                "speak",
             },
             "Removing function failed.",
         )
@@ -313,9 +317,7 @@ def test_execute(self):
         tulip = ToolLibrary(
             chroma_sub_dir="test/", file_imports=[("tests.example_tools", ["multiply"])]
         )
-        res, error = tulip.execute(
-            tool_id="multiply", arguments={"a": 2.0, "b": 2.0}
-        )
+        res, error = tulip.execute(tool_id="multiply", arguments={"a": 2.0, "b": 2.0})
         self.assertEqual(error, False, "Function execution failed.")
         self.assertEqual(
             res,
@@ -329,9 +331,7 @@ def test_execute_timeout(self):
             file_imports=[("tests.example_tools", ["slow"])],
             default_timeout=1,
         )
-        res, error = tulip.execute(
-            tool_id="slow", arguments={"duration": 2}
-        )
+        res, error = tulip.execute(tool_id="slow", arguments={"duration": 2})
         self.assertEqual(error, True, "Function execution succeeded despite timeout.")
         self.assertEqual(
             res,
@@ -343,9 +343,7 @@ def test_execute_unknown_tool(self):
         tulip = ToolLibrary(
             chroma_sub_dir="test/", file_imports=[("tests.example_tools", [])]
         )
-        res, error = tulip.execute(
-            tool_id="unknown", arguments={}
-        )
+        res, error = tulip.execute(tool_id="unknown", arguments={})
         self.assertEqual(error, True, "Calling unknown function not caught.")
         self.assertEqual(
             res,
@@ -357,9 +355,7 @@ def test_execute_invalid_arguments(self):
         tulip = ToolLibrary(
             chroma_sub_dir="test/", file_imports=[("tests.example_tools", ["multiply"])]
         )
-        res, error = tulip.execute(
-            tool_id="multiply", arguments={"a": 1, "wrong": 2}
-        )
+        res, error = tulip.execute(tool_id="multiply", arguments={"a": 1, "wrong": 2})
         self.assertEqual(
             error, True, "Function execution succeeded despite wrong arguments."
         )
diff --git a/tests/test_tulip_agent.py b/tests/test_tulip_agent.py
@@ -127,6 +127,25 @@ def test_cot_tulip_query_with_instance(self):
             "LLM query failed.",
         )
 
+    def test_default_tools(self):
+        character = (
+            "You must solve the task provided by the user using a tool. "
+            "Eventually use the speak function to tell them the result."
+        )
+        agent = MinimalTulipAgent(
+            tool_library=self.tulip,
+            default_tools=[self.tulip.tools["speak"]],
+            top_k_functions=1,
+            instructions=character,
+        )
+        res = agent.query(prompt="What is 2+2?")
+        self.assertTrue(
+            any(s in res.lower() for s in ("4", "four"))
+            and agent.messages[-2]["role"] == "tool"
+            and agent.messages[-2]["name"] == "speak",
+            "Using default_tool failed.",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()