Skip to content

Commit

Permalink
LLM Load balance #252
Browse files Browse the repository at this point in the history
  • Loading branch information
Oceania2018 committed Jan 22, 2024
1 parent 844ed86 commit 3dad385
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 11 deletions.
33 changes: 31 additions & 2 deletions docs/llm/provider.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
# LLM Provider

`BotSharp` can support multiple LLM providers through plug-ins.
`BotSharp` can support multiple LLM providers through plug-ins; each `provider` can contain several `model` settings.
```json
[{
"Provider": "azure-openai",
"Models": [
{
"Id": "",
"Name": "gpt-35-turbo",
"Group": "",
"ApiKey": "",
"Endpoint": "https://gpt-35-turbo.openai.azure.com/",
"Type": "chat",
"PromptCost": 0.0015,
"CompletionCost": 0.002
},
{
"Name": "gpt-35-turbo-instruct",
"ApiKey": "",
"Endpoint": "https://gpt-35-turbo-instruct.openai.azure.com/",
"Type": "text",
"PromptCost": 0.0015,
"CompletionCost": 0.002
}
]
}]
```

You can set the names of `Provider` and `Model` in each round of dialogue to control the LLM that should be used in the current dialogue, or you can also specify the LLM used in subsequent dialogues once during dialogue initialization.

Expand All @@ -10,4 +35,8 @@ You can set the names of `Provider` and `Model` in each round of dialogue to con
"provider": "google-ai",
"model": "palm2"
}
```
```

## Load balancing

If you have deployed models with the same function in multiple regions and want to balance the load among those regions to reduce the rate-limit constraints of large model providers, set a consistent `Group` value in the configuration of each of those models.
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@

<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.Http.Abstractions" Version="2.2.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="8.0.1" />
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
<PackageReference Include="System.ComponentModel.Annotations" Version="5.0.0" />
<PackageReference Include="System.Text.Json" Version="8.0.0" />
<PackageReference Include="System.Text.Json" Version="8.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="5.0.1" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,22 @@ namespace BotSharp.Abstraction.MLTasks.Settings;

public class LlmModelSetting
{
public string Id { get; set; }
/// <summary>
/// Model Id, like "gpt-3.5" and "gpt-4".
/// </summary>
public string? Id { get; set; }

/// <summary>
/// Deployment model name
/// </summary>
public string Name { get; set; }

    /// <summary>
    /// Load-balancing group this model belongs to.
    /// Models with the same function deployed in different regions can share
    /// a group so requests are distributed across them.
    /// </summary>
public string? Group { get; set; }

public string ApiKey { get; set; }
public string Endpoint { get; set; }
public LlmModelType Type { get; set; } = LlmModelType.Chat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,37 @@ public List<LlmModelSetting> GetProviderModels(string provider)
public LlmModelSetting? GetSetting(string provider, string model)
{
var settings = _services.GetRequiredService<List<LlmProviderSetting>>();
var providerSetting = settings.FirstOrDefault(p => p.Provider.Equals(provider, StringComparison.CurrentCultureIgnoreCase));
var providerSetting = settings.FirstOrDefault(p =>
p.Provider.Equals(provider, StringComparison.CurrentCultureIgnoreCase));
if (providerSetting == null)
{
_logger.LogError($"Can't find provider settings for {provider}");
return null;
}
}

var modelSetting = providerSetting.Models.FirstOrDefault(m => m.Name.Equals(model, StringComparison.CurrentCultureIgnoreCase));
var modelSetting = providerSetting.Models.FirstOrDefault(m =>
m.Name.Equals(model, StringComparison.CurrentCultureIgnoreCase));
if (modelSetting == null)
{
_logger.LogError($"Can't find model settings for {provider}.{model}");
return null;
}

// load balancing
if (!string.IsNullOrEmpty(modelSetting.Group))
{
// find the models in the same group
var models = providerSetting.Models
.Where(m => !string.IsNullOrEmpty(m.Group) &&
m.Group.Equals(modelSetting.Group, StringComparison.CurrentCultureIgnoreCase))
.ToList();

// pick one model randomly
var random = new Random();
var index = random.Next(0, models.Count());
modelSetting = models.ElementAt(index);
}

return modelSetting;
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"userName": "haiping",
"firstName": "Haiping",
"lastName": "Chen",
"userName": "admin",
"firstName": "Administrator",
"lastName": "",
"email": "admin@gmail.com",
"salt": "55f8fafcf829479ca97e635937440fee",
"password": "3b459ae9988628d41f0362c499a768dd",
Expand Down

0 comments on commit 3dad385

Please sign in to comment.