Update README\RELEASE.md for 2.38.0/23.09 (#6357)

* Update README\RELEASE.md for 2.38.0/23.09 * Revert Windows changes
triton-inference-server · Sep 27, 2023 · 1412282 · 1412282
1 parent 19b0e6e
commit 1412282
Show file tree

Hide file tree

Showing 2 changed files with 158 additions and 38 deletions.
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
@@ -39,23 +39,6 @@ RUN powershell.exe Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope Loca
 RUN powershell.exe [Net.ServicePointManager]::Expect100Continue=$true;[Net.ServicePointManager]::SecurityProtocol=[Net.SecurityProtocolType]::Tls,[Net.SecurityProtocolType]::Tls11,[Net.SecurityProtocolType]::Tls12,[Net.SecurityProtocolType]::Ssl3;Invoke-Expression( New-Object System.Net.WebClient ).DownloadString('https://chocolatey.org/install.ps1')
 RUN choco install git docker unzip -y
 
-#
-# Installing CMake
-#
-ARG CMAKE_VERSION=3.27.1
-ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
-ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
-
-ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
-RUN unzip %CMAKE_FILE%.zip
-RUN move %CMAKE_FILE% "c:\CMake"
-RUN setx PATH "c:\CMake\bin;%PATH%"
-
-ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
-ENV VCPKG_TARGET_TRIPLET x64-windows
-
-LABEL CMAKE_VERSION=${CMAKE_VERSION}
-
 # Be aware that pip can interact badly with VS cmd shell so need to pip install before
 # vsdevcmd.bat (see https://bugs.python.org/issue38989)
 ARG PYTHON_VERSION=3.8.10
@@ -68,31 +51,44 @@ RUN pip install grpcio-tools
 
 LABEL PYTHON_VERSION=${PYTHON_VERSION}
 
-#
-# Installing Visual Studio BuildTools: VS17 2022
-#
-ARG BUILDTOOLS_VERSION
-# Download collect.exe in case of an install failure.
-ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
-
-# Use the latest release channel. For more control, specify the location of an internal layout.
-ARG CHANNEL_URL=https://aka.ms/vs/17/release/channel
-ADD ${CHANNEL_URL} "C:\tmp\VisualStudio.chman"
-# Download the Build Tools bootstrapper.
-ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
-ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
-# Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
+# Download and install Build Tools for Visual Studio. The use of
+# powershell for the install seems to be required to make the command
+# wait for the install to complete before continuing. To avoid failures
+# caused by VS regressions we want to stick with a working
+# compiler. Currently this is 16.11.21. This page contains download
+# links for buildtools.
+# https://docs.microsoft.com/en-us/visualstudio/releases/2019/history#release-dates-and-build-numbers
+ARG BUILDTOOLS_VERSION=16.11.21
+ARG BUILDTOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/8f1eb024-006a-43f6-a372-0721f71058b3/cc5cc690ac094fbfa78dfb8e40089ba52056026579e8d8dc31e95e8ea5466df5/vs_BuildTools.exe
+ADD ${BUILDTOOLS_SOURCE} vs_buildtools.exe
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"
-RUN vs_buildtools.exe --quiet --wait --norestart --nocache install --installPath %VS_INSTALL_PATH_WP% --channelUri "C:\tmp\VisualStudio.chman" --installChannelUri "C:\tmp\VisualStudio.chman" --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended --locale "En-us"
+RUN powershell.exe Start-Process -FilePath vs_buildtools.exe -ArgumentList "--wait","--quiet","--norestart","--nocache","--installPath","%VS_INSTALL_PATH_WP%","--channelUri","C:\tmp\doesnotexist.chman","--addProductLang","En-us","--add","Microsoft.VisualStudio.Workload.VCTools`;includeRecommended","--add","Microsoft.Component.MSBuild" -Wait -PassThru
 
 LABEL BUILDTOOLS_VERSION=${BUILDTOOLS_VERSION}
 
 WORKDIR /
 
+#
+# Installing CMake
+#
+ARG CMAKE_VERSION=3.26.1
+ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
+ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
+
+ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
+RUN unzip %CMAKE_FILE%.zip
+RUN move %CMAKE_FILE% CMake
+RUN setx PATH "c:\CMake\bin;%PATH%"
+
+ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
+ENV VCPKG_TARGET_TRIPLET x64-windows
+
+LABEL CMAKE_VERSION=${CMAKE_VERSION}
+
 #
 # Installing Vcpkg
 #
-ARG VCPGK_VERSION=2023.07.21
+ARG VCPGK_VERSION=2022.11.14
 RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
 WORKDIR /vcpkg
 RUN bootstrap-vcpkg.bat
@@ -104,12 +100,13 @@ LABEL VCPGK_VERSION=${VCPGK_VERSION}
 
 WORKDIR /
 
+
 #
 # Installing CUDA
 #
 ARG CUDA_MAJOR=12
 ARG CUDA_MINOR=2
-ARG CUDA_PATCH=1
+ARG CUDA_PATCH=0
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -130,14 +127,14 @@ ADD ${CUDA_SOURCE} cuda_${CUDA_VERSION}_windows_network.exe
 RUN cuda_%CUDA_VERSION%_windows_network.exe -s %CUDA_PACKAGES%
 # Copy the CUDA visualstudio integration from where it was installed
 # into the appropriate place in BuildTools
-RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v170\BuildCustomizations"
+RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensions\*" "%VS_INSTALL_PATH_WP%\MSBuild\Microsoft\VC\v160\BuildCustomizations"
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
 LABEL CUDA_VERSION="${CUDA_VERSION}"
 
 #
-# Installing TensorRT
+# Installing Tensorrt
 #
 ARG TENSORRT_VERSION=8.6.1.6
 ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
@@ -155,9 +152,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 
 
 #
-# Installing cuDNN
+# Installing CUDNN
 #
-ARG CUDNN_VERSION=8.9.5.27
+ARG CUDNN_VERSION=8.9.4.25
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
 ARG CUDNN_SOURCE=${CUDNN_ZIP}
 

diff --git a/RELEASE.md b/RELEASE.md
@@ -0,0 +1,123 @@
+<!--
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+-->
+
+# Release Notes for 2.38.0
+
+## New Features and Improvements
+
+* Triton now has Python bindings for the C API. Please refer to 
+  [this PR](https://github.com/triton-inference-server/core/pull/265) for 
+  usage.
+
+* Triton now forwards request parameters to each of the composing models of an 
+  ensemble model.
+
+* The Filesystem API now supports named temporary cache directories when 
+  downloading models using the repository agent.
+
+* Added the number of requests currently in the queue to the metrics API. 
+  Documentation can be found 
+  [here](https://github.com/triton-inference-server/server/blob/r23.09/docs/user_guide/metrics.md#pending-request-count-queue-size-per-model).
+
+* Python backend models can now respond with error codes in addition to error 
+  messages.
+
+* TensorRT backend now supports 
+  [TensorRT version compatibility](https://github.com/triton-inference-server/tensorrt_backend/tree/r23.09#command-line-options) 
+  across models generated with the same major version of TensorRT. Use the 
+  `--backend-config=tensorrt,--version-compatible=true` flag to enable this 
+  feature. 
+
+* Triton’s backend API now supports accessing the inference response outputs by 
+  name or by index. See the new API 
+  [here](https://github.com/triton-inference-server/core/blob/r23.09/include/triton/core/tritonbackend.h#L1572-L1608).
+
+* The Python backend now supports loading 
+  [PyTorch models directly](https://github.com/triton-inference-server/python_backend/tree/r23.08#pytorch-platform-experimental). 
+  This feature is experimental and should be treated as Beta.
+
+* Fixed an issue where if the user didn't call `SetResponseReleaseCallback`, 
+  canceling a new request could cancel the old response factory as well. Now 
+  when canceling a request which is being re-used, a new response factory is 
+  created for each inference.
+
+* Refer to the 23.09 column of the 
+  [Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) 
+  for container image versions on which the 23.09 inference server container is 
+  based.
+
+## Known Issues
+
+* When using decoupled models, there is a possibility that response order as 
+  sent from the backend may not match with the order in which these responses 
+  are received by the streaming gRPC client. Note that this only applies to 
+  responses from different requests. Any responses corresponding to the same 
+  request will still be received in their expected order, relative to each 
+  other.
+
+* The FasterTransformer backend is only officially supported for 22.12, though 
+  it can be built for Triton container versions up to 23.07. 
+
+* The Java CAPI is known to have intermittent segfaults; we’re looking for a 
+  root cause.
+
+* Some systems which implement `malloc()` may not release memory back to the 
+  operating system right away, giving the false appearance of a memory leak. This can be mitigated 
+  by using a different malloc implementation. Tcmalloc and jemalloc are 
+  installed in the Triton container and can be 
+  [used by specifying the library in LD_PRELOAD](https://github.com/triton-inference-server/server/blob/r22.12/docs/user_guide/model_management.md). 
+  We recommend experimenting with both `tcmalloc` and `jemalloc` to determine 
+  which one works better for your use case.
+
+* Auto-complete may cause an increase in server start time. To avoid a start 
+  time increase, users can provide the full model configuration and launch the 
+  server with `--disable-auto-complete-config`.
+
+* Auto-complete does not support PyTorch models due to lack of metadata in the 
+  model. It can only verify that the number of inputs and the input names 
+  match what is specified in the model configuration. There is no model 
+  metadata about the number of outputs and datatypes. Related PyTorch bug: 
+  https://github.com/pytorch/pytorch/issues/38273
+
+* Triton Client PIP wheels for Arm SBSA are not available from PyPI, and pip 
+  will install an incorrect Jetson version of the Triton Client library for Arm 
+  SBSA. The correct client wheel file can be pulled directly from the Arm SBSA 
+  SDK image and manually installed.
+
+* Traced models in PyTorch seem to create overflows when int8 tensor values are 
+  transformed to int32 on the GPU. Refer to 
+  https://github.com/pytorch/pytorch/issues/66930 for more information.
+
+* Triton cannot retrieve GPU metrics with MIG-enabled GPU devices (A100 and A30).
+
+* Triton metrics might not work if the host machine is running a separate DCGM 
+  agent on bare-metal or in a container.
+
+* When cloud storage (AWS, GCS, AZURE) is used as a model repository and a model 
+  has multiple versions, Triton creates an extra local copy of the cloud model’s 
+  folder in the temporary directory, which is deleted upon server shutdown.