Using the tritonserver:22.12 image to build the matching libtriton_onnxruntime.so backend
1. Pull the tritonserver Docker image
docker pull nvcr.io/nvidia/tritonserver:22.12-py3
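The steps below are meant to run inside a container started from this image, since the build later links directly against /opt/tritonserver/backends/onnxruntime/. A minimal launch sketch, with the mount path, working directory, and container name as placeholders:
docker run -it --name ort-backend-build \
    -v $(pwd):/workspace -w /workspace \
    nvcr.io/nvidia/tritonserver:22.12-py3 /bin/bash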
2. Install a newer version of CMake
wget http://server:13680/download/codes/CMake-3.30.1.tar.gz
tar xzf CMake-3.30.1.tar.gz
cd CMake-3.30.1 && ./bootstrap && make && make install
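To confirm that the freshly built CMake (installed to /usr/local by default) is the one being picked up, check the version:
hash -r && cmake --version    # should report 3.30.1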
3. Clone onnxruntime_backend
git clone https://github.com/triton-inference-server/onnxruntime_backend.git
4. Check out branch r22.12
cd onnxruntime_backend && git checkout r22.12
5. Run a first build to generate Dockerfile.ort
mkdir -p build && \
cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
-DTRITON_BUILD_ONNXRUNTIME_VERSION=1.9.0 \
-DTRITON_BUILD_CONTAINER_VERSION=21.08 \
-DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON \
-DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON \
-DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 \
-DONNXRUNTIME_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
-DOV_LIBRARY=/opt/tritonserver/backends/onnxruntime/ .. && \
make install
This produces a Dockerfile.ort file in the build directory. Its purpose is to build the libraries that onnxruntime depends on inside a Docker container. However, the nvcr.io/nvidia/tritonserver:22.12-py3 image already ships those libraries, so there is no need to build them again in a container.
In the generated Dockerfile.ort, everything from line 66 onward only copies already-built artifacts, so it is enough to copy the .h header files referenced after line 66 by hand (a copy sketch follows the Dockerfile.ort excerpt below). The .so files are all already present in /opt/tritonserver/backends/onnxruntime/.
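To confirm that these libraries really are in the image, list the directory inside the container (the exact file set depends on the image version):
ls /opt/tritonserver/backends/onnxruntime/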
The .h headers themselves live in the https://github.com/microsoft/onnxruntime repository:
git clone -b rel-1.9.0 --recursive https://github.com/microsoft/onnxruntime onnxruntime
Script excerpt from Dockerfile.ort:
#!/bin/bash
BASE_IMAGE=nvcr.io/nvidia/tritonserver:21.08-py3-min
ONNXRUNTIME_VERSION=1.9.0
ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime
ONNXRUNTIME_BUILD_CONFIG=Release
ONNXRUNTIME_OPENVINO_VERSION=2021.2.200
mkdir -p /workspace && \
cd /workspace && \
git clone -b rel-${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime && (cd onnxruntime && git submodule update --init --recursive) && \
cd /workspace/onnxruntime
#
# Copy all artifacts needed by the backend to /opt/onnxruntime
#
mkdir -p /opt/onnxruntime && \
cd /opt/onnxruntime && \
cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \
cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt
# ONNX Runtime headers, libraries and binaries
mkdir -p /opt/onnxruntime/include && \
cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h /opt/onnxruntime/include && \
cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h /opt/onnxruntime/include && \
cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h /opt/onnxruntime/include
# TensorRT specific headers and libraries
cp /workspace/onnxruntime/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h /opt/onnxruntime/include
# OpenVino specific headers and libraries
cp -r /opt/intel/openvino_2022/docs/licensing /opt/onnxruntime/LICENSE.openvino
cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h /opt/onnxruntime/include
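Based on the excerpt above, a minimal sketch of the manual header copy. It assumes onnxruntime was cloned next to onnxruntime_backend, and uses onnxruntime-dev/include as the target directory only because that is the path referenced by the INCLUDE_DIRECTORIES entry added in step 6.2:
mkdir -p onnxruntime_backend/onnxruntime-dev/include
cp onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h \
   onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h \
   onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h \
   onnxruntime/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h \
   onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h \
   onnxruntime_backend/onnxruntime-dev/include/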
6. Add link configuration to onnxruntime_backend/CMakeLists.txt
6.1 Add the version information: set the repo tags to r22.12 (around lines 103-105)
set(TRITON_BACKEND_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/backend repo")
set(TRITON_CORE_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_COMMON_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/common repo")
6.2 Add include and lib configuration
Two changes are needed in onnxruntime_backend/CMakeLists.txt:
1. Turn off the Docker build switch: at line 123, set the docker option to OFF.
2. Add the link directory configuration: insert the two settings below around line 107.
- INCLUDE_DIRECTORIES points at the directory holding the .h headers copied in the previous step.
- LINK_DIRECTORIES points at the directory holding the onnxruntime libraries that libtriton_onnxruntime.so links against.
INCLUDE_DIRECTORIES (
${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-dev/include
)
LINK_DIRECTORIES (
/opt/tritonserver/backends/onnxruntime/
)
6.3 Add the crypt library
Add crypt to the target_link_libraries call:
target_link_libraries(
triton-onnxruntime-backend
PRIVATE
triton-core-serverapi # from repo-core
triton-core-backendapi # from repo-core
triton-core-serverstub # from repo-core
triton-backend-utils # from repo-backend
${TRITON_ONNXRUNTIME_LDFLAGS}
${ONNXRUNTIME_LIBRARY}
crypt
)
6.x Dependency packages that may be needed
libtbb-dev:
apt install -y libtbb-dev
7. Run the actual build
mkdir -p build && \
cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
-DTRITON_BUILD_ONNXRUNTIME_VERSION=1.9.0 \
-DTRITON_BUILD_CONTAINER_VERSION=21.08 \
-DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON \
-DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON \
-DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 \
-DONNXRUNTIME_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
-DOV_LIBRARY=/opt/tritonserver/backends/onnxruntime/ .. && \
make install
The resulting shared library is build/libtriton_onnxruntime.so.
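To try out the freshly built backend, one option is to drop it over the copy shipped in the image and start the server with verbose logging; /models is a placeholder model-repository path:
cp build/libtriton_onnxruntime.so /opt/tritonserver/backends/onnxruntime/
tritonserver --model-repository=/models --log-verbose=1
The startup log should show the onnxruntime backend being loaded from /opt/tritonserver/backends/onnxruntime/.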
8. Where model files are loaded
onnxruntime_backend/src/onnxruntime.cc, around line 632
onnxruntime_backend/src/onnxruntime_loader.cc, around line 190