编译 triton server 的 onnxruntime backend 插件

创建日期: 2024-09-13 16:57 | 作者: 风波 | 浏览次数: 21 | 分类: TritonServer

使用 tritonserver:22.12 版本编译对应版本的 libtriton_onnxruntime.so backend

1. pull tritonserver docker 镜像

docker pull nvcr.io/nvidia/tritonserver:22.12-py3

2. 安装新版本的 cmake

wget http://server:13680/download/codes/CMake-3.30.1.tar.gz

tar xzf CMake-3.30.1.tar.gz
cd CMake-3.30.1 && ./bootstrap && make && make install

3. clone onnxruntime_backend

git clone https://github.com/triton-inference-server/onnxruntime_backend.git

4. 切换到分支 22.12

cd onnxruntime_backend && git checkout r22.12

5. 首次尝试编译,产生 Dockerfile.ort

mkdir -p build && \
cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
    -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.9.0 \
    -DTRITON_BUILD_CONTAINER_VERSION=21.08 \
    -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON \
    -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON \
    -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 \
    -DONNXRUNTIME_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
    -DOV_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
    .. && \
make install

在 build 目录下会产生 Dockerfile.ort 文件。这个文件的作用是使用 docker 容器来编译 onnxruntime 依赖的库。但是呢在镜像 nvcr.io/nvidia/tritonserver:22.12-py3 里面其实已经有这些库了。所以不需要再通过 docker 容器编译一次了。

在产生的这个 Dockerfile.ort 文件中,从第 66 行之后就是 copy 编译完成的文件了,所以只需要手动把第 66 行之后的 .h 头文件复制一下就可以了。而那些 .so 文件在 /opt/tritonserver/backends/onnxruntime/ 目录里面都有。

而这些 .h 头文件的位置就在仓库 https://github.com/microsoft/onnxruntime 里面。

git clone -b rel-1.9.0 --recursive https://github.com/microsoft/onnxruntime onnxruntime

从 Dockerfile.ort 节选的脚本

#!/bin/bash

BASE_IMAGE=nvcr.io/nvidia/tritonserver:21.08-py3-min
ONNXRUNTIME_VERSION=1.9.0
ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime
ONNXRUNTIME_BUILD_CONFIG=Release
ONNXRUNTIME_OPENVINO_VERSION=2021.2.200

mkdir -p /workspace && \
cd /workspace && \
git clone -b rel-${ONNXRUNTIME_VERSION} --recursive ${ONNXRUNTIME_REPO} onnxruntime &&         (cd onnxruntime && git submodule update --init --recursive) && \
cd /workspace/onnxruntime

#
# Copy all artifacts needed by the backend to /opt/onnxruntime
#
mkdir -p /opt/onnxruntime && \
cd /opt/onnxruntime && \
cp /workspace/onnxruntime/LICENSE /opt/onnxruntime && \
cat /workspace/onnxruntime/cmake/external/onnx/VERSION_NUMBER > /opt/onnxruntime/ort_onnx_version.txt

# ONNX Runtime headers, libraries and binaries
mkdir -p /opt/onnxruntime/include &&  \
cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h        /opt/onnxruntime/include && \
cp /workspace/onnxruntime/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h        /opt/onnxruntime/include && \
cp /workspace/onnxruntime/include/onnxruntime/core/providers/cpu/cpu_provider_factory.h        /opt/onnxruntime/include

# TensorRT specific headers and libraries
cp /workspace/onnxruntime/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h        /opt/onnxruntime/include 

# OpenVino specific headers and libraries
cp -r /opt/intel/openvino_2022/docs/licensing /opt/onnxruntime/LICENSE.openvino

cp /workspace/onnxruntime/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h        /opt/onnxruntime/include

6. 在 onnxruntime_backend/CMakeLists.txt 文件中增加 link 配置

6.1 增加版本信息

103 set(TRITON_BACKEND_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/backend repo")
104 set(TRITON_CORE_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/core repo")
105 set(TRITON_COMMON_REPO_TAG "r22.12" CACHE STRING "Tag for triton-inference-server/common repo")

6.2 增加 include 和 lib 配置

修改 onnxruntime_backend/CMakeLists.txt 文件,需要改动两个地方:1. 删除 docker 开关,在第 123 行,把 docker 开关改成 OFF;2. 增加 link 目录配置,在第 107 行左右增加两个配置。 - INCLUDE_DIRECTORIES 是上一步 copy 出来的 .h 头文件的目录 - LINK_DIRECTORIES 是 libtriton_onnxruntime.so 依赖的 onnx 库的目录。

INCLUDE_DIRECTORIES (
    ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime-dev/include
    )
LINK_DIRECTORIES (
    /opt/tritonserver/backends/onnxruntime/
    )

6.3 增加 crypt 库

target_link_libraries 的位置增加 crypt

target_link_libraries(
  triton-onnxruntime-backend
  PRIVATE
    triton-core-serverapi   # from repo-core
    triton-core-backendapi  # from repo-core
    triton-core-serverstub  # from repo-core
    triton-backend-utils    # from repo-backend
    ${TRITON_ONNXRUNTIME_LDFLAGS}
    ${ONNXRUNTIME_LIBRARY}
    crypt
)

6.x 可能需要的依赖包

7. 开始正式编译

mkdir -p build && \
cd build && \
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install \
    -DTRITON_BUILD_ONNXRUNTIME_VERSION=1.9.0 \
    -DTRITON_BUILD_CONTAINER_VERSION=21.08 \
    -DTRITON_ENABLE_ONNXRUNTIME_TENSORRT=ON \
    -DTRITON_ENABLE_ONNXRUNTIME_OPENVINO=ON \
    -DTRITON_BUILD_ONNXRUNTIME_OPENVINO_VERSION=2021.2.200 \
    -DONNXRUNTIME_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
    -DOV_LIBRARY=/opt/tritonserver/backends/onnxruntime/ \
    .. && \
make install

生成的 so 文件为:build/libtriton_onnxruntime.so

8. 加载模型文件的位置

21 浏览
11 爬虫
0 评论