以下内容在 macOS(M1 芯片)上可以运行。
1. Dockerfile
文件 docker/Dockerfile
FROM ubuntu:24.04

# Build-time only: keeps apt non-interactive without persisting the variable
# into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Refresh the package index, install every OS package, and remove the index
# again in ONE layer. A standalone `apt-get update` layer can be served from a
# stale build cache and make a later install fail, and deleting the index in a
# separate layer would not shrink the image.
# The lib* packages are presumably the shared libraries needed by the
# OpenCV-based image handling of the Python packages below -- TODO confirm.
# NOTE(review): --no-install-recommends is deliberately not used; the
# recommended packages may supply the toolchain needed if a Python dependency
# has to be built from source on arm64. Verify before slimming the image.
RUN apt-get update --allow-releaseinfo-change \
    && apt-get install -y \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender1 \
        python3 \
        python3-pip \
        python3.12-venv \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages into a dedicated virtual environment instead of the
# system interpreter; putting its bin/ first on PATH makes `pip` and `python`
# below (and at runtime) resolve to the venv.
RUN python3 -m venv /opt/venv
ENV VIRTUAL_ENV=/opt/venv
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Both packages come from the Aliyun PyPI mirror, so they share one layer.
RUN pip install --no-cache-dir \
        -i https://mirrors.aliyun.com/pypi/simple/ \
        --trusted-host mirrors.aliyun.com \
        modelscope \
        onnxruntime

# rapidocr is pulled from the Tsinghua PyPI mirror.
RUN pip install --no-cache-dir \
        -i https://pypi.tuna.tsinghua.edu.cn/simple/ \
        rapidocr
2. docker-compose.yml
# Compose definition for the RapidOCR image.
services:
  RapidOCR:
    build:
      # Build context is the docker/ directory next to this file; the
      # Dockerfile path is resolved relative to that context.
      context: docker
      dockerfile: ./Dockerfile
    # Tag given to the built image; used by `docker run rapidocr:v1 ...`.
    image: rapidocr:v1
3. 构建 Docker 镜像
# Build the image described in docker-compose.yml (that file tags it rapidocr:v1).
docker-compose -f docker-compose.yml build
检查 RapidOCR 是否安装成功
# Start a throwaway container (--rm) from rapidocr:v1 and run `rapidocr check` inside it.
docker run --rm -it rapidocr:v1 rapidocr check
4. 在 Python 中使用 RapidOCR
来源:https://rapidai.github.io/RapidOCRDocs/main/quickstart/
4.1 官方例子
from rapidocr import RapidOCR

# URL of the sample image handed to the OCR engine.
SAMPLE_IMAGE = "https://github.com/RapidAI/RapidOCR/blob/main/python/tests/test_files/ch_en_num.jpg?raw=true"

ocr = RapidOCR()
output = ocr(SAMPLE_IMAGE)

# Show the raw result object on stdout.
print(output)

# Presumably renders the detected boxes/text onto the image and saves it
# under this file name -- confirm against the RapidOCR documentation.
output.vis("vis_result.jpg")
4.2 批量处理本地图片
import sys

from rapidocr import RapidOCR

# Batch OCR: every command-line argument is treated as an image path/URL.
# Recognized text goes to stdout (one line per image); progress messages go
# to stderr so stdout can be piped or redirected cleanly.

# A single engine instance is reused for all images.
engine = RapidOCR()

for filename in sys.argv[1:]:
    filename = filename.strip()
    if not filename:
        # Skip empty / whitespace-only arguments.
        continue
    print(f"file: {filename}", file=sys.stderr)
    result = engine(filename)
    # `txts` may be None when nothing is recognized in the image; fall back
    # to an empty tuple so join() emits an empty line instead of raising
    # TypeError and aborting the rest of the batch.
    print(''.join(result.txts or ()))
result 的数据结构如下
RapidOCROutput(
boxes=array([[[ 6., 2.],
[322., 9.],
[320., 104.],
[ 4., 97.]],
...
[[202., 391.],
[287., 391.],
[287., 413.],
[202., 413.]]], dtype=float32),
txts=('正品促销', '大桶装更划算', '强力去污符合国标', '40°C深度防冻不结冰', '日常价¥', '真击', '10.0起', '10.0起', '日常价¥', '底价', '5.8', '券后价¥', '起', '惊喜福利不容错过', '极速发货', '冰点标准', '破损就赔', '假一赔十'),
scores=(0.99893, 0.9843, 0.97842, 0.93412, 0.81418, 0.66226, 0.99243, 0.99849, 0.81369, 0.99633, 0.9999, 0.83907, 0.99993, 0.99782, 0.99813, 0.99786, 0.92679, 0.99717),
word_results=(None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), elapse_list=[0.16008154186420143, 0.017705917358398438, 0.35501312371343374], elapse=0.5328005829360336)