Skip to content

OCR 部署使用

PaddleOCR

文档

Python 环境安装

下载

国内镜像下载

安装

sh
./configure --prefix=/解压路径 --enable-optimizations


make -j$(nproc)


sudo make altinstall

创建软链接

sh
sudo ln -s /解压路径/bin/python版本 /usr/bin/python版本
sudo ln -s /解压路径/bin/pip版本 /usr/bin/pip版本

测试

sh
python版本 -V

PaddlePaddle 安装

下载

  • 在线
sh
pip版本 install paddlepaddle==3.1.0 -i https://www.paddlepaddle.org.cn/packages/stable/cpu/
  • 离线

下载地址

sh
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
mkdir build && cd build
cmake .. -DPY_VERSION=3.9 -DWITH_GPU=OFF -DWITH_TESTING=OFF
make -j$(nproc)
pip install python/dist/paddlepaddle-*.whl

PaddleOCR 安装

在线安装

sh
sudo pip3.9 install paddleocr --resume-retries=5

测试安装结果

sh
python3.9 -c "from paddleocr import PaddleOCR; print('安装成功')"

接入

通过 Python 命令行调用

  • py脚本
python
# ocr_api.py
from paddleocr import PaddleOCR
import sys
import json


# Build the OCR engine once at import time; recognize() below reuses this instance
# for every call instead of constructing a new engine per image.
# NOTE(review): use_angle_cls=True / lang="ch" presumably enable the text-angle
# classifier and select the Chinese model — confirm against the installed PaddleOCR version.
ocr = PaddleOCR(use_angle_cls=True, lang="ch")


def recognize(image_path):
    """Run OCR on one image and return a flat, JSON-serializable result list.

    Args:
        image_path: Path of the image, passed unchanged to ``ocr.ocr``.

    Returns:
        A list of ``[box, (text, score)]`` entries, one per recognized text
        region, where ``box`` is a plain nested list and ``score`` is a
        Python ``float``. The list is empty when nothing was recognized.
    """
    result = ocr.ocr(image_path, cls=True)
    entries = []
    for line in result or []:
        # A per-image entry can be None/empty when no text was detected;
        # iterating it directly would raise TypeError.
        if not line:
            continue
        for box, (text, score) in line:
            # The box may be a numpy array or already a plain list depending
            # on the PaddleOCR version; normalize to a list either way.
            points = box.tolist() if hasattr(box, "tolist") else box
            entries.append([points, (text, float(score))])
    return entries


if __name__ == "__main__":
    # Fail with a usage message (written to stderr, exit status 1) instead of
    # an IndexError traceback when the image path argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]
    # Emit the recognition result as a single JSON document on stdout.
    print(json.dumps(recognize(image_path)))
  • java调用
java
import java.io.BufferedReader;
import java.io.InputStreamReader;


/**
 * Calls the ocr_api.py script as a child process and returns what it prints.
 */
public class PaddleOCRJava {
    /**
     * Runs {@code python ocr_api.py <imagePath>} and collects its standard output.
     *
     * @param imagePath path of the image, passed to the script as its only argument
     * @return the script's standard output with line breaks removed (the JSON string)
     * @throws Exception if the process cannot be started, reading its output fails,
     *                   the wait is interrupted, or the script exits with a non-zero status
     */
    public static String runOCR(String imagePath) throws Exception {
        ProcessBuilder pb = new ProcessBuilder("python", "ocr_api.py", imagePath);
        // Forward the child's stderr to this process. Left as an unread pipe, a chatty
        // child can fill the pipe buffer and block forever while we wait on stdout.
        pb.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process p = pb.start();

        StringBuilder result = new StringBuilder();
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(p.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(line);
            }
        }

        // Surface script failures instead of returning empty or partial output as a result.
        int exitCode = p.waitFor();
        if (exitCode != 0) {
            throw new java.io.IOException("ocr_api.py exited with code " + exitCode);
        }
        return result.toString(); // JSON string printed by the script
    }

    public static void main(String[] args) throws Exception {
        String jsonResult = runOCR("/path/to/image.jpg");
        System.out.println("OCR结果: " + jsonResult);
    }
}

封装 HTTP 服务

  • py脚本
python
# server.py
from fastapi import FastAPI, UploadFile
from paddleocr import PaddleOCR
import numpy as np
import cv2


# ASGI application object; the /ocr route below is registered on it.
app = FastAPI()
# Build the OCR engine once at import time so every request reuses the same instance.
# NOTE(review): use_angle_cls=True / lang="ch" presumably enable the text-angle
# classifier and select the Chinese model — confirm against the installed PaddleOCR version.
ocr = PaddleOCR(use_angle_cls=True, lang="ch")


@app.post("/ocr")
async def do_ocr(file: UploadFile):
    """Run OCR on an uploaded image and return the recognized text regions.

    Args:
        file: The uploaded image, sent as the multipart form field ``file``.

    Returns:
        ``{"result": [[box, [text, score]], ...]}`` with ``box`` as a plain
        nested list and ``score`` as a Python ``float``. The list is empty
        when no text was recognized.

    Raises:
        HTTPException: With status 400 when the upload is empty or cannot be
            decoded as an image.
    """
    # Imported here so this handler stays self-contained.
    from fastapi import HTTPException

    data = await file.read()
    if not data:
        raise HTTPException(status_code=400, detail="uploaded file is empty")

    image = cv2.imdecode(np.frombuffer(data, np.uint8), cv2.IMREAD_COLOR)
    # imdecode signals an undecodable buffer by returning None rather than raising.
    if image is None:
        raise HTTPException(status_code=400, detail="uploaded file is not a decodable image")

    result = ocr.ocr(image, cls=True)
    items = []
    for line in result or []:
        # A per-image entry can be None/empty when no text was detected.
        if not line:
            continue
        for box, (text, score) in line:
            # np.asarray accepts both numpy arrays and plain lists, so the
            # box is normalized to a nested list in either case.
            items.append([np.asarray(box).tolist(), [text, float(score)]])
    return {"result": items}
sh
# Install FastAPI; python-multipart is needed by FastAPI to parse the UploadFile form upload
pip install fastapi python-multipart
# Check whether Uvicorn is already installed
pip show uvicorn
# Install Uvicorn (quoted so the shell does not treat the brackets as a glob pattern)
pip install 'uvicorn[standard]'
# Verify the installation
uvicorn --version
# Start the service
uvicorn server:app --host 0.0.0.0 --port 5000
  • java 调用
java
import java.io.File;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;


/**
 * Minimal HTTP client that uploads an image to the OCR service and returns the response body.
 */
public class OCRClient {
    /**
     * Posts the image as the multipart form field {@code file} and returns the response body.
     *
     * @param url       endpoint to post to
     * @param imagePath path of the image file to upload
     * @return the response body as a string
     * @throws Exception if building the request, executing it, or reading the response fails
     */
    public static String sendImage(String url, String imagePath) throws Exception {
        HttpPost httpPost = new HttpPost(url);

        HttpEntity entity = MultipartEntityBuilder.create()
            .addBinaryBody("file", new File(imagePath),
                ContentType.APPLICATION_OCTET_STREAM, "image.jpg")
            .build();
        httpPost.setEntity(entity);

        // Both the client and the response are closed on every path; the client
        // was previously never closed.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            // Fall back to UTF-8 when the response does not declare a charset.
            return EntityUtils.toString(response.getEntity(), "UTF-8");
        }
    }

    public static void main(String[] args) throws Exception {
        String result = sendImage("http://localhost:5000/ocr", "test.jpg");
        System.out.println(result);
    }
}