How to use PaddleOCR in Python for image OCR

We have a large number of images that need OCR, so I will try using PaddleOCR for the job.

.env file (fill in the values for your environment):

DB_CONNECTION = mysql
DB_HOST =
DB_PORT = 3306
DB_DATABASE =
DB_USERNAME =
DB_PASSWORD =
DB_PREFIX =

REDIS_HOST =
REDIS_PASSWORD =
REDIS_PORT =
REDIS_DB =

GOOGLE_API_KEY =

OSS_HOST =

config.py file

import os
def import_env():
    """Load ``KEY = value`` pairs from ``.env`` in the current directory into ``os.environ``.

    Does nothing when ``.env`` does not exist. Blank lines, lines starting
    with ``#`` and lines without ``=`` (or with an empty key) are skipped.
    Each line is split on the first ``=`` only, so values that themselves
    contain ``=`` are kept intact. Keys and values are stripped of
    surrounding whitespace; an empty value is stored as ``''``.
    """
    if not os.path.exists('.env'):
        return

    print('Importing environment from .env...')
    # The context manager guarantees the file handle is released.
    with open('.env') as env_file:
        for line in env_file:
            entry = line.strip()
            if not entry or entry.startswith('#'):
                continue
            # partition() splits on the first '=' only, so 'KEY = a=b==' works.
            key, separator, value = entry.partition('=')
            key = key.strip()
            if not separator or not key:
                continue
            os.environ[key] = value.strip()

ocr.py file

import easyocr

import pathlib
import textwrap
import mysql.connector
from paddleocr import PaddleOCR, draw_ocr
import sys
import os
import google.generativeai as genai
import time
from config import import_env

from random import randint

import urllib.request

from PIL import Image

import shutil

import json

import_env()

from IPython.display import display
from IPython.display import Markdown

import redis

import os.path

# reader = easyocr.Reader(['en'], gpu=False)

# Module-wide PaddleOCR engine, created once at import time and reused by
# paddle_nlp_image(): English model, angle classifier enabled, GPU disabled.
ocr = PaddleOCR(
    lang="en",
    use_angle_cls=True,
    use_gpu=False,
)

# Hand the Google Generative AI client its API key from the environment.
genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))


def paddle_nlp_image(path):
    """Run PaddleOCR on an image and return the recognized text strings.

    When ``path`` does not exist locally, the image is first downloaded from
    the URL ``OSS_HOST + path`` (``OSS_HOST`` is read from the environment)
    and saved at ``path``.

    Args:
        path: Local image path; also the suffix appended to ``OSS_HOST`` to
            build the download URL.

    Returns:
        A list with the recognized text of every detected line, ``[]`` when
        a page result does not have the expected structure, or ``False``
        when the image had to be downloaded and the download failed.
    """
    dst = path
    if not os.path.exists(dst):
        # Default to '' so an unset OSS_HOST yields an invalid URL (handled
        # below as a failed download) instead of a TypeError on None + str.
        src = os.environ.get('OSS_HOST', '') + dst
        print("image src " + src)
        try:
            urllib.request.urlretrieve(src, dst)
        except Exception:
            # Best-effort download: any failure is reported as False, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

    print("processing the images " + dst)
    result = ocr.ocr(dst, cls=True)

    lists = []
    for res in result or []:
        if res is None:
            # NOTE(review): some PaddleOCR versions appear to report a page
            # with no detected text as None - confirm against the installed
            # version.
            continue
        try:
            for line in res:
                # line[1][0] is the value collected as the recognized text.
                text = line[1][0]
                print(text)
                lists.append(text)
        except (TypeError, IndexError, KeyError):
            print("Error")
            return []
    return lists

def nlp_image(path):
    """Run EasyOCR on an image and return the recognized text.

    When ``path`` does not exist locally, the image is first downloaded from
    the URL ``OSS_HOST + path`` (``OSS_HOST`` is read from the environment)
    and saved at ``path``.

    Args:
        path: Local image path; also the suffix appended to ``OSS_HOST`` to
            build the download URL.

    Returns:
        The value returned by ``reader.readtext(path, detail=0)``, or
        ``False`` when the image had to be downloaded and the download
        failed.
    """
    dst = path
    if not os.path.exists(dst):
        # Default to '' so an unset OSS_HOST yields an invalid URL (handled
        # below as a failed download) instead of a TypeError on None + str.
        src = os.environ.get('OSS_HOST', '') + dst
        print("image src " + src)
        try:
            urllib.request.urlretrieve(src, dst)
        except Exception:
            # Best-effort download, matching the sibling OCR helpers: any
            # failure (not only ValueError) is reported as False.
            return False

    # The module-level `reader` is commented out in this file, so relying on
    # it raised NameError. Use a module-level `reader` if one is defined;
    # otherwise build the EasyOCR reader once and cache it on the function,
    # because loading the model is expensive.
    reader = globals().get('reader') or getattr(nlp_image, '_reader', None)
    if reader is None:
        reader = easyocr.Reader(['en'], gpu=False)
        nlp_image._reader = reader

    result = reader.readtext(path, detail=0)
    print(result)
    return result

def search_from_google_ai(path):
    """Send an image to the Gemini vision model and return the response text.

    When ``path`` does not exist locally, the image is first downloaded from
    the URL ``OSS_HOST + path`` (``OSS_HOST`` is read from the environment)
    and saved at ``path``.

    Args:
        path: Local image path; also the suffix appended to ``OSS_HOST`` to
            build the download URL.

    Returns:
        ``response.text`` from the model, or ``False`` when the download
        failed or a ``ValueError`` was raised while generating/reading the
        response.
    """
    # NOTE(review): confirm 'gemini-pro-vision' is still served by the API.
    model = genai.GenerativeModel('gemini-pro-vision')

    dst = path
    if not os.path.exists(dst):
        # Default to '' so an unset OSS_HOST yields an invalid URL (handled
        # below as a failed download) instead of a TypeError on None + str.
        src = os.environ.get('OSS_HOST', '') + dst
        print("image src " + src)
        try:
            urllib.request.urlretrieve(src, dst)
        except Exception:
            # Best-effort download: any failure is reported as False, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return False

    # The context manager closes the image file once the request is done.
    with Image.open(dst) as img:
        try:
            response = model.generate_content(img)
            print(response.text)
            return response.text
        except ValueError:
            # Presumably raised when the response carries no usable text
            # (e.g. a blocked prompt) - verify against the client library.
            return False


# Batch job: for every gallery row without a description, OCR the image,
# fall back to the (Redis-cached) Gemini description when OCR finds nothing,
# and store the result as JSON in img_desc.

# `or <default>` covers both a missing variable and the empty values of the
# .env template.
db = mysql.connector.connect(
    host=os.environ.get('DB_HOST'),
    port=int(os.environ.get('DB_PORT') or 3306),
    user=os.environ.get('DB_USERNAME'),
    password=os.environ.get('DB_PASSWORD'),
    database=os.environ.get('DB_DATABASE'),
)

r = redis.Redis(
    host=os.environ.get('REDIS_HOST'),
    port=int(os.environ.get('REDIS_PORT') or 6379),
    db=int(os.environ.get('REDIS_DB') or 0),
    password=os.environ.get('REDIS_PASSWORD'),
    decode_responses=True,
    protocol=3,
)
cursor = db.cursor()

try:
    cursor.execute("SELECT * from sd_goods_gallery where img_desc is null order by img_id desc limit 10000")

    items = cursor.fetchall()

    for item in items:
        # Column 0 is used as img_id in the UPDATE below; column 5 is the
        # value handed to the OCR helpers as the image path.
        img_id, img_path = item[0], item[5]
        print("start " + str(img_id))
        print("images " + img_path)

        ocr_lines = paddle_nlp_image(img_path)
        if ocr_lines is False:
            # Download failed. Previously this was serialized and stored as
            # the literal string 'false'.
            ocr_lines = []

        cache_key = str(img_id) + "_gallery_ai_images"
        ai_result = r.get(cache_key)
        print(ai_result)
        if ai_result is None:
            # Cache miss: ask the model and remember a successful answer.
            ai_result = search_from_google_ai(img_path)
            if ai_result is not False:
                r.set(cache_key, ai_result)

        print(json.dumps(ocr_lines))
        print(ai_result)

        if ocr_lines:
            description = ocr_lines
        elif ai_result:
            # OCR found nothing: fall back to the AI description.
            description = [ai_result]
        else:
            # Nothing usable. Leave img_desc NULL so the row is retried on a
            # later run instead of storing '[false]'.
            continue

        cursor.execute(u'''Update sd_goods_gallery set img_desc=%s where img_id=%s;''', (json.dumps(description), img_id))
        # Commit per row so progress survives an interrupted run.
        db.commit()
finally:
    cursor.close()
    db.close()

# this needs to run only once to load the model into memory
# nlp_image()

requirements.txt (note: ocr.py imports `paddleocr`, which is not pinned in this list; install PaddleOCR and its PaddlePaddle backend separately)

APScheduler==3.10.4
asttokens==2.4.1
async-timeout==4.0.3
cachetools==5.3.2
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
decorator==5.1.1
easyocr==1.7.1
exceptiongroup==1.2.0
executing==2.0.1
filelock==3.13.1
fsspec==2023.12.2
google-ai-generativelanguage==0.4.0
google-api-core==2.15.0
google-auth==2.25.2
google-generativeai==0.3.2
googleapis-common-protos==1.62.0
grpcio==1.60.0
grpcio-status==1.60.0
idna==3.6
imageio==2.33.1
ipython==8.18.1
jedi==0.19.1
Jinja2==3.1.3
lazy_loader==0.3
MarkupSafe==2.1.4
matplotlib-inline==0.1.6
mpmath==1.3.0
mysql==0.0.3
mysql-connector==2.2.9
mysqlclient==2.2.3
networkx==3.2.1
ninja==1.11.1.1
numpy==1.26.3
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.18.1
nvidia-nvjitlink-cu12==12.3.101
nvidia-nvtx-cu12==12.1.105
opencv-python-headless==4.9.0.80
packaging==23.2
parso==0.8.3
pexpect==4.9.0
Pillow==10.1.0
prompt-toolkit==3.0.43
proto-plus==1.23.0
protobuf==4.25.1
ptyprocess==0.7.0
pure-eval==0.2.2
pyasn1==0.5.1
pyasn1-modules==0.3.0
pyclipper==1.3.0.post5
Pygments==2.17.2
python-bidi==0.4.2
pytz==2023.3.post1
PyYAML==6.0.1
redis==5.0.1
requests==2.31.0
rq==1.15.1
rsa==4.9
scikit-image==0.22.0
scipy==1.12.0
shapely==2.0.2
six==1.16.0
stack-data==0.6.3
sympy==1.12
tifffile==2023.12.9
torch==2.1.2
torchvision==0.16.2
tqdm==4.66.1
traitlets==5.14.1
triton==2.1.0
typing_extensions==4.9.0
tzlocal==5.2
urllib3==2.1.0
wcwidth==0.2.13

Reference

PaddleOCR