32 lines
1.1 KiB
Python
32 lines
1.1 KiB
Python
############################################################
### Project: coin
### File: tess.py
### Description: handles all pytesseract-related operations
### Version: 1.0
############################################################
|
|
import sys, os, glob
|
|
import cv2
|
|
from PIL import Image
|
|
import pytesseract
|
|
from constants import *
|
|
|
|
# get_tess_model_names(models_path): return the names of all language
# models (*.traineddata files) found in models_path
|
|
def get_tess_model_names(models_path=MODELS_PATH):
    """Return the names of the language models found in models_path.

    A model is any file matching ``*.traineddata`` directly inside
    models_path; its name is the file name without that extension.

    Args:
        models_path: Directory to search. A trailing path separator is
            optional.

    Returns:
        A sorted list of model names; empty if no file matches.
    """
    # os.path.join adds the separator only when it is missing, so both
    # "models" and "models/" work. Plain string concatenation would turn
    # "models" into the pattern "models*.traineddata".
    pattern = os.path.join(models_path, "*.traineddata")
    # glob yields matches in arbitrary order; sort for a stable result.
    return sorted(
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(pattern)
    )
|
|
|
|
# image_to_text(model, cropped_image): try to recognize the text in
# cropped_image using the language model whose name is given by model
|
|
def image_to_text(model, cropped_image):
    """Recognize the text in cropped_image using the model named model.

    Args:
        model: Language model name, passed to pytesseract as ``lang``.
        cropped_image: Image to read. Either a color image in BGR channel
            order (converted to grayscale here) or an image that is
            already 2-D grayscale. Assumed to be a NumPy array, since
            ``ndim`` is read from it -- TODO confirm against callers.

    Returns:
        The recognized text with leading and trailing whitespace stripped.
    """
    if cropped_image.ndim == 2:
        # Already single-channel: COLOR_BGR2GRAY rejects such input, so
        # use the image as-is instead of raising.
        gray = cropped_image
    else:
        gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    # NOTE(review): OpenCV documents the median-blur aperture as odd and
    # greater than 1; a size of 1 presumably leaves the image unchanged.
    # Kept as-is to preserve OCR results -- confirm whether 3 was intended.
    gray = cv2.medianBlur(gray, 1)
    crop = Image.fromarray(gray)
    return pytesseract.image_to_string(crop, lang=model).strip()
|