# coin/tess.py

############################################################
### Project: coin
### File: tess.py
### Description: handles all pytesseract-related operations
### Version: 1.0
############################################################
import sys, os, glob
import cv2
from PIL import Image
import pytesseract
from constants import *

# get_tess_model_names(models_path): automatically finds all available
#  language models in models_path and returns their names, i.e. the
#  base names of the "*.traineddata" files without the extension,
#  sorted alphabetically
def get_tess_model_names(models_path=MODELS_PATH):
    # os.path.join only inserts a separator when one is missing, so
    # models_path works both with and without a trailing slash
    pattern = os.path.join(models_path, "*.traineddata")
    # glob yields matches in arbitrary order; sort for a stable result
    return sorted(
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(pattern)
    )

# image_to_text(model, cropped_image): run pytesseract on cropped_image
#  using the language model named model, and return the recognized text
#  with leading/trailing whitespace removed
def image_to_text(model, cropped_image):
    # cropped_image is converted from BGR to a single grayscale channel
    grayscale = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    # NOTE(review): a kernel size of 1 presumably leaves the image
    #  unchanged -- confirm whether a larger odd size was intended
    smoothed = cv2.medianBlur(grayscale, 1)
    # pytesseract is handed a PIL image built from the array
    pil_image = Image.fromarray(smoothed)
    recognized = pytesseract.image_to_string(pil_image, lang=model)
    return recognized.strip()