25 lines
725 B
Python
25 lines
725 B
Python
# ...
|
|
#
|
|
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
|
|
#
|
|
# This work is licensed under the terms of the GNU GPL, version 2 or
|
|
# later. See the COPYING file in the top-level directory.
|
|
|
|
import logging
|
|
from subprocess import run
|
|
|
|
|
|
def tesseract_ocr(image_path, tesseract_args=''):
    """Run the ``tesseract`` OCR tool on an image and return its text lines.

    The image path and every recognised line are logged at DEBUG level on
    the ``console`` logger.

    Args:
        image_path: Path of the image file handed to ``tesseract``.
        tesseract_args: Extra command-line options for ``tesseract``, given
            either as one shell-style string (e.g. ``'--psm 6'``) or as a
            sequence of already-split arguments. Empty by default, in which
            case the command is just ``tesseract <image_path> stdout``.

    Returns:
        A list of the non-blank output lines with surrounding whitespace
        stripped, or ``None`` when ``tesseract`` exits with a non-zero
        status.
    """
    # Local import: keeps this fix self-contained without touching the
    # module-level import block.
    import shlex

    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    # Previously tesseract_args was accepted but silently ignored.
    if isinstance(tesseract_args, str):
        extra_args = shlex.split(tesseract_args)
    else:
        extra_args = list(tesseract_args or ())

    # Options go after the output base ('stdout'); an argument list (no
    # shell) is used so paths and options are never re-interpreted.
    proc = run(['tesseract', image_path, 'stdout', *extra_args],
               capture_output=True, encoding='utf8')
    if proc.returncode:
        return None

    lines = []
    # split('\n') + strip() (rather than splitlines()) so that form feeds
    # and carriage returns are treated as strippable whitespace, not as
    # line separators.
    for line in proc.stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines
|