"""Batch-OCR the captcha images in a directory and print a success summary.

Every regular file in the image directory is passed through Tesseract
(via ``pytesseract``).  The recognized text is reduced to uppercase ASCII
letters and digits, and a recognition is counted as successful when exactly
``EXPECTED_CODE_LENGTH`` characters remain.
"""
import os
import re

import pytesseract
from PIL import Image

# NOTE: an optional denoising step was disabled in the original script:
# from clear import clear_image

# Directory scanned when the script is run directly.
IMAGE_DIR = 'clear_image'

# A recognition counts as a success when the cleaned code has this length.
EXPECTED_CODE_LENGTH = 4

# Matches every character that is NOT an uppercase ASCII letter or a digit.
# Equivalent to the original "([^A-Za-z0-9]|[a-z])" pattern, compiled once.
_NON_CODE_CHARS = re.compile(r"[^A-Z0-9]")

# Running counters, updated by test_one() and reported by print_result().
total = 0
count_success = 0
count_failue = 0  # (sic) spelling kept because this is a module-level name


def _clean_code(raw_text):
    """Return *raw_text* with everything but ``A-Z`` and ``0-9`` removed.

    Whitespace is removed as well, so no separate ``strip()`` is required.
    """
    return _NON_CODE_CHARS.sub("", raw_text)


def _sort_key(file_name):
    """Return a sort key that orders numerically named files by their number.

    Names whose stem is not an integer (hidden files, sub-directories, ...)
    are ordered after all numeric names instead of raising ``ValueError``.
    """
    stem = os.path.splitext(file_name)[0]
    try:
        return (0, int(stem), file_name)
    except ValueError:
        return (1, 0, file_name)


def test_one(image_path):
    """Run OCR on one image and update the module-level counters.

    Args:
        image_path: Path of the image file to recognize.

    Side effects:
        Increments ``total`` and either ``count_success`` or
        ``count_failue``, and prints the file name with the cleaned result.
    """
    global total
    global count_success
    global count_failue

    # The context manager closes the underlying file handle, which matters
    # when many images are processed in one run.
    with Image.open(image_path) as image:
        # image = clear_image(image)  # denoising, currently disabled
        # --psm 8: Tesseract page segmentation mode "single word".
        raw_text = pytesseract.image_to_string(
            image, lang='eng', config='--psm 8')

    code = _clean_code(raw_text)
    file_name = os.path.splitext(os.path.basename(image_path))[0]

    total += 1
    print('识别文件: (%s), 识别结果: (%s) ' % (file_name, code))

    # Success is judged by length only.  The original script also carried a
    # disabled stricter check, ``code == file_name``.
    if len(code) == EXPECTED_CODE_LENGTH:
        count_success += 1
    else:
        count_failue += 1


def print_result():
    """Print the totals and the success rate as a percentage.

    The rate is reported as 0.00% when no file has been processed, instead
    of raising ``ZeroDivisionError``.
    """
    ratio = count_success / total * 100 if total else 0.0
    print('识别验证码个数: ', total)
    print('正确识别个数: ', count_success)
    print('错误识别个数: ', count_failue)
    print('识别成功率: %.2f%%' % ratio)


def main(image_dir=IMAGE_DIR):
    """Recognize every regular file in *image_dir* and print the summary.

    Args:
        image_dir: Directory holding the images.  The original script also
            had a disabled variant that pointed at ``'test'``.
    """
    for name in sorted(os.listdir(image_dir), key=_sort_key):
        file_path = os.path.join(image_dir, name)
        if os.path.isfile(file_path):
            test_one(file_path)
        else:
            print('not file')
    print_result()


if __name__ == '__main__':
    main()