1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import os
- import re
- import pytesseract
- from PIL import Image
- #from clear import clear_image
# Run-wide tallies updated by test_one() and reported by print_result():
#   total          - number of images processed
#   count_success  - results counted as recognised
#   count_failue   - results counted as not recognised (name kept as spelled,
#                    other code refers to it)
total = count_success = count_failue = 0
def test_one(image_path):
    """Run OCR on one captcha image and update the module-level tallies.

    The image is handed to Tesseract (English model, ``--psm 8``), the raw
    text is reduced to upper-case ASCII letters and digits, and the result is
    printed next to the file's base name (without extension).

    A result counts as a success when exactly four characters remain; it is
    not compared against the file name.

    Args:
        image_path: Path of the image file to recognise.

    Side effects:
        Increments the globals ``total`` and one of ``count_success`` /
        ``count_failue``, and prints one line to stdout.
    """
    global total
    global count_success
    global count_failue

    # Open through a context manager so the underlying file handle is
    # released as soon as OCR is done instead of leaking one per image.
    with Image.open(image_path) as image:
        # image = clear_image(image)  # denoising step, currently disabled
        code = pytesseract.image_to_string(image, lang='eng', config='--psm 8')

    # Drop leading/trailing whitespace from the OCR output.
    code = code.strip()
    # Keep only digits and upper-case letters: the first alternative removes
    # everything outside A-Z / a-z / 0-9, the second removes a-z as well.
    code = re.sub(u"([^\u0041-\u005a\u0061-\u007a\u0030-\u0039]|[a-z])", "", code)

    file_name = os.path.splitext(os.path.basename(image_path))[0]
    total += 1
    print('识别文件: (%s), 识别结果: (%s) ' % (file_name, code))

    # Success is judged by length only (a stricter `code == file_name`
    # comparison is not applied here).
    if len(code) == 4:
        count_success += 1
    else:
        count_failue += 1
def print_result():
    """Print the tallies of the run and the success percentage.

    Reads the module-level counters ``total``, ``count_success`` and
    ``count_failue`` and writes four summary lines to stdout. Returns nothing.

    When no image has been processed (``total`` is 0) the success rate is
    reported as 0.00% instead of raising ``ZeroDivisionError``.
    """
    # Guard the division: total is still 0 if no file was ever processed.
    ratio = count_success / total * 100 if total else 0.0
    print('识别验证码个数: ', total)
    print('正确识别个数: ', count_success)
    print('错误识别个数: ', count_failue)
    print('识别成功率: %.2f%%' % ratio)
if __name__ == '__main__':
    # Directory of images to evaluate (set to 'test' to run on that
    # directory instead).
    image_dir = 'clear_image'

    def _numeric_order(name):
        """Sort key: numeric file stems by value, all other names afterwards."""
        stem = os.path.splitext(name)[0]
        try:
            return (0, int(stem), '')
        except ValueError:
            # Not a numeric stem (hidden file, sub-directory, ...): place it
            # after the numbered images instead of aborting the whole run.
            return (1, 0, name)

    # Process images in numeric order of their file names (1, 2, ..., 10, ...).
    for file_name in sorted(os.listdir(image_dir), key=_numeric_order):
        file_path = os.path.join(image_dir, file_name)
        if os.path.isfile(file_path):
            test_one(file_path)
        else:
            print('not file')
    print_result()
|