import os
import re
import pytesseract
from PIL import Image
#from clear import clear_image

# Running tallies shared by test_one() and print_result():
#   total         - number of images processed so far
#   count_success - number of images counted as recognised
#   count_failue  - number of images counted as failed (name kept as-is,
#                   since other code in this file refers to it)
total = count_success = count_failue = 0

def test_one(image_path):
	"""Run OCR on one captcha image and update the module-level counters.

	The recognised text is stripped of surrounding whitespace and then reduced
	to digits and upper-case ASCII letters. A cleaned result of exactly four
	characters is counted as a success, anything else as a failure; the
	stricter comparison against the file name is currently disabled.

	Args:
		image_path: Path of the image file to recognise.
	"""
	global total, count_success, count_failue

	# Open through a context manager so the underlying file handle is
	# released as soon as OCR finishes instead of lingering until GC.
	with Image.open(image_path) as image:
		# Optional denoising step (currently disabled):
		# image = clear_image(image)
		# '--psm 8' selects Tesseract page segmentation mode 8
		# (NOTE(review): documented as "single word" - confirm for the
		# installed Tesseract version).
		code = pytesseract.image_to_string(image, lang='eng', config='--psm 8')

	# Trim leading/trailing whitespace.
	code = code.strip()
	# Keep only digits and upper-case letters; everything else, including
	# lower-case letters, is dropped.
	code = re.sub(r"[^A-Z0-9]", "", code)

	file_name = os.path.splitext(os.path.basename(image_path))[0]
	total += 1
	print('识别文件: (%s), 识别结果: (%s) ' % (file_name, code))

	# Stricter check, currently disabled: code == file_name
	if len(code) == 4:
		count_success += 1
	else:
		count_failue += 1

def print_result():
	"""Print the accumulated counters and the success rate.

	Reads the module-level ``total``, ``count_success`` and ``count_failue``
	and prints each of them, followed by the success rate as a percentage
	with two decimals. When nothing has been processed (``total`` is 0) the
	rate is reported as 0.00% instead of raising ZeroDivisionError.
	"""
	# Guard the division: an empty run leaves total at 0.
	ratio = count_success / total * 100 if total else 0.0
	print('识别验证码个数: ', total)
	print('正确识别个数: ', count_success)
	print('错误识别个数: ', count_failue)
	print('识别成功率: %.2f%%' % ratio)

def _numeric_sort_key(file_name):
	"""Return a sort key ordering names by the integer value of their stem.

	Names whose stem (the part before the extension) parses as an integer
	sort first, in numeric order; every other name sorts after them,
	alphabetically, instead of aborting the sort with ValueError.
	"""
	stem = os.path.splitext(file_name)[0]
	try:
		return (0, int(stem), '')
	except ValueError:
		return (1, 0, file_name)


if __name__ == '__main__':
	# Directory of images to evaluate (a 'test' directory was used here
	# previously).
	image_dir = 'clear_image'
	files = os.listdir(image_dir)
	# Process in numeric order (1, 2, ..., 10) rather than lexicographic.
	files.sort(key=_numeric_sort_key)
	for file_name in files:
		file_path = os.path.join(image_dir, file_name)
		if os.path.isfile(file_path):
			test_one(file_path)
		else:
			print('not file')
	print_result()