test.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. import os
  2. import re
  3. import pytesseract
  4. from PIL import Image
  5. #from clear import clear_image
  6. total = 0
  7. count_success = 0
  8. count_failue = 0
  9. def test_one(image_path):
  10. image = Image.open(image_path)
  11. #image = clear_image(image) #降噪
  12. code = pytesseract.image_to_string(image, lang='eng', config='--psm 8') #识别
  13. #去除首尾空格
  14. code = code.strip()
  15. #保留数字和大写字符
  16. code = re.sub(u"([^\u0041-\u005a\u0061-\u007a\u0030-\u0039]|[a-z])", "", code)
  17. file_name = os.path.splitext(os.path.basename(image_path))[0]
  18. global total
  19. global count_success
  20. global count_failue
  21. total += 1
  22. print('识别文件: (%s), 识别结果: (%s) ' % (file_name, code))
  23. # if code == file_name:
  24. if len(code) == 4:
  25. count_success += 1
  26. else:
  27. count_failue += 1
  28. def print_result():
  29. ratio = count_success / total * 100
  30. print('识别验证码个数: ', total)
  31. print('正确识别个数: ', count_success)
  32. print('错误识别个数: ', count_failue)
  33. print('识别成功率: %.2f%%' % ratio)
  34. if __name__ == '__main__':
  35. # files = os.listdir('test')
  36. files = os.listdir('clear_image')
  37. files.sort(key= lambda x:int(x[:-4]))
  38. for file_path in files:
  39. # file_path = os.path.join('test', file_path)
  40. file_path = os.path.join('clear_image', file_path)
  41. if os.path.isfile(file_path):
  42. test_one(file_path)
  43. else:
  44. print('not file')
  45. print_result()