# Python批量识别图片中的文字并保存到txt文档中
# 源码地址:https://blog.csdn.net/zjwlgr/article/details/126171199
# Tesseract下载地址:https://digi.bib.uni-mannheim.de/tesseract/
# encoding=utf8
"""Batch-recognize the text in images and save it to a txt document.

Source: https://blog.csdn.net/zjwlgr/article/details/126171199
Tesseract download: https://digi.bib.uni-mannheim.de/tesseract/
"""

# Imports
import os
import re
import string

from PIL import Image
import pytesseract


# Function definitions
def imgtostr(imgpath):
    """Recognize all the text in a single image.

    Args:
        imgpath: Path of the image file to run OCR on.

    Returns:
        The recognized text with every newline character removed.
    """
    # Open through a context manager so the image's file handle is released
    # as soon as OCR finishes instead of lingering for the whole batch.
    with Image.open(imgpath) as image:
        # Parse the image using the Simplified Chinese language data.
        text = pytesseract.image_to_string(image, lang='chi_sim')
    return text.replace("\n", "")  # strip line breaks


def writefile(txtpath, strstr):
    """Append text to the txt document, followed by a blank line.

    Args:
        txtpath: Path of the txt file to append to (opened in "a" mode, UTF-8).
        strstr: Text to write.
    """
    with open(txtpath, "a", encoding="utf-8") as f:
        f.write(strstr)  # write the text
        f.write("\n\n")  # separate consecutive entries


if __name__ == '__main__':
    # Directory holding the images to recognize
    imgpath = r'D:\Test\image'
    # Path of the txt file that receives the recognition results
    txtpath = r'D:\Test\word.txt'
    # Start processing
    for dirpath, _dirnames, filenames in os.walk(imgpath):
        for fe in filenames:
            # Build the path from the directory os.walk is currently visiting,
            # not from the root, so files inside subdirectories resolve
            # correctly. The '/' separator is kept so the paths recorded for
            # top-level files are identical to what was written before.
            grpaimg = dirpath + '/' + fe
            textddd = imgtostr(grpaimg)
            writefile(txtpath, grpaimg+":\n"+textddd)
            print(grpaimg, "\n", textddd, end="\n\n")