step-1
First, download Poppler and extract it; the script below needs the path to its "bin" folder (passed as poppler_path).
step-2
pip install opencv-python
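step-3
pip install pdf2image pytesseract
(pytesseract is only a wrapper: the Tesseract-OCR program itself must also be installed, and the script below points to its executable.)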
from pdf2image import convert_from_path
import pytesseract
import cv2
# OCR every page of a PDF: render each page to a JPEG, convert it to
# grayscale with OpenCV, and print the text Tesseract recognises on it.

# Tell pytesseract where the Tesseract executable lives. This is set once up
# front because it does not change between pages.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract"

# Render the PDF pages to images. The second positional argument (500) is
# pdf2image's dpi setting; poppler_path points at the local Poppler binaries.
images = convert_from_path(
    r"C:\Users\darsh\OneDrive\Desktop\ML\what is ML.pdf",
    500,
    poppler_path=r"C:\Program Files\poppler-0.68.0_x86\poppler-0.68.0\bin",
)

for i, page in enumerate(images):
    # Save each page of the PDF as page<i>.jpg in the current directory.
    page_file = f"page{i}.jpg"
    page.save(page_file, "JPEG")

    # Read the saved page back with OpenCV; imread returns None (instead of
    # raising) when the file cannot be read, so fail with a clear message.
    src = cv2.imread(page_file)
    if src is None:
        raise RuntimeError(f"could not read rendered page image {page_file!r}")

    # OCR is run on the grayscale version of the page.
    img1 = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(img1)
    print(text)
Comments
Post a Comment