import csv
import os
import PyPDF2
# Path to the PDF directory
pdf_dir = ''
# Path to the output CSV file
csv_file_path = ''
# Number of leading lines dropped from every extracted page
HEADER_LINES_TO_SKIP = 5


def body_lines(page_text, skip=HEADER_LINES_TO_SKIP):
    """Return the lines of *page_text* that follow the first *skip* lines.

    Args:
        page_text: Text extracted from one PDF page. ``None`` or an empty
            string (a page with no extractable text) yields an empty list.
        skip: Number of leading lines to drop.

    Returns:
        A list of the remaining lines, split on ``'\\n'``.
    """
    if not page_text:
        return []
    return page_text.split('\n')[skip:]


def append_pdf_lines(writer, pdf_path, skip=HEADER_LINES_TO_SKIP):
    """Write every kept line of the PDF at *pdf_path* as a one-column CSV row.

    Args:
        writer: A ``csv.writer``-style object providing ``writerows``.
        pdf_path: Path of the PDF file to read.
        skip: Number of leading lines dropped from each page.
    """
    # The context manager closes the PDF even if parsing or extraction raises.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            writer.writerows(
                [line] for line in body_lines(page.extract_text(), skip)
            )


def export_pdf_text_to_csv(pdf_directory, output_csv,
                           skip=HEADER_LINES_TO_SKIP):
    """Append the text lines of every PDF in *pdf_directory* to *output_csv*.

    Args:
        pdf_directory: Directory that is scanned (non-recursively) for PDFs.
        output_csv: CSV file opened in append mode; created if missing.
        skip: Number of leading lines dropped from each page.
    """
    with open(output_csv, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(
            csv_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
        )
        # os.listdir returns names in arbitrary order; sort so that the row
        # order of the CSV is reproducible between runs.
        for filename in sorted(os.listdir(pdf_directory)):
            # Compare case-insensitively so files named "*.PDF" are included.
            if filename.lower().endswith('.pdf'):
                append_pdf_lines(
                    writer, os.path.join(pdf_directory, filename), skip
                )


if __name__ == '__main__':
    if not pdf_dir or not csv_file_path:
        raise SystemExit(
            'Set pdf_dir and csv_file_path before running this script.'
        )
    export_pdf_text_to_csv(pdf_dir, csv_file_path)