Hi
I'm trying to split each PDF page vertically in two and merge all the resulting pages into one file, using the script below.
I get an output file with twice as many pages as the original, but they are all blank.
My script is:
import PyPDF2
# split_pdf(input_pdf, output_pdf): read input_pdf with PyPDF2, add two blank
# half-width pages to a writer for every source page, merge the source page
# into each of them, and write the result to output_pdf.
# NOTE(review): the poster reports that every page of the output is blank.
# NOTE(review): indentation was lost in the paste; presumably everything from
# `page = ...` through the trimbox line is the loop body and the `with` block
# runs once after the loop - confirm.
def split_pdf(input_pdf, output_pdf):
pdf_reader = PyPDF2.PdfReader(input_pdf)
pdf_writer = PyPDF2.PdfWriter()
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
# Width is taken as the x coordinate of the upper-right corner only; unlike
# the height below, the lower-left coordinate is not subtracted.
width = page.mediabox.upper_right[0]
height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
half_width = width / 2
# Left half: a new blank page, half as wide, with the full source page merged in.
page_left = pdf_writer.add_blank_page(width=half_width, height=height)
page_left.merge_page(page)
# Right half: same construction, then the box origin is moved to half_width.
page_right = pdf_writer.add_blank_page(width=half_width, height=height)
page_right.merge_page(page)
# NOTE(review): the blank page was created only half_width wide, so moving the
# lower-left x to half_width presumably leaves an empty visible box - verify.
page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
with open(output_pdf, 'wb') as output_file:
pdf_writer.write(output_file)
# Example usage: the output path is the input path with "_split" appended to
# the file name, in the same folder.
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
Can anyone help me figure out what is wrong with the code?
Thanks in advance
Here are some modifications you can make to address these issues:
Initialize PdfWriter correctly: Change pdf_writer = PyPDF2.PdfWriter() to pdf_writer = PyPDF2.PdfFileWriter().
Adjusting page_right coordinates: After merging the page into page_right, you need to adjust the coordinates of page_right to start from the right half of the page. You can do this by updating the page_right.mediabox.lower_left and page_right.trimbox.lower_left.
Here's the modified script:
import PyPDF2
# split_pdf(input_pdf, output_pdf): same approach as the question's script
# (two blank half-width pages per source page, source page merged into each),
# but the writer is created with PdfFileWriter and the blank pages with
# addBlankPage.
# NOTE(review): this mixes the snake_case reader API (PdfReader, .pages,
# merge_page, mediabox) with camelCase names (PdfFileWriter, addBlankPage,
# trimBox). The camelCase names are presumably the legacy PyPDF2 API and may
# be deprecated or removed in the installed version - confirm before use.
# NOTE(review): indentation was lost in the paste; presumably the `with` block
# runs once after the loop - confirm.
def split_pdf(input_pdf, output_pdf):
pdf_reader = PyPDF2.PdfReader(input_pdf)
pdf_writer = PyPDF2.PdfFileWriter()
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
# Width uses only the upper-right x; height subtracts the lower-left y.
width = page.mediabox.upper_right[0]
height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
half_width = width / 2
# Left half: blank half-width page with the full source page merged in.
page_left = pdf_writer.addBlankPage(width=half_width, height=height)
page_left.merge_page(page)
# Right half: same construction, then the box origin is moved to half_width.
page_right = pdf_writer.addBlankPage(width=half_width, height=height)
page_right.merge_page(page)
page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
page_right.trimBox.lower_left = (half_width, page_right.trimBox.lower_left[1])
with open(output_pdf, 'wb') as output_file:
pdf_writer.write(output_file)
# Example of usage: the output path is the input path with "_split" appended
# to the file name, in the same folder.
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
These changes should help fix the issue you're facing.
When you're merging the left and right pages, you're merging the original page into both the left and right pages.
Try this
import PyPDF2
# split_pdf(input_pdf, output_pdf): variant of the question's script that
# merges page.copy() instead of page into the right-hand blank page.
# NOTE(review): the traceback quoted later in this thread shows merge_page
# failing on this call with "'dict' object has no attribute 'get_contents'",
# i.e. page.copy() handed merge_page a plain dict rather than a page object.
# NOTE(review): indentation was lost in the paste; presumably the `with` block
# runs once after the loop - confirm.
def split_pdf(input_pdf, output_pdf):
pdf_reader = PyPDF2.PdfReader(input_pdf)
pdf_writer = PyPDF2.PdfWriter()
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
# Width uses only the upper-right x; height subtracts the lower-left y.
width = page.mediabox.upper_right[0]
height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
half_width = width / 2
# Left half: blank half-width page with the full source page merged in.
page_left = pdf_writer.add_blank_page(width=half_width, height=height)
page_left.merge_page(page)
page_right = pdf_writer.add_blank_page(width=half_width, height=height)
# This is the line the quoted AttributeError traceback points at.
page_right.merge_page(page.copy()) # Create a copy of the original page
# Move the right page's box origin to half_width.
page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
with open(output_pdf, 'wb') as output_file:
pdf_writer.write(output_file)
# Example usage: the output path is the input path with "_split" appended to
# the file name, in the same folder.
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
Hi Caltang
I get this error when I run the script:
AttributeError Traceback (most recent call last)
<ipython-input-12-fa8bf0cb6d10> in <module>
27 input_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
28 output_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 29 split_pdf(input_pdf_path, output_pdf_path)
<ipython-input-12-fa8bf0cb6d10> in split_pdf(input_pdf, output_pdf)
17
18 page_right = pdf_writer.add_blank_page(width=half_width, height=height)
---> 19 page_right.merge_page(page.copy()) # Create a copy of the original page
20 page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
21 page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in merge_page(self, page2, expand)
683 expanded to accommodate the dimensions of the page to be merged.
684 """
--> 685 self._merge_page(page2, expand=expand)
686
687 def mergePage(self, page2: "PageObject") -> None: # pragma: no cover
~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in _merge_page(self, page2, page2transformation, ctm, expand)
757 )
758
--> 759 page2content = page2.get_contents()
760 if page2content is not None:
761 page2content = ContentStream(page2content, self.pdf)
AttributeError: 'dict' object has no attribute 'get_contents'
Hmm... I think there's a version compatibility issue with the PyPDF2 library.
Can you use this:
pip install PyMuPDF
Then use this:
import fitz # PyMuPDF
# split_pdf(input_pdf, output_pdf): PyMuPDF (fitz) variant. For every page of
# the input it creates two new half-width pages in a second document, calls
# show_pdf_page on each of them, then saves that document to output_pdf.
# NOTE(review): a later reply in this thread reports that the first
# show_pdf_page call raises ValueError("bad number of positional parameters");
# the quoted library source counts positional arguments only, and here every
# argument is passed by keyword.
# NOTE(review): indentation was lost in the paste; presumably save() and
# close() run once after the loop - confirm.
def split_pdf(input_pdf, output_pdf):
pdf_document = fitz.open(input_pdf)
# fitz.open() without arguments - presumably a new, empty document; confirm.
pdf_writer = fitz.open()
for page_num in range(pdf_document.page_count):
page = pdf_document[page_num]
width = page.rect.width
height = page.rect.height
half_width = width / 2
# Left half: new half-width page; the full source page rect is passed as `rect`.
page_left = pdf_writer.new_page(width=half_width, height=height)
page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
# Right half: new half-width page; `rect` spans x from half_width to width.
page_right = pdf_writer.new_page(width=half_width, height=height)
page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)
pdf_writer.save(output_pdf)
pdf_writer.close()
I'm not sure if you're really strict on PyPDF2 but let me know if this works?
Thanks for your help
I'm still getting some errors.
Just to make it clear: my PDF document is approx. 1500 pages in landscape format, and I want to split each page vertically :-)
import fitz # PyMuPDF
# split_pdf(input_pdf, output_pdf): PyMuPDF (fitz) variant as run by the
# poster. For every page of the input it creates two new half-width pages in
# a second document, calls show_pdf_page on each of them, then saves that
# document to output_pdf.
# NOTE(review): the traceback quoted right after this script shows the first
# show_pdf_page call raising ValueError("bad number of positional
# parameters"); the quoted library source checks `len(args) in (3, 4)`, and
# here every argument is passed by keyword, so args is empty.
# NOTE(review): indentation was lost in the paste; presumably save() and
# close() run once after the loop - confirm.
def split_pdf(input_pdf, output_pdf):
pdf_document = fitz.open(input_pdf)
# fitz.open() without arguments - presumably a new, empty document; confirm.
pdf_writer = fitz.open()
for page_num in range(pdf_document.page_count):
page = pdf_document[page_num]
width = page.rect.width
height = page.rect.height
half_width = width / 2
# Left half: new half-width page; the full source page rect is passed as `rect`.
page_left = pdf_writer.new_page(width=half_width, height=height)
page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
# Right half: new half-width page; `rect` spans x from half_width to width.
page_right = pdf_writer.new_page(width=half_width, height=height)
page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)
pdf_writer.save(output_pdf)
pdf_writer.close()
# Example of use: the output path is the input path with "_split" appended to
# the file name, in the same folder.
input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
----------------------------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-35-709476f53b96> in <module>
25 input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
26 output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 27 split_pdf(input_pdf_path, output_pdf_path)
<ipython-input-35-709476f53b96> in split_pdf(input_pdf, output_pdf)
14
15 page_left = pdf_writer.new_page(width=half_width, height=height)
---> 16 page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
17
18 page_right = pdf_writer.new_page(width=half_width, height=height)
~\AppData\Roaming\Python\Python38\site-packages\fitz\utils.py in show_pdf_page(*args, **kwargs)
122 """
123 if len(args) not in (3, 4):
--> 124 raise ValueError("bad number of positional parameters")
125 pno = None
126 if len(args) == 3:
ValueError: bad number of positional parameters
Hmm, I'm stuck on this myself. Maybe Stackoverflow is the better place for this?
I found a solution after all.
It wasn't as advanced as I thought it would be.
import PyPDF2
import copy
def split_pdf(input_pdf, output_pdf):
    """Split every page of a PDF into a left and a right half.

    Each source page is written twice to the output, first with its crop
    box restricted to the half that is displayed on the left, then to the
    half displayed on the right. The page content itself is not modified;
    only the crop box of each copy changes.

    Args:
        input_pdf: Path (or binary file object) of the PDF to read.
        output_pdf: Path of the PDF to write; it is overwritten if it exists.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()

    for page in pdf_reader.pages:
        # Copy first and only then touch a box: reading a box stores a
        # rectangle object on the page it is read from, and a shallow copy
        # taken afterwards would share that rectangle between both halves.
        first_half = copy.copy(page)
        second_half = copy.copy(page)

        box = first_half.cropbox
        left, bottom = float(box.left), float(box.bottom)
        right, top = float(box.right), float(box.top)
        # Use the true midpoint so boxes that do not start at (0, 0) are
        # still cut in the middle.
        mid_x = (left + right) / 2
        mid_y = (bottom + top) / 2

        # The page's rotation decides which unrotated half the viewer shows
        # on the left-hand side.
        rotation = page.rotation % 360
        if rotation == 90:
            # Displayed left = unrotated bottom half.
            first_half.cropbox.upper_right = (right, mid_y)
            second_half.cropbox.lower_left = (left, mid_y)
        elif rotation == 180:
            # Displayed left = unrotated right half.
            first_half.cropbox.lower_left = (mid_x, bottom)
            second_half.cropbox.upper_right = (mid_x, top)
        elif rotation == 270:
            # Displayed left = unrotated top half.
            first_half.cropbox.lower_left = (left, mid_y)
            second_half.cropbox.upper_right = (right, mid_y)
        else:
            first_half.cropbox.upper_right = (mid_x, top)
            second_half.cropbox.lower_left = (mid_x, bottom)

        pdf_writer.add_page(first_half)
        pdf_writer.add_page(second_half)

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)