Blank pages in output file using PyPDF2 in Python script
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Mute
- Printer Friendly Page
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Hi
I'm trying to split each PDF page vertically in two and merge all the resulting pages into one file, using the script below.
I get an output file with twice as many pages as the original, but they are all blank.
My script is:
import PyPDF2
def split_pdf(input_pdf, output_pdf):
    """Write a PDF with two half-width pages for every page of the input.

    NOTE(review): this is the script from the question; the poster reports
    that the pages it produces are all blank. The comments describe what each
    step does, they do not fix that.

    Args:
        input_pdf: Handed to ``PyPDF2.PdfReader``; the example passes a path.
        output_pdf: Path opened in binary write mode for the result.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        # The x-coordinate of the upper-right corner is used as the width;
        # the lower-left x-coordinate is not subtracted.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
        half_width = width / 2
        # Left half: a new half-width blank page with the whole original
        # page merged onto it.
        page_left = pdf_writer.add_blank_page(width=half_width, height=height)
        page_left.merge_page(page)
        # Right half: built the same way, then the lower-left corner of its
        # media box and trim box is moved to x = half_width.
        page_right = pdf_writer.add_blank_page(width=half_width, height=height)
        page_right.merge_page(page)
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)
# Example usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
Can anyone help me figure out what is wrong with the code?
Thanks in advance
- Labels:
- Python
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Here are some modifications you can make to address these issues:
Initialize PdfWriter correctly: Change pdf_writer = PyPDF2.PdfWriter() to pdf_writer = PyPDF2.PdfFileWriter().
Adjusting page_right coordinates: After merging the page into page_right, you need to adjust the coordinates of page_right to start from the right half of the page. You can do this by updating the page_right.mediabox.lower_left and page_right.trimbox.lower_left.
Here's the modified script:
import PyPDF2
def split_pdf(input_pdf, output_pdf):
    """Variant of the question's script using PdfFileWriter/addBlankPage/trimBox.

    NOTE(review): this version mixes camelCase names (``PdfFileWriter``,
    ``addBlankPage``, ``trimBox``) with snake_case ones (``PdfReader``,
    ``merge_page``, ``mediabox``). These presumably belong to different
    PyPDF2 API generations -- confirm that all of them exist in the
    installed version before relying on this.

    Args:
        input_pdf: Handed to ``PyPDF2.PdfReader``; the example passes a path.
        output_pdf: Path opened in binary write mode for the result.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfFileWriter()
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        # The x-coordinate of the upper-right corner is used as the width.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
        half_width = width / 2
        # Left half: new half-width blank page with the original merged in.
        page_left = pdf_writer.addBlankPage(width=half_width, height=height)
        page_left.merge_page(page)
        # Right half: same construction, then the lower-left corner of the
        # media box and trim box is moved to x = half_width.
        page_right = pdf_writer.addBlankPage(width=half_width, height=height)
        page_right.merge_page(page)
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimBox.lower_left = (half_width, page_right.trimBox.lower_left[1])
    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)
# Example of usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
These changes should help fix the issue you're facing.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
When you're merging the left and right pages, you're merging the original page into both the left and right pages.
Try this
import PyPDF2
def split_pdf(input_pdf, output_pdf):
    """Variant of the question's script that merges ``page.copy()`` on the right.

    NOTE(review): the traceback posted in reply to this version shows
    ``merge_page`` failing with "'dict' object has no attribute
    'get_contents'" on the ``page.copy()`` line, i.e. ``page.copy()`` did not
    yield a page object in that environment.

    Args:
        input_pdf: Handed to ``PyPDF2.PdfReader``; the example passes a path.
        output_pdf: Path opened in binary write mode for the result.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]
        # The x-coordinate of the upper-right corner is used as the width.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]
        half_width = width / 2
        # Left half: new half-width blank page with the original merged in.
        page_left = pdf_writer.add_blank_page(width=half_width, height=height)
        page_left.merge_page(page)
        page_right = pdf_writer.add_blank_page(width=half_width, height=height)
        page_right.merge_page(page.copy()) # Create a copy of the original page
        # Move the lower-left corner of the right page's boxes to x = half_width.
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)
# Example usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Hi Caltang
I get this error when I run the script:
AttributeError Traceback (most recent call last)
<ipython-input-12-fa8bf0cb6d10> in <module>
27 input_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
28 output_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 29 split_pdf(input_pdf_path, output_pdf_path)
<ipython-input-12-fa8bf0cb6d10> in split_pdf(input_pdf, output_pdf)
17
18 page_right = pdf_writer.add_blank_page(width=half_width, height=height)
---> 19 page_right.merge_page(page.copy()) # Create a copy of the original page
20 page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
21 page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])
~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in merge_page(self, page2, expand)
683 expanded to accommodate the dimensions of the page to be merged.
684 """
--> 685 self._merge_page(page2, expand=expand)
686
687 def mergePage(self, page2: "PageObject") -> None: # pragma: no cover
~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in _merge_page(self, page2, page2transformation, ctm, expand)
757 )
758
--> 759 page2content = page2.get_contents()
760 if page2content is not None:
761 page2content = ContentStream(page2content, self.pdf)
AttributeError: 'dict' object has no attribute 'get_contents'
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Hmm... I think there's a version compatibility issue with the PyPDF2 library.
Can you use this:
pip install PyMuPDF
Then use this:
import fitz # PyMuPDF
def split_pdf(input_pdf, output_pdf):
    """PyMuPDF attempt: add two half-width pages per input page and save them.

    NOTE(review): the follow-up post reports that the ``show_pdf_page`` calls
    below raise ``ValueError: bad number of positional parameters``; the
    quoted library code counts positional arguments, and here everything is
    passed by keyword. Confirm the expected call form against the PyMuPDF
    documentation for the installed version.

    Args:
        input_pdf: Passed to ``fitz.open`` to load the source document.
        output_pdf: Passed to ``save`` on the output document.
    """
    pdf_document = fitz.open(input_pdf)
    # Second document object; the new pages are added to it and it is saved.
    pdf_writer = fitz.open()
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        width = page.rect.width
        height = page.rect.height
        half_width = width / 2
        # Left page: half-width page onto which the source page is shown,
        # targeting the source page's full rectangle.
        page_left = pdf_writer.new_page(width=half_width, height=height)
        page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
        # Right page: same source page, targeting the rectangle that spans
        # x = half_width .. width.
        page_right = pdf_writer.new_page(width=half_width, height=height)
        page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)
    pdf_writer.save(output_pdf)
    pdf_writer.close()
I'm not sure if you're really strict on PyPDF2 but let me know if this works?
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Thanks for your help
I'm still getting some errors?
Just to make it clear: my PDF document is approx. 1500 pages in landscape format, and I want to split each page vertically :-)
import fitz # PyMuPDF
def split_pdf(input_pdf, output_pdf):
    """PyMuPDF attempt as actually run: two half-width pages per input page.

    NOTE(review): the traceback that follows this script shows the first
    ``show_pdf_page`` call raising ``ValueError: bad number of positional
    parameters``; all arguments are passed by keyword here.

    Args:
        input_pdf: Passed to ``fitz.open`` to load the source document.
        output_pdf: Passed to ``save`` on the output document.
    """
    pdf_document = fitz.open(input_pdf)
    # Second document object; the new pages are added to it and it is saved.
    pdf_writer = fitz.open()
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        width = page.rect.width
        height = page.rect.height
        half_width = width / 2
        # Left page: source page shown onto a new half-width page.
        page_left = pdf_writer.new_page(width=half_width, height=height)
        page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
        # Right page: same source page, targeting x = half_width .. width.
        page_right = pdf_writer.new_page(width=half_width, height=height)
        page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)
    pdf_writer.save(output_pdf)
    pdf_writer.close()
# Example of use
input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
----------------------------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-35-709476f53b96> in <module>
25 input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
26 output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 27 split_pdf(input_pdf_path, output_pdf_path)
<ipython-input-35-709476f53b96> in split_pdf(input_pdf, output_pdf)
14
15 page_left = pdf_writer.new_page(width=half_width, height=height)
---> 16 page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
17
18 page_right = pdf_writer.new_page(width=half_width, height=height)
~\AppData\Roaming\Python\Python38\site-packages\fitz\utils.py in show_pdf_page(*args, **kwargs)
122 """
123 if len(args) not in (3, 4):
--> 124 raise ValueError("bad number of positional parameters")
125 pno = None
126 if len(args) == 3:
ValueError: bad number of positional parameters
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
Hmm, I'm stuck on this myself. Maybe Stack Overflow is a better place for this?
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Notify Moderator
I found a solution after all.
It wasn't as advanced as I thought it would be.
import PyPDF2
import copy
def split_pdf(input_pdf, output_pdf):
    """Split every page of a PDF into its left and right halves.

    Each input page yields two output pages, the visually left half first
    and the right half second. Both halves reuse the original page content
    and differ only in their crop box, so nothing is redrawn or merged.

    Args:
        input_pdf: Path (or binary stream) handed to ``PyPDF2.PdfReader``.
        output_pdf: Path the resulting PDF is written to.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()
    make_box = PyPDF2.generic.RectangleObject

    for page in pdf_reader.pages:
        box = page.cropbox
        left, bottom = (float(v) for v in box.lower_left)
        right, top = (float(v) for v in box.upper_right)

        # /Rotate is applied clockwise at display time, so for 90/270 the
        # visible vertical cut runs along the unrotated page's y axis.
        rotation = page.rotation % 360
        if rotation in (90, 270):
            # Use the true midpoint so a box that does not start at 0 is
            # still cut in the middle.
            middle = (bottom + top) / 2
            first = (left, bottom, right, middle)
            second = (left, middle, right, top)
        else:
            middle = (left + right) / 2
            first = (left, bottom, middle, top)
            second = (middle, bottom, right, top)
        # For 180/270 the unrotated right/top part is what appears on the
        # left of the displayed page, so swap to keep "left half first".
        if rotation in (180, 270):
            first, second = second, first

        for crop in (first, second):
            half = copy.copy(page)
            # Give every shallow copy its own rectangle object; editing a
            # shared crop box in place would change both halves at once.
            half.cropbox = make_box(crop)
            pdf_writer.add_page(half)

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)
