Alteryx Designer Desktop Discussions

Find answers, ask questions, and share expertise about Alteryx Designer Desktop and Intelligence Suite.

Blank pages in output file using PyPDF2 in Python script

tore_nielsen
5 - Atom

Hi 

 

I'm trying to split each PDF page vertically into two and merge all the pages into one file, using the script below.

 

I get an output file with twice as many pages as the original, but they are all blank.

 

My script is:

 

 

 

 

import PyPDF2

def split_pdf(input_pdf, output_pdf):
    """Write output_pdf with two half-width pages for every page of input_pdf.

    For each source page, two blank pages of half the source width are added
    to the writer and the source page is merged onto each of them. On the
    second page the lower-left x of the mediabox and trimbox is then moved to
    half_width.

    NOTE(review): this is the version reported in the thread as producing an
    output whose pages are all blank.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()

    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]

        # width is taken as the upper-right x alone, while height subtracts
        # the lower-left y; the two agree in convention only when the
        # mediabox's lower-left x is 0 -- TODO confirm for the input file.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]

        half_width = width / 2

        # Left half: a new half-width page with the full source page merged in.
        page_left = pdf_writer.add_blank_page(width=half_width, height=height)
        page_left.merge_page(page)

        # Right half: same construction, then the box origin is shifted.
        # NOTE(review): if the blank page's boxes span x in [0, half_width],
        # moving the lower-left x to half_width would leave a zero-width
        # box -- confirm against the PyPDF2 version in use.
        page_right = pdf_writer.add_blank_page(width=half_width, height=height)
        page_right.merge_page(page)
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)

# Example usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)

 

 

 

 

Can anyone help me figure out what is wrong with the code?

 

Thanks in advance

7 REPLIES 7
Hammad_Rashid
11 - Bolide

 

Here are some modifications you can make to address these issues:

 

  1. Initialize PdfWriter correctly: Change pdf_writer = PyPDF2.PdfWriter() to pdf_writer = PyPDF2.PdfFileWriter().

  2. Adjusting page_right coordinates: After merging the page into page_right, you need to adjust the coordinates of page_right to start from the right half of the page. You can do this by updating the page_right.mediabox.lower_left and page_right.trimbox.lower_left.

Here's the modified script:

 

import PyPDF2

def split_pdf(input_pdf, output_pdf):
    """Write output_pdf with two half-width pages for every page of input_pdf.

    For each source page, two blank half-width pages are added to the writer
    and the source page is merged onto each; the second page's mediabox and
    trimBox lower-left x is then moved to half_width.

    NOTE(review): PdfFileWriter, addBlankPage and trimBox are the legacy
    camelCase PyPDF2 names, mixed here with the newer PdfReader/merge_page
    names. Recent PyPDF2 releases reject the legacy names -- check the
    installed version against the PyPDF2 migration guide.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfFileWriter()

    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]

        # Width is read from the upper-right x; height from the y extent.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]

        half_width = width / 2

        # Left half: blank half-width page with the source page merged in.
        page_left = pdf_writer.addBlankPage(width=half_width, height=height)
        page_left.merge_page(page)

        # Right half: same construction, then shift the box origin to the
        # horizontal midpoint of the source page.
        page_right = pdf_writer.addBlankPage(width=half_width, height=height)
        page_right.merge_page(page)
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimBox.lower_left = (half_width, page_right.trimBox.lower_left[1])

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)

# Example of usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)

 

These changes should help fix the issue you're facing.

caltang
17 - Castor
17 - Castor

When you're merging the left and right pages, you're merging the original page into both the left and right pages.

 

Try this

import PyPDF2

def split_pdf(input_pdf, output_pdf):
    """Write output_pdf with two half-width pages for every page of input_pdf.

    Same construction as the script in the question, except that the right
    half merges page.copy() instead of the page itself.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()

    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num]

        # Width is read from the upper-right x; height from the y extent.
        width = page.mediabox.upper_right[0]
        height = page.mediabox.upper_right[1] - page.mediabox.lower_left[1]

        half_width = width / 2

        # Left half: blank half-width page with the source page merged in.
        page_left = pdf_writer.add_blank_page(width=half_width, height=height)
        page_left.merge_page(page)

        # NOTE(review): the traceback posted in reply to this snippet shows
        # the merge_page(page.copy()) line raising
        # "AttributeError: 'dict' object has no attribute 'get_contents'",
        # i.e. page.copy() produced a plain dict there rather than a page
        # object that merge_page can consume.
        page_right = pdf_writer.add_blank_page(width=half_width, height=height)
        page_right.merge_page(page.copy())  # Create a copy of the original page
        page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
        page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)

# Example usage
input_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)
Calvin Tang
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
tore_nielsen
5 - Atom

Hi Caltang

 

I get this error when I run the script:

 

AttributeError                            Traceback (most recent call last)
<ipython-input-12-fa8bf0cb6d10> in <module>
     27 input_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
     28 output_pdf_path = r"C:\Users\DKTINN\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 29 split_pdf(input_pdf_path, output_pdf_path)

<ipython-input-12-fa8bf0cb6d10> in split_pdf(input_pdf, output_pdf)
     17 
     18         page_right = pdf_writer.add_blank_page(width=half_width, height=height)
---> 19         page_right.merge_page(page.copy())  # Create a copy of the original page
     20         page_right.mediabox.lower_left = (half_width, page_right.mediabox.lower_left[1])
     21         page_right.trimbox.lower_left = (half_width, page_right.trimbox.lower_left[1])

~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in merge_page(self, page2, expand)
    683                 expanded to accommodate the dimensions of the page to be merged.
    684         """
--> 685         self._merge_page(page2, expand=expand)
    686 
    687     def mergePage(self, page2: "PageObject") -> None:  # pragma: no cover

~\AppData\Roaming\Python\Python38\site-packages\PyPDF2\_page.py in _merge_page(self, page2, page2transformation, ctm, expand)
    757             )
    758 
--> 759         page2content = page2.get_contents()
    760         if page2content is not None:
    761             page2content = ContentStream(page2content, self.pdf)

AttributeError: 'dict' object has no attribute 'get_contents'

 

caltang
17 - Castor
17 - Castor

Hmm... I think there's a version compatibility issue with the PyPDF2 library.

 

Can you use this:

 

pip install PyMuPDF

 

 

Then use this:

 

import fitz  # PyMuPDF

def split_pdf(input_pdf, output_pdf):
    """Save output_pdf with two half-width pages per page of input_pdf (PyMuPDF).

    Each source page is drawn onto two newly created pages with
    show_pdf_page: once targeting the source page's own rect, and once
    targeting the rectangle (half_width, 0, width, height).
    """
    pdf_document = fitz.open(input_pdf)
    pdf_writer = fitz.open()  # opened without arguments; receives the new pages

    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]

        width = page.rect.width
        height = page.rect.height

        half_width = width / 2

        # NOTE(review): a traceback posted later in this thread shows this
        # keyword-only call failing with "ValueError: bad number of positional
        # parameters"; the quoted library code requires 3 or 4 positional
        # arguments.
        page_left = pdf_writer.new_page(width=half_width, height=height)
        page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)

        # NOTE(review): the target rect starts at x=half_width on a page that
        # was created with width=half_width -- confirm that this lands inside
        # the visible page area.
        page_right = pdf_writer.new_page(width=half_width, height=height)
        page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)

    # Only the output document is closed here; pdf_document is not closed in
    # this function.
    pdf_writer.save(output_pdf)
    pdf_writer.close()

 

 

I'm not sure if you're really strict on PyPDF2 but let me know if this works?

Calvin Tang
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
tore_nielsen
5 - Atom

Thanks for your help

 

I'm still getting some errors?

 

Just to make it clear: my PDF document is approx. 1500 pages in landscape format, and I want to split each page vertically :-)

 

import fitz  # PyMuPDF

def split_pdf(input_pdf, output_pdf):
    """Save output_pdf with two half-width pages per page of input_pdf (PyMuPDF).

    Each source page is drawn onto two newly created pages with
    show_pdf_page: once targeting the source page's own rect, and once
    targeting the rectangle (half_width, 0, width, height).
    """
    pdf_document = fitz.open(input_pdf)
    pdf_writer = fitz.open()  # opened without arguments; receives the new pages

    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]

        width = page.rect.width
        height = page.rect.height

        half_width = width / 2

        # NOTE(review): the traceback below this script points at this call:
        # the quoted library code requires 3 or 4 positional arguments and
        # raises "ValueError: bad number of positional parameters" for this
        # keyword-only call.
        page_left = pdf_writer.new_page(width=half_width, height=height)
        page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)

        # NOTE(review): the target rect starts at x=half_width on a page that
        # was created with width=half_width -- confirm that this lands inside
        # the visible page area.
        page_right = pdf_writer.new_page(width=half_width, height=height)
        page_right.show_pdf_page(rect=fitz.Rect(half_width, 0, width, height), pdf=pdf_document, page_number=page_num)

    # Only the output document is closed here; pdf_document is not closed in
    # this function.
    pdf_writer.save(output_pdf)
    pdf_writer.close()

# Example of use
input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
split_pdf(input_pdf_path, output_pdf_path)


----------------------------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)
<ipython-input-35-709476f53b96> in <module>
     25 input_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di.pdf"
     26 output_pdf_path = r"C:\Users\xxxxxx\Desktop\Pdf splitter - test\didkdk_annualreport_2022_di_split.pdf"
---> 27 split_pdf(input_pdf_path, output_pdf_path)

<ipython-input-35-709476f53b96> in split_pdf(input_pdf, output_pdf)
     14 
     15         page_left = pdf_writer.new_page(width=half_width, height=height)
---> 16         page_left.show_pdf_page(rect=page.rect, pdf=pdf_document, page_number=page_num)
     17 
     18         page_right = pdf_writer.new_page(width=half_width, height=height)

~\AppData\Roaming\Python\Python38\site-packages\fitz\utils.py in show_pdf_page(*args, **kwargs)
    122     """
    123     if len(args) not in (3, 4):
--> 124         raise ValueError("bad number of positional parameters")
    125     pno = None
    126     if len(args) == 3:

ValueError: bad number of positional parameters

 

caltang
17 - Castor
17 - Castor

Hmm, I'm stuck on this myself. Maybe Stack Overflow is the better place for this?

Calvin Tang
Alteryx ACE
https://www.linkedin.com/in/calvintangkw/
tore_nielsen
5 - Atom

I found a solution after all.

 

It wasn't as advanced as I thought it would be.

import PyPDF2
import copy

def split_pdf(input_pdf, output_pdf):
    """Split every page of input_pdf into two halves and write them to output_pdf.

    Each source page is shallow-copied twice and each copy gets its own crop
    box covering one half of the source page's crop box, so no content is
    re-rendered -- only the visible region changes. The output holds the two
    halves of page 1, then the two halves of page 2, and so on.

    Pages whose rotation is 90 are split along the unrotated y axis (which is
    the vertical cut as displayed); all other pages are split along the x
    axis.

    Args:
        input_pdf: Path (or stream) accepted by PyPDF2.PdfReader.
        output_pdf: Path of the file to write; opened in binary write mode.
    """
    pdf_reader = PyPDF2.PdfReader(input_pdf)
    pdf_writer = PyPDF2.PdfWriter()

    for page in pdf_reader.pages:
        # Copy first, so each half owns its page dictionary and the crop box
        # assigned below does not leak into the other half or the source.
        page_left = copy.copy(page)
        page_right = copy.copy(page)

        box = page.cropbox
        left, bottom = float(box.left), float(box.bottom)
        right, top = float(box.right), float(box.top)

        if page.rotation == 90:
            # Displayed rotated by 90 degrees: the lower half in page
            # coordinates is what appears on the left.
            mid_y = (bottom + top) / 2
            left_box = [left, bottom, right, mid_y]
            right_box = [left, mid_y, right, top]
        else:
            # Use the true midpoint so the right half starts where the left
            # half ends, even when the box origin is not (0, 0).
            mid_x = (left + right) / 2
            left_box = [left, bottom, mid_x, top]
            right_box = [mid_x, bottom, right, top]

        # Assign fresh rectangles rather than mutating a possibly shared one.
        page_left.cropbox = PyPDF2.generic.RectangleObject(left_box)
        page_right.cropbox = PyPDF2.generic.RectangleObject(right_box)

        pdf_writer.add_page(page_left)
        pdf_writer.add_page(page_right)

    with open(output_pdf, 'wb') as output_file:
        pdf_writer.write(output_file)
Polls
We’re dying to get your help in determining what the new profile picture frame should be this Halloween. Cast your vote and help us haunt the Community with the best spooky character.
Don’t ghost us—pick your favorite now!
Labels