Pdf Reader

Functions to read PDF files
Author

Benedict Thekkel

PDF to image

from nbdevAuto.pdf import PDFreader

source

PDFreader

 PDFreader (path, size=(6, 4))

Class for reading and displaying PDF files. Example: path = './Data/<file>.pdf'; pdf = PDFreader(path, size=(10, 8)); pdf[0:5]

Exported source
class PDFreader:
    ''' Read a PDF file and display its pages as matplotlib figures.

        All pages are converted to images when the object is created.
        Slide numbers are zero-based everywhere, including figure titles.

        ex. path = './Data/<file>.pdf'
            pdf = PDFreader(path, size = (10, 8))
            pdf[0:5]     # show slides 0..4
            pdf[-1]      # show the last slide
            pdf()        # show the slide at `index`, then advance `index`
    '''
    def __init__(self, path, size = (6, 4)):
        '''Load the PDF at `path`.

        path: location of the PDF file; must exist (else ValueError).
        size: figure size handed to matplotlib as `figsize`.
        '''
        self.filepath = path
        self.size = size
        self.index = 0
        self.images = self.display_pdf_slides()

    def __str__(self):
        return f'--string--: path:"{self.filepath}"   index:{self.index}   size:{self.size}   len:{len(self.images)}'

    def __repr__(self):
        return f'--Representation-- \r\npath:"{self.filepath}" \r\nindex:{self.index} \r\nsize:{self.size} \r\nlen:{len(self.images)}'

    def __len__(self):
        'return the number of slides'
        return len(self.images)

    @property
    def filepath(self):
        'return file path'
        return self._path

    @filepath.setter
    def filepath(self, value):
        'set the file path; raises ValueError when the path does not exist'
        import os

        if not os.path.exists(value):
            raise ValueError("Pdf file is not reachable")
        self._path = value

    @property
    def size(self):
        'return the figure size used when displaying slides'
        return self._size

    @size.setter
    def size(self, value):
        '''set the figure size

        Accepts a two-item (width, height) sequence, or None to let
        matplotlib pick its default. Anything else raises ValueError here
        instead of failing later inside matplotlib.
        '''
        if value is not None:
            try:
                is_pair = len(value) == 2
            except TypeError:
                is_pair = False
            if not is_pair:
                raise ValueError("size must be a (width, height) pair or None")
        self._size = value

    @property
    def length(self):
        'return the number of slides'
        return len(self.images)

    def display_pdf_slides(self):
        'return pdf slides as images'
        from pdf2image import convert_from_path
        # Convert PDF to a list of images
        return convert_from_path(self.filepath)

    def __call__(self, size = ''):
        '''display the slide at `index`, then advance `index` by one

        size: optional new figure size; the default '' keeps the current one.
        Once `index` has moved past the last slide nothing is displayed.
        '''
        if size != '':
            self.size = size
        self.slides(slice(self.index, self.index + 1))
        self.index += 1

    def __getitem__(self, slide):
        '''display slides like indexing an array

        slide: an integer (negative values count from the end) or a slice.
        Raises IndexError for an out-of-range integer, which also lets
        iteration over the object terminate, and TypeError for any other
        kind of index.
        '''
        import operator

        if isinstance(slide, slice):
            self.slides(slide)
            return

        try:
            number = operator.index(slide)
        except TypeError:
            raise TypeError(
                f'slide indices must be integers or slices, not {type(slide).__name__}'
            ) from None

        total = len(self.images)
        if number < 0:
            number += total
        if not 0 <= number < total:
            raise IndexError('slide index out of range')
        self.slides(slice(number, number + 1))

    def _slide_numbers(self, slide):
        'return the zero-based page numbers selected by the slice `slide`'
        # slice.indices resolves None bounds, negative values and steps
        # exactly the way list slicing does.
        return list(range(*slide.indices(len(self.images))))

    def slides(self, slide):
        '''display the slides selected by the slice `slide` with plt.imshow

        One figure is created per slide and titled with its page number.
        '''
        import matplotlib.pyplot as plt

        for number in self._slide_numbers(slide):
            plt.figure(figsize=self.size)
            plt.imshow(self.images[number])
            plt.title(f'Slide {number}')
            plt.axis('off')
        plt.show()
# Path of the PDF to load; PDFreader raises ValueError if it does not exist.
pdf_path = './Data/AI-based_object_detection.pdf'
#pdf_path = ''
# Creating the reader converts the PDF pages to images right away.
pdffile = PDFreader(pdf_path)
# Evaluating the object on its own shows its __repr__ (path, index, size, len).
pdffile
--Representation-- 
path:"./Data/AI-based_object_detection.pdf" 
index:1 
size:(6, 4) 
len:17
# Display the slides selected by an explicit slice object (here slide 10 only).
pdffile.slides(slice(10,11))

# Calling the object displays the slide at `index`, then increments `index`.
pdffile()

# Integer indexing displays a single slide.
pdffile[2]

# Slice indexing displays a range of slides (here slides 0 and 1).
pdffile[0:2]

# Change the figure size used for the slides displayed from now on.
pdffile.size = (3,2)
pdffile[0:2]

Back to top