-
Notifications
You must be signed in to change notification settings - Fork 1
/
pdf_wrapper.py
34 lines (26 loc) · 886 Bytes
/
pdf_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import io
import PyPDF2
import urllib.request
class PDFWrapper:
    """
    Wrapper class for PDF files.

    The text of every page is extracted once, at construction time, and kept
    in memory; the reader object itself is not retained afterwards.

    Attributes:
        pageNum: Number of pages reported by the reader.
        pages: Dict mapping a zero-based page index to that page's
            extracted text.
    """

    def __init__(self, reader):
        """
        Build the wrapper from an already-opened PDF reader.

        Args:
            reader: Object exposing a ``pages`` sequence whose items provide
                an ``extract_text()`` method (the factory methods below pass
                a ``PyPDF2.PdfReader``).
        """
        self.pageNum = len(reader.pages)
        # Extract eagerly so the underlying stream is no longer needed once
        # construction has finished.
        self.pages = {
            index: page.extract_text()
            for index, page in enumerate(reader.pages)
        }

    def get_num_pages(self):
        """Return the number of pages in the document."""
        return self.pageNum

    def get_page(self, page_num):
        """
        Return the extracted text of one page.

        Args:
            page_num: Zero-based page index.

        Returns:
            The page's extracted text, or ``None`` when ``page_num`` is not
            a known page index.
        """
        return self.pages.get(page_num)

    @classmethod
    def from_url(cls, url):
        """
        Download a PDF from ``url`` and wrap it.

        The whole response body is read into memory before parsing.
        Implemented as a classmethod so that calling it on a subclass
        returns an instance of that subclass.

        Args:
            url: Anything accepted by ``urllib.request.urlopen``.

        Returns:
            A new instance of ``cls``.
        """
        with urllib.request.urlopen(url) as response:
            remote_file_bytes = io.BytesIO(response.read())
        reader = PyPDF2.PdfReader(remote_file_bytes)
        return cls(reader)

    @classmethod
    def from_local_file(cls, file_path):
        """
        Read a PDF from the local filesystem and wrap it.

        The file is read fully into memory, so the file handle is closed
        before any parsing takes place. Implemented as a classmethod so that
        calling it on a subclass returns an instance of that subclass.

        Args:
            file_path: Path of the PDF file to open in binary mode.

        Returns:
            A new instance of ``cls``.
        """
        with open(file_path, "rb") as handle:
            local_file_bytes = io.BytesIO(handle.read())
        reader = PyPDF2.PdfReader(local_file_bytes)
        return cls(reader)