- Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Copy pathpdf_compare.py
83 lines (66 loc) · 2.47 KB
/
pdf_compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# SPDX-FileCopyrightText: 2025 James R. Barlow
# SPDX-License-Identifier: MIT
"""Compare two PDFs."""
from __future__ import annotations

import os
from io import BytesIO
from itertools import zip_longest
from pathlib import Path
from tempfile import TemporaryDirectory

import pikepdf
import pymupdf
import streamlit as st
from lxml import etree
from streamlit_pdf_viewer import pdf_viewer
def do_metadata(pdf):
    """Render one PDF's XMP metadata, DocumentInfo and page count in Streamlit.

    Args:
        pdf: Passed straight to ``pikepdf.open``; ``main`` supplies a
            ``BytesIO`` wrapping the uploaded bytes.
    """
    with pikepdf.open(pdf) as doc:
        # By default pikepdf marks itself as the editor and rewrites
        # DocumentInfo from the XMP when the metadata context exits. This is
        # a viewer, so disable both: otherwise the docinfo displayed below
        # would no longer be what the uploaded file actually contains.
        with doc.open_metadata(
            set_pikepdf_as_editor=False, update_docinfo=False
        ) as meta:
            xml_txt = str(meta)

        # Re-parse with blank text removed so pretty_print can re-indent.
        parser = etree.XMLParser(remove_blank_text=True)
        tree = etree.fromstring(xml_txt, parser=parser)
        st.code(
            etree.tostring(tree, pretty_print=True).decode("utf-8"),
            language="xml",
        )
        st.write(doc.docinfo)
        st.write("Number of pages:", len(doc.pages))
def main():
    """Streamlit page that shows two uploaded PDFs side by side.

    Renders two upload widgets and returns early until both are filled.
    Then shows three expanders: metadata (via ``do_metadata``), per-page
    extracted text, and an embedded PDF viewer for each file.
    """
    st.set_page_config(layout="wide")

    st.title("PDF Compare")
    st.write("Compare two PDFs.")

    col1, col2 = st.columns(2)
    with col1:
        uploaded_pdf1 = st.file_uploader("Upload a PDF", type=["pdf"], key='pdf1')
    with col2:
        uploaded_pdf2 = st.file_uploader("Upload a PDF", type=["pdf"], key='pdf2')
    if uploaded_pdf1 is None or uploaded_pdf2 is None:
        return

    pdf_bytes1 = uploaded_pdf1.getvalue()
    pdf_bytes2 = uploaded_pdf2.getvalue()

    with st.expander("PDF Metadata"):
        col1, col2 = st.columns(2)
        with col1:
            do_metadata(BytesIO(pdf_bytes1))
        with col2:
            do_metadata(BytesIO(pdf_bytes2))

    with TemporaryDirectory() as d:
        # Both PDFs are written to disk once and reused by the text
        # extraction and the viewer below.
        path1 = Path(d, "1.pdf")
        path2 = Path(d, "2.pdf")
        path1.write_bytes(pdf_bytes1)
        path2.write_bytes(pdf_bytes2)

        with st.expander("Text"):
            # Close both documents before the temporary directory is removed;
            # leaving them open leaks handles and can make cleanup fail on
            # platforms that refuse to delete open files.
            with (
                pymupdf.open(os.fspath(path1)) as doc1,
                pymupdf.open(os.fspath(path2)) as doc2,
            ):
                # zip_longest rather than zip: if the page counts differ, the
                # extra pages of the longer PDF are still shown instead of
                # being silently dropped from the comparison.
                pairs = zip_longest(doc1, doc2)
                for number, (page1, page2) in enumerate(pairs, start=1):
                    st.write(f"Page {number}")
                    col1, col2 = st.columns(2)
                    for column, page in ((col1, page1), (col2, page2)):
                        with column, st.container(border=True):
                            if page is None:
                                st.write("_(no page)_")
                            else:
                                st.write(page.get_text())

        with st.expander("PDF Viewer"):
            col1, col2 = st.columns(2)
            with col1:
                pdf_viewer(path1, key='pdf_viewer1', render_text=True)
            with col2:
                pdf_viewer(path2, key='pdf_viewer2', render_text=True)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()