"""Streamlit demo app: run chandra OCR on an uploaded PDF page or image."""

import pypdfium2 as pdfium
import streamlit as st
from PIL import Image

from chandra.layout import parse_layout, draw_layout
from chandra.load import load_pdf_images
from chandra.model import load, BatchItem, generate


@st.cache_resource()
def load_model():
    """Return the chandra model, cached via ``st.cache_resource``."""
    return load()


@st.cache_data()
def get_page_image(pdf_file, page_num):
    """Return the image that ``load_pdf_images`` yields for one PDF page.

    Args:
        pdf_file: The uploaded PDF (as handed over by ``st.file_uploader``).
        page_num: Index of the page to load.
            NOTE(review): treated as 0-based because the sidebar input
            starts at 0 -- confirm against ``load_pdf_images``.
    """
    return load_pdf_images(pdf_file, [page_num])[0]


@st.cache_data()
def page_counter(pdf_file):
    """Return the number of pages in ``pdf_file``."""
    doc = pdfium.PdfDocument(pdf_file)
    try:
        return len(doc)
    finally:
        # Release the pdfium handle even if counting the pages fails.
        doc.close()


def ocr_layout(
    img: Image.Image,
) -> tuple[str, Image.Image]:
    """Run layout-aware OCR on ``img``.

    Uses the module-level ``model`` created further down in this script.

    Returns:
        A ``(html, layout_image)`` pair: the generated HTML and the result
        of ``draw_layout`` applied to ``img`` with the parsed layout.
    """
    batch = BatchItem(
        images=[img],
        prompt_type="ocr_layout",
    )
    html = generate([batch], model=model)[0]
    # Only the first 500 characters are echoed to the server console.
    print(f"Generated HTML: {html[:500]}...")
    layout = parse_layout(html, img)
    layout_image = draw_layout(img, layout)
    return html, layout_image


def ocr(
    img: Image.Image,
) -> str:
    """Run plain OCR on ``img`` and return the generated output.

    Uses the module-level ``model`` created further down in this script.
    """
    batch = BatchItem(
        images=[img],
        prompt_type="ocr",
    )
    return generate([batch], model=model)[0]


st.set_page_config(layout="wide")
col1, col2 = st.columns([0.5, 0.5])

model = load_model()

st.markdown("""
# Chandra OCR Demo

This app will let you try chandra, a multilingual OCR toolkit.
""")

in_file = st.sidebar.file_uploader(
    "PDF file or image:",
    type=["pdf", "png", "jpg", "jpeg", "gif", "webp"],
)

if in_file is None:
    st.stop()

filetype = in_file.type
page_count = None
if "pdf" in filetype:
    page_count = page_counter(in_file)
    # Page indices start at 0, so the last selectable index is
    # page_count - 1; allowing page_count itself would request a page that
    # does not exist. max(..., 0) keeps the widget bounds valid (max >= min).
    page_number = st.sidebar.number_input(
        f"Page number out of {page_count}:",
        min_value=0,
        value=0,
        max_value=max(page_count - 1, 0),
    )
    pil_image = get_page_image(in_file, page_number)
else:
    pil_image = Image.open(in_file).convert("RGB")
    page_number = None

run_ocr = st.sidebar.button("Run OCR")
prompt_type = st.sidebar.selectbox(
    "Prompt type",
    ["ocr_layout", "ocr"],
    index=0,
    help="Select the prompt type for OCR.",
)

if pil_image is None:
    st.stop()

if run_ocr:
    if prompt_type == "ocr_layout":
        pred, layout_image = ocr_layout(
            pil_image,
        )
    else:
        pred = ocr(
            pil_image,
        )
        # Plain OCR produces no layout overlay.
        layout_image = None

    with col1:
        html_tab, text_tab, layout_tab = st.tabs(
            ["HTML", "HTML as text", "Layout Image"]
        )
        with html_tab:
            # The model output is rendered as raw HTML on purpose.
            st.markdown(pred, unsafe_allow_html=True)
        with text_tab:
            st.text(pred)

        # Explicit None check: the question is whether a layout image
        # exists, not whether the image object happens to be truthy.
        if layout_image is not None:
            with layout_tab:
                st.image(
                    layout_image,
                    caption="Detected Layout",
                    use_container_width=True,
                )

    with col2:
        st.image(pil_image, caption="Uploaded Image", use_container_width=True)