Finereader Python | Abbyy
return result import logging from functools import wraps logging.basicConfig(level=logging.INFO) logger = logging.getLogger( name )
def _parse_amount(self, raw): match = re.search(r'\$\s*[\d,]+\.?\d0,2', raw) if match: amount = match.group(0).replace('$', '').replace(',', '') return float(amount) return 0.0 abbyy finereader python
# Configure PDF export settings export_params = "PDFExportMode": 1, # 1 = Text and pictures (searchable) "PDFAComplianceMode": 1, # PDF/A-1b "PreserveOriginalPageSize": True return result import logging from functools import wraps
results = [] for image in Path(input_folder).glob("*.jpg"): print(f"Processing: image.name") # OCR text = fr.get_recognized_text(str(image)) # Save text txt_path = Path(output_folder) / f"image.stem.txt" txt_path.write_text(text, encoding='utf-8') # Save metadata results.append( "file": image.name, "text_length": len(text), "timestamp": datetime.now().isoformat() ) raw): match = re.search(r'\$\s*[\d
def __del__(self): self.app.Quit() pythoncom.CoUninitialize() fr = FineReaderCOM() text = fr.get_recognized_text("invoice.jpg") print(text[:500]) Zonal OCR example (extract specific invoice fields) zones = [(100, 200, 400, 230), # Invoice number (100, 300, 400, 330), # Date (500, 500, 800, 800)] # Total amount invoice_data = fr.zonal_ocr("invoice.jpg", zones) print(invoice_data) Advanced: PDF Searchable Creation def create_searchable_pdf(input_pdf_path, output_pdf_path): """Convert image-only PDF to searchable PDF/A.""" fr = FineReaderCOM() doc = fr.app.CreateDocument() # Load PDF pages doc.AddImageFile(input_pdf_path, 0)
def process_one(img_path): out_name = output_folder / f"img_path.stem_ocr" fine_read_cli(str(img_path), str(out_name), "txt")