Generative AI

Designing a Schema-Driven Invoicing Pipeline with lift-pdf for Accounts-Payable Extraction, Validation, and Ledger Generation

def render_pdf(d, path):
   """Draw a one-page invoice PDF for document ``d`` and write it to ``path``.

   The page is built top to bottom from: header, meta table, bill/ship
   parties, line items, totals, payment summary, status line and notes.

   Args:
       d: Mapping describing the invoice. Keys read here: ``vendor_name``,
           ``vendor_address``, ``invoice_number``, ``invoice_date``,
           ``due_date``, ``currency_code``, ``currency_symbol``,
           ``bill_to_name``, ``bill_to_address``, ``tax_rate``, ``notes``;
           ``po_number`` and ``ship_to_name`` are optional (read via
           ``.get``), and ``ship_to_address`` is read only when
           ``ship_to_name`` is truthy.
       path: Destination file name handed to ``SimpleDocTemplate``.

   Derived amounts come from ``compute(d)`` (defined elsewhere in the file),
   which must provide ``items`` (an iterable of 4-tuples unpacked as
   description, quantity, unit price, amount), ``subtotal``, ``discount``,
   ``tax``, ``total``, ``amount_paid``, ``balance`` and ``is_paid``.
   """
   from reportlab.lib.pagesizes import LETTER
   from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
   from reportlab.lib.units import inch
   from reportlab.lib import colors
   from reportlab.platypus import (SimpleDocTemplate, Paragraph, Spacer,
                                   Table, TableStyle)
   c   = compute(d)
   sym = d["currency_symbol"]

   def money(x):
      # Currency symbol + thousands separators + two decimals, e.g. "$1,234.50".
      return f"{sym}{x:,.2f}"

   ss   = getSampleStyleSheet()
   H1   = ParagraphStyle("H1",   parent=ss["Title"],    fontSize=18, leading=22, spaceAfter=2)
   SMALL= ParagraphStyle("SM",   parent=ss["Normal"],   fontSize=8.5, textColor=colors.grey, leading=11)
   LBL  = ParagraphStyle("LBL",  parent=ss["Normal"],   fontSize=8.5, textColor=colors.HexColor("#2b3a67"),
                         spaceAfter=1, fontName="Helvetica-Bold")
   BODY = ParagraphStyle("BODY", parent=ss["Normal"],   fontSize=9.5, leading=13)
   # alignment=2 is reportlab's right alignment (TA_RIGHT).
   RIGHT= ParagraphStyle("R",    parent=ss["Normal"],   fontSize=16, leading=18, alignment=2,
                         textColor=colors.HexColor("#2b3a67"), fontName="Helvetica-Bold")
   story = []

   # --- Header: vendor block on the left, "INVOICE" + number on the right.
   head = Table([[
       [Paragraph(d["vendor_name"], H1), Paragraph(d["vendor_address"], SMALL)],
       [Paragraph("INVOICE", RIGHT),
        Paragraph(f"{d['invoice_number']}", ParagraphStyle('n', parent=SMALL, alignment=2, fontSize=9.5))],
   ]], colWidths=[4.2 * inch, 2.8 * inch])
   head.setStyle(TableStyle([("VALIGN", (0, 0), (-1, -1), "TOP")]))
   story += [head, Spacer(1, 10)]

   # --- Meta: dates, plus PO number (only when present) and currency.
   meta_rows = [["Invoice date", d["invoice_date"], "Due date", d["due_date"]]]
   if d.get("po_number"):
       meta_rows.append(["PO number", d["po_number"], "Currency", d["currency_code"]])
   else:
       meta_rows.append(["Currency", d["currency_code"], "", ""])
   meta = Table(meta_rows, colWidths=[1.3 * inch, 2.2 * inch, 1.3 * inch, 2.2 * inch])
   meta.setStyle(TableStyle([
       ("FONTSIZE", (0, 0), (-1, -1), 9),
       # Columns 0 and 2 hold the labels; style them like the LBL paragraphs.
       ("TEXTCOLOR", (0, 0), (0, -1), colors.HexColor("#2b3a67")),
       ("TEXTCOLOR", (2, 0), (2, -1), colors.HexColor("#2b3a67")),
       ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
       ("FONTNAME", (2, 0), (2, -1), "Helvetica-Bold"),
       ("BOTTOMPADDING", (0, 0), (-1, -1), 3), ("TOPPADDING", (0, 0), (-1, -1), 3)]))
   story += [meta, Spacer(1, 12)]

   # --- Parties: bill-to on the left, ship-to (or a fallback note) on the right.
   bill = [Paragraph("BILL TO", LBL), Paragraph(d["bill_to_name"], BODY),
           Paragraph(d["bill_to_address"], SMALL)]
   if d.get("ship_to_name"):
       ship = [Paragraph("SHIP TO", LBL), Paragraph(d["ship_to_name"], BODY),
               Paragraph(d["ship_to_address"], SMALL)]
   else:
       ship = [Paragraph("SHIP TO", LBL), Paragraph("Same as billing address", SMALL)]
   parties = Table([[bill, ship]], colWidths=[3.5 * inch, 3.5 * inch])
   parties.setStyle(TableStyle([("VALIGN", (0, 0), (-1, -1), "TOP"),
                                ("LEFTPADDING", (0, 0), (-1, -1), 0)]))
   story += [parties, Spacer(1, 14)]

   # --- Line items: one header row, then one row per computed item.
   rows = [["Description", "Qty", "Unit price", "Amount"]]
   for (desc, q, up, t) in c["items"]:
       # The fourth tuple element fills the "Amount" column.
       rows.append([desc, str(q), money(up), money(t)])
   items_tbl = Table(rows, colWidths=[3.5 * inch, 0.7 * inch, 1.4 * inch, 1.4 * inch])
   items_tbl.setStyle(TableStyle([
       ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#2b3a67")),
       ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
       ("FONTSIZE", (0, 0), (-1, -1), 9.5),
       # Every column except the description is right-aligned.
       ("ALIGN", (1, 0), (-1, -1), "RIGHT"),
       ("GRID", (0, 0), (-1, -1), 0.4, colors.HexColor("#cdd3e6")),
       # Alternate the two background colours over the data rows.
       ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#eef1f8")]),
       ("LEFTPADDING", (0, 0), (-1, -1), 8), ("TOPPADDING", (0, 0), (-1, -1), 5),
       ("BOTTOMPADDING", (0, 0), (-1, -1), 5)]))
   story += [items_tbl, Spacer(1, 10)]

   # --- Totals: the discount row appears only when the discount is non-zero.
   tot_rows = [["Subtotal", money(c["subtotal"])]]
   if c["discount"]:
       tot_rows.append(["Discount", "-" + money(c["discount"])])
   tot_rows.append([f"Tax ({d['tax_rate']*100:.1f}%)", money(c["tax"])])
   tot_rows.append(["TOTAL", money(c["total"])])
   totals = Table(tot_rows, colWidths=[1.6 * inch, 1.4 * inch], hAlign="RIGHT")
   totals.setStyle(TableStyle([
       ("FONTSIZE", (0, 0), (-1, -1), 10),
       ("ALIGN", (0, 0), (-1, -1), "RIGHT"),
       # The last row (TOTAL) gets a rule above it and bold, coloured text.
       ("LINEABOVE", (0, -1), (-1, -1), 1.0, colors.HexColor("#2b3a67")),
       ("FONTNAME", (0, -1), (-1, -1), "Helvetica-Bold"),
       ("TEXTCOLOR", (0, -1), (-1, -1), colors.HexColor("#2b3a67")),
       ("TOPPADDING", (0, 0), (-1, -1), 3), ("BOTTOMPADDING", (0, 0), (-1, -1), 3)]))
   story += [totals, Spacer(1, 8)]

   # --- Payment summary: the balance row is green when paid, dark red otherwise.
   pay_rows = [["Amount paid", money(c["amount_paid"])],
               ["Balance due", money(c["balance"])]]
   pay = Table(pay_rows, colWidths=[1.6 * inch, 1.4 * inch], hAlign="RIGHT")
   due_color = colors.HexColor("#1b7a3d") if c["is_paid"] else colors.HexColor("#7a2e2e")
   pay.setStyle(TableStyle([
       ("FONTSIZE", (0, 0), (-1, -1), 10),
       ("ALIGN", (0, 0), (-1, -1), "RIGHT"),
       ("FONTNAME", (0, 1), (-1, 1), "Helvetica-Bold"),
       ("TEXTCOLOR", (0, 1), (-1, 1), due_color),
       ("TOPPADDING", (0, 0), (-1, -1), 2), ("BOTTOMPADDING", (0, 0), (-1, -1), 2)]))
   status = "PAID IN FULL" if c["is_paid"] else "BALANCE DUE"
   story += [pay, Spacer(1, 6),
             Paragraph(f"Status: {status}", BODY), Spacer(1, 16),
             Paragraph("Notes", LBL), Paragraph(d["notes"], BODY)]

   # Lay the collected flowables out on a US-Letter page and write the file.
   SimpleDocTemplate(path, pagesize=LETTER,
                     topMargin=0.7 * inch, bottomMargin=0.7 * inch,
                     leftMargin=0.8 * inch, rightMargin=0.8 * inch).build(story)
# Render every synthetic document in DOCS to its own PDF and collect the
# results in CORPUS for the later pipeline steps.
print("STEP 3/7 · Generating synthetic invoice PDFs…")
CORPUS = []  # one (document dict, ground_truth(document), pdf path) tuple per invoice
for i, d in enumerate(DOCS):
   # Write under /content when that directory exists (presumably a Colab
   # workspace -- confirm), otherwise into the current working directory.
   path = f"/content/invoice_{i}.pdf" if os.path.isdir("/content") else f"invoice_{i}.pdf"
   render_pdf(d, path)
   CORPUS.append((d, ground_truth(d), path))
   print(f"     ✓ {os.path.basename(path)}  —  {d['vendor_name']} → {d['bill_to_name']}")
print()

# Optional visual check: rasterise page 1 of the first generated PDF.
if SHOW_FIRST_PAGE:
   try:
       import pypdfium2 as pdfium
       import matplotlib.pyplot as plt
       pg  = pdfium.PdfDocument(CORPUS[0][2])[0]
       img = pg.render(scale=2.0).to_pil()
       plt.figure(figsize=(6.4, 8.3))
       plt.imshow(img)
       plt.axis("off")
       plt.title("What lift reads — page 1 of invoice_0.pdf", fontsize=10)
       plt.show()
   except Exception as e:
       # The preview is best-effort: a missing optional dependency or an empty
       # corpus must not abort the pipeline, so report the reason and move on.
       print("     page preview skipped:", e, "\n")

Source link

Related Articles

Leave a Reply

Your email address will not be published. Required fields are marked *

Back to top button