From 7a3b3f2161984223f9876c6e5c88cf6e9ab6e58e Mon Sep 17 00:00:00 2001 From: Emmaline Date: Wed, 4 Jun 2025 11:19:21 -0600 Subject: [PATCH] reverting fieldRename to last working version --- tools/fieldRename.ts | 637 +++++++++---------------------------------- 1 file changed, 132 insertions(+), 505 deletions(-) diff --git a/tools/fieldRename.ts b/tools/fieldRename.ts index fe0d1ac..1b75e6a 100644 --- a/tools/fieldRename.ts +++ b/tools/fieldRename.ts @@ -1,18 +1,13 @@ import { - PDFAcroField, - PDFAcroTerminal, + type PDFAcroField, PDFArray, PDFCheckBox, - PDFContext, - PDFDict, type PDFDocument, type PDFField, - PDFHexString, PDFName, PDFNumber, - type PDFObject, PDFRadioGroup, - PDFRef, + type PDFRef, PDFString, PDFTextField, type PDFWidgetAnnotation, @@ -25,458 +20,63 @@ import { cliAlert, cliLog, cliPrompt } from "../cli/prompts.ts"; import { multiSelectMenuInteractive } from "../cli/selectMenu.ts"; import type { callback, ITool } from "../types.ts"; import { toCase } from "util/caseManagement.ts"; -import { log } from "util/logfile.ts"; -function removeWidgetFromOldField( - doc: PDFDocument, +function applyRename( field: PDFField, - widget: PDFWidgetAnnotation, + name: string, + pattern: RegExp, + change: string, ) { - const maybeKids = field.acroField.dict.get(PDFName.of("Kids")); - if (!maybeKids || !(maybeKids instanceof PDFArray)) return; - const kids = maybeKids; - if (!kids) return; - - const widgetRef = getWidgetRef(widget, doc); - if (!widgetRef) return; - - const updatedKids = kids.asArray().filter((ref) => { - const dict = doc.context.lookup(ref); - return dict !== widget.dict; - }); - - if (updatedKids.length === 0) { - // Field is now empty, remove it from the AcroForm - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fields = acroForm.lookup(PDFName.of("Fields"), PDFArray); - const fieldRef = field.acroField.ref; - const newFields = fields.asArray().filter((ref) => ref !== fieldRef); - acroForm.set(PDFName.of("Fields"), doc.context.obj(newFields)); - } else { - field.acroField.dict.set(PDFName.of("Kids"), doc.context.obj(updatedKids)); - } -} - -function moveWidgetToFlatField( - doc: PDFDocument, - field: PDFField, - widget: PDFWidgetAnnotation, - newName: string, -) { - const form = doc.getForm(); - const page = findPageForWidget(doc, widget); - if (!page) throw new Error("Widget's page not found"); - - const rect = widget.getRectangle(); - if (!rect) throw new Error("Widget has no rectangle"); - - const fieldType = detectFieldType(field); - const widgetRef = getWidgetRef(widget, doc); - if (!widgetRef) throw new Error("Widget ref not found"); - - // 🔒 Extract value + style before any destructive ops - let value: string | undefined; - try { - if (fieldType === "/Tx" && field instanceof PDFTextField) { - value = field.getText(); - } - } catch (_) { - log("Failed to extract value from field"); - } - - const sourceFieldDict = field.acroField.dict; - const sourceWidgetDict = widget.dict; - - // 🔥 Remove widget from page + field - removeWidgetFromPage(widget, doc); - removeWidgetCompletely(doc, widget, field); - - // 🔥 Carefully remove field + parents - try { - fullyDeleteFieldHierarchy(doc, field); - } catch (_) { - // fallback - log("Failed to remove field hierarchy"); - removeFieldIfEmpty(doc, field); - } - - sanitizeFieldsTree(doc); - removeDanglingParents(doc); - removeEmptyAncestors(doc, field); - - // 🔁 Create replacement field - let newField: PDFField; - - switch (fieldType) { - case "/Tx": { - const tf = form.createTextField(newName); - if (value) tf.setText(value); - tf.addToPage(page, rect); - newField = tf; - break; - } - - case "/Btn": { - const isRadio = getFlag(field, 15); - if (isRadio) { - const rg = form.createRadioGroup(newName); - rg.addOptionToPage(newName, page, rect); - return; - } else { - const cb = form.createCheckBox(newName); - cb.addToPage(page, rect); - if (field instanceof PDFCheckBox && field.isChecked()) { - cb.check(); - } - return; + const segments = name.split("."); + const matchingSegments = segments.filter((s) => pattern.test(s)); + let cField: PDFAcroField | undefined = field.acroField; + while (cField) { + if ( + cField.getPartialName() && + matchingSegments.includes(cField.getPartialName()!) + ) { + const mName = cField.getPartialName()?.replace(pattern, change); + if (mName) { + cField.dict.set(PDFName.of("T"), PDFString.of(mName)); + // console.log(cField.getPartialName()) } } - - case "/Ch": { - const ff = sourceFieldDict.get(PDFName.of("Ff")); - const isCombo = ff instanceof PDFNumber && - ((ff.asNumber() & (1 << 17)) !== 0); - const opts = sourceFieldDict.lookupMaybe(PDFName.of("Opt"), PDFArray); - const values = - opts?.asArray().map((opt) => - opt instanceof PDFString || opt instanceof PDFHexString - ? opt.decodeText() - : "" - ) ?? []; - - if (isCombo) { - const dd = form.createDropdown(newName); - dd.addOptions(values); - dd.addToPage(page, rect); - newField = dd; - } else { - const ol = form.createOptionList(newName); - ol.addOptions(values); - ol.addToPage(page, rect); - newField = ol; - } - break; - } - - default: - throw new Error(`Unsupported field type: ${fieldType}`); - } - - // 🔧 Apply styles *after creation* - const targetWidgetDict = newField.acroField.getWidgets()[0].dict; - copyFieldAndWidgetStyles( - sourceFieldDict, - sourceWidgetDict, - newField.acroField.dict, - targetWidgetDict, - ); -} - -function removeDanglingParents(doc: PDFDocument) { - const context = doc.context; - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fields = acroForm.lookupMaybe(PDFName.of("Fields"), PDFArray); - if (!(fields instanceof PDFArray)) return; - - function fixFieldDict(dict: PDFDict) { - const parentRef = dict.get(PDFName.of("Parent")); - if (!parentRef || !(parentRef instanceof PDFRef)) return; - - try { - const parentDict = context.lookup(parentRef, PDFDict); - if (!parentDict) throw new Error("Missing parent"); - } catch { - // Parent is broken — remove reference - dict.delete(PDFName.of("Parent")); - log("Broken parent reference removed"); - } - } - - const visited = new Set(); - - function recurseKids(dict: PDFDict) { - const kids = dict.lookupMaybe(PDFName.of("Kids"), PDFArray); - if (!(kids instanceof PDFArray)) return; - - for (const kidRef of kids.asArray()) { - if (!(kidRef instanceof PDFRef)) continue; - const key = kidRef.toString(); - if (visited.has(key)) continue; - visited.add(key); - - try { - const kidDict = context.lookup(kidRef, PDFDict); - fixFieldDict(kidDict); - recurseKids(kidDict); - } catch (e) { - context.delete(kidRef); // nuke broken reference - log("Broken kid reference removed"); - log(e); - } - } - } - - for (const ref of fields.asArray()) { - if (!(ref instanceof PDFRef)) continue; - try { - const dict = context.lookup(ref, PDFDict); - fixFieldDict(dict); - recurseKids(dict); - } catch { - context.delete(ref); // broken root - log("Broken root reference removed"); - } + cField = cField.getParent(); + // console.log(cField?.getPartialName()) } } -function removeFieldByName(doc: PDFDocument, fieldName: string) { - const form = doc.getForm(); - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fields = acroForm.lookup(PDFName.of("Fields"), PDFArray); - const context = doc.context; +// function applyWidgetRename( +// doc: PDFDocument, +// field: PDFField, +// widget: PDFWidgetAnnotation, +// name: string, +// pattern: RegExp, +// change: string, +// ) { +// if (field.acroField.getWidgets().length > 1) { +// const widgets = field.acroField.getWidgets(); +// const widgetIndex = widgets.indexOf(widget); +// widgets.splice(widgetIndex, 1); - const remainingFields = fields.asArray().filter((ref) => { - const dict = context.lookup(ref, PDFDict); - const name = dict?.get(PDFName.of("T")); +// const pdfDocContext = doc.context; - if (name && (name.decodeText?.() === fieldName)) { - context.delete(ref as PDFRef); - return false; - } +// const originalRef = field.acroField.ref; +// const originalFieldDict = pdfDocContext.lookup(originalRef); +// if (!originalFieldDict) return; - return true; - }); - - acroForm.set(PDFName.of("Fields"), context.obj(remainingFields)); -} - -function sanitizeFieldsTree(doc: PDFDocument) { - const context = doc.context; - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fields = acroForm.lookupMaybe(PDFName.of("Fields"), PDFArray); - if (!(fields instanceof PDFArray)) return; - - function pruneInvalidKids(dict: PDFDict, context: PDFContext) { - const kids = dict.lookupMaybe(PDFName.of("Kids"), PDFArray); - if (!(kids instanceof PDFArray)) return; - - const validKids: PDFRef[] = []; - - for (const ref of kids.asArray()) { - // 💥 Defensive: skip anything that's not a real PDFRef - if (!ref || !(ref instanceof PDFRef)) continue; - - let child: PDFDict | undefined; - try { - child = context.lookup(ref, PDFDict); - } catch (e) { - context.delete(ref); - log("Broken kid reference removed"); - log(e); - continue; - } - - if (!child) { - context.delete(ref); - continue; - } - - const t = child.get(PDFName.of("T")); - if (!(t instanceof PDFString || t instanceof PDFHexString)) { - context.delete(ref); - continue; - } - - // Recurse, but protect inner layers too - pruneInvalidKids(child, context); - validKids.push(ref); - } - - if (validKids.length > 0) { - dict.set(PDFName.of("Kids"), context.obj(validKids)); - } else { - dict.delete(PDFName.of("Kids")); - } - } - - const validFields: PDFRef[] = []; - - for (const ref of fields.asArray()) { - if (!ref || !(ref instanceof PDFRef)) continue; - - let dict: PDFDict | undefined; - try { - dict = context.lookup(ref, PDFDict); - } catch { - context.delete(ref); - log("Broken field reference removed"); - continue; - } - - if (!dict) { - context.delete(ref); - continue; - } - - const t = dict.get(PDFName.of("T")); - if (!(t instanceof PDFString || t instanceof PDFHexString)) { - context.delete(ref); - continue; - } - - pruneInvalidKids(dict, context); - validFields.push(ref); - } - - acroForm.set(PDFName.of("Fields"), context.obj(validFields)); -} - -function fullyDeleteFieldHierarchy(doc: PDFDocument, rootField: PDFField) { - const context = doc.context; - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fields = acroForm.lookup(PDFName.of("Fields"), PDFArray); - - function recurseDelete(dict: PDFDict, ref: PDFRef) { - const kids = dict.lookupMaybe(PDFName.of("Kids"), PDFArray); - - if (kids instanceof PDFArray) { - for (const kidRef of kids.asArray()) { - const kidDict = context.lookup(kidRef, PDFDict); - if (kidDict) { - recurseDelete(kidDict, kidRef as PDFRef); - } - } - } - - context.delete(ref); - } - - recurseDelete(rootField.acroField.dict, rootField.acroField.ref); - - // Remove root from AcroForm.Fields - const newFields = fields - .asArray() - .filter((ref) => ref !== rootField.acroField.ref); - - acroForm.set(PDFName.of("Fields"), context.obj(newFields)); -} - -function removeEmptyAncestors(doc: PDFDocument, field: PDFField) { - let current: PDFAcroField | undefined = field.acroField; - const context = doc.context; - - while (current) { - const parent = current.getParent(); - - const kids = parent?.dict.lookupMaybe(PDFName.of("Kids"), PDFArray); - if (kids instanceof PDFArray) { - const remaining = kids.asArray().filter((ref) => { - try { - const kidDict = context.lookup(ref, PDFDict); - return kidDict !== current?.dict; - } catch (e) { - log("Broken kid reference removed"); - log(e); - return false; - } - }); - - if (remaining.length > 0) { - parent.dict.set(PDFName.of("Kids"), context.obj(remaining)); - break; - } else { - parent.dict.delete(PDFName.of("Kids")); - } - } - - context.delete(current.ref); - current = parent; - } -} - -function removeWidgetCompletely( - doc: PDFDocument, - widget: PDFWidgetAnnotation, - field: PDFField, -) { - const widgetRef = getWidgetRef(widget, doc); - if (!widgetRef) return; - - // 1. Remove from field's /Kids array - const kidsRaw = field.acroField.dict.get(PDFName.of("Kids")); - if (kidsRaw instanceof PDFArray) { - const updatedKids = kidsRaw.asArray().filter((ref) => { - const dict = doc.context.lookup(ref); - return dict !== widget.dict; - }); - - if (updatedKids.length > 0) { - field.acroField.dict.set( - PDFName.of("Kids"), - doc.context.obj(updatedKids), - ); - } else { - field.acroField.dict.delete(PDFName.of("Kids")); - } - } - - // 2. Remove from page /Annots - for (const page of doc.getPages()) { - const annotsRaw = page.node.Annots()?.asArray(); - if (!annotsRaw) continue; - - const remainingAnnots = annotsRaw.filter((ref) => { - const dict = doc.context.lookup(ref); - return dict !== widget.dict; - }); - - page.node.set(PDFName.of("Annots"), doc.context.obj(remainingAnnots)); - } - - // Optional: delete the widget from the context - doc.context.delete(widgetRef); -} -function removeFieldIfEmpty(doc: PDFDocument, field: PDFField) { - const kids = field.acroField.getWidgets(); - if (kids.length > 0) return; - - const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); - const fieldsArray = acroForm.lookup(PDFName.of("Fields"), PDFArray); - const ref = field.acroField.ref; - - const updatedFields = fieldsArray.asArray().filter((f) => f !== ref); - acroForm.set(PDFName.of("Fields"), doc.context.obj(updatedFields)); - - // Optional: remove field object entirely - doc.context.delete(ref); -} - -function copyFieldAndWidgetStyles( - sourceFieldDict: PDFDict, - sourceWidgetDict: PDFDict, - targetFieldDict: PDFDict, - targetWidgetDict: PDFDict, -) { - const fieldKeys = ["DA", "DR", "Q"]; - const widgetKeys = ["MK", "BS", "Border"]; - - // Copy from field dict → field dict - for (const key of fieldKeys) { - const val = sourceFieldDict.get(PDFName.of(key)); - if (val) { - targetFieldDict.set(PDFName.of(key), val); - } - } - - // Copy from widget dict → widget dict - for (const key of widgetKeys) { - const val = sourceWidgetDict.get(PDFName.of(key)); - if (val) { - targetWidgetDict.set(PDFName.of(key), val); - } - } -} +// const newFieldDict = pdfDocContext.obj({ +// ...originalFieldDict, +// T: PDFString.of(name.replace(pattern, change)), +// Kids: [getWidgetRef(widget, doc.getPages())], +// }); +// const newField = pdfDocContext.register(newFieldDict); +// const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict); +// const fields = acroForm.lookup(PDFName.of("Fields"), PDFArray); +// fields.push(newField); +// } +// } function findPageForWidget( doc: PDFDocument, widget: PDFWidgetAnnotation, @@ -534,22 +134,19 @@ function applyWidgetRename( try { const form = doc.getForm(); const widgets = field.acroField.getWidgets(); + + if (widgets.length <= 1) return; const widgetDict = widget.dict; const widgetIndex = widgets.findIndex((w) => w.dict === widgetDict); if (widgetIndex === -1) return; - const widgetRef = getWidgetRef(widget, doc); - if (!widgetRef) return; - - // Remove widget from internal widgets list widgets.splice(widgetIndex, 1); - // Remove from /Kids - const maybeKids = field.acroField.dict.get(PDFName.of("Kids")); - if (maybeKids instanceof PDFArray) { - const updatedKids = maybeKids.asArray().filter((ref) => { + const kids = field.acroField.dict.lookup(PDFName.of("Kids"), PDFArray); + if (kids) { + const updatedKids = kids.asArray().filter((ref) => { const maybeDict = doc.context.lookup(ref); - return maybeDict !== widgetDict; + return maybeDict !== widget.dict; }); field.acroField.dict.set( PDFName.of("Kids"), @@ -558,41 +155,48 @@ function applyWidgetRename( } const page = findPageForWidget(doc, widget); - if (!page) throw new Error("Widget's page not found"); + if (!page) throw new Error("Widget page not found"); const rect = widget.getRectangle(); if (!rect) throw new Error("Widget has no rectangle"); const finalName = newName.replace(pattern, change); - const fieldType = detectFieldType(field); - // Attempt to find an existing field with the new name + // Try to get existing field with the new name let targetField: PDFField | undefined; + try { targetField = form.getField(finalName); } catch { - // - log("Failed to find existing field"); + // Field doesn't exist — that's fine } + // Compare field types if field exists if (targetField) { const sourceType = detectFieldType(field); const targetType = detectFieldType(targetField); + if (sourceType !== targetType) { throw new Error( `Field "${finalName}" already exists with a different type (${targetType} vs ${sourceType})`, ); } - // Add widget to existing field - widget.dict.set(PDFName.of("Parent"), targetField.acroField.ref); - - const kids = targetField.acroField.dict.lookup( + // ✅ Same type — attach widget to the existing field + // const targetFieldWidgets = targetField.acroField.getWidgets(); + const targetKidsArray = targetField.acroField.dict.lookup( PDFName.of("Kids"), PDFArray, ); - if (kids) { - kids.push(widgetRef); + + // Set /Parent on the widget to point to the existing field + widget.dict.set(PDFName.of("Parent"), targetField.acroField.ref); + + // Add the widget to the field's /Kids array + const widgetRef = getWidgetRef(widget, doc); + if (!widgetRef) throw new Error("Widget ref not found"); + if (targetKidsArray) { + targetKidsArray.push(widgetRef); } else { targetField.acroField.dict.set( PDFName.of("Kids"), @@ -600,23 +204,22 @@ function applyWidgetRename( ); } - const annots = page.node.Annots()?.asArray() ?? []; - if (!annots.includes(widgetRef)) { - annots.push(widgetRef); - page.node.set(PDFName.of("Annots"), doc.context.obj(annots)); + // Also ensure widget is attached to a page + const page = findPageForWidget(doc, widget); + if (!page) throw new Error("Widget's page not found"); + + const pageAnnots = page.node.Annots(); + const refs = pageAnnots?.asArray() ?? []; + if (!refs.includes(widgetRef)) { + refs.push(widgetRef); + page.node.set(PDFName.of("Annots"), doc.context.obj(refs)); } - removeWidgetFromPage(widget, doc); - removeWidgetCompletely(doc, widget, field); - removeFieldIfEmpty(doc, field); - - return; + return; // Done } - - // No existing field — create new one and move widget removeWidgetFromPage(widget, doc); - removeWidgetCompletely(doc, widget, field); - removeFieldIfEmpty(doc, field); + + const fieldType = detectFieldType(field); let newField: PDFField; @@ -627,12 +230,6 @@ function applyWidgetRename( const val = field.getText(); if (val) tf.setText(val); } - tf.addToPage(page, { - x: rect.x, - y: rect.y, - width: rect.width, - height: rect.height, - }); newField = tf; break; } @@ -640,8 +237,8 @@ function applyWidgetRename( case "/Btn": { const isRadio = getFlag(field, 15); if (isRadio) { - const radio = form.createRadioGroup(finalName); - radio.addOptionToPage(finalName, page, { + const rf = form.createRadioGroup(finalName); + rf.addOptionToPage(finalName, page, { x: rect.x, y: rect.y, width: rect.width, @@ -649,7 +246,7 @@ function applyWidgetRename( }); if (field instanceof PDFRadioGroup) { const selected = field.getSelected(); - if (selected) radio.select(selected); + if (selected) rf.select(selected); } return; } else { @@ -671,15 +268,20 @@ function applyWidgetRename( throw new Error(`Unsupported field type: ${fieldType}`); } - // Apply styles from old field/widget after creation - copyFieldAndWidgetStyles( - field.acroField.dict, - widget.dict, - newField.acroField.dict, - newField.acroField.getWidgets()[0].dict, - ); - } catch (e) { - log("applyWidgetRename error:", e); + // Attach the new field to the page if necessary + if ( + newField instanceof PDFTextField || + newField instanceof PDFCheckBox + ) { + newField.addToPage(page, { + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height, + }); + } + } catch { + // log(e); } } @@ -702,6 +304,36 @@ function removeWidgetFromPage(widget: PDFWidgetAnnotation, doc: PDFDocument) { } } +// function getWidgetRef( +// widget: PDFWidgetAnnotation, +// pages: PDFPage[], +// ): PDFRef | undefined { +// const widgetRect = (widget?.dict?.get(PDFName.of("Rect")) as PDFArray) +// ?.asArray(); +// const widgetFT = (widget?.dict?.get(PDFName.of("FT")) as PDFString) +// ?.["value"]; + +// for (const page of pages) { +// const annotsArray = page.node.Annots()?.asArray(); +// if (!annotsArray) continue; + +// for (const annotRef of annotsArray) { +// const annotDict = page.doc.context.lookup(annotRef); +// if (!annotDict) continue; +// if (!(annotDict instanceof PDFDict)) continue; +// const rect = (annotDict.get(PDFName.of("Rect")) as PDFArray)?.asArray(); +// const ft = (annotDict.get(PDFName.of("FT")) as PDFString)?.["value"]; + +// // rudimentary match (you can add more checks like /T, /Subtype, etc.) +// if (rect?.toString() === widgetRect?.toString() && ft === widgetFT) { +// return annotRef as PDFRef; +// } +// } +// } + +// return undefined; +// } + /*** * Evaluates the change string with the match array * @@ -869,12 +501,7 @@ class RenameFields implements ITool { new RegExp(patternRegex), toChange, ) - : moveWidgetToFlatField( - pdf, - field, - field.acroField.getWidgets()[0], - preview, - ); + : applyRename(field, name, patternRegex, toChange); changesMade = true; }, ]; @@ -900,7 +527,7 @@ class RenameFields implements ITool { try { await savePdf(pdf, path || pdfPath); } catch { - log(e); + // log(e); } } else { cliLog("No changes made, skipping", this.block);