// pdf-tools/tools/fieldRename.ts — 924 lines, 26 KiB, TypeScript

import {
PDFAcroField,
PDFAcroTerminal,
PDFArray,
PDFCheckBox,
PDFContext,
PDFDict,
type PDFDocument,
type PDFField,
PDFHexString,
PDFName,
PDFNumber,
type PDFObject,
PDFRadioGroup,
PDFRef,
PDFString,
PDFTextField,
type PDFWidgetAnnotation,
} from "pdf-lib";
import { loadPdf, savePdf } from "util/saveLoadPdf.ts";
import { TerminalBlock } from "../cli/TerminalLayout.ts";
import { forceArgs } from "../cli/forceArgs.ts";
import { colorize } from "../cli/style.ts";
import { cliAlert, cliLog, cliPrompt } from "../cli/prompts.ts";
import { multiSelectMenuInteractive } from "../cli/selectMenu.ts";
import type { callback, ITool } from "../types.ts";
import { toCase } from "util/caseManagement.ts";
import { log } from "util/logfile.ts";
/**
 * Detaches `widget` from the /Kids array of `field`.
 *
 * Returns without changes when the field has no /Kids array, or when the
 * widget is not referenced from any page's /Annots. When no kids remain, the
 * field's ref is filtered out of the AcroForm /Fields array (the field's own
 * /Kids entry is left as it was); otherwise /Kids is rewritten without the
 * widget.
 */
function removeWidgetFromOldField(
  doc: PDFDocument,
  field: PDFField,
  widget: PDFWidgetAnnotation,
) {
  const kidsKey = PDFName.of("Kids");
  const kidsEntry = field.acroField.dict.get(kidsKey);
  if (!(kidsEntry instanceof PDFArray)) return;

  // The ref itself is not needed here; it only proves the widget is on a page.
  if (!getWidgetRef(widget, doc)) return;

  const survivors = kidsEntry
    .asArray()
    .filter((kid) => doc.context.lookup(kid) !== widget.dict);

  if (survivors.length > 0) {
    field.acroField.dict.set(kidsKey, doc.context.obj(survivors));
    return;
  }

  // Field is now empty, remove it from the AcroForm
  const fieldsKey = PDFName.of("Fields");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const topLevel = acroForm.lookup(fieldsKey, PDFArray);
  const ownRef = field.acroField.ref;
  const keptFields = topLevel.asArray().filter((entry) => entry !== ownRef);
  acroForm.set(fieldsKey, doc.context.obj(keptFields));
}
/**
 * Replaces a field with a freshly created flat field called `newName`,
 * positioned on the same page and rectangle as `widget`.
 *
 * The old field, its widget and any now-empty ancestors are removed first;
 * then a text field, check box, radio group, dropdown or option list is
 * created depending on the old field's /FT and /Ff entries.
 *
 * Everything that can fail for reasons known up front (missing page or
 * rectangle, unsupported field type, a different field already using
 * `newName`) is checked BEFORE the old field is destroyed, and the old
 * field's value/checked state is captured before destruction as well.
 *
 * @param doc      Document that owns the field.
 * @param field    Field being replaced.
 * @param widget   The widget of `field` whose page/rectangle is reused.
 * @param newName  Fully qualified name for the replacement field.
 * @throws Error when the widget has no page, rectangle or ref, when the field
 *         type is not /Tx, /Btn or /Ch, or when another field is already
 *         named `newName`.
 */
function moveWidgetToFlatField(
  doc: PDFDocument,
  field: PDFField,
  widget: PDFWidgetAnnotation,
  newName: string,
) {
  const form = doc.getForm();
  const page = findPageForWidget(doc, widget);
  if (!page) throw new Error("Widget's page not found");
  const rect = widget.getRectangle();
  if (!rect) throw new Error("Widget has no rectangle");
  const fieldType = detectFieldType(field);
  const widgetRef = getWidgetRef(widget, doc);
  if (!widgetRef) throw new Error("Widget ref not found");

  // Fail before touching the document: once the old field is gone there is
  // nothing left to fall back to.
  if (fieldType !== "/Tx" && fieldType !== "/Btn" && fieldType !== "/Ch") {
    throw new Error(`Unsupported field type: ${fieldType}`);
  }
  let clashingField: PDFField | undefined;
  try {
    clashingField = form.getField(newName);
  } catch (_) {
    // No field with that name yet, so the name is free.
  }
  if (clashingField && clashingField.acroField.ref !== field.acroField.ref) {
    throw new Error(`Field "${newName}" already exists`);
  }

  // 🔒 Extract value + state before any destructive ops. The widgets and the
  // field object are removed below, after which the old field's state may no
  // longer be readable.
  let value: string | undefined;
  let wasChecked = false;
  let radioWasSelected = false;
  try {
    if (fieldType === "/Tx" && field instanceof PDFTextField) {
      value = field.getText();
    } else if (field instanceof PDFCheckBox) {
      wasChecked = field.isChecked();
    } else if (field instanceof PDFRadioGroup) {
      radioWasSelected = field.getSelected() !== undefined;
    }
  } catch (_) {
    log("Failed to extract value from field");
  }
  const isRadio = getFlag(field, 15);
  const sourceFieldDict = field.acroField.dict;
  const sourceWidgetDict = widget.dict;

  // 🔥 Remove widget from page + field
  removeWidgetFromPage(widget, doc);
  removeWidgetCompletely(doc, widget, field);

  // 🔥 Carefully remove field + parents
  try {
    fullyDeleteFieldHierarchy(doc, field);
  } catch (_) {
    // fallback
    log("Failed to remove field hierarchy");
    removeFieldIfEmpty(doc, field);
  }
  sanitizeFieldsTree(doc);
  removeDanglingParents(doc);
  removeEmptyAncestors(doc, field);

  // 🔁 Create replacement field
  let newField: PDFField;
  switch (fieldType) {
    case "/Tx": {
      const tf = form.createTextField(newName);
      if (value) tf.setText(value);
      tf.addToPage(page, rect);
      newField = tf;
      break;
    }
    case "/Btn": {
      if (isRadio) {
        const rg = form.createRadioGroup(newName);
        rg.addOptionToPage(newName, page, rect);
        // The new group has exactly one option, named after the field.
        if (radioWasSelected) rg.select(newName);
      } else {
        const cb = form.createCheckBox(newName);
        cb.addToPage(page, rect);
        if (wasChecked) cb.check();
      }
      // Button fields return here, so (as before) no styles are copied.
      return;
    }
    case "/Ch": {
      const ff = sourceFieldDict.get(PDFName.of("Ff"));
      const isCombo = ff instanceof PDFNumber &&
        ((ff.asNumber() & (1 << 17)) !== 0);
      const opts = sourceFieldDict.lookupMaybe(PDFName.of("Opt"), PDFArray);
      // Options that are not plain strings are carried over as "".
      const values = opts?.asArray().map((opt) =>
        opt instanceof PDFString || opt instanceof PDFHexString
          ? opt.decodeText()
          : ""
      ) ?? [];
      if (isCombo) {
        const dd = form.createDropdown(newName);
        dd.addOptions(values);
        dd.addToPage(page, rect);
        newField = dd;
      } else {
        const ol = form.createOptionList(newName);
        ol.addOptions(values);
        ol.addToPage(page, rect);
        newField = ol;
      }
      break;
    }
    default:
      throw new Error(`Unsupported field type: ${fieldType}`);
  }

  // 🔧 Apply styles *after creation*
  const targetWidgetDict = newField.acroField.getWidgets()[0].dict;
  copyFieldAndWidgetStyles(
    sourceFieldDict,
    sourceWidgetDict,
    newField.acroField.dict,
    targetWidgetDict,
  );
}
/**
 * Walks the AcroForm field tree and removes /Parent entries whose target
 * cannot be looked up as a dictionary.
 *
 * Top-level entries and kids that are not refs are skipped. A kid or root
 * whose lookup (or whose subtree walk) throws is deleted from the context.
 * Each kid ref is visited at most once.
 */
function removeDanglingParents(doc: PDFDocument) {
  const { context } = doc;
  const parentKey = PDFName.of("Parent");
  const kidsKey = PDFName.of("Kids");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const rootFields = acroForm.lookupMaybe(PDFName.of("Fields"), PDFArray);
  if (!(rootFields instanceof PDFArray)) return;

  // Kid refs already processed, keyed by their string form.
  const seen = new Set<string>();

  // Deletes the /Parent entry of `node` when it is a ref that does not resolve.
  const dropBrokenParent = (node: PDFDict) => {
    const parentEntry = node.get(parentKey);
    if (!(parentEntry instanceof PDFRef)) return;
    try {
      if (!context.lookup(parentEntry, PDFDict)) {
        throw new Error("Missing parent");
      }
    } catch {
      // Parent is broken — remove reference
      node.delete(parentKey);
      log("Broken parent reference removed");
    }
  };

  // Depth-first walk over /Kids, fixing each kid before descending into it.
  const walkKids = (node: PDFDict) => {
    const kids = node.lookupMaybe(kidsKey, PDFArray);
    if (!(kids instanceof PDFArray)) return;
    kids.asArray().forEach((kid) => {
      if (!(kid instanceof PDFRef)) return;
      const tag = kid.toString();
      if (seen.has(tag)) return;
      seen.add(tag);
      try {
        const kidDict = context.lookup(kid, PDFDict);
        dropBrokenParent(kidDict);
        walkKids(kidDict);
      } catch (e) {
        context.delete(kid); // nuke broken reference
        log("Broken kid reference removed");
        log(e);
      }
    });
  };

  for (const root of rootFields.asArray()) {
    if (!(root instanceof PDFRef)) continue;
    try {
      const rootDict = context.lookup(root, PDFDict);
      dropBrokenParent(rootDict);
      walkKids(rootDict);
    } catch {
      context.delete(root); // broken root
      log("Broken root reference removed");
    }
  }
}
/**
 * Removes every top-level AcroForm field whose partial name (/T) equals
 * `fieldName`, deleting the field object from the context as well.
 *
 * Only string-valued /T entries (PDFString / PDFHexString) are compared,
 * matching what `sanitizeFieldsTree` in this file accepts as a name. Entries
 * of /Fields that do not resolve to a dictionary are kept untouched.
 *
 * @param doc        Document whose AcroForm is edited.
 * @param fieldName  Partial field name to remove (top level only; kids are
 *                   not searched).
 */
function removeFieldByName(doc: PDFDocument, fieldName: string) {
  // Kept from the original: called before the AcroForm lookup below.
  // NOTE(review): presumably this guarantees an AcroForm exists — confirm.
  doc.getForm();

  const { context } = doc;
  const fieldsKey = PDFName.of("Fields");
  const nameKey = PDFName.of("T");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const fields = acroForm.lookup(fieldsKey, PDFArray);

  const remainingFields: PDFObject[] = [];
  for (const entry of fields.asArray()) {
    const dict = context.lookupMaybe(entry, PDFDict);
    const name = dict?.get(nameKey);
    const isMatch =
      (name instanceof PDFString || name instanceof PDFHexString) &&
      name.decodeText() === fieldName;
    if (!isMatch) {
      remainingFields.push(entry);
      continue;
    }
    // Only indirect objects live in the context and can be deleted from it.
    if (entry instanceof PDFRef) context.delete(entry);
  }
  acroForm.set(fieldsKey, context.obj(remainingFields));
}
/**
 * Rebuilds the AcroForm /Fields tree, dropping entries that are unusable.
 *
 * An entry is dropped (and deleted from the context) when it is a ref that
 * does not resolve to a dictionary, or when it is a field node without a
 * string-valued /T. Entries that are not refs are dropped from the arrays
 * without further action.
 *
 * Kids that are widget annotations (/Subtype /Widget) are kept even without
 * /T: widgets hanging off a terminal field normally carry no name of their
 * own, so treating them as invalid would delete the widgets of every other
 * field in the document.
 */
function sanitizeFieldsTree(doc: PDFDocument) {
  const context = doc.context;
  const kidsKey = PDFName.of("Kids");
  const nameKey = PDFName.of("T");
  const fieldsKey = PDFName.of("Fields");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const fields = acroForm.lookupMaybe(fieldsKey, PDFArray);
  if (!(fields instanceof PDFArray)) return;

  // True when the dictionary has a string-valued partial name.
  const hasPartialName = (dict: PDFDict) => {
    const t = dict.get(nameKey);
    return t instanceof PDFString || t instanceof PDFHexString;
  };

  // True when the dictionary is a widget annotation.
  const isWidget = (dict: PDFDict) =>
    dict.lookup(PDFName.of("Subtype")) === PDFName.of("Widget");

  // Resolves `ref` to a dictionary; on failure deletes it and returns undefined.
  const resolveOrDelete = (ref: PDFRef, label: string): PDFDict | undefined => {
    try {
      const dict = context.lookup(ref, PDFDict);
      if (dict) return dict;
    } catch (e) {
      log(`Broken ${label} reference removed`);
      log(e);
    }
    context.delete(ref);
    return undefined;
  };

  // Returns the refs worth keeping, pruning each kept node's /Kids recursively.
  const prune = (
    refs: PDFObject[],
    label: string,
    allowWidgets: boolean,
  ): PDFRef[] => {
    const kept: PDFRef[] = [];
    for (const ref of refs) {
      // 💥 Defensive: skip anything that's not a real PDFRef
      if (!(ref instanceof PDFRef)) continue;
      const dict = resolveOrDelete(ref, label);
      if (!dict) continue;

      if (!hasPartialName(dict)) {
        if (allowWidgets && isWidget(dict)) {
          // Plain widget kid: valid as-is, nothing to recurse into.
          kept.push(ref);
        } else {
          context.delete(ref);
        }
        continue;
      }

      const kids = dict.lookupMaybe(kidsKey, PDFArray);
      if (kids instanceof PDFArray) {
        const validKids = prune(kids.asArray(), "kid", true);
        if (validKids.length > 0) {
          dict.set(kidsKey, context.obj(validKids));
        } else {
          dict.delete(kidsKey);
        }
      }
      kept.push(ref);
    }
    return kept;
  };

  const validFields = prune(fields.asArray(), "field", false);
  acroForm.set(fieldsKey, context.obj(validFields));
}
/**
 * Deletes `rootField` and everything reachable through its /Kids arrays from
 * the document context, then filters the root's ref out of AcroForm /Fields.
 *
 * Kids are deleted before their parent. A kid that cannot be looked up as a
 * dictionary makes the lookup throw, which propagates to the caller.
 */
function fullyDeleteFieldHierarchy(doc: PDFDocument, rootField: PDFField) {
  const { context } = doc;
  const kidsKey = PDFName.of("Kids");
  const fieldsKey = PDFName.of("Fields");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const topLevel = acroForm.lookup(fieldsKey, PDFArray);

  // Post-order deletion: children first, then the node itself.
  const purge = (node: PDFDict, nodeRef: PDFRef): void => {
    const children = node.lookupMaybe(kidsKey, PDFArray);
    if (children instanceof PDFArray) {
      children.asArray().forEach((childRef) => {
        const childDict = context.lookup(childRef, PDFDict);
        if (childDict) purge(childDict, childRef as PDFRef);
      });
    }
    context.delete(nodeRef);
  };

  const { dict: rootDict, ref: rootRef } = rootField.acroField;
  purge(rootDict, rootRef);

  // Remove root from AcroForm.Fields
  const keptFields = topLevel.asArray().filter((entry) => entry !== rootRef);
  acroForm.set(fieldsKey, context.obj(keptFields));
}
/**
 * Climbs from `field` towards the root of the field tree, unlinking each node
 * from its parent's /Kids.
 *
 * At every level the current node is filtered out of the parent's /Kids
 * (kids whose lookup throws are dropped too). If the parent still has kids,
 * the array is rewritten and the climb stops at that point, without deleting
 * the current node from the context. Otherwise the parent's /Kids entry is
 * removed, the current node is deleted from the context and the climb
 * continues with the parent.
 */
function removeEmptyAncestors(doc: PDFDocument, field: PDFField) {
  const { context } = doc;
  const kidsKey = PDFName.of("Kids");
  let node: PDFAcroField | undefined = field.acroField;

  while (node) {
    const self: PDFAcroField = node;
    const parent = self.getParent();
    const siblings = parent?.dict.lookupMaybe(kidsKey, PDFArray);

    if (parent && siblings instanceof PDFArray) {
      const survivors = siblings.asArray().filter((kid) => {
        try {
          return context.lookup(kid, PDFDict) !== self.dict;
        } catch (e) {
          log("Broken kid reference removed");
          log(e);
          return false;
        }
      });
      if (survivors.length > 0) {
        // Parent is still in use: keep it and stop here.
        parent.dict.set(kidsKey, context.obj(survivors));
        return;
      }
      parent.dict.delete(kidsKey);
    }

    context.delete(self.ref);
    node = parent;
  }
}
/**
 * Removes `widget` from `field`'s /Kids, from every page's /Annots, and from
 * the document context.
 *
 * The widget's ref is found via `getWidgetRef`, which searches page /Annots;
 * if the widget is not on any page this function returns without doing
 * anything.
 * NOTE(review): the callers in this file invoke `removeWidgetFromPage` right
 * before this function, which would make that early return fire — confirm
 * the intended call order.
 */
function removeWidgetCompletely(
  doc: PDFDocument,
  widget: PDFWidgetAnnotation,
  field: PDFField,
) {
  const widgetRef = getWidgetRef(widget, doc);
  if (!widgetRef) return;

  const { context } = doc;
  const kidsKey = PDFName.of("Kids");
  // True for every array entry that does not resolve to this widget's dict.
  const isOtherObject = (entry: PDFObject) =>
    context.lookup(entry) !== widget.dict;

  // 1. Remove from field's /Kids array
  const kidsEntry = field.acroField.dict.get(kidsKey);
  if (kidsEntry instanceof PDFArray) {
    const keptKids = kidsEntry.asArray().filter((kid) => isOtherObject(kid));
    if (keptKids.length === 0) {
      field.acroField.dict.delete(kidsKey);
    } else {
      field.acroField.dict.set(kidsKey, context.obj(keptKids));
    }
  }

  // 2. Remove from page /Annots (every page that has /Annots gets a new array)
  for (const page of doc.getPages()) {
    const annots = page.node.Annots()?.asArray();
    if (!annots) continue;
    const keptAnnots = annots.filter((annot) => isOtherObject(annot));
    page.node.set(PDFName.of("Annots"), context.obj(keptAnnots));
  }

  // Optional: delete the widget from the context
  context.delete(widgetRef);
}
/**
 * Removes `field` from AcroForm /Fields and deletes its object from the
 * context, but only when `field.acroField.getWidgets()` reports no widgets.
 */
function removeFieldIfEmpty(doc: PDFDocument, field: PDFField) {
  if (field.acroField.getWidgets().length > 0) return;

  const fieldsKey = PDFName.of("Fields");
  const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
  const topLevel = acroForm.lookup(fieldsKey, PDFArray);
  const ownRef = field.acroField.ref;

  const keptFields = topLevel.asArray().filter((entry) => entry !== ownRef);
  acroForm.set(fieldsKey, doc.context.obj(keptFields));

  // Optional: remove field object entirely
  doc.context.delete(ownRef);
}
/**
 * Copies appearance-related entries from an old field/widget pair onto a new
 * one: DA, DR and Q from field dict to field dict, then MK, BS and Border
 * from widget dict to widget dict. Entries missing on the source are skipped;
 * existing target entries with the same key are overwritten.
 */
function copyFieldAndWidgetStyles(
  sourceFieldDict: PDFDict,
  sourceWidgetDict: PDFDict,
  targetFieldDict: PDFDict,
  targetWidgetDict: PDFDict,
) {
  // Copies each listed key that is present on `from` over to `to`.
  const copyEntries = (
    from: PDFDict,
    to: PDFDict,
    keys: readonly string[],
  ) => {
    keys
      .map((key) => PDFName.of(key))
      .forEach((name) => {
        const entry = from.get(name);
        if (entry) to.set(name, entry);
      });
  };

  copyEntries(sourceFieldDict, targetFieldDict, ["DA", "DR", "Q"]);
  copyEntries(sourceWidgetDict, targetWidgetDict, ["MK", "BS", "Border"]);
}
/**
 * Returns the first page whose /Annots contains an entry resolving to
 * `widget.dict`, or `undefined` when no page references the widget.
 */
function findPageForWidget(
  doc: PDFDocument,
  widget: PDFWidgetAnnotation,
) {
  return doc.getPages().find((page) => {
    const annots = page.node.Annots();
    if (!annots) return false;
    return annots
      .asArray()
      .some((entry) => doc.context.lookup(entry) === widget.dict);
  });
}
/**
 * Reads the field's /FT entry and returns it via `PDFName.asString()`;
 * callers in this file compare the result against "/Tx", "/Btn" and "/Ch".
 * Returns `undefined` when /FT is absent or not a name.
 */
function detectFieldType(field: PDFField): string | undefined {
  const typeEntry = field.acroField.dict.get(PDFName.of("FT"));
  if (typeEntry instanceof PDFName) return typeEntry.asString();
  return undefined;
}
/**
 * Tests a single bit of the field's /Ff flags.
 *
 * @param bit Zero-based bit position (the mask is `1 << bit`).
 * @returns `false` when /Ff is absent or not a number.
 */
function getFlag(field: PDFField, bit: number): boolean {
  const flags = field.acroField.dict.get(PDFName.of("Ff"));
  if (!(flags instanceof PDFNumber)) return false;
  const mask = 1 << bit;
  return (flags.asNumber() & mask) !== 0;
}
/**
 * Finds the indirect reference under which `widget` is listed in some page's
 * /Annots array.
 *
 * Entries of /Annots that are not refs (e.g. a dictionary stored inline) are
 * skipped: such an entry has no ref to return, and handing the dictionary
 * back typed as a `PDFRef` would break callers that delete or re-link it.
 *
 * @returns The widget's ref, or `undefined` when no page references it.
 */
function getWidgetRef(
  widget: PDFWidgetAnnotation,
  doc: PDFDocument,
): PDFRef | undefined {
  for (const page of doc.getPages()) {
    const annots = page.node.Annots()?.asArray() ?? [];
    for (const entry of annots) {
      if (!(entry instanceof PDFRef)) continue;
      if (doc.context.lookup(entry) === widget.dict) return entry;
    }
  }
  return undefined;
}
/**
 * Moves one widget of a multi-widget field to the field named
 * `newName.replace(pattern, change)`.
 *
 * - If a field with that name already exists (and has the same /FT), the
 *   widget is re-parented to it and stays on its page.
 * - Otherwise the widget is removed and a new text field, check box or radio
 *   group with the final name is created at the widget's page and rectangle.
 *
 * All checks that can reject the operation (missing page/rectangle, type
 * mismatch with an existing field, unsupported field type) run before the
 * document is modified. Errors are caught and logged, never thrown.
 *
 * @param doc      Document that owns the field.
 * @param field    Field currently owning `widget`.
 * @param widget   Widget to move.
 * @param newName  Name the pattern is applied to (the field's current name).
 * @param pattern  Search pattern.
 * @param change   Replacement string passed to `String.prototype.replace`.
 */
function applyWidgetRename(
  doc: PDFDocument,
  field: PDFField,
  widget: PDFWidgetAnnotation,
  newName: string,
  pattern: RegExp,
  change: string,
) {
  try {
    const form = doc.getForm();
    const widgetDict = widget.dict;
    const kidsKey = PDFName.of("Kids");

    const ownsWidget = field.acroField
      .getWidgets()
      .some((w) => w.dict === widgetDict);
    if (!ownsWidget) return;
    const widgetRef = getWidgetRef(widget, doc);
    if (!widgetRef) return;

    // ---- Gather and validate everything before mutating the document ----
    const page = findPageForWidget(doc, widget);
    if (!page) throw new Error("Widget's page not found");
    const rect = widget.getRectangle();
    if (!rect) throw new Error("Widget has no rectangle");
    const finalName = newName.replace(pattern, change);
    const fieldType = detectFieldType(field);

    // Attempt to find an existing field with the new name
    let targetField: PDFField | undefined;
    try {
      targetField = form.getField(finalName);
    } catch {
      log("Failed to find existing field");
    }

    // Renaming onto the field itself: nothing to move.
    if (targetField && targetField.acroField.ref === field.acroField.ref) {
      return;
    }

    if (targetField) {
      const targetType = detectFieldType(targetField);
      if (fieldType !== targetType) {
        throw new Error(
          `Field "${finalName}" already exists with a different type (${targetType} vs ${fieldType})`,
        );
      }
    } else if (fieldType !== "/Tx" && fieldType !== "/Btn") {
      throw new Error(`Unsupported field type: ${fieldType}`);
    }

    // Whether this widget is the currently selected option of a radio group;
    // read now because the widget is unlinked from the field below.
    const radioWasSelected = field instanceof PDFRadioGroup &&
      field.acroField.getValue() === widget.getOnValue();

    // ---- Mutations start here ----
    // Remove from /Kids
    const kidsEntry = field.acroField.dict.get(kidsKey);
    if (kidsEntry instanceof PDFArray) {
      const keptKids = kidsEntry
        .asArray()
        .filter((kid) => doc.context.lookup(kid) !== widgetDict);
      field.acroField.dict.set(kidsKey, doc.context.obj(keptKids));
    }

    if (targetField) {
      // Add widget to existing field. It is already listed in its page's
      // /Annots (that is where widgetRef was found), so the page is left
      // untouched and the widget object is kept.
      widgetDict.set(PDFName.of("Parent"), targetField.acroField.ref);
      const targetKids = targetField.acroField.dict.lookupMaybe(
        kidsKey,
        PDFArray,
      );
      if (targetKids) {
        targetKids.push(widgetRef);
      } else {
        // NOTE(review): a target without /Kids may itself be its own widget;
        // giving it /Kids could hide that widget — confirm.
        targetField.acroField.dict.set(kidsKey, doc.context.obj([widgetRef]));
      }
      removeFieldIfEmpty(doc, field);
      return;
    }

    // No existing field — create new one and move widget
    removeWidgetFromPage(widget, doc);
    removeWidgetCompletely(doc, widget, field);
    removeFieldIfEmpty(doc, field);

    let newField: PDFField;
    switch (fieldType) {
      case "/Tx": {
        const tf = form.createTextField(finalName);
        if (field instanceof PDFTextField) {
          const val = field.getText();
          if (val) tf.setText(val);
        }
        tf.addToPage(page, {
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
        });
        newField = tf;
        break;
      }
      case "/Btn": {
        const placement = {
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
        };
        if (getFlag(field, 15)) {
          const radio = form.createRadioGroup(finalName);
          radio.addOptionToPage(finalName, page, placement);
          // The new group's only option is `finalName`; the old option name
          // does not exist in it.
          if (radioWasSelected) radio.select(finalName);
        } else {
          const cb = form.createCheckBox(finalName);
          cb.addToPage(page, placement);
          if (field instanceof PDFCheckBox && field.isChecked()) {
            cb.check();
          }
        }
        // Button fields return here, so (as before) no styles are copied.
        return;
      }
      default:
        throw new Error(`Unsupported field type: ${fieldType}`);
    }

    // Apply styles from old field/widget after creation
    copyFieldAndWidgetStyles(
      field.acroField.dict,
      widgetDict,
      newField.acroField.dict,
      newField.acroField.getWidgets()[0].dict,
    );
  } catch (e) {
    log("applyWidgetRename error:", e);
  }
}
/**
 * Removes every /Annots entry that resolves to `widget.dict` from all pages.
 * Pages without /Annots, or whose /Annots do not contain the widget, are left
 * untouched.
 */
function removeWidgetFromPage(widget: PDFWidgetAnnotation, doc: PDFDocument) {
  const annotsKey = PDFName.of("Annots");
  doc.getPages().forEach((page) => {
    const annots = page.node.Annots();
    if (!annots) return;

    const before = annots.asArray();
    const after = before.filter(
      (entry) => doc.context.lookup(entry) !== widget.dict,
    );

    // Replace /Annots with updated array, only when something was removed
    if (after.length !== before.length) {
      page.node.set(annotsKey, doc.context.obj(after));
    }
  });
}
/**
 * Evaluates the change string with the match array
 *
 * @description The change string can include the following variables:
 *
 * - $<int> - capture groups, indexed from 1
 * - $<int>i - capture groups, indexed from 1, transforming an integer to an index
 *   (the value minus one; text that parses to 0 or is not a number is kept as-is)
 * - $<int>s - capture groups, indexed from 1, transforming a string to snake case
 * - $<int>c - capture groups, indexed from 1, transforming a string to camel case
 * - $<int>l - capture groups, indexed from 1, transforming a string to lower case
 * - $<int>u - capture groups, indexed from 1, transforming a string to upper case
 * - $<int>t - capture groups, indexed from 1, transforming a string to title case
 * - $I{a,b,c} - cycles through the listed words using `index` (first occurrence only)
 * - $I / $I<int> - `index`, optionally shifted by a signed offset (first occurrence only)
 *
 * A capture group that exists in the pattern but did not take part in the
 * match evaluates to the empty string. A `$<int>` token whose group number is
 * not part of the match at all is left in the output unchanged.
 *
 * @param change The change template.
 * @param match  Result of executing the search pattern against a field name.
 * @param index  Running counter supplied by the caller.
 * @returns The template with all recognised tokens substituted.
 */
function evaluateChange(change: string, match: RegExpExecArray, index: number) {
  return change
    .replace(
      /\$(\d+)([icslut]?)/g,
      (token: string, groupNo: string, modifier: string) => {
        const groupIndex = Number(groupNo);
        // Unknown group: keep the token rather than emitting "undefined".
        if (groupIndex >= match.length) return token;
        // Optional group that did not participate in the match.
        const captured: string = match[groupIndex] ?? "";
        switch (modifier) {
          case "i": {
            const parsed = parseInt(captured);
            return parsed ? (parsed - 1).toString() : captured;
          }
          case "s":
            return toCase(captured, "snake");
          case "c":
            return toCase(captured, "camel");
          case "t":
            return toCase(captured, "title");
          case "l":
            return captured.toLowerCase();
          case "u":
            return captured.toUpperCase();
          default:
            return captured;
        }
      },
    )
    .replace(
      // Same language as the former /\$I{((\w+,?)+)}/ but without the nested
      // quantifier, so a malformed token cannot trigger heavy backtracking.
      /\$I\{(\w+(?:,\w+)*,?)\}/,
      (_: string, list: string) => {
        const options = list.split(",");
        return options[index % options.length];
      },
    )
    .replace(
      /\$I(-?\d+)?/,
      (_: string, offset?: string) => {
        const delta = offset === undefined ? NaN : parseInt(offset);
        return (delta ? index + delta : index).toString();
      },
    );
}
/**
 * CLI tool that renames form fields of one or more PDFs.
 *
 * Field names are matched against a regular expression; for every widget of a
 * matching field a rename is proposed (see `evaluateChange` for the template
 * syntax). The user picks the renames to apply from an interactive menu and
 * the document is saved when at least one rename callback ran.
 */
class RenameFields implements ITool {
  name = "renamefields";
  description = "Renames fields in a PDF form";
  block: TerminalBlock | undefined;

  /** Sets the terminal block used for all prompts and log output. */
  setBlock(block: TerminalBlock) {
    this.block = block;
  }

  /**
   * Shows the usage line.
   * @param standalone When true the alert is shown without the tool's block.
   */
  async help(standalone = false) {
    await cliAlert(
      "Usage: rename-fields <pdfPath> <pattern> <change>\n",
      standalone ? undefined : this.block,
    );
  }

  /**
   * Runs the tool, prompting for any argument that was not supplied.
   *
   * @param pdfPath Path to a PDF, or several paths separated by commas.
   * @param pattern Regular expression source matched against field names.
   * @param change  Change template, see `evaluateChange`.
   */
  async run(pdfPath: string = "", pattern: string = "", change: string = "") {
    if (!this.block) {
      this.block = new TerminalBlock();
    }
    this.block.setPreserveHistory(true);
    [pdfPath, pattern, change] = await forceArgs(
      [pdfPath, pattern, change],
      [
        [
          "Please provide path to PDF (comma separated for multiple):",
          (p) => !!p && p.endsWith(".pdf"),
        ],
        "Please provide search string:",
        "Please provide requested change:",
      ],
      this.block,
    );

    // Compile once; an invalid pattern is reported instead of crashing.
    let patternRegex: RegExp;
    try {
      patternRegex = new RegExp(pattern);
    } catch (e) {
      log(e);
      await cliLog(
        colorize(`Invalid search pattern: ${pattern}`, "red"),
        this.block,
      );
      return;
    }

    // Allow "a.pdf, b.pdf": surrounding blanks and empty items are ignored.
    const paths = pdfPath
      .split(",")
      .map((p) => p.trim())
      .filter((p) => p.length > 0);

    for (const currentPath of paths) {
      const pdf = await loadPdf(currentPath);
      const form = pdf.getForm();
      const pages = pdf.getPages();

      // Index of the page whose /Annots lists the widget, or -1 when none.
      const pageIndexOf = (widget: PDFWidgetAnnotation) =>
        pages.findIndex((page) =>
          (page.node.Annots()?.asArray() ?? []).some(
            (entry) => pdf.context.lookup(entry) === widget.dict,
          )
        );

      // Sort keys are computed once per field, from its first widget. The
      // page is resolved through the widget because a field with separate
      // widget kids is not itself listed in any page's /Annots.
      const fields = form
        .getFields()
        .map((field) => {
          const widget = field.acroField.getWidgets()[0];
          return {
            field,
            name: field.getName(),
            pageIndex: widget ? pageIndexOf(widget) : -1,
            rect: widget?.Rect()?.asRectangle(),
          };
        })
        .sort((a, b) => {
          if (
            a.pageIndex >= 0 && b.pageIndex >= 0 &&
            a.pageIndex !== b.pageIndex
          ) {
            return a.pageIndex - b.pageIndex;
          }
          if (a.rect && b.rect) {
            // Larger y first; rows within 5 units count as the same line and
            // are ordered left to right.
            const dy = b.rect.y - a.rect.y;
            if (Math.abs(dy) > 5) return dy;
            return a.rect.x - b.rect.x;
          }
          return a.name.localeCompare(b.name);
        })
        .map((entry) => entry.field);

      const badFields = fields.filter(
        (field) => field.acroField.getWidgets().length > 1,
      ).length;
      if (badFields) {
        await cliLog(
          colorize(
            `Warning, ${badFields} fields with shared widgets found`,
            "yellow",
          ),
          this.block,
        );
      }

      const foundUpdates: [string, callback][] = [];
      let changesMade = false;
      let i = 0; // running index over all matching widgets, feeds $I tokens
      for (const field of fields) {
        const name = field.getName();
        const match = patternRegex.exec(name);
        if (!match) continue;
        for (const widget of field.acroField.getWidgets()) {
          const toChange = evaluateChange(change, match, i);
          const preview = name.replace(new RegExp(patternRegex), toChange);
          i++;
          foundUpdates.push([
            `${colorize(name, "red")} -> ${colorize(preview, "green")}`,
            () => {
              // Decided at apply time: earlier renames may already have
              // taken widgets away from this field.
              if (field.acroField.getWidgets().length > 1) {
                applyWidgetRename(
                  pdf,
                  field,
                  widget,
                  name,
                  new RegExp(patternRegex),
                  toChange,
                );
              } else {
                moveWidgetToFlatField(
                  pdf,
                  field,
                  field.acroField.getWidgets()[0],
                  preview,
                );
              }
              changesMade = true;
            },
          ]);
        }
      }

      if (foundUpdates.length) {
        await cliLog("Found updates:", this.block);
        await multiSelectMenuInteractive(
          "Please select an option to apply",
          foundUpdates,
          { terminalBlock: this.block },
        );
      }

      if (changesMade) {
        const path = await cliPrompt(
          "Save to path (or hit enter to keep current):",
          this.block,
        );
        try {
          await savePdf(pdf, path || currentPath);
        } catch (e) {
          log(e);
          await cliLog(
            colorize(`Failed to save ${path || currentPath}`, "red"),
            this.block,
          );
        }
      } else {
        await cliLog("No changes made, skipping", this.block);
      }
    }
  }
}
// Module default export: a single RenameFields instance created at import time.
export default new RenameFields();
// if (import.meta.main) {
// // await call(renameFields)
// // while (!path || !path.endsWith('.pdf')) path = prompt("Please provide path to PDF:") || '';
// // while (!pattern) pattern = prompt("Please provide search string:") || '';
// // while (!change) change = prompt("Please provide requested change:") || '';
// await callWithArgPrompt(renameFields, [
// ["Please provide path to PDF:", (p) => !!p && p.endsWith(".pdf")],
// "Please provide search string:",
// "Please provide requested change:",
// ]);
// }