// pdf-tools/tools/fieldRename.ts
// Emma 0f9c377853 (2025-05-27 12:44:45 -06:00):
//   change: selects now use inputmanager
//   fix: bad exit logic
//   feat: field rename now supports renaming things with multiple widgets
//
// 551 lines, 16 KiB, TypeScript

import {
type PDFAcroField,
PDFArray,
PDFCheckBox,
type PDFDocument,
type PDFField,
PDFName,
PDFNumber,
PDFRadioGroup,
type PDFRef,
PDFString,
PDFTextField,
type PDFWidgetAnnotation,
} from "pdf-lib";
import { loadPdf, savePdf } from "util/saveLoadPdf.ts";
import { TerminalBlock } from "../cli/TerminalLayout.ts";
import { forceArgs } from "../cli/forceArgs.ts";
import { colorize } from "../cli/style.ts";
import { cliAlert, cliLog, cliPrompt } from "../cli/prompts.ts";
import { multiSelectMenuInteractive } from "../cli/selectMenu.ts";
import type { callback, ITool } from "../types.ts";
import { toCase } from "util/caseManagement.ts";
/**
 * Renames a field in place by rewriting partial names along its ancestor chain.
 *
 * `name` is split on "." and every segment that matches `pattern` is collected.
 * Starting at the field itself and walking up through `getParent()`, each node
 * whose partial name equals one of those segments gets `pattern` replaced by
 * `change`. Because segments are tested individually, a pattern that only
 * matches across a "." boundary renames nothing.
 *
 * @param field   Field whose own and ancestor partial names may be rewritten.
 * @param name    Fully qualified field name used to find the matching segments.
 * @param pattern Pattern tested against each segment and used for the replace.
 * @param change  Replacement string handed to `String.prototype.replace`.
 */
function applyRename(
  field: PDFField,
  name: string,
  pattern: RegExp,
  change: string,
) {
  const matchingSegments = name
    .split(".")
    .filter((segment) => pattern.test(segment));

  let current: PDFAcroField | undefined = field.acroField;
  while (current) {
    const partialName = current.getPartialName();
    if (partialName && matchingSegments.includes(partialName)) {
      const renamed = partialName.replace(pattern, change);
      // An empty result is skipped so a node never loses its name entirely.
      if (renamed) {
        // setPartialName encodes the text for the /T entry; building a raw
        // literal string here would mangle non-Latin characters and
        // unbalanced parentheses.
        current.setPartialName(renamed);
      }
    }
    current = current.getParent();
  }
}
// function applyWidgetRename(
// doc: PDFDocument,
// field: PDFField,
// widget: PDFWidgetAnnotation,
// name: string,
// pattern: RegExp,
// change: string,
// ) {
// if (field.acroField.getWidgets().length > 1) {
// const widgets = field.acroField.getWidgets();
// const widgetIndex = widgets.indexOf(widget);
// widgets.splice(widgetIndex, 1);
// const pdfDocContext = doc.context;
// const originalRef = field.acroField.ref;
// const originalFieldDict = pdfDocContext.lookup(originalRef);
// if (!originalFieldDict) return;
// const newFieldDict = pdfDocContext.obj({
// ...originalFieldDict,
// T: PDFString.of(name.replace(pattern, change)),
// Kids: [getWidgetRef(widget, doc.getPages())],
// });
// const newField = pdfDocContext.register(newFieldDict);
// const acroForm = doc.catalog.lookup(PDFName.of("AcroForm"), PDFDict);
// const fields = acroForm.lookup(PDFName.of("Fields"), PDFArray);
// fields.push(newField);
// }
// }
/**
 * Finds the first page whose /Annots array contains the given widget.
 *
 * Each annotation entry is resolved through the document context and compared
 * to the widget's dictionary by identity.
 *
 * @returns The owning page, or `undefined` when no page lists the widget.
 */
function findPageForWidget(
  doc: PDFDocument,
  widget: PDFWidgetAnnotation,
) {
  return doc.getPages().find((candidate) => {
    const annotations = candidate.node.Annots();
    if (!annotations) return false;
    return annotations
      .asArray()
      .some((entry) => doc.context.lookup(entry) === widget.dict);
  });
}
/**
 * Reads the /FT entry stored directly on the field's own dictionary.
 *
 * @returns The name's string form (compared against "/Tx" and "/Btn"
 *          elsewhere in this file), or `undefined` when the entry is absent
 *          or is not a name. Parent dictionaries are not consulted.
 */
function detectFieldType(field: PDFField): string | undefined {
  const fieldTypeEntry = field.acroField.dict.get(PDFName.of("FT"));
  if (fieldTypeEntry instanceof PDFName) {
    return fieldTypeEntry.asString();
  }
  return undefined;
}
/**
 * Tests a single bit of the /Ff entry stored directly on the field's own
 * dictionary.
 *
 * @param bit Zero-based bit position (the mask is `1 << bit`).
 * @returns `true` when the bit is set; `false` when it is clear or when /Ff is
 *          absent or not a number.
 */
function getFlag(field: PDFField, bit: number): boolean {
  const flagsEntry = field.acroField.dict.get(PDFName.of("Ff"));
  if (!(flagsEntry instanceof PDFNumber)) {
    return false;
  }
  const mask = 1 << bit;
  return (flagsEntry.asNumber() & mask) !== 0;
}
/**
 * Locates the /Annots entry that resolves to the widget's dictionary.
 *
 * Pages are scanned in document order and the first matching entry wins.
 *
 * @returns The matching entry (cast to `PDFRef`), or `undefined` when no page
 *          lists the widget.
 */
function getWidgetRef(
  widget: PDFWidgetAnnotation,
  doc: PDFDocument,
): PDFRef | undefined {
  const allAnnotationEntries = doc
    .getPages()
    .flatMap((page) => page.node.Annots()?.asArray() ?? []);
  const hit = allAnnotationEntries.find(
    (entry) => doc.context.lookup(entry) === widget.dict,
  );
  return hit === undefined ? undefined : (hit as PDFRef);
}
/**
 * Moves one widget of a multi-widget field to the field named
 * `newName.replace(pattern, change)`.
 *
 * - If a field with that name already exists and has the same /FT, the widget
 *   is re-parented onto it (its /Parent is set and it is appended to /Kids).
 * - Otherwise the old widget annotation is removed from its page and a new
 *   text field, radio group or check box is created at the same rectangle,
 *   carrying over the source field's text / selection / checked state.
 *
 * Nothing happens when the field has one widget or fewer, when the widget does
 * not belong to the field, or when the target is the source field itself.
 *
 * Every precondition is checked BEFORE the document is touched, so a refused
 * operation leaves the widget where it was. Failures are reported on stderr
 * and never thrown to the caller.
 *
 * @param doc     Document that owns the field and the widget.
 * @param field   Source field the widget currently belongs to.
 * @param widget  Widget to move.
 * @param newName Current fully qualified field name.
 * @param pattern Pattern to replace inside `newName`.
 * @param change  Replacement string for `pattern`.
 */
function applyWidgetRename(
  doc: PDFDocument,
  field: PDFField,
  widget: PDFWidgetAnnotation,
  newName: string,
  pattern: RegExp,
  change: string,
) {
  try {
    const form = doc.getForm();
    const kidsKey = PDFName.of("Kids");
    const sourceDict = field.acroField.dict;

    const sourceWidgets = field.acroField.getWidgets();
    if (sourceWidgets.length <= 1) return;
    // Widgets are compared through their dictionaries, matching the other
    // helpers in this file.
    if (!sourceWidgets.some((w) => w.dict === widget.dict)) return;

    // ---- Validation: nothing below this banner mutates the document. ----
    const page = findPageForWidget(doc, widget);
    if (!page) throw new Error("Widget page not found");
    const rect = widget.getRectangle();
    if (!rect) throw new Error("Widget has no rectangle");
    const finalName = newName.replace(pattern, change);
    const sourceType = detectFieldType(field);

    // Try to get an existing field with the new name.
    let targetField: PDFField | undefined;
    try {
      targetField = form.getField(finalName);
    } catch {
      // Field doesn't exist — that's fine, one is created further down.
    }

    // Renaming onto the source field itself would only detach and re-attach
    // the same widget.
    if (targetField && targetField.acroField.dict === sourceDict) return;

    let widgetRef: PDFRef | undefined;
    let targetKids: PDFArray | undefined;
    if (targetField) {
      const targetType = detectFieldType(targetField);
      if (sourceType !== targetType) {
        throw new Error(
          `Field "${finalName}" already exists with a different type (${targetType} vs ${sourceType})`,
        );
      }
      widgetRef = getWidgetRef(widget, doc);
      if (!widgetRef) throw new Error("Widget ref not found");
      // lookupMaybe returns undefined for a missing entry instead of throwing.
      targetKids = targetField.acroField.dict.lookupMaybe(kidsKey, PDFArray);
      if (!targetKids) {
        // NOTE(review): a target without /Kids presumably doubles as its own
        // widget; giving it a /Kids array would hide that widget, so the move
        // is refused.
        throw new Error(
          `Field "${finalName}" has no /Kids array to attach the widget to`,
        );
      }
    } else if (sourceType !== "/Tx" && sourceType !== "/Btn") {
      throw new Error(`Unsupported field type: ${sourceType}`);
    }

    // ---- Mutation: detach the widget from the source field. ----
    const sourceKids = sourceDict.lookupMaybe(kidsKey, PDFArray);
    if (sourceKids) {
      const remainingKids = sourceKids
        .asArray()
        .filter((ref) => doc.context.lookup(ref) !== widget.dict);
      sourceDict.set(kidsKey, doc.context.obj(remainingKids));
    }

    // Same type — attach the widget to the existing field. The annotation
    // stays on its page: its ref was taken from a page's /Annots array.
    if (targetField && targetKids && widgetRef) {
      widget.dict.set(PDFName.of("Parent"), targetField.acroField.ref);
      targetKids.push(widgetRef);
      return;
    }

    // No field with that name yet: replace the old annotation with a freshly
    // created field placed on the same rectangle.
    removeWidgetFromPage(widget, doc);
    const placement = {
      x: rect.x,
      y: rect.y,
      width: rect.width,
      height: rect.height,
    };

    if (sourceType === "/Tx") {
      const textField = form.createTextField(finalName);
      if (field instanceof PDFTextField) {
        const text = field.getText();
        if (text) textField.setText(text);
      }
      textField.addToPage(page, placement);
      return;
    }

    // sourceType is "/Btn" here; flag bit 15 separates radio groups from
    // check boxes.
    if (getFlag(field, 15)) {
      const radioGroup = form.createRadioGroup(finalName);
      radioGroup.addOptionToPage(finalName, page, placement);
      if (field instanceof PDFRadioGroup) {
        const selected = field.getSelected();
        // Only carry the selection over when the new group has that option;
        // selecting an unknown option throws.
        if (selected && radioGroup.getOptions().includes(selected)) {
          radioGroup.select(selected);
        }
      }
      return;
    }

    const checkBox = form.createCheckBox(finalName);
    checkBox.addToPage(page, placement);
    if (field instanceof PDFCheckBox && field.isChecked()) {
      checkBox.check();
    }
  } catch (error: unknown) {
    // Best effort: one failed widget must not abort the remaining renames,
    // but the failure has to be visible.
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Could not move widget of field "${newName}": ${reason}`);
  }
}
/**
 * Drops the widget's annotation from every page that lists it.
 *
 * A page's /Annots entry is replaced with a filtered copy only when the filter
 * actually removed something; pages without /Annots are skipped.
 */
function removeWidgetFromPage(widget: PDFWidgetAnnotation, doc: PDFDocument) {
  doc.getPages().forEach((page) => {
    const annotations = page.node.Annots();
    if (!annotations) return;

    const before = annotations.asArray();
    const kept = before.filter(
      (entry) => doc.context.lookup(entry) !== widget.dict,
    );

    // Only rewrite /Annots when the widget was actually on this page.
    if (kept.length !== before.length) {
      page.node.set(PDFName.of("Annots"), doc.context.obj(kept));
    }
  });
}
// function getWidgetRef(
// widget: PDFWidgetAnnotation,
// pages: PDFPage[],
// ): PDFRef | undefined {
// const widgetRect = (widget?.dict?.get(PDFName.of("Rect")) as PDFArray)
// ?.asArray();
// const widgetFT = (widget?.dict?.get(PDFName.of("FT")) as PDFString)
// ?.["value"];
// for (const page of pages) {
// const annotsArray = page.node.Annots()?.asArray();
// if (!annotsArray) continue;
// for (const annotRef of annotsArray) {
// const annotDict = page.doc.context.lookup(annotRef);
// if (!annotDict) continue;
// if (!(annotDict instanceof PDFDict)) continue;
// const rect = (annotDict.get(PDFName.of("Rect")) as PDFArray)?.asArray();
// const ft = (annotDict.get(PDFName.of("FT")) as PDFString)?.["value"];
// // rudimentary match (you can add more checks like /T, /Subtype, etc.)
// if (rect?.toString() === widgetRect?.toString() && ft === widgetFT) {
// return annotRef as PDFRef;
// }
// }
// }
// return undefined;
// }
/**
 * Expands the placeholders of a change template for one regex match.
 *
 * Capture-group placeholders (every occurrence is replaced):
 *
 * - `$<int>`  - capture group `<int>` verbatim (`$0` is the whole match)
 * - `$<int>i` - capture group as a zero-based index: a numeric capture is
 *               decremented by one; a capture that parses to 0 or to no number
 *               is emitted unchanged
 * - `$<int>s` - capture group converted to snake case
 * - `$<int>c` - capture group converted to camel case
 * - `$<int>l` - capture group converted to lower case
 * - `$<int>u` - capture group converted to upper case
 * - `$<int>t` - capture group converted to title case
 *
 * A group that exists in the pattern but did not take part in the match
 * expands to the empty string; a group number the pattern does not have is
 * left in the output untouched.
 *
 * Counter placeholders (only the first occurrence of each is replaced):
 *
 * - `$I{a,b,c}` - the option at position `index % optionCount`
 * - `$I`        - `index`
 * - `$I<int>`   - `index` plus the (possibly negative) offset `<int>`
 *
 * @param change Template containing the placeholders above.
 * @param match  Result of executing the search pattern on a field name.
 * @param index  Running counter supplied by the caller.
 * @returns The template with all recognised placeholders expanded.
 */
function evaluateChange(change: string, match: RegExpExecArray, index: number) {
  return change
    .replace(
      /\$(\d+)([icslut]?)/g,
      (token: string, groupNumber: string, modifier: string): string => {
        const position = Number(groupNumber);
        // Unknown group number: keep the placeholder as written.
        if (position >= match.length) return token;
        const captured = match[position];
        // Optional group that did not participate in the match.
        if (captured === undefined) return "";

        switch (modifier) {
          case "i": {
            const parsed = parseInt(captured);
            return parsed ? (parsed - 1).toString() : captured;
          }
          case "s":
            return toCase(captured, "snake");
          case "c":
            return toCase(captured, "camel");
          case "t":
            return toCase(captured, "title");
          case "l":
            return captured.toLowerCase();
          case "u":
            return captured.toUpperCase();
          default:
            return captured;
        }
      },
    )
    .replace(
      // Accepts the same lists as the former `(\w+,?)+` but without nested
      // quantifiers, so an unterminated `$I{...` cannot trigger catastrophic
      // backtracking.
      /\$I\{(\w+(?:,\w+)*,?)\}/,
      (_token: string, list: string): string => {
        const options = list.split(",");
        return options[index % options.length];
      },
    )
    .replace(
      /\$I(-?\d+)?/,
      (_token: string, offset?: string): string => {
        const shift = offset === undefined ? NaN : parseInt(offset);
        return (shift ? index + shift : index).toString();
      },
    );
}
/**
 * CLI tool that renames AcroForm fields whose fully qualified name matches a
 * regular expression.
 *
 * For every PDF path the tool lists one proposed rename per widget of each
 * matching field, lets the user pick which ones to apply, and then offers to
 * save the document.
 */
class RenameFields implements ITool {
  name = "renamefields";
  description = "Renames fields in a PDF form";
  block: TerminalBlock | undefined;

  /** Sets the terminal block used for prompts and log output. */
  setBlock(block: TerminalBlock) {
    this.block = block;
  }

  /**
   * Shows the usage line.
   *
   * @param standalone When true the alert is shown without this tool's block.
   */
  async help(standalone = false) {
    // NOTE(review): the usage text says "rename-fields" while `name` is
    // "renamefields" — confirm which spelling the dispatcher expects.
    await cliAlert(
      "Usage: rename-fields <pdfPath> <pattern> <change>\n",
      standalone ? undefined : this.block,
    );
  }

  /**
   * Runs the interactive rename for one or more PDFs.
   *
   * @param pdfPath Path to a PDF, or several paths separated by commas.
   * @param pattern Regular expression source matched against each field name.
   * @param change  Change template, expanded by `evaluateChange` per widget.
   */
  async run(pdfPath: string = "", pattern: string = "", change: string = "") {
    if (!this.block) {
      this.block = new TerminalBlock();
    }
    this.block.setPreserveHistory(true);
    [pdfPath, pattern, change] = await forceArgs(
      [pdfPath, pattern, change],
      [
        [
          "Please provide path to PDF (comma separated for multiple):",
          (p) => !!p && p.endsWith(".pdf"),
        ],
        "Please provide search string:",
        "Please provide requested change:",
      ],
      this.block,
    );

    // The pattern is the same for every file, so compile it once.
    const patternRegex = new RegExp(pattern);

    // Tolerate "a.pdf, b.pdf" and stray commas.
    const paths = pdfPath
      .split(",")
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);

    for (const currentPath of paths) {
      const pdf = await loadPdf(currentPath);
      const form = pdf.getForm();

      // Order fields by page, then top-to-bottom (5-unit tolerance on y), then
      // left-to-right, falling back to the field name.
      const fields = form.getFields().sort((a, b) => {
        const aWidget = a.acroField.getWidgets()[0];
        const bWidget = b.acroField.getWidgets()[0];
        // NOTE(review): the page is looked up with the FIELD's ref, not a
        // widget's; presumably this only resolves when the field dictionary is
        // itself listed in a page's /Annots — confirm for fields with /Kids.
        const aPage = a.doc.findPageForAnnotationRef(a.acroField.ref);
        const bPage = b.doc.findPageForAnnotationRef(b.acroField.ref);
        if (aPage && bPage && aPage !== bPage) {
          const pages = a.doc.getPages();
          const aPageIndex = pages.indexOf(aPage);
          const bPageIndex = pages.indexOf(bPage);
          if (aPageIndex !== bPageIndex) return aPageIndex - bPageIndex;
        }
        // A field may have no widgets at all; fall through to the name
        // comparison instead of dereferencing undefined.
        const aRect = aWidget?.Rect()?.asRectangle();
        const bRect = bWidget?.Rect()?.asRectangle();
        if (aRect && bRect) {
          const dy = bRect.y - aRect.y;
          if (Math.abs(dy) > 5) return dy;
          return aRect.x - bRect.x;
        }
        return a.getName().localeCompare(b.getName());
      });

      const badFields = fields.filter(
        (field) => field.acroField.getWidgets().length > 1,
      ).length;
      if (badFields) {
        await cliLog(
          colorize(
            `Warning, ${badFields} fields with shared widgets found`,
            "yellow",
          ),
          this.block,
        );
      }

      const foundUpdates: [string, callback][] = [];
      let changesMade = false;
      // Running counter handed to evaluateChange; advances once per widget
      // across all matching fields.
      let i = 0;
      for (const field of fields) {
        const name = field.getName();
        const match = patternRegex.exec(name);
        if (!match) continue;

        foundUpdates.push(
          ...field.acroField.getWidgets().map<[string, callback]>((widget) => {
            const toChange = evaluateChange(change, match, i);
            const preview = name.replace(new RegExp(patternRegex), toChange);
            i++;
            return [
              `${colorize(name, "red")} -> ${colorize(preview, "green")}`,
              () => {
                // The widget count is read when the option is applied, so once
                // earlier selections have moved widgets away, the last one is
                // renamed on the field itself.
                if (field.acroField.getWidgets().length > 1) {
                  applyWidgetRename(
                    pdf,
                    field,
                    widget,
                    name,
                    new RegExp(patternRegex),
                    toChange,
                  );
                } else {
                  applyRename(field, name, patternRegex, toChange);
                }
                changesMade = true;
              },
            ];
          }),
        );
      }

      if (foundUpdates.length) {
        await cliLog("Found updates:", this.block);
        await multiSelectMenuInteractive(
          "Please select an option to apply",
          foundUpdates,
          { terminalBlock: this.block },
        );
      }

      if (changesMade) {
        const path = await cliPrompt(
          "Save to path (or hit enter to keep current):",
          this.block,
        );
        // Empty input means "overwrite the file that was loaded".
        const destination = path || currentPath;
        try {
          await savePdf(pdf, destination);
        } catch (error: unknown) {
          // Keep processing the remaining files, but make sure the user knows
          // this one was NOT written.
          const reason = error instanceof Error ? error.message : String(error);
          await cliLog(
            colorize(`Failed to save ${destination}: ${reason}`, "red"),
            this.block,
          );
        }
      } else {
        await cliLog("No changes made, skipping", this.block);
      }
    }
  }
}
export default new RenameFields();
// if (import.meta.main) {
// // await call(renameFields)
// // while (!path || !path.endsWith('.pdf')) path = prompt("Please provide path to PDF:") || '';
// // while (!pattern) pattern = prompt("Please provide search string:") || '';
// // while (!change) change = prompt("Please provide requested change:") || '';
// await callWithArgPrompt(renameFields, [
// ["Please provide path to PDF:", (p) => !!p && p.endsWith(".pdf")],
// "Please provide search string:",
// "Please provide requested change:",
// ]);
// }