diff --git a/README.md b/README.md
index b7266a2..f77fd45 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,8 @@ Optional options:
 - `--avoid-overwrite`: Avoid overwriting if EXIF tags already exist in the file.
 - `--ext `: File extensions to watch. Only files with this extensions will be processed.
 - `--concurrency `: The numbers of files to process concurrently in watch mode.
-- `--face-group-ids ` List of face group IDs to use for face recognition.
+- `--face-group-ids `: List of face group IDs to use for face recognition.
+- `--repeat`: The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. The default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in Markdown format. AI-generated tags are considered acceptable if there is more than one tag and they are not in Markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.
 
 Example usage:
 
@@ -91,6 +92,7 @@ const options = {
   avoidOverwrite: false, // Avoid overwriting if EXIF tags already exist in the file
   doNotEndExifTool: false, // Do not end ExifTool process after writing metadata
   faceGroupIds: [], // List of face group IDs to use for face recognition
+  repeat: 0, // The number of times to repeat the task if the AI-generated result is deemed unacceptable
 };
 
 execute(options)
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 6745233..482257c 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -52,6 +52,7 @@ exif-ai -i example.jpeg -a ollama
 - `--ext `: 指定要监视的文件扩展名,只有符合这些扩展名的文件会被处理。
 - `--concurrency `: 在监视模式下,同时处理的文件数量上限。
 - `--face-group-ids `: 指定用于面部识别的面部组ID列表。
+- `--repeat `: 如果AI生成结果被认为不可接受时,重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式,则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式,则被视为可接受。使用此参数将消耗更多令牌,可能会产生额外费用。使用时请自行承担风险。
 
 示例用法:
 
diff --git a/package.json b/package.json
index 6fddd47..47bad32 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "exif-ai",
-  "version": "3.2.1",
+  "version": "3.2.2",
   "description": "A Node.js CLI and library that uses Ollama, ZhipuAI, Google Gemini, Coze or OpenAI to intelligently write image description and/or tags to exif metadata by it's content.",
   "homepage": "https://github.com/tychenjiajun/exif-ai",
   "repository": {
diff --git a/src/exif-ai.ts b/src/exif-ai.ts
index 6b9400b..2da534f 100644
--- a/src/exif-ai.ts
+++ b/src/exif-ai.ts
@@ -31,7 +31,7 @@ async function findFilesRecursive(
 }
 const program = new Command();
 program
-  .version("3.2.1")
+  .version("3.2.2")
   .description(getText("description") ?? "")
   .requiredOption("-a, --api-provider ", getText("api-provider"))
   .option("-T, --tasks ", getText("tasks"))
@@ -50,6 +50,7 @@
   .option("--ext ", getText("ext"))
   .option("--concurrency ", getText("concurrency"))
   .option("--face-group-ids ", getText("face-group-ids"))
+  .option("--repeat ", getText("repeat"))
   .parse();
 
 const options = program.opts();
@@ -78,6 +79,7 @@ async function handleExecution(path: string) {
       avoidOverwrite: options.avoidOverwrite,
       doNotEndExifTool: Boolean(watchMode),
       faceGroupIds: options.faceGroupIds,
+      repeat: options.repeat,
     });
   } catch (error) {
     console.error(`Error processing file ${path}:`, error);
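For readers using exif-ai as a library rather than through the CLI, the sketch below shows how the new `repeat` option slots into an `execute()` call. It is illustrative only: the option keys mirror the README example above, the import specifier is assumed from the package name, and the provider/input-file options a real call needs are left out.

```ts
import { execute } from "exif-ai"; // import specifier assumed from the package name

// Illustrative call only; the provider and input-file options a real run needs are omitted.
await execute({
  tasks: ["description", "tag"],
  verbose: true,
  avoidOverwrite: false,
  faceGroupIds: [],
  repeat: 2, // retry up to 2 extra times when a result is deemed unacceptable; 0 (the default) disables retries
});
```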
diff --git a/src/fluent/index.ts b/src/fluent/index.ts
index d503c0a..2e34aa8 100644
--- a/src/fluent/index.ts
+++ b/src/fluent/index.ts
@@ -26,6 +26,9 @@
 verbose = Enable verbose output for debugging.
 tasks = List of tasks to perform ('description', 'tag', 'face').
 concurrency = The numbers of files to process concurrently in watch mode.
 face-group-ids = List of face group IDs to use for face recognition.
+repeat = The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. The default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in Markdown format. AI-generated tags are considered acceptable if there is more than one tag and they are not in Markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.
+description-prompt-input = Describe image.
+tag-prompt-input = Tag image in words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>
 `),
 );
@@ -49,6 +52,9 @@
 verbose = 启用详细输出以进行调试。
 tasks = 要执行的任务列表('description','tag','face')。
 concurrency = 在监视模式下同时处理文件的数目。
 face-group-ids = 人脸搜索要使用的面部组ID列表。
+repeat = 如果AI生成结果被认为不可接受时,重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式,则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式,则被视为可接受。使用此参数将消耗更多令牌,可能会产生额外费用。使用时请自行承担风险。
+description-prompt-input = 描述图像。输出格式为一行文本。示例输出:这幅照片是在一个风景名胜区里拍摄的,可以看到很多人在那里参观。这些石柱高耸,顶部平坦,看起来像是人工雕琢而成,让人想起中国的园林风格。前景是熙熙攘攘的人群,他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的,让人感觉平静祥和。,Description":"这幅照片是在一个风景名胜区里拍摄的,可以看到很多人在那里参观。这些石柱高耸,顶部平坦,看起来像是人工雕琢而成,让人想起中国的园林风格。前景是熙熙攘攘的人群,他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的,让人感觉平静祥和。
+tag-prompt-input = 根据主题、对象、事件、地点对图像进行标签。输出格式:标签1,标签2,标签3,标签4,标签5,标签6,……,标签N。示例输出:石林,中国,中国石林,自然,喀斯特,山,旅行,旅游,景区,观光,人群,户外,公园,树木,天空,建筑。"
 `),
 );
diff --git a/src/index.ts b/src/index.ts
index b93e8ad..a3da599 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -17,6 +17,7 @@ import { DescriptionKey, getDescription } from "./tasks/description.js";
 import { getTags, TagKey } from "./tasks/tags.js";
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { getFaces } from "./tasks/face.js";
+import { getText } from "./fluent/index.js";
 
 if (
   !globalThis.fetch ||
@@ -66,8 +67,8 @@ export async function execute({
     "Caption-Abstract",
   ],
   tagTags = ["Subject", "TagsList", "Keywords"],
-  descriptionPrompt = `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
-  tagPrompt = `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>`,
+  descriptionPrompt = getText('description-prompt-input') ?? `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
+  tagPrompt = getText('tag-prompt-input') ?? `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>`,
   verbose = false,
   dry = false,
   writeArgs,
@@ -75,6 +76,7 @@
   avoidOverwrite = false,
   doNotEndExifTool = false,
   faceGroupIds = [],
+  repeat = 0,
 }: {
   /**
    * Array of tasks to perform: 'description', 'tag', 'face'
    */
@@ -136,6 +138,10 @@
   * Array of face group IDs to use for face recognition
   */
  faceGroupIds?: string[];
+  /**
+   * Number of times to repeat the task if it does not return acceptable results
+   */
+  repeat?: number;
 }) {
   if (["description", "tag", "tags", "face"].every((t) => !tasks.includes(t)))
     return;
@@ -191,6 +197,12 @@
     file_id = id;
   }
 
+  if (verbose) {
+    // log tasks' prompt
+    console.log("Description prompt:", descriptionPrompt);
+    console.log("Tag prompt:", tagPrompt);
+  }
+
   const [description, tags] = await Promise.all([
     tasks.includes("description")
       ? getDescription({
@@ -204,6 +216,7 @@
           existingTags,
           path: resolvedPath,
          file_id,
+          repeat,
         })
       : undefined,
     tasks.includes("tag") || tasks.includes("tags")
       ? getTags({
@@ -218,6 +231,7 @@
          additionalTags: faces,
          path: resolvedPath,
          file_id,
+          repeat,
        })
      : tasks.includes("face")
        ? getTags({
@@ -230,9 +244,10 @@
            additionalTags: faces,
            path: resolvedPath,
            file_id,
+            repeat,
          })
        : undefined,
-  ]);
+  ] as const);
 
   const result = {
     ...description,
diff --git a/src/provider/google.ts b/src/provider/google.ts
index 6a057ee..bc85215 100644
--- a/src/provider/google.ts
+++ b/src/provider/google.ts
@@ -16,12 +16,16 @@ async function sizeHandle(
   const sharpInstance = await sharp(buffer);
   const { width = 0, height = 0 } = await sharpInstance.metadata();
   let done = await sharp(buffer)
+    .resize({
+      ...(width > height ? { width: 6000 } : { height: 6000 }),
+      withoutEnlargement: true,
+    })
     .jpeg({
       quality,
     })
     .toBuffer();
 
-  while (done.byteLength > 20_000_000) {
+  while (done.byteLength > 18_000_000) {
     quality = Math.max(quality - drop, 0);
     done = await sharp(buffer)
       .resize({
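The `src/provider/google.ts` hunk above now caps the longest image side at 6000 px before the existing quality-reduction loop and lowers the byte budget to 18 MB. Below is a rough standalone sketch of that idea built on the same sharp calls; the helper name, the fixed quality step of 10, and the default limit are assumptions for illustration, not the project's exact code.

```ts
import sharp from "sharp";

// Sketch: cap the longest side, encode as JPEG, then keep lowering quality
// until the output fits under the provider's upload budget.
async function fitUnderLimit(buffer: Buffer, limit = 18_000_000): Promise<Buffer> {
  const { width = 0, height = 0 } = await sharp(buffer).metadata();
  const resizeOptions = {
    ...(width > height ? { width: 6000 } : { height: 6000 }),
    withoutEnlargement: true, // never upscale images that are already small enough
  };
  let quality = 100;
  let out = await sharp(buffer).resize(resizeOptions).jpeg({ quality }).toBuffer();
  while (out.byteLength > limit && quality > 0) {
    quality = Math.max(quality - 10, 0); // the real code uses a configurable drop step
    out = await sharp(buffer).resize(resizeOptions).jpeg({ quality }).toBuffer();
  }
  return out;
}
```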
diff --git a/src/tasks/description.ts b/src/tasks/description.ts
index 855f360..840f864 100644
--- a/src/tasks/description.ts
+++ b/src/tasks/description.ts
@@ -16,6 +16,7 @@ export async function getDescription({
   existingTags,
   path,
   file_id,
+  repeat,
 }: {
   buffer: Buffer;
   model?: string;
@@ -27,23 +28,33 @@ export async function getDescription({
   existingTags?: Readonly;
   path: string;
   file_id?: string;
+  repeat?: number;
 }) {
   // Get description from provider
   let description: string | undefined;
-  try {
-    description = await providerModule.getDescription?.({
-      buffer,
-      model,
-      prompt: prompt,
-      providerArgs,
-      path,
-      file_id,
-    });
-  } catch (error) {
-    console.error("Failed to get description from provider:", error);
-    return;
+  if (providerModule) {
+    for (let i = 0; i < (repeat ?? 0) + 1; i++) {
+      try {
+        description = await providerModule.getDescription?.({
+          buffer,
+          model,
+          prompt: prompt,
+          providerArgs,
+          path,
+          file_id,
+        });
+      } catch (error) {
+        if (verbose)
+          console.error("Failed to get description from provider:", error);
+      }
+      if (description && description.trim().length > 10 && !/[*#>`]/.test(description)) {
+        description = description.trim().replaceAll(/\n/g, "");
+        break;
+      }
+    }
   }
+
   if (verbose) console.log("Description is:", description);
 
   return description
diff --git a/src/tasks/face.ts b/src/tasks/face.ts
index 0cc0ec0..181e887 100644
--- a/src/tasks/face.ts
+++ b/src/tasks/face.ts
@@ -100,7 +100,7 @@
       (k) => k != null,
     );
   } catch (error) {
-    console.error("Failed to get tags from provider:", error);
+    if (verbose) console.error("Failed to get faces", error);
     return;
   }
 }
diff --git a/src/tasks/tags.ts b/src/tasks/tags.ts
index a3219c8..757aa5b 100644
--- a/src/tasks/tags.ts
+++ b/src/tasks/tags.ts
@@ -13,25 +13,42 @@ type TagKey2 = keyof {
 
 export type TagKey = Exclude;
 
-function formatTags(tags: string | string[] | undefined) {
-  return typeof tags === "string"
-    ? tags
-        .replaceAll(/tag[0-9]+/g, "")
-        .replaceAll(/[\[\]\.{}<>/*'"()]/g, "")
-        .split(tags.includes(":") ? ":" : ":")
-        .at(-1)
-        ?.split(tags.includes(",") ? "," : "\n")
-        .map((s) =>
-          s
-            .trim()
-            .replace(/\n$/g, "")
-            .replace(/[0-9]+[ ]+(.*)/g, "$1"),
-        )
-        .filter(
-          (s) =>
-            s.length > 0 && [...s.matchAll(/ /g)].length <= 1 && s !== "\n",
-        )
-    : tags;
+function formatTags(tags: string | string[] | undefined): string[] {
+  const result =
+    typeof tags === "string"
+      ? Number(tags.match(/[0-9]+.*\n/g)?.length) > 1
+        ? (tags.match(/[0-9]+.*\n/g)?.map((s) => {
+            return s
+              .replaceAll(/tag[0-9]+/g, "")
+              .replaceAll(/[\[\]\.{}<>/*'"()。]/g, "")
+              .replace(/\n$/g, "")
+              .replace(/[0-9]+(.*)/g, "$1")
+              .trim();
+          }) ?? [])
+        : (tags
+            .replaceAll(/tag[0-9]+/g, "")
+            .replaceAll(/[\[\]\.{}<>/*'"()。]/g, "")
+            .split(tags.includes(":") ? ":" : ":")
+            .at(-1)
+            ?.split(
+              tags.includes(",") ? "," : tags.includes(",") ? "," : "\n",
+            )
+            .map((s) =>
+              s
+                .trim()
+                .replace(/\n$/g, "")
+                .replace(/[0-9]+[ ]+(.*)/g, "$1"),
+            )
+            .filter(
+              (s) =>
+                s.length > 0 && [...s.matchAll(/ /g)].length <= 1 && s !== "\n",
+            ) ?? [])
+      : (tags ?? []);
+
+  // if (result.length === 1) {
+  //   return result.flatMap(r => formatTags(r));
+  // }
+  return result;
 }
 
 export async function getTags({
@@ -46,6 +63,7 @@
   additionalTags,
   path,
   file_id,
+  repeat,
 }: {
   buffer: Buffer;
   model?: string;
@@ -58,27 +76,32 @@
   additionalTags?: Readonly;
   path: string;
   file_id?: string;
+  repeat?: number;
 }) {
   // Get tags from provider
-  let tags: string | string[] = [];
+  let tags: string[] = [];
 
   if (providerModule) {
-    try {
-      tags = await providerModule.getTags?.({
-        buffer,
-        model,
-        prompt: prompt,
-        providerArgs,
-        path,
-        file_id,
-      });
-    } catch (error) {
-      console.error("Failed to get tags from provider:", error);
-      return;
+    for (let i = 0; i < (repeat ?? 0) + 1; i++) {
+      try {
+        tags = formatTags(
+          await providerModule.getTags?.({
+            buffer,
+            model,
+            prompt: prompt,
+            providerArgs,
+            path,
+            file_id,
+          }),
+        );
+      } catch (error) {
+        if (verbose) console.error("Failed to get tags from provider:", error);
+      }
+      if (tags.length > 1) break;
     }
   }
 
-  const formatted = formatTags(tags)?.concat(additionalTags ?? []);
+  const formatted = tags?.concat(additionalTags ?? []);
 
   if (verbose) console.log("Tags are:", formatted);
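The reworked `formatTags` above handles two raw answer shapes: a numbered list with one tag per line, and the previous single-line, comma-separated form. As a rough illustration of the new numbered-list branch only, here is a simplified standalone sketch (the helper name is hypothetical; the regex steps mirror the patch):

```ts
// Simplified sketch of the numbered-list handling: when the model answers with
// "1. foo\n2. bar\n" style output, pull one tag per numbered line.
function parseNumberedTags(raw: string): string[] {
  const lines = raw.match(/[0-9]+.*\n/g) ?? [];
  return lines.map((line) =>
    line
      .replaceAll(/tag[0-9]+/g, "")             // drop literal "tag1"-style placeholders
      .replaceAll(/[\[\]\.{}<>/*'"()。]/g, "")  // strip brackets and punctuation
      .replace(/\n$/, "")
      .replace(/[0-9]+(.*)/, "$1")              // drop the leading list number
      .trim(),
  );
}

// e.g. parseNumberedTags("1. stone forest\n2. karst\n3. travel\n")
//      => ["stone forest", "karst", "travel"]
```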
diff --git a/test/index.test.ts b/test/index.test.ts
index e2be9a1..0e411e6 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -29,7 +29,7 @@ describe("Image Processing Tests", () => {
       "ImageDescription",
       "Caption-Abstract",
     ] as Parameters[0]["descriptionTags"],
-    prompt: "Describe image in English",
+    prompt: "Describe image.",
     verbose: true,
     dry: false,
     writeArgs: [],
@@ -63,13 +63,13 @@
     // Verify the existing tag is not overwritten
     const descriptionTags = await exiftool.read(resolvedPath);
-    expect(descriptionTags.XPComment).to.equal("Describe image in English");
-    expect(descriptionTags.Description).to.equal("Describe image in English");
+    expect(descriptionTags.XPComment).to.equal("Describe image.");
+    expect(descriptionTags.Description).to.equal("Describe image.");
     expect(descriptionTags.ImageDescription).to.equal(
-      "Describe image in English",
+      "Describe image.",
     );
     expect(descriptionTags["Caption-Abstract"]).to.equal(
-      "Describe image in English",
+      "Describe image.",
     );
 
     expect(existsSync(`${resolvedPath}_original`)).to.be.true;
@@ -106,12 +106,12 @@
     // Verify the existing tag is not overwritten
     const descriptionTags = await exiftool.read(resolvedPath);
     expect(descriptionTags.XPComment).to.equal("Existing comment");
-    expect(descriptionTags.Description).to.equal("Describe image in English");
+    expect(descriptionTags.Description).to.equal("Describe image.");
     expect(descriptionTags.ImageDescription).to.equal(
-      "Describe image in English",
+      "Describe image.",
     );
     expect(descriptionTags["Caption-Abstract"]).to.equal(
-      "Describe image in English",
+      "Describe image.",
     );
   });
 
@@ -126,7 +126,7 @@
     // Assuming the function returns undefined on success
     // Verify that the descriptionTags are written correctly
     const descriptionTags = await exiftool.read(resolvedPath);
-    expect(descriptionTags.XPComment).to.equal("Describe image in English");
+    expect(descriptionTags.XPComment).to.equal("Describe image.");
     // Additional assertions can be made based on the expected behavior with the given write args
     expect(existsSync(`${resolvedPath}_original`)).to.be.false;
   });
diff --git a/test/tasks/tags.test.ts b/test/tasks/tags.test.ts
index de6cfc9..2c236f8 100644
--- a/test/tasks/tags.test.ts
+++ b/test/tasks/tags.test.ts
@@ -13,6 +13,7 @@ const existingTags = {
   Subject: ["existingSubject"],
   Keywords: ["existingKeywords"],
 };
+const path = "testPath";
 
 const baseOptions = {
   buffer,
@@ -23,6 +24,7 @@
   verbose,
   tagTags,
   existingTags,
+  path,
 };
 
 import { getTags } from "../../src/tasks/tags.js";
@@ -126,7 +128,7 @@ describe("Tag Tests", () => {
       providerModule,
     });
 
-    expect(result).to.deep.equal(undefined);
+    expect(result).to.deep.equal({});
   });
 
   it.each([
@@ -251,7 +253,6 @@ Given these elements, here are some possible tags based on subject, object, even
        "门口外部照明灯",
        "展览门口安保设施",
        "门口的窗帘",
-        "外部的街道、建筑周围环境",
      ],
    },
    {
@@ -288,7 +289,6 @@ Given these elements, here are some possible tags based on subject, object, even
        "多彩的露台",
        "聚集在露台下",
        "夜晚游乐设施周边",
-        "露台周围景观",
      ],
    },
    {
@@ -315,7 +315,6 @@ Given these elements, here are some possible tags based on subject, object, even
        "自然景观",
        "科学探索",
        "技术成就",
-        "宇宙探索",
      ],
    },
    { a: '["a","b"]', expected: ["a", "b"] },
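Stepping back, the changes in `src/tasks/description.ts` and `src/tasks/tags.ts` above share one retry shape. A condensed, hypothetical sketch of that shared pattern follows; `withRepeat` and `isAcceptable` are illustrative names, and the concrete checks in the patch are a description longer than 10 characters with no Markdown markup, and a tag list with more than one entry.

```ts
// Simplified shape of the repeat logic added in description.ts and tags.ts.
// Not an exported API; names are illustrative.
async function withRepeat<T>(
  attempt: () => Promise<T | undefined>,
  isAcceptable: (value: T) => boolean,
  repeat = 0,
): Promise<T | undefined> {
  let result: T | undefined;
  for (let i = 0; i < repeat + 1; i++) {
    try {
      result = await attempt();
    } catch {
      // swallow the provider error and retry; the real code logs it when verbose is set
    }
    if (result !== undefined && isAcceptable(result)) break;
  }
  return result;
}
```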