feat: add repeat option

tychenjiajun · Sep 28, 2024 · dfbb0f1 · dfbb0f1
1 parent b888401
commit dfbb0f1
Show file tree

Hide file tree

Showing 12 changed files with 129 additions and 66 deletions.
diff --git a/README.md b/README.md
@@ -56,7 +56,8 @@ Optional options:
 - `--avoid-overwrite`: Avoid overwriting if EXIF tags already exist in the file.
 - `--ext <extensions...>`: File extensions to watch. Only files with these extensions will be processed.
 - `--concurrency <number>`: The number of files to process concurrently in watch mode.
-- `--face-group-ids <group...>` List of face group IDs to use for face recognition.
+- `--face-group-ids <group...>`: List of face group IDs to use for face recognition.
+- `--repeat <number>`: The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. Default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in Markdown format. AI-generated tags are considered acceptable if there is more than one tag and they are not in Markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.
 
 Example usage:
 
@@ -91,6 +92,7 @@ const options = {
   avoidOverwrite: false, // Avoid overwriting if EXIF tags already exist in the file
   doNotEndExifTool: false, // Do not end ExifTool process after writing metadata
   faceGroupIds: [], // List of face group IDs to use for face recognition
+  repeat: 0, // The number of times to repeat the task if the AI-generated result is deemed unacceptable
 };
 
 execute(options)

diff --git a/README.zh-CN.md b/README.zh-CN.md
@@ -52,6 +52,7 @@ exif-ai -i example.jpeg -a ollama
 - `--ext <extensions...>`: 指定要监视的文件扩展名，只有符合这些扩展名的文件会被处理。
 - `--concurrency <number>`: 在监视模式下，同时处理的文件数量上限。
 - `--face-group-ids <group...>`: 指定用于面部识别的面部组ID列表。
+- `--repeat <number>`: 如果AI生成结果被认为不可接受时，重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式，则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式，则被视为可接受。使用此参数将消耗更多令牌，可能会产生额外费用。使用时请自行承担风险。
 
 示例用法:
 

diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "exif-ai",
-  "version": "3.2.1",
+  "version": "3.2.2",
   "description": "A Node.js CLI and library that uses Ollama, ZhipuAI, Google Gemini, Coze or OpenAI to intelligently write image description and/or tags to exif metadata by its content.",
   "homepage": "https://github.com/tychenjiajun/exif-ai",
   "repository": {

diff --git a/src/exif-ai.ts b/src/exif-ai.ts
@@ -31,7 +31,7 @@ async function findFilesRecursive(
 }
 const program = new Command();
 program
-  .version("3.2.1")
+  .version("3.2.2")
   .description(getText("description") ?? "")
   .requiredOption("-a, --api-provider <provider>", getText("api-provider"))
   .option("-T, --tasks <tasks...>", getText("tasks"))
@@ -50,6 +50,7 @@ program
   .option("--ext <extensions...>", getText("ext"))
   .option("--concurrency <number>", getText("concurrency"))
   .option("--face-group-ids <groups...>", getText("face-group-ids"))
+  .option("--repeat <number>", getText("repeat"))
   .parse();
 
 const options = program.opts();
@@ -78,6 +79,7 @@ async function handleExecution(path: string) {
       avoidOverwrite: options.avoidOverwrite,
       doNotEndExifTool: Boolean(watchMode),
       faceGroupIds: options.faceGroupIds,
+      repeat: options.repeat,
     });
   } catch (error) {
     console.error(`Error processing file ${path}:`, error);

diff --git a/src/fluent/index.ts b/src/fluent/index.ts
@@ -26,6 +26,9 @@ verbose = Enable verbose output for debugging.
 tasks = List of tasks to perform ('description', 'tag', 'face').
 concurrency = The numbers of files to process concurrently in watch mode.
 face-group-ids = List of face group IDs to use for face recognition.
+repeat = The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. Default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in markdown format. AI-generated tags are considered acceptable if there are more than 1 tag and they are not in markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.
+description-prompt-input = Describe image.
+tag-prompt-input = Tag image in words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>,  <tag5>,  ..., <tagN>
 `),
 );
 
@@ -49,6 +52,9 @@ verbose = 启用详细输出以进行调试。
 tasks = 要执行的任务列表（'description'，'tag'，'face'）。
 concurrency = 在监视模式下同时处理文件的数目。
 face-group-ids = 人脸搜索要使用的面部组ID列表。
+repeat = 如果AI生成结果被认为不可接受时，重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式，则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式，则被视为可接受。使用此参数将消耗更多令牌，可能会产生额外费用。使用时请自行承担风险。
+description-prompt-input = 描述图像。输出格式为一行文本。示例输出：这幅照片是在一个风景名胜区里拍摄的，可以看到很多人在那里参观。这些石柱高耸，顶部平坦，看起来像是人工雕琢而成，让人想起中国的园林风格。前景是熙熙攘攘的人群，他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的，让人感觉平静祥和。，Description":"这幅照片是在一个风景名胜区里拍摄的，可以看到很多人在那里参观。这些石柱高耸，顶部平坦，看起来像是人工雕琢而成，让人想起中国的园林风格。前景是熙熙攘攘的人群，他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的，让人感觉平静祥和。
+tag-prompt-input = 根据主题、对象、事件、地点对图像进行标签。输出格式：标签1，标签2，标签3，标签4，标签5，标签6，……，标签N。示例输出：石林，中国，中国石林，自然，喀斯特，山，旅行，旅游，景区，观光，人群，户外，公园，树木，天空，建筑。"
 `),
 );
 

diff --git a/src/index.ts b/src/index.ts
@@ -17,6 +17,7 @@ import { DescriptionKey, getDescription } from "./tasks/description.js";
 import { getTags, TagKey } from "./tasks/tags.js";
 import { HttpsProxyAgent } from "https-proxy-agent";
 import { getFaces } from "./tasks/face.js";
+import { getText } from "./fluent/index.js";
 
 if (
   !globalThis.fetch ||
@@ -66,15 +67,16 @@ export async function execute({
     "Caption-Abstract",
   ],
   tagTags = ["Subject", "TagsList", "Keywords"],
-  descriptionPrompt = `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
-  tagPrompt = `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>,  <tag5>,  ..., <tagN>`,
+  descriptionPrompt = getText('description-prompt-input') ?? `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
+  tagPrompt = getText('tag-prompt-input') ?? `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>,  <tag5>,  ..., <tagN>`,
   verbose = false,
   dry = false,
   writeArgs,
   providerArgs,
   avoidOverwrite = false,
   doNotEndExifTool = false,
   faceGroupIds = [],
+  repeat = 0,
 }: {
   /**
    * Array of tasks to perform: 'description', 'tag', 'face'
@@ -136,6 +138,10 @@ export async function execute({
    * Array of face group IDs to use for face recognition
    */
   faceGroupIds?: string[];
+  /**
+   * Number of times to repeat the task if it does not return acceptable results
+   */
+  repeat?: number;
 }) {
   if (["description", "tag", "tags", "face"].every((t) => !tasks.includes(t)))
     return;
@@ -191,6 +197,12 @@ export async function execute({
       file_id = id;
     }
 
+    if (verbose) {
+      // log tasks' prompt
+      console.log("Description prompt:", descriptionPrompt);
+      console.log("Tag prompt:", tagPrompt);
+    }
+
     const [description, tags] = await Promise.all([
       tasks.includes("description")
         ? getDescription({
@@ -204,6 +216,7 @@ export async function execute({
             existingTags,
             path: resolvedPath,
             file_id,
+            repeat,
           })
         : undefined,
       tasks.includes("tag") || tasks.includes("tags")
@@ -218,6 +231,7 @@ export async function execute({
             additionalTags: faces,
             path: resolvedPath,
             file_id,
+            repeat,
           })
         : tasks.includes("face")
           ? getTags({
@@ -230,9 +244,10 @@ export async function execute({
               additionalTags: faces,
               path: resolvedPath,
               file_id,
+              repeat,
             })
           : undefined,
-    ]);
+    ] as const);
 
     const result = {
       ...description,

diff --git a/src/provider/google.ts b/src/provider/google.ts
@@ -16,12 +16,16 @@ async function sizeHandle(
   const sharpInstance = await sharp(buffer);
   const { width = 0, height = 0 } = await sharpInstance.metadata();
   let done = await sharp(buffer)
+    .resize({
+      ...(width > height ? { width: 6000 } : { height: 6000 }),
+      withoutEnlargement: true,
+    })
     .jpeg({
       quality,
     })
     .toBuffer();
 
-  while (done.byteLength > 20_000_000) {
+  while (done.byteLength > 18_000_000) {
     quality = Math.max(quality - drop, 0);
     done = await sharp(buffer)
       .resize({

diff --git a/src/tasks/description.ts b/src/tasks/description.ts
@@ -16,6 +16,7 @@ export async function getDescription({
   existingTags,
   path,
   file_id,
+  repeat,
 }: {
   buffer: Buffer;
   model?: string;
@@ -27,23 +28,33 @@ export async function getDescription({
   existingTags?: Readonly<Tags>;
   path: string;
   file_id?: string;
+  repeat?: number;
 }) {
   // Get description from provider
   let description: string | undefined;
 
-  try {
-    description = await providerModule.getDescription?.({
-      buffer,
-      model,
-      prompt: prompt,
-      providerArgs,
-      path,
-      file_id,
-    });
-  } catch (error) {
-    console.error("Failed to get description from provider:", error);
-    return;
+  if (providerModule) {
+    for (let i = 0; i < (repeat ?? 0) + 1; i++) {
+      try {
+        description = await providerModule.getDescription?.({
+          buffer,
+          model,
+          prompt: prompt,
+          providerArgs,
+          path,
+          file_id,
+        });
+      } catch (error) {
+        if (verbose)
+          console.error("Failed to get description from provider:", error);
+      }
+      if (description && description.trim().length > 10 && !/[*#>`]/.test(description)) {
+        description = description.trim().replaceAll(/\n/g, "");
+        break;
+      }
+    }
   }
+
   if (verbose) console.log("Description is:", description);
 
   return description

diff --git a/src/tasks/face.ts b/src/tasks/face.ts
@@ -100,7 +100,7 @@ export async function getFaces({
       (k) => k != null,
     );
   } catch (error) {
-    console.error("Failed to get tags from provider:", error);
+    if (verbose) console.error("Failed to get faces", error);
     return;
   }
 }
diff --git a/src/tasks/tags.ts b/src/tasks/tags.ts
@@ -13,25 +13,42 @@ type TagKey2 = keyof {
 
 export type TagKey = Exclude<TagKey2, TagKey1>;
 
-function formatTags(tags: string | string[] | undefined) {
-  return typeof tags === "string"
-    ? tags
-        .replaceAll(/tag[0-9]+/g, "")
-        .replaceAll(/[\[\]\.{}<>/*'"()]/g, "")
-        .split(tags.includes("：") ? "：" : ":")
-        .at(-1)
-        ?.split(tags.includes(",") ? "," : "\n")
-        .map((s) =>
-          s
-            .trim()
-            .replace(/\n$/g, "")
-            .replace(/[0-9]+[ ]+(.*)/g, "$1"),
-        )
-        .filter(
-          (s) =>
-            s.length > 0 && [...s.matchAll(/ /g)].length <= 1 && s !== "\n",
-        )
-    : tags;
+function formatTags(tags: string | string[] | undefined): string[] {
+  const result =
+    typeof tags === "string"
+      ? Number(tags.match(/[0-9]+.*\n/g)?.length) > 1
+        ? (tags.match(/[0-9]+.*\n/g)?.map((s) => {
+            return s
+              .replaceAll(/tag[0-9]+/g, "")
+              .replaceAll(/[\[\]\.{}<>/*'"()。]/g, "")
+              .replace(/\n$/g, "")
+              .replace(/[0-9]+(.*)/g, "$1")
+              .trim();
+          }) ?? [])
+        : (tags
+            .replaceAll(/tag[0-9]+/g, "")
+            .replaceAll(/[\[\]\.{}<>/*'"()。]/g, "")
+            .split(tags.includes("：") ? "：" : ":")
+            .at(-1)
+            ?.split(
+              tags.includes("，") ? "，" : tags.includes(",") ? "," : "\n",
+            )
+            .map((s) =>
+              s
+                .trim()
+                .replace(/\n$/g, "")
+                .replace(/[0-9]+[ ]+(.*)/g, "$1"),
+            )
+            .filter(
+              (s) =>
+                s.length > 0 && [...s.matchAll(/ /g)].length <= 1 && s !== "\n",
+            ) ?? [])
+      : (tags ?? []);
+
+  // if (result.length === 1) {
+  //   return result.flatMap(r => formatTags(r));
+  // }
+  return result;
 }
 
 export async function getTags({
@@ -46,6 +63,7 @@ export async function getTags({
   additionalTags,
   path,
   file_id,
+  repeat,
 }: {
   buffer: Buffer;
   model?: string;
@@ -58,27 +76,32 @@ export async function getTags({
   additionalTags?: Readonly<string[]>;
   path: string;
   file_id?: string;
+  repeat?: number;
 }) {
   // Get tags from provider
-  let tags: string | string[] = [];
+  let tags: string[] = [];
 
   if (providerModule) {
-    try {
-      tags = await providerModule.getTags?.({
-        buffer,
-        model,
-        prompt: prompt,
-        providerArgs,
-        path,
-        file_id,
-      });
-    } catch (error) {
-      console.error("Failed to get tags from provider:", error);
-      return;
+    for (let i = 0; i < (repeat ?? 0) + 1; i++) {
+      try {
+        tags = formatTags(
+          await providerModule.getTags?.({
+            buffer,
+            model,
+            prompt: prompt,
+            providerArgs,
+            path,
+            file_id,
+          }),
+        );
+      } catch (error) {
+        if (verbose) console.error("Failed to get tags from provider:", error);
+      }
+      if (tags.length > 1) break;
     }
   }
 
-  const formatted = formatTags(tags)?.concat(additionalTags ?? []);
+  const formatted = tags?.concat(additionalTags ?? []);
 
   if (verbose) console.log("Tags are:", formatted);