Skip to content

Commit

Permalink
feat: add repeat option
Browse files Browse the repository at this point in the history
  • Loading branch information
tychenjiajun committed Sep 28, 2024
1 parent b888401 commit dfbb0f1
Show file tree
Hide file tree
Showing 12 changed files with 129 additions and 66 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ Optional options:
- `--avoid-overwrite`: Avoid overwriting if EXIF tags already exist in the file.
- `--ext <extensions...>`: File extensions to watch. Only files with these extensions will be processed.
- `--concurrency <number>`: The number of files to process concurrently in watch mode.
- `--face-group-ids <group...>` List of face group IDs to use for face recognition.
- `--face-group-ids <group...>`: List of face group IDs to use for face recognition.
- `--repeat <number>`: The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. Default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in Markdown format. AI-generated tags are considered acceptable if there is more than one tag and they are not in Markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.

Example usage:

Expand Down Expand Up @@ -91,6 +92,7 @@ const options = {
avoidOverwrite: false, // Avoid overwriting if EXIF tags already exist in the file
doNotEndExifTool: false, // Do not end ExifTool process after writing metadata
faceGroupIds: [], // List of face group IDs to use for face recognition
repeat: 0, // The number of times to repeat the task if the AI-generated result is deemed unacceptable
};

execute(options)
Expand Down
1 change: 1 addition & 0 deletions README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ exif-ai -i example.jpeg -a ollama
- `--ext <extensions...>`: 指定要监视的文件扩展名,只有符合这些扩展名的文件会被处理。
- `--concurrency <number>`: 在监视模式下,同时处理的文件数量上限。
- `--face-group-ids <group...>`: 指定用于面部识别的面部组ID列表。
- `--repeat <number>`: 如果AI生成结果被认为不可接受时,重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式,则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式,则被视为可接受。使用此参数将消耗更多令牌,可能会产生额外费用。使用时请自行承担风险。

示例用法:

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "exif-ai",
"version": "3.2.1",
"version": "3.2.2",
"description": "A Node.js CLI and library that uses Ollama, ZhipuAI, Google Gemini, Coze or OpenAI to intelligently write image description and/or tags to exif metadata by it's content.",
"homepage": "https://github.com/tychenjiajun/exif-ai",
"repository": {
Expand Down
4 changes: 3 additions & 1 deletion src/exif-ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ async function findFilesRecursive(
}
const program = new Command();
program
.version("3.2.1")
.version("3.2.2")
.description(getText("description") ?? "")
.requiredOption("-a, --api-provider <provider>", getText("api-provider"))
.option("-T, --tasks <tasks...>", getText("tasks"))
Expand All @@ -50,6 +50,7 @@ program
.option("--ext <extensions...>", getText("ext"))
.option("--concurrency <number>", getText("concurrency"))
.option("--face-group-ids <groups...>", getText("face-group-ids"))
.option("--repeat <number>", getText("repeat"))
.parse();

const options = program.opts();
Expand Down Expand Up @@ -78,6 +79,7 @@ async function handleExecution(path: string) {
avoidOverwrite: options.avoidOverwrite,
doNotEndExifTool: Boolean(watchMode),
faceGroupIds: options.faceGroupIds,
repeat: options.repeat,
});
} catch (error) {
console.error(`Error processing file ${path}:`, error);
Expand Down
6 changes: 6 additions & 0 deletions src/fluent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ verbose = Enable verbose output for debugging.
tasks = List of tasks to perform ('description', 'tag', 'face').
concurrency = The numbers of files to process concurrently in watch mode.
face-group-ids = List of face group IDs to use for face recognition.
repeat = The number of times to repeat the task if the AI-generated result is deemed unacceptable. This parameter helps ensure the quality of the output by allowing multiple attempts. Default value is 0. An AI-generated description is considered acceptable if it has more than 10 characters and is not in markdown format. AI-generated tags are considered acceptable if there are more than 1 tag and they are not in markdown format. Using this parameter will consume more tokens, which may incur additional costs. Use it at your own risk.
description-prompt-input = Describe image.
tag-prompt-input = Tag image in words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>
`),
);

Expand All @@ -49,6 +52,9 @@ verbose = 启用详细输出以进行调试。
tasks = 要执行的任务列表('description','tag','face')。
concurrency = 在监视模式下同时处理文件的数目。
face-group-ids = 人脸搜索要使用的面部组ID列表。
repeat = 如果AI生成结果被认为不可接受时,重复执行任务的次数。此参数通过允许多次尝试来确保输出质量。默认值为0。如果AI生成的描述超过10个字符且不是Markdown格式,则被视为可接受。AI生成的标签如果超过1个且不是Markdown格式,则被视为可接受。使用此参数将消耗更多令牌,可能会产生额外费用。使用时请自行承担风险。
description-prompt-input = 描述图像。输出格式为一行文本。示例输出:这幅照片是在一个风景名胜区里拍摄的,可以看到很多人在那里参观。这些石柱高耸,顶部平坦,看起来像是人工雕琢而成,让人想起中国的园林风格。前景是熙熙攘攘的人群,他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的,让人感觉平静祥和。,Description":"这幅照片是在一个风景名胜区里拍摄的,可以看到很多人在那里参观。这些石柱高耸,顶部平坦,看起来像是人工雕琢而成,让人想起中国的园林风格。前景是熙熙攘攘的人群,他们似乎都在欣赏这令人叹为观止的景色。照片里的气氛是宁静的,让人感觉平静祥和。
tag-prompt-input = 根据主题、对象、事件、地点对图像进行标签。输出格式:标签1,标签2,标签3,标签4,标签5,标签6,……,标签N。示例输出:石林,中国,中国石林,自然,喀斯特,山,旅行,旅游,景区,观光,人群,户外,公园,树木,天空,建筑。"
`),
);

Expand Down
21 changes: 18 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { DescriptionKey, getDescription } from "./tasks/description.js";
import { getTags, TagKey } from "./tasks/tags.js";
import { HttpsProxyAgent } from "https-proxy-agent";
import { getFaces } from "./tasks/face.js";
import { getText } from "./fluent/index.js";

if (
!globalThis.fetch ||
Expand Down Expand Up @@ -66,15 +67,16 @@ export async function execute({
"Caption-Abstract",
],
tagTags = ["Subject", "TagsList", "Keywords"],
descriptionPrompt = `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
tagPrompt = `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>`,
descriptionPrompt = getText('description-prompt-input') ?? `Describe image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"}`,
tagPrompt = getText('tag-prompt-input') ?? `Tag image in ${lang ? (ISO6391.getName(lang) ?? "English") : "English"} words based on subject, object, event, place. Output format: <tag1>, <tag2>, <tag3>, <tag4>, <tag5>, ..., <tagN>`,
verbose = false,
dry = false,
writeArgs,
providerArgs,
avoidOverwrite = false,
doNotEndExifTool = false,
faceGroupIds = [],
repeat = 0,
}: {
/**
* Array of tasks to perform: 'description', 'tag', 'face'
Expand Down Expand Up @@ -136,6 +138,10 @@ export async function execute({
* Array of face group IDs to use for face recognition
*/
faceGroupIds?: string[];
/**
* Number of times to repeat the task if it does not return acceptable results
*/
repeat?: number;
}) {
if (["description", "tag", "tags", "face"].every((t) => !tasks.includes(t)))
return;
Expand Down Expand Up @@ -191,6 +197,12 @@ export async function execute({
file_id = id;
}

if (verbose) {
// log tasks' prompt
console.log("Description prompt:", descriptionPrompt);
console.log("Tag prompt:", tagPrompt);
}

const [description, tags] = await Promise.all([
tasks.includes("description")
? getDescription({
Expand All @@ -204,6 +216,7 @@ export async function execute({
existingTags,
path: resolvedPath,
file_id,
repeat,
})
: undefined,
tasks.includes("tag") || tasks.includes("tags")
Expand All @@ -218,6 +231,7 @@ export async function execute({
additionalTags: faces,
path: resolvedPath,
file_id,
repeat,
})
: tasks.includes("face")
? getTags({
Expand All @@ -230,9 +244,10 @@ export async function execute({
additionalTags: faces,
path: resolvedPath,
file_id,
repeat,
})
: undefined,
]);
] as const);

const result = {
...description,
Expand Down
6 changes: 5 additions & 1 deletion src/provider/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ async function sizeHandle(
const sharpInstance = await sharp(buffer);
const { width = 0, height = 0 } = await sharpInstance.metadata();
let done = await sharp(buffer)
.resize({
...(width > height ? { width: 6000 } : { height: 6000 }),
withoutEnlargement: true,
})
.jpeg({
quality,
})
.toBuffer();

while (done.byteLength > 20_000_000) {
while (done.byteLength > 18_000_000) {
quality = Math.max(quality - drop, 0);
done = await sharp(buffer)
.resize({
Expand Down
35 changes: 23 additions & 12 deletions src/tasks/description.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export async function getDescription({
existingTags,
path,
file_id,
repeat,
}: {
buffer: Buffer;
model?: string;
Expand All @@ -27,23 +28,33 @@ export async function getDescription({
existingTags?: Readonly<Tags>;
path: string;
file_id?: string;
repeat?: number;
}) {
// Get description from provider
let description: string | undefined;

try {
description = await providerModule.getDescription?.({
buffer,
model,
prompt: prompt,
providerArgs,
path,
file_id,
});
} catch (error) {
console.error("Failed to get description from provider:", error);
return;
if (providerModule) {
for (let i = 0; i < (repeat ?? 0) + 1; i++) {
try {
description = await providerModule.getDescription?.({
buffer,
model,
prompt: prompt,
providerArgs,
path,
file_id,
});
} catch (error) {
if (verbose)
console.error("Failed to get description from provider:", error);
}
if (description && description.trim().length > 10 && !/[*#>`]/.test(description)) {
description = description.trim().replaceAll(/\n/g, "");
break;
}
}
}

if (verbose) console.log("Description is:", description);

return description
Expand Down
2 changes: 1 addition & 1 deletion src/tasks/face.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ export async function getFaces({
(k) => k != null,
);
} catch (error) {
console.error("Failed to get tags from provider:", error);
if (verbose) console.error("Failed to get faces", error);
return;
}
}
89 changes: 56 additions & 33 deletions src/tasks/tags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,42 @@ type TagKey2 = keyof {

export type TagKey = Exclude<TagKey2, TagKey1>;

function formatTags(tags: string | string[] | undefined) {
return typeof tags === "string"
? tags
.replaceAll(/tag[0-9]+/g, "")
.replaceAll(/[\[\]\.{}<>/*'"()]/g, "")
.split(tags.includes(":") ? ":" : ":")
.at(-1)
?.split(tags.includes(",") ? "," : "\n")
.map((s) =>
s
.trim()
.replace(/\n$/g, "")
.replace(/[0-9]+[ ]+(.*)/g, "$1"),
)
.filter(
(s) =>
s.length > 0 && [...s.matchAll(/ /g)].length <= 1 && s !== "\n",
)
: tags;
/**
 * Normalizes the raw tag output of a provider into a clean list of tags.
 *
 * Accepted input shapes:
 * - `undefined` (or `null` at runtime): yields an empty list.
 * - an array: returned unchanged.
 * - a string: parsed either as a numbered list (one tag per line) or as a
 *   delimiter-separated list (full-width comma, ASCII comma, or newline).
 *
 * @param tags Raw provider output.
 * @returns The extracted tags; never `undefined`.
 */
function formatTags(tags: string | string[] | undefined): string[] {
  if (tags == null) return [];
  if (typeof tags !== "string") return tags;

  // Placeholder names such as "tag1", "tag2" (the prompt's output-format
  // template uses them) are removed wherever they appear.
  const placeholder = /tag[0-9]+/g;
  // Brackets, quotes, dots and similar markup-like characters are stripped.
  const noise = /[\[\]\.{}<>\/*'"()。]/g;
  const strip = (text: string): string =>
    text.replace(placeholder, "").replace(noise, "");

  // A line counts as "numbered" when it contains a digit; the match runs from
  // the first digit to the end of that line. `(?:\n|$)` also accepts a final
  // line without a trailing newline, so the last list item is not dropped.
  const numberedLines = tags.match(/[0-9]+.*(?:\n|$)/g) ?? [];
  if (numberedLines.length > 1) {
    return (
      numberedLines
        .map((line) =>
          strip(line)
            .replace(/\n$/, "")
            // Drop the leading item number, keep the rest of the line.
            .replace(/[0-9]+(.*)/g, "$1")
            .trim(),
        )
        // Lines that held nothing but a number must not count as tags.
        .filter((tag) => tag.length > 0)
    );
  }

  // Delimiter-separated list. Anything before the last colon (full-width
  // preferred, ASCII otherwise) is treated as a preamble and discarded.
  const segments = strip(tags).split(tags.includes(":") ? ":" : ":");
  const payload = segments[segments.length - 1] ?? "";
  const separator = tags.includes(",")
    ? ","
    : tags.includes(",")
      ? ","
      : "\n";

  return (
    payload
      .split(separator)
      .map((part) => part.trim().replace(/[0-9]+[ ]+(.*)/g, "$1"))
      // Keep non-empty entries containing at most one space (two words).
      .filter((tag) => tag.length > 0 && tag.split(" ").length <= 2)
  );
}

export async function getTags({
Expand All @@ -46,6 +63,7 @@ export async function getTags({
additionalTags,
path,
file_id,
repeat,
}: {
buffer: Buffer;
model?: string;
Expand All @@ -58,27 +76,32 @@ export async function getTags({
additionalTags?: Readonly<string[]>;
path: string;
file_id?: string;
repeat?: number;
}) {
// Get tags from provider
let tags: string | string[] = [];
let tags: string[] = [];

if (providerModule) {
try {
tags = await providerModule.getTags?.({
buffer,
model,
prompt: prompt,
providerArgs,
path,
file_id,
});
} catch (error) {
console.error("Failed to get tags from provider:", error);
return;
for (let i = 0; i < (repeat ?? 0) + 1; i++) {
try {
tags = formatTags(
await providerModule.getTags?.({
buffer,
model,
prompt: prompt,
providerArgs,
path,
file_id,
}),
);
} catch (error) {
if (verbose) console.error("Failed to get tags from provider:", error);
}
if (tags.length > 1) break;
}
}

const formatted = formatTags(tags)?.concat(additionalTags ?? []);
const formatted = tags?.concat(additionalTags ?? []);

if (verbose) console.log("Tags are:", formatted);

Expand Down
Loading

0 comments on commit dfbb0f1

Please sign in to comment.