diff --git a/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-api.png b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-api.png new file mode 100644 index 00000000..016cb26a Binary files /dev/null and b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-api.png differ diff --git a/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-caption_type.png b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-caption_type.png new file mode 100644 index 00000000..af794eda Binary files /dev/null and b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-caption_type.png differ diff --git a/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-example.png b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-example.png new file mode 100644 index 00000000..6f727f85 Binary files /dev/null and b/docs/docs/ai-assistants/imgs/siliconcloud-joycaption2-example.png differ diff --git a/docs/docs/ai-assistants/introduce.md b/docs/docs/ai-assistants/introduce.md index 04dc7fd1..3ad6ec64 100644 --- a/docs/docs/ai-assistants/introduce.md +++ b/docs/docs/ai-assistants/introduce.md @@ -37,3 +37,61 @@ For the most current information on available models and pricing, please refer t ## ☁️BizyAir Joy Caption The ☁️BizyAir Joy Caption node is a powerful tool designed to automatically generate descriptive captions for images, thanks to https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha. + +## ☁️BizyAir Joy Caption2 + +The ☁️BizyAir Joy Caption2 node is an upgraded version of the ☁️BizyAir Joy Caption node, thanks to https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two. + +### Key Features: + +![](./imgs/siliconcloud-joycaption2-api.png) + +1. **do_sample**: The do_sample parameter determines whether the model uses a random sampling method to generate the next word, or simply selects the most likely word. + + - `do_sample=True`: It can increase the variety and creativity of the generated text. 
+ + - `do_sample=False`: The next word with the highest probability will be selected, so the generated text will be more conservative. + +2. **temperature**: The temperature parameter affects the shape of the probability distribution when sampling, and thus the variety of generated text. + + - A higher temperature will make the distribution more uniform and increase randomness. + + - A lower temperature makes the distribution sharper, less random, and more inclined to choose words with higher probability. + +3. **max_tokens**: The max_tokens parameter specifies the maximum number of tokens that the model can generate when generating text. The upper limit here is 512. + +4. **caption_type**: Each caption_type corresponds to a default system prompt. + + ![](./imgs/siliconcloud-joycaption2-caption_type.png) + + - **Descriptive**: Write a descriptive caption for this image in a formal tone. + + - **Descriptive (Informal)**: Write a descriptive caption for this image in a casual tone. + + - **Training Prompt**: Write a stable diffusion prompt for this image. + + - **MidJourney**: Write a MidJourney prompt for this image. + + - **Booru tag list**: Write a list of Booru tags for this image. + + - **Booru-like tag list**: Write a list of Booru-like tags for this image. + + - **Art Critic**: Analyze this image like an art critic would with information about its composition, style, symbolism, the use of color, light, any artistic movement it might belong to, etc. + + - **Product Listing**: Write a caption for this image as though it were a product listing. + + - **Social Media Post**: Write a caption for this image as if it were being used for a social media post. + +5. **caption_length**: The caption_length parameter sets the desired length of the output. If max_tokens is smaller than this length, the output will be truncated. + +6. **extra_options**: If you want to add more prompts to the default prompts, you can write them here. 
+
+    - For example, if you want the person in the picture to be referred to by a specific name, you can write the following: `If there is a person/character in the image you must refer to them as {name}.`
+
+7. **name_input**: The name in *name_input* replaces the `{name}` placeholder in *extra_options*.
+
+    - For example, if you write `Jack` here and `If there is a person/character in the image you must refer to them as {name}.` in *extra_options*, the person in the image will be referred to as Jack in the output.
+
+    ![](./imgs/siliconcloud-joycaption2-example.png)
+
+8. **custom_prompt**: If you want to customize the prompt, you can write it here. It overrides the default prompt, so the prompt-related settings above (*caption_type*, *extra_options* and *name_input*) will have no effect.
diff --git a/llm.py b/llm.py
index e7424781..1f16cdc5 100644
--- a/llm.py
+++ b/llm.py
@@ -272,13 +272,171 @@ def joycaption(self, image, do_sample, temperature, max_tokens):
         return (caption,)
 
 
+class BizyAirJoyCaption2:
+    """Caption an image by calling the BizyAir ``joycaption2`` endpoint."""
+
+    def __init__(self):
+        pass
+
+    # refer to: https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two
+    API_URL = f"{BIZYAIR_SERVER_ADDRESS}/supernode/joycaption2"
+
+    @classmethod
+    def INPUT_TYPES(s):
+        """Describe the node inputs: widget types, choices and defaults."""
+        return {
+            "required": {
+                "image": ("IMAGE",),
+                "do_sample": ([True, False],),
+                "temperature": (
+                    "FLOAT",
+                    {
+                        "default": 0.5,
+                        "min": 0.0,
+                        "max": 2.0,
+                        "step": 0.01,
+                        "round": 0.001,
+                        "display": "number",
+                    },
+                ),
+                "max_tokens": (
+                    "INT",
+                    {
+                        "default": 256,
+                        "min": 16,
+                        "max": 512,
+                        "step": 16,
+                        "display": "number",
+                    },
+                ),
+                "caption_type": (
+                    [
+                        "Descriptive",
+                        "Descriptive (Informal)",
+                        "Training Prompt",
+                        "MidJourney",
+                        "Booru tag list",
+                        "Booru-like tag list",
+                        "Art Critic",
+                        "Product Listing",
+                        "Social Media Post",
+                    ],
+                ),
+                "caption_length": (
+                    ["any", "very short", "short", "medium-length", "long", "very long"]
+                    + [str(i) for i in range(20, 261, 10)],
+                ),
+                "extra_options": (
+                    "STRING",
+                    {
+                        "default": "If there is a person/character in the image you must refer to them as {name}.",
+                        "tooltip": "Extra options for the model",
+                        "multiline": True,
+                    },
+                ),
+                "name_input": (
+                    "STRING",
+                    {
+                        "default": "Jack",
+                        "tooltip": "Name input is only used if an Extra Option is selected that requires it.",
+                    },
+                ),
+                "custom_prompt": (
+                    "STRING",
+                    {
+                        "default": "",
+                        "multiline": True,
+                    },
+                ),
+            }
+        }
+
+    RETURN_TYPES = ("STRING",)
+    FUNCTION = "joycaption2"
+
+    CATEGORY = "☁️BizyAir/AI Assistants"
+
+    def joycaption2(
+        self,
+        image,
+        do_sample,
+        temperature,
+        max_tokens,
+        caption_type,
+        caption_length,
+        extra_options,
+        name_input,
+        custom_prompt,
+    ):
+        """Send the image and caption settings to the API; return the caption.
+
+        The settings are forwarded as given, except that ``max_tokens`` is sent
+        as ``max_new_tokens`` and ``extra_options`` is wrapped in a list.
+
+        Returns:
+            A one-element tuple holding the caption from the response.
+
+        Raises:
+            ValueError: If the image width or height exceeds 1536.
+            Exception: If the response cannot be decoded, reports an error,
+                or carries an unexpected payload type.
+        """
+        API_KEY = get_api_key()
+        SIZE_LIMIT = 1536
+        # IMAGE tensors follow ComfyUI's (batch, height, width, channels) layout.
+        _, h, w, _ = image.shape
+        # Raise explicitly: an ``assert`` would be stripped under ``python -O``.
+        if w > SIZE_LIMIT or h > SIZE_LIMIT:
+            raise ValueError(
+                f"width and height must not exceed {SIZE_LIMIT}x{SIZE_LIMIT}, but got {w} and {h}"
+            )
+
+        payload = {
+            "image": encode_data(image, disable_image_marker=True),
+            "do_sample": do_sample == True,
+            "temperature": temperature,
+            "max_new_tokens": max_tokens,
+            "caption_type": caption_type,
+            "caption_length": caption_length,
+            "extra_options": [extra_options],
+            "name_input": name_input,
+            "custom_prompt": custom_prompt,
+        }
+        headers = {
+            "accept": "application/json",
+            "content-type": "application/json",
+            "authorization": f"Bearer {API_KEY}",
+        }
+
+        response = send_post_request(self.API_URL, payload=payload, headers=headers)
+        ret = json.loads(response)
+
+        try:
+            # The reply may carry its body as JSON text under "result".
+            if "result" in ret:
+                ret = json.loads(ret["result"])
+        except Exception as e:
+            raise Exception(f"Unexpected response: {ret} {e=}") from e
+
+        if ret["type"] == "error":
+            raise Exception(ret["message"])
+
+        msg = ret["data"]
+        if msg["type"] not in ("comfyair", "bizyair"):
+            raise Exception(f"Unexpected response type: {msg}")
+
+        return (msg["data"],)
+
+
 NODE_CLASS_MAPPINGS = {
     "BizyAirSiliconCloudLLMAPI": SiliconCloudLLMAPI,
     "BizyAirSiliconCloudVLMAPI": SiliconCloudVLMAPI,
     "BizyAirJoyCaption": BizyAirJoyCaption,
+    "BizyAirJoyCaption2": BizyAirJoyCaption2,
 }
 NODE_DISPLAY_NAME_MAPPINGS = {
     "BizyAirSiliconCloudLLMAPI": "☁️BizyAir SiliconCloud LLM API",
     "BizyAirSiliconCloudVLMAPI": "☁️BizyAir SiliconCloud VLM API",
     "BizyAirJoyCaption": "☁️BizyAir Joy Caption",
+    "BizyAirJoyCaption2": "☁️BizyAir Joy Caption2",
 }