diff --git a/.github/workflows/workflow-ci.yml b/.github/workflows/workflow-ci.yml index 16ffab13..39cdef02 100644 --- a/.github/workflows/workflow-ci.yml +++ b/.github/workflows/workflow-ci.yml @@ -22,7 +22,7 @@ jobs: modified_files=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }}) only_non_code_files=true for file in $modified_files; do - if [[ "$file" == *.py ]] || [[ "$file" == *.js ]] || [[ "$file" == *.json ]] || [[ "$file" == *.yml ]]; then + if [[ "$file" == *.py ]] || [[ "$file" == *.js ]] || [[ "$file" == *.json ]]; then only_non_code_files=false break fi diff --git a/README.md b/README.md index 273efe1e..757fd543 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # BizyAir +- [2024/08/14] 🌩️ BizyAir JoyCaption node has been released. [Try the example to recreate an image by JoyCaption and Flux](./examples/bizyair_flux_joycaption_img2img_workflow.json), thanks to [fancyfeast/joy-caption-pre-alpha](https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha) - [2024/08/05] 🌩️ FLUX.1-dev has been supported. [FLUX.1-dev Text to Image](./examples/bizyair_flux_dev_workflow.json), [FLUX.1-dev Image to Image](./examples/bizyair_flux_img2img_workflow.json) - [2024/08/02] 🌩️ FLUX.1-schnell has been supported. [FLUX.1-schnell Text to Image](./examples/bizyair_flux_schnell_workflow.json) - [2024/08/01] 🌩️ [BizyAir MinusZone Kolors](https://siliconflow.github.io/BizyAir/kolors/introduce.html) nodes have been released(thanks to [MinusZoneAI/ComfyUI-Kolors-MZ](https://github.com/MinusZoneAI/ComfyUI-Kolors-MZ)), and BizyAir now supports over 10 new base models. They all support ControlNet, LoRA, and IPAdapter. 
[BizyAir KSampler](https://siliconflow.github.io/BizyAir/ksampler/introduce.html) diff --git a/bizyair_example_menu.json b/bizyair_example_menu.json index d78bc878..a7aeeec2 100644 --- a/bizyair_example_menu.json +++ b/bizyair_example_menu.json @@ -24,5 +24,5 @@ }, "Remove the background from the image": "bizyair_showcase_remove_background.json", "Super Resolution": "bizyair_showcase_realistic_superresolution.json", - "Recreate an existing image": "bizyair_showcase_caption_redraw.json" + "Recreate an existing image": "bizyair_flux_joycaption_img2img_workflow.json" } \ No newline at end of file diff --git a/examples/bizyair_flux_joycaption_img2img_workflow.json b/examples/bizyair_flux_joycaption_img2img_workflow.json new file mode 100644 index 00000000..00bd54d1 --- /dev/null +++ b/examples/bizyair_flux_joycaption_img2img_workflow.json @@ -0,0 +1,700 @@ +{ + "last_node_id": 69, + "last_link_id": 89, + "nodes": [ + { + "id": 54, + "type": "BizyAir_VAEDecode", + "pos": [ + 2171.0771484375, + 130 + ], + "size": { + "0": 271.0523681640625, + "1": 46 + }, + "flags": {}, + "order": 12, + "mode": 0, + "inputs": [ + { + "name": "samples", + "type": "LATENT", + "link": 66, + "slot_index": 0 + }, + { + "name": "vae", + "type": "BIZYAIR_VAE", + "link": 67, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 68 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_VAEDecode" + } + }, + { + "id": 50, + "type": "BizyAir_SamplerCustomAdvanced", + "pos": [ + 2167, + 231 + ], + "size": { + "0": 260.3999938964844, + "1": 106 + }, + "flags": {}, + "order": 11, + "mode": 0, + "inputs": [ + { + "name": "noise", + "type": "NOISE", + "link": 74 + }, + { + "name": "guider", + "type": "GUIDER", + "link": 61 + }, + { + "name": "sampler", + "type": "SAMPLER", + "link": 75, + "slot_index": 2 + }, + { + "name": "sigmas", + "type": "SIGMAS", + "link": 73 + }, + { + "name": "latent_image", + "type": "LATENT", 
+ "link": 83, + "slot_index": 4 + } + ], + "outputs": [ + { + "name": "output", + "type": "LATENT", + "links": [ + 66 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "denoised_output", + "type": "LATENT", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "BizyAir_SamplerCustomAdvanced" + } + }, + { + "id": 47, + "type": "BizyAir_BasicGuider", + "pos": [ + 2163, + 389 + ], + "size": { + "0": 253.01522827148438, + "1": 52.98030471801758 + }, + "flags": {}, + "order": 10, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "BIZYAIR_MODEL", + "link": 59, + "slot_index": 0 + }, + { + "name": "conditioning", + "type": "BIZYAIR_CONDITIONING", + "link": 60, + "slot_index": 1 + } + ], + "outputs": [ + { + "name": "GUIDER", + "type": "GUIDER", + "links": [ + 61 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_BasicGuider" + } + }, + { + "id": 37, + "type": "BizyAir_CLIPTextEncode", + "pos": [ + 2123, + 499 + ], + "size": { + "0": 344.9754943847656, + "1": 127.23637390136719 + }, + "flags": {}, + "order": 9, + "mode": 0, + "inputs": [ + { + "name": "clip", + "type": "BIZYAIR_CLIP", + "link": 56 + }, + { + "name": "text", + "type": "STRING", + "link": 89, + "widget": { + "name": "text" + } + } + ], + "outputs": [ + { + "name": "CONDITIONING", + "type": "BIZYAIR_CONDITIONING", + "links": [ + 60 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_CLIPTextEncode" + }, + "widgets_values": [ + "black forest gateau cake spelling out the words \"BizyAir\", tasty, food photography, dynamic shot" + ] + }, + { + "id": 58, + "type": "BizyAir_BasicScheduler", + "pos": [ + 1784, + 167 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 6, + "mode": 0, + "inputs": [ + { + "name": "model", + "type": "BIZYAIR_MODEL", + "link": 72, + "slot_index": 0 + } + ], + "outputs": [ + { + "name": "SIGMAS", + "type": "SIGMAS", + "links": [ + 73 + ], + 
"shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_BasicScheduler" + }, + "widgets_values": [ + "normal", + 20, + 0.85 + ] + }, + { + "id": 36, + "type": "BizyAir_DualCLIPLoader", + "pos": [ + 1326, + 684 + ], + "size": { + "0": 315, + "1": 106 + }, + "flags": {}, + "order": 0, + "mode": 0, + "outputs": [ + { + "name": "BIZYAIR_CLIP", + "type": "BIZYAIR_CLIP", + "links": [ + 56 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_DualCLIPLoader" + }, + "widgets_values": [ + "t5xxl_fp16.safetensors", + "clip_l.safetensors", + "flux" + ] + }, + { + "id": 48, + "type": "BizyAir_UNETLoader", + "pos": [ + 1326, + 534 + ], + "size": { + "0": 315, + "1": 82 + }, + "flags": {}, + "order": 1, + "mode": 0, + "outputs": [ + { + "name": "BIZYAIR_MODEL", + "type": "BIZYAIR_MODEL", + "links": [ + 59, + 72 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_UNETLoader" + }, + "widgets_values": [ + "flux/flux1-dev.sft", + "default" + ] + }, + { + "id": 60, + "type": "BizyAir_KSamplerSelect", + "pos": [ + 1340, + 419 + ], + "size": { + "0": 259.05419921875, + "1": 58 + }, + "flags": {}, + "order": 2, + "mode": 0, + "outputs": [ + { + "name": "SAMPLER", + "type": "SAMPLER", + "links": [ + 75 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_KSamplerSelect" + }, + "widgets_values": [ + "euler" + ] + }, + { + "id": 59, + "type": "BizyAir_RandomNoise", + "pos": [ + 1354, + 281 + ], + "size": { + "0": 244.64930725097656, + "1": 82 + }, + "flags": {}, + "order": 3, + "mode": 0, + "outputs": [ + { + "name": "NOISE", + "type": "NOISE", + "links": [ + 74 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_RandomNoise" + }, + "widgets_values": [ + 438153234912084, + "fixed" + ] + }, + { + "id": 55, + "type": "BizyAir_VAELoader", + "pos": [ + 1369, + 164 + ], + "size": { + "0": 
273.7454833984375, + "1": 58 + }, + "flags": {}, + "order": 4, + "mode": 0, + "outputs": [ + { + "name": "vae", + "type": "BIZYAIR_VAE", + "links": [ + 67, + 82 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_VAELoader" + }, + "widgets_values": [ + "flux/ae.sft" + ] + }, + { + "id": 66, + "type": "BizyAir_VAEEncode", + "pos": [ + 1801, + 331 + ], + "size": { + "0": 254.9784393310547, + "1": 46 + }, + "flags": {}, + "order": 7, + "mode": 0, + "inputs": [ + { + "name": "pixels", + "type": "IMAGE", + "link": 81 + }, + { + "name": "vae", + "type": "BIZYAIR_VAE", + "link": 82 + } + ], + "outputs": [ + { + "name": "LATENT", + "type": "LATENT", + "links": [ + 83 + ], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAir_VAEEncode" + } + }, + { + "id": 56, + "type": "PreviewImage", + "pos": [ + 2919, + 144 + ], + "size": { + "0": 386.4439392089844, + "1": 402.4732971191406 + }, + "flags": {}, + "order": 13, + "mode": 0, + "inputs": [ + { + "name": "images", + "type": "IMAGE", + "link": 68 + } + ], + "properties": { + "Node name for S&R": "PreviewImage" + } + }, + { + "id": 65, + "type": "LoadImage", + "pos": [ + 2495, + 126 + ], + "size": { + "0": 397.6864318847656, + "1": 430.62481689453125 + }, + "flags": {}, + "order": 5, + "mode": 0, + "outputs": [ + { + "name": "IMAGE", + "type": "IMAGE", + "links": [ + 81, + 88 + ], + "shape": 3, + "slot_index": 0 + }, + { + "name": "MASK", + "type": "MASK", + "links": null, + "shape": 3 + } + ], + "properties": { + "Node name for S&R": "LoadImage" + }, + "widgets_values": [ + "example.png", + "image" + ] + }, + { + "id": 69, + "type": "BizyAirJoyCaption", + "pos": [ + 1741, + 458 + ], + "size": { + "0": 329.83148193359375, + "1": 279.5472717285156 + }, + "flags": {}, + "order": 8, + "mode": 0, + "inputs": [ + { + "name": "image", + "type": "IMAGE", + "link": 88 + } + ], + "outputs": [ + { + "name": "STRING", + "type": "STRING", + "links": [ + 89 + 
], + "shape": 3, + "slot_index": 0 + } + ], + "properties": { + "Node name for S&R": "BizyAirJoyCaption" + }, + "widgets_values": [ + "enable", + 0.5, + 256 + ] + } + ], + "links": [ + [ + 56, + 36, + 0, + 37, + 0, + "BIZYAIR_CLIP" + ], + [ + 59, + 48, + 0, + 47, + 0, + "BIZYAIR_MODEL" + ], + [ + 60, + 37, + 0, + 47, + 1, + "BIZYAIR_CONDITIONING" + ], + [ + 61, + 47, + 0, + 50, + 1, + "GUIDER" + ], + [ + 66, + 50, + 0, + 54, + 0, + "LATENT" + ], + [ + 67, + 55, + 0, + 54, + 1, + "BIZYAIR_VAE" + ], + [ + 68, + 54, + 0, + 56, + 0, + "IMAGE" + ], + [ + 72, + 48, + 0, + 58, + 0, + "BIZYAIR_MODEL" + ], + [ + 73, + 58, + 0, + 50, + 3, + "SIGMAS" + ], + [ + 74, + 59, + 0, + 50, + 0, + "NOISE" + ], + [ + 75, + 60, + 0, + 50, + 2, + "SAMPLER" + ], + [ + 81, + 65, + 0, + 66, + 0, + "IMAGE" + ], + [ + 82, + 55, + 0, + 66, + 1, + "BIZYAIR_VAE" + ], + [ + 83, + 66, + 0, + 50, + 4, + "LATENT" + ], + [ + 88, + 65, + 0, + 69, + 0, + "IMAGE" + ], + [ + 89, + 69, + 0, + 37, + 1, + "STRING" + ] + ], + "groups": [], + "config": {}, + "extra": { + "ds": { + "scale": 0.7513148009015777, + "offset": [ + -621.5853896595721, + 217.98403398673898 + ] + } + }, + "version": 0.4 +} \ No newline at end of file diff --git a/js/image_caption.js b/js/image_caption.js index 4d295bd6..221011a9 100644 --- a/js/image_caption.js +++ b/js/image_caption.js @@ -4,7 +4,7 @@ import { ComfyWidgets } from "../../scripts/widgets.js"; app.registerExtension({ name: "bizyair.image.to.caption", async beforeRegisterNodeDef(nodeType, nodeData, app) { - if (nodeData.name === "BizyAirImageCaption") { + if (nodeData.name === "BizyAirImageCaption" || nodeData.name === "BizyAirJoyCaption") { function populate(text) { if (this.widgets) { const pos = this.widgets.findIndex((w) => w.name === "showtext"); diff --git a/llm.py b/llm.py index 6f43e9d3..bee9a1b9 100644 --- a/llm.py +++ b/llm.py @@ -8,6 +8,7 @@ get_api_key, ) +from bizyair.image_utils import encode_data, decode_data from .utils import get_llm_response 
BIZYAIR_SERVER_ADDRESS = os.getenv( @@ -135,11 +136,101 @@ def detailed_caption( return {"ui": {"text": (caption,)}, "result": (caption,)} +class BizyAirJoyCaption: + """ComfyUI node that posts an image to the BizyAir JoyCaption endpoint and returns the caption string.""" + # refer to: https://huggingface.co/spaces/fancyfeast/joy-caption-pre-alpha + API_URL = f"{BIZYAIR_SERVER_ADDRESS}/supernode/joycaption" + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + "do_sample": (["enable", "disable"],), + "temperature": ( + "FLOAT", + { + "default": 0.5, + "min": 0.0, + "max": 2.0, + "step": 0.01, + "round": 0.001, + "display": "number", + }, + ), + "max_tokens": ( + "INT", + { + "default": 256, + "min": 16, + "max": 512, + "step": 16, + "display": "number", + }, + ), + } + } + + RETURN_TYPES = ("STRING",) + FUNCTION = "joycaption" + + CATEGORY = "☁️BizyAir/AI Assistants" + + def joycaption(self, image, do_sample, temperature, max_tokens): + """Send the encoded image and sampling options to API_URL; return the caption for the UI and as the node result.""" + API_KEY = get_api_key() + SIZE_LIMIT = 1536 + # ComfyUI IMAGE tensors are laid out as [batch, height, width, channels]. + _, h, w, _ = image.shape + assert ( + w <= SIZE_LIMIT and h <= SIZE_LIMIT + ), f"width and height must not exceed {SIZE_LIMIT}x{SIZE_LIMIT}, but got {w} and {h}" + + payload = { + "image": None, + "do_sample": do_sample == "enable", + "temperature": temperature, + "max_new_tokens": max_tokens, + } + auth = f"Bearer {API_KEY}" + headers = { + "accept": "application/json", + "content-type": "application/json", + "authorization": auth, + } + input_image = encode_data(image, disable_image_marker=True) + payload["image"] = input_image + + ret: str = send_post_request(self.API_URL, payload=payload, headers=headers) + ret = json.loads(ret) + + try: + if "result" in ret: + ret = json.loads(ret["result"]) + except Exception as e: + raise Exception(f"Unexpected response: {ret}") from e + + if ret["status"] == "error": + raise Exception(ret["message"]) + + msg = ret["data"] + if msg["type"] not in ( + "comfyair", + "bizyair", + ): + raise Exception(f"Unexpected response type: {msg}") + + caption = msg["data"] + return {"ui": {"text": (caption,)}, 
"result": (caption,)} + + NODE_CLASS_MAPPINGS = { "BizyAirSiliconCloudLLMAPI": SiliconCloudLLMAPI, "BizyAirImageCaption": BizyAirImageCaption, + "BizyAirJoyCaption": BizyAirJoyCaption, } NODE_DISPLAY_NAME_MAPPINGS = { "BizyAirSiliconCloudLLMAPI": "☁️BizyAir SiliconCloud LLM API", "BizyAirImageCaption": "☁️BizyAir Image Caption", + "BizyAirJoyCaption": "☁️BizyAir Joy Caption", }