From 1388f851e15ed8b8e8ab2598e7cda6cf594e58dd Mon Sep 17 00:00:00 2001
From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com>
Date: Mon, 5 Aug 2024 12:32:45 +0530
Subject: [PATCH] initial: Implement PBR Maps node

A node to generate normal, roughness, and displacement maps from a single
image.

Based on: https://github.com/joeyballentine/Material-Map-Generator
---
 invokeai/app/invocations/pbr_maps.py          |  57 +++
 .../image_util/pbr_maps/architecture/block.py | 367 ++++++++++++++++++
 .../pbr_maps/architecture/pbr_rrdb_net.py     |  70 ++++
 .../backend/image_util/pbr_maps/pbr_maps.py   | 104 +++++
 .../image_util/pbr_maps/utils/image_ops.py    |  93 +++++
 5 files changed, 691 insertions(+)
 create mode 100644 invokeai/app/invocations/pbr_maps.py
 create mode 100644 invokeai/backend/image_util/pbr_maps/architecture/block.py
 create mode 100644 invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py
 create mode 100644 invokeai/backend/image_util/pbr_maps/pbr_maps.py
 create mode 100644 invokeai/backend/image_util/pbr_maps/utils/image_ops.py

diff --git a/invokeai/app/invocations/pbr_maps.py b/invokeai/app/invocations/pbr_maps.py
new file mode 100644
index 00000000000..7475c96e4c5
--- /dev/null
+++ b/invokeai/app/invocations/pbr_maps.py
@@ -0,0 +1,57 @@
+import pathlib
+from typing import Literal
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.fields import ImageField, InputField, OutputField
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+from invokeai.backend.image_util.pbr_maps.pbr_maps import NORMAL_MAP_MODEL, OTHER_MAP_MODEL, PBRMapsGenerator
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation_output("pbr_maps_output")
+class PBRMapsOutput(BaseInvocationOutput):
+    normal_map: ImageField = OutputField(description="The generated normal map")
+    roughness_map: ImageField = OutputField(description="The generated roughness map")
+    displacement_map: ImageField = OutputField(description="The generated displacement map")
+
+
+@invocation("pbr_maps", title="PBR Maps", tags=["image", "material"], category="image", version="1.0.0")
+class PBRMapsInvocation(BaseInvocation):
+    """Generate normal, roughness, and displacement maps from a given image"""
+
+    image: ImageField = InputField(description="The input image")
+    tile_size: int = InputField(default=512, description="Tile size used when splitting large images for inference")
+    border_mode: Literal["none", "seamless", "mirror", "replicate"] = InputField(
+        default="none", description="Border mode applied to the image to reduce edge artifacts and seams"
+    )
+
+    def invoke(self, context: InvocationContext) -> PBRMapsOutput:
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        def loader(model_path: pathlib.Path):
+            return PBRMapsGenerator.load_model(model_path, TorchDevice.choose_torch_device())
+
+        with (
+            context.models.load_remote_model(NORMAL_MAP_MODEL, loader) as normal_map_model,
+            context.models.load_remote_model(OTHER_MAP_MODEL, loader) as other_map_model,
+        ):
+            assert isinstance(normal_map_model, PBR_RRDB_Net)
+            assert isinstance(other_map_model, PBR_RRDB_Net)
+            pbr_pipeline = PBRMapsGenerator(normal_map_model, other_map_model, TorchDevice.choose_torch_device())
+            normal_map, roughness_map, displacement_map = pbr_pipeline.generate_maps(
+                image_pil, self.tile_size, self.border_mode
+            )
+
+        normal_map = context.images.save(normal_map)
+        normal_map_field = ImageField(image_name=normal_map.image_name)
+
+        roughness_map = context.images.save(roughness_map)
+        roughness_map_field = ImageField(image_name=roughness_map.image_name)
+
+        displacement_map = context.images.save(displacement_map)
+        displacement_map_field = ImageField(image_name=displacement_map.image_name)
+
+        return PBRMapsOutput(
+            normal_map=normal_map_field, roughness_map=roughness_map_field, displacement_map=displacement_map_field
+        )
diff --git a/invokeai/backend/image_util/pbr_maps/architecture/block.py b/invokeai/backend/image_util/pbr_maps/architecture/block.py
new file mode 100644
index 00000000000..6c066c7a310
--- /dev/null
+++ b/invokeai/backend/image_util/pbr_maps/architecture/block.py
@@ -0,0 +1,367 @@
+# Original: https://github.com/joeyballentine/Material-Map-Generator
+# Adapted and optimized for InvokeAI
+
+from collections import OrderedDict
+from typing import Any, List, Literal, Optional
+
+import torch
+import torch.nn as nn
+
+ACTIVATION_LAYER_TYPE = Literal["relu", "leakyrelu", "prelu"]
+NORMALIZATION_LAYER_TYPE = Literal["batch", "instance"]
+PADDING_LAYER_TYPE = Literal["zero", "reflect", "replicate"]
+BLOCK_MODE = Literal["CNA", "NAC", "CNAC"]
+UPCONV_BLOCK_MODE = Literal["nearest", "linear", "bilinear", "bicubic", "trilinear"]
+
+
+def act(act_type: ACTIVATION_LAYER_TYPE, inplace: bool = True, neg_slope: float = 0.2, n_prelu: int = 1):
+    """Helper to select an activation layer"""
+    if act_type == "relu":
+        layer = nn.ReLU(inplace)
+    elif act_type == "leakyrelu":
+        layer = nn.LeakyReLU(neg_slope, inplace)
+    elif act_type == "prelu":
+        layer = nn.PReLU(num_parameters=n_prelu, init=neg_slope)
+    else:
+        raise NotImplementedError(f"Activation layer [{act_type}] is not implemented")
+    return layer
+
+
+def norm(norm_type: NORMALIZATION_LAYER_TYPE, nc: int):
+    """Helper to select a normalization layer"""
+    if norm_type == "batch":
+        layer = nn.BatchNorm2d(nc, affine=True)
+    elif norm_type == "instance":
+        layer = nn.InstanceNorm2d(nc, affine=False)
+    else:
+        raise NotImplementedError(f"Normalization layer [{norm_type}] is not implemented")
+    return layer
+
+
+def pad(pad_type: PADDING_LAYER_TYPE, padding: int):
+    """Helper to select a padding layer; returns None when no explicit padding layer is needed"""
+    if padding == 0 or pad_type == "zero":
+        return None
+    if pad_type == "reflect":
+        layer = nn.ReflectionPad2d(padding)
+    elif pad_type == "replicate":
+        layer = nn.ReplicationPad2d(padding)
+    else:
+        raise NotImplementedError(f"Padding layer [{pad_type}] is not implemented")
+    return layer
+
+
+def get_valid_padding(kernel_size: int, dilation: int):
+    kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1)
+    padding = (kernel_size - 1) // 2
+    return padding
+
+
+def sequential(*args: Any):
+    # Flatten Sequential. It unwraps nn.Sequential.
+    if len(args) == 1:
+        if isinstance(args[0], OrderedDict):
+            raise NotImplementedError("sequential does not support OrderedDict input.")
+        return args[0]  # No sequential is needed.
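+    # More than one module was passed in: unwrap any nn.Sequential containers so
+    # the result is a single flat nn.Sequential.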
+ modules: List[nn.Module] = [] + for module in args: + if isinstance(module, nn.Sequential): + for submodule in module.children(): + modules.append(submodule) + elif isinstance(module, nn.Module): + modules.append(module) + return nn.Sequential(*modules) + + +def conv_block( + in_nc: int, + out_nc: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + pad_type: Optional[PADDING_LAYER_TYPE] = "zero", + norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None, + act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu", + mode: BLOCK_MODE = "CNA", +): + """ + Conv layer with padding, normalization, activation + mode: CNA --> Conv -> Norm -> Act + NAC --> Norm -> Act --> Conv (Identity Mappings in Deep Residual Networks, ECCV16) + """ + assert mode in ["CNA", "NAC", "CNAC"], f"Wrong conv mode [{mode}]" + padding = get_valid_padding(kernel_size, dilation) + p = pad(pad_type, padding) if pad_type else None + padding = padding if pad_type == "zero" else 0 + + c = nn.Conv2d( + in_nc, + out_nc, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias, + groups=groups, + ) + a = act(act_type) if act_type else None + match mode: + case "CNA": + n = norm(norm_type, out_nc) if norm_type else None + return sequential(p, c, n, a) + case "NAC": + if norm_type is None and act_type is not None: + a = act(act_type, inplace=False) + n = norm(norm_type, in_nc) if norm_type else None + return sequential(n, a, p, c) + case "CNAC": + n = norm(norm_type, in_nc) if norm_type else None + return sequential(n, a, p, c) + + +class ConcatBlock(nn.Module): + # Concat the output of a submodule to its input + def __init__(self, submodule: nn.Module): + super(ConcatBlock, self).__init__() + self.sub = submodule + + def forward(self, x: torch.Tensor): + output = torch.cat((x, self.sub(x)), dim=1) + return output + + def __repr__(self): + tmpstr = "Identity .. 
\n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +class ShortcutBlock(nn.Module): + # Elementwise sum the output of a submodule to its input + def __init__(self, submodule: nn.Module): + super(ShortcutBlock, self).__init__() + self.sub = submodule + + def forward(self, x: torch.Tensor): + output = x + self.sub(x) + return output + + def __repr__(self): + tmpstr = "Identity + \n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +class ShortcutBlockSPSR(nn.Module): + # Elementwise sum the output of a submodule to its input + def __init__(self, submodule: nn.Module): + super(ShortcutBlockSPSR, self).__init__() + self.sub = submodule + + def forward(self, x: torch.Tensor): + return x, self.sub + + def __repr__(self): + tmpstr = "Identity + \n|" + modstr = self.sub.__repr__().replace("\n", "\n|") + tmpstr = tmpstr + modstr + return tmpstr + + +class ResNetBlock(nn.Module): + """ + ResNet Block, 3-3 style + with extra residual scaling used in EDSR + (Enhanced Deep Residual Networks for Single Image Super-Resolution, CVPRW 17) + """ + + def __init__( + self, + in_nc: int, + mid_nc: int, + out_nc: int, + kernel_size: int = 3, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + pad_type: PADDING_LAYER_TYPE = "zero", + norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None, + act_type: Optional[ACTIVATION_LAYER_TYPE] = "relu", + mode: BLOCK_MODE = "CNA", + res_scale: int = 1, + ): + super(ResNetBlock, self).__init__() + conv0 = conv_block( + in_nc, mid_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode + ) + if mode == "CNA": + act_type = None + if mode == "CNAC": # Residual path: |-CNAC-| + act_type = None + norm_type = None + conv1 = conv_block( + mid_nc, out_nc, kernel_size, stride, dilation, groups, bias, pad_type, norm_type, act_type, mode + ) + + self.res = sequential(conv0, conv1) + self.res_scale = res_scale + + def forward(self, x: torch.Tensor): + res = self.res(x).mul(self.res_scale) + return x + res + + +class ResidualDenseBlock_5C(nn.Module): + """ + Residual Dense Block + style: 5 convs + The core module of paper: (Residual Dense Network for Image Super-Resolution, CVPR 18) + """ + + def __init__( + self, + nc: int, + kernel_size: int = 3, + gc: int = 32, + stride: int = 1, + bias: bool = True, + pad_type: PADDING_LAYER_TYPE = "zero", + norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None, + act_type: ACTIVATION_LAYER_TYPE = "leakyrelu", + mode: BLOCK_MODE = "CNA", + ): + super(ResidualDenseBlock_5C, self).__init__() + # gc: growth channel, i.e. 
intermediate channels
+        self.conv1 = conv_block(
+            nc, gc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type, mode=mode
+        )
+        self.conv2 = conv_block(
+            nc + gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+        )
+        self.conv3 = conv_block(
+            nc + 2 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+        )
+        self.conv4 = conv_block(
+            nc + 3 * gc,
+            gc,
+            kernel_size,
+            stride,
+            bias=bias,
+            pad_type=pad_type,
+            norm_type=norm_type,
+            act_type=act_type,
+            mode=mode,
+        )
+        if mode == "CNA":
+            last_act = None
+        else:
+            last_act = act_type
+        self.conv5 = conv_block(
+            nc + 4 * gc, nc, 3, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=last_act, mode=mode
+        )
+
+    def forward(self, x: torch.Tensor):
+        x1 = self.conv1(x)
+        x2 = self.conv2(torch.cat((x, x1), 1))
+        x3 = self.conv3(torch.cat((x, x1, x2), 1))
+        x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
+        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
+        return x5.mul(0.2) + x
+
+
+class RRDB(nn.Module):
+    """
+    Residual in Residual Dense Block
+    (ESRGAN: Enhanced Super-Resolution Generative Adversarial Networks)
+    """
+
+    def __init__(
+        self,
+        nc: int,
+        kernel_size: int = 3,
+        gc: int = 32,
+        stride: int = 1,
+        bias: bool = True,
+        pad_type: PADDING_LAYER_TYPE = "zero",
+        norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+        act_type: ACTIVATION_LAYER_TYPE = "leakyrelu",
+        mode: BLOCK_MODE = "CNA",
+    ):
+        super(RRDB, self).__init__()
+        self.RDB1 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+        self.RDB2 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+        self.RDB3 = ResidualDenseBlock_5C(nc, kernel_size, gc, stride, bias, pad_type, norm_type, act_type, mode)
+
+    def forward(self, x: torch.Tensor):
+        out = self.RDB1(x)
+        out = self.RDB2(out)
+        out = self.RDB3(out)
+        return out.mul(0.2) + x
+
+
+# Upsampler
+def pixelshuffle_block(
+    in_nc: int,
+    out_nc: int,
+    upscale_factor: int = 2,
+    kernel_size: int = 3,
+    stride: int = 1,
+    bias: bool = True,
+    pad_type: PADDING_LAYER_TYPE = "zero",
+    norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+    act_type: ACTIVATION_LAYER_TYPE = "relu",
+):
+    """
+    Pixel shuffle layer
+    (Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional
+    Neural Network, CVPR17)
+    """
+    conv = conv_block(
+        in_nc,
+        out_nc * (upscale_factor**2),
+        kernel_size,
+        stride,
+        bias=bias,
+        pad_type=pad_type,
+        norm_type=None,
+        act_type=None,
+    )
+    pixel_shuffle = nn.PixelShuffle(upscale_factor)
+
+    n = norm(norm_type, out_nc) if norm_type else None
+    a = act(act_type) if act_type else None
+    return sequential(conv, pixel_shuffle, n, a)
+
+
+def upconv_block(
+    in_nc: int,
+    out_nc: int,
+    upscale_factor: int = 2,
+    kernel_size: int = 3,
+    stride: int = 1,
+    bias: bool = True,
+    pad_type: PADDING_LAYER_TYPE = "zero",
+    norm_type: Optional[NORMALIZATION_LAYER_TYPE] = None,
+    act_type: ACTIVATION_LAYER_TYPE = "relu",
+    mode: UPCONV_BLOCK_MODE = "nearest",
+):
+    # Upsample then convolve, which avoids the checkerboard artifacts of transposed
+    # convolutions; see https://distill.pub/2016/deconv-checkerboard/
+    upsample = nn.Upsample(scale_factor=upscale_factor, mode=mode)
+    conv = conv_block(
+        in_nc, out_nc, kernel_size, stride, bias=bias, pad_type=pad_type, norm_type=norm_type, act_type=act_type
+    )
+    return sequential(upsample, conv)
diff --git
a/invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py b/invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py
new file mode 100644
index 00000000000..2d8e443a25e
--- /dev/null
+++ b/invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py
@@ -0,0 +1,70 @@
+# Original: https://github.com/joeyballentine/Material-Map-Generator
+# Adapted and optimized for InvokeAI
+
+import math
+from typing import Literal, Optional
+
+import torch
+import torch.nn as nn
+
+import invokeai.backend.image_util.pbr_maps.architecture.block as B
+
+UPSCALE_MODE = Literal["upconv", "pixelshuffle"]
+
+
+class PBR_RRDB_Net(nn.Module):
+    def __init__(
+        self,
+        in_nc: int,
+        out_nc: int,
+        nf: int,
+        nb: int,
+        gc: int = 32,
+        upscale: int = 4,
+        norm_type: Optional[B.NORMALIZATION_LAYER_TYPE] = None,
+        act_type: B.ACTIVATION_LAYER_TYPE = "leakyrelu",
+        mode: B.BLOCK_MODE = "CNA",
+        res_scale: int = 1,
+        upsample_mode: UPSCALE_MODE = "upconv",
+    ):
+        super(PBR_RRDB_Net, self).__init__()
+        n_upscale = int(math.log(upscale, 2))
+        if upscale == 3:
+            n_upscale = 1
+
+        fea_conv = B.conv_block(in_nc, nf, kernel_size=3, norm_type=None, act_type=None)
+        rb_blocks = [
+            B.RRDB(
+                nf,
+                kernel_size=3,
+                gc=32,
+                stride=1,
+                bias=True,
+                pad_type="zero",
+                norm_type=norm_type,
+                act_type=act_type,
+                mode="CNA",
+            )
+            for _ in range(nb)
+        ]
+        LR_conv = B.conv_block(nf, nf, kernel_size=3, norm_type=norm_type, act_type=None, mode=mode)
+
+        if upsample_mode == "upconv":
+            upsample_block = B.upconv_block
+        elif upsample_mode == "pixelshuffle":
+            upsample_block = B.pixelshuffle_block
+        else:
+            raise NotImplementedError(f"Upsample mode [{upsample_mode}] is not implemented")
+
+        # upsample_block returns an nn.Sequential, so the single-module case below
+        # still unpacks correctly via *upsampler.
+        if upscale == 3:
+            upsampler = upsample_block(nf, nf, 3, act_type=act_type)
+        else:
+            upsampler = [upsample_block(nf, nf, act_type=act_type) for _ in range(n_upscale)]
+
+        HR_conv0 = B.conv_block(nf, nf, kernel_size=3, norm_type=None, act_type=act_type)
+        HR_conv1 = B.conv_block(nf, out_nc, kernel_size=3, norm_type=None, act_type=None)
+
+        self.model = B.sequential(
+            fea_conv, B.ShortcutBlock(B.sequential(*rb_blocks, LR_conv)), *upsampler, HR_conv0, HR_conv1
+        )
+
+    def forward(self, x: torch.Tensor):
+        return self.model(x)
diff --git a/invokeai/backend/image_util/pbr_maps/pbr_maps.py b/invokeai/backend/image_util/pbr_maps/pbr_maps.py
new file mode 100644
index 00000000000..fb1b09c8a58
--- /dev/null
+++ b/invokeai/backend/image_util/pbr_maps/pbr_maps.py
@@ -0,0 +1,104 @@
+# Original: https://github.com/joeyballentine/Material-Map-Generator
+# Adapted and optimized for InvokeAI
+
+import pathlib
+from typing import Any, Literal
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+import torch
+from PIL import Image
+
+from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+from invokeai.backend.image_util.pbr_maps.utils.image_ops import crop_seamless, esrgan_launcher_split_merge
+
+# Raw checkpoint URLs: a github.com/.../blob/... URL serves an HTML page rather
+# than the model weights, so the raw.githubusercontent.com form is used instead.
+NORMAL_MAP_MODEL = "https://raw.githubusercontent.com/joeyballentine/Material-Map-Generator/master/utils/models/1x_NormalMapGenerator-CX-Lite_200000_G.pth"
+OTHER_MAP_MODEL = "https://raw.githubusercontent.com/joeyballentine/Material-Map-Generator/master/utils/models/1x_FrankenMapGenerator-CX-Lite_215000_G.pth"
+
+
+class PBRMapsGenerator:
+    def __init__(self, normal_map_model: PBR_RRDB_Net, other_map_model: PBR_RRDB_Net, device: torch.device) -> None:
+        self.normal_map_model = normal_map_model
+        self.other_map_model = other_map_model
+        self.device = device
+
+    @staticmethod
+    def load_model(model_path: pathlib.Path, device: torch.device) -> PBR_RRDB_Net:
+        state_dict = torch.load(model_path.as_posix(),
map_location="cpu")
+
+        # Both released checkpoints share this 1x RRDB architecture.
+        model = PBR_RRDB_Net(
+            3,
+            3,
+            32,
+            12,
+            gc=32,
+            upscale=1,
+            norm_type=None,
+            act_type="leakyrelu",
+            mode="CNA",
+            res_scale=1,
+            upsample_mode="upconv",
+        )
+
+        model.load_state_dict(state_dict, strict=False)
+        del state_dict
+        model.eval()
+
+        for _, v in model.named_parameters():
+            v.requires_grad = False
+
+        return model.to(device)
+
+    def process(self, img: npt.NDArray[Any], model: PBR_RRDB_Net):
+        # Normalize by the integer dtype's max before converting to float32.
+        max_value = np.iinfo(img.dtype).max
+        img = img.astype(np.float32) / max_value
+        # The input is already RGB (from PIL), which is the channel order the
+        # upstream Material-Map-Generator inference code feeds these models.
+        tensor_img = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(self.device)
+
+        with torch.no_grad():
+            output = model(tensor_img).data.squeeze(0).float().cpu().clamp_(0, 1).numpy()
+            # Reverse RGB -> BGR to keep the cv2-style convention used downstream.
+            output = output[[2, 1, 0], :, :]
+            output = np.transpose(output, (1, 2, 0))
+            output = (output * 255.0).round()
+        return output
+
+    def _cv2_to_pil(self, image: npt.NDArray[Any]) -> Image.Image:
+        image = image.astype(np.uint8)
+        if image.ndim == 2:
+            # Single-channel maps (roughness, displacement) convert directly.
+            return Image.fromarray(image)
+        return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+
+    def generate_maps(
+        self,
+        image: Image.Image,
+        tile_size: int = 512,
+        border_mode: Literal["none", "seamless", "mirror", "replicate"] = "none",
+    ):
+        models = [self.normal_map_model, self.other_map_model]
+        np_image = np.array(image).astype(np.uint8)
+
+        match border_mode:
+            case "seamless":
+                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_WRAP)
+            case "mirror":
+                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REFLECT_101)
+            case "replicate":
+                np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REPLICATE)
+            case "none":
+                pass
+
+        img_height, img_width = np_image.shape[:2]
+
+        # Tile the inference when the image is larger than the tile size
+        do_split = img_height > tile_size or img_width > tile_size
+
+        if do_split:
+            rlts = esrgan_launcher_split_merge(np_image, self.process, models, scale_factor=1, tile_size=tile_size)
+        else:
+            rlts = [self.process(np_image, model) for model in models]
+
+        if border_mode != "none":
+            rlts = [crop_seamless(rlt) for rlt in rlts]
+
+        normal_map = self._cv2_to_pil(rlts[0])
+        roughness = self._cv2_to_pil(rlts[1][:, :, 1])
+        displacement = self._cv2_to_pil(rlts[1][:, :, 0])
+
+        return normal_map, roughness, displacement
diff --git a/invokeai/backend/image_util/pbr_maps/utils/image_ops.py b/invokeai/backend/image_util/pbr_maps/utils/image_ops.py
new file mode 100644
index 00000000000..426620797cb
--- /dev/null
+++ b/invokeai/backend/image_util/pbr_maps/utils/image_ops.py
@@ -0,0 +1,93 @@
+# Original: https://github.com/joeyballentine/Material-Map-Generator
+# Adapted and optimized for InvokeAI
+
+import math
+from typing import Any, Callable, List
+
+import numpy as np
+import numpy.typing as npt
+
+from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+
+
+def crop_seamless(img: npt.NDArray[Any]):
+    """Crop the 16 px border that generate_maps adds before inference."""
+    img_height, img_width = img.shape[:2]
+    y, x = 16, 16
+    h, w = img_height - 32, img_width - 32
+    img = img[y : y + h, x : x + w]
+    return img
+
+
+# From https://github.com/ata4/esrgan-launcher/blob/master/upscale.py
+def esrgan_launcher_split_merge(
+    input_image: npt.NDArray[Any],
+    upscale_function: Callable[[npt.NDArray[Any], PBR_RRDB_Net], npt.NDArray[Any]],
+    models: List[PBR_RRDB_Net],
+    scale_factor: int = 4,
+    tile_size: int = 512,
+    tile_padding: float = 0.125,
+):
+    # NOTE: following the upstream code, axis 0 (rows) is named "width"/"x" and
+    # axis 1 (columns) "height"/"y". The naming is swapped relative to the usual
+    # numpy convention, but it is applied consistently, so the tiling is correct.
+    width, height, depth = input_image.shape
+    output_width = width * scale_factor
+    output_height = height * scale_factor
+    output_shape = (output_width, output_height, depth)
+
+    # Start with black images, one per model
+    output_images = [np.zeros(output_shape, np.uint8)
for _ in range(len(models))] + + tile_padding = math.ceil(tile_size * tile_padding) + tile_size = math.ceil(tile_size / scale_factor) + + tiles_x = math.ceil(width / tile_size) + tiles_y = math.ceil(height / tile_size) + + for y in range(tiles_y): + for x in range(tiles_x): + # extract tile from input image + ofs_x = x * tile_size + ofs_y = y * tile_size + + # input tile area on total image + input_start_x = ofs_x + input_end_x = min(ofs_x + tile_size, width) + + input_start_y = ofs_y + input_end_y = min(ofs_y + tile_size, height) + + # input tile area on total image with padding + input_start_x_pad = max(input_start_x - tile_padding, 0) + input_end_x_pad = min(input_end_x + tile_padding, width) + + input_start_y_pad = max(input_start_y - tile_padding, 0) + input_end_y_pad = min(input_end_y + tile_padding, height) + + # input tile dimensions + input_tile_width = input_end_x - input_start_x + input_tile_height = input_end_y - input_start_y + + input_tile = input_image[input_start_x_pad:input_end_x_pad, input_start_y_pad:input_end_y_pad] + + for idx, model in enumerate(models): + # upscale tile + output_tile = upscale_function(input_tile, model) + + # output tile area on total image + output_start_x = input_start_x * scale_factor + output_end_x = input_end_x * scale_factor + + output_start_y = input_start_y * scale_factor + output_end_y = input_end_y * scale_factor + + # output tile area without padding + output_start_x_tile = (input_start_x - input_start_x_pad) * scale_factor + output_end_x_tile = output_start_x_tile + input_tile_width * scale_factor + + output_start_y_tile = (input_start_y - input_start_y_pad) * scale_factor + output_end_y_tile = output_start_y_tile + input_tile_height * scale_factor + + # put tile into output image + output_images[idx][output_start_x:output_end_x, output_start_y:output_end_y] = output_tile[ + output_start_x_tile:output_end_x_tile, output_start_y_tile:output_end_y_tile + ] + + return output_images
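
---

A minimal sketch of driving the backend pipeline directly, useful for testing
the node outside of a workflow graph. It assumes the two checkpoints above have
already been downloaded locally; the paths and the input file name below are
hypothetical:

    import pathlib

    from PIL import Image

    from invokeai.backend.image_util.pbr_maps.pbr_maps import PBRMapsGenerator
    from invokeai.backend.util.devices import TorchDevice

    device = TorchDevice.choose_torch_device()
    normal_model = PBRMapsGenerator.load_model(
        pathlib.Path("models/1x_NormalMapGenerator-CX-Lite_200000_G.pth"), device
    )
    other_model = PBRMapsGenerator.load_model(
        pathlib.Path("models/1x_FrankenMapGenerator-CX-Lite_215000_G.pth"), device
    )
    pipeline = PBRMapsGenerator(normal_model, other_model, device)

    image = Image.open("brick_wall.png").convert("RGB")
    # Images larger than tile_size are processed in tiles; "mirror" pads the
    # border before inference to hide seams at the edges.
    normal, roughness, displacement = pipeline.generate_maps(image, tile_size=512, border_mode="mirror")
    normal.save("brick_wall_normal.png")
    roughness.save("brick_wall_roughness.png")
    displacement.save("brick_wall_displacement.png")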