0.4.0 (#33)

* Bug Fixes + text appending
* output names + validity icon changes
* Bump version to 0.3.2
* Fixed the node validity check
* Fix node validity again, add new nodes
* Added Overlay Images node and BoundedIntegerInput (#32)
* Add number picker to slider input
* Transparency related features
* crash fixes
* Add update check on startup
* correct small mistake
* Add error handler message box w/ issue submit
* Set NODE_ENV to production for building
* Fix overlay node bug
* Ignore single-color alpha, fix overflow bug

Co-authored-by: theflyingzamboni <[email protected]>
chaiNNer-org · Mar 18, 2022 · 96dce84 · 96dce84
1 parent 7f3b419
commit 96dce84
Show file tree

Hide file tree

Showing 21 changed files with 1,005 additions and 146 deletions.
diff --git a/backend/nodes/image_nodes.py b/backend/nodes/image_nodes.py
diff --git a/backend/nodes/ncnn_nodes.py b/backend/nodes/ncnn_nodes.py
@@ -95,44 +95,94 @@ def __init__(self):
         self.icon = "NCNN"
         self.sub = "NCNN"
 
-    def run(self, net_tuple: tuple, img: np.ndarray) -> np.ndarray:
+    def upscale(self, img: np.ndarray, net: tuple, input_name: str, output_name: str):
         dtype_max = 1
         try:
             dtype_max = np.iinfo(img.dtype).max
         except:
-            logger.info("img dtype is not an int")
+            logger.debug("img dtype is not an int")
 
         img = (img.astype("float32") / dtype_max * 255).astype(
             np.uint8
         )  # don't ask lol
 
-        # ncnn only supports 3 apparently
-        in_nc = 3
-        gray = False
-        if img.ndim == 2:
-            gray = True
-            logger.warn("Expanding image channels")
-            img = np.tile(np.expand_dims(img, axis=2), (1, 1, min(in_nc, 3)))
-        # Remove extra channels if too many (i.e three channel image, single channel model)
-        elif img.shape[2] > in_nc:
-            logger.warn("Truncating image channels")
-            img = img[:, :, :in_nc]
-        # Pad with solid alpha channel if needed (i.e three channel image, four channel model)
-        elif img.shape[2] == 3 and in_nc == 4:
-            logger.warn("Expanding image channels")
-            img = np.dstack((img, np.full(img.shape[:-1], 1.0)))
-
-        param_path, bin_path, input_name, output_name, net = net_tuple
-
         # Try/except block to catch errors
         try:
+            vkdev = ncnn.get_gpu_device(0)
+            blob_vkallocator = ncnn.VkBlobAllocator(vkdev)
+            staging_vkallocator = ncnn.VkStagingAllocator(vkdev)
             output, _ = ncnn_auto_split_process(
-                img, net, input_name=input_name, output_name=output_name
+                img,
+                net,
+                input_name=input_name,
+                output_name=output_name,
+                blob_vkallocator=blob_vkallocator,
+                staging_vkallocator=staging_vkallocator,
             )
+            # blob_vkallocator.clear() # this slows stuff down
+            # staging_vkallocator.clear() # as does this
             # net.clear() # don't do this, it makes chaining break
-            if gray:
-                output = np.average(output, axis=2)
-            return np.clip(output.astype(np.float32) / 255, 0, 1)
+            return output
         except Exception as e:
             logger.error(e)
             raise RuntimeError("An unexpected error occurred during NCNN processing.")
+
+    def run(self, net_tuple: tuple, img: np.ndarray) -> np.ndarray:
+
+        h, w = img.shape[:2]
+        c = img.shape[2] if len(img.shape) > 2 else 1
+
+        param_path, bin_path, input_name, output_name, net = net_tuple
+
+        # ncnn only supports 3 apparently
+        in_nc = 3
+
+        # TODO: This can prob just be a shared function tbh
+        # Transparency hack (white/black background difference alpha)
+        if in_nc == 3 and c == 4:
+            # Ignore single-color alpha
+            unique = np.unique(img[:, :, 3])
+            if len(unique) == 1:
+                logger.info("Single color alpha channel, ignoring.")
+                output = self.upscale(img[:, :, :3], net, input_name, output_name)
+                output = np.dstack(
+                    (output, np.full(output.shape[:-1], (unique[0] * 255)))
+                )
+                output = np.clip(output.astype(np.float32) / 255, 0, 1)
+            else:
+                img1 = np.copy(img[:, :, :3])
+                img2 = np.copy(img[:, :, :3])
+                for c in range(3):
+                    img1[:, :, c] *= img[:, :, 3]
+                    img2[:, :, c] = (img2[:, :, c] - 1) * img[:, :, 3] + 1
+
+                output1 = self.upscale(img1, net, input_name, output_name)
+                output2 = self.upscale(img2, net, input_name, output_name)
+                output1 = np.clip(output1.astype(np.float32) / 255, 0, 1)
+                output2 = np.clip(output2.astype(np.float32) / 255, 0, 1)
+                alpha = 1 - np.mean(output2 - output1, axis=2)
+                output = np.dstack((output1, alpha))
+        else:
+            gray = False
+            if img.ndim == 2:
+                gray = True
+                logger.debug("Expanding image channels")
+                img = np.tile(np.expand_dims(img, axis=2), (1, 1, min(in_nc, 3)))
+            # Remove extra channels if too many (i.e three channel image, single channel model)
+            elif img.shape[2] > in_nc:
+                logger.warn("Truncating image channels")
+                img = img[:, :, :in_nc]
+            # Pad with solid alpha channel if needed (i.e three channel image, four channel model)
+            elif img.shape[2] == 3 and in_nc == 4:
+                logger.debug("Expanding image channels")
+                img = np.dstack((img, np.full(img.shape[:-1], 1.0)))
+            output = self.upscale(img, net, input_name, output_name)
+
+            if gray:
+                output = np.average(output, axis=2)
+
+            output = output.astype(np.float32) / 255
+
+        output = np.clip(output, 0, 1)
+
+        return output
diff --git a/backend/nodes/properties/inputs/generic_inputs.py b/backend/nodes/properties/inputs/generic_inputs.py
@@ -1,21 +1,26 @@
 from typing import Dict, List
 
 
-def DropDownInput(input_type: str, label: str, options: List[str]) -> Dict:
+def DropDownInput(
+    input_type: str, label: str, options: List[str], optional: bool = False
+) -> Dict:
     """Input for a dropdown"""
     return {
         "type": f"dropdown::{input_type}",
         "label": label,
         "options": options,
+        "optional": optional,
     }
 
 
-def TextInput(label: str) -> Dict:
+def TextInput(label: str, has_handle=True, max_length=None, optional=False) -> Dict:
     """Input for arbitrary text"""
     return {
         "type": "text::any",
         "label": label,
-        "hasHandle": True,
+        "hasHandle": has_handle,
+        "maxLength": max_length,
+        "optional": optional,
     }
 
 
@@ -54,6 +59,25 @@ def OddIntegerInput(label: str) -> Dict:
     }
 
 
+def BoundedIntegerInput(
+    label: str,
+    minimum: int = 0,
+    maximum: int = 100,
+    default: int = 50,
+    optional: bool = False,
+) -> Dict:
+    """Bounded input for integer number"""
+    return {
+        "type": "number::integer",
+        "label": label,
+        "min": minimum,
+        "max": maximum,
+        "def": default,
+        "hasHandle": True,
+        "optional": optional,
+    }
+
+
 def BoundlessIntegerInput(label: str) -> Dict:
     """Input for integer number"""
     return {
@@ -66,14 +90,17 @@ def BoundlessIntegerInput(label: str) -> Dict:
     }
 
 
-def SliderInput(label: str, min: int, max: int, default: int) -> Dict:
+def SliderInput(
+    label: str, min: int, max: int, default: int, optional: bool = False
+) -> Dict:
     """Input for integer number via slider"""
     return {
         "type": "number::slider",
         "label": label,
         "min": min,
         "max": max,
         "def": default,
+        "optional": optional,
     }
 
 
@@ -133,4 +160,5 @@ def StackOrientationDropdown() -> Dict:
                 "value": "vertical",
             },
         ],
+        optional=True,
     )
diff --git a/backend/nodes/properties/outputs/generic_outputs.py b/backend/nodes/properties/outputs/generic_outputs.py
@@ -15,3 +15,12 @@ def IntegerOutput(label: str) -> Dict:
         "type": "number::integer",
         "label": label,
     }
+
+
+def TextOutput(label: str) -> Dict:
+    """Output for arbitrary text"""
+    return {
+        "type": "text::any",
+        "label": label,
+        "hasHandle": True,
+    }
diff --git a/backend/nodes/pytorch_nodes.py b/backend/nodes/pytorch_nodes.py
@@ -63,7 +63,7 @@ def __init__(self):
         super().__init__()
         self.description = "Load PyTorch state dict file (.pth) into an auto-detected supported model architecture. Supports most variations of the RRDB architecture (ESRGAN, Real-ESRGAN, RealSR, BSRGAN, SPSR) and Real-ESRGAN's SRVGG architecture."
         self.inputs = [PthFileInput()]
-        self.outputs = [ModelOutput()]
+        self.outputs = [ModelOutput(), TextOutput("Model Name")]
 
         self.icon = "PyTorch"
         self.sub = "Input & Output"
@@ -82,7 +82,9 @@ def run(self, path: str) -> Any:
         model.eval()
         model = model.to(torch.device(os.environ["device"]))
 
-        return model
+        basename = os.path.splitext(os.path.basename(path))[0]
+
+        return model, basename
 
 
 @NodeFactory.register("PyTorch", "Upscale Image")
@@ -100,9 +102,32 @@ def __init__(self):
         self.icon = "PyTorch"
         self.sub = "Processing"
 
+    def upscale(self, img: np.ndarray, model: torch.nn.Module, scale: int):
+        # Borrowed from iNNfer
+        logger.info("Converting image to tensor")
+        img_tensor = np2tensor(img, change_range=True)
+        if os.environ["isFp16"] == "True":
+            model = model.half()
+        logger.info("Upscaling image")
+        t_out, _ = auto_split_process(
+            img_tensor,
+            model,
+            scale,
+        )
+        del img_tensor, model
+        logger.info("Converting tensor to image")
+        img_out = tensor2np(t_out.detach(), change_range=False, imtype=np.float32)
+        logger.info("Done upscaling")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        del t_out
+        return img_out
+
     def run(self, model: torch.nn.Module, img: np.ndarray) -> np.ndarray:
         """Upscales an image with a pretrained model"""
 
+        torch.load
+
         check_env()
 
         logger.info(f"Upscaling image...")
@@ -128,46 +153,49 @@ def run(self, model: torch.nn.Module, img: np.ndarray) -> np.ndarray:
         # The frontend should type-validate this enough where it shouldn't be needed,
         # But I want to be extra safe
 
-        # # Add extra channels if not enough (i.e single channel img, three channel model)
-        gray = False
-        if img.ndim == 2:
-            gray = True
-            logger.warn("Expanding image channels")
-            img = np.tile(np.expand_dims(img, axis=2), (1, 1, min(in_nc, 3)))
-        # Remove extra channels if too many (i.e three channel image, single channel model)
-        elif img.shape[2] > in_nc:
-            logger.warn("Truncating image channels")
-            img = img[:, :, :in_nc]
-        # Pad with solid alpha channel if needed (i.e three channel image, four channel model)
-        elif img.shape[2] == 3 and in_nc == 4:
-            logger.warn("Expanding image channels")
-            img = np.dstack((img, np.full(img.shape[:-1], 1.0)))
+        # Transparency hack (white/black background difference alpha)
+        if in_nc == 3 and c == 4:
+            # Ignore single-color alpha
+            unique = np.unique(img[:, :, 3])
+            if len(unique) == 1:
+                logger.info("Single color alpha channel, ignoring.")
+                output = self.upscale(img[:, :, :3], model, model.scale)
+                output = np.dstack((output, np.full(output.shape[:-1], unique[0])))
+            else:
+                img1 = np.copy(img[:, :, :3])
+                img2 = np.copy(img[:, :, :3])
+                for c in range(3):
+                    img1[:, :, c] *= img[:, :, 3]
+                    img2[:, :, c] = (img2[:, :, c] - 1) * img[:, :, 3] + 1
+
+                output1 = self.upscale(img1, model, model.scale)
+                output2 = self.upscale(img2, model, model.scale)
+                alpha = 1 - np.mean(output2 - output1, axis=2)
+                output = np.dstack((output1, alpha))
+        else:
+            # # Add extra channels if not enough (i.e single channel img, three channel model)
+            gray = False
+            if img.ndim == 2:
+                gray = True
+                logger.debug("Expanding image channels")
+                img = np.tile(np.expand_dims(img, axis=2), (1, 1, min(in_nc, 3)))
+            # Remove extra channels if too many (i.e three channel image, single channel model)
+            elif img.shape[2] > in_nc:
+                logger.warn("Truncating image channels")
+                img = img[:, :, :in_nc]
+            # Pad with solid alpha channel if needed (i.e three channel image, four channel model)
+            elif img.shape[2] == 3 and in_nc == 4:
+                logger.debug("Expanding image channels")
+                img = np.dstack((img, np.full(img.shape[:-1], 1.0)))
 
-        # Borrowed from iNNfer
-        logger.info("Converting image to tensor")
-        img_tensor = np2tensor(img, change_range=True)
-        if os.environ["isFp16"] == "True":
-            model = model.half()
-        logger.info("Upscaling image")
-        t_out, _ = auto_split_process(
-            img_tensor,
-            model,
-            scale,
-        )
-        del img_tensor, model
-        logger.info("Converting tensor to image")
-        img_out = tensor2np(t_out.detach(), change_range=False, imtype=np.float32)
-        logger.info("Done upscaling")
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        del t_out
+            output = self.upscale(img, model, model.scale)
 
-        if gray:
-            img_out = np.average(img_out, axis=2).astype("float32")
+            if gray:
+                output = np.average(output, axis=2).astype("float32")
 
-        img_out = np.clip(img_out, 0, 1)
+        output = np.clip(output, 0, 1)
 
-        return img_out
+        return output
 
 
 @NodeFactory.register("PyTorch", "Interpolate Models")
@@ -247,17 +275,17 @@ def __init__(self):
         """Constructor"""
         super().__init__()
         self.description = "Save a PyTorch model to specified directory."
-        self.inputs = [StateDictInput(), DirectoryInput(), TextInput("Model Name")]
+        self.inputs = [ModelInput(), DirectoryInput(), TextInput("Model Name")]
         self.outputs = []
 
         self.icon = "PyTorch"
         self.sub = "Input & Output"
 
-    def run(self, model: OrderedDict(), directory: str, name: str) -> bool:
+    def run(self, model: torch.nn.Module, directory: str, name: str) -> bool:
         fullFile = f"{name}.pth"
         fullPath = os.path.join(directory, fullFile)
         logger.info(f"Writing model to path: {fullPath}")
-        status = torch.save(model, fullPath)
+        status = torch.save(model.state, fullPath)
 
         return status