Commit
Use faiss scalar quantizer to reduce dimensionality
Wesxdz committed Nov 1, 2022
1 parent 7edc9c2 commit 3c5ae15
Showing 8 changed files with 186 additions and 73 deletions.
10 changes: 6 additions & 4 deletions config.py
@@ -1,8 +1,10 @@
+ downsample = 4
  # C x H x W
- screen_size = (4, 360, 640)
+ screen_size = (4, int(360/downsample), int(640/downsample))
  # screen_size = (128, 128)
  # TODO: The screen size is too large to fit in GPU memory and is probably overkill
+ # Let's downscale the resolution of the input images and represent the four channels as one

- screen_input_size = 4 * 360 * 640
- voxel_grid_size = (64*64,)
- dataset_size = 1
+ # screen_input_size = 4 * 360 * 640
+ voxel_grid_size = (16*16,)
+ dataset_size = 32
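
The TODO above suggests downscaling (done here via downsample) and merging the four channels into one, which this commit does not implement yet. A minimal sketch of the merge, assuming a channel mean and einops (used elsewhere in the project):

import numpy as np
from einops import reduce

view = np.zeros((4, 90, 160), dtype='float32')   # C x H x W at downsample = 4
mono = reduce(view, 'c h w -> h w', 'mean')      # one way to collapse the channels
print(mono.shape)                                 # (90, 160)
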
18 changes: 11 additions & 7 deletions generate_dataset.py
@@ -26,7 +26,7 @@ def set_random_seed(self):

  pool_collection_name = "voxels"
  block_size = 0.0
- chunk_size = 16
+ chunk_size = 8

  # TODO: Will need to create a distinct placer for each voxel type, or encode objects in vertex data
  placer = bpy.data.objects["placer"]
@@ -63,7 +63,7 @@ def spawn_occlusion_chunks(radius, noise, d):
      verts = []
      rgb = []
      colors = []
-     for y in range(0, radius*2):
+     for y in range(0, radius):
          for x in range(0, radius*2):
              chunk_origin = Vector((x*chunk_size, y*chunk_size, 0.0))
              spawn_voxel_occlusion_heightmap(noise, verts, colors, rgb, chunk_origin.x, chunk_origin.y, chunk_size, chunk_size, 1.0)
@@ -73,7 +73,7 @@ def spawn_occlusion_chunks(radius, noise, d):
      bpy.context.object.modifiers['GeometryNodes'].node_group = bpy.data.node_groups['Geometry Nodes']
      bpy.context.object.modifiers['GeometryNodes']['Input_2'] = bpy.data.objects['occlusion_voxel']
      bpy.ops.geometry.color_attribute_add(name="color", color=(1.0, 0, 0.0, 1))
-     player_floor = noise[chunk_size*radius][chunk_size*radius]
+     player_floor = noise[0][chunk_size*radius]
      ob.location.z = -player_floor*block_size
      ob.scale = Vector((block_size, block_size, block_size))
      for i, cd in enumerate(bpy.context.active_object.data.attributes['color'].data):
@@ -86,7 +86,7 @@ def spawn_occlusion_chunks(radius, noise, d):
  def spawn_chunks(radius, noise):
      terrain = []
      verts = []
-     for y in range(0, radius*2):
+     for y in range(0, radius):
          for x in range(0, radius*2):
              chunk_origin = Vector((x*chunk_size, y*chunk_size, 0.0))
              spawn_voxel_heightmap(noise, verts, chunk_origin.x, chunk_origin.y, chunk_size, chunk_size, 1.0)
@@ -108,16 +108,20 @@ def spawn_chunks(radius, noise):
          bpy.context.object.modifiers['GeometryNodes']['Input_2'] = bpy.data.objects[vt + '_voxel']
          ob.scale = Vector((block_size, block_size, block_size))

-         player_floor = noise[chunk_size*radius][chunk_size*radius]
+         player_floor = noise[0][chunk_size*radius]
          ob.location.z = -player_floor*block_size
          terrain.append(ob)
      bpy.data.objects["Camera"].location.x = chunk_size*radius*block_size
-     bpy.data.objects["Camera"].location.y = chunk_size*radius*block_size
+     bpy.data.objects["Camera"].location.y = 0
      return terrain

  bpy.context.scene.render.image_settings.color_depth = "16"
  bpy.context.scene.render.image_settings.compression = 0

+ downsample = 4
+ bpy.context.scene.render.resolution_x = int(640/downsample)
+ bpy.context.scene.render.resolution_y = int(360/downsample)
+
  dataset_size = 32
  for d in range(dataset_size):
      # Recreate noise layers for random seeds!
@@ -134,7 +138,7 @@ def spawn_chunks(radius, noise):
bpy.data.objects["Camera"].rotation_euler.x = math.radians(random.randint(45, 115))
bpy.data.objects["Camera"].rotation_euler.z = math.radians(random.randint(-44, 44))
# Spawn random terrain
radius = 2
radius = 1
noise = np.zeros(shape=[chunk_size*radius*2, chunk_size*radius*2])
for layer in noise_layers:
opensimplex.seed(layer.seed)
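
The collapsed loop body above accumulates one opensimplex layer per seed into the noise grid. A sketch of that accumulation step, assuming opensimplex >= 0.4's module-level API; the 0.1 frequency and 4.0 amplitude are stand-ins for whatever the hidden noise_layers attributes hold:

import numpy as np
import opensimplex

chunk_size, radius = 8, 1
noise = np.zeros(shape=[chunk_size*radius*2, chunk_size*radius*2])
opensimplex.seed(1234)                       # one layer's seed
for y in range(noise.shape[0]):
    for x in range(noise.shape[1]):
        # noise2 returns a value in [-1, 1]; scale by an assumed amplitude
        noise[y][x] += opensimplex.noise2(x * 0.1, y * 0.1) * 4.0
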
11 changes: 11 additions & 0 deletions pca.py
@@ -0,0 +1,11 @@
+ import faiss
+ import numpy as np
+
+ # random training data
+ mt = np.random.rand(1, 4).astype('float32')
+ mat = faiss.PCAMatrix(4, 2)
+ mat.train(mt)
+ assert mat.is_trained
+ tr = mat.apply(mt)
+ # the output keeps only the 2 leading principal components
+ print(tr.shape)
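
A single random row is a degenerate training set for PCA. The upstream faiss example trains on many vectors and prints the per-column energies, which come out in decreasing order; a sketch closer to that demo:

import faiss
import numpy as np

mt = np.random.rand(1000, 4).astype('float32')   # 1000 training vectors
mat = faiss.PCAMatrix(4, 2)
mat.train(mt)
tr = mat.apply(mt)
print((tr ** 2).sum(0))   # column energies; the first should dominate
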
34 changes: 34 additions & 0 deletions scqu.py
@@ -0,0 +1,34 @@
+ import faiss
+ import numpy as np
+ from skimage import io, transform
+ import os
+ from einops import rearrange
+ import sys
+
+ d = 4  # data dimension
+ dataset_size = 32
+
+ # train set
+ img = io.imread(os.path.join("data", "voxels_0.png"))
+ xt = rearrange(img, 'h w c -> (h w) c').astype('float32')
+ print(sys.getsizeof(xt))
+
+ # QT_4bit allocates 4 bits per dimension (QT_8bit also works)
+ sq = faiss.ScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
+ sq.train(xt)
+
+ # encode
+ codes = sq.compute_codes(xt)
+ print(sys.getsizeof(codes))
+ print(type(codes[0][1]))
+
+ # decode
+ x2 = sq.decode(codes)
+
+ comp = rearrange(x2, '(h w) c -> h w c', h=90, w=160)
+ io.imsave("test.png", comp.astype(img.dtype))  # cast back to the source dtype before saving
+
+ # compute reconstruction error
+ # avg_relative_error = ((xt - x2)**2).sum() / (xt ** 2).sum()
+ # print(codes)
+ # print(avg_relative_error)
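
The commented-out check at the bottom is the number that matters; a self-contained sketch that reports code size and reconstruction error for QT_4bit, with random data standing in for the image pixels:

import faiss
import numpy as np

d = 4
xt = np.random.rand(14400, d).astype('float32')    # 90*160 pixels, 4 channels
sq = faiss.ScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
sq.train(xt)
codes = sq.compute_codes(xt)
x2 = sq.decode(codes)
print(codes.nbytes, xt.nbytes)                     # packed 4-bit codes vs. float32 input
print(((xt - x2) ** 2).sum() / (xt ** 2).sum())    # average relative error
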
171 changes: 119 additions & 52 deletions train.py
@@ -6,17 +6,17 @@
  from halonet_pytorch import HaloAttention
  from voxel_dataset import VoxelViewDataset
  from config import *
- from math import prod
- # from einops import
+ import numpy as np
+ from einops import rearrange, reduce, repeat

  # Hyperparameters
  num_epochs = 80
- batch_size = 100
+ batch_size = 1
  learning_rate = 0.001

  training_dataset = VoxelViewDataset(12, "data")
- dataloader = DataLoader(training_dataset, batch_size=4,
-                         shuffle=True, num_workers=3)
+ dataloader = DataLoader(training_dataset, batch_size=1,
+                         shuffle=True, num_workers=1)

  for i_batch, sample_batched in enumerate(dataloader):
      print(sample_batched.keys())
@@ -31,50 +31,117 @@
  # based on ResNet architecture
  # https://arxiv.org/pdf/1512.03385.pdf

- class NeuralNetwork(nn.Module):
-     def __init__(self) -> None:
-         super(NeuralNetwork, self).__init__()
-         view_ch = prod(screen_size)
-         voxel_ch = prod(voxel_grid_size)
-         self.residual = nn.Sequential(
-             nn.Conv2d(view_ch, view_ch, (7, 7, ),
-             nn.Conv2d(view_ch, view_ch, (7, 7)),
-             nn.
-             nn.AvgPool2d(),
-             nn.Linear(view_ch, voxel_ch)
-         )
-
-
- # class NeuralNetwork(nn.Module):
- #     def __init__(self):
- #         super(NeuralNetwork, self).__init__()
- #         self.flatten = nn.Flatten()
- #         self.linear_relu_stack = nn.Sequential(
- #             HaloAttention(
- #                 dim = screen_input_size/4,
- #                 block_size = 8,
- #                 halo_size = 4,
- #                 dim_head = 64,
- #                 heads = 4
- #             ),
- #             nn.ReLU(),
- #             nn.Linear(screen_input_size/4, voxel_grid_size[0])
- #         )
-
- #     def forward(self, x):
- #         x = self.flatten(x)
- #         logits = self.linear_relu_stack(x)
- #         return logits
-
- # model = NeuralNetwork().to(device)
- # print(model)
-
- # print(model)
-
-
- # def get_reward(actual, prediction, rewards):
- #     reward = 0
- #     for i in len(actual):
- #         if actual[i] == prediction[i]:
- #             reward += rewards[i]
- #     return reward
+ # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/deep_residual_network/main.py
+
+ def conv3x3(in_channels, out_channels, stride=1):
+     return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
+
+ # note: np.prod(screen_size) is the total pixel count (4*90*160), not a channel count
+ view_channels = np.prod(screen_size)
+ voxel_channels = np.prod(voxel_grid_size)
+
+ # Residual block
+ class ResidualBlock(nn.Module):
+     def __init__(self, in_channels, out_channels, stride=1, downsample=None) -> None:
+         super(ResidualBlock, self).__init__()
+         # TODO Replace convolutional layers with HaloAttention
+         self.conv1 = conv3x3(in_channels, out_channels, stride)
+         self.bn1 = nn.BatchNorm2d(out_channels)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = conv3x3(out_channels, out_channels)
+         self.bn2 = nn.BatchNorm2d(out_channels)
+         self.downsample = downsample
+
+     def forward(self, x):
+         residual = x
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+         out = self.conv2(out)
+         out = self.bn2(out)
+         if self.downsample:
+             residual = self.downsample(x)
+         out += residual
+         out = self.relu(out)
+         return out
+
+ class ResNet(nn.Module):
+     def __init__(self, block, layers) -> None:
+         super(ResNet, self).__init__()
+         self.in_channels = view_channels
+         self.conv = conv3x3(3, view_channels)
+         self.bn = nn.BatchNorm2d(view_channels)
+         self.relu = nn.ReLU(inplace=True)
+         self.layer1 = self.make_layer(block, view_channels, layers[0])
+         self.layer2 = self.make_layer(block, view_channels*2, layers[1], 2)
+         self.layer3 = self.make_layer(block, view_channels*4, layers[2], 2)
+         self.avg_pool = nn.AvgPool2d(view_channels//2)  # AvgPool2d needs an integer kernel size
+         self.fc = nn.Linear(view_channels*4, voxel_channels)
+
+     def make_layer(self, block, out_channels, blocks, stride=1):
+         downsample = None
+         if stride != 1 or self.in_channels != out_channels:
+             downsample = nn.Sequential(
+                 conv3x3(self.in_channels, out_channels, stride=stride),
+                 nn.BatchNorm2d(out_channels)
+             )
+         layers = []
+         layers.append(block(self.in_channels, out_channels, stride, downsample))
+         self.in_channels = out_channels
+         for i in range(1, blocks):
+             layers.append(block(out_channels, out_channels))
+         return nn.Sequential(*layers)
+
+     def forward(self, x):
+         out = self.conv(x)
+         out = self.bn(out)
+         out = self.relu(out)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.avg_pool(out)
+         out = out.view(out.size(0), -1)
+         out = self.fc(out)
+         return out
+
+ def get_reward(actual, prediction, rewards):
+     reward = 0
+     for i in range(len(actual)):
+         if actual[i] == prediction[i]:
+             reward += rewards[i]
+     return reward
+
+ model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
+
+ criterion = nn.CrossEntropyLoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+ def update_lr(optimizer, lr):
+     for param_group in optimizer.param_groups:
+         param_group['lr'] = lr
+
+ total_step = len(training_dataset)
+ curr_lr = learning_rate
+ for epoch in range(num_epochs):
+     for i, sample in enumerate(training_dataset):
+         # the model needs the camera view as input ('view' is an assumed key;
+         # only 'grid' is confirmed by this diff)
+         outputs = model(sample['view'])
+         loss = criterion(outputs, sample['grid'])
+
+         optimizer.zero_grad()
+         loss.backward()
+         optimizer.step()
+
+         if (i+1) % 100 == 0:
+             print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
+                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
+
+     # Decay learning rate
+     if (epoch+1) % 20 == 0:
+         curr_lr /= 3
+         update_lr(optimizer, curr_lr)
+
+ # model.eval()
+ # with torch.no_grad():
+ #     for sample in test_dataset:
+
+
+ torch.save(model.state_dict(), 'voxelsight.ckpt')
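
nn.CrossEntropyLoss as wired above treats the whole 16*16 grid as a single classification target, which expects one class index (or a probability vector) per sample. If each voxel is an independent binary occupancy, a per-voxel loss is the likelier fit; a sketch of that alternative, with shapes assumed from config.py:

import torch
import torch.nn as nn

logits = torch.randn(1, 256)                   # model output for one view (16*16 voxels)
grid = torch.randint(0, 2, (1, 256)).float()   # assumed binary occupancy target
criterion = nn.BCEWithLogitsLoss()             # per-voxel binary loss
print(criterion(logits, grid).item())
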
Binary file modified voxel.blend
Binary file added voxel.blend1
15 changes: 5 additions & 10 deletions voxel_dataset.py
@@ -10,13 +10,12 @@
+ import faiss
  from einops import rearrange
  from config import *
- import faiss

  # Ignore warnings
  import warnings
  warnings.filterwarnings("ignore")

- plt.ion()   # interactive mode

  class VoxelViewDataset(Dataset):
      """
      Monocular forward facing views of 64x64 voxel grid
@@ -34,14 +33,10 @@ def __len__(self):

      def get_camera_view(self, idx):
          img = io.imread(os.path.join(self.dir, "voxels_{}.png".format(idx)))
-         d = 4
-         cs = 1
-         colors = rearrange(img, 'w h c -> (w h) c')
-         print(colors.shape)
-         pq = faiss.ScalarQuantizer(d, cs)
-         pq.train(colors)
-         # codes = pq.compute_codes(colors)
-         # print(codes)
+         xt = rearrange(img, 'h w c -> (h w) c').astype('float32')
+         sq = faiss.ScalarQuantizer(screen_size[0], faiss.ScalarQuantizer.QT_4bit)
+         sq.train(xt)
+         codes = sq.compute_codes(xt)
          return torch.from_numpy(img)

      def get_voxel_grid(self, idx):
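
As committed, get_camera_view trains the quantizer and computes codes but still returns the raw image. If the codes are the intended dataset output (the commit message says the quantizer is there to reduce dimensionality), the tail of the method would look something like this sketch; returning codes instead of img is an assumption:

          codes = sq.compute_codes(xt)    # (n_pixels, code_size) uint8, 4 bits per channel
          return torch.from_numpy(codes)  # hand the compressed view to the model
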
