Commit
Use faiss scalar quantizer to reduce dimensionality
Wesxdz committed Nov 1, 2022
1 parent 7edc9c2 commit 3c5ae15
Showing 8 changed files with 186 additions and 73 deletions.
10 changes: 6 additions & 4 deletions config.py
@@ -1,8 +1,10 @@
+ downsample = 4
  # C x H x W
- screen_size = (4, 360, 640)
+ screen_size = (4, int(360/downsample), int(640/downsample))
  # screen_size = (128, 128)
  # TODO: The screen size is too large to fit in GPU memory and is probably overkill
+ # Let's downscale the resolution of the input images and represent the four channels as one

- screen_input_size = 4 * 360 * 640
- voxel_grid_size = (64*64,)
- dataset_size = 1
+ # screen_input_size = 4 * 360 * 640
+ voxel_grid_size = (16*16,)
+ dataset_size = 32
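
The TODO above suggests downscaling (done here via downsample) and merging the four channels into one, which this commit does not implement yet. A minimal sketch of the merge, assuming a channel mean and einops (used elsewhere in the project):

import numpy as np
from einops import reduce

view = np.zeros((4, 90, 160), dtype='float32')   # C x H x W at downsample = 4
mono = reduce(view, 'c h w -> h w', 'mean')      # one way to collapse the channels
print(mono.shape)                                 # (90, 160)
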
18 changes: 11 additions & 7 deletions generate_dataset.py
@@ -26,7 +26,7 @@ def set_random_seed(self):

  pool_collection_name = "voxels"
  block_size = 0.0
- chunk_size = 16
+ chunk_size = 8

  # TODO: Will need to create a distinct placer for each voxel type, or encode objects in vertex data
  placer = bpy.data.objects["placer"]
@@ -63,7 +63,7 @@ def spawn_occlusion_chunks(radius, noise, d):
      verts = []
      rgb = []
      colors = []
-     for y in range(0, radius*2):
+     for y in range(0, radius):
          for x in range(0, radius*2):
              chunk_origin = Vector((x*chunk_size, y*chunk_size, 0.0))
              spawn_voxel_occlusion_heightmap(noise, verts, colors, rgb, chunk_origin.x, chunk_origin.y, chunk_size, chunk_size, 1.0)
@@ -73,7 +73,7 @@ def spawn_occlusion_chunks(radius, noise, d):
      bpy.context.object.modifiers['GeometryNodes'].node_group = bpy.data.node_groups['Geometry Nodes']
      bpy.context.object.modifiers['GeometryNodes']['Input_2'] = bpy.data.objects['occlusion_voxel']
      bpy.ops.geometry.color_attribute_add(name="color", color=(1.0, 0, 0.0, 1))
-     player_floor = noise[chunk_size*radius][chunk_size*radius]
+     player_floor = noise[0][chunk_size*radius]
      ob.location.z = -player_floor*block_size
      ob.scale = Vector((block_size, block_size, block_size))
      for i, cd in enumerate(bpy.context.active_object.data.attributes['color'].data):
@@ -86,7 +86,7 @@ def spawn_occlusion_chunks(radius, noise, d):
  def spawn_chunks(radius, noise):
      terrain = []
      verts = []
-     for y in range(0, radius*2):
+     for y in range(0, radius):
          for x in range(0, radius*2):
              chunk_origin = Vector((x*chunk_size, y*chunk_size, 0.0))
              spawn_voxel_heightmap(noise, verts, chunk_origin.x, chunk_origin.y, chunk_size, chunk_size, 1.0)
@@ -108,16 +108,20 @@ def spawn_chunks(radius, noise):
          bpy.context.object.modifiers['GeometryNodes']['Input_2'] = bpy.data.objects[vt + '_voxel']
          ob.scale = Vector((block_size, block_size, block_size))

-         player_floor = noise[chunk_size*radius][chunk_size*radius]
+         player_floor = noise[0][chunk_size*radius]
          ob.location.z = -player_floor*block_size
          terrain.append(ob)
      bpy.data.objects["Camera"].location.x = chunk_size*radius*block_size
-     bpy.data.objects["Camera"].location.y = chunk_size*radius*block_size
+     bpy.data.objects["Camera"].location.y = 0
      return terrain

  bpy.context.scene.render.image_settings.color_depth = "16"
  bpy.context.scene.render.image_settings.compression = 0

+ downsample = 4
+ bpy.context.scene.render.resolution_x = int(640/downsample)
+ bpy.context.scene.render.resolution_y = int(360/downsample)
+
  dataset_size = 32
  for d in range(dataset_size):
      # Recreate noise layers for random seeds!
@@ -134,7 +138,7 @@ def spawn_chunks(radius, noise):
bpy.data.objects["Camera"].rotation_euler.x = math.radians(random.randint(45, 115))
bpy.data.objects["Camera"].rotation_euler.z = math.radians(random.randint(-44, 44))
# Spawn random terrain
radius = 2
radius = 1
noise = np.zeros(shape=[chunk_size*radius*2, chunk_size*radius*2])
for layer in noise_layers:
opensimplex.seed(layer.seed)
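
The collapsed loop body above accumulates one opensimplex layer per seed into the noise grid. A sketch of that accumulation step, assuming opensimplex >= 0.4's module-level API; the 0.1 frequency and 4.0 amplitude are stand-ins for whatever the hidden noise_layers attributes hold:

import numpy as np
import opensimplex

chunk_size, radius = 8, 1
noise = np.zeros(shape=[chunk_size*radius*2, chunk_size*radius*2])
opensimplex.seed(1234)                       # one layer's seed
for y in range(noise.shape[0]):
    for x in range(noise.shape[1]):
        # noise2 returns a value in [-1, 1]; scale by an assumed amplitude
        noise[y][x] += opensimplex.noise2(x * 0.1, y * 0.1) * 4.0
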
11 changes: 11 additions & 0 deletions pca.py
@@ -0,0 +1,11 @@
+ import faiss
+ import numpy as np
+
+ # random training data
+ mt = np.random.rand(1, 4).astype('float32')
+ mat = faiss.PCAMatrix(4, 2)
+ mat.train(mt)
+ assert mat.is_trained
+ tr = mat.apply(mt)
+ # the output keeps only the 2 leading principal components
+ print(tr.shape)
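
A single random row is a degenerate training set for PCA. The upstream faiss example trains on many vectors and prints the per-column energies, which come out in decreasing order; a sketch closer to that demo:

import faiss
import numpy as np

mt = np.random.rand(1000, 4).astype('float32')   # 1000 training vectors
mat = faiss.PCAMatrix(4, 2)
mat.train(mt)
tr = mat.apply(mt)
print((tr ** 2).sum(0))   # column energies; the first should dominate
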
34 changes: 34 additions & 0 deletions scqu.py
@@ -0,0 +1,34 @@
+ import faiss
+ import numpy as np
+ from skimage import io, transform
+ import os
+ from einops import rearrange
+ import sys
+
+ d = 4  # data dimension
+ dataset_size = 32
+
+ # train set
+ img = io.imread(os.path.join("data", "voxels_0.png"))
+ xt = rearrange(img, 'h w c -> (h w) c').astype('float32')
+ print(sys.getsizeof(xt))
+
+ # QT_4bit allocates 4 bits per dimension (QT_8bit also works)
+ sq = faiss.ScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
+ sq.train(xt)
+
+ # encode
+ codes = sq.compute_codes(xt)
+ print(sys.getsizeof(codes))
+ print(type(codes[0][1]))
+
+ # decode
+ x2 = sq.decode(codes)
+
+ comp = rearrange(x2, '(h w) c -> h w c', h=90, w=160)
+ io.imsave("test.png", comp.astype(img.dtype))  # cast back to the source dtype before saving
+
+ # compute reconstruction error
+ # avg_relative_error = ((xt - x2)**2).sum() / (xt ** 2).sum()
+ # print(codes)
+ # print(avg_relative_error)
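
The commented-out check at the bottom is the number that matters; a self-contained sketch that reports code size and reconstruction error for QT_4bit, with random data standing in for the image pixels:

import faiss
import numpy as np

d = 4
xt = np.random.rand(14400, d).astype('float32')    # 90*160 pixels, 4 channels
sq = faiss.ScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
sq.train(xt)
codes = sq.compute_codes(xt)
x2 = sq.decode(codes)
print(codes.nbytes, xt.nbytes)                     # packed 4-bit codes vs. float32 input
print(((xt - x2) ** 2).sum() / (xt ** 2).sum())    # average relative error
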
171 changes: 119 additions & 52 deletions train.py
@@ -6,17 +6,17 @@
  from halonet_pytorch import HaloAttention
  from voxel_dataset import VoxelViewDataset
  from config import *
- from math import prod
- # from einops import
+ import numpy as np
+ from einops import rearrange, reduce, repeat

  # Hyperparameters
  num_epochs = 80
- batch_size = 100
+ batch_size = 1
  learning_rate = 0.001

  training_dataset = VoxelViewDataset(12, "data")
- dataloader = DataLoader(training_dataset, batch_size=4,
-                         shuffle=True, num_workers=3)
+ dataloader = DataLoader(training_dataset, batch_size=1,
+                         shuffle=True, num_workers=1)

  for i_batch, sample_batched in enumerate(dataloader):
      print(sample_batched.keys())
@@ -31,50 +31,117 @@
  # based on ResNet architecture
  # https://arxiv.org/pdf/1512.03385.pdf

- class NeuralNetwork(nn.Module):
-     def __init__(self) -> None:
-         super(NeuralNetwork, self).__init__()
-         view_ch = prod(screen_size)
-         voxel_ch = prod(voxel_grid_size)
-         self.residual = nn.Sequential(
-             nn.Conv2d(view_ch, view_ch, (7, 7, ),
-             nn.Conv2d(view_ch, view_ch, (7, 7)),
-             nn.
-             nn.AvgPool2d(),
-             nn.Linear(view_ch, voxel_ch)
-         )
-
-
- # class NeuralNetwork(nn.Module):
- #     def __init__(self):
- #         super(NeuralNetwork, self).__init__()
- #         self.flatten = nn.Flatten()
- #         self.linear_relu_stack = nn.Sequential(
- #             HaloAttention(
- #                 dim = screen_input_size/4,
- #                 block_size = 8,
- #                 halo_size = 4,
- #                 dim_head = 64,
- #                 heads = 4
- #             ),
- #             nn.ReLU(),
- #             nn.Linear(screen_input_size/4, voxel_grid_size[0])
- #         )
-
- #     def forward(self, x):
- #         x = self.flatten(x)
- #         logits = self.linear_relu_stack(x)
- #         return logits
-
- # model = NeuralNetwork().to(device)
- # print(model)
-
- # print(model)
-
-
- # def get_reward(actual, prediction, rewards):
- #     reward = 0
- #     for i in len(actual):
- #         if actual[i] == prediction[i]:
- #             reward += rewards[i]
- #     return reward
+ # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/deep_residual_network/main.py
+
+ def conv3x3(in_channels, out_channels, stride=1):
+     return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
+
+ # note: np.prod(screen_size) is the total pixel count (4*90*160), not a channel count
+ view_channels = np.prod(screen_size)
+ voxel_channels = np.prod(voxel_grid_size)
+
+ # Residual block
+ class ResidualBlock(nn.Module):
+     def __init__(self, in_channels, out_channels, stride=1, downsample=None) -> None:
+         super(ResidualBlock, self).__init__()
+         # TODO Replace convolutional layers with HaloAttention
+         self.conv1 = conv3x3(in_channels, out_channels, stride)
+         self.bn1 = nn.BatchNorm2d(out_channels)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = conv3x3(out_channels, out_channels)
+         self.bn2 = nn.BatchNorm2d(out_channels)
+         self.downsample = downsample
+
+     def forward(self, x):
+         residual = x
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+         out = self.conv2(out)
+         out = self.bn2(out)
+         if self.downsample:
+             residual = self.downsample(x)
+         out += residual
+         out = self.relu(out)
+         return out
+
+ class ResNet(nn.Module):
+     def __init__(self, block, layers) -> None:
+         super(ResNet, self).__init__()
+         self.in_channels = view_channels
+         self.conv = conv3x3(3, view_channels)
+         self.bn = nn.BatchNorm2d(view_channels)
+         self.relu = nn.ReLU(inplace=True)
+         self.layer1 = self.make_layer(block, view_channels, layers[0])
+         self.layer2 = self.make_layer(block, view_channels*2, layers[1], 2)
+         self.layer3 = self.make_layer(block, view_channels*4, layers[2], 2)
+         self.avg_pool = nn.AvgPool2d(view_channels//2)  # AvgPool2d needs an integer kernel size
+         self.fc = nn.Linear(view_channels*4, voxel_channels)
+
+     def make_layer(self, block, out_channels, blocks, stride=1):
+         downsample = None
+         if stride != 1 or self.in_channels != out_channels:
+             downsample = nn.Sequential(
+                 conv3x3(self.in_channels, out_channels, stride=stride),
+                 nn.BatchNorm2d(out_channels)
+             )
+         layers = []
+         layers.append(block(self.in_channels, out_channels, stride, downsample))
+         self.in_channels = out_channels
+         for i in range(1, blocks):
+             layers.append(block(out_channels, out_channels))
+         return nn.Sequential(*layers)
+
+     def forward(self, x):
+         out = self.conv(x)
+         out = self.bn(out)
+         out = self.relu(out)
+         out = self.layer1(out)
+         out = self.layer2(out)
+         out = self.layer3(out)
+         out = self.avg_pool(out)
+         out = out.view(out.size(0), -1)
+         out = self.fc(out)
+         return out
+
+ def get_reward(actual, prediction, rewards):
+     reward = 0
+     for i in range(len(actual)):
+         if actual[i] == prediction[i]:
+             reward += rewards[i]
+     return reward
+
+ model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
+
+ criterion = nn.CrossEntropyLoss()
+ optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+
+ def update_lr(optimizer, lr):
+     for param_group in optimizer.param_groups:
+         param_group['lr'] = lr
+
+ total_step = len(training_dataset)
+ curr_lr = learning_rate
+ for epoch in range(num_epochs):
+     for i, sample in enumerate(training_dataset):
+         # the model needs the camera view as input ('view' is an assumed key;
+         # only 'grid' is confirmed by this diff)
+         outputs = model(sample['view'])
+         loss = criterion(outputs, sample['grid'])
+
+         optimizer.zero_grad()
+         loss.backward()
+         optimizer.step()
+
+         if (i+1) % 100 == 0:
+             print("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
+                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
+
+     # Decay learning rate
+     if (epoch+1) % 20 == 0:
+         curr_lr /= 3
+         update_lr(optimizer, curr_lr)
+
+ # model.eval()
+ # with torch.no_grad():
+ #     for sample in test_dataset:
+
+
+ torch.save(model.state_dict(), 'voxelsight.ckpt')
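
nn.CrossEntropyLoss as wired above treats the whole 16*16 grid as a single classification target, which expects one class index (or a probability vector) per sample. If each voxel is an independent binary occupancy, a per-voxel loss is the likelier fit; a sketch of that alternative, with shapes assumed from config.py:

import torch
import torch.nn as nn

logits = torch.randn(1, 256)                   # model output for one view (16*16 voxels)
grid = torch.randint(0, 2, (1, 256)).float()   # assumed binary occupancy target
criterion = nn.BCEWithLogitsLoss()             # per-voxel binary loss
print(criterion(logits, grid).item())
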
Binary file modified voxel.blend
Binary file added voxel.blend1
15 changes: 5 additions & 10 deletions voxel_dataset.py
@@ -10,13 +10,12 @@
+ import faiss
  from einops import rearrange
  from config import *
- import faiss

  # Ignore warnings
  import warnings
  warnings.filterwarnings("ignore")

- plt.ion()   # interactive mode

  class VoxelViewDataset(Dataset):
      """
      Monocular forward facing views of 64x64 voxel grid
@@ -34,14 +33,10 @@ def __len__(self):

      def get_camera_view(self, idx):
          img = io.imread(os.path.join(self.dir, "voxels_{}.png".format(idx)))
-         d = 4
-         cs = 1
-         colors = rearrange(img, 'w h c -> (w h) c')
-         print(colors.shape)
-         pq = faiss.ScalarQuantizer(d, cs)
-         pq.train(colors)
-         # codes = pq.compute_codes(colors)
-         # print(codes)
+         xt = rearrange(img, 'h w c -> (h w) c').astype('float32')
+         sq = faiss.ScalarQuantizer(screen_size[0], faiss.ScalarQuantizer.QT_4bit)
+         sq.train(xt)
+         codes = sq.compute_codes(xt)
          return torch.from_numpy(img)

      def get_voxel_grid(self, idx):
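
As committed, get_camera_view trains the quantizer and computes codes but still returns the raw image. If the codes are the intended dataset output (the commit message says the quantizer is there to reduce dimensionality), the tail of the method would look something like this sketch; returning codes instead of img is an assumption:

          codes = sq.compute_codes(xt)    # (n_pixels, code_size) uint8, 4 bits per channel
          return torch.from_numpy(codes)  # hand the compressed view to the model
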
