altronis · equinox2333 · Nov 5, 2022 · Nov 5, 2022 · Nov 12, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 .idea
 __pycache__
-out.obj
+out.obj
+test1.py
diff --git a/bproc_render.py b/bproc_render.py
@@ -2,15 +2,22 @@
 # 1. 0.hdf5: save the whole points, normals, colors and depth
 # 2. info.pkl: save the intrinsics, pose, angle, height, width
 # the two output files is to be the input files of compute_point_cloud.py
-# usage: blenderproc run ./bproc_render.py [--angle] [--width] [--height] [--focal] 
-#        [--activate_antialiasing]  [--mesh_directory] [--mesh_file_name] 
-#        [--output_directory]
+# usage: blenderproc run ./bproc_render.py [--angle] [--width] [--height]
+#        [--focal_x] [--focal_y] [--activate_antialiasing] [--mesh_directory]
+#        [--mesh_filename] [--output_directory] [--png_filename] [--info_dict_filename]
 import blenderproc as bproc
-import numpy as np
 import argparse
+
+# from tools import generate_pose, save_info_dict, save_image, merge_path, generate_intrinsics
+import numpy as np
+import pickle
 import os
 from PIL import Image
-import pickle
+
+def merge_path(directory: str, filename: str) -> str:
+    """Join directory and filename with a single '/'; an empty directory returns filename unchanged."""
+    separator = '' if not directory or directory.endswith('/') else '/'  # '' used to raise IndexError on directory[-1]
+    return directory + separator + filename
 
 def generate_pose(angle='random'):
     # View from back to front
@@ -22,16 +29,16 @@ def generate_pose(angle='random'):
         angle_x, angle_y, angle_z = np.pi / 2, 0, np.pi
     elif angle == 'left':
         angle_x, angle_y, angle_z = np.pi / 2, 0, np.pi / 2
-
     elif angle == 'top':
         angle_x, angle_y, angle_z = 0, 0, np.pi
     elif angle == 'bottom':
         angle_x, angle_y, angle_z = 0, np.pi, 0
     elif len(angle) == 3:
         angle_x, angle_y, angle_z = int(angle[0]) * np.pi / 2, int(angle[1]) * np.pi / 2, int(angle[2]) * np.pi / 2
-    else:
+    elif angle == 'random':
         angle_x, angle_y, angle_z = np.random.uniform() * 2 * np.pi, np.random.uniform() * 2 * np.pi, np.random.uniform() * 2 * np.pi    
-
+    else:
+        raise ValueError(f'wrong angle input format: {angle}')
     Rx = np.array([[1, 0, 0],
                    [0, np.cos(angle_x), -np.sin(angle_x)],
                    [0, np.sin(angle_x), np.cos(angle_x)]])
@@ -47,31 +54,60 @@ def generate_pose(angle='random'):
     pose = np.concatenate([np.concatenate([R, t], 1), np.array([[0, 0, 0, 1]])], 0)
     return pose
 
+def generate_intrinsics(focal_x, focal_y, width, height):  # -> 3x3 K matrix: focal lengths on the diagonal, (width/2, height/2) in the last column
+    return np.array([[focal_x, 0, width / 2], [0, focal_y, height / 2], [0, 0, 1]])
+
+def save_image(output_directory: str, output_filename: str, image):
+    """Save `image` via PIL as `output_filename` inside `output_directory`.
+
+    The directory is created when missing; an `image` that is not already a
+    numpy array is converted with np.array first.
+    """
+    # os.makedirs replaces the shell `mkdir -p` call: no command is built from the
+    # caller-supplied path, so spaces or shell metacharacters in it are harmless.
+    os.makedirs(output_directory, exist_ok=True)
+    pixels = image if isinstance(image, np.ndarray) else np.array(image)
+    Image.fromarray(pixels).save(merge_path(output_directory, output_filename))
+
+def save_info_dict(output_directory: str, output_filename: str, intrinsics, pose, angle, width, height):
+    """Pickle intrinsics, pose, angle, width and height as one dict to output_directory/output_filename."""
+    os.makedirs(output_directory, exist_ok=True)  # no shell command built from the path, unlike os.system('mkdir -p ...')
+    info = {'intrinsics': intrinsics, 'pose': pose, 'angle': angle, 'width': width, 'height': height}
+    with open(merge_path(output_directory, output_filename), 'wb') as f:
+        pickle.dump(info, f)
+
 if __name__ == '__main__':
     bproc.init()
 
     parser = argparse.ArgumentParser()
     parser.add_argument("--angle", type=str, default='random', help="which angle")
     parser.add_argument("--width", type=int, default=512, help="width of output image")
     parser.add_argument("--height", type=int, default=512, help="height of output image")
-    parser.add_argument("--focal", type=int, default=400, help="focal of output image")
+    parser.add_argument("--focal_x", type=int, default=400, help="x focal of output image")
+    parser.add_argument("--focal_y", type=int, default=400, help="y focal of output image")
     parser.add_argument("--activate_antialiasing", type=bool, default=False, help="whether activate_antialiasing")
     parser.add_argument("--mesh_directory", type=str, default='./models/', help='mesh_directory')
-    parser.add_argument("--mesh_file_name", type=str, default='model_normalized.obj', help='mesh_file_name directory')
-    parser.add_argument("--output_directory", type=str, default='./')
+    parser.add_argument("--mesh_filename", type=str, default='model_normalized.obj', help='mesh_filename directory')
+    parser.add_argument("--output_directory", type=str, default='./bproc_render_output')
+    parser.add_argument("--png_filename", type=str, default='rgb.png')
+    parser.add_argument("--info_dict_filename", type=str, default='info_dict.pkl')
     args = parser.parse_args()
 
     angle = args.angle
     width = args.width
     height = args.height
-    focal = args.focal
+    focal_x = args.focal_x
+    focal_y = args.focal_y
     activate_antialiasing = args.activate_antialiasing
-    mesh_path = args.mesh_directory + args.mesh_file_name
+    mesh_directory = args.mesh_directory
+    mesh_filename = args.mesh_filename
     output_directory = args.output_directory
+    png_filename = args.png_filename
+    info_dict_filename = args.info_dict_filename
 
-    objs = bproc.loader.load_obj(mesh_path)
+    objs = bproc.loader.load_obj(merge_path(args.mesh_directory, args.mesh_filename))
 
-    intrinsics = np.array([[focal, 0, width / 2], [0, focal, height / 2], [0, 0, 1]])
+    intrinsics = generate_intrinsics(focal_x, focal_y, width, height)
     bproc.camera.set_intrinsics_from_K_matrix(intrinsics, width, height)
 
     pose = generate_pose(angle)
@@ -82,19 +118,6 @@ def generate_pose(angle='random'):
 
     data = bproc.renderer.render()
 
-    os.system('mkdir -p ' + output_directory + 'output')
-    bproc.writer.write_hdf5(output_directory + "output/", data)
-
-    image = Image.fromarray(data['colors'][0])
-    image.save(output_directory + 'output/rgb_'+args.angle+'.png')
-    print('Save image: ' + output_directory + 'output/rgb_'+args.angle+'.png')
-    with open(output_directory + 'output/info.pkl', 'wb') as f:
-        d = dict()
-        d['intrinsics'] = intrinsics
-        d['pose'] = pose
-        d['angle'] = angle
-        d['height'] = height
-        d['width'] = width
-        pickle.dump(d, f)
-        print('Save info dict: ' + output_directory + 'output/info.pkl')
-
+    bproc.writer.write_hdf5(output_directory, data)
+    save_image(output_directory, png_filename, data['colors'][0])
+    save_info_dict(output_directory, info_dict_filename, intrinsics, pose, angle, width, height)
diff --git a/bproc_render_output/0.hdf5 b/bproc_render_output/0.hdf5
diff --git a/bproc_render_output/info_dict.pkl b/bproc_render_output/info_dict.pkl
diff --git a/bproc_render_output/normals.npy b/bproc_render_output/normals.npy
diff --git a/bproc_render_output/point_cloud.npy b/bproc_render_output/point_cloud.npy
diff --git a/bproc_render_output/rgb.png b/bproc_render_output/rgb.png
diff --git a/bproc_render_outputpoint_cloud.npy b/bproc_render_outputpoint_cloud.npy
diff --git a/compute_point_cloud.py b/compute_point_cloud.py
@@ -8,43 +8,37 @@
 import numpy as np
 from kornia.geometry.camera.perspective import unproject_points
 import argparse
-import h5py
-import pickle
 import torch
-import os
+
+from tools import load_info_dict, load_hdf5, save_numpy
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("--input_directory", type=str, default='./output/')
-    parser.add_argument("--output_directory", type=str, default='./output/')
+    parser.add_argument("--input_directory", type=str, default='./bproc_render_output')
+    parser.add_argument("--output_directory", type=str, default='./bproc_render_output')
     parser.add_argument("--depth_threshold", type=int, default=10000)
     parser.add_argument("--hdf5_filename", type=str, default='0.hdf5')
-    parser.add_argument("--info_filename", type=str, default='info.pkl')
+    parser.add_argument("--info_dict_filename", type=str, default='info_dict.pkl')
     parser.add_argument("--output_colors", type=bool, default=False)
     parser.add_argument("--output_normals", type=bool, default=True)
-
+    parser.add_argument("--point_cloud_filename", type=str, default='point_cloud.npy')
+    parser.add_argument("--normals_filename", type=str, default='normals.npy')
+    parser.add_argument("--colors_filename", type=str, default='colors.npy')
     args = parser.parse_args()
 
     input_directory = args.input_directory
     output_directory = args.output_directory
     depth_threshold = args.depth_threshold
     hdf5_filename = args.hdf5_filename
-    info_filename = args.info_filename
+    info_dict_filename = args.info_dict_filename
     output_colors = args.output_colors
     output_normals = args.output_normals
+    point_cloud_filename = args.point_cloud_filename
+    normals_filename = args.normals_filename
+    colors_filename = args.colors_filename
 
-    with h5py.File(input_directory + hdf5_filename, "r") as data:
-        colors = np.array(data['colors'][:])
-        depth = np.array(data['depth'][:])
-        normals = np.array(data['normals'][:])
-
-    with open(input_directory + info_filename, 'rb') as f:
-        d = pickle.load(f)
-        intrinsics = d['intrinsics']
-        pose = d['pose']
-        angle = d['angle']
-        height = d['height']
-        width = d['width']
+    colors, depth, normals = load_hdf5(input_directory, hdf5_filename)
+    intrinsics, pose, angle, width, height = load_info_dict(input_directory, info_dict_filename)
 
     uv = np.arange(0, height*width, dtype=int).reshape((height, width))
     uv = np.concatenate([(uv % width).reshape(-1, 1), (uv // width).reshape(-1, 1)], axis=1) # (height*width, 2)
@@ -57,17 +51,12 @@
     normals = normals.reshape(-1, 3) # (height, width, 3) -> (height*width, 3)
     normals = normals[valid] #  (# of valid, 3)
     normals = (normals - 0.5) * 2
-    # TODO: valid
+    normals[:, 1] = -normals[:, 1]
     normals[:, 2] = -normals[:, 2]
     pc = unproject_points(torch.tensor(uv), torch.tensor(depth), torch.tensor(intrinsics)).numpy()
 
-    os.system('mkdir -p ' + output_directory)
-
-    with open(output_directory + 'point_cloud.npy', 'wb') as f:
-        np.save(f, pc)
+    save_numpy(output_directory, point_cloud_filename, pc)
     if output_normals:
-        with open(output_directory + 'normals.npy', 'wb') as f:
-            np.save(f, normals)
+        save_numpy(output_directory, normals_filename, normals)
     if output_colors:
-        with open(output_directory + 'colors.npy', 'wb') as f:
-            np.save(f, colors)
+        save_numpy(output_directory, colors_filename, colors)
diff --git a/config.py b/config.py
@@ -1,9 +1,9 @@
-num_samples = 500000  # Number of points to sample around the point cloud
+num_samples = 50000  # Number of points to sample around the point cloud
 
 use_dropout = False  # Whether to use dropout after FC layers
 batch_size = 256
 lr = 1e-4
-epochs = 5
+epochs = 400
 
 log_interval = 100
 res = 64  # Rendering resolution
diff --git a/data.py b/data.py
@@ -71,7 +71,6 @@ def get_pc_points_and_normals_from_mesh(mesh_path):
     mesh = trimesh.load(mesh_path)
     pc_points = mesh.vertices
     pc_points = normalize_pc(pc_points)
-
     pc_normals = mesh.vertex_normals
     return pc_points, pc_normals
 
@@ -83,14 +82,8 @@ def get_pc_points_and_normals_from_files(points_path, normals_path):
         pc_normals = np.load(f)
     return pc_points, pc_normals
 
-# Given path(s) to mesh or pc & normals data files, get the sampled 3D point coordinates and their SDF values.
-def get_training_data(num_samples, points_path=None, normals_path=None, mesh_path=None, source='file'):
-    if points_path is not None and normals_path is not None and source == 'file':
-        pc_points, pc_normals = get_pc_points_and_normals_from_files(points_path, normals_path)
-    elif mesh_path is not None and source == 'mesh':
-        pc_points, pc_normals = get_pc_points_and_normals_from_mesh(mesh_path)
-    else:
-        raise ValueError('get_training_data function has wrong error')
+# Given the number of samples and the point cloud's points and normals, get the sampled 3D point coordinates and their SDF values.
+def get_training_data(num_samples, pc_points, pc_normals):
     sample_pts = sample_training_pts(pc_points, num_samples)
     sdf = get_sdf(pc_points, pc_normals, sample_pts)
 
@@ -107,8 +100,8 @@ def to_tensor(x):
 
 
 class PointsDataset(Dataset):
-    def __init__(self, num_samples, points_path=None, normals_path=None, mesh_path=None, source='file'):
-        train_data = get_training_data(num_samples, points_path, normals_path, mesh_path, source)
+    def __init__(self, num_samples, pc_points, pc_normals):
+        train_data = get_training_data(num_samples, pc_points, pc_normals)
         self.pts = to_tensor(train_data['pts'])
         self.sdf = to_tensor(train_data['sdf'])
 

diff --git a/model.pth b/model.pth
diff --git a/out.png b/out.png
diff --git a/output/0.hdf5 b/output/0.hdf5
diff --git a/output/info.pkl b/output/info.pkl
diff --git a/output/normals.npy b/output/normals.npy
diff --git a/output/point_cloud.npy b/output/point_cloud.npy
diff --git a/output/rgb_000.png b/output/rgb_000.png
diff --git a/output/rgb_random.png b/output/rgb_random.png
diff --git a/output/rgb_top.png b/output/rgb_top.png
diff --git a/render.py b/render.py
@@ -3,9 +3,9 @@
 import torchvision
 import numpy as np
 
-# from model import DeepSDF
+from model import DeepSDF
 import sphere_tracing
-
+from tools import info_from_intrinsics
 
 def translation(sdf, t):
     def wrapper(p):
@@ -26,17 +26,18 @@ def compute_rotation_matrix(axes, angles):
     return rotation_matrices
 
 
-def render(model):
-    device = torch.device("cuda")
+def render(model, intrinsics, distance, azimuth, elevation, device):
     dtype = torch.float32
 
     num_iterations = 500
     convergence_threshold = 1e-3
 
     # ---------------- Intrinsic matrix ---------------- #
-    fx = fy = 256
-    cx = cy = 128
-    camera_matrix = torch.tensor([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], device=device)
+    # fx = fy = 256
+    # cx = cy = 128
+    # camera_matrix = torch.tensor([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], device=device)
+    camera_matrix = torch.from_numpy(intrinsics).to(device).type(dtype)
+    focal_x, focal_y, width, height = info_from_intrinsics(intrinsics)
 
     # ---------------- Camera position ---------------- #
     distance = 2.5
@@ -51,6 +52,7 @@ def render(model):
 
     # ---------------- Camera rotation ---------------- #
     target_position = torch.tensor([0.0, -1.0, 0.0], device=device, dtype=dtype)
+    # target_position = torch.tensor([0.0, 0.0, 0.0], device=device, dtype=dtype)
     up_direction = torch.tensor([0.0, 1.0, 0.0], device=device, dtype=dtype)
 
     camera_z_axis = target_position - camera_position
@@ -63,8 +65,8 @@ def render(model):
     light_directions = torch.tensor([1.0, -0.5, 0.0], device=device, dtype=dtype)
 
     # ---------------- Ray marching ---------------- #
-    y_positions = torch.arange(cy * 2, dtype=camera_matrix.dtype, device=device)
-    x_positions = torch.arange(cx * 2, dtype=camera_matrix.dtype, device=device)
+    y_positions = torch.arange(height, dtype=camera_matrix.dtype, device=device)
+    x_positions = torch.arange(width, dtype=camera_matrix.dtype, device=device)
     y_positions, x_positions = torch.meshgrid(y_positions, x_positions, indexing='ij')
     z_positions = torch.ones_like(y_positions)
     ray_positions = torch.stack((x_positions, y_positions, z_positions), dim=-1)
@@ -108,10 +110,10 @@ def render(model):
     return image.squeeze()
 
 
-# if __name__ == '__main__':
-#     model = DeepSDF(use_dropout=False)
-#     model.load_state_dict(torch.load('model.pth'))
-#     model.cuda()
-#     model.eval()
-#     image = render(model)
-#     torchvision.utils.save_image(image, f'out.png')
+if __name__ == '__main__':  # script entry: load weights from model.pth, render one image, save it as out.png
+    model = DeepSDF(use_dropout=False)
+    model.load_state_dict(torch.load('model.pth'))
+    model.cuda()
+    model.eval()  # switch the model to evaluation mode before rendering
+    image = render(model)  # NOTE(review): render() in this diff is render(model, intrinsics, distance, azimuth, elevation, device) with no defaults, so this one-argument call raises TypeError — pass the missing arguments
+    torchvision.utils.save_image(image, f'out.png')
diff --git a/results/epoch_39.png b/results/epoch_39.png
diff --git a/test1.py b/test1.py
@@ -0,0 +1,36 @@
+# import torch
+# import clip
+# from PIL import Image
+
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+# model, preprocess = clip.load("ViT-B/32", device=device)
+
+# image = preprocess(Image.open("CLIP.png")).unsqueeze(0).to(device)
+# text = clip.tokenize(["a diagram"]).to(device)
+
+# with torch.no_grad():
+#     image_features = model.encode_image(image)
+#     text_features = model.encode_text(text)
+
+#     logits_per_image, logits_per_text = model(image, text)
+#     probs = logits_per_image.softmax(dim=-1).cpu().numpy()
+
+# print("Label probs:", logits_per_image)  # prints: [[0.9927937  0.00421068 0.00299572]]
+
+import numpy as np
+from sklearn.neighbors import NearestNeighbors
+import argparse
+def get_pc_points_and_normals_from_files(points_path, normals_path):
+    """Load the arrays stored at points_path and normals_path and return them as (points, normals)."""
+    def _load(path):
+        with open(path, 'rb') as handle:
+            return np.load(handle)
+    return _load(points_path), _load(normals_path)
+if __name__ == '__main__':
+    # Scratch loop: for each epoch, print whether its offset past stage 1 within the cycle is one of the selected offsets.
+    learning_times_stage_1, learning_times_stage_2 = 20, 10
+    epoch_number_per_cycle, cycle_number = learning_times_stage_1 + learning_times_stage_2, 2
+    output_offsets = [i % learning_times_stage_2 for i in [0, 1, -1]]  # evaluates to [0, 1, 9]
+    for epoch in range(epoch_number_per_cycle * cycle_number):
+        output_image = (epoch % epoch_number_per_cycle - learning_times_stage_1) in output_offsets
+        print(f'epoch: {epoch, output_image}')