cvg · sarlinpe · Jan 25, 2024 · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024
diff --git a/demo.ipynb b/demo.ipynb
@@ -382,11 +382,11 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "71ab5306",
+   "id": "603c5533-f7b5-4e2c-ae62-de047abce7cc",
    "metadata": {},
    "outputs": [],
    "source": [
-    "pose = pycolmap.Image(tvec=ret[\"tvec\"], qvec=ret[\"qvec\"])\n",
+    "pose = pycolmap.Image(cam_from_world=ret[\"cam_from_world\"])\n",
     "viz_3d.plot_camera_colmap(\n",
     "    fig, pose, camera, color=\"rgba(0,255,0,0.5)\", name=query, fill=True\n",
     ")\n",

diff --git a/hloc/__init__.py b/hloc/__init__.py
@@ -21,15 +21,14 @@
 except ImportError:
     logger.warning("pycolmap is not installed, some features may not work.")
 else:
-    min_version = version.parse("0.3.0")
-    max_version = version.parse("0.4.0")
+    min_version = version.parse("0.6.0")
     found_version = pycolmap.__version__
     if found_version != "dev":
         version = version.parse(found_version)
-        if version < min_version or version > max_version:
-            s = f"pycolmap>={min_version},<={max_version}"
+        if version < min_version:
+            s = f"pycolmap>={min_version}"
             logger.warning(
-                "hloc now requires %s but found pycolmap==%s, "
+                "hloc requires %s but found pycolmap==%s, "
                 'please upgrade with `pip install --upgrade "%s"`',
                 s,
                 found_version,

diff --git a/hloc/extractors/dog.py b/hloc/extractors/dog.py
@@ -61,7 +61,7 @@ def _forward(self, data):
                 device=getattr(pycolmap.Device, "cuda" if use_gpu else "cpu"),
             )
 
-        keypoints, scores, descriptors = self.sift.extract(image_np)
+        keypoints, descriptors = self.sift.extract(image_np)
         scales = keypoints[:, 2]
         oris = np.rad2deg(keypoints[:, 3])
 
@@ -96,7 +96,7 @@ def _forward(self, data):
         keypoints = torch.from_numpy(keypoints[:, :2])  # keep only x, y
         scales = torch.from_numpy(scales)
         oris = torch.from_numpy(oris)
-        scores = torch.from_numpy(scores)
+        scores = keypoints.new_zeros(len(keypoints))  # no scores for SIFT yet
 
         if self.conf["max_keypoints"] != -1:
             # TODO: check that the scores from PyCOLMAP are 100% correct,

diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py
@@ -85,7 +85,7 @@ def pose_from_cluster(
     num_matches = 0
     for i, db_id in enumerate(db_ids):
         image = localizer.reconstruction.images[db_id]
-        if image.num_points3D() == 0:
+        if image.num_points3D == 0:
             logger.debug(f"No 3D points found for {image.name}.")
             continue
         points3D_ids = np.array(
@@ -106,12 +106,8 @@ def pose_from_cluster(
     mkp_idxs = [i for i in idxs for _ in kp_idx_to_3D[i]]
     mp3d_ids = [j for i in idxs for j in kp_idx_to_3D[i]]
     ret = localizer.localize(kpq, mkp_idxs, mp3d_ids, query_camera, **kwargs)
-    ret["camera"] = {
-        "model": query_camera.model_name,
-        "width": query_camera.width,
-        "height": query_camera.height,
-        "params": query_camera.params,
-    }
+    if ret is not None:
+        ret["camera"] = query_camera
 
     # mostly for logging and post-processing
     mkp_to_3D_to_db = [
@@ -156,7 +152,7 @@ def main(
     config = {"estimation": {"ransac": {"max_error": ransac_thresh}}, **(config or {})}
     localizer = QueryLocalizer(reference_sfm, config)
 
-    poses = {}
+    cam_from_world = {}
     logs = {
         "features": features,
         "matches": matches,
@@ -185,13 +181,13 @@ def main(
                 ret, log = pose_from_cluster(
                     localizer, qname, qcam, cluster_ids, features, matches
                 )
-                if ret["success"] and ret["num_inliers"] > best_inliers:
+                if ret is not None and ret["num_inliers"] > best_inliers:
                     best_cluster = i
                     best_inliers = ret["num_inliers"]
                 logs_clusters.append(log)
             if best_cluster is not None:
                 ret = logs_clusters[best_cluster]["PnP_ret"]
-                poses[qname] = (ret["qvec"], ret["tvec"])
+                cam_from_world[qname] = ret["cam_from_world"]
             logs["loc"][qname] = {
                 "db": db_ids,
                 "best_cluster": best_cluster,
@@ -202,28 +198,28 @@ def main(
             ret, log = pose_from_cluster(
                 localizer, qname, qcam, db_ids, features, matches
             )
-            if ret["success"]:
-                poses[qname] = (ret["qvec"], ret["tvec"])
+            if ret is not None:
+                cam_from_world[qname] = ret["cam_from_world"]
             else:
                 closest = reference_sfm.images[db_ids[0]]
-                poses[qname] = (closest.qvec, closest.tvec)
+                cam_from_world[qname] = closest.cam_from_world
             log["covisibility_clustering"] = covisibility_clustering
             logs["loc"][qname] = log
 
-    logger.info(f"Localized {len(poses)} / {len(queries)} images.")
+    logger.info(f"Localized {len(cam_from_world)} / {len(queries)} images.")
     logger.info(f"Writing poses to {results}...")
     with open(results, "w") as f:
-        for q in poses:
-            qvec, tvec = poses[q]
-            qvec = " ".join(map(str, qvec))
-            tvec = " ".join(map(str, tvec))
-            name = q.split("/")[-1]
+        for query, t in cam_from_world.items():
+            qvec = " ".join(map(str, t.rotation.quat[[3, 0, 1, 2]]))
+            tvec = " ".join(map(str, t.translation))
+            name = query.split("/")[-1]
             if prepend_camera_name:
-                name = q.split("/")[-2] + "/" + name
+                name = query.split("/")[-2] + "/" + name
             f.write(f"{name} {qvec} {tvec}\n")
 
     logs_path = f"{results}_logs.pkl"
     logger.info(f"Writing logs to {logs_path}...")
+    # TODO: Resolve pickling issue with pycolmap objects.
     with open(logs_path, "wb") as f:
         pickle.dump(logs, f)
     logger.info("Done!")

diff --git a/hloc/triangulation.py b/hloc/triangulation.py
@@ -45,7 +45,7 @@ def create_db_from_model(
 
     for i, camera in reconstruction.cameras.items():
         db.add_camera(
-            camera.model_id,
+            camera.model.value,
             camera.width,
             camera.height,
             camera.params,
@@ -116,7 +116,9 @@ def estimation_and_geometric_verification(
     with OutputCapture(verbose):
         with pycolmap.ostream():
             pycolmap.verify_matches(
-                database_path, pairs_path, max_num_trials=20000, min_inlier_ratio=0.1
+                database_path,
+                pairs_path,
+                options=dict(ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1)),
             )
 
 
@@ -143,7 +145,7 @@ def geometric_verification(
         kps0, noise0 = get_keypoints(features_path, name0, return_uncertainty=True)
         noise0 = 1.0 if noise0 is None else noise0
         if len(kps0) > 0:
-            kps0 = np.stack(cam0.image_to_world(kps0))
+            kps0 = np.stack(cam0.cam_from_img(kps0))
         else:
             kps0 = np.zeros((0, 2))
 
@@ -154,7 +156,7 @@ def geometric_verification(
             kps1, noise1 = get_keypoints(features_path, name1, return_uncertainty=True)
             noise1 = 1.0 if noise1 is None else noise1
             if len(kps1) > 0:
-                kps1 = np.stack(cam1.image_to_world(kps1))
+                kps1 = np.stack(cam1.cam_from_img(kps1))
             else:
                 kps1 = np.zeros((0, 2))
 
@@ -168,15 +170,13 @@ def geometric_verification(
                 db.add_two_view_geometry(id0, id1, matches)
                 continue
 
-            qvec_01, tvec_01 = pycolmap.relative_pose(
-                image0.qvec, image0.tvec, image1.qvec, image1.tvec
-            )
-            _, errors0, errors1 = compute_epipolar_errors(
-                qvec_01, tvec_01, kps0[matches[:, 0]], kps1[matches[:, 1]]
+            cam1_from_cam0 = image1.cam_from_world * image0.cam_from_world.inverse()
+            errors0, errors1 = compute_epipolar_errors(
+                cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]]
             )
             valid_matches = np.logical_and(
-                errors0 <= max_error * noise0 / cam0.mean_focal_length(),
-                errors1 <= max_error * noise1 / cam1.mean_focal_length(),
+                errors0 <= cam0.cam_from_img_threshold(noise0 * max_error),
+                errors1 <= cam1.cam_from_img_threshold(noise1 * max_error),
             )
             # TODO: We could also add E to the database, but we need
             # to reverse the transformations if id0 > id1 in utils/database.py.

diff --git a/hloc/utils/geometry.py b/hloc/utils/geometry.py
@@ -6,28 +6,11 @@ def to_homogeneous(p):
     return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1)
 
 
-def vector_to_cross_product_matrix(v):
-    return np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
-
-
-def compute_epipolar_errors(qvec_r2t, tvec_r2t, p2d_r, p2d_t):
-    T_r2t = pose_matrix_from_qvec_tvec(qvec_r2t, tvec_r2t)
-    # Compute errors in normalized plane to avoid distortion.
-    E = vector_to_cross_product_matrix(T_r2t[:3, -1]) @ T_r2t[:3, :3]
-    l2d_r2t = (E @ to_homogeneous(p2d_r).T).T
-    l2d_t2r = (E.T @ to_homogeneous(p2d_t).T).T
-    errors_r = np.abs(np.sum(to_homogeneous(p2d_r) * l2d_t2r, axis=1)) / np.linalg.norm(
-        l2d_t2r[:, :2], axis=1
-    )
-    errors_t = np.abs(np.sum(to_homogeneous(p2d_t) * l2d_r2t, axis=1)) / np.linalg.norm(
-        l2d_r2t[:, :2], axis=1
-    )
-    return E, errors_r, errors_t
-
-
-def pose_matrix_from_qvec_tvec(qvec, tvec):
-    pose = np.zeros((4, 4))
-    pose[:3, :3] = pycolmap.qvec_to_rotmat(qvec)
-    pose[:3, -1] = tvec
-    pose[-1, -1] = 1
-    return pose
+def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j):
+    j_E_i = j_from_i.essential_matrix()
+    l2d_j = to_homogeneous(p2d_i) @ j_E_i.T
+    l2d_i = to_homogeneous(p2d_j) @ j_E_i
+    dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1))
+    errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1)
+    errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1)
+    return errors_i, errors_j
diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py
@@ -19,7 +19,9 @@ def parse_image_list(path, with_intrinsics=False):
             if with_intrinsics:
                 model, width, height, *params = data
                 params = np.array(params, float)
-                cam = pycolmap.Camera(model, int(width), int(height), params)
+                cam = pycolmap.Camera(
+                    model=model, width=int(width), height=int(height), params=params
+                )
                 images.append((name, cam))
             else:
                 images.append(name)

diff --git a/hloc/utils/viz_3d.py b/hloc/utils/viz_3d.py
@@ -147,13 +147,14 @@ def plot_camera_colmap(
     **kwargs
 ):
     """Plot a camera frustum from PyCOLMAP objects"""
+    world_t_camera = image.cam_from_world.inverse()
     plot_camera(
         fig,
-        image.rotmat().T,
-        image.projection_center(),
+        world_t_camera.rotation.matrix(),
+        world_t_camera.translation,
         camera.calibration_matrix(),
         name=name or str(image.image_id),
-        text=image.summary(),
+        text=str(image),
         **kwargs
     )
 

diff --git a/requirements.txt b/requirements.txt
@@ -7,7 +7,7 @@ matplotlib
 plotly
 scipy
 h5py
-pycolmap>=0.3.0
+pycolmap>=0.6.0
 kornia>=0.6.11
 gdown
 lightglue @ git+https://github.com/cvg/LightGlue