diff --git a/.gitignore b/.gitignore
index 908ae94..371e266 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,8 @@ third_party/QuadTreeAttention
desktop.ini
*.egg-info
output.pkl
+experiments*
+gen_example.py
+datasets/lines/terrace0.JPG
+datasets/lines/terrace1.JPG
+datasets/South-Building*
diff --git a/.gitmodules b/.gitmodules
index 336af93..e4ee749 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -64,3 +64,6 @@
[submodule "third_party/DarkFeat"]
path = third_party/DarkFeat
url = https://github.com/agipro/DarkFeat.git
+[submodule "third_party/mast3r"]
+ path = third_party/mast3r
+ url = https://github.com/naver/mast3r
diff --git a/Dockerfile b/Dockerfile
index 9501dec..863cdf8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ ENV PATH /opt/conda/envs/imw/bin:$PATH
# Make RUN commands use the new environment
SHELL ["conda", "run", "-n", "imw", "/bin/bash", "-c"]
RUN pip install --upgrade pip
-RUN pip install -r env-docker.txt
+RUN pip install -r requirements.txt
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
# Export port
diff --git a/README.md b/README.md
index bd02d4b..2a3ca15 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Here is a demo of the tool:
https://github.com/Vincentqyw/image-matching-webui/assets/18531182/263534692-c3484d1b-cc00-4fdc-9b31-e5b7af07ecd9
The tool currently supports various popular image matching algorithms, namely:
+- [x] [MASt3R](https://github.com/naver/mast3r), CVPR 2024
- [x] [DUSt3R](https://github.com/naver/dust3r), CVPR 2024
- [x] [OmniGlue](https://github.com/Vincentqyw/omniglue-onnx), CVPR 2024
- [x] [XFeat](https://github.com/verlab/accelerated_features), CVPR 2024
diff --git a/hloc/__init__.py b/hloc/__init__.py
index 7c2e3dd..cf64b08 100644
--- a/hloc/__init__.py
+++ b/hloc/__init__.py
@@ -3,7 +3,7 @@
import torch
from packaging import version
-__version__ = "1.3"
+__version__ = "1.5"
formatter = logging.Formatter(
fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s",
@@ -23,14 +23,18 @@
except ImportError:
logger.warning("pycolmap is not installed, some features may not work.")
else:
- minimal_version = version.parse("0.3.0")
- found_version = version.parse(getattr(pycolmap, "__version__"))
- if found_version < minimal_version:
- logger.warning(
- "hloc now requires pycolmap>=%s but found pycolmap==%s, "
- "please upgrade with `pip install --upgrade pycolmap`",
- minimal_version,
- found_version,
- )
+ min_version = version.parse("0.6.0")
+ found_version = pycolmap.__version__
+ if found_version != "dev":
+ version = version.parse(found_version)
+ if version < min_version:
+ s = f"pycolmap>={min_version}"
+ logger.warning(
+ "hloc requires %s but found pycolmap==%s, "
+ 'please upgrade with `pip install --upgrade "%s"`',
+ s,
+ found_version,
+ s,
+ )
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
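
Note on the rewritten version check above: it skips the comparison for `dev` builds and otherwise relies on `packaging.version`. A minimal standalone sketch of the same comparison (not part of the patch):

```python
from packaging import version

def pycolmap_version_ok(found: str, minimum: str = "0.6.0") -> bool:
    """Return True if the installed pycolmap satisfies the minimum version."""
    if found == "dev":                 # development builds are assumed fine
        return True
    return version.parse(found) >= version.parse(minimum)

assert pycolmap_version_ok("0.6.1")
assert not pycolmap_version_ok("0.4.0")
assert pycolmap_version_ok("dev")
```
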
diff --git a/hloc/colmap_from_nvm.py b/hloc/colmap_from_nvm.py
new file mode 100644
index 0000000..1f3ad89
--- /dev/null
+++ b/hloc/colmap_from_nvm.py
@@ -0,0 +1,220 @@
+import argparse
+import sqlite3
+from collections import defaultdict
+from pathlib import Path
+
+import numpy as np
+from tqdm import tqdm
+
+from . import logger
+from .utils.read_write_model import (
+ CAMERA_MODEL_NAMES,
+ Camera,
+ Image,
+ Point3D,
+ write_model,
+)
+
+
+def recover_database_images_and_ids(database_path):
+ images = {}
+ cameras = {}
+ db = sqlite3.connect(str(database_path))
+ ret = db.execute("SELECT name, image_id, camera_id FROM images;")
+ for name, image_id, camera_id in ret:
+ images[name] = image_id
+ cameras[name] = camera_id
+ db.close()
+ logger.info(
+ f"Found {len(images)} images and {len(cameras)} cameras in database."
+ )
+ return images, cameras
+
+
+def quaternion_to_rotation_matrix(qvec):
+ qvec = qvec / np.linalg.norm(qvec)
+ w, x, y, z = qvec
+ R = np.array(
+ [
+ [
+ 1 - 2 * y * y - 2 * z * z,
+ 2 * x * y - 2 * z * w,
+ 2 * x * z + 2 * y * w,
+ ],
+ [
+ 2 * x * y + 2 * z * w,
+ 1 - 2 * x * x - 2 * z * z,
+ 2 * y * z - 2 * x * w,
+ ],
+ [
+ 2 * x * z - 2 * y * w,
+ 2 * y * z + 2 * x * w,
+ 1 - 2 * x * x - 2 * y * y,
+ ],
+ ]
+ )
+ return R
+
+
+def camera_center_to_translation(c, qvec):
+ R = quaternion_to_rotation_matrix(qvec)
+ return (-1) * np.matmul(R, c)
+
+
+def read_nvm_model(
+ nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False
+):
+ with open(intrinsics_path, "r") as f:
+ raw_intrinsics = f.readlines()
+
+ logger.info(f"Reading {len(raw_intrinsics)} cameras...")
+ cameras = {}
+ for intrinsics in raw_intrinsics:
+ intrinsics = intrinsics.strip("\n").split(" ")
+ name, camera_model, width, height = intrinsics[:4]
+ params = [float(p) for p in intrinsics[4:]]
+ camera_model = CAMERA_MODEL_NAMES[camera_model]
+ assert len(params) == camera_model.num_params
+ camera_id = camera_ids[name]
+ camera = Camera(
+ id=camera_id,
+ model=camera_model.model_name,
+ width=int(width),
+ height=int(height),
+ params=params,
+ )
+ cameras[camera_id] = camera
+
+ nvm_f = open(nvm_path, "r")
+ line = nvm_f.readline()
+ while line == "\n" or line.startswith("NVM_V3"):
+ line = nvm_f.readline()
+ num_images = int(line)
+ assert num_images == len(cameras)
+
+ logger.info(f"Reading {num_images} images...")
+ image_idx_to_db_image_id = []
+ image_data = []
+ i = 0
+ while i < num_images:
+ line = nvm_f.readline()
+ if line == "\n":
+ continue
+ data = line.strip("\n").split(" ")
+ image_data.append(data)
+ image_idx_to_db_image_id.append(image_ids[data[0]])
+ i += 1
+
+ line = nvm_f.readline()
+ while line == "\n":
+ line = nvm_f.readline()
+ num_points = int(line)
+
+ if skip_points:
+ logger.info(f"Skipping {num_points} points.")
+ num_points = 0
+ else:
+ logger.info(f"Reading {num_points} points...")
+ points3D = {}
+ image_idx_to_keypoints = defaultdict(list)
+ i = 0
+ pbar = tqdm(total=num_points, unit="pts")
+ while i < num_points:
+ line = nvm_f.readline()
+ if line == "\n":
+ continue
+
+ data = line.strip("\n").split(" ")
+ x, y, z, r, g, b, num_observations = data[:7]
+ obs_image_ids, point2D_idxs = [], []
+ for j in range(int(num_observations)):
+ s = 7 + 4 * j
+ img_index, kp_index, kx, ky = data[s : s + 4]
+ image_idx_to_keypoints[int(img_index)].append(
+ (int(kp_index), float(kx), float(ky), i)
+ )
+ db_image_id = image_idx_to_db_image_id[int(img_index)]
+ obs_image_ids.append(db_image_id)
+ point2D_idxs.append(kp_index)
+
+ point = Point3D(
+ id=i,
+ xyz=np.array([x, y, z], float),
+ rgb=np.array([r, g, b], int),
+ error=1.0, # fake
+ image_ids=np.array(obs_image_ids, int),
+ point2D_idxs=np.array(point2D_idxs, int),
+ )
+ points3D[i] = point
+
+ i += 1
+ pbar.update(1)
+ pbar.close()
+
+ logger.info("Parsing image data...")
+ images = {}
+ for i, data in enumerate(image_data):
+ # Skip the focal length. Skip the distortion and terminal 0.
+ name, _, qw, qx, qy, qz, cx, cy, cz, _, _ = data
+ qvec = np.array([qw, qx, qy, qz], float)
+ c = np.array([cx, cy, cz], float)
+ t = camera_center_to_translation(c, qvec)
+
+ if i in image_idx_to_keypoints:
+ # NVM only stores triangulated 2D keypoints: add dummy ones
+ keypoints = image_idx_to_keypoints[i]
+ point2D_idxs = np.array([d[0] for d in keypoints])
+ tri_xys = np.array([[x, y] for _, x, y, _ in keypoints])
+ tri_ids = np.array([i for _, _, _, i in keypoints])
+
+ num_2Dpoints = max(point2D_idxs) + 1
+ xys = np.zeros((num_2Dpoints, 2), float)
+ point3D_ids = np.full(num_2Dpoints, -1, int)
+ xys[point2D_idxs] = tri_xys
+ point3D_ids[point2D_idxs] = tri_ids
+ else:
+ xys = np.zeros((0, 2), float)
+ point3D_ids = np.full(0, -1, int)
+
+ image_id = image_ids[name]
+ image = Image(
+ id=image_id,
+ qvec=qvec,
+ tvec=t,
+ camera_id=camera_ids[name],
+ name=name,
+ xys=xys,
+ point3D_ids=point3D_ids,
+ )
+ images[image_id] = image
+
+ return cameras, images, points3D
+
+
+def main(nvm, intrinsics, database, output, skip_points=False):
+ assert nvm.exists(), nvm
+ assert intrinsics.exists(), intrinsics
+ assert database.exists(), database
+
+ image_ids, camera_ids = recover_database_images_and_ids(database)
+
+ logger.info("Reading the NVM model...")
+ model = read_nvm_model(
+ nvm, intrinsics, image_ids, camera_ids, skip_points=skip_points
+ )
+
+ logger.info("Writing the COLMAP model...")
+ output.mkdir(exist_ok=True, parents=True)
+ write_model(*model, path=str(output), ext=".bin")
+ logger.info("Done.")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--nvm", required=True, type=Path)
+ parser.add_argument("--intrinsics", required=True, type=Path)
+ parser.add_argument("--database", required=True, type=Path)
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--skip_points", action="store_true")
+ args = parser.parse_args()
+ main(**args.__dict__)
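
NVM stores a camera center `c` plus a world-to-camera rotation (quaternion), whereas COLMAP images store the translation `t = -R @ c`; `camera_center_to_translation` above performs exactly that conversion. A small numeric check with hypothetical values, reusing the helpers from this file:

```python
import numpy as np
from hloc.colmap_from_nvm import (
    camera_center_to_translation,
    quaternion_to_rotation_matrix,
)

c = np.array([1.0, 2.0, 3.0])                 # camera center in world coords
qvec = np.array([0.9239, 0.0, 0.3827, 0.0])   # ~45 deg rotation about y

R = quaternion_to_rotation_matrix(qvec)
t = camera_center_to_translation(c, qvec)     # t = -R @ c (COLMAP convention)

# Recovering the center from (R, t) gives back the original c.
assert np.allclose(-R.T @ t, c)
```
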
diff --git a/hloc/extract_features.py b/hloc/extract_features.py
index 1e66f03..62eeb45 100644
--- a/hloc/extract_features.py
+++ b/hloc/extract_features.py
@@ -329,6 +329,11 @@
"model": {"name": "cosplace"},
"preprocessing": {"resize_max": 1024},
},
+ "eigenplaces": {
+ "output": "global-feats-eigenplaces",
+ "model": {"name": "eigenplaces"},
+ "preprocessing": {"resize_max": 1024},
+ },
}
diff --git a/hloc/extractors/eigenplaces.py b/hloc/extractors/eigenplaces.py
new file mode 100644
index 0000000..fd9953b
--- /dev/null
+++ b/hloc/extractors/eigenplaces.py
@@ -0,0 +1,57 @@
+"""
+Code for loading models trained with EigenPlaces (or CosPlace) as a global
+feature extractor for geolocalization through image retrieval.
+Multiple models are available with different backbones. Below is a summary of
+the available models (backbone : available output descriptor dimensionalities).
+For example, you can use a ResNet50-based model with a descriptor
+dimensionality of 1024.
+
+EigenPlaces trained models:
+ ResNet18: [ 256, 512]
+ ResNet50: [128, 256, 512, 2048]
+ ResNet101: [128, 256, 512, 2048]
+ VGG16: [ 512]
+
+CosPlace trained models:
+ ResNet18: [32, 64, 128, 256, 512]
+ ResNet50: [32, 64, 128, 256, 512, 1024, 2048]
+ ResNet101: [32, 64, 128, 256, 512, 1024, 2048]
+ ResNet152: [32, 64, 128, 256, 512, 1024, 2048]
+ VGG16: [ 64, 128, 256, 512]
+
+EigenPlaces paper (ICCV 2023): https://arxiv.org/abs/2308.10832
+CosPlace paper (CVPR 2022): https://arxiv.org/abs/2204.02287
+"""
+
+import torch
+import torchvision.transforms as tvf
+
+from ..utils.base_model import BaseModel
+
+
+class EigenPlaces(BaseModel):
+ default_conf = {
+ "variant": "EigenPlaces",
+ "backbone": "ResNet101",
+ "fc_output_dim": 2048,
+ }
+ required_inputs = ["image"]
+
+ def _init(self, conf):
+ self.net = torch.hub.load(
+ "gmberton/" + conf["variant"],
+ "get_trained_model",
+ backbone=conf["backbone"],
+ fc_output_dim=conf["fc_output_dim"],
+ ).eval()
+
+ mean = [0.485, 0.456, 0.406]
+ std = [0.229, 0.224, 0.225]
+ self.norm_rgb = tvf.Normalize(mean=mean, std=std)
+
+ def _forward(self, data):
+ image = self.norm_rgb(data["image"])
+ desc = self.net(image)
+ return {
+ "global_descriptor": desc,
+ }
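
With the new `eigenplaces` entry in `extract_features.confs`, global descriptors are extracted the same way as with `netvlad` or `cosplace`. A hedged usage sketch (the dataset and output paths are placeholders):

```python
from pathlib import Path
from hloc import extract_features

conf = extract_features.confs["eigenplaces"]     # configuration added above
images = Path("datasets/sacre_coeur/mapping")    # placeholder image folder
outputs = Path("outputs/sacre_coeur")

# Writes outputs/global-feats-eigenplaces.h5 with one "global_descriptor"
# per image, ready to be consumed by pairs_from_retrieval.
retrieval_path = extract_features.main(conf, images, outputs)
```
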
diff --git a/hloc/localize_inloc.py b/hloc/localize_inloc.py
new file mode 100644
index 0000000..1e003b1
--- /dev/null
+++ b/hloc/localize_inloc.py
@@ -0,0 +1,183 @@
+import argparse
+import pickle
+from pathlib import Path
+
+import cv2
+import h5py
+import numpy as np
+import pycolmap
+import torch
+from scipy.io import loadmat
+from tqdm import tqdm
+
+from . import logger
+from .utils.parsers import names_to_pair, parse_retrieval
+
+
+def interpolate_scan(scan, kp):
+ h, w, c = scan.shape
+ kp = kp / np.array([[w - 1, h - 1]]) * 2 - 1
+ assert np.all(kp > -1) and np.all(kp < 1)
+ scan = torch.from_numpy(scan).permute(2, 0, 1)[None]
+ kp = torch.from_numpy(kp)[None, None]
+ grid_sample = torch.nn.functional.grid_sample
+
+ # To maximize the number of points that have depth:
+ # do bilinear interpolation first and then nearest for the remaining points
+ interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[
+ 0, :, 0
+ ]
+ interp_nn = torch.nn.functional.grid_sample(
+ scan, kp, align_corners=True, mode="nearest"
+ )[0, :, 0]
+ interp = torch.where(torch.isnan(interp_lin), interp_nn, interp_lin)
+ valid = ~torch.any(torch.isnan(interp), 0)
+
+ kp3d = interp.T.numpy()
+ valid = valid.numpy()
+ return kp3d, valid
+
+
+def get_scan_pose(dataset_dir, rpath):
+ split_image_rpath = rpath.split("/")
+ floor_name = split_image_rpath[-3]
+ scan_id = split_image_rpath[-2]
+ image_name = split_image_rpath[-1]
+ building_name = image_name[:3]
+
+ path = Path(
+ dataset_dir,
+ "database/alignments",
+ floor_name,
+ f"transformations/{building_name}_trans_{scan_id}.txt",
+ )
+ with open(path) as f:
+ raw_lines = f.readlines()
+
+ P_after_GICP = np.array(
+ [
+ np.fromstring(raw_lines[7], sep=" "),
+ np.fromstring(raw_lines[8], sep=" "),
+ np.fromstring(raw_lines[9], sep=" "),
+ np.fromstring(raw_lines[10], sep=" "),
+ ]
+ )
+
+ return P_after_GICP
+
+
+def pose_from_cluster(
+ dataset_dir, q, retrieved, feature_file, match_file, skip=None
+):
+ height, width = cv2.imread(str(dataset_dir / q)).shape[:2]
+ cx = 0.5 * width
+ cy = 0.5 * height
+ focal_length = 4032.0 * 28.0 / 36.0
+
+ all_mkpq = []
+ all_mkpr = []
+ all_mkp3d = []
+ all_indices = []
+ kpq = feature_file[q]["keypoints"].__array__()
+ num_matches = 0
+
+ for i, r in enumerate(retrieved):
+ kpr = feature_file[r]["keypoints"].__array__()
+ pair = names_to_pair(q, r)
+ m = match_file[pair]["matches0"].__array__()
+ v = m > -1
+
+ if skip and (np.count_nonzero(v) < skip):
+ continue
+
+ mkpq, mkpr = kpq[v], kpr[m[v]]
+ num_matches += len(mkpq)
+
+ scan_r = loadmat(Path(dataset_dir, r + ".mat"))["XYZcut"]
+ mkp3d, valid = interpolate_scan(scan_r, mkpr)
+ Tr = get_scan_pose(dataset_dir, r)
+ mkp3d = (Tr[:3, :3] @ mkp3d.T + Tr[:3, -1:]).T
+
+ all_mkpq.append(mkpq[valid])
+ all_mkpr.append(mkpr[valid])
+ all_mkp3d.append(mkp3d[valid])
+ all_indices.append(np.full(np.count_nonzero(valid), i))
+
+ all_mkpq = np.concatenate(all_mkpq, 0)
+ all_mkpr = np.concatenate(all_mkpr, 0)
+ all_mkp3d = np.concatenate(all_mkp3d, 0)
+ all_indices = np.concatenate(all_indices, 0)
+
+ cfg = {
+ "model": "SIMPLE_PINHOLE",
+ "width": width,
+ "height": height,
+ "params": [focal_length, cx, cy],
+ }
+ ret = pycolmap.absolute_pose_estimation(all_mkpq, all_mkp3d, cfg, 48.00)
+ ret["cfg"] = cfg
+ return ret, all_mkpq, all_mkpr, all_mkp3d, all_indices, num_matches
+
+
+def main(dataset_dir, retrieval, features, matches, results, skip_matches=None):
+ assert retrieval.exists(), retrieval
+ assert features.exists(), features
+ assert matches.exists(), matches
+
+ retrieval_dict = parse_retrieval(retrieval)
+ queries = list(retrieval_dict.keys())
+
+ feature_file = h5py.File(features, "r", libver="latest")
+ match_file = h5py.File(matches, "r", libver="latest")
+
+ poses = {}
+ logs = {
+ "features": features,
+ "matches": matches,
+ "retrieval": retrieval,
+ "loc": {},
+ }
+ logger.info("Starting localization...")
+ for q in tqdm(queries):
+ db = retrieval_dict[q]
+ ret, mkpq, mkpr, mkp3d, indices, num_matches = pose_from_cluster(
+ dataset_dir, q, db, feature_file, match_file, skip_matches
+ )
+
+ poses[q] = (ret["qvec"], ret["tvec"])
+ logs["loc"][q] = {
+ "db": db,
+ "PnP_ret": ret,
+ "keypoints_query": mkpq,
+ "keypoints_db": mkpr,
+ "3d_points": mkp3d,
+ "indices_db": indices,
+ "num_matches": num_matches,
+ }
+
+ logger.info(f"Writing poses to {results}...")
+ with open(results, "w") as f:
+ for q in queries:
+ qvec, tvec = poses[q]
+ qvec = " ".join(map(str, qvec))
+ tvec = " ".join(map(str, tvec))
+ name = q.split("/")[-1]
+ f.write(f"{name} {qvec} {tvec}\n")
+
+ logs_path = f"{results}_logs.pkl"
+ logger.info(f"Writing logs to {logs_path}...")
+ with open(logs_path, "wb") as f:
+ pickle.dump(logs, f)
+ logger.info("Done!")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--dataset_dir", type=Path, required=True)
+ parser.add_argument("--retrieval", type=Path, required=True)
+ parser.add_argument("--features", type=Path, required=True)
+ parser.add_argument("--matches", type=Path, required=True)
+ parser.add_argument("--results", type=Path, required=True)
+ parser.add_argument("--skip_matches", type=int)
+ args = parser.parse_args()
+ main(**args.__dict__)
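
`interpolate_scan` maps pixel keypoints into `grid_sample`'s normalized [-1, 1] coordinates and falls back from bilinear to nearest interpolation so that keypoints next to holes in the RGB-D scan still receive a 3D point. A standalone illustration of that logic on synthetic data (not InLoc):

```python
import numpy as np
import torch

h, w = 4, 6
scan = np.random.rand(h, w, 3)            # XYZ scan with a NaN hole at (1, 2)
scan[1, 2] = np.nan
kp = np.array([[2.6, 1.0], [4.0, 2.0]])   # keypoints as (x, y) pixel coords

# Same normalization as interpolate_scan: pixels -> [-1, 1] for grid_sample.
grid = torch.from_numpy(kp / np.array([[w - 1, h - 1]]) * 2 - 1)[None, None]
scan_t = torch.from_numpy(scan).permute(2, 0, 1)[None]

sample = torch.nn.functional.grid_sample
lin = sample(scan_t, grid, align_corners=True, mode="bilinear")[0, :, 0]
nn = sample(scan_t, grid, align_corners=True, mode="nearest")[0, :, 0]

# Bilinear samples touching the NaN hole become NaN and are replaced by the
# nearest-neighbour sample; keypoints whose nearest pixel is also NaN would
# be masked out by `valid`.
interp = torch.where(torch.isnan(lin), nn, lin)
valid = ~torch.any(torch.isnan(interp), 0)
kp3d = interp.T.numpy()                   # one 3D point per keypoint, (2, 3)
```
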
diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py
new file mode 100644
index 0000000..a1cb672
--- /dev/null
+++ b/hloc/localize_sfm.py
@@ -0,0 +1,247 @@
+import argparse
+import pickle
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Union
+
+import numpy as np
+import pycolmap
+from tqdm import tqdm
+
+from . import logger
+from .utils.io import get_keypoints, get_matches
+from .utils.parsers import parse_image_lists, parse_retrieval
+
+
+def do_covisibility_clustering(
+ frame_ids: List[int], reconstruction: pycolmap.Reconstruction
+):
+ clusters = []
+ visited = set()
+ for frame_id in frame_ids:
+ # Check if already labeled
+ if frame_id in visited:
+ continue
+
+ # New component
+ clusters.append([])
+ queue = {frame_id}
+ while len(queue):
+ exploration_frame = queue.pop()
+
+ # Already part of the component
+ if exploration_frame in visited:
+ continue
+ visited.add(exploration_frame)
+ clusters[-1].append(exploration_frame)
+
+ observed = reconstruction.images[exploration_frame].points2D
+ connected_frames = {
+ obs.image_id
+ for p2D in observed
+ if p2D.has_point3D()
+ for obs in reconstruction.points3D[
+ p2D.point3D_id
+ ].track.elements
+ }
+ connected_frames &= set(frame_ids)
+ connected_frames -= visited
+ queue |= connected_frames
+
+ clusters = sorted(clusters, key=len, reverse=True)
+ return clusters
+
+
+class QueryLocalizer:
+ def __init__(self, reconstruction, config=None):
+ self.reconstruction = reconstruction
+ self.config = config or {}
+
+ def localize(self, points2D_all, points2D_idxs, points3D_id, query_camera):
+ points2D = points2D_all[points2D_idxs]
+ points3D = [self.reconstruction.points3D[j].xyz for j in points3D_id]
+ ret = pycolmap.absolute_pose_estimation(
+ points2D,
+ points3D,
+ query_camera,
+ estimation_options=self.config.get("estimation", {}),
+ refinement_options=self.config.get("refinement", {}),
+ )
+ return ret
+
+
+def pose_from_cluster(
+ localizer: QueryLocalizer,
+ qname: str,
+ query_camera: pycolmap.Camera,
+ db_ids: List[int],
+ features_path: Path,
+ matches_path: Path,
+ **kwargs,
+):
+ kpq = get_keypoints(features_path, qname)
+ kpq += 0.5 # COLMAP coordinates
+
+ kp_idx_to_3D = defaultdict(list)
+ kp_idx_to_3D_to_db = defaultdict(lambda: defaultdict(list))
+ num_matches = 0
+ for i, db_id in enumerate(db_ids):
+ image = localizer.reconstruction.images[db_id]
+ if image.num_points3D == 0:
+ logger.debug(f"No 3D points found for {image.name}.")
+ continue
+ points3D_ids = np.array(
+ [p.point3D_id if p.has_point3D() else -1 for p in image.points2D]
+ )
+
+ matches, _ = get_matches(matches_path, qname, image.name)
+ matches = matches[points3D_ids[matches[:, 1]] != -1]
+ num_matches += len(matches)
+ for idx, m in matches:
+ id_3D = points3D_ids[m]
+ kp_idx_to_3D_to_db[idx][id_3D].append(i)
+ # avoid duplicate observations
+ if id_3D not in kp_idx_to_3D[idx]:
+ kp_idx_to_3D[idx].append(id_3D)
+
+ idxs = list(kp_idx_to_3D.keys())
+ mkp_idxs = [i for i in idxs for _ in kp_idx_to_3D[i]]
+ mp3d_ids = [j for i in idxs for j in kp_idx_to_3D[i]]
+ ret = localizer.localize(kpq, mkp_idxs, mp3d_ids, query_camera, **kwargs)
+ if ret is not None:
+ ret["camera"] = query_camera
+
+ # mostly for logging and post-processing
+ mkp_to_3D_to_db = [
+ (j, kp_idx_to_3D_to_db[i][j]) for i in idxs for j in kp_idx_to_3D[i]
+ ]
+ log = {
+ "db": db_ids,
+ "PnP_ret": ret,
+ "keypoints_query": kpq[mkp_idxs],
+ "points3D_ids": mp3d_ids,
+ "points3D_xyz": None, # we don't log xyz anymore because of file size
+ "num_matches": num_matches,
+ "keypoint_index_to_db": (mkp_idxs, mkp_to_3D_to_db),
+ }
+ return ret, log
+
+
+def main(
+ reference_sfm: Union[Path, pycolmap.Reconstruction],
+ queries: Path,
+ retrieval: Path,
+ features: Path,
+ matches: Path,
+ results: Path,
+ ransac_thresh: int = 12,
+ covisibility_clustering: bool = False,
+ prepend_camera_name: bool = False,
+ config: Dict = None,
+):
+ assert retrieval.exists(), retrieval
+ assert features.exists(), features
+ assert matches.exists(), matches
+
+ queries = parse_image_lists(queries, with_intrinsics=True)
+ retrieval_dict = parse_retrieval(retrieval)
+
+ logger.info("Reading the 3D model...")
+ if not isinstance(reference_sfm, pycolmap.Reconstruction):
+ reference_sfm = pycolmap.Reconstruction(reference_sfm)
+ db_name_to_id = {img.name: i for i, img in reference_sfm.images.items()}
+
+ config = {
+ "estimation": {"ransac": {"max_error": ransac_thresh}},
+ **(config or {}),
+ }
+ localizer = QueryLocalizer(reference_sfm, config)
+
+ cam_from_world = {}
+ logs = {
+ "features": features,
+ "matches": matches,
+ "retrieval": retrieval,
+ "loc": {},
+ }
+ logger.info("Starting localization...")
+ for qname, qcam in tqdm(queries):
+ if qname not in retrieval_dict:
+ logger.warning(
+ f"No images retrieved for query image {qname}. Skipping..."
+ )
+ continue
+ db_names = retrieval_dict[qname]
+ db_ids = []
+ for n in db_names:
+ if n not in db_name_to_id:
+ logger.warning(f"Image {n} was retrieved but not in database")
+ continue
+ db_ids.append(db_name_to_id[n])
+
+ if covisibility_clustering:
+ clusters = do_covisibility_clustering(db_ids, reference_sfm)
+ best_inliers = 0
+ best_cluster = None
+ logs_clusters = []
+ for i, cluster_ids in enumerate(clusters):
+ ret, log = pose_from_cluster(
+ localizer, qname, qcam, cluster_ids, features, matches
+ )
+ if ret is not None and ret["num_inliers"] > best_inliers:
+ best_cluster = i
+ best_inliers = ret["num_inliers"]
+ logs_clusters.append(log)
+ if best_cluster is not None:
+ ret = logs_clusters[best_cluster]["PnP_ret"]
+ cam_from_world[qname] = ret["cam_from_world"]
+ logs["loc"][qname] = {
+ "db": db_ids,
+ "best_cluster": best_cluster,
+ "log_clusters": logs_clusters,
+ "covisibility_clustering": covisibility_clustering,
+ }
+ else:
+ ret, log = pose_from_cluster(
+ localizer, qname, qcam, db_ids, features, matches
+ )
+ if ret is not None:
+ cam_from_world[qname] = ret["cam_from_world"]
+ else:
+ closest = reference_sfm.images[db_ids[0]]
+ cam_from_world[qname] = closest.cam_from_world
+ log["covisibility_clustering"] = covisibility_clustering
+ logs["loc"][qname] = log
+
+ logger.info(f"Localized {len(cam_from_world)} / {len(queries)} images.")
+ logger.info(f"Writing poses to {results}...")
+ with open(results, "w") as f:
+ for query, t in cam_from_world.items():
+ qvec = " ".join(map(str, t.rotation.quat[[3, 0, 1, 2]]))
+ tvec = " ".join(map(str, t.translation))
+ name = query.split("/")[-1]
+ if prepend_camera_name:
+ name = query.split("/")[-2] + "/" + name
+ f.write(f"{name} {qvec} {tvec}\n")
+
+ logs_path = f"{results}_logs.pkl"
+ logger.info(f"Writing logs to {logs_path}...")
+ # TODO: Resolve pickling issue with pycolmap objects.
+ with open(logs_path, "wb") as f:
+ pickle.dump(logs, f)
+ logger.info("Done!")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--reference_sfm", type=Path, required=True)
+ parser.add_argument("--queries", type=Path, required=True)
+ parser.add_argument("--features", type=Path, required=True)
+ parser.add_argument("--matches", type=Path, required=True)
+ parser.add_argument("--retrieval", type=Path, required=True)
+ parser.add_argument("--results", type=Path, required=True)
+ parser.add_argument("--ransac_thresh", type=float, default=12.0)
+ parser.add_argument("--covisibility_clustering", action="store_true")
+ parser.add_argument("--prepend_camera_name", action="store_true")
+ args = parser.parse_args()
+ main(**args.__dict__)
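
The results file written above contains one line per query in the form `name qw qx qy qz tx ty tz`. The re-indexing `t.rotation.quat[[3, 0, 1, 2]]` is needed because, to the best of my knowledge, pycolmap's `Rotation3d.quat` stores coefficients in (x, y, z, w) order while the text format expects (w, x, y, z). A trivial check of the reordering with hypothetical values:

```python
import numpy as np

quat_xyzw = np.array([0.1, 0.2, 0.3, 0.927])   # hypothetical (x, y, z, w)
quat_wxyz = quat_xyzw[[3, 0, 1, 2]]            # -> (w, x, y, z)
assert np.allclose(quat_wxyz, [0.927, 0.1, 0.2, 0.3])
```
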
diff --git a/hloc/match_dense.py b/hloc/match_dense.py
index ac95d93..e1253d7 100644
--- a/hloc/match_dense.py
+++ b/hloc/match_dense.py
@@ -1,11 +1,25 @@
+import argparse
+import pprint
+from collections import Counter, defaultdict
+from itertools import chain
+from pathlib import Path
from types import SimpleNamespace
+from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
import cv2
+import h5py
import numpy as np
import torch
import torchvision.transforms.functional as F
+from scipy.spatial import KDTree
+from tqdm import tqdm
+from . import logger, matchers
from .extract_features import read_image, resize_image
+from .match_features import find_unique_new_pairs
+from .utils.base_model import dynamic_load
+from .utils.io import list_h5_names
+from .utils.parsers import names_to_pair, parse_retrieval
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -144,6 +158,20 @@
"dfactor": 16,
},
},
+ "mast3r": {
+ "output": "matches-mast3r",
+ "model": {
+ "name": "mast3r",
+ "weights": "vit_large",
+ "max_keypoints": 2000,
+ "match_threshold": 0.2,
+ },
+ "preprocessing": {
+ "grayscale": False,
+ "resize_max": 512,
+ "dfactor": 16,
+ },
+ },
"xfeat_dense": {
"output": "matches-xfeat_dense",
"model": {
@@ -262,12 +290,483 @@
}
+def to_cpts(kpts, ps):
+ if ps > 0.0:
+ kpts = np.round(np.round((kpts + 0.5) / ps) * ps - 0.5, 2)
+ return [tuple(cpt) for cpt in kpts]
+
+
+def assign_keypoints(
+ kpts: np.ndarray,
+ other_cpts: Union[List[Tuple], np.ndarray],
+ max_error: float,
+ update: bool = False,
+ ref_bins: Optional[List[Counter]] = None,
+ scores: Optional[np.ndarray] = None,
+ cell_size: Optional[int] = None,
+):
+ if not update:
+ # Without update this is just a NN search
+ if len(other_cpts) == 0 or len(kpts) == 0:
+ return np.full(len(kpts), -1)
+ dist, kpt_ids = KDTree(np.array(other_cpts)).query(kpts)
+ valid = dist <= max_error
+ kpt_ids[~valid] = -1
+ return kpt_ids
+ else:
+ ps = cell_size if cell_size is not None else max_error
+ ps = max(ps, max_error)
+ # With update we quantize and bin (optionally)
+ assert isinstance(other_cpts, list)
+ kpt_ids = []
+ cpts = to_cpts(kpts, ps)
+ bpts = to_cpts(kpts, int(max_error))
+ cp_to_id = {val: i for i, val in enumerate(other_cpts)}
+ for i, (cpt, bpt) in enumerate(zip(cpts, bpts)):
+ try:
+ kid = cp_to_id[cpt]
+ except KeyError:
+ kid = len(cp_to_id)
+ cp_to_id[cpt] = kid
+ other_cpts.append(cpt)
+ if ref_bins is not None:
+ ref_bins.append(Counter())
+ if ref_bins is not None:
+ score = scores[i] if scores is not None else 1
+ ref_bins[cp_to_id[cpt]][bpt] += score
+ kpt_ids.append(kid)
+ return np.array(kpt_ids)
+
+
+def get_grouped_ids(array):
+ # Group array indices based on its values
+ # all duplicates are grouped as a set
+ idx_sort = np.argsort(array)
+ sorted_array = array[idx_sort]
+ _, ids, _ = np.unique(sorted_array, return_counts=True, return_index=True)
+ res = np.split(idx_sort, ids[1:])
+ return res
+
+
+def get_unique_matches(match_ids, scores):
+ if len(match_ids.shape) == 1:
+ return [0]
+
+ isets1 = get_grouped_ids(match_ids[:, 0])
+ isets2 = get_grouped_ids(match_ids[:, 1])
+ uid1s = [ids[scores[ids].argmax()] for ids in isets1 if len(ids) > 0]
+ uid2s = [ids[scores[ids].argmax()] for ids in isets2 if len(ids) > 0]
+ uids = list(set(uid1s).intersection(uid2s))
+ return match_ids[uids], scores[uids]
+
+
+def matches_to_matches0(matches, scores):
+ if len(matches) == 0:
+ return np.zeros(0, dtype=np.int32), np.zeros(0, dtype=np.float16)
+ n_kps0 = np.max(matches[:, 0]) + 1
+ matches0 = -np.ones((n_kps0,))
+ scores0 = np.zeros((n_kps0,))
+ matches0[matches[:, 0]] = matches[:, 1]
+ scores0[matches[:, 0]] = scores
+ return matches0.astype(np.int32), scores0.astype(np.float16)
+
+
+def kpids_to_matches0(kpt_ids0, kpt_ids1, scores):
+ valid = (kpt_ids0 != -1) & (kpt_ids1 != -1)
+ matches = np.dstack([kpt_ids0[valid], kpt_ids1[valid]])
+ matches = matches.reshape(-1, 2)
+ scores = scores[valid]
+
+ # Remove n-to-1 matches
+ matches, scores = get_unique_matches(matches, scores)
+ return matches_to_matches0(matches, scores)
+
+
def scale_keypoints(kpts, scale):
if np.any(scale != 1.0):
kpts *= kpts.new_tensor(scale)
return kpts
+class ImagePairDataset(torch.utils.data.Dataset):
+ default_conf = {
+ "grayscale": True,
+ "resize_max": 1024,
+ "dfactor": 8,
+ "cache_images": False,
+ }
+
+ def __init__(self, image_dir, conf, pairs):
+ self.image_dir = image_dir
+ self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf})
+ self.pairs = pairs
+ if self.conf.cache_images:
+ image_names = set(sum(pairs, ())) # unique image names in pairs
+ logger.info(
+ f"Loading and caching {len(image_names)} unique images."
+ )
+ self.images = {}
+ self.scales = {}
+ for name in tqdm(image_names):
+ image = read_image(self.image_dir / name, self.conf.grayscale)
+ self.images[name], self.scales[name] = self.preprocess(image)
+
+ def preprocess(self, image: np.ndarray):
+ image = image.astype(np.float32, copy=False)
+ size = image.shape[:2][::-1]
+ scale = np.array([1.0, 1.0])
+
+ if self.conf.resize_max:
+ scale = self.conf.resize_max / max(size)
+ if scale < 1.0:
+ size_new = tuple(int(round(x * scale)) for x in size)
+ image = resize_image(image, size_new, "cv2_area")
+ scale = np.array(size) / np.array(size_new)
+
+ if self.conf.grayscale:
+ assert image.ndim == 2, image.shape
+ image = image[None]
+ else:
+ image = image.transpose((2, 0, 1)) # HxWxC to CxHxW
+ image = torch.from_numpy(image / 255.0).float()
+
+ # assure that the size is divisible by dfactor
+ size_new = tuple(
+ map(
+ lambda x: int(x // self.conf.dfactor * self.conf.dfactor),
+ image.shape[-2:],
+ )
+ )
+ image = F.resize(image, size=size_new)
+ scale = np.array(size) / np.array(size_new)[::-1]
+ return image, scale
+
+ def __len__(self):
+ return len(self.pairs)
+
+ def __getitem__(self, idx):
+ name0, name1 = self.pairs[idx]
+ if self.conf.cache_images:
+ image0, scale0 = self.images[name0], self.scales[name0]
+ image1, scale1 = self.images[name1], self.scales[name1]
+ else:
+ image0 = read_image(self.image_dir / name0, self.conf.grayscale)
+ image1 = read_image(self.image_dir / name1, self.conf.grayscale)
+ image0, scale0 = self.preprocess(image0)
+ image1, scale1 = self.preprocess(image1)
+ return image0, image1, scale0, scale1, name0, name1
+
+
+@torch.no_grad()
+def match_dense(
+ conf: Dict,
+ pairs: List[Tuple[str, str]],
+ image_dir: Path,
+ match_path: Path, # out
+ existing_refs: Optional[List] = [],
+):
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ Model = dynamic_load(matchers, conf["model"]["name"])
+ model = Model(conf["model"]).eval().to(device)
+
+ dataset = ImagePairDataset(image_dir, conf["preprocessing"], pairs)
+ loader = torch.utils.data.DataLoader(
+ dataset, num_workers=16, batch_size=1, shuffle=False
+ )
+
+ logger.info("Performing dense matching...")
+ with h5py.File(str(match_path), "a") as fd:
+ for data in tqdm(loader, smoothing=0.1):
+ # load image-pair data
+ image0, image1, scale0, scale1, (name0,), (name1,) = data
+ scale0, scale1 = scale0[0].numpy(), scale1[0].numpy()
+ image0, image1 = image0.to(device), image1.to(device)
+
+ # match semi-dense
+ # for consistency with pairs_from_*: refine kpts of image0
+ if name0 in existing_refs:
+ # special case: flip to enable refinement in query image
+ pred = model({"image0": image1, "image1": image0})
+ pred = {
+ **pred,
+ "keypoints0": pred["keypoints1"],
+ "keypoints1": pred["keypoints0"],
+ }
+ else:
+ # usual case
+ pred = model({"image0": image0, "image1": image1})
+
+ # Rescale keypoints and move to cpu
+ kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
+ kpts0 = scale_keypoints(kpts0 + 0.5, scale0) - 0.5
+ kpts1 = scale_keypoints(kpts1 + 0.5, scale1) - 0.5
+ kpts0 = kpts0.cpu().numpy()
+ kpts1 = kpts1.cpu().numpy()
+ scores = pred["scores"].cpu().numpy()
+
+ # Write matches and matching scores in hloc format
+ pair = names_to_pair(name0, name1)
+ if pair in fd:
+ del fd[pair]
+ grp = fd.create_group(pair)
+
+ # Write dense matching output
+ grp.create_dataset("keypoints0", data=kpts0)
+ grp.create_dataset("keypoints1", data=kpts1)
+ grp.create_dataset("scores", data=scores)
+ del model, loader
+
+
+# default: quantize all!
+def load_keypoints(
+ conf: Dict, feature_paths_refs: List[Path], quantize: Optional[set] = None
+):
+ name2ref = {
+ n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p)
+ }
+
+ existing_refs = set(name2ref.keys())
+ if quantize is None:
+ quantize = existing_refs # quantize all
+ if len(existing_refs) > 0:
+ logger.info(f"Loading keypoints from {len(existing_refs)} images.")
+
+ # Load query keypoints
+ cpdict = defaultdict(list)
+ bindict = defaultdict(list)
+ for name in existing_refs:
+ with h5py.File(str(feature_paths_refs[name2ref[name]]), "r") as fd:
+ kps = fd[name]["keypoints"].__array__()
+ if name not in quantize:
+ cpdict[name] = kps
+ else:
+ if "scores" in fd[name].keys():
+ kp_scores = fd[name]["scores"].__array__()
+ else:
+ # we set the score to 1.0 if not provided
+ # increase for more weight on reference keypoints for
+ # stronger anchoring
+ kp_scores = [1.0 for _ in range(kps.shape[0])]
+ # bin existing keypoints of reference images for association
+ assign_keypoints(
+ kps,
+ cpdict[name],
+ conf["max_error"],
+ True,
+ bindict[name],
+ kp_scores,
+ conf["cell_size"],
+ )
+ return cpdict, bindict
+
+
+def aggregate_matches(
+ conf: Dict,
+ pairs: List[Tuple[str, str]],
+ match_path: Path,
+ feature_path: Path,
+ required_queries: Optional[Set[str]] = None,
+ max_kps: Optional[int] = None,
+ cpdict: Dict[str, Iterable] = defaultdict(list),
+ bindict: Dict[str, List[Counter]] = defaultdict(list),
+):
+ if required_queries is None:
+ required_queries = set(sum(pairs, ()))
+ # default: do not overwrite existing features in feature_path!
+ required_queries -= set(list_h5_names(feature_path))
+
+ # if an entry in cpdict is provided as np.ndarray we assume it is fixed
+ required_queries -= set(
+ [k for k, v in cpdict.items() if isinstance(v, np.ndarray)]
+ )
+
+ # sort pairs for reduced RAM
+ pairs_per_q = Counter(list(chain(*pairs)))
+ pairs_score = [min(pairs_per_q[i], pairs_per_q[j]) for i, j in pairs]
+ pairs = [p for _, p in sorted(zip(pairs_score, pairs))]
+
+ if len(required_queries) > 0:
+ logger.info(
+ f"Aggregating keypoints for {len(required_queries)} images."
+ )
+ n_kps = 0
+ with h5py.File(str(match_path), "a") as fd:
+ for name0, name1 in tqdm(pairs, smoothing=0.1):
+ pair = names_to_pair(name0, name1)
+ grp = fd[pair]
+ kpts0 = grp["keypoints0"].__array__()
+ kpts1 = grp["keypoints1"].__array__()
+ scores = grp["scores"].__array__()
+
+ # Aggregate local features
+ update0 = name0 in required_queries
+ update1 = name1 in required_queries
+
+ # in localization we do not want to bin the query kp
+ # assumes that the query is name0!
+ if update0 and not update1 and max_kps is None:
+ max_error0 = cell_size0 = 0.0
+ else:
+ max_error0 = conf["max_error"]
+ cell_size0 = conf["cell_size"]
+
+ # Get match ids and extend query keypoints (cpdict)
+ mkp_ids0 = assign_keypoints(
+ kpts0,
+ cpdict[name0],
+ max_error0,
+ update0,
+ bindict[name0],
+ scores,
+ cell_size0,
+ )
+ mkp_ids1 = assign_keypoints(
+ kpts1,
+ cpdict[name1],
+ conf["max_error"],
+ update1,
+ bindict[name1],
+ scores,
+ conf["cell_size"],
+ )
+
+ # Build matches from assignments
+ matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores)
+
+ assert kpts0.shape[0] == scores.shape[0]
+ grp.create_dataset("matches0", data=matches0)
+ grp.create_dataset("matching_scores0", data=scores0)
+
+ # Convert bins to kps if finished, and store them
+ for name in (name0, name1):
+ pairs_per_q[name] -= 1
+ if pairs_per_q[name] > 0 or name not in required_queries:
+ continue
+ kp_score = [c.most_common(1)[0][1] for c in bindict[name]]
+ cpdict[name] = [c.most_common(1)[0][0] for c in bindict[name]]
+ cpdict[name] = np.array(cpdict[name], dtype=np.float32)
+
+ # Select top-k query kps by score (reassign matches later)
+ if max_kps:
+ top_k = min(max_kps, cpdict[name].shape[0])
+ top_k = np.argsort(kp_score)[::-1][:top_k]
+ cpdict[name] = cpdict[name][top_k]
+ kp_score = np.array(kp_score)[top_k]
+
+ # Write query keypoints
+ with h5py.File(feature_path, "a") as kfd:
+ if name in kfd:
+ del kfd[name]
+ kgrp = kfd.create_group(name)
+ kgrp.create_dataset("keypoints", data=cpdict[name])
+ kgrp.create_dataset("score", data=kp_score)
+ n_kps += cpdict[name].shape[0]
+ del bindict[name]
+
+ if len(required_queries) > 0:
+ avg_kp_per_image = round(n_kps / len(required_queries), 1)
+ logger.info(
+ f"Finished assignment, found {avg_kp_per_image} "
+ f"keypoints/image (avg.), total {n_kps}."
+ )
+ return cpdict
+
+
+def assign_matches(
+ pairs: List[Tuple[str, str]],
+ match_path: Path,
+ keypoints: Union[List[Path], Dict[str, np.array]],
+ max_error: float,
+):
+ if isinstance(keypoints, list):
+        keypoints, _ = load_keypoints({}, keypoints, quantize=set())
+ assert len(set(sum(pairs, ())) - set(keypoints.keys())) == 0
+ with h5py.File(str(match_path), "a") as fd:
+ for name0, name1 in tqdm(pairs):
+ pair = names_to_pair(name0, name1)
+ grp = fd[pair]
+ kpts0 = grp["keypoints0"].__array__()
+ kpts1 = grp["keypoints1"].__array__()
+ scores = grp["scores"].__array__()
+
+ # NN search across cell boundaries
+ mkp_ids0 = assign_keypoints(kpts0, keypoints[name0], max_error)
+ mkp_ids1 = assign_keypoints(kpts1, keypoints[name1], max_error)
+
+ matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores)
+
+ # overwrite matches0 and matching_scores0
+ del grp["matches0"], grp["matching_scores0"]
+ grp.create_dataset("matches0", data=matches0)
+ grp.create_dataset("matching_scores0", data=scores0)
+
+
+@torch.no_grad()
+def match_and_assign(
+ conf: Dict,
+ pairs_path: Path,
+ image_dir: Path,
+ match_path: Path, # out
+ feature_path_q: Path, # out
+ feature_paths_refs: Optional[List[Path]] = [],
+ max_kps: Optional[int] = 8192,
+ overwrite: bool = False,
+) -> Path:
+ for path in feature_paths_refs:
+ if not path.exists():
+ raise FileNotFoundError(f"Reference feature file {path}.")
+ pairs = parse_retrieval(pairs_path)
+ pairs = [(q, r) for q, rs in pairs.items() for r in rs]
+ pairs = find_unique_new_pairs(pairs, None if overwrite else match_path)
+ required_queries = set(sum(pairs, ()))
+
+ name2ref = {
+ n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p)
+ }
+ existing_refs = required_queries.intersection(set(name2ref.keys()))
+
+ # images which require feature extraction
+ required_queries = required_queries - existing_refs
+
+ if feature_path_q.exists():
+ existing_queries = set(list_h5_names(feature_path_q))
+ feature_paths_refs.append(feature_path_q)
+ existing_refs = set.union(existing_refs, existing_queries)
+ if not overwrite:
+ required_queries = required_queries - existing_queries
+
+ if len(pairs) == 0 and len(required_queries) == 0:
+ logger.info("All pairs exist. Skipping dense matching.")
+ return
+
+ # extract semi-dense matches
+ match_dense(conf, pairs, image_dir, match_path, existing_refs=existing_refs)
+
+ logger.info("Assigning matches...")
+
+ # Pre-load existing keypoints
+ cpdict, bindict = load_keypoints(
+ conf, feature_paths_refs, quantize=required_queries
+ )
+
+ # Reassign matches by aggregation
+ cpdict = aggregate_matches(
+ conf,
+ pairs,
+ match_path,
+ feature_path=feature_path_q,
+ required_queries=required_queries,
+ max_kps=max_kps,
+ cpdict=cpdict,
+ bindict=bindict,
+ )
+
+ # Invalidate matches that are far from selected bin by reassignment
+ if max_kps is not None:
+ logger.info(f'Reassign matches with max_error={conf["max_error"]}.')
+ assign_matches(pairs, match_path, cpdict, max_error=conf["max_error"])
+
+
def scale_lines(lines, scale):
if np.any(scale != 1.0):
lines *= lines.new_tensor(scale)
@@ -483,3 +982,88 @@ def preprocess(image: np.ndarray):
del pred
torch.cuda.empty_cache()
return ret
+
+
+@torch.no_grad()
+def main(
+ conf: Dict,
+ pairs: Path,
+ image_dir: Path,
+ export_dir: Optional[Path] = None,
+ matches: Optional[Path] = None, # out
+ features: Optional[Path] = None, # out
+ features_ref: Optional[Path] = None,
+ max_kps: Optional[int] = 8192,
+ overwrite: bool = False,
+) -> Path:
+ logger.info(
+ "Extracting semi-dense features with configuration:"
+ f"\n{pprint.pformat(conf)}"
+ )
+
+ if features is None:
+ features = "feats_"
+
+ if isinstance(features, Path):
+ features_q = features
+ if matches is None:
+ raise ValueError(
+ "Either provide both features and matches as Path"
+ " or both as names."
+ )
+ else:
+ if export_dir is None:
+ raise ValueError(
+ "Provide an export_dir if features and matches"
+ f" are not file paths: {features}, {matches}."
+ )
+ features_q = Path(export_dir, f'{features}{conf["output"]}.h5')
+ if matches is None:
+ matches = Path(export_dir, f'{conf["output"]}_{pairs.stem}.h5')
+
+ if features_ref is None:
+ features_ref = []
+ elif isinstance(features_ref, list):
+ features_ref = list(features_ref)
+ elif isinstance(features_ref, Path):
+ features_ref = [features_ref]
+ else:
+ raise TypeError(str(features_ref))
+
+ match_and_assign(
+ conf,
+ pairs,
+ image_dir,
+ matches,
+ features_q,
+ features_ref,
+ max_kps,
+ overwrite,
+ )
+
+ return features_q, matches
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--pairs", type=Path, required=True)
+ parser.add_argument("--image_dir", type=Path, required=True)
+ parser.add_argument("--export_dir", type=Path, required=True)
+ parser.add_argument(
+ "--matches", type=Path, default=confs["loftr"]["output"]
+ )
+ parser.add_argument(
+ "--features", type=str, default="feats_" + confs["loftr"]["output"]
+ )
+ parser.add_argument(
+ "--conf", type=str, default="loftr", choices=list(confs.keys())
+ )
+ args = parser.parse_args()
+ main(
+ confs[args.conf],
+ args.pairs,
+ args.image_dir,
+ args.export_dir,
+ args.matches,
+ args.features,
+ )
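
The aggregation above relies on two operations: `to_cpts` snaps semi-dense keypoints to a grid of `cell_size` so that detections from different pairs vote into shared bins, and `assign_keypoints` without `update` is a plain nearest-neighbour query with a `max_error` rejection radius. A rough standalone illustration:

```python
import numpy as np
from scipy.spatial import KDTree
from hloc.match_dense import to_cpts

# Two nearby detections snap to the same cell centre for cell_size = 4,
# so they will be aggregated into one reference keypoint.
kpts = np.array([[10.3, 20.1], [11.2, 21.0], [30.0, 5.0]])
cpts = to_cpts(kpts, 4.0)
# -> cell centres (11.5, 19.5), (11.5, 19.5) and (31.5, 3.5)

# Without update, assign_keypoints reduces to a KDTree nearest-neighbour
# lookup; anything farther than max_error from a reference keypoint gets -1.
refs = np.array([(11.5, 19.5), (31.5, 3.5)])
dist, ids = KDTree(refs).query(kpts)
ids[dist > 4.0] = -1
```
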
diff --git a/hloc/matchers/mast3r.py b/hloc/matchers/mast3r.py
new file mode 100644
index 0000000..46489cc
--- /dev/null
+++ b/hloc/matchers/mast3r.py
@@ -0,0 +1,111 @@
+import os
+import sys
+import urllib.request
+from pathlib import Path
+
+import numpy as np
+import torch
+import torchvision.transforms as tfm
+
+from .. import logger
+
+mast3r_path = Path(__file__).parent / "../../third_party/mast3r"
+sys.path.append(str(mast3r_path))
+
+dust3r_path = Path(__file__).parent / "../../third_party/dust3r"
+sys.path.append(str(dust3r_path))
+
+from dust3r.image_pairs import make_pairs
+from dust3r.inference import inference
+from mast3r.fast_nn import fast_reciprocal_NNs
+from mast3r.model import AsymmetricMASt3R
+
+from hloc.matchers.duster import Duster
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class Mast3r(Duster):
+ default_conf = {
+ "name": "Mast3r",
+ "model_path": mast3r_path
+ / "model_weights/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth",
+ "max_keypoints": 2000,
+ "vit_patch_size": 16,
+ }
+
+ def _init(self, conf):
+ self.normalize = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+ self.model_path = self.conf["model_path"]
+ self.download_weights()
+ self.net = AsymmetricMASt3R.from_pretrained(self.model_path).to(device)
+ logger.info("Loaded Mast3r model")
+
+ def download_weights(self):
+ url = "https://download.europe.naverlabs.com/ComputerVision/MASt3R/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth"
+
+ self.model_path.parent.mkdir(parents=True, exist_ok=True)
+ if not os.path.isfile(self.model_path):
+ logger.info("Downloading Mast3r(ViT large)... (takes a while)")
+ urllib.request.urlretrieve(url, self.model_path)
+ logger.info("Downloading Mast3r(ViT large)... done!")
+
+ def _forward(self, data):
+ img0, img1 = data["image0"], data["image1"]
+ mean = torch.tensor([0.5, 0.5, 0.5]).to(device)
+ std = torch.tensor([0.5, 0.5, 0.5]).to(device)
+
+ img0 = (img0 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1)
+ img1 = (img1 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1)
+
+ images = [
+ {"img": img0, "idx": 0, "instance": 0},
+ {"img": img1, "idx": 1, "instance": 1},
+ ]
+ pairs = make_pairs(
+ images, scene_graph="complete", prefilter=None, symmetrize=True
+ )
+ output = inference(pairs, self.net, device, batch_size=1)
+
+ # at this stage, you have the raw dust3r predictions
+ _, pred1 = output["view1"], output["pred1"]
+ _, pred2 = output["view2"], output["pred2"]
+
+ desc1, desc2 = (
+ pred1["desc"][1].squeeze(0).detach(),
+ pred2["desc"][1].squeeze(0).detach(),
+ )
+
+ # find 2D-2D matches between the two images
+ matches_im0, matches_im1 = fast_reciprocal_NNs(
+ desc1,
+ desc2,
+ subsample_or_initxy1=2,
+ device=device,
+ dist="dot",
+ block_size=2**13,
+ )
+
+ mkpts0 = matches_im0.copy()
+ mkpts1 = matches_im1.copy()
+
+ if len(mkpts0) == 0:
+ pred = {
+ "keypoints0": torch.zeros([0, 2]),
+ "keypoints1": torch.zeros([0, 2]),
+ }
+            logger.warning("Matched 0 points")
+ else:
+
+ top_k = self.conf["max_keypoints"]
+ if top_k is not None and len(mkpts0) > top_k:
+ keep = np.round(np.linspace(0, len(mkpts0) - 1, top_k)).astype(
+ int
+ )
+ mkpts0 = mkpts0[keep]
+ mkpts1 = mkpts1[keep]
+ pred = {
+ "keypoints0": torch.from_numpy(mkpts0),
+ "keypoints1": torch.from_numpy(mkpts1),
+ }
+ return pred
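
The new `mast3r` entry in `match_dense.confs` makes MASt3R usable like any other dense matcher in hloc: generate pairs first, then let `match_dense.main` write both the aggregated keypoints and the matches. A hedged end-to-end sketch (folder names are placeholders; the model weights are downloaded on first use):

```python
from pathlib import Path
from hloc import match_dense, pairs_from_exhaustive

images = Path("datasets/sacre_coeur/mapping")      # placeholder image folder
outputs = Path("outputs/mast3r_demo")
outputs.mkdir(parents=True, exist_ok=True)
pairs = outputs / "pairs-exhaustive.txt"

# Exhaustive pairs over all images in the folder.
names = sorted(p.name for p in images.iterdir())
pairs_from_exhaustive.main(pairs, image_list=names)

conf = match_dense.confs["mast3r"]                 # configuration added above
features, matches = match_dense.main(conf, pairs, images, export_dir=outputs)
```
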
diff --git a/hloc/matchers/superglue.py b/hloc/matchers/superglue.py
index 7e427f4..6fae344 100644
--- a/hloc/matchers/superglue.py
+++ b/hloc/matchers/superglue.py
@@ -4,7 +4,9 @@
from ..utils.base_model import BaseModel
sys.path.append(str(Path(__file__).parent / "../../third_party"))
-from SuperGluePretrainedNetwork.models.superglue import SuperGlue as SG
+from SuperGluePretrainedNetwork.models.superglue import ( # noqa: E402
+ SuperGlue as SG,
+)
class SuperGlue(BaseModel):
diff --git a/hloc/pairs_from_covisibility.py b/hloc/pairs_from_covisibility.py
new file mode 100644
index 0000000..49f3e57
--- /dev/null
+++ b/hloc/pairs_from_covisibility.py
@@ -0,0 +1,60 @@
+import argparse
+from collections import defaultdict
+from pathlib import Path
+
+import numpy as np
+from tqdm import tqdm
+
+from . import logger
+from .utils.read_write_model import read_model
+
+
+def main(model, output, num_matched):
+ logger.info("Reading the COLMAP model...")
+ cameras, images, points3D = read_model(model)
+
+ logger.info("Extracting image pairs from covisibility info...")
+ pairs = []
+ for image_id, image in tqdm(images.items()):
+ matched = image.point3D_ids != -1
+ points3D_covis = image.point3D_ids[matched]
+
+ covis = defaultdict(int)
+ for point_id in points3D_covis:
+ for image_covis_id in points3D[point_id].image_ids:
+ if image_covis_id != image_id:
+ covis[image_covis_id] += 1
+
+ if len(covis) == 0:
+ logger.info(f"Image {image_id} does not have any covisibility.")
+ continue
+
+ covis_ids = np.array(list(covis.keys()))
+ covis_num = np.array([covis[i] for i in covis_ids])
+
+ if len(covis_ids) <= num_matched:
+ top_covis_ids = covis_ids[np.argsort(-covis_num)]
+ else:
+ # get covisible image ids with top k number of common matches
+ ind_top = np.argpartition(covis_num, -num_matched)
+ ind_top = ind_top[-num_matched:] # unsorted top k
+ ind_top = ind_top[np.argsort(-covis_num[ind_top])]
+ top_covis_ids = [covis_ids[i] for i in ind_top]
+ assert covis_num[ind_top[0]] == np.max(covis_num)
+
+ for i in top_covis_ids:
+ pair = (image.name, images[i].name)
+ pairs.append(pair)
+
+ logger.info(f"Found {len(pairs)} pairs.")
+ with open(output, "w") as f:
+ f.write("\n".join(" ".join([i, j]) for i, j in pairs))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", required=True, type=Path)
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--num_matched", required=True, type=int)
+ args = parser.parse_args()
+ main(**args.__dict__)
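
The top-k selection above uses `np.argpartition` to avoid a full sort of the covisibility counts; a tiny numeric example of that pattern:

```python
import numpy as np

covis_num = np.array([3, 10, 1, 7, 5])  # shared 3D points with 5 other images
num_matched = 2

ind_top = np.argpartition(covis_num, -num_matched)[-num_matched:]  # unsorted top-k
ind_top = ind_top[np.argsort(-covis_num[ind_top])]                 # sort descending
# ind_top == [1, 3]: the images sharing 10 and 7 points.
```
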
diff --git a/hloc/pairs_from_exhaustive.py b/hloc/pairs_from_exhaustive.py
new file mode 100644
index 0000000..438b814
--- /dev/null
+++ b/hloc/pairs_from_exhaustive.py
@@ -0,0 +1,66 @@
+import argparse
+import collections.abc as collections
+from pathlib import Path
+from typing import List, Optional, Union
+
+from . import logger
+from .utils.io import list_h5_names
+from .utils.parsers import parse_image_lists
+
+
+def main(
+ output: Path,
+ image_list: Optional[Union[Path, List[str]]] = None,
+ features: Optional[Path] = None,
+ ref_list: Optional[Union[Path, List[str]]] = None,
+ ref_features: Optional[Path] = None,
+):
+ if image_list is not None:
+ if isinstance(image_list, (str, Path)):
+ names_q = parse_image_lists(image_list)
+ elif isinstance(image_list, collections.Iterable):
+ names_q = list(image_list)
+ else:
+ raise ValueError(f"Unknown type for image list: {image_list}")
+ elif features is not None:
+ names_q = list_h5_names(features)
+ else:
+ raise ValueError("Provide either a list of images or a feature file.")
+
+ self_matching = False
+ if ref_list is not None:
+ if isinstance(ref_list, (str, Path)):
+ names_ref = parse_image_lists(ref_list)
+        elif isinstance(ref_list, collections.Iterable):
+ names_ref = list(ref_list)
+ else:
+ raise ValueError(
+ f"Unknown type for reference image list: {ref_list}"
+ )
+ elif ref_features is not None:
+ names_ref = list_h5_names(ref_features)
+ else:
+ self_matching = True
+ names_ref = names_q
+
+ pairs = []
+ for i, n1 in enumerate(names_q):
+ for j, n2 in enumerate(names_ref):
+ if self_matching and j <= i:
+ continue
+ pairs.append((n1, n2))
+
+ logger.info(f"Found {len(pairs)} pairs.")
+ with open(output, "w") as f:
+ f.write("\n".join(" ".join([i, j]) for i, j in pairs))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--image_list", type=Path)
+ parser.add_argument("--features", type=Path)
+ parser.add_argument("--ref_list", type=Path)
+ parser.add_argument("--ref_features", type=Path)
+ args = parser.parse_args()
+ main(**args.__dict__)
diff --git a/hloc/pairs_from_poses.py b/hloc/pairs_from_poses.py
new file mode 100644
index 0000000..b6b4f88
--- /dev/null
+++ b/hloc/pairs_from_poses.py
@@ -0,0 +1,70 @@
+import argparse
+from pathlib import Path
+
+import numpy as np
+import scipy.spatial
+
+from . import logger
+from .pairs_from_retrieval import pairs_from_score_matrix
+from .utils.read_write_model import read_images_binary
+
+DEFAULT_ROT_THRESH = 30 # in degrees
+
+
+def get_pairwise_distances(images):
+ ids = np.array(list(images.keys()))
+ Rs = []
+ ts = []
+ for id_ in ids:
+ image = images[id_]
+ R = image.qvec2rotmat()
+ t = image.tvec
+ Rs.append(R)
+ ts.append(t)
+ Rs = np.stack(Rs, 0)
+ ts = np.stack(ts, 0)
+
+ # Invert the poses from world-to-camera to camera-to-world.
+ Rs = Rs.transpose(0, 2, 1)
+ ts = -(Rs @ ts[:, :, None])[:, :, 0]
+
+ dist = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(ts))
+
+ # Instead of computing the angle between two camera orientations,
+ # we compute the angle between the principal axes, as two images rotated
+ # around their principal axis still observe the same scene.
+ axes = Rs[:, :, -1]
+ dots = np.einsum("mi,ni->mn", axes, axes, optimize=True)
+ dR = np.rad2deg(np.arccos(np.clip(dots, -1.0, 1.0)))
+
+ return ids, dist, dR
+
+
+def main(model, output, num_matched, rotation_threshold=DEFAULT_ROT_THRESH):
+ logger.info("Reading the COLMAP model...")
+ images = read_images_binary(model / "images.bin")
+
+ logger.info(f"Obtaining pairwise distances between {len(images)} images...")
+ ids, dist, dR = get_pairwise_distances(images)
+ scores = -dist
+
+ invalid = dR >= rotation_threshold
+ np.fill_diagonal(invalid, True)
+ pairs = pairs_from_score_matrix(scores, invalid, num_matched)
+ pairs = [(images[ids[i]].name, images[ids[j]].name) for i, j in pairs]
+
+ logger.info(f"Found {len(pairs)} pairs.")
+ with open(output, "w") as f:
+ f.write("\n".join(" ".join(p) for p in pairs))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", required=True, type=Path)
+ parser.add_argument("--output", required=True, type=Path)
+ parser.add_argument("--num_matched", required=True, type=int)
+ parser.add_argument(
+ "--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float
+ )
+ args = parser.parse_args()
+ main(**args.__dict__)
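
Pairs are scored by camera-centre distance and filtered by the angle between principal (viewing) axes rather than the full relative rotation, so images rotated about their optical axis still count as covisible. A small check of the angle computation:

```python
import numpy as np

# Viewing directions (camera z-axes in world coordinates) of three cameras.
axes = np.array([
    [0.0, 0.0, 1.0],
    [0.0, np.sin(np.deg2rad(20)), np.cos(np.deg2rad(20))],
    [1.0, 0.0, 0.0],
])
dots = np.einsum("mi,ni->mn", axes, axes)
dR = np.rad2deg(np.arccos(np.clip(dots, -1.0, 1.0)))
# dR[0, 1] ~= 20 deg (kept under the default 30 deg rotation threshold),
# dR[0, 2] == 90 deg (marked invalid).
```
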
diff --git a/hloc/pairs_from_retrieval.py b/hloc/pairs_from_retrieval.py
new file mode 100644
index 0000000..6948fe6
--- /dev/null
+++ b/hloc/pairs_from_retrieval.py
@@ -0,0 +1,137 @@
+import argparse
+import collections.abc as collections
+from pathlib import Path
+from typing import Optional
+
+import h5py
+import numpy as np
+import torch
+
+from . import logger
+from .utils.io import list_h5_names
+from .utils.parsers import parse_image_lists
+from .utils.read_write_model import read_images_binary
+
+
+def parse_names(prefix, names, names_all):
+ if prefix is not None:
+ if not isinstance(prefix, str):
+ prefix = tuple(prefix)
+ names = [n for n in names_all if n.startswith(prefix)]
+ if len(names) == 0:
+ raise ValueError(
+ f"Could not find any image with the prefix `{prefix}`."
+ )
+ elif names is not None:
+ if isinstance(names, (str, Path)):
+ names = parse_image_lists(names)
+ elif isinstance(names, collections.Iterable):
+ names = list(names)
+ else:
+ raise ValueError(
+ f"Unknown type of image list: {names}."
+ "Provide either a list or a path to a list file."
+ )
+ else:
+ names = names_all
+ return names
+
+
+def get_descriptors(names, path, name2idx=None, key="global_descriptor"):
+ if name2idx is None:
+ with h5py.File(str(path), "r", libver="latest") as fd:
+ desc = [fd[n][key].__array__() for n in names]
+ else:
+ desc = []
+ for n in names:
+ with h5py.File(str(path[name2idx[n]]), "r", libver="latest") as fd:
+ desc.append(fd[n][key].__array__())
+ return torch.from_numpy(np.stack(desc, 0)).float()
+
+
+def pairs_from_score_matrix(
+ scores: torch.Tensor,
+ invalid: np.array,
+ num_select: int,
+ min_score: Optional[float] = None,
+):
+ assert scores.shape == invalid.shape
+ if isinstance(scores, np.ndarray):
+ scores = torch.from_numpy(scores)
+ invalid = torch.from_numpy(invalid).to(scores.device)
+ if min_score is not None:
+ invalid |= scores < min_score
+ scores.masked_fill_(invalid, float("-inf"))
+
+ topk = torch.topk(scores, num_select, dim=1)
+ indices = topk.indices.cpu().numpy()
+ valid = topk.values.isfinite().cpu().numpy()
+
+ pairs = []
+ for i, j in zip(*np.where(valid)):
+ pairs.append((i, indices[i, j]))
+ return pairs
+
+
+def main(
+ descriptors,
+ output,
+ num_matched,
+ query_prefix=None,
+ query_list=None,
+ db_prefix=None,
+ db_list=None,
+ db_model=None,
+ db_descriptors=None,
+):
+ logger.info("Extracting image pairs from a retrieval database.")
+
+ # We handle multiple reference feature files.
+ # We only assume that names are unique among them and map names to files.
+ if db_descriptors is None:
+ db_descriptors = descriptors
+ if isinstance(db_descriptors, (Path, str)):
+ db_descriptors = [db_descriptors]
+ name2db = {
+ n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p)
+ }
+ db_names_h5 = list(name2db.keys())
+ query_names_h5 = list_h5_names(descriptors)
+
+ if db_model:
+ images = read_images_binary(db_model / "images.bin")
+ db_names = [i.name for i in images.values()]
+ else:
+ db_names = parse_names(db_prefix, db_list, db_names_h5)
+ if len(db_names) == 0:
+ raise ValueError("Could not find any database image.")
+ query_names = parse_names(query_prefix, query_list, query_names_h5)
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ db_desc = get_descriptors(db_names, db_descriptors, name2db)
+ query_desc = get_descriptors(query_names, descriptors)
+ sim = torch.einsum("id,jd->ij", query_desc.to(device), db_desc.to(device))
+
+ # Avoid self-matching
+ self = np.array(query_names)[:, None] == np.array(db_names)[None]
+ pairs = pairs_from_score_matrix(sim, self, num_matched, min_score=0)
+ pairs = [(query_names[i], db_names[j]) for i, j in pairs]
+
+ logger.info(f"Found {len(pairs)} pairs.")
+ with open(output, "w") as f:
+ f.write("\n".join(" ".join([i, j]) for i, j in pairs))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--descriptors", type=Path, required=True)
+ parser.add_argument("--output", type=Path, required=True)
+ parser.add_argument("--num_matched", type=int, required=True)
+ parser.add_argument("--query_prefix", type=str, nargs="+")
+ parser.add_argument("--query_list", type=Path)
+ parser.add_argument("--db_prefix", type=str, nargs="+")
+ parser.add_argument("--db_list", type=Path)
+ parser.add_argument("--db_model", type=Path)
+ parser.add_argument("--db_descriptors", type=Path)
+ args = parser.parse_args()
+ main(**args.__dict__)
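
`pairs_from_score_matrix` selects, for every query row, the `num_select` highest-scoring database images that are not masked as invalid. A tiny numeric check:

```python
import numpy as np
from hloc.pairs_from_retrieval import pairs_from_score_matrix

sim = np.array([[0.9, 0.2, 0.7],
                [0.1, 0.8, 0.3]])         # 2 queries x 3 database images
invalid = np.zeros_like(sim, dtype=bool)  # nothing masked out here

pairs = pairs_from_score_matrix(sim, invalid, num_select=2, min_score=0)
# -> query 0 pairs with db 0 and 2, query 1 with db 1 and 2:
#    [(0, 0), (0, 2), (1, 1), (1, 2)]
```
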
diff --git a/hloc/pipelines/4Seasons/localize.py b/hloc/pipelines/4Seasons/localize.py
index 0451130..50ed957 100644
--- a/hloc/pipelines/4Seasons/localize.py
+++ b/hloc/pipelines/4Seasons/localize.py
@@ -1,16 +1,21 @@
-from pathlib import Path
import argparse
+from pathlib import Path
-from ... import extract_features, match_features, localize_sfm, logger
-from .utils import get_timestamps, delete_unused_images
-from .utils import generate_query_lists, generate_localization_pairs
-from .utils import prepare_submission, evaluate_submission
+from ... import extract_features, localize_sfm, logger, match_features
+from .utils import (
+ delete_unused_images,
+ evaluate_submission,
+ generate_localization_pairs,
+ generate_query_lists,
+ get_timestamps,
+ prepare_submission,
+)
relocalization_files = {
- "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt",
- "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt",
- "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt",
- "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt",
+ "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt", # noqa: E501
+ "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt", # noqa: E501
+ "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt", # noqa: E501
+ "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt", # noqa: E501
}
parser = argparse.ArgumentParser()
@@ -67,9 +72,7 @@
generate_query_lists(timestamps, seq_dir, query_list)
# Generate the localization pairs from the given reference frames.
-generate_localization_pairs(
- sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs
-)
+generate_localization_pairs(sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs)
# Extract, match, and localize.
ffile = extract_features.main(fconf, seq_images, output_dir)
diff --git a/hloc/pipelines/4Seasons/prepare_reference.py b/hloc/pipelines/4Seasons/prepare_reference.py
index 9074df8..f47aee7 100644
--- a/hloc/pipelines/4Seasons/prepare_reference.py
+++ b/hloc/pipelines/4Seasons/prepare_reference.py
@@ -1,10 +1,8 @@
-from pathlib import Path
import argparse
+from pathlib import Path
-from ... import extract_features, match_features
-from ... import pairs_from_poses, triangulation
-from .utils import get_timestamps, delete_unused_images
-from .utils import build_empty_colmap_model
+from ... import extract_features, match_features, pairs_from_poses, triangulation
+from .utils import build_empty_colmap_model, delete_unused_images, get_timestamps
parser = argparse.ArgumentParser()
parser.add_argument(
diff --git a/hloc/pipelines/4Seasons/utils.py b/hloc/pipelines/4Seasons/utils.py
index 8def70f..e5aace9 100644
--- a/hloc/pipelines/4Seasons/utils.py
+++ b/hloc/pipelines/4Seasons/utils.py
@@ -1,11 +1,18 @@
-import os
-import numpy as np
+import glob
import logging
+import os
from pathlib import Path
-from ...utils.read_write_model import qvec2rotmat, rotmat2qvec
-from ...utils.read_write_model import Image, write_model, Camera
+import numpy as np
+
from ...utils.parsers import parse_retrieval
+from ...utils.read_write_model import (
+ Camera,
+ Image,
+ qvec2rotmat,
+ rotmat2qvec,
+ write_model,
+)
logger = logging.getLogger(__name__)
@@ -28,10 +35,10 @@ def get_timestamps(files, idx):
def delete_unused_images(root, timestamps):
"""Delete all images in root if they are not contained in timestamps."""
- images = list(root.glob("**/*.png"))
+ images = glob.glob((root / "**/*.png").as_posix(), recursive=True)
deleted = 0
for image in images:
- ts = image.stem
+ ts = Path(image).stem
if ts not in timestamps:
os.remove(image)
deleted += 1
@@ -48,11 +55,7 @@ def camera_from_calibration_file(id_, path):
model_name = "PINHOLE"
params = [float(i) for i in [fx, fy, cx, cy]]
camera = Camera(
- id=id_,
- model=model_name,
- width=int(width),
- height=int(height),
- params=params,
+ id=id_, model=model_name, width=int(width), height=int(height), params=params
)
return camera
@@ -153,9 +156,7 @@ def generate_localization_pairs(sequence, reloc, num, ref_pairs, out_path):
"""
if "test" in sequence:
# hard pairs will be overwritten by easy ones if available
- relocs = [
- str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"]
- ]
+ relocs = [str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"]]
else:
relocs = [reloc]
query_to_ref_ts = {}
@@ -213,12 +214,8 @@ def evaluate_submission(submission_dir, relocs, ths=[0.1, 0.2, 0.5]):
"""Compute the relocalization recall from predicted and ground truth poses."""
for reloc in relocs.parent.glob(relocs.name):
poses_gt = parse_relocalization(reloc, has_poses=True)
- poses_pred = parse_relocalization(
- submission_dir / reloc.name, has_poses=True
- )
- poses_pred = {
- (ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred
- }
+ poses_pred = parse_relocalization(submission_dir / reloc.name, has_poses=True)
+ poses_pred = {(ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred}
error = []
for ref_ts, q_ts, R_gt, t_gt in poses_gt:
diff --git a/hloc/pipelines/7Scenes/create_gt_sfm.py b/hloc/pipelines/7Scenes/create_gt_sfm.py
index 2aca434..95dfa46 100644
--- a/hloc/pipelines/7Scenes/create_gt_sfm.py
+++ b/hloc/pipelines/7Scenes/create_gt_sfm.py
@@ -1,17 +1,17 @@
from pathlib import Path
+
import numpy as np
-import torch
import PIL.Image
-from tqdm import tqdm
import pycolmap
+import torch
+from tqdm import tqdm
-from ...utils.read_write_model import write_model, read_model
+from ...utils.read_write_model import read_model, write_model
def scene_coordinates(p2D, R_w2c, t_w2c, depth, camera):
assert len(depth) == len(p2D)
- ret = pycolmap.image_to_world(p2D, camera._asdict())
- p2D_norm = np.asarray(ret["world_points"])
+ p2D_norm = np.stack(pycolmap.Camera(camera._asdict()).image_to_world(p2D))
p2D_h = np.concatenate([p2D_norm, np.ones_like(p2D_norm[:, :1])], 1)
p3D_c = p2D_h * depth[:, None]
p3D_w = (p3D_c - t_w2c) @ R_w2c
@@ -28,9 +28,7 @@ def interpolate_depth(depth, kp):
# To maximize the number of points that have depth:
# do bilinear interpolation first and then nearest for the remaining points
- interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[
- 0, :, 0
- ]
+ interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[0, :, 0]
interp_nn = torch.nn.functional.grid_sample(
depth, kp, align_corners=True, mode="nearest"
)[0, :, 0]
@@ -54,8 +52,7 @@ def project_to_image(p3D, R, t, camera, eps: float = 1e-4, pad: int = 1):
p3D = (p3D @ R.T) + t
visible = p3D[:, -1] >= eps # keep points in front of the camera
p2D_norm = p3D[:, :-1] / p3D[:, -1:].clip(min=eps)
- ret = pycolmap.world_to_image(p2D_norm, camera._asdict())
- p2D = np.asarray(ret["image_points"])
+ p2D = np.stack(pycolmap.Camera(camera._asdict()).world_to_image(p2D_norm))
size = np.array([camera.width - pad - 1, camera.height - pad - 1])
valid = np.all((p2D >= pad) & (p2D <= size), -1)
valid &= visible
@@ -129,15 +126,7 @@ def correct_sfm_with_gt_depth(sfm_path, depth_folder_path, output_path):
dataset = Path("datasets/7scenes")
outputs = Path("outputs/7Scenes")
- SCENES = [
- "chess",
- "fire",
- "heads",
- "office",
- "pumpkin",
- "redkitchen",
- "stairs",
- ]
+ SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"]
for scene in SCENES:
sfm_path = outputs / scene / "sfm_superpoint+superglue"
depth_path = dataset / f"depth/7scenes_{scene}/train/depth"
diff --git a/hloc/pipelines/7Scenes/pipeline.py b/hloc/pipelines/7Scenes/pipeline.py
index 54d0e81..6fc28c6 100644
--- a/hloc/pipelines/7Scenes/pipeline.py
+++ b/hloc/pipelines/7Scenes/pipeline.py
@@ -1,11 +1,17 @@
-from pathlib import Path
import argparse
+from pathlib import Path
-from .utils import create_reference_sfm
-from .create_gt_sfm import correct_sfm_with_gt_depth
+from ... import (
+ extract_features,
+ localize_sfm,
+ logger,
+ match_features,
+ pairs_from_covisibility,
+ triangulation,
+)
from ..Cambridge.utils import create_query_list_with_intrinsics, evaluate
-from ... import extract_features, match_features, pairs_from_covisibility
-from ... import triangulation, localize_sfm, logger
+from .create_gt_sfm import correct_sfm_with_gt_depth
+from .utils import create_reference_sfm
SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"]
@@ -45,9 +51,7 @@ def run_scene(
create_reference_sfm(gt_dir, ref_sfm_sift, test_list)
create_query_list_with_intrinsics(gt_dir, query_list, test_list)
- features = extract_features.main(
- feature_conf, images, outputs, as_half=True
- )
+ features = extract_features.main(feature_conf, images, outputs, as_half=True)
sfm_pairs = outputs / f"pairs-db-covis{num_covis}.txt"
pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis)
@@ -114,9 +118,7 @@ def run_scene(
results = (
args.outputs
/ scene
- / "results_{}.txt".format(
- "dense" if args.use_dense_depth else "sparse"
- )
+ / "results_{}.txt".format("dense" if args.use_dense_depth else "sparse")
)
if args.overwrite or not results.exists():
run_scene(
diff --git a/hloc/pipelines/7Scenes/utils.py b/hloc/pipelines/7Scenes/utils.py
index 51070a0..1cb0212 100644
--- a/hloc/pipelines/7Scenes/utils.py
+++ b/hloc/pipelines/7Scenes/utils.py
@@ -1,4 +1,5 @@
import logging
+
import numpy as np
from hloc.utils.read_write_model import read_model, write_model
diff --git a/hloc/pipelines/Aachen/README.md b/hloc/pipelines/Aachen/README.md
index 1aefdb7..57b66d6 100644
--- a/hloc/pipelines/Aachen/README.md
+++ b/hloc/pipelines/Aachen/README.md
@@ -6,7 +6,7 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio
```bash
export dataset=datasets/aachen
wget -r -np -nH -R "index.html*,aachen_v1_1.zip" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset
-unzip $dataset/images/database_and_query_images.zip -d $dataset/images
+unzip $dataset/images/database_and_query_images.zip -d $dataset
```
## Pipeline
diff --git a/hloc/pipelines/Aachen/pipeline.py b/hloc/pipelines/Aachen/pipeline.py
index 1dbf8ab..e31ce72 100644
--- a/hloc/pipelines/Aachen/pipeline.py
+++ b/hloc/pipelines/Aachen/pipeline.py
@@ -1,102 +1,109 @@
+import argparse
from pathlib import Path
from pprint import pformat
-import argparse
-from ... import extract_features, match_features
-from ... import pairs_from_covisibility, pairs_from_retrieval
-from ... import colmap_from_nvm, triangulation, localize_sfm
+from ... import (
+ colmap_from_nvm,
+ extract_features,
+ localize_sfm,
+ logger,
+ match_features,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--dataset",
- type=Path,
- default="datasets/aachen",
- help="Path to the dataset, default: %(default)s",
-)
-parser.add_argument(
- "--outputs",
- type=Path,
- default="outputs/aachen",
- help="Path to the output directory, default: %(default)s",
-)
-parser.add_argument(
- "--num_covis",
- type=int,
- default=20,
- help="Number of image pairs for SfM, default: %(default)s",
-)
-parser.add_argument(
- "--num_loc",
- type=int,
- default=50,
- help="Number of image pairs for loc, default: %(default)s",
-)
-args = parser.parse_args()
+def run(args):
+ # Setup the paths
+ dataset = args.dataset
+ images = dataset / "images_upright/"
-# Setup the paths
-dataset = args.dataset
-images = dataset / "images/images_upright/"
+ outputs = args.outputs # where everything will be saved
+ sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses
+ reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build
+ sfm_pairs = (
+ outputs / f"pairs-db-covis{args.num_covis}.txt"
+ ) # top-k most covisible in SIFT model
+ loc_pairs = (
+ outputs / f"pairs-query-netvlad{args.num_loc}.txt"
+ ) # top-k retrieved by NetVLAD
+ results = outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
-outputs = args.outputs # where everything will be saved
-sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses
-reference_sfm = (
- outputs / "sfm_superpoint+superglue"
-) # the SfM model we will build
-sfm_pairs = (
- outputs / f"pairs-db-covis{args.num_covis}.txt"
-) # top-k most covisible in SIFT model
-loc_pairs = (
- outputs / f"pairs-query-netvlad{args.num_loc}.txt"
-) # top-k retrieved by NetVLAD
-results = (
- outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
-)
+ # list the standard configurations available
+ logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs))
+ logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs))
-# list the standard configurations available
-print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}")
-print(f"Configs for feature matchers:\n{pformat(match_features.confs)}")
+ # pick one of the configurations for extraction and matching
+ retrieval_conf = extract_features.confs["netvlad"]
+ feature_conf = extract_features.confs["superpoint_aachen"]
+ matcher_conf = match_features.confs["superglue"]
-# pick one of the configurations for extraction and matching
-retrieval_conf = extract_features.confs["netvlad"]
-feature_conf = extract_features.confs["superpoint_aachen"]
-matcher_conf = match_features.confs["superglue"]
+ features = extract_features.main(feature_conf, images, outputs)
-features = extract_features.main(feature_conf, images, outputs)
+ colmap_from_nvm.main(
+ dataset / "3D-models/aachen_cvpr2018_db.nvm",
+ dataset / "3D-models/database_intrinsics.txt",
+ dataset / "aachen.db",
+ sift_sfm,
+ )
+ pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
+ sfm_matches = match_features.main(
+ matcher_conf, sfm_pairs, feature_conf["output"], outputs
+ )
-colmap_from_nvm.main(
- dataset / "3D-models/aachen_cvpr2018_db.nvm",
- dataset / "3D-models/database_intrinsics.txt",
- dataset / "aachen.db",
- sift_sfm,
-)
-pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
-sfm_matches = match_features.main(
- matcher_conf, sfm_pairs, feature_conf["output"], outputs
-)
+ triangulation.main(
+ reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
+ )
-triangulation.main(
- reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
-)
+ global_descriptors = extract_features.main(retrieval_conf, images, outputs)
+ pairs_from_retrieval.main(
+ global_descriptors,
+ loc_pairs,
+ args.num_loc,
+ query_prefix="query",
+ db_model=reference_sfm,
+ )
+ loc_matches = match_features.main(
+ matcher_conf, loc_pairs, feature_conf["output"], outputs
+ )
+
+ localize_sfm.main(
+ reference_sfm,
+ dataset / "queries/*_time_queries_with_intrinsics.txt",
+ loc_pairs,
+ features,
+ loc_matches,
+ results,
+ covisibility_clustering=False,
+ ) # not required with SuperPoint+SuperGlue
-global_descriptors = extract_features.main(retrieval_conf, images, outputs)
-pairs_from_retrieval.main(
- global_descriptors,
- loc_pairs,
- args.num_loc,
- query_prefix="query",
- db_model=reference_sfm,
-)
-loc_matches = match_features.main(
- matcher_conf, loc_pairs, feature_conf["output"], outputs
-)
-localize_sfm.main(
- reference_sfm,
- dataset / "queries/*_time_queries_with_intrinsics.txt",
- loc_pairs,
- features,
- loc_matches,
- results,
- covisibility_clustering=False,
-) # not required with SuperPoint+SuperGlue
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--dataset",
+ type=Path,
+ default="datasets/aachen",
+ help="Path to the dataset, default: %(default)s",
+ )
+ parser.add_argument(
+ "--outputs",
+ type=Path,
+ default="outputs/aachen",
+ help="Path to the output directory, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_covis",
+ type=int,
+ default=20,
+ help="Number of image pairs for SfM, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_loc",
+ type=int,
+ default=50,
+ help="Number of image pairs for loc, default: %(default)s",
+ )
+ args = parser.parse_args()
+ run(args)
diff --git a/hloc/pipelines/Aachen_v1_1/README.md b/hloc/pipelines/Aachen_v1_1/README.md
index 33a310b..c17e751 100644
--- a/hloc/pipelines/Aachen_v1_1/README.md
+++ b/hloc/pipelines/Aachen_v1_1/README.md
@@ -6,9 +6,8 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio
```bash
export dataset=datasets/aachen_v1.1
wget -r -np -nH -R "index.html*" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset
-unzip $dataset/images/database_and_query_images.zip -d $dataset/images
+unzip $dataset/images/database_and_query_images.zip -d $dataset
unzip $dataset/aachen_v1_1.zip -d $dataset
-rsync -a $dataset/images_upright/ $dataset/images/images_upright/
```
## Pipeline
diff --git a/hloc/pipelines/Aachen_v1_1/pipeline.py b/hloc/pipelines/Aachen_v1_1/pipeline.py
index fb8cc3f..0753604 100644
--- a/hloc/pipelines/Aachen_v1_1/pipeline.py
+++ b/hloc/pipelines/Aachen_v1_1/pipeline.py
@@ -1,95 +1,104 @@
+import argparse
from pathlib import Path
from pprint import pformat
-import argparse
-from ... import extract_features, match_features, triangulation
-from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm
+from ... import (
+ extract_features,
+ localize_sfm,
+ logger,
+ match_features,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--dataset",
- type=Path,
- default="datasets/aachen_v1.1",
- help="Path to the dataset, default: %(default)s",
-)
-parser.add_argument(
- "--outputs",
- type=Path,
- default="outputs/aachen_v1.1",
- help="Path to the output directory, default: %(default)s",
-)
-parser.add_argument(
- "--num_covis",
- type=int,
- default=20,
- help="Number of image pairs for SfM, default: %(default)s",
-)
-parser.add_argument(
- "--num_loc",
- type=int,
- default=50,
- help="Number of image pairs for loc, default: %(default)s",
-)
-args = parser.parse_args()
+def run(args):
+ # Setup the paths
+ dataset = args.dataset
+ images = dataset / "images_upright/"
+ sift_sfm = dataset / "3D-models/aachen_v_1_1"
-# Setup the paths
-dataset = args.dataset
-images = dataset / "images/images_upright/"
-sift_sfm = dataset / "3D-models/aachen_v_1_1"
+ outputs = args.outputs # where everything will be saved
+ reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build
+ sfm_pairs = (
+ outputs / f"pairs-db-covis{args.num_covis}.txt"
+ ) # top-k most covisible in SIFT model
+ loc_pairs = (
+ outputs / f"pairs-query-netvlad{args.num_loc}.txt"
+ ) # top-k retrieved by NetVLAD
+ results = (
+ outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
+ )
-outputs = args.outputs # where everything will be saved
-reference_sfm = (
- outputs / "sfm_superpoint+superglue"
-) # the SfM model we will build
-sfm_pairs = (
- outputs / f"pairs-db-covis{args.num_covis}.txt"
-) # top-k most covisible in SIFT model
-loc_pairs = (
- outputs / f"pairs-query-netvlad{args.num_loc}.txt"
-) # top-k retrieved by NetVLAD
-results = (
- outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
-)
+ # list the standard configurations available
+ logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs))
+ logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs))
-# list the standard configurations available
-print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}")
-print(f"Configs for feature matchers:\n{pformat(match_features.confs)}")
+ # pick one of the configurations for extraction and matching
+ retrieval_conf = extract_features.confs["netvlad"]
+ feature_conf = extract_features.confs["superpoint_max"]
+ matcher_conf = match_features.confs["superglue"]
-# pick one of the configurations for extraction and matching
-retrieval_conf = extract_features.confs["netvlad"]
-feature_conf = extract_features.confs["superpoint_max"]
-matcher_conf = match_features.confs["superglue"]
+ features = extract_features.main(feature_conf, images, outputs)
-features = extract_features.main(feature_conf, images, outputs)
+ pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
+ sfm_matches = match_features.main(
+ matcher_conf, sfm_pairs, feature_conf["output"], outputs
+ )
-pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
-sfm_matches = match_features.main(
- matcher_conf, sfm_pairs, feature_conf["output"], outputs
-)
+ triangulation.main(
+ reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
+ )
-triangulation.main(
- reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
-)
+ global_descriptors = extract_features.main(retrieval_conf, images, outputs)
+ pairs_from_retrieval.main(
+ global_descriptors,
+ loc_pairs,
+ args.num_loc,
+ query_prefix="query",
+ db_model=reference_sfm,
+ )
+ loc_matches = match_features.main(
+ matcher_conf, loc_pairs, feature_conf["output"], outputs
+ )
+
+ localize_sfm.main(
+ reference_sfm,
+ dataset / "queries/*_time_queries_with_intrinsics.txt",
+ loc_pairs,
+ features,
+ loc_matches,
+ results,
+ covisibility_clustering=False,
+ ) # not required with SuperPoint+SuperGlue
-global_descriptors = extract_features.main(retrieval_conf, images, outputs)
-pairs_from_retrieval.main(
- global_descriptors,
- loc_pairs,
- args.num_loc,
- query_prefix="query",
- db_model=reference_sfm,
-)
-loc_matches = match_features.main(
- matcher_conf, loc_pairs, feature_conf["output"], outputs
-)
-localize_sfm.main(
- reference_sfm,
- dataset / "queries/*_time_queries_with_intrinsics.txt",
- loc_pairs,
- features,
- loc_matches,
- results,
- covisibility_clustering=False,
-) # not required with SuperPoint+SuperGlue
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--dataset",
+ type=Path,
+ default="datasets/aachen_v1.1",
+ help="Path to the dataset, default: %(default)s",
+ )
+ parser.add_argument(
+ "--outputs",
+ type=Path,
+ default="outputs/aachen_v1.1",
+ help="Path to the output directory, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_covis",
+ type=int,
+ default=20,
+ help="Number of image pairs for SfM, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_loc",
+ type=int,
+ default=50,
+ help="Number of image pairs for loc, default: %(default)s",
+ )
+ args = parser.parse_args()
+ run(args)
diff --git a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py
index ccfd62d..9c0a769 100644
--- a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py
+++ b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py
@@ -1,94 +1,104 @@
+import argparse
from pathlib import Path
from pprint import pformat
-import argparse
-from ... import extract_features, match_dense, triangulation
-from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm
+from ... import (
+ extract_features,
+ localize_sfm,
+ logger,
+ match_dense,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--dataset",
- type=Path,
- default="datasets/aachen_v1.1",
- help="Path to the dataset, default: %(default)s",
-)
-parser.add_argument(
- "--outputs",
- type=Path,
- default="outputs/aachen_v1.1",
- help="Path to the output directory, default: %(default)s",
-)
-parser.add_argument(
- "--num_covis",
- type=int,
- default=20,
- help="Number of image pairs for SfM, default: %(default)s",
-)
-parser.add_argument(
- "--num_loc",
- type=int,
- default=50,
- help="Number of image pairs for loc, default: %(default)s",
-)
-args = parser.parse_args()
+def run(args):
+ # Setup the paths
+ dataset = args.dataset
+ images = dataset / "images_upright/"
+ sift_sfm = dataset / "3D-models/aachen_v_1_1"
-# Setup the paths
-dataset = args.dataset
-images = dataset / "images/images_upright/"
-sift_sfm = dataset / "3D-models/aachen_v_1_1"
+ outputs = args.outputs # where everything will be saved
+ outputs.mkdir()
+ reference_sfm = outputs / "sfm_loftr" # the SfM model we will build
+ sfm_pairs = (
+ outputs / f"pairs-db-covis{args.num_covis}.txt"
+ ) # top-k most covisible in SIFT model
+ loc_pairs = (
+ outputs / f"pairs-query-netvlad{args.num_loc}.txt"
+ ) # top-k retrieved by NetVLAD
+ results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt"
-outputs = args.outputs # where everything will be saved
-outputs.mkdir()
-reference_sfm = outputs / "sfm_loftr" # the SfM model we will build
-sfm_pairs = (
- outputs / f"pairs-db-covis{args.num_covis}.txt"
-) # top-k most covisible in SIFT model
-loc_pairs = (
- outputs / f"pairs-query-netvlad{args.num_loc}.txt"
-) # top-k retrieved by NetVLAD
-results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt"
+ # list the standard configurations available
+ logger.info("Configs for dense feature matchers:\n%s", pformat(match_dense.confs))
-# list the standard configurations available
-print(f"Configs for dense feature matchers:\n{pformat(match_dense.confs)}")
+ # pick one of the configurations for extraction and matching
+ retrieval_conf = extract_features.confs["netvlad"]
+ matcher_conf = match_dense.confs["loftr_aachen"]
-# pick one of the configurations for extraction and matching
-retrieval_conf = extract_features.confs["netvlad"]
-matcher_conf = match_dense.confs["loftr_aachen"]
+ pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
+ features, sfm_matches = match_dense.main(
+ matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False
+ )
-pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
-features, sfm_matches = match_dense.main(
- matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False
-)
+ triangulation.main(
+ reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
+ )
-triangulation.main(
- reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
-)
+ global_descriptors = extract_features.main(retrieval_conf, images, outputs)
+ pairs_from_retrieval.main(
+ global_descriptors,
+ loc_pairs,
+ args.num_loc,
+ query_prefix="query",
+ db_model=reference_sfm,
+ )
+ features, loc_matches = match_dense.main(
+ matcher_conf,
+ loc_pairs,
+ images,
+ outputs,
+ features=features,
+ max_kps=None,
+ matches=sfm_matches,
+ )
+
+ localize_sfm.main(
+ reference_sfm,
+ dataset / "queries/*_time_queries_with_intrinsics.txt",
+ loc_pairs,
+ features,
+ loc_matches,
+ results,
+ covisibility_clustering=False,
+ ) # not required with loftr
-global_descriptors = extract_features.main(retrieval_conf, images, outputs)
-pairs_from_retrieval.main(
- global_descriptors,
- loc_pairs,
- args.num_loc,
- query_prefix="query",
- db_model=reference_sfm,
-)
-features, loc_matches = match_dense.main(
- matcher_conf,
- loc_pairs,
- images,
- outputs,
- features=features,
- max_kps=None,
- matches=sfm_matches,
-)
-localize_sfm.main(
- reference_sfm,
- dataset / "queries/*_time_queries_with_intrinsics.txt",
- loc_pairs,
- features,
- loc_matches,
- results,
- covisibility_clustering=False,
-) # not required with loftr
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--dataset",
+ type=Path,
+ default="datasets/aachen_v1.1",
+ help="Path to the dataset, default: %(default)s",
+ )
+ parser.add_argument(
+ "--outputs",
+ type=Path,
+ default="outputs/aachen_v1.1",
+ help="Path to the output directory, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_covis",
+ type=int,
+ default=20,
+ help="Number of image pairs for SfM, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_loc",
+ type=int,
+ default=50,
+ help="Number of image pairs for loc, default: %(default)s",
+ )
+    args = parser.parse_args()
+    run(args)
diff --git a/hloc/pipelines/CMU/pipeline.py b/hloc/pipelines/CMU/pipeline.py
index 788dc7b..4706a05 100644
--- a/hloc/pipelines/CMU/pipeline.py
+++ b/hloc/pipelines/CMU/pipeline.py
@@ -1,8 +1,15 @@
-from pathlib import Path
import argparse
+from pathlib import Path
-from ... import extract_features, match_features, triangulation, logger
-from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm
+from ... import (
+ extract_features,
+ localize_sfm,
+ logger,
+ match_features,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
TEST_SLICES = [2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 19, 20, 21]
@@ -46,34 +53,20 @@ def run_slice(slice_, root, outputs, num_covis, num_loc):
matcher_conf = match_features.confs["superglue"]
pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=num_covis)
- features = extract_features.main(
- feature_conf, ref_images, outputs, as_half=True
- )
+ features = extract_features.main(feature_conf, ref_images, outputs, as_half=True)
sfm_matches = match_features.main(
matcher_conf, sfm_pairs, feature_conf["output"], outputs
)
- triangulation.main(
- ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches
- )
+ triangulation.main(ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches)
generate_query_list(root, query_list, slice_)
- global_descriptors = extract_features.main(
- retrieval_conf, ref_images, outputs
- )
- global_descriptors = extract_features.main(
- retrieval_conf, query_images, outputs
- )
+ global_descriptors = extract_features.main(retrieval_conf, ref_images, outputs)
+ global_descriptors = extract_features.main(retrieval_conf, query_images, outputs)
pairs_from_retrieval.main(
- global_descriptors,
- loc_pairs,
- num_loc,
- query_list=query_list,
- db_model=ref_sfm,
+ global_descriptors, loc_pairs, num_loc, query_list=query_list, db_model=ref_sfm
)
- features = extract_features.main(
- feature_conf, query_images, outputs, as_half=True
- )
+ features = extract_features.main(feature_conf, query_images, outputs, as_half=True)
loc_matches = match_features.main(
matcher_conf, loc_pairs, feature_conf["output"], outputs
)
@@ -136,9 +129,5 @@ def run_slice(slice_, root, outputs, num_covis, num_loc):
for slice_ in slices:
logger.info("Working on slice %s.", slice_)
run_slice(
- f"slice{slice_}",
- args.dataset,
- args.outputs,
- args.num_covis,
- args.num_loc,
+ f"slice{slice_}", args.dataset, args.outputs, args.num_covis, args.num_loc
)
diff --git a/hloc/pipelines/Cambridge/pipeline.py b/hloc/pipelines/Cambridge/pipeline.py
index 7b9e5c5..3a676e5 100644
--- a/hloc/pipelines/Cambridge/pipeline.py
+++ b/hloc/pipelines/Cambridge/pipeline.py
@@ -1,17 +1,18 @@
-from pathlib import Path
import argparse
+from pathlib import Path
-from .utils import create_query_list_with_intrinsics, scale_sfm_images, evaluate
-from ... import extract_features, match_features, pairs_from_covisibility
-from ... import triangulation, localize_sfm, pairs_from_retrieval, logger
+from ... import (
+ extract_features,
+ localize_sfm,
+ logger,
+ match_features,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
+from .utils import create_query_list_with_intrinsics, evaluate, scale_sfm_images
-SCENES = [
- "KingsCollege",
- "OldHospital",
- "ShopFacade",
- "StMarysChurch",
- "GreatCourt",
-]
+SCENES = ["KingsCollege", "OldHospital", "ShopFacade", "StMarysChurch", "GreatCourt"]
def run_scene(images, gt_dir, outputs, results, num_covis, num_loc):
@@ -41,11 +42,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc):
retrieval_conf = extract_features.confs["netvlad"]
create_query_list_with_intrinsics(
- gt_dir / "empty_all",
- query_list,
- test_list,
- ext=".txt",
- image_dir=images,
+ gt_dir / "empty_all", query_list, test_list, ext=".txt", image_dir=images
)
with open(test_list, "r") as f:
query_seqs = {q.split("/")[0] for q in f.read().rstrip().split("\n")}
@@ -59,9 +56,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc):
query_prefix=query_seqs,
)
- features = extract_features.main(
- feature_conf, images, outputs, as_half=True
- )
+ features = extract_features.main(feature_conf, images, outputs, as_half=True)
pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis)
sfm_matches = match_features.main(
matcher_conf, sfm_pairs, feature_conf["output"], outputs
diff --git a/hloc/pipelines/Cambridge/utils.py b/hloc/pipelines/Cambridge/utils.py
index 90215b4..36460f0 100644
--- a/hloc/pipelines/Cambridge/utils.py
+++ b/hloc/pipelines/Cambridge/utils.py
@@ -1,15 +1,16 @@
-import cv2
import logging
+
+import cv2
import numpy as np
from hloc.utils.read_write_model import (
+ qvec2rotmat,
read_cameras_binary,
+ read_cameras_text,
read_images_binary,
+ read_images_text,
read_model,
write_model,
- qvec2rotmat,
- read_images_text,
- read_cameras_text,
)
logger = logging.getLogger(__name__)
@@ -42,9 +43,7 @@ def scale_sfm_images(full_model, scaled_model, image_dir):
sy = h / camera.height
assert sx == sy, (sx, sy)
scaled_cameras[cam_id] = camera._replace(
- width=w,
- height=h,
- params=camera.params * np.array([sx, sx, sy, 1.0]),
+ width=w, height=h, params=camera.params * np.array([sx, sx, sy, 1.0])
)
write_model(scaled_cameras, images, points3D, scaled_model)
diff --git a/hloc/pipelines/RobotCar/colmap_from_nvm.py b/hloc/pipelines/RobotCar/colmap_from_nvm.py
index 40d7e8b..e90ed72 100644
--- a/hloc/pipelines/RobotCar/colmap_from_nvm.py
+++ b/hloc/pipelines/RobotCar/colmap_from_nvm.py
@@ -1,29 +1,31 @@
import argparse
+import logging
import sqlite3
-from tqdm import tqdm
from collections import defaultdict
-import numpy as np
from pathlib import Path
-import logging
+
+import numpy as np
+from tqdm import tqdm
from ...colmap_from_nvm import (
- recover_database_images_and_ids,
camera_center_to_translation,
+ recover_database_images_and_ids,
+)
+from ...utils.read_write_model import (
+ CAMERA_MODEL_IDS,
+ Camera,
+ Image,
+ Point3D,
+ write_model,
)
-from ...utils.read_write_model import Camera, Image, Point3D, CAMERA_MODEL_IDS
-from ...utils.read_write_model import write_model
logger = logging.getLogger(__name__)
-def read_nvm_model(
- nvm_path, database_path, image_ids, camera_ids, skip_points=False
-):
+def read_nvm_model(nvm_path, database_path, image_ids, camera_ids, skip_points=False):
# Extract the intrinsics from the db file instead of the NVM model
db = sqlite3.connect(str(database_path))
- ret = db.execute(
- "SELECT camera_id, model, width, height, params FROM cameras;"
- )
+ ret = db.execute("SELECT camera_id, model, width, height, params FROM cameras;")
cameras = {}
for camera_id, camera_model, width, height, params in ret:
params = np.fromstring(params, dtype=np.double).reshape(-1)
diff --git a/hloc/pipelines/RobotCar/pipeline.py b/hloc/pipelines/RobotCar/pipeline.py
index 20877cf..7a7ee31 100644
--- a/hloc/pipelines/RobotCar/pipeline.py
+++ b/hloc/pipelines/RobotCar/pipeline.py
@@ -1,10 +1,16 @@
-from pathlib import Path
import argparse
+import glob
+from pathlib import Path
+from ... import (
+ extract_features,
+ localize_sfm,
+ match_features,
+ pairs_from_covisibility,
+ pairs_from_retrieval,
+ triangulation,
+)
from . import colmap_from_nvm
-from ... import extract_features, match_features, triangulation
-from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm
-
CONDITIONS = [
"dawn",
@@ -33,102 +39,105 @@ def generate_query_list(dataset, image_dir, path):
params = ["SIMPLE_RADIAL", w, h, fx, cx, cy, 0.0]
cameras[side] = [str(p) for p in params]
- queries = sorted(image_dir.glob("**/*.jpg"))
- queries = [str(q.relative_to(image_dir.parents[0])) for q in queries]
+ queries = glob.glob((image_dir / "**/*.jpg").as_posix(), recursive=True)
+ queries = [
+ Path(q).relative_to(image_dir.parents[0]).as_posix() for q in sorted(queries)
+ ]
out = [[q] + cameras[Path(q).parent.name] for q in queries]
with open(path, "w") as f:
f.write("\n".join(map(" ".join, out)))
-parser = argparse.ArgumentParser()
-parser.add_argument(
- "--dataset",
- type=Path,
- default="datasets/robotcar",
- help="Path to the dataset, default: %(default)s",
-)
-parser.add_argument(
- "--outputs",
- type=Path,
- default="outputs/robotcar",
- help="Path to the output directory, default: %(default)s",
-)
-parser.add_argument(
- "--num_covis",
- type=int,
- default=20,
- help="Number of image pairs for SfM, default: %(default)s",
-)
-parser.add_argument(
- "--num_loc",
- type=int,
- default=20,
- help="Number of image pairs for loc, default: %(default)s",
-)
-args = parser.parse_args()
-
-# Setup the paths
-dataset = args.dataset
-images = dataset / "images/"
-
-outputs = args.outputs # where everything will be saved
-outputs.mkdir(exist_ok=True, parents=True)
-query_list = outputs / "{condition}_queries_with_intrinsics.txt"
-sift_sfm = outputs / "sfm_sift"
-reference_sfm = outputs / "sfm_superpoint+superglue"
-sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt"
-loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt"
-results = (
- outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
-)
-
-# pick one of the configurations for extraction and matching
-retrieval_conf = extract_features.confs["netvlad"]
-feature_conf = extract_features.confs["superpoint_aachen"]
-matcher_conf = match_features.confs["superglue"]
-
-for condition in CONDITIONS:
- generate_query_list(
- dataset, images / condition, str(query_list).format(condition=condition)
+def run(args):
+ # Setup the paths
+ dataset = args.dataset
+ images = dataset / "images/"
+
+ outputs = args.outputs # where everything will be saved
+ outputs.mkdir(exist_ok=True, parents=True)
+ query_list = outputs / "{condition}_queries_with_intrinsics.txt"
+ sift_sfm = outputs / "sfm_sift"
+ reference_sfm = outputs / "sfm_superpoint+superglue"
+ sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt"
+ loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt"
+ results = outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt"
+
+ # pick one of the configurations for extraction and matching
+ retrieval_conf = extract_features.confs["netvlad"]
+ feature_conf = extract_features.confs["superpoint_aachen"]
+ matcher_conf = match_features.confs["superglue"]
+
+ for condition in CONDITIONS:
+ generate_query_list(
+ dataset, images / condition, str(query_list).format(condition=condition)
+ )
+
+ features = extract_features.main(feature_conf, images, outputs, as_half=True)
+
+ colmap_from_nvm.main(
+ dataset / "3D-models/all-merged/all.nvm",
+ dataset / "3D-models/overcast-reference.db",
+ sift_sfm,
+ )
+ pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
+ sfm_matches = match_features.main(
+ matcher_conf, sfm_pairs, feature_conf["output"], outputs
)
-features = extract_features.main(feature_conf, images, outputs, as_half=True)
+ triangulation.main(
+ reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
+ )
-colmap_from_nvm.main(
- dataset / "3D-models/all-merged/all.nvm",
- dataset / "3D-models/overcast-reference.db",
- sift_sfm,
-)
-pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis)
-sfm_matches = match_features.main(
- matcher_conf, sfm_pairs, feature_conf["output"], outputs
-)
+ global_descriptors = extract_features.main(retrieval_conf, images, outputs)
+ # TODO: do per location and per camera
+ pairs_from_retrieval.main(
+ global_descriptors,
+ loc_pairs,
+ args.num_loc,
+ query_prefix=CONDITIONS,
+ db_model=reference_sfm,
+ )
+ loc_matches = match_features.main(
+ matcher_conf, loc_pairs, feature_conf["output"], outputs
+ )
-triangulation.main(
- reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches
-)
+ localize_sfm.main(
+ reference_sfm,
+ Path(str(query_list).format(condition="*")),
+ loc_pairs,
+ features,
+ loc_matches,
+ results,
+ covisibility_clustering=False,
+ prepend_camera_name=True,
+ )
-global_descriptors = extract_features.main(retrieval_conf, images, outputs)
-# TODO: do per location and per camera
-pairs_from_retrieval.main(
- global_descriptors,
- loc_pairs,
- args.num_loc,
- query_prefix=CONDITIONS,
- db_model=reference_sfm,
-)
-loc_matches = match_features.main(
- matcher_conf, loc_pairs, feature_conf["output"], outputs
-)
-localize_sfm.main(
- reference_sfm,
- Path(str(query_list).format(condition="*")),
- loc_pairs,
- features,
- loc_matches,
- results,
- covisibility_clustering=False,
- prepend_camera_name=True,
-)
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--dataset",
+ type=Path,
+ default="datasets/robotcar",
+ help="Path to the dataset, default: %(default)s",
+ )
+ parser.add_argument(
+ "--outputs",
+ type=Path,
+ default="outputs/robotcar",
+ help="Path to the output directory, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_covis",
+ type=int,
+ default=20,
+ help="Number of image pairs for SfM, default: %(default)s",
+ )
+ parser.add_argument(
+ "--num_loc",
+ type=int,
+ default=20,
+ help="Number of image pairs for loc, default: %(default)s",
+ )
+    args = parser.parse_args()
+    run(args)
diff --git a/hloc/reconstruction.py b/hloc/reconstruction.py
new file mode 100644
index 0000000..ff4a90a
--- /dev/null
+++ b/hloc/reconstruction.py
@@ -0,0 +1,199 @@
+import argparse
+import multiprocessing
+import shutil
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import pycolmap
+
+from . import logger
+from .triangulation import (
+ OutputCapture,
+ estimation_and_geometric_verification,
+ import_features,
+ import_matches,
+ parse_option_args,
+)
+from .utils.database import COLMAPDatabase
+
+
+def create_empty_db(database_path: Path):
+ if database_path.exists():
+ logger.warning("The database already exists, deleting it.")
+ database_path.unlink()
+ logger.info("Creating an empty database...")
+ db = COLMAPDatabase.connect(database_path)
+ db.create_tables()
+ db.commit()
+ db.close()
+
+
+def import_images(
+ image_dir: Path,
+ database_path: Path,
+ camera_mode: pycolmap.CameraMode,
+ image_list: Optional[List[str]] = None,
+ options: Optional[Dict[str, Any]] = None,
+):
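+    """Import the images of `image_dir` into the COLMAP database with the given camera mode."""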
+ logger.info("Importing images into the database...")
+ if options is None:
+ options = {}
+ images = list(image_dir.iterdir())
+ if len(images) == 0:
+ raise IOError(f"No images found in {image_dir}.")
+ with pycolmap.ostream():
+ pycolmap.import_images(
+ database_path,
+ image_dir,
+ camera_mode,
+ image_list=image_list or [],
+ options=options,
+ )
+
+
+def get_image_ids(database_path: Path) -> Dict[str, int]:
+ db = COLMAPDatabase.connect(database_path)
+ images = {}
+ for name, image_id in db.execute("SELECT name, image_id FROM images;"):
+ images[name] = image_id
+ db.close()
+ return images
+
+
+def run_reconstruction(
+ sfm_dir: Path,
+ database_path: Path,
+ image_dir: Path,
+ verbose: bool = False,
+ options: Optional[Dict[str, Any]] = None,
+) -> pycolmap.Reconstruction:
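+    """Run COLMAP incremental mapping and keep the largest model in `sfm_dir`."""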
+ models_path = sfm_dir / "models"
+ models_path.mkdir(exist_ok=True, parents=True)
+ logger.info("Running 3D reconstruction...")
+ if options is None:
+ options = {}
+ options = {"num_threads": min(multiprocessing.cpu_count(), 16), **options}
+ with OutputCapture(verbose):
+ with pycolmap.ostream():
+ reconstructions = pycolmap.incremental_mapping(
+ database_path, image_dir, models_path, options=options
+ )
+
+ if len(reconstructions) == 0:
+ logger.error("Could not reconstruct any model!")
+ return None
+ logger.info(f"Reconstructed {len(reconstructions)} model(s).")
+
+ largest_index = None
+ largest_num_images = 0
+ for index, rec in reconstructions.items():
+ num_images = rec.num_reg_images()
+ if num_images > largest_num_images:
+ largest_index = index
+ largest_num_images = num_images
+ assert largest_index is not None
+ logger.info(
+ f"Largest model is #{largest_index} "
+ f"with {largest_num_images} images."
+ )
+
+ for filename in ["images.bin", "cameras.bin", "points3D.bin"]:
+ if (sfm_dir / filename).exists():
+ (sfm_dir / filename).unlink()
+ shutil.move(
+ str(models_path / str(largest_index) / filename), str(sfm_dir)
+ )
+ return reconstructions[largest_index]
+
+
+def main(
+ sfm_dir: Path,
+ image_dir: Path,
+ pairs: Path,
+ features: Path,
+ matches: Path,
+ camera_mode: pycolmap.CameraMode = pycolmap.CameraMode.AUTO,
+ verbose: bool = False,
+ skip_geometric_verification: bool = False,
+ min_match_score: Optional[float] = None,
+ image_list: Optional[List[str]] = None,
+ image_options: Optional[Dict[str, Any]] = None,
+ mapper_options: Optional[Dict[str, Any]] = None,
+) -> pycolmap.Reconstruction:
+ assert features.exists(), features
+ assert pairs.exists(), pairs
+ assert matches.exists(), matches
+
+ sfm_dir.mkdir(parents=True, exist_ok=True)
+ database = sfm_dir / "database.db"
+
+ create_empty_db(database)
+ import_images(image_dir, database, camera_mode, image_list, image_options)
+ image_ids = get_image_ids(database)
+ import_features(image_ids, database, features)
+ import_matches(
+ image_ids,
+ database,
+ pairs,
+ matches,
+ min_match_score,
+ skip_geometric_verification,
+ )
+ if not skip_geometric_verification:
+ estimation_and_geometric_verification(database, pairs, verbose)
+ reconstruction = run_reconstruction(
+ sfm_dir, database, image_dir, verbose, mapper_options
+ )
+ if reconstruction is not None:
+ logger.info(
+ f"Reconstruction statistics:\n{reconstruction.summary()}"
+ + f"\n\tnum_input_images = {len(image_ids)}"
+ )
+ return reconstruction
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--sfm_dir", type=Path, required=True)
+ parser.add_argument("--image_dir", type=Path, required=True)
+
+ parser.add_argument("--pairs", type=Path, required=True)
+ parser.add_argument("--features", type=Path, required=True)
+ parser.add_argument("--matches", type=Path, required=True)
+
+ parser.add_argument(
+ "--camera_mode",
+ type=str,
+ default="AUTO",
+ choices=list(pycolmap.CameraMode.__members__.keys()),
+ )
+ parser.add_argument("--skip_geometric_verification", action="store_true")
+ parser.add_argument("--min_match_score", type=float)
+ parser.add_argument("--verbose", action="store_true")
+
+ parser.add_argument(
+ "--image_options",
+ nargs="+",
+ default=[],
+ help="List of key=value from {}".format(
+ pycolmap.ImageReaderOptions().todict()
+ ),
+ )
+ parser.add_argument(
+ "--mapper_options",
+ nargs="+",
+ default=[],
+ help="List of key=value from {}".format(
+ pycolmap.IncrementalMapperOptions().todict()
+ ),
+ )
+ args = parser.parse_args().__dict__
+
+ image_options = parse_option_args(
+ args.pop("image_options"), pycolmap.ImageReaderOptions()
+ )
+ mapper_options = parse_option_args(
+ args.pop("mapper_options"), pycolmap.IncrementalMapperOptions()
+ )
+
+ main(**args, image_options=image_options, mapper_options=mapper_options)
diff --git a/hloc/triangulation.py b/hloc/triangulation.py
new file mode 100644
index 0000000..385fed9
--- /dev/null
+++ b/hloc/triangulation.py
@@ -0,0 +1,320 @@
+import argparse
+import contextlib
+import io
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+import pycolmap
+from tqdm import tqdm
+
+from . import logger
+from .utils.database import COLMAPDatabase
+from .utils.geometry import compute_epipolar_errors
+from .utils.io import get_keypoints, get_matches
+from .utils.parsers import parse_retrieval
+
+
+class OutputCapture:
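+    """Context manager that silences stdout unless `verbose` is set and logs the
+    captured output if the wrapped block raises."""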
+ def __init__(self, verbose: bool):
+ self.verbose = verbose
+
+ def __enter__(self):
+ if not self.verbose:
+ self.capture = contextlib.redirect_stdout(io.StringIO())
+ self.out = self.capture.__enter__()
+
+ def __exit__(self, exc_type, *args):
+ if not self.verbose:
+ self.capture.__exit__(exc_type, *args)
+ if exc_type is not None:
+ logger.error("Failed with output:\n%s", self.out.getvalue())
+ sys.stdout.flush()
+
+
+def create_db_from_model(
+ reconstruction: pycolmap.Reconstruction, database_path: Path
+) -> Dict[str, int]:
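+    """Create a fresh COLMAP database seeded with the cameras and images of an
+    existing reconstruction; returns the mapping from image name to image id."""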
+ if database_path.exists():
+ logger.warning("The database already exists, deleting it.")
+ database_path.unlink()
+
+ db = COLMAPDatabase.connect(database_path)
+ db.create_tables()
+
+ for i, camera in reconstruction.cameras.items():
+ db.add_camera(
+ camera.model.value,
+ camera.width,
+ camera.height,
+ camera.params,
+ camera_id=i,
+ prior_focal_length=True,
+ )
+
+ for i, image in reconstruction.images.items():
+ db.add_image(image.name, image.camera_id, image_id=i)
+
+ db.commit()
+ db.close()
+ return {image.name: i for i, image in reconstruction.images.items()}
+
+
+def import_features(
+ image_ids: Dict[str, int], database_path: Path, features_path: Path
+):
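+    """Write the keypoints of every image into the COLMAP database."""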
+ logger.info("Importing features into the database...")
+ db = COLMAPDatabase.connect(database_path)
+
+ for image_name, image_id in tqdm(image_ids.items()):
+ keypoints = get_keypoints(features_path, image_name)
+ keypoints += 0.5 # COLMAP origin
+ db.add_keypoints(image_id, keypoints)
+
+ db.commit()
+ db.close()
+
+
+def import_matches(
+ image_ids: Dict[str, int],
+ database_path: Path,
+ pairs_path: Path,
+ matches_path: Path,
+ min_match_score: Optional[float] = None,
+ skip_geometric_verification: bool = False,
+):
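+    """Import pairwise matches, optionally filtered by score, skipping duplicated pairs."""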
+ logger.info("Importing matches into the database...")
+
+ with open(str(pairs_path), "r") as f:
+ pairs = [p.split() for p in f.readlines()]
+
+ db = COLMAPDatabase.connect(database_path)
+
+ matched = set()
+ for name0, name1 in tqdm(pairs):
+ id0, id1 = image_ids[name0], image_ids[name1]
+ if len({(id0, id1), (id1, id0)} & matched) > 0:
+ continue
+ matches, scores = get_matches(matches_path, name0, name1)
+ if min_match_score:
+ matches = matches[scores > min_match_score]
+ db.add_matches(id0, id1, matches)
+ matched |= {(id0, id1), (id1, id0)}
+
+        if skip_geometric_verification:
+            db.add_two_view_geometry(id0, id1, matches)
+
+ db.commit()
+ db.close()
+
+
+def estimation_and_geometric_verification(
+ database_path: Path, pairs_path: Path, verbose: bool = False
+):
+ logger.info("Performing geometric verification of the matches...")
+ with OutputCapture(verbose):
+ with pycolmap.ostream():
+ pycolmap.verify_matches(
+ database_path,
+ pairs_path,
+ options=dict(
+ ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1)
+ ),
+ )
+
+
+def geometric_verification(
+ image_ids: Dict[str, int],
+ reference: pycolmap.Reconstruction,
+ database_path: Path,
+ features_path: Path,
+ pairs_path: Path,
+ matches_path: Path,
+ max_error: float = 4.0,
+):
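+    """Verify matches against the reference poses: keep only matches whose epipolar
+    error in both images is below `max_error` (scaled by the keypoint noise)."""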
+ logger.info("Performing geometric verification of the matches...")
+
+ pairs = parse_retrieval(pairs_path)
+ db = COLMAPDatabase.connect(database_path)
+
+ inlier_ratios = []
+ matched = set()
+ for name0 in tqdm(pairs):
+ id0 = image_ids[name0]
+ image0 = reference.images[id0]
+ cam0 = reference.cameras[image0.camera_id]
+ kps0, noise0 = get_keypoints(
+ features_path, name0, return_uncertainty=True
+ )
+ noise0 = 1.0 if noise0 is None else noise0
+ if len(kps0) > 0:
+ kps0 = np.stack(cam0.cam_from_img(kps0))
+ else:
+ kps0 = np.zeros((0, 2))
+
+ for name1 in pairs[name0]:
+ id1 = image_ids[name1]
+ image1 = reference.images[id1]
+ cam1 = reference.cameras[image1.camera_id]
+ kps1, noise1 = get_keypoints(
+ features_path, name1, return_uncertainty=True
+ )
+ noise1 = 1.0 if noise1 is None else noise1
+ if len(kps1) > 0:
+ kps1 = np.stack(cam1.cam_from_img(kps1))
+ else:
+ kps1 = np.zeros((0, 2))
+
+ matches = get_matches(matches_path, name0, name1)[0]
+
+ if len({(id0, id1), (id1, id0)} & matched) > 0:
+ continue
+ matched |= {(id0, id1), (id1, id0)}
+
+ if matches.shape[0] == 0:
+ db.add_two_view_geometry(id0, id1, matches)
+ continue
+
+ cam1_from_cam0 = (
+ image1.cam_from_world * image0.cam_from_world.inverse()
+ )
+ errors0, errors1 = compute_epipolar_errors(
+ cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]]
+ )
+ valid_matches = np.logical_and(
+ errors0 <= cam0.cam_from_img_threshold(noise0 * max_error),
+ errors1 <= cam1.cam_from_img_threshold(noise1 * max_error),
+ )
+ # TODO: We could also add E to the database, but we need
+ # to reverse the transformations if id0 > id1 in utils/database.py.
+ db.add_two_view_geometry(id0, id1, matches[valid_matches, :])
+ inlier_ratios.append(np.mean(valid_matches))
+ logger.info(
+ "mean/med/min/max valid matches %.2f/%.2f/%.2f/%.2f%%.",
+ np.mean(inlier_ratios) * 100,
+ np.median(inlier_ratios) * 100,
+ np.min(inlier_ratios) * 100,
+ np.max(inlier_ratios) * 100,
+ )
+
+ db.commit()
+ db.close()
+
+
+def run_triangulation(
+ model_path: Path,
+ database_path: Path,
+ image_dir: Path,
+ reference_model: pycolmap.Reconstruction,
+ verbose: bool = False,
+ options: Optional[Dict[str, Any]] = None,
+) -> pycolmap.Reconstruction:
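+    """Triangulate 3D points with pycolmap, keeping the reference poses fixed."""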
+ model_path.mkdir(parents=True, exist_ok=True)
+ logger.info("Running 3D triangulation...")
+ if options is None:
+ options = {}
+ with OutputCapture(verbose):
+ with pycolmap.ostream():
+ reconstruction = pycolmap.triangulate_points(
+ reference_model,
+ database_path,
+ image_dir,
+ model_path,
+ options=options,
+ )
+ return reconstruction
+
+
+def main(
+ sfm_dir: Path,
+ reference_model: Path,
+ image_dir: Path,
+ pairs: Path,
+ features: Path,
+ matches: Path,
+ skip_geometric_verification: bool = False,
+ estimate_two_view_geometries: bool = False,
+ min_match_score: Optional[float] = None,
+ verbose: bool = False,
+ mapper_options: Optional[Dict[str, Any]] = None,
+) -> pycolmap.Reconstruction:
+ assert reference_model.exists(), reference_model
+ assert features.exists(), features
+ assert pairs.exists(), pairs
+ assert matches.exists(), matches
+
+ sfm_dir.mkdir(parents=True, exist_ok=True)
+ database = sfm_dir / "database.db"
+ reference = pycolmap.Reconstruction(reference_model)
+
+ image_ids = create_db_from_model(reference, database)
+ import_features(image_ids, database, features)
+ import_matches(
+ image_ids,
+ database,
+ pairs,
+ matches,
+ min_match_score,
+ skip_geometric_verification,
+ )
+ if not skip_geometric_verification:
+ if estimate_two_view_geometries:
+ estimation_and_geometric_verification(database, pairs, verbose)
+ else:
+ geometric_verification(
+ image_ids, reference, database, features, pairs, matches
+ )
+ reconstruction = run_triangulation(
+ sfm_dir, database, image_dir, reference, verbose, mapper_options
+ )
+ logger.info(
+ "Finished the triangulation with statistics:\n%s",
+ reconstruction.summary(),
+ )
+ return reconstruction
+
+
+def parse_option_args(args: List[str], default_options) -> Dict[str, Any]:
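+    """Parse `key=value` strings into a dict, checking keys and value types against `default_options`."""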
+ options = {}
+ for arg in args:
+ idx = arg.find("=")
+ if idx == -1:
+ raise ValueError("Options format: key1=value1 key2=value2 etc.")
+ key, value = arg[:idx], arg[idx + 1 :]
+ if not hasattr(default_options, key):
+ raise ValueError(
+ f'Unknown option "{key}", allowed options and default values'
+ f" for {default_options.summary()}"
+ )
+ value = eval(value)
+ target_type = type(getattr(default_options, key))
+ if not isinstance(value, target_type):
+ raise ValueError(
+ f'Incorrect type for option "{key}":'
+ f" {type(value)} vs {target_type}"
+ )
+ options[key] = value
+ return options
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--sfm_dir", type=Path, required=True)
+ parser.add_argument("--reference_sfm_model", type=Path, required=True)
+ parser.add_argument("--image_dir", type=Path, required=True)
+
+ parser.add_argument("--pairs", type=Path, required=True)
+ parser.add_argument("--features", type=Path, required=True)
+ parser.add_argument("--matches", type=Path, required=True)
+
+ parser.add_argument("--skip_geometric_verification", action="store_true")
+ parser.add_argument("--min_match_score", type=float)
+ parser.add_argument("--verbose", action="store_true")
+ args = parser.parse_args().__dict__
+
+ mapper_options = parse_option_args(
+ args.pop("mapper_options"), pycolmap.IncrementalMapperOptions()
+ )
+
+ main(**args, mapper_options=mapper_options)
diff --git a/hloc/utils/database.py b/hloc/utils/database.py
index 050f5ec..683c250 100644
--- a/hloc/utils/database.py
+++ b/hloc/utils/database.py
@@ -31,10 +31,10 @@
# This script is based on an original implementation by True Price.
-import sys
import sqlite3
-import numpy as np
+import sys
+import numpy as np
IS_PYTHON3 = sys.version_info[0] >= 3
@@ -100,9 +100,7 @@
cols INTEGER NOT NULL,
data BLOB)"""
-CREATE_NAME_INDEX = (
- "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
-)
+CREATE_NAME_INDEX = "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)"
CREATE_ALL = "; ".join(
[
@@ -152,34 +150,20 @@ def __init__(self, *args, **kwargs):
super(COLMAPDatabase, self).__init__(*args, **kwargs)
self.create_tables = lambda: self.executescript(CREATE_ALL)
- self.create_cameras_table = lambda: self.executescript(
- CREATE_CAMERAS_TABLE
- )
+ self.create_cameras_table = lambda: self.executescript(CREATE_CAMERAS_TABLE)
self.create_descriptors_table = lambda: self.executescript(
CREATE_DESCRIPTORS_TABLE
)
- self.create_images_table = lambda: self.executescript(
- CREATE_IMAGES_TABLE
- )
+ self.create_images_table = lambda: self.executescript(CREATE_IMAGES_TABLE)
self.create_two_view_geometries_table = lambda: self.executescript(
CREATE_TWO_VIEW_GEOMETRIES_TABLE
)
- self.create_keypoints_table = lambda: self.executescript(
- CREATE_KEYPOINTS_TABLE
- )
- self.create_matches_table = lambda: self.executescript(
- CREATE_MATCHES_TABLE
- )
+ self.create_keypoints_table = lambda: self.executescript(CREATE_KEYPOINTS_TABLE)
+ self.create_matches_table = lambda: self.executescript(CREATE_MATCHES_TABLE)
self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX)
def add_camera(
- self,
- model,
- width,
- height,
- params,
- prior_focal_length=False,
- camera_id=None,
+ self, model, width, height, params, prior_focal_length=False, camera_id=None
):
params = np.asarray(params, np.float64)
cursor = self.execute(
diff --git a/hloc/utils/geometry.py b/hloc/utils/geometry.py
index 1f33e2e..5995ccc 100644
--- a/hloc/utils/geometry.py
+++ b/hloc/utils/geometry.py
@@ -6,28 +6,11 @@ def to_homogeneous(p):
return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1)
-def vector_to_cross_product_matrix(v):
- return np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
-
-
-def compute_epipolar_errors(qvec_r2t, tvec_r2t, p2d_r, p2d_t):
- T_r2t = pose_matrix_from_qvec_tvec(qvec_r2t, tvec_r2t)
- # Compute errors in normalized plane to avoid distortion.
- E = vector_to_cross_product_matrix(T_r2t[:3, -1]) @ T_r2t[:3, :3]
- l2d_r2t = (E @ to_homogeneous(p2d_r).T).T
- l2d_t2r = (E.T @ to_homogeneous(p2d_t).T).T
- errors_r = np.abs(
- np.sum(to_homogeneous(p2d_r) * l2d_t2r, axis=1)
- ) / np.linalg.norm(l2d_t2r[:, :2], axis=1)
- errors_t = np.abs(
- np.sum(to_homogeneous(p2d_t) * l2d_r2t, axis=1)
- ) / np.linalg.norm(l2d_r2t[:, :2], axis=1)
- return E, errors_r, errors_t
-
-
-def pose_matrix_from_qvec_tvec(qvec, tvec):
- pose = np.zeros((4, 4))
- pose[:3, :3] = pycolmap.qvec_to_rotmat(qvec)
- pose[:3, -1] = tvec
- pose[-1, -1] = 1
- return pose
+def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j):
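+ # j_from_i maps camera frame i into camera frame j; its essential matrix E satisfies
+ # x_j^T @ E @ x_i = 0 for corresponding normalized points. errors_i / errors_j are
+ # the distances of p2d_i / p2d_j to their epipolar lines (symmetric epipolar error).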
+ j_E_i = j_from_i.essential_matrix()
+ l2d_j = to_homogeneous(p2d_i) @ j_E_i.T
+ l2d_i = to_homogeneous(p2d_j) @ j_E_i
+ dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1))
+ errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1)
+ errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1)
+ return errors_i, errors_j
diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py
index faaa8f2..9407dcf 100644
--- a/hloc/utils/parsers.py
+++ b/hloc/utils/parsers.py
@@ -1,7 +1,8 @@
-from pathlib import Path
import logging
-import numpy as np
from collections import defaultdict
+from pathlib import Path
+
+import numpy as np
import pycolmap
logger = logging.getLogger(__name__)
@@ -18,7 +19,9 @@ def parse_image_list(path, with_intrinsics=False):
if with_intrinsics:
model, width, height, *params = data
params = np.array(params, float)
- cam = pycolmap.Camera(model, int(width), int(height), params)
+ cam = pycolmap.Camera(
+ model=model, width=int(width), height=int(height), params=params
+ )
images.append((name, cam))
else:
images.append(name)
diff --git a/hloc/utils/read_write_model.py b/hloc/utils/read_write_model.py
index 65bed51..197921d 100644
--- a/hloc/utils/read_write_model.py
+++ b/hloc/utils/read_write_model.py
@@ -29,12 +29,13 @@
#
# Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
-import os
-import collections
-import numpy as np
-import struct
import argparse
+import collections
import logging
+import os
+import struct
+
+import numpy as np
logger = logging.getLogger(__name__)
@@ -42,9 +43,7 @@
CameraModel = collections.namedtuple(
"CameraModel", ["model_id", "model_name", "num_params"]
)
-Camera = collections.namedtuple(
- "Camera", ["id", "model", "width", "height", "params"]
-)
+Camera = collections.namedtuple("Camera", ["id", "model", "width", "height", "params"])
BaseImage = collections.namedtuple(
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"]
)
@@ -128,11 +127,7 @@ def read_cameras_text(path):
height = int(elems[3])
params = np.array(tuple(map(float, elems[4:])))
cameras[camera_id] = Camera(
- id=camera_id,
- model=model,
- width=width,
- height=height,
- params=params,
+ id=camera_id, model=model, width=width, height=height, params=params
)
return cameras
@@ -157,9 +152,7 @@ def read_cameras_binary(path_to_model_file):
height = camera_properties[3]
num_params = CAMERA_MODEL_IDS[model_id].num_params
params = read_next_bytes(
- fid,
- num_bytes=8 * num_params,
- format_char_sequence="d" * num_params,
+ fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params
)
cameras[camera_id] = Camera(
id=camera_id,
@@ -230,10 +223,7 @@ def read_images_text(path):
image_name = elems[9]
elems = fid.readline().split()
xys = np.column_stack(
- [
- tuple(map(float, elems[0::3])),
- tuple(map(float, elems[1::3])),
- ]
+ [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))]
)
point3D_ids = np.array(tuple(map(int, elems[2::3])))
images[image_id] = Image(
@@ -270,19 +260,16 @@ def read_images_binary(path_to_model_file):
while current_char != b"\x00": # look for the ASCII 0 entry
image_name += current_char.decode("utf-8")
current_char = read_next_bytes(fid, 1, "c")[0]
- num_points2D = read_next_bytes(
- fid, num_bytes=8, format_char_sequence="Q"
- )[0]
+ num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[
+ 0
+ ]
x_y_id_s = read_next_bytes(
fid,
num_bytes=24 * num_points2D,
format_char_sequence="ddq" * num_points2D,
)
xys = np.column_stack(
- [
- tuple(map(float, x_y_id_s[0::3])),
- tuple(map(float, x_y_id_s[1::3])),
- ]
+ [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))]
)
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
images[image_id] = Image(
@@ -321,13 +308,7 @@ def write_images_text(images, path):
with open(path, "w") as fid:
fid.write(HEADER)
for _, img in images.items():
- image_header = [
- img.id,
- *img.qvec,
- *img.tvec,
- img.camera_id,
- img.name,
- ]
+ image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
first_line = " ".join(map(str, image_header))
fid.write(first_line + "\n")
@@ -407,9 +388,9 @@ def read_points3D_binary(path_to_model_file):
xyz = np.array(binary_point_line_properties[1:4])
rgb = np.array(binary_point_line_properties[4:7])
error = np.array(binary_point_line_properties[7])
- track_length = read_next_bytes(
- fid, num_bytes=8, format_char_sequence="Q"
- )[0]
+ track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[
+ 0
+ ]
track_elems = read_next_bytes(
fid,
num_bytes=8 * track_length,
@@ -442,7 +423,7 @@ def write_points3D_text(points3D, path):
) / len(points3D)
HEADER = (
"# 3D point list with one line of data per point:\n"
- + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n"
+ + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" # noqa: E501
+ "# Number of points: {}, mean track length: {}\n".format(
len(points3D), mean_track_length
)
@@ -498,12 +479,8 @@ def read_model(path, ext=""):
ext = ".txt"
else:
try:
- cameras, images, points3D = read_model(
- os.path.join(path, "model/")
- )
- logger.warning(
- "This SfM file structure was deprecated in hloc v1.1"
- )
+ cameras, images, points3D = read_model(os.path.join(path, "model/"))
+ logger.warning("This SfM file structure was deprecated in hloc v1.1")
return cameras, images, points3D
except FileNotFoundError:
raise FileNotFoundError(
@@ -595,9 +572,7 @@ def main():
)
args = parser.parse_args()
- cameras, images, points3D = read_model(
- path=args.input_model, ext=args.input_format
- )
+ cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)
print("num_cameras:", len(cameras))
print("num_images:", len(images))
@@ -605,11 +580,7 @@ def main():
if args.output_model is not None:
write_model(
- cameras,
- images,
- points3D,
- path=args.output_model,
- ext=args.output_format,
+ cameras, images, points3D, path=args.output_model, ext=args.output_format
)
diff --git a/hloc/utils/viz.py b/hloc/utils/viz.py
index 21d3807..360466b 100644
--- a/hloc/utils/viz.py
+++ b/hloc/utils/viz.py
@@ -20,7 +20,7 @@ def cm_RdGn(x):
def plot_images(
- imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True
+ imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True, figsize=4.5
):
"""Plot a set of images horizontally.
Args:
@@ -37,23 +37,19 @@ def plot_images(
ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H
else:
ratios = [4 / 3] * n
- figsize = [sum(ratios) * 4.5, 4.5]
- fig, ax = plt.subplots(
+ figsize = [sum(ratios) * figsize, figsize]
+ fig, axs = plt.subplots(
1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios}
)
if n == 1:
- ax = [ax]
- for i in range(n):
- ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i]))
- ax[i].get_yaxis().set_ticks([])
- ax[i].get_xaxis().set_ticks([])
- ax[i].set_axis_off()
- for spine in ax[i].spines.values(): # remove frame
- spine.set_visible(False)
+ axs = [axs]
+ for i, (img, ax) in enumerate(zip(imgs, axs)):
+ ax.imshow(img, cmap=plt.get_cmap(cmaps[i]))
+ ax.set_axis_off()
if titles:
- ax[i].set_title(titles[i])
+ ax.set_title(titles[i])
fig.tight_layout(pad=pad)
-
+ return fig
def plot_keypoints(kpts, colors="lime", ps=4):
"""Plot keypoints for existing images.
@@ -96,21 +92,19 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0):
if lw > 0:
# transform the points into the figure coordinate system
- transFigure = fig.transFigure.inverted()
- fkpts0 = transFigure.transform(ax0.transData.transform(kpts0))
- fkpts1 = transFigure.transform(ax1.transData.transform(kpts1))
- fig.lines += [
- matplotlib.lines.Line2D(
- (fkpts0[i, 0], fkpts1[i, 0]),
- (fkpts0[i, 1], fkpts1[i, 1]),
- zorder=1,
- transform=fig.transFigure,
- c=color[i],
- linewidth=lw,
- alpha=a,
+ for i in range(len(kpts0)):
+ fig.add_artist(
+ matplotlib.patches.ConnectionPatch(
+ xyA=(kpts0[i, 0], kpts0[i, 1]),
+ coordsA=ax0.transData,
+ xyB=(kpts1[i, 0], kpts1[i, 1]),
+ coordsB=ax1.transData,
+ zorder=1,
+ color=color[i],
+ linewidth=lw,
+ alpha=a,
+ )
)
- for i in range(len(kpts0))
- ]
# freeze the axes to prevent the transform to change
ax0.autoscale(enable=False)
@@ -134,13 +128,7 @@ def add_text(
):
ax = plt.gcf().axes[idx]
t = ax.text(
- *pos,
- text,
- fontsize=fs,
- ha=ha,
- va=va,
- color=color,
- transform=ax.transAxes
+ *pos, text, fontsize=fs, ha=ha, va=va, color=color, transform=ax.transAxes
)
if lcolor is not None:
t.set_path_effects(
diff --git a/hloc/utils/viz_3d.py b/hloc/utils/viz_3d.py
index fbd6505..e608f78 100644
--- a/hloc/utils/viz_3d.py
+++ b/hloc/utils/viz_3d.py
@@ -9,9 +9,10 @@
"""
from typing import Optional
+
import numpy as np
-import pycolmap
import plotly.graph_objects as go
+import pycolmap
def to_homogeneous(points):
@@ -46,9 +47,7 @@ def init_figure(height: int = 800) -> go.Figure:
dragmode="orbit",
),
margin=dict(l=0, r=0, b=0, t=0, pad=0),
- legend=dict(
- orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1
- ),
+ legend=dict(orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1),
)
return fig
@@ -70,9 +69,7 @@ def plot_points(
mode="markers",
name=name,
legendgroup=name,
- marker=dict(
- size=ps, color=color, line_width=0.0, colorscale=colorscale
- ),
+ marker=dict(size=ps, color=color, line_width=0.0, colorscale=colorscale),
)
fig.add_trace(tr)
@@ -85,7 +82,9 @@ def plot_camera(
color: str = "rgb(0, 0, 255)",
name: Optional[str] = None,
legendgroup: Optional[str] = None,
+ fill: bool = False,
size: float = 1.0,
+ text: Optional[str] = None,
):
"""Plot a camera frustum from pose and intrinsic matrix."""
W, H = K[0, 2] * 2, K[1, 2] * 2
@@ -98,43 +97,34 @@ def plot_camera(
scale = 1.0
corners = to_homogeneous(corners) @ np.linalg.inv(K).T
corners = (corners / 2 * scale) @ R.T + t
-
- x, y, z = corners.T
- rect = go.Scatter3d(
- x=x,
- y=y,
- z=z,
- line=dict(color=color),
- legendgroup=legendgroup,
- name=name,
- marker=dict(size=0.0001),
- showlegend=False,
- )
- fig.add_trace(rect)
+ legendgroup = legendgroup if legendgroup is not None else name
x, y, z = np.concatenate(([t], corners)).T
i = [0, 0, 0, 0]
j = [1, 2, 3, 4]
k = [2, 3, 4, 1]
- pyramid = go.Mesh3d(
- x=x,
- y=y,
- z=z,
- color=color,
- i=i,
- j=j,
- k=k,
- legendgroup=legendgroup,
- name=name,
- showlegend=False,
- )
- fig.add_trace(pyramid)
+ if fill:
+ pyramid = go.Mesh3d(
+ x=x,
+ y=y,
+ z=z,
+ color=color,
+ i=i,
+ j=j,
+ k=k,
+ legendgroup=legendgroup,
+ name=name,
+ showlegend=False,
+ hovertemplate=text.replace("\n", "<br>"),
+ )
+ fig.add_trace(pyramid)
+
triangles = np.vstack((i, j, k)).T
vertices = np.concatenate(([t], corners))
tri_points = np.array([vertices[i] for i in triangles.reshape(-1)])
-
x, y, z = tri_points.T
+
pyramid = go.Scatter3d(
x=x,
y=y,
@@ -144,6 +134,7 @@ def plot_camera(
name=name,
line=dict(color=color, width=1),
showlegend=False,
+ hovertemplate=text.replace("\n", "<br>"),
)
fig.add_trace(pyramid)
@@ -156,19 +147,19 @@ def plot_camera_colmap(
**kwargs
):
"""Plot a camera frustum from PyCOLMAP objects"""
+ world_t_camera = image.cam_from_world.inverse()
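+ # cam_from_world maps world points into the camera frame (pycolmap >= 0.6); its
+ # inverse gives the camera-to-world pose whose rotation and translation are plotted.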
plot_camera(
fig,
- image.rotmat().T,
- image.projection_center(),
+ world_t_camera.rotation.matrix(),
+ world_t_camera.translation,
camera.calibration_matrix(),
name=name or str(image.image_id),
+ text=str(image),
**kwargs
)
-def plot_cameras(
- fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs
-):
+def plot_cameras(fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs):
"""Plot a camera as a cone with camera frustum."""
for image_id, image in reconstruction.images.items():
plot_camera_colmap(
@@ -185,13 +176,14 @@ def plot_reconstruction(
min_track_length: int = 2,
points: bool = True,
cameras: bool = True,
+ points_rgb: bool = True,
cs: float = 1.0,
):
# Filter outliers
bbs = rec.compute_bounding_box(0.001, 0.999)
# Filter points, use original reproj error here
- xyzs = [
- p3D.xyz
+ p3Ds = [
+ p3D
for _, p3D in rec.points3D.items()
if (
(p3D.xyz >= bbs[0]).all()
@@ -200,7 +192,12 @@ def plot_reconstruction(
and p3D.track.length() >= min_track_length
)
]
+ xyzs = [p3D.xyz for p3D in p3Ds]
+ if points_rgb:
+ pcolor = [p3D.color for p3D in p3Ds]
+ else:
+ pcolor = color
if points:
- plot_points(fig, np.array(xyzs), color=color, ps=1, name=name)
+ plot_points(fig, np.array(xyzs), color=pcolor, ps=1, name=name)
if cameras:
plot_cameras(fig, rec, color=color, legendgroup=name, size=cs)
diff --git a/hloc/visualization.py b/hloc/visualization.py
new file mode 100644
index 0000000..77369ef
--- /dev/null
+++ b/hloc/visualization.py
@@ -0,0 +1,182 @@
+import pickle
+import random
+
+import numpy as np
+import pycolmap
+from matplotlib import cm
+
+from .utils.io import read_image
+from .utils.viz import (
+ add_text,
+ cm_RdGn,
+ plot_images,
+ plot_keypoints,
+ plot_matches,
+)
+
+
+def visualize_sfm_2d(
+ reconstruction,
+ image_dir,
+ color_by="visibility",
+ selected=[],
+ n=1,
+ seed=0,
+ dpi=75,
+):
+ assert image_dir.exists()
+ if not isinstance(reconstruction, pycolmap.Reconstruction):
+ reconstruction = pycolmap.Reconstruction(reconstruction)
+
+ if not selected:
+ image_ids = reconstruction.reg_image_ids()
+ selected = random.Random(seed).sample(image_ids, min(n, len(image_ids)))
+
+ for i in selected:
+ image = reconstruction.images[i]
+ keypoints = np.array([p.xy for p in image.points2D])
+ visible = np.array([p.has_point3D() for p in image.points2D])
+
+ if color_by == "visibility":
+ color = [(0, 0, 1) if v else (1, 0, 0) for v in visible]
+ text = f"visible: {np.count_nonzero(visible)}/{len(visible)}"
+ elif color_by == "track_length":
+ tl = np.array(
+ [
+ (
+ reconstruction.points3D[p.point3D_id].track.length()
+ if p.has_point3D()
+ else 1
+ )
+ for p in image.points2D
+ ]
+ )
+ max_, med_ = np.max(tl), np.median(tl[tl > 1])
+ tl = np.log(tl)
+ color = cm.jet(tl / tl.max()).tolist()
+ text = f"max/median track length: {max_}/{med_}"
+ elif color_by == "depth":
+ p3ids = [p.point3D_id for p in image.points2D if p.has_point3D()]
+ z = np.array(
+ [
+ (image.cam_from_world * reconstruction.points3D[j].xyz)[-1]
+ for j in p3ids
+ ]
+ )
+ z -= z.min()
+ color = cm.jet(z / np.percentile(z, 99.9))
+ text = f"visible: {np.count_nonzero(visible)}/{len(visible)}"
+ keypoints = keypoints[visible]
+ else:
+ raise NotImplementedError(f"Coloring not implemented: {color_by}.")
+
+ name = image.name
+ fig = plot_images([read_image(image_dir / name)], dpi=dpi)
+ plot_keypoints([keypoints], colors=[color], ps=4)
+ add_text(0, text)
+ add_text(0, name, pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom")
+ return fig
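+ # Usage sketch (paths are illustrative):
+ # visualize_sfm_2d(Path("outputs/sfm"), Path("datasets/images"), color_by="track_length", n=3)
+ # plots up to three randomly selected registered images with their keypoints colored
+ # by the length of the associated 3D tracks.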
+
+
+def visualize_loc(
+ results,
+ image_dir,
+ reconstruction=None,
+ db_image_dir=None,
+ selected=[],
+ n=1,
+ seed=0,
+ prefix=None,
+ **kwargs,
+):
+ assert image_dir.exists()
+
+ with open(str(results) + "_logs.pkl", "rb") as f:
+ logs = pickle.load(f)
+
+ if not selected:
+ queries = list(logs["loc"].keys())
+ if prefix:
+ queries = [q for q in queries if q.startswith(prefix)]
+ selected = random.Random(seed).sample(queries, min(n, len(queries)))
+
+ if reconstruction is not None:
+ if not isinstance(reconstruction, pycolmap.Reconstruction):
+ reconstruction = pycolmap.Reconstruction(reconstruction)
+
+ for qname in selected:
+ loc = logs["loc"][qname]
+ visualize_loc_from_log(
+ image_dir, qname, loc, reconstruction, db_image_dir, **kwargs
+ )
+
+
+def visualize_loc_from_log(
+ image_dir,
+ query_name,
+ loc,
+ reconstruction=None,
+ db_image_dir=None,
+ top_k_db=2,
+ dpi=75,
+):
+ q_image = read_image(image_dir / query_name)
+ if loc.get("covisibility_clustering", False):
+ # select the first, largest cluster if the localization failed
+ loc = loc["log_clusters"][loc["best_cluster"] or 0]
+
+ inliers = np.array(loc["PnP_ret"]["inliers"])
+ mkp_q = loc["keypoints_query"]
+ n = len(loc["db"])
+ if reconstruction is not None:
+ # for each pair of query keypoint and its matched 3D point,
+ # we need to find its corresponding keypoint in each database image
+ # that observes it. We also count the number of inliers in each.
+ kp_idxs, kp_to_3D_to_db = loc["keypoint_index_to_db"]
+ counts = np.zeros(n)
+ dbs_kp_q_db = [[] for _ in range(n)]
+ inliers_dbs = [[] for _ in range(n)]
+ for i, (inl, (p3D_id, db_idxs)) in enumerate(
+ zip(inliers, kp_to_3D_to_db)
+ ):
+ track = reconstruction.points3D[p3D_id].track
+ track = {el.image_id: el.point2D_idx for el in track.elements}
+ for db_idx in db_idxs:
+ counts[db_idx] += inl
+ kp_db = track[loc["db"][db_idx]]
+ dbs_kp_q_db[db_idx].append((i, kp_db))
+ inliers_dbs[db_idx].append(inl)
+ else:
+ # for inloc the database keypoints are already in the logs
+ assert "keypoints_db" in loc
+ assert "indices_db" in loc
+ counts = np.array(
+ [np.sum(loc["indices_db"][inliers] == i) for i in range(n)]
+ )
+
+ # display the database images with the most inlier matches
+ db_sort = np.argsort(-counts)
+ for db_idx in db_sort[:top_k_db]:
+ if reconstruction is not None:
+ db = reconstruction.images[loc["db"][db_idx]]
+ db_name = db.name
+ db_kp_q_db = np.array(dbs_kp_q_db[db_idx])
+ kp_q = mkp_q[db_kp_q_db[:, 0]]
+ kp_db = np.array([db.points2D[i].xy for i in db_kp_q_db[:, 1]])
+ inliers_db = inliers_dbs[db_idx]
+ else:
+ db_name = loc["db"][db_idx]
+ kp_q = mkp_q[loc["indices_db"] == db_idx]
+ kp_db = loc["keypoints_db"][loc["indices_db"] == db_idx]
+ inliers_db = inliers[loc["indices_db"] == db_idx]
+
+ db_image = read_image((db_image_dir or image_dir) / db_name)
+ color = cm_RdGn(inliers_db).tolist()
+ text = f"inliers: {sum(inliers_db)}/{len(inliers_db)}"
+
+ plot_images([q_image, db_image], dpi=dpi)
+ plot_matches(kp_q, kp_db, color, a=0.1)
+ add_text(0, text)
+ opts = dict(pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom")
+ add_text(0, query_name, **opts)
+ add_text(1, db_name, **opts)
diff --git a/requirements.txt b/requirements.txt
index ad5e055..5bc1a74 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,7 @@ opencv-python==4.6.0.66
pandas==2.0.3
plotly==5.15.0
protobuf==4.23.2
-pycolmap==0.5.0
+pycolmap==0.6.0
pytlsd==0.0.2
pytorch-lightning==1.4.9
PyYAML==6.0
@@ -34,4 +34,5 @@ onnxruntime
poselib
roma #dust3r
huggingface_hub
-psutil
\ No newline at end of file
+psutil
+easydict
\ No newline at end of file
diff --git a/third_party/mast3r b/third_party/mast3r
new file mode 160000
index 0000000..6d6d485
--- /dev/null
+++ b/third_party/mast3r
@@ -0,0 +1 @@
+Subproject commit 6d6d4858f03f1736ca21928fa3eda0511fbb0a8d
diff --git a/ui/app_class.py b/ui/app_class.py
index 50917a7..f1c9805 100644
--- a/ui/app_class.py
+++ b/ui/app_class.py
@@ -3,7 +3,10 @@
import gradio as gr
import numpy as np
+from easydict import EasyDict as edict
+from omegaconf import OmegaConf
+from ui.sfm import SfmEngine
from ui.utils import (
GRADIO_VERSION,
gen_examples,
@@ -115,7 +118,7 @@ def init_interface(self):
label="Match thres.",
value=0.1,
)
- match_setting_max_features = gr.Slider(
+ match_setting_max_keypoints = gr.Slider(
minimum=10,
maximum=10000,
step=10,
@@ -199,7 +202,7 @@ def init_interface(self):
input_image0,
input_image1,
match_setting_threshold,
- match_setting_max_features,
+ match_setting_max_keypoints,
detect_keypoints_threshold,
matcher_list,
ransac_method,
@@ -314,7 +317,7 @@ def init_interface(self):
input_image0,
input_image1,
match_setting_threshold,
- match_setting_max_features,
+ match_setting_max_keypoints,
detect_keypoints_threshold,
matcher_list,
input_image0,
@@ -377,31 +380,15 @@ def init_interface(self):
],
outputs=[output_wrapped, geometry_result],
)
- with gr.Tab("Under construction"):
- self.init_tab_sfm()
-
- def init_tab_sfm(self):
- with gr.Row():
- with gr.Column():
- with gr.Row():
- gr.Textbox("Under construction", label="A", visible=True)
- gr.Textbox("Under construction", label="B", visible=True)
- gr.Textbox("Under construction", label="C", visible=True)
- with gr.Row():
- with gr.Accordion("Open for More", open=False):
- gr.Textbox(
- "Under construction", label="A1", visible=True
- )
- gr.Textbox(
- "Under construction", label="B1", visible=True
- )
- gr.Textbox(
- "Under construction", label="C1", visible=True
- )
- with gr.Column():
- gr.Textbox("Under construction", label="D", visible=True)
- gr.Textbox("Under construction", label="E", visible=True)
- gr.Textbox("Under construction", label="F", visible=True)
+ with gr.Tab("Structure from Motion (under-dev)"):
+ sfm_ui = AppSfmUI( # noqa: F841
+ {
+ **self.cfg,
+ "matcher_zoo": self.matcher_zoo,
+ "outputs": "experiments/sfm",
+ }
+ )
+ sfm_ui.call_empty()
def run(self):
self.app.queue().launch(
@@ -559,3 +546,256 @@ def get_link(link, tag="Link"):
# height=1000,
)
return tab
+
+
+class AppBaseUI:
+ def __init__(self, cfg: Dict[str, Any] = {}):
+ self.cfg = OmegaConf.create(cfg)
+ self.inputs = edict({})
+ self.outputs = edict({})
+ self.ui = edict({})
+
+ def _init_ui(self):
+ NotImplemented
+
+ def call(self, **kwargs):
+ NotImplemented
+
+ def info(self):
+ gr.Info("SFM is under construction.")
+
+
+class AppSfmUI(AppBaseUI):
+ def __init__(self, cfg: Dict[str, Any] = None):
+ super().__init__(cfg)
+ assert "matcher_zoo" in self.cfg
+ self.matcher_zoo = self.cfg["matcher_zoo"]
+ self.sfm_engine = SfmEngine(cfg)
+ self._init_ui()
+
+ def init_retrieval_dropdown(self):
+ algos = []
+ for k, v in self.cfg["retrieval_zoo"].items():
+ if v.get("enable", True):
+ algos.append(k)
+ return algos
+
+ def _update_options(self, option):
+ if option == "sparse":
+ return gr.Textbox("sparse", visible=True)
+ elif option == "dense":
+ return gr.Textbox("dense", visible=True)
+ else:
+ return gr.Textbox("not set", visible=True)
+
+ def _on_select_custom_params(self, value: bool = False):
+ return gr.Textbox(
+ label="Camera Params",
+ value="0,0,0,0",
+ interactive=value,
+ visible=value,
+ )
+
+ def _init_ui(self):
+ with gr.Row():
+ # data setting and camera settings
+ with gr.Column():
+ self.inputs.input_images = gr.File(
+ label="SfM",
+ interactive=True,
+ file_count="multiple",
+ min_width=300,
+ )
+ # camera setting
+ with gr.Accordion("Camera Settings", open=True):
+ with gr.Column():
+ with gr.Row():
+ with gr.Column():
+ self.inputs.camera_model = gr.Dropdown(
+ choices=[
+ "PINHOLE",
+ "SIMPLE_RADIAL",
+ "OPENCV",
+ ],
+ value="PINHOLE",
+ label="Camera Model",
+ interactive=True,
+ )
+ with gr.Column():
+ gr.Checkbox(
+ label="Shared Params",
+ value=True,
+ interactive=True,
+ )
+ camera_custom_params_cb = gr.Checkbox(
+ label="Custom Params",
+ value=False,
+ interactive=True,
+ )
+ with gr.Row():
+ self.inputs.camera_params = gr.Textbox(
+ label="Camera Params",
+ value="0,0,0,0",
+ interactive=False,
+ visible=False,
+ )
+ camera_custom_params_cb.select(
+ fn=self._on_select_custom_params,
+ inputs=camera_custom_params_cb,
+ outputs=self.inputs.camera_params,
+ )
+
+ with gr.Accordion("Matching Settings", open=True):
+ # feature extraction and matching setting
+ with gr.Row():
+ # matcher setting
+ self.inputs.matcher_key = gr.Dropdown(
+ choices=self.matcher_zoo.keys(),
+ value="disk+lightglue",
+ label="Matching Model",
+ interactive=True,
+ )
+ with gr.Row():
+ with gr.Accordion("Advanced Settings", open=False):
+ with gr.Column():
+ with gr.Row():
+ # matching setting
+ self.inputs.max_keypoints = gr.Slider(
+ label="Max Keypoints",
+ minimum=100,
+ maximum=10000,
+ value=1000,
+ interactive=True,
+ )
+ self.inputs.keypoint_threshold = gr.Slider(
+ label="Keypoint Threshold",
+ minimum=0,
+ maximum=1,
+ value=0.01,
+ )
+ with gr.Row():
+ self.inputs.match_threshold = gr.Slider(
+ label="Match Threshold",
+ minimum=0.01,
+ maximum=12.0,
+ value=0.2,
+ )
+ self.inputs.ransac_threshold = gr.Slider(
+ label="Ransac Threshold",
+ minimum=0.01,
+ maximum=12.0,
+ value=4.0,
+ step=0.01,
+ interactive=True,
+ )
+
+ with gr.Row():
+ self.inputs.ransac_confidence = gr.Slider(
+ label="Ransac Confidence",
+ minimum=0.01,
+ maximum=1.0,
+ value=0.9999,
+ step=0.0001,
+ interactive=True,
+ )
+ self.inputs.ransac_max_iter = gr.Slider(
+ label="Ransac Max Iter",
+ minimum=1,
+ maximum=100,
+ value=100,
+ step=1,
+ interactive=True,
+ )
+ with gr.Accordion("Scene Graph Settings", open=True):
+ # mapping setting
+ self.inputs.scene_graph = gr.Dropdown(
+ choices=["all", "swin", "oneref"],
+ value="all",
+ label="Scene Graph",
+ interactive=True,
+ )
+
+ # global feature setting
+ self.inputs.global_feature = gr.Dropdown(
+ choices=self.init_retrieval_dropdown(),
+ value="netvlad",
+ label="Global features",
+ interactive=True,
+ )
+ self.inputs.top_k = gr.Slider(
+ label="Number of Images per Image to Match",
+ minimum=1,
+ maximum=100,
+ value=10,
+ step=1,
+ )
+ # button_match = gr.Button("Run Matching", variant="primary")
+
+ # mapping setting
+ with gr.Column():
+ with gr.Accordion("Mapping Settings", open=True):
+ with gr.Row():
+ with gr.Accordion("Bundle Adjustment Settings", open=True):
+ with gr.Row():
+ self.inputs.mapper_refine_focal_length = (
+ gr.Checkbox(
+ label="Refine Focal Length",
+ value=False,
+ interactive=True,
+ )
+ )
+ self.inputs.mapper_refine_principle_points = (
+ gr.Checkbox(
+ label="Refine Principal Points",
+ value=False,
+ interactive=True,
+ )
+ )
+ self.inputs.mapper_refine_extra_params = (
+ gr.Checkbox(
+ label="Refine Extra Params",
+ value=False,
+ interactive=True,
+ )
+ )
+ with gr.Accordion("Retriangulation Settings", open=True):
+ gr.Textbox(
+ label="Retriangulation Details",
+ )
+ self.ui.button_sfm = gr.Button("Run SFM", variant="primary")
+ self.outputs.model_3d = gr.Model3D(
+ interactive=True,
+ )
+ self.outputs.output_image = gr.Image(
+ label="SFM Visualize",
+ type="numpy",
+ image_mode="RGB",
+ interactive=False,
+ )
+
+ def call_empty(self):
+ self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[])
+
+ def call(self):
+ self.ui.button_sfm.click(
+ fn=self.sfm_engine.call,
+ inputs=[
+ self.inputs.matcher_key,
+ self.inputs.input_images, # images
+ self.inputs.camera_model,
+ self.inputs.camera_params,
+ self.inputs.max_keypoints,
+ self.inputs.keypoint_threshold,
+ self.inputs.match_threshold,
+ self.inputs.ransac_threshold,
+ self.inputs.ransac_confidence,
+ self.inputs.ransac_max_iter,
+ self.inputs.scene_graph,
+ self.inputs.global_feature,
+ self.inputs.top_k,
+ self.inputs.mapper_refine_focal_length,
+ self.inputs.mapper_refine_principle_points,
+ self.inputs.mapper_refine_extra_params,
+ ],
+ outputs=[self.outputs.model_3d, self.outputs.output_image],
+ )
diff --git a/ui/config.yaml b/ui/config.yaml
index dff927f..63fe9d1 100644
--- a/ui/config.yaml
+++ b/ui/config.yaml
@@ -27,6 +27,17 @@ matcher_zoo:
paper: https://arxiv.org/abs/2405.12979
project: https://hwjiang1510.github.io/OmniGlue
display: true
+ Mast3R:
+ enable: false
+ matcher: mast3r
+ dense: true
+ info:
+ name: MASt3R # display name
+ source: "CVPR 2024"
+ github: https://github.com/naver/mast3r
+ paper: https://arxiv.org/abs/2406.09756
+ project: https://dust3r.europe.naverlabs.com
+ display: true
DUSt3R:
# TODO: duster is under development
enable: true
@@ -395,4 +406,12 @@ matcher_zoo:
github: https://github.com/feixue94/sfd2
paper: https://arxiv.org/abs/2304.14845
project: https://feixue94.github.io/
- display: true
\ No newline at end of file
+ display: true
+
+retrieval_zoo:
+ netvlad:
+ enable: true
+ openibl:
+ enable: true
+ cosplace:
+ enable: true
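+
+# These entries populate the "Global features" dropdown of the SfM tab; algorithms with
+# enable: false are hidden (see AppSfmUI.init_retrieval_dropdown in ui/app_class.py).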
diff --git a/ui/sfm.py b/ui/sfm.py
new file mode 100644
index 0000000..b13ac3d
--- /dev/null
+++ b/ui/sfm.py
@@ -0,0 +1,164 @@
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pycolmap
+
+from hloc import (
+ extract_features,
+ logger,
+ match_features,
+ pairs_from_retrieval,
+ reconstruction,
+ visualization,
+)
+
+from .viz import fig2im
+
+
+class SfmEngine:
+ def __init__(self, cfg: Dict[str, Any] = None):
+ self.cfg = cfg
+ if "outputs" in cfg and Path(cfg["outputs"]):
+ outputs = Path(cfg["outputs"])
+ outputs.mkdir(parents=True, exist_ok=True)
+ else:
+ outputs = tempfile.mkdtemp()
+ self.outputs = Path(outputs)
+
+ def call(
+ self,
+ key: str,
+ images: List[Path],
+ camera_model: str,
+ camera_params: List[float],
+ max_keypoints: int,
+ keypoint_threshold: float,
+ match_threshold: float,
+ ransac_threshold: float,
+ ransac_confidence: float,
+ ransac_max_iter: int,
+ scene_graph: str,
+ global_feature: str,
+ top_k: int = 10,
+ mapper_refine_focal_length: bool = False,
+ mapper_refine_principle_points: bool = False,
+ mapper_refine_extra_params: bool = False,
+ ):
+ """
+ Call a list of functions to perform feature extraction, matching, and reconstruction.
+
+ Args:
+ key (str): The key to retrieve the matcher and feature models.
+ images (List[Path]): The input image files.
+ camera_model (str): The camera model.
+ camera_params (List[float]): The camera parameters.
+ max_keypoints (int): The maximum number of keypoints.
+ keypoint_threshold (float): The keypoint detection threshold.
+ match_threshold (float): The match threshold.
+ ransac_threshold (float): The RANSAC threshold.
+ ransac_confidence (float): The RANSAC confidence.
+ ransac_max_iter (int): The maximum number of RANSAC iterations.
+ scene_graph (str): The scene graph strategy ("all", "swin", or "oneref").
+ global_feature (str): The global feature used for image retrieval.
+ top_k (int): The number of retrieved images to match per image.
+ mapper_refine_focal_length (bool): Whether to refine the focal length.
+ mapper_refine_principle_points (bool): Whether to refine the principal points.
+ mapper_refine_extra_params (bool): Whether to refine the extra parameters.
+
+ Returns:
+ Tuple[Path, np.ndarray]: The exported point cloud file and the 2D visualization image.
+ """
+ if len(images) == 0:
+ logger.error("No input images were provided.")
+
+ temp_images = Path(tempfile.mkdtemp())
+ # copy images
+ logger.info(f"Copying images to {temp_images}.")
+ for image in images:
+ shutil.copy(image, temp_images)
+
+ matcher_zoo = self.cfg["matcher_zoo"]
+ model = matcher_zoo[key]
+ match_conf = model["matcher"]
+ match_conf["model"]["max_keypoints"] = max_keypoints
+ match_conf["model"]["match_threshold"] = match_threshold
+
+ feature_conf = model["feature"]
+ feature_conf["model"]["max_keypoints"] = max_keypoints
+ feature_conf["model"]["keypoint_threshold"] = keypoint_threshold
+
+ # retrieval
+ retrieval_name = self.cfg.get("retrieval_name", "netvlad")
+ retrieval_conf = extract_features.confs[retrieval_name]
+
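+ # Bundle-adjustment options forwarded to the pycolmap mapper through
+ # reconstruction.main(mapper_options=...); the refine_* flags come from the UI
+ # checkboxes, the remaining values are fixed here.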
+ mapper_options = {
+ "ba_refine_extra_params": mapper_refine_extra_params,
+ "ba_refine_focal_length": mapper_refine_focal_length,
+ "ba_refine_principal_point": mapper_refine_principle_points,
+ "ba_local_max_num_iterations": 40,
+ "ba_local_max_refinements": 3,
+ "ba_global_max_num_iterations": 100,
+ # The three options below are intended for individual/video data; for internet
+ # photo collections they should be left at their defaults.
+ "min_focal_length_ratio": 0.1,
+ "max_focal_length_ratio": 10,
+ "max_extra_param": 1e15,
+ }
+
+ sfm_dir = self.outputs / "sfm_{}".format(key)
+ sfm_pairs = self.outputs / "pairs-sfm.txt"
+ sfm_dir.mkdir(exist_ok=True, parents=True)
+
+ # extract features
+ retrieval_path = extract_features.main(
+ retrieval_conf, temp_images, self.outputs
+ )
+ pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k)
+
+ feature_path = extract_features.main(
+ feature_conf, temp_images, self.outputs
+ )
+ # match features
+ match_path = match_features.main(
+ match_conf, sfm_pairs, feature_conf["output"], self.outputs
+ )
+ # reconstruction
+ already_sfm = False
+ if sfm_dir.exists():
+ try:
+ model = pycolmap.Reconstruction(str(sfm_dir))
+ already_sfm = True
+ except ValueError:
+ logger.info(f"No valid reconstruction found in {sfm_dir}, running SfM.")
+ if not already_sfm:
+ model = reconstruction.main(
+ sfm_dir,
+ temp_images,
+ sfm_pairs,
+ feature_path,
+ match_path,
+ mapper_options=mapper_options,
+ )
+
+ vertices = []
+ for point3D_id, point3D in model.points3D.items():
+ vertices.append([point3D.xyz, point3D.color])
+
+ model_3d = sfm_dir / "points3D.obj"
+ with open(model_3d, "w") as f:
+ for p, c in vertices:
+ # Write vertex position
+ f.write("v {} {} {}\n".format(p[0], p[1], p[2]))
+ # Write vertex normal (color)
+ f.write(
+ "vn {} {} {}\n".format(
+ c[0] / 255.0, c[1] / 255.0, c[2] / 255.0
+ )
+ )
+ viz_2d = visualization.visualize_sfm_2d(
+ model, temp_images, color_by="visibility", n=2, dpi=300
+ )
+
+ return model_3d, fig2im(viz_2d) / 255.0
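+
+
+ # Usage sketch (config keys and argument values are illustrative, not part of the
+ # repository):
+ # engine = SfmEngine({"matcher_zoo": matcher_zoo, "outputs": "experiments/sfm"})
+ # obj_path, preview = engine.call(
+ # "disk+lightglue", image_files, "PINHOLE", "0,0,0,0", 4096, 0.01, 0.2,
+ # 4.0, 0.9999, 100, "all", "netvlad",
+ # )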