diff --git a/.gitignore b/.gitignore index 908ae94..371e266 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,8 @@ third_party/QuadTreeAttention desktop.ini *.egg-info output.pkl +experiments* +gen_example.py +datasets/lines/terrace0.JPG +datasets/lines/terrace1.JPG +datasets/South-Building* diff --git a/.gitmodules b/.gitmodules index 336af93..e4ee749 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "third_party/DarkFeat"] path = third_party/DarkFeat url = https://github.com/agipro/DarkFeat.git +[submodule "third_party/mast3r"] + path = third_party/mast3r + url = https://github.com/naver/mast3r diff --git a/Dockerfile b/Dockerfile index 9501dec..863cdf8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,7 +20,7 @@ ENV PATH /opt/conda/envs/imw/bin:$PATH # Make RUN commands use the new environment SHELL ["conda", "run", "-n", "imw", "/bin/bash", "-c"] RUN pip install --upgrade pip -RUN pip install -r env-docker.txt +RUN pip install -r requirements.txt RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y # Export port diff --git a/README.md b/README.md index bd02d4b..2a3ca15 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Here is a demo of the tool: https://github.com/Vincentqyw/image-matching-webui/assets/18531182/263534692-c3484d1b-cc00-4fdc-9b31-e5b7af07ecd9 The tool currently supports various popular image matching algorithms, namely: +- [x] [MASt3R](https://github.com/naver/mast3r), CVPR 2024 - [x] [DUSt3R](https://github.com/naver/dust3r), CVPR 2024 - [x] [OmniGlue](https://github.com/Vincentqyw/omniglue-onnx), CVPR 2024 - [x] [XFeat](https://github.com/verlab/accelerated_features), CVPR 2024 diff --git a/hloc/__init__.py b/hloc/__init__.py index 7c2e3dd..cf64b08 100644 --- a/hloc/__init__.py +++ b/hloc/__init__.py @@ -3,7 +3,7 @@ import torch from packaging import version -__version__ = "1.3" +__version__ = "1.5" formatter = logging.Formatter( fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", @@ -23,14 +23,18 @@ except ImportError: logger.warning("pycolmap is not installed, some features may not work.") else: - minimal_version = version.parse("0.3.0") - found_version = version.parse(getattr(pycolmap, "__version__")) - if found_version < minimal_version: - logger.warning( - "hloc now requires pycolmap>=%s but found pycolmap==%s, " - "please upgrade with `pip install --upgrade pycolmap`", - minimal_version, - found_version, - ) + min_version = version.parse("0.6.0") + found_version = pycolmap.__version__ + if found_version != "dev": + version = version.parse(found_version) + if version < min_version: + s = f"pycolmap>={min_version}" + logger.warning( + "hloc requires %s but found pycolmap==%s, " + 'please upgrade with `pip install --upgrade "%s"`', + s, + found_version, + s, + ) DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") diff --git a/hloc/colmap_from_nvm.py b/hloc/colmap_from_nvm.py new file mode 100644 index 0000000..1f3ad89 --- /dev/null +++ b/hloc/colmap_from_nvm.py @@ -0,0 +1,220 @@ +import argparse +import sqlite3 +from collections import defaultdict +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from . 
import logger +from .utils.read_write_model import ( + CAMERA_MODEL_NAMES, + Camera, + Image, + Point3D, + write_model, +) + + +def recover_database_images_and_ids(database_path): + images = {} + cameras = {} + db = sqlite3.connect(str(database_path)) + ret = db.execute("SELECT name, image_id, camera_id FROM images;") + for name, image_id, camera_id in ret: + images[name] = image_id + cameras[name] = camera_id + db.close() + logger.info( + f"Found {len(images)} images and {len(cameras)} cameras in database." + ) + return images, cameras + + +def quaternion_to_rotation_matrix(qvec): + qvec = qvec / np.linalg.norm(qvec) + w, x, y, z = qvec + R = np.array( + [ + [ + 1 - 2 * y * y - 2 * z * z, + 2 * x * y - 2 * z * w, + 2 * x * z + 2 * y * w, + ], + [ + 2 * x * y + 2 * z * w, + 1 - 2 * x * x - 2 * z * z, + 2 * y * z - 2 * x * w, + ], + [ + 2 * x * z - 2 * y * w, + 2 * y * z + 2 * x * w, + 1 - 2 * x * x - 2 * y * y, + ], + ] + ) + return R + + +def camera_center_to_translation(c, qvec): + R = quaternion_to_rotation_matrix(qvec) + return (-1) * np.matmul(R, c) + + +def read_nvm_model( + nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False +): + with open(intrinsics_path, "r") as f: + raw_intrinsics = f.readlines() + + logger.info(f"Reading {len(raw_intrinsics)} cameras...") + cameras = {} + for intrinsics in raw_intrinsics: + intrinsics = intrinsics.strip("\n").split(" ") + name, camera_model, width, height = intrinsics[:4] + params = [float(p) for p in intrinsics[4:]] + camera_model = CAMERA_MODEL_NAMES[camera_model] + assert len(params) == camera_model.num_params + camera_id = camera_ids[name] + camera = Camera( + id=camera_id, + model=camera_model.model_name, + width=int(width), + height=int(height), + params=params, + ) + cameras[camera_id] = camera + + nvm_f = open(nvm_path, "r") + line = nvm_f.readline() + while line == "\n" or line.startswith("NVM_V3"): + line = nvm_f.readline() + num_images = int(line) + assert num_images == len(cameras) + + logger.info(f"Reading {num_images} images...") + image_idx_to_db_image_id = [] + image_data = [] + i = 0 + while i < num_images: + line = nvm_f.readline() + if line == "\n": + continue + data = line.strip("\n").split(" ") + image_data.append(data) + image_idx_to_db_image_id.append(image_ids[data[0]]) + i += 1 + + line = nvm_f.readline() + while line == "\n": + line = nvm_f.readline() + num_points = int(line) + + if skip_points: + logger.info(f"Skipping {num_points} points.") + num_points = 0 + else: + logger.info(f"Reading {num_points} points...") + points3D = {} + image_idx_to_keypoints = defaultdict(list) + i = 0 + pbar = tqdm(total=num_points, unit="pts") + while i < num_points: + line = nvm_f.readline() + if line == "\n": + continue + + data = line.strip("\n").split(" ") + x, y, z, r, g, b, num_observations = data[:7] + obs_image_ids, point2D_idxs = [], [] + for j in range(int(num_observations)): + s = 7 + 4 * j + img_index, kp_index, kx, ky = data[s : s + 4] + image_idx_to_keypoints[int(img_index)].append( + (int(kp_index), float(kx), float(ky), i) + ) + db_image_id = image_idx_to_db_image_id[int(img_index)] + obs_image_ids.append(db_image_id) + point2D_idxs.append(kp_index) + + point = Point3D( + id=i, + xyz=np.array([x, y, z], float), + rgb=np.array([r, g, b], int), + error=1.0, # fake + image_ids=np.array(obs_image_ids, int), + point2D_idxs=np.array(point2D_idxs, int), + ) + points3D[i] = point + + i += 1 + pbar.update(1) + pbar.close() + + logger.info("Parsing image data...") + images = {} + for i, data in 
enumerate(image_data): + # Skip the focal length. Skip the distortion and terminal 0. + name, _, qw, qx, qy, qz, cx, cy, cz, _, _ = data + qvec = np.array([qw, qx, qy, qz], float) + c = np.array([cx, cy, cz], float) + t = camera_center_to_translation(c, qvec) + + if i in image_idx_to_keypoints: + # NVM only stores triangulated 2D keypoints: add dummy ones + keypoints = image_idx_to_keypoints[i] + point2D_idxs = np.array([d[0] for d in keypoints]) + tri_xys = np.array([[x, y] for _, x, y, _ in keypoints]) + tri_ids = np.array([i for _, _, _, i in keypoints]) + + num_2Dpoints = max(point2D_idxs) + 1 + xys = np.zeros((num_2Dpoints, 2), float) + point3D_ids = np.full(num_2Dpoints, -1, int) + xys[point2D_idxs] = tri_xys + point3D_ids[point2D_idxs] = tri_ids + else: + xys = np.zeros((0, 2), float) + point3D_ids = np.full(0, -1, int) + + image_id = image_ids[name] + image = Image( + id=image_id, + qvec=qvec, + tvec=t, + camera_id=camera_ids[name], + name=name, + xys=xys, + point3D_ids=point3D_ids, + ) + images[image_id] = image + + return cameras, images, points3D + + +def main(nvm, intrinsics, database, output, skip_points=False): + assert nvm.exists(), nvm + assert intrinsics.exists(), intrinsics + assert database.exists(), database + + image_ids, camera_ids = recover_database_images_and_ids(database) + + logger.info("Reading the NVM model...") + model = read_nvm_model( + nvm, intrinsics, image_ids, camera_ids, skip_points=skip_points + ) + + logger.info("Writing the COLMAP model...") + output.mkdir(exist_ok=True, parents=True) + write_model(*model, path=str(output), ext=".bin") + logger.info("Done.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--nvm", required=True, type=Path) + parser.add_argument("--intrinsics", required=True, type=Path) + parser.add_argument("--database", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--skip_points", action="store_true") + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/extract_features.py b/hloc/extract_features.py index 1e66f03..62eeb45 100644 --- a/hloc/extract_features.py +++ b/hloc/extract_features.py @@ -329,6 +329,11 @@ "model": {"name": "cosplace"}, "preprocessing": {"resize_max": 1024}, }, + "eigenplaces": { + "output": "global-feats-eigenplaces", + "model": {"name": "eigenplaces"}, + "preprocessing": {"resize_max": 1024}, + }, } diff --git a/hloc/extractors/eigenplaces.py b/hloc/extractors/eigenplaces.py new file mode 100644 index 0000000..fd9953b --- /dev/null +++ b/hloc/extractors/eigenplaces.py @@ -0,0 +1,57 @@ +""" +Code for loading models trained with EigenPlaces (or CosPlace) as a global +features extractor for geolocalization through image retrieval. +Multiple models are available with different backbones. Below is a summary of +models available (backbone : list of available output descriptors +dimensionality). For example you can use a model based on a ResNet50 with +descriptors dimensionality 1024. 
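+Such a model can be selected by overriding the conf of the extractor class
+below, e.g. {"variant": "EigenPlaces", "backbone": "ResNet50",
+"fc_output_dim": 2048} (keys as in `default_conf`, values from the tables
+below).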
+ +EigenPlaces trained models: + ResNet18: [ 256, 512] + ResNet50: [128, 256, 512, 2048] + ResNet101: [128, 256, 512, 2048] + VGG16: [ 512] + +CosPlace trained models: + ResNet18: [32, 64, 128, 256, 512] + ResNet50: [32, 64, 128, 256, 512, 1024, 2048] + ResNet101: [32, 64, 128, 256, 512, 1024, 2048] + ResNet152: [32, 64, 128, 256, 512, 1024, 2048] + VGG16: [ 64, 128, 256, 512] + +EigenPlaces paper (ICCV 2023): https://arxiv.org/abs/2308.10832 +CosPlace paper (CVPR 2022): https://arxiv.org/abs/2204.02287 +""" + +import torch +import torchvision.transforms as tvf + +from ..utils.base_model import BaseModel + + +class EigenPlaces(BaseModel): + default_conf = { + "variant": "EigenPlaces", + "backbone": "ResNet101", + "fc_output_dim": 2048, + } + required_inputs = ["image"] + + def _init(self, conf): + self.net = torch.hub.load( + "gmberton/" + conf["variant"], + "get_trained_model", + backbone=conf["backbone"], + fc_output_dim=conf["fc_output_dim"], + ).eval() + + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + self.norm_rgb = tvf.Normalize(mean=mean, std=std) + + def _forward(self, data): + image = self.norm_rgb(data["image"]) + desc = self.net(image) + return { + "global_descriptor": desc, + } diff --git a/hloc/localize_inloc.py b/hloc/localize_inloc.py new file mode 100644 index 0000000..1e003b1 --- /dev/null +++ b/hloc/localize_inloc.py @@ -0,0 +1,183 @@ +import argparse +import pickle +from pathlib import Path + +import cv2 +import h5py +import numpy as np +import pycolmap +import torch +from scipy.io import loadmat +from tqdm import tqdm + +from . import logger +from .utils.parsers import names_to_pair, parse_retrieval + + +def interpolate_scan(scan, kp): + h, w, c = scan.shape + kp = kp / np.array([[w - 1, h - 1]]) * 2 - 1 + assert np.all(kp > -1) and np.all(kp < 1) + scan = torch.from_numpy(scan).permute(2, 0, 1)[None] + kp = torch.from_numpy(kp)[None, None] + grid_sample = torch.nn.functional.grid_sample + + # To maximize the number of points that have depth: + # do bilinear interpolation first and then nearest for the remaining points + interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[ + 0, :, 0 + ] + interp_nn = torch.nn.functional.grid_sample( + scan, kp, align_corners=True, mode="nearest" + )[0, :, 0] + interp = torch.where(torch.isnan(interp_lin), interp_nn, interp_lin) + valid = ~torch.any(torch.isnan(interp), 0) + + kp3d = interp.T.numpy() + valid = valid.numpy() + return kp3d, valid + + +def get_scan_pose(dataset_dir, rpath): + split_image_rpath = rpath.split("/") + floor_name = split_image_rpath[-3] + scan_id = split_image_rpath[-2] + image_name = split_image_rpath[-1] + building_name = image_name[:3] + + path = Path( + dataset_dir, + "database/alignments", + floor_name, + f"transformations/{building_name}_trans_{scan_id}.txt", + ) + with open(path) as f: + raw_lines = f.readlines() + + P_after_GICP = np.array( + [ + np.fromstring(raw_lines[7], sep=" "), + np.fromstring(raw_lines[8], sep=" "), + np.fromstring(raw_lines[9], sep=" "), + np.fromstring(raw_lines[10], sep=" "), + ] + ) + + return P_after_GICP + + +def pose_from_cluster( + dataset_dir, q, retrieved, feature_file, match_file, skip=None +): + height, width = cv2.imread(str(dataset_dir / q)).shape[:2] + cx = 0.5 * width + cy = 0.5 * height + focal_length = 4032.0 * 28.0 / 36.0 + + all_mkpq = [] + all_mkpr = [] + all_mkp3d = [] + all_indices = [] + kpq = feature_file[q]["keypoints"].__array__() + num_matches = 0 + + for i, r in enumerate(retrieved): + kpr = 
feature_file[r]["keypoints"].__array__() + pair = names_to_pair(q, r) + m = match_file[pair]["matches0"].__array__() + v = m > -1 + + if skip and (np.count_nonzero(v) < skip): + continue + + mkpq, mkpr = kpq[v], kpr[m[v]] + num_matches += len(mkpq) + + scan_r = loadmat(Path(dataset_dir, r + ".mat"))["XYZcut"] + mkp3d, valid = interpolate_scan(scan_r, mkpr) + Tr = get_scan_pose(dataset_dir, r) + mkp3d = (Tr[:3, :3] @ mkp3d.T + Tr[:3, -1:]).T + + all_mkpq.append(mkpq[valid]) + all_mkpr.append(mkpr[valid]) + all_mkp3d.append(mkp3d[valid]) + all_indices.append(np.full(np.count_nonzero(valid), i)) + + all_mkpq = np.concatenate(all_mkpq, 0) + all_mkpr = np.concatenate(all_mkpr, 0) + all_mkp3d = np.concatenate(all_mkp3d, 0) + all_indices = np.concatenate(all_indices, 0) + + cfg = { + "model": "SIMPLE_PINHOLE", + "width": width, + "height": height, + "params": [focal_length, cx, cy], + } + ret = pycolmap.absolute_pose_estimation(all_mkpq, all_mkp3d, cfg, 48.00) + ret["cfg"] = cfg + return ret, all_mkpq, all_mkpr, all_mkp3d, all_indices, num_matches + + +def main(dataset_dir, retrieval, features, matches, results, skip_matches=None): + assert retrieval.exists(), retrieval + assert features.exists(), features + assert matches.exists(), matches + + retrieval_dict = parse_retrieval(retrieval) + queries = list(retrieval_dict.keys()) + + feature_file = h5py.File(features, "r", libver="latest") + match_file = h5py.File(matches, "r", libver="latest") + + poses = {} + logs = { + "features": features, + "matches": matches, + "retrieval": retrieval, + "loc": {}, + } + logger.info("Starting localization...") + for q in tqdm(queries): + db = retrieval_dict[q] + ret, mkpq, mkpr, mkp3d, indices, num_matches = pose_from_cluster( + dataset_dir, q, db, feature_file, match_file, skip_matches + ) + + poses[q] = (ret["qvec"], ret["tvec"]) + logs["loc"][q] = { + "db": db, + "PnP_ret": ret, + "keypoints_query": mkpq, + "keypoints_db": mkpr, + "3d_points": mkp3d, + "indices_db": indices, + "num_matches": num_matches, + } + + logger.info(f"Writing poses to {results}...") + with open(results, "w") as f: + for q in queries: + qvec, tvec = poses[q] + qvec = " ".join(map(str, qvec)) + tvec = " ".join(map(str, tvec)) + name = q.split("/")[-1] + f.write(f"{name} {qvec} {tvec}\n") + + logs_path = f"{results}_logs.pkl" + logger.info(f"Writing logs to {logs_path}...") + with open(logs_path, "wb") as f: + pickle.dump(logs, f) + logger.info("Done!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_dir", type=Path, required=True) + parser.add_argument("--retrieval", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + parser.add_argument("--results", type=Path, required=True) + parser.add_argument("--skip_matches", type=int) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py new file mode 100644 index 0000000..a1cb672 --- /dev/null +++ b/hloc/localize_sfm.py @@ -0,0 +1,247 @@ +import argparse +import pickle +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Union + +import numpy as np +import pycolmap +from tqdm import tqdm + +from . 
import logger +from .utils.io import get_keypoints, get_matches +from .utils.parsers import parse_image_lists, parse_retrieval + + +def do_covisibility_clustering( + frame_ids: List[int], reconstruction: pycolmap.Reconstruction +): + clusters = [] + visited = set() + for frame_id in frame_ids: + # Check if already labeled + if frame_id in visited: + continue + + # New component + clusters.append([]) + queue = {frame_id} + while len(queue): + exploration_frame = queue.pop() + + # Already part of the component + if exploration_frame in visited: + continue + visited.add(exploration_frame) + clusters[-1].append(exploration_frame) + + observed = reconstruction.images[exploration_frame].points2D + connected_frames = { + obs.image_id + for p2D in observed + if p2D.has_point3D() + for obs in reconstruction.points3D[ + p2D.point3D_id + ].track.elements + } + connected_frames &= set(frame_ids) + connected_frames -= visited + queue |= connected_frames + + clusters = sorted(clusters, key=len, reverse=True) + return clusters + + +class QueryLocalizer: + def __init__(self, reconstruction, config=None): + self.reconstruction = reconstruction + self.config = config or {} + + def localize(self, points2D_all, points2D_idxs, points3D_id, query_camera): + points2D = points2D_all[points2D_idxs] + points3D = [self.reconstruction.points3D[j].xyz for j in points3D_id] + ret = pycolmap.absolute_pose_estimation( + points2D, + points3D, + query_camera, + estimation_options=self.config.get("estimation", {}), + refinement_options=self.config.get("refinement", {}), + ) + return ret + + +def pose_from_cluster( + localizer: QueryLocalizer, + qname: str, + query_camera: pycolmap.Camera, + db_ids: List[int], + features_path: Path, + matches_path: Path, + **kwargs, +): + kpq = get_keypoints(features_path, qname) + kpq += 0.5 # COLMAP coordinates + + kp_idx_to_3D = defaultdict(list) + kp_idx_to_3D_to_db = defaultdict(lambda: defaultdict(list)) + num_matches = 0 + for i, db_id in enumerate(db_ids): + image = localizer.reconstruction.images[db_id] + if image.num_points3D == 0: + logger.debug(f"No 3D points found for {image.name}.") + continue + points3D_ids = np.array( + [p.point3D_id if p.has_point3D() else -1 for p in image.points2D] + ) + + matches, _ = get_matches(matches_path, qname, image.name) + matches = matches[points3D_ids[matches[:, 1]] != -1] + num_matches += len(matches) + for idx, m in matches: + id_3D = points3D_ids[m] + kp_idx_to_3D_to_db[idx][id_3D].append(i) + # avoid duplicate observations + if id_3D not in kp_idx_to_3D[idx]: + kp_idx_to_3D[idx].append(id_3D) + + idxs = list(kp_idx_to_3D.keys()) + mkp_idxs = [i for i in idxs for _ in kp_idx_to_3D[i]] + mp3d_ids = [j for i in idxs for j in kp_idx_to_3D[i]] + ret = localizer.localize(kpq, mkp_idxs, mp3d_ids, query_camera, **kwargs) + if ret is not None: + ret["camera"] = query_camera + + # mostly for logging and post-processing + mkp_to_3D_to_db = [ + (j, kp_idx_to_3D_to_db[i][j]) for i in idxs for j in kp_idx_to_3D[i] + ] + log = { + "db": db_ids, + "PnP_ret": ret, + "keypoints_query": kpq[mkp_idxs], + "points3D_ids": mp3d_ids, + "points3D_xyz": None, # we don't log xyz anymore because of file size + "num_matches": num_matches, + "keypoint_index_to_db": (mkp_idxs, mkp_to_3D_to_db), + } + return ret, log + + +def main( + reference_sfm: Union[Path, pycolmap.Reconstruction], + queries: Path, + retrieval: Path, + features: Path, + matches: Path, + results: Path, + ransac_thresh: int = 12, + covisibility_clustering: bool = False, + prepend_camera_name: bool = 
False, + config: Dict = None, +): + assert retrieval.exists(), retrieval + assert features.exists(), features + assert matches.exists(), matches + + queries = parse_image_lists(queries, with_intrinsics=True) + retrieval_dict = parse_retrieval(retrieval) + + logger.info("Reading the 3D model...") + if not isinstance(reference_sfm, pycolmap.Reconstruction): + reference_sfm = pycolmap.Reconstruction(reference_sfm) + db_name_to_id = {img.name: i for i, img in reference_sfm.images.items()} + + config = { + "estimation": {"ransac": {"max_error": ransac_thresh}}, + **(config or {}), + } + localizer = QueryLocalizer(reference_sfm, config) + + cam_from_world = {} + logs = { + "features": features, + "matches": matches, + "retrieval": retrieval, + "loc": {}, + } + logger.info("Starting localization...") + for qname, qcam in tqdm(queries): + if qname not in retrieval_dict: + logger.warning( + f"No images retrieved for query image {qname}. Skipping..." + ) + continue + db_names = retrieval_dict[qname] + db_ids = [] + for n in db_names: + if n not in db_name_to_id: + logger.warning(f"Image {n} was retrieved but not in database") + continue + db_ids.append(db_name_to_id[n]) + + if covisibility_clustering: + clusters = do_covisibility_clustering(db_ids, reference_sfm) + best_inliers = 0 + best_cluster = None + logs_clusters = [] + for i, cluster_ids in enumerate(clusters): + ret, log = pose_from_cluster( + localizer, qname, qcam, cluster_ids, features, matches + ) + if ret is not None and ret["num_inliers"] > best_inliers: + best_cluster = i + best_inliers = ret["num_inliers"] + logs_clusters.append(log) + if best_cluster is not None: + ret = logs_clusters[best_cluster]["PnP_ret"] + cam_from_world[qname] = ret["cam_from_world"] + logs["loc"][qname] = { + "db": db_ids, + "best_cluster": best_cluster, + "log_clusters": logs_clusters, + "covisibility_clustering": covisibility_clustering, + } + else: + ret, log = pose_from_cluster( + localizer, qname, qcam, db_ids, features, matches + ) + if ret is not None: + cam_from_world[qname] = ret["cam_from_world"] + else: + closest = reference_sfm.images[db_ids[0]] + cam_from_world[qname] = closest.cam_from_world + log["covisibility_clustering"] = covisibility_clustering + logs["loc"][qname] = log + + logger.info(f"Localized {len(cam_from_world)} / {len(queries)} images.") + logger.info(f"Writing poses to {results}...") + with open(results, "w") as f: + for query, t in cam_from_world.items(): + qvec = " ".join(map(str, t.rotation.quat[[3, 0, 1, 2]])) + tvec = " ".join(map(str, t.translation)) + name = query.split("/")[-1] + if prepend_camera_name: + name = query.split("/")[-2] + "/" + name + f.write(f"{name} {qvec} {tvec}\n") + + logs_path = f"{results}_logs.pkl" + logger.info(f"Writing logs to {logs_path}...") + # TODO: Resolve pickling issue with pycolmap objects. 
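+    # (The per-query "PnP_ret" entries hold raw pycolmap results, e.g. the
+    #  estimated pose and the query pycolmap.Camera, which is why pickling
+    #  can be fragile across pycolmap versions.)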
+ with open(logs_path, "wb") as f: + pickle.dump(logs, f) + logger.info("Done!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--reference_sfm", type=Path, required=True) + parser.add_argument("--queries", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + parser.add_argument("--retrieval", type=Path, required=True) + parser.add_argument("--results", type=Path, required=True) + parser.add_argument("--ransac_thresh", type=float, default=12.0) + parser.add_argument("--covisibility_clustering", action="store_true") + parser.add_argument("--prepend_camera_name", action="store_true") + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/match_dense.py b/hloc/match_dense.py index ac95d93..e1253d7 100644 --- a/hloc/match_dense.py +++ b/hloc/match_dense.py @@ -1,11 +1,25 @@ +import argparse +import pprint +from collections import Counter, defaultdict +from itertools import chain +from pathlib import Path from types import SimpleNamespace +from typing import Dict, Iterable, List, Optional, Set, Tuple, Union import cv2 +import h5py import numpy as np import torch import torchvision.transforms.functional as F +from scipy.spatial import KDTree +from tqdm import tqdm +from . import logger, matchers from .extract_features import read_image, resize_image +from .match_features import find_unique_new_pairs +from .utils.base_model import dynamic_load +from .utils.io import list_h5_names +from .utils.parsers import names_to_pair, parse_retrieval device = "cuda" if torch.cuda.is_available() else "cpu" @@ -144,6 +158,20 @@ "dfactor": 16, }, }, + "mast3r": { + "output": "matches-mast3r", + "model": { + "name": "mast3r", + "weights": "vit_large", + "max_keypoints": 2000, + "match_threshold": 0.2, + }, + "preprocessing": { + "grayscale": False, + "resize_max": 512, + "dfactor": 16, + }, + }, "xfeat_dense": { "output": "matches-xfeat_dense", "model": { @@ -262,12 +290,483 @@ } +def to_cpts(kpts, ps): + if ps > 0.0: + kpts = np.round(np.round((kpts + 0.5) / ps) * ps - 0.5, 2) + return [tuple(cpt) for cpt in kpts] + + +def assign_keypoints( + kpts: np.ndarray, + other_cpts: Union[List[Tuple], np.ndarray], + max_error: float, + update: bool = False, + ref_bins: Optional[List[Counter]] = None, + scores: Optional[np.ndarray] = None, + cell_size: Optional[int] = None, +): + if not update: + # Without update this is just a NN search + if len(other_cpts) == 0 or len(kpts) == 0: + return np.full(len(kpts), -1) + dist, kpt_ids = KDTree(np.array(other_cpts)).query(kpts) + valid = dist <= max_error + kpt_ids[~valid] = -1 + return kpt_ids + else: + ps = cell_size if cell_size is not None else max_error + ps = max(ps, max_error) + # With update we quantize and bin (optionally) + assert isinstance(other_cpts, list) + kpt_ids = [] + cpts = to_cpts(kpts, ps) + bpts = to_cpts(kpts, int(max_error)) + cp_to_id = {val: i for i, val in enumerate(other_cpts)} + for i, (cpt, bpt) in enumerate(zip(cpts, bpts)): + try: + kid = cp_to_id[cpt] + except KeyError: + kid = len(cp_to_id) + cp_to_id[cpt] = kid + other_cpts.append(cpt) + if ref_bins is not None: + ref_bins.append(Counter()) + if ref_bins is not None: + score = scores[i] if scores is not None else 1 + ref_bins[cp_to_id[cpt]][bpt] += score + kpt_ids.append(kid) + return np.array(kpt_ids) + + +def get_grouped_ids(array): + # Group array indices based on its values + # all duplicates are grouped as a set + idx_sort = 
np.argsort(array) + sorted_array = array[idx_sort] + _, ids, _ = np.unique(sorted_array, return_counts=True, return_index=True) + res = np.split(idx_sort, ids[1:]) + return res + + +def get_unique_matches(match_ids, scores): + if len(match_ids.shape) == 1: + return [0] + + isets1 = get_grouped_ids(match_ids[:, 0]) + isets2 = get_grouped_ids(match_ids[:, 1]) + uid1s = [ids[scores[ids].argmax()] for ids in isets1 if len(ids) > 0] + uid2s = [ids[scores[ids].argmax()] for ids in isets2 if len(ids) > 0] + uids = list(set(uid1s).intersection(uid2s)) + return match_ids[uids], scores[uids] + + +def matches_to_matches0(matches, scores): + if len(matches) == 0: + return np.zeros(0, dtype=np.int32), np.zeros(0, dtype=np.float16) + n_kps0 = np.max(matches[:, 0]) + 1 + matches0 = -np.ones((n_kps0,)) + scores0 = np.zeros((n_kps0,)) + matches0[matches[:, 0]] = matches[:, 1] + scores0[matches[:, 0]] = scores + return matches0.astype(np.int32), scores0.astype(np.float16) + + +def kpids_to_matches0(kpt_ids0, kpt_ids1, scores): + valid = (kpt_ids0 != -1) & (kpt_ids1 != -1) + matches = np.dstack([kpt_ids0[valid], kpt_ids1[valid]]) + matches = matches.reshape(-1, 2) + scores = scores[valid] + + # Remove n-to-1 matches + matches, scores = get_unique_matches(matches, scores) + return matches_to_matches0(matches, scores) + + def scale_keypoints(kpts, scale): if np.any(scale != 1.0): kpts *= kpts.new_tensor(scale) return kpts +class ImagePairDataset(torch.utils.data.Dataset): + default_conf = { + "grayscale": True, + "resize_max": 1024, + "dfactor": 8, + "cache_images": False, + } + + def __init__(self, image_dir, conf, pairs): + self.image_dir = image_dir + self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf}) + self.pairs = pairs + if self.conf.cache_images: + image_names = set(sum(pairs, ())) # unique image names in pairs + logger.info( + f"Loading and caching {len(image_names)} unique images." 
+ ) + self.images = {} + self.scales = {} + for name in tqdm(image_names): + image = read_image(self.image_dir / name, self.conf.grayscale) + self.images[name], self.scales[name] = self.preprocess(image) + + def preprocess(self, image: np.ndarray): + image = image.astype(np.float32, copy=False) + size = image.shape[:2][::-1] + scale = np.array([1.0, 1.0]) + + if self.conf.resize_max: + scale = self.conf.resize_max / max(size) + if scale < 1.0: + size_new = tuple(int(round(x * scale)) for x in size) + image = resize_image(image, size_new, "cv2_area") + scale = np.array(size) / np.array(size_new) + + if self.conf.grayscale: + assert image.ndim == 2, image.shape + image = image[None] + else: + image = image.transpose((2, 0, 1)) # HxWxC to CxHxW + image = torch.from_numpy(image / 255.0).float() + + # assure that the size is divisible by dfactor + size_new = tuple( + map( + lambda x: int(x // self.conf.dfactor * self.conf.dfactor), + image.shape[-2:], + ) + ) + image = F.resize(image, size=size_new) + scale = np.array(size) / np.array(size_new)[::-1] + return image, scale + + def __len__(self): + return len(self.pairs) + + def __getitem__(self, idx): + name0, name1 = self.pairs[idx] + if self.conf.cache_images: + image0, scale0 = self.images[name0], self.scales[name0] + image1, scale1 = self.images[name1], self.scales[name1] + else: + image0 = read_image(self.image_dir / name0, self.conf.grayscale) + image1 = read_image(self.image_dir / name1, self.conf.grayscale) + image0, scale0 = self.preprocess(image0) + image1, scale1 = self.preprocess(image1) + return image0, image1, scale0, scale1, name0, name1 + + +@torch.no_grad() +def match_dense( + conf: Dict, + pairs: List[Tuple[str, str]], + image_dir: Path, + match_path: Path, # out + existing_refs: Optional[List] = [], +): + device = "cuda" if torch.cuda.is_available() else "cpu" + Model = dynamic_load(matchers, conf["model"]["name"]) + model = Model(conf["model"]).eval().to(device) + + dataset = ImagePairDataset(image_dir, conf["preprocessing"], pairs) + loader = torch.utils.data.DataLoader( + dataset, num_workers=16, batch_size=1, shuffle=False + ) + + logger.info("Performing dense matching...") + with h5py.File(str(match_path), "a") as fd: + for data in tqdm(loader, smoothing=0.1): + # load image-pair data + image0, image1, scale0, scale1, (name0,), (name1,) = data + scale0, scale1 = scale0[0].numpy(), scale1[0].numpy() + image0, image1 = image0.to(device), image1.to(device) + + # match semi-dense + # for consistency with pairs_from_*: refine kpts of image0 + if name0 in existing_refs: + # special case: flip to enable refinement in query image + pred = model({"image0": image1, "image1": image0}) + pred = { + **pred, + "keypoints0": pred["keypoints1"], + "keypoints1": pred["keypoints0"], + } + else: + # usual case + pred = model({"image0": image0, "image1": image1}) + + # Rescale keypoints and move to cpu + kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"] + kpts0 = scale_keypoints(kpts0 + 0.5, scale0) - 0.5 + kpts1 = scale_keypoints(kpts1 + 0.5, scale1) - 0.5 + kpts0 = kpts0.cpu().numpy() + kpts1 = kpts1.cpu().numpy() + scores = pred["scores"].cpu().numpy() + + # Write matches and matching scores in hloc format + pair = names_to_pair(name0, name1) + if pair in fd: + del fd[pair] + grp = fd.create_group(pair) + + # Write dense matching output + grp.create_dataset("keypoints0", data=kpts0) + grp.create_dataset("keypoints1", data=kpts1) + grp.create_dataset("scores", data=scores) + del model, loader + + +# default: quantize all! 
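+# Reference images listed in `quantize` have their keypoints binned with
+# assign_keypoints() so that semi-dense matches can later be snapped onto
+# them; the remaining images keep their keypoints fixed as plain arrays.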
+def load_keypoints( + conf: Dict, feature_paths_refs: List[Path], quantize: Optional[set] = None +): + name2ref = { + n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p) + } + + existing_refs = set(name2ref.keys()) + if quantize is None: + quantize = existing_refs # quantize all + if len(existing_refs) > 0: + logger.info(f"Loading keypoints from {len(existing_refs)} images.") + + # Load query keypoints + cpdict = defaultdict(list) + bindict = defaultdict(list) + for name in existing_refs: + with h5py.File(str(feature_paths_refs[name2ref[name]]), "r") as fd: + kps = fd[name]["keypoints"].__array__() + if name not in quantize: + cpdict[name] = kps + else: + if "scores" in fd[name].keys(): + kp_scores = fd[name]["scores"].__array__() + else: + # we set the score to 1.0 if not provided + # increase for more weight on reference keypoints for + # stronger anchoring + kp_scores = [1.0 for _ in range(kps.shape[0])] + # bin existing keypoints of reference images for association + assign_keypoints( + kps, + cpdict[name], + conf["max_error"], + True, + bindict[name], + kp_scores, + conf["cell_size"], + ) + return cpdict, bindict + + +def aggregate_matches( + conf: Dict, + pairs: List[Tuple[str, str]], + match_path: Path, + feature_path: Path, + required_queries: Optional[Set[str]] = None, + max_kps: Optional[int] = None, + cpdict: Dict[str, Iterable] = defaultdict(list), + bindict: Dict[str, List[Counter]] = defaultdict(list), +): + if required_queries is None: + required_queries = set(sum(pairs, ())) + # default: do not overwrite existing features in feature_path! + required_queries -= set(list_h5_names(feature_path)) + + # if an entry in cpdict is provided as np.ndarray we assume it is fixed + required_queries -= set( + [k for k, v in cpdict.items() if isinstance(v, np.ndarray)] + ) + + # sort pairs for reduced RAM + pairs_per_q = Counter(list(chain(*pairs))) + pairs_score = [min(pairs_per_q[i], pairs_per_q[j]) for i, j in pairs] + pairs = [p for _, p in sorted(zip(pairs_score, pairs))] + + if len(required_queries) > 0: + logger.info( + f"Aggregating keypoints for {len(required_queries)} images." + ) + n_kps = 0 + with h5py.File(str(match_path), "a") as fd: + for name0, name1 in tqdm(pairs, smoothing=0.1): + pair = names_to_pair(name0, name1) + grp = fd[pair] + kpts0 = grp["keypoints0"].__array__() + kpts1 = grp["keypoints1"].__array__() + scores = grp["scores"].__array__() + + # Aggregate local features + update0 = name0 in required_queries + update1 = name1 in required_queries + + # in localization we do not want to bin the query kp + # assumes that the query is name0! 
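+            # (max_error0 = cell_size0 = 0 makes to_cpts() skip rounding,
+            #  so the query keypoints are kept at full sub-pixel precision)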
+ if update0 and not update1 and max_kps is None: + max_error0 = cell_size0 = 0.0 + else: + max_error0 = conf["max_error"] + cell_size0 = conf["cell_size"] + + # Get match ids and extend query keypoints (cpdict) + mkp_ids0 = assign_keypoints( + kpts0, + cpdict[name0], + max_error0, + update0, + bindict[name0], + scores, + cell_size0, + ) + mkp_ids1 = assign_keypoints( + kpts1, + cpdict[name1], + conf["max_error"], + update1, + bindict[name1], + scores, + conf["cell_size"], + ) + + # Build matches from assignments + matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores) + + assert kpts0.shape[0] == scores.shape[0] + grp.create_dataset("matches0", data=matches0) + grp.create_dataset("matching_scores0", data=scores0) + + # Convert bins to kps if finished, and store them + for name in (name0, name1): + pairs_per_q[name] -= 1 + if pairs_per_q[name] > 0 or name not in required_queries: + continue + kp_score = [c.most_common(1)[0][1] for c in bindict[name]] + cpdict[name] = [c.most_common(1)[0][0] for c in bindict[name]] + cpdict[name] = np.array(cpdict[name], dtype=np.float32) + + # Select top-k query kps by score (reassign matches later) + if max_kps: + top_k = min(max_kps, cpdict[name].shape[0]) + top_k = np.argsort(kp_score)[::-1][:top_k] + cpdict[name] = cpdict[name][top_k] + kp_score = np.array(kp_score)[top_k] + + # Write query keypoints + with h5py.File(feature_path, "a") as kfd: + if name in kfd: + del kfd[name] + kgrp = kfd.create_group(name) + kgrp.create_dataset("keypoints", data=cpdict[name]) + kgrp.create_dataset("score", data=kp_score) + n_kps += cpdict[name].shape[0] + del bindict[name] + + if len(required_queries) > 0: + avg_kp_per_image = round(n_kps / len(required_queries), 1) + logger.info( + f"Finished assignment, found {avg_kp_per_image} " + f"keypoints/image (avg.), total {n_kps}." 
+ ) + return cpdict + + +def assign_matches( + pairs: List[Tuple[str, str]], + match_path: Path, + keypoints: Union[List[Path], Dict[str, np.array]], + max_error: float, +): + if isinstance(keypoints, list): + keypoints = load_keypoints({}, keypoints, kpts_as_bin=set([])) + assert len(set(sum(pairs, ())) - set(keypoints.keys())) == 0 + with h5py.File(str(match_path), "a") as fd: + for name0, name1 in tqdm(pairs): + pair = names_to_pair(name0, name1) + grp = fd[pair] + kpts0 = grp["keypoints0"].__array__() + kpts1 = grp["keypoints1"].__array__() + scores = grp["scores"].__array__() + + # NN search across cell boundaries + mkp_ids0 = assign_keypoints(kpts0, keypoints[name0], max_error) + mkp_ids1 = assign_keypoints(kpts1, keypoints[name1], max_error) + + matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores) + + # overwrite matches0 and matching_scores0 + del grp["matches0"], grp["matching_scores0"] + grp.create_dataset("matches0", data=matches0) + grp.create_dataset("matching_scores0", data=scores0) + + +@torch.no_grad() +def match_and_assign( + conf: Dict, + pairs_path: Path, + image_dir: Path, + match_path: Path, # out + feature_path_q: Path, # out + feature_paths_refs: Optional[List[Path]] = [], + max_kps: Optional[int] = 8192, + overwrite: bool = False, +) -> Path: + for path in feature_paths_refs: + if not path.exists(): + raise FileNotFoundError(f"Reference feature file {path}.") + pairs = parse_retrieval(pairs_path) + pairs = [(q, r) for q, rs in pairs.items() for r in rs] + pairs = find_unique_new_pairs(pairs, None if overwrite else match_path) + required_queries = set(sum(pairs, ())) + + name2ref = { + n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p) + } + existing_refs = required_queries.intersection(set(name2ref.keys())) + + # images which require feature extraction + required_queries = required_queries - existing_refs + + if feature_path_q.exists(): + existing_queries = set(list_h5_names(feature_path_q)) + feature_paths_refs.append(feature_path_q) + existing_refs = set.union(existing_refs, existing_queries) + if not overwrite: + required_queries = required_queries - existing_queries + + if len(pairs) == 0 and len(required_queries) == 0: + logger.info("All pairs exist. 
Skipping dense matching.") + return + + # extract semi-dense matches + match_dense(conf, pairs, image_dir, match_path, existing_refs=existing_refs) + + logger.info("Assigning matches...") + + # Pre-load existing keypoints + cpdict, bindict = load_keypoints( + conf, feature_paths_refs, quantize=required_queries + ) + + # Reassign matches by aggregation + cpdict = aggregate_matches( + conf, + pairs, + match_path, + feature_path=feature_path_q, + required_queries=required_queries, + max_kps=max_kps, + cpdict=cpdict, + bindict=bindict, + ) + + # Invalidate matches that are far from selected bin by reassignment + if max_kps is not None: + logger.info(f'Reassign matches with max_error={conf["max_error"]}.') + assign_matches(pairs, match_path, cpdict, max_error=conf["max_error"]) + + def scale_lines(lines, scale): if np.any(scale != 1.0): lines *= lines.new_tensor(scale) @@ -483,3 +982,88 @@ def preprocess(image: np.ndarray): del pred torch.cuda.empty_cache() return ret + + +@torch.no_grad() +def main( + conf: Dict, + pairs: Path, + image_dir: Path, + export_dir: Optional[Path] = None, + matches: Optional[Path] = None, # out + features: Optional[Path] = None, # out + features_ref: Optional[Path] = None, + max_kps: Optional[int] = 8192, + overwrite: bool = False, +) -> Path: + logger.info( + "Extracting semi-dense features with configuration:" + f"\n{pprint.pformat(conf)}" + ) + + if features is None: + features = "feats_" + + if isinstance(features, Path): + features_q = features + if matches is None: + raise ValueError( + "Either provide both features and matches as Path" + " or both as names." + ) + else: + if export_dir is None: + raise ValueError( + "Provide an export_dir if features and matches" + f" are not file paths: {features}, {matches}." + ) + features_q = Path(export_dir, f'{features}{conf["output"]}.h5') + if matches is None: + matches = Path(export_dir, f'{conf["output"]}_{pairs.stem}.h5') + + if features_ref is None: + features_ref = [] + elif isinstance(features_ref, list): + features_ref = list(features_ref) + elif isinstance(features_ref, Path): + features_ref = [features_ref] + else: + raise TypeError(str(features_ref)) + + match_and_assign( + conf, + pairs, + image_dir, + matches, + features_q, + features_ref, + max_kps, + overwrite, + ) + + return features_q, matches + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + parser.add_argument("--export_dir", type=Path, required=True) + parser.add_argument( + "--matches", type=Path, default=confs["loftr"]["output"] + ) + parser.add_argument( + "--features", type=str, default="feats_" + confs["loftr"]["output"] + ) + parser.add_argument( + "--conf", type=str, default="loftr", choices=list(confs.keys()) + ) + args = parser.parse_args() + main( + confs[args.conf], + args.pairs, + args.image_dir, + args.export_dir, + args.matches, + args.features, + ) diff --git a/hloc/matchers/mast3r.py b/hloc/matchers/mast3r.py new file mode 100644 index 0000000..46489cc --- /dev/null +++ b/hloc/matchers/mast3r.py @@ -0,0 +1,111 @@ +import os +import sys +import urllib.request +from pathlib import Path + +import numpy as np +import torch +import torchvision.transforms as tfm + +from .. 
import logger + +mast3r_path = Path(__file__).parent / "../../third_party/mast3r" +sys.path.append(str(mast3r_path)) + +dust3r_path = Path(__file__).parent / "../../third_party/dust3r" +sys.path.append(str(dust3r_path)) + +from dust3r.image_pairs import make_pairs +from dust3r.inference import inference +from mast3r.fast_nn import fast_reciprocal_NNs +from mast3r.model import AsymmetricMASt3R + +from hloc.matchers.duster import Duster + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +class Mast3r(Duster): + default_conf = { + "name": "Mast3r", + "model_path": mast3r_path + / "model_weights/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth", + "max_keypoints": 2000, + "vit_patch_size": 16, + } + + def _init(self, conf): + self.normalize = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + self.model_path = self.conf["model_path"] + self.download_weights() + self.net = AsymmetricMASt3R.from_pretrained(self.model_path).to(device) + logger.info("Loaded Mast3r model") + + def download_weights(self): + url = "https://download.europe.naverlabs.com/ComputerVision/MASt3R/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" + + self.model_path.parent.mkdir(parents=True, exist_ok=True) + if not os.path.isfile(self.model_path): + logger.info("Downloading Mast3r(ViT large)... (takes a while)") + urllib.request.urlretrieve(url, self.model_path) + logger.info("Downloading Mast3r(ViT large)... done!") + + def _forward(self, data): + img0, img1 = data["image0"], data["image1"] + mean = torch.tensor([0.5, 0.5, 0.5]).to(device) + std = torch.tensor([0.5, 0.5, 0.5]).to(device) + + img0 = (img0 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1) + img1 = (img1 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1) + + images = [ + {"img": img0, "idx": 0, "instance": 0}, + {"img": img1, "idx": 1, "instance": 1}, + ] + pairs = make_pairs( + images, scene_graph="complete", prefilter=None, symmetrize=True + ) + output = inference(pairs, self.net, device, batch_size=1) + + # at this stage, you have the raw dust3r predictions + _, pred1 = output["view1"], output["pred1"] + _, pred2 = output["view2"], output["pred2"] + + desc1, desc2 = ( + pred1["desc"][1].squeeze(0).detach(), + pred2["desc"][1].squeeze(0).detach(), + ) + + # find 2D-2D matches between the two images + matches_im0, matches_im1 = fast_reciprocal_NNs( + desc1, + desc2, + subsample_or_initxy1=2, + device=device, + dist="dot", + block_size=2**13, + ) + + mkpts0 = matches_im0.copy() + mkpts1 = matches_im1.copy() + + if len(mkpts0) == 0: + pred = { + "keypoints0": torch.zeros([0, 2]), + "keypoints1": torch.zeros([0, 2]), + } + logger.warning(f"Matched {0} points") + else: + + top_k = self.conf["max_keypoints"] + if top_k is not None and len(mkpts0) > top_k: + keep = np.round(np.linspace(0, len(mkpts0) - 1, top_k)).astype( + int + ) + mkpts0 = mkpts0[keep] + mkpts1 = mkpts1[keep] + pred = { + "keypoints0": torch.from_numpy(mkpts0), + "keypoints1": torch.from_numpy(mkpts1), + } + return pred diff --git a/hloc/matchers/superglue.py b/hloc/matchers/superglue.py index 7e427f4..6fae344 100644 --- a/hloc/matchers/superglue.py +++ b/hloc/matchers/superglue.py @@ -4,7 +4,9 @@ from ..utils.base_model import BaseModel sys.path.append(str(Path(__file__).parent / "../../third_party")) -from SuperGluePretrainedNetwork.models.superglue import SuperGlue as SG +from SuperGluePretrainedNetwork.models.superglue import ( # noqa: E402 + SuperGlue as SG, +) class SuperGlue(BaseModel): diff --git a/hloc/pairs_from_covisibility.py 
b/hloc/pairs_from_covisibility.py new file mode 100644 index 0000000..49f3e57 --- /dev/null +++ b/hloc/pairs_from_covisibility.py @@ -0,0 +1,60 @@ +import argparse +from collections import defaultdict +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from . import logger +from .utils.read_write_model import read_model + + +def main(model, output, num_matched): + logger.info("Reading the COLMAP model...") + cameras, images, points3D = read_model(model) + + logger.info("Extracting image pairs from covisibility info...") + pairs = [] + for image_id, image in tqdm(images.items()): + matched = image.point3D_ids != -1 + points3D_covis = image.point3D_ids[matched] + + covis = defaultdict(int) + for point_id in points3D_covis: + for image_covis_id in points3D[point_id].image_ids: + if image_covis_id != image_id: + covis[image_covis_id] += 1 + + if len(covis) == 0: + logger.info(f"Image {image_id} does not have any covisibility.") + continue + + covis_ids = np.array(list(covis.keys())) + covis_num = np.array([covis[i] for i in covis_ids]) + + if len(covis_ids) <= num_matched: + top_covis_ids = covis_ids[np.argsort(-covis_num)] + else: + # get covisible image ids with top k number of common matches + ind_top = np.argpartition(covis_num, -num_matched) + ind_top = ind_top[-num_matched:] # unsorted top k + ind_top = ind_top[np.argsort(-covis_num[ind_top])] + top_covis_ids = [covis_ids[i] for i in ind_top] + assert covis_num[ind_top[0]] == np.max(covis_num) + + for i in top_covis_ids: + pair = (image.name, images[i].name) + pairs.append(pair) + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--num_matched", required=True, type=int) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_exhaustive.py b/hloc/pairs_from_exhaustive.py new file mode 100644 index 0000000..438b814 --- /dev/null +++ b/hloc/pairs_from_exhaustive.py @@ -0,0 +1,66 @@ +import argparse +import collections.abc as collections +from pathlib import Path +from typing import List, Optional, Union + +from . 
import logger +from .utils.io import list_h5_names +from .utils.parsers import parse_image_lists + + +def main( + output: Path, + image_list: Optional[Union[Path, List[str]]] = None, + features: Optional[Path] = None, + ref_list: Optional[Union[Path, List[str]]] = None, + ref_features: Optional[Path] = None, +): + if image_list is not None: + if isinstance(image_list, (str, Path)): + names_q = parse_image_lists(image_list) + elif isinstance(image_list, collections.Iterable): + names_q = list(image_list) + else: + raise ValueError(f"Unknown type for image list: {image_list}") + elif features is not None: + names_q = list_h5_names(features) + else: + raise ValueError("Provide either a list of images or a feature file.") + + self_matching = False + if ref_list is not None: + if isinstance(ref_list, (str, Path)): + names_ref = parse_image_lists(ref_list) + elif isinstance(image_list, collections.Iterable): + names_ref = list(ref_list) + else: + raise ValueError( + f"Unknown type for reference image list: {ref_list}" + ) + elif ref_features is not None: + names_ref = list_h5_names(ref_features) + else: + self_matching = True + names_ref = names_q + + pairs = [] + for i, n1 in enumerate(names_q): + for j, n2 in enumerate(names_ref): + if self_matching and j <= i: + continue + pairs.append((n1, n2)) + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--image_list", type=Path) + parser.add_argument("--features", type=Path) + parser.add_argument("--ref_list", type=Path) + parser.add_argument("--ref_features", type=Path) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_poses.py b/hloc/pairs_from_poses.py new file mode 100644 index 0000000..b6b4f88 --- /dev/null +++ b/hloc/pairs_from_poses.py @@ -0,0 +1,70 @@ +import argparse +from pathlib import Path + +import numpy as np +import scipy.spatial + +from . import logger +from .pairs_from_retrieval import pairs_from_score_matrix +from .utils.read_write_model import read_images_binary + +DEFAULT_ROT_THRESH = 30 # in degrees + + +def get_pairwise_distances(images): + ids = np.array(list(images.keys())) + Rs = [] + ts = [] + for id_ in ids: + image = images[id_] + R = image.qvec2rotmat() + t = image.tvec + Rs.append(R) + ts.append(t) + Rs = np.stack(Rs, 0) + ts = np.stack(ts, 0) + + # Invert the poses from world-to-camera to camera-to-world. + Rs = Rs.transpose(0, 2, 1) + ts = -(Rs @ ts[:, :, None])[:, :, 0] + + dist = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(ts)) + + # Instead of computing the angle between two camera orientations, + # we compute the angle between the principal axes, as two images rotated + # around their principal axis still observe the same scene. 
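+    # Rs are camera-to-world here, so the last column of each rotation is
+    # the camera's principal (viewing) axis expressed in world coordinates.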
+ axes = Rs[:, :, -1] + dots = np.einsum("mi,ni->mn", axes, axes, optimize=True) + dR = np.rad2deg(np.arccos(np.clip(dots, -1.0, 1.0))) + + return ids, dist, dR + + +def main(model, output, num_matched, rotation_threshold=DEFAULT_ROT_THRESH): + logger.info("Reading the COLMAP model...") + images = read_images_binary(model / "images.bin") + + logger.info(f"Obtaining pairwise distances between {len(images)} images...") + ids, dist, dR = get_pairwise_distances(images) + scores = -dist + + invalid = dR >= rotation_threshold + np.fill_diagonal(invalid, True) + pairs = pairs_from_score_matrix(scores, invalid, num_matched) + pairs = [(images[ids[i]].name, images[ids[j]].name) for i, j in pairs] + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join(p) for p in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--num_matched", required=True, type=int) + parser.add_argument( + "--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float + ) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_retrieval.py b/hloc/pairs_from_retrieval.py new file mode 100644 index 0000000..6948fe6 --- /dev/null +++ b/hloc/pairs_from_retrieval.py @@ -0,0 +1,137 @@ +import argparse +import collections.abc as collections +from pathlib import Path +from typing import Optional + +import h5py +import numpy as np +import torch + +from . import logger +from .utils.io import list_h5_names +from .utils.parsers import parse_image_lists +from .utils.read_write_model import read_images_binary + + +def parse_names(prefix, names, names_all): + if prefix is not None: + if not isinstance(prefix, str): + prefix = tuple(prefix) + names = [n for n in names_all if n.startswith(prefix)] + if len(names) == 0: + raise ValueError( + f"Could not find any image with the prefix `{prefix}`." + ) + elif names is not None: + if isinstance(names, (str, Path)): + names = parse_image_lists(names) + elif isinstance(names, collections.Iterable): + names = list(names) + else: + raise ValueError( + f"Unknown type of image list: {names}." + "Provide either a list or a path to a list file." 
+ ) + else: + names = names_all + return names + + +def get_descriptors(names, path, name2idx=None, key="global_descriptor"): + if name2idx is None: + with h5py.File(str(path), "r", libver="latest") as fd: + desc = [fd[n][key].__array__() for n in names] + else: + desc = [] + for n in names: + with h5py.File(str(path[name2idx[n]]), "r", libver="latest") as fd: + desc.append(fd[n][key].__array__()) + return torch.from_numpy(np.stack(desc, 0)).float() + + +def pairs_from_score_matrix( + scores: torch.Tensor, + invalid: np.array, + num_select: int, + min_score: Optional[float] = None, +): + assert scores.shape == invalid.shape + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + invalid = torch.from_numpy(invalid).to(scores.device) + if min_score is not None: + invalid |= scores < min_score + scores.masked_fill_(invalid, float("-inf")) + + topk = torch.topk(scores, num_select, dim=1) + indices = topk.indices.cpu().numpy() + valid = topk.values.isfinite().cpu().numpy() + + pairs = [] + for i, j in zip(*np.where(valid)): + pairs.append((i, indices[i, j])) + return pairs + + +def main( + descriptors, + output, + num_matched, + query_prefix=None, + query_list=None, + db_prefix=None, + db_list=None, + db_model=None, + db_descriptors=None, +): + logger.info("Extracting image pairs from a retrieval database.") + + # We handle multiple reference feature files. + # We only assume that names are unique among them and map names to files. + if db_descriptors is None: + db_descriptors = descriptors + if isinstance(db_descriptors, (Path, str)): + db_descriptors = [db_descriptors] + name2db = { + n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p) + } + db_names_h5 = list(name2db.keys()) + query_names_h5 = list_h5_names(descriptors) + + if db_model: + images = read_images_binary(db_model / "images.bin") + db_names = [i.name for i in images.values()] + else: + db_names = parse_names(db_prefix, db_list, db_names_h5) + if len(db_names) == 0: + raise ValueError("Could not find any database image.") + query_names = parse_names(query_prefix, query_list, query_names_h5) + + device = "cuda" if torch.cuda.is_available() else "cpu" + db_desc = get_descriptors(db_names, db_descriptors, name2db) + query_desc = get_descriptors(query_names, descriptors) + sim = torch.einsum("id,jd->ij", query_desc.to(device), db_desc.to(device)) + + # Avoid self-matching + self = np.array(query_names)[:, None] == np.array(db_names)[None] + pairs = pairs_from_score_matrix(sim, self, num_matched, min_score=0) + pairs = [(query_names[i], db_names[j]) for i, j in pairs] + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--descriptors", type=Path, required=True) + parser.add_argument("--output", type=Path, required=True) + parser.add_argument("--num_matched", type=int, required=True) + parser.add_argument("--query_prefix", type=str, nargs="+") + parser.add_argument("--query_list", type=Path) + parser.add_argument("--db_prefix", type=str, nargs="+") + parser.add_argument("--db_list", type=Path) + parser.add_argument("--db_model", type=Path) + parser.add_argument("--db_descriptors", type=Path) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pipelines/4Seasons/localize.py b/hloc/pipelines/4Seasons/localize.py index 0451130..50ed957 100644 --- a/hloc/pipelines/4Seasons/localize.py +++ 
b/hloc/pipelines/4Seasons/localize.py @@ -1,16 +1,21 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features, localize_sfm, logger -from .utils import get_timestamps, delete_unused_images -from .utils import generate_query_lists, generate_localization_pairs -from .utils import prepare_submission, evaluate_submission +from ... import extract_features, localize_sfm, logger, match_features +from .utils import ( + delete_unused_images, + evaluate_submission, + generate_localization_pairs, + generate_query_lists, + get_timestamps, + prepare_submission, +) relocalization_files = { - "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt", - "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt", - "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt", - "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt", + "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt", # noqa: E501 + "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt", # noqa: E501 + "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt", # noqa: E501 + "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt", # noqa: E501 } parser = argparse.ArgumentParser() @@ -67,9 +72,7 @@ generate_query_lists(timestamps, seq_dir, query_list) # Generate the localization pairs from the given reference frames. -generate_localization_pairs( - sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs -) +generate_localization_pairs(sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs) # Extract, match, amd localize. ffile = extract_features.main(fconf, seq_images, output_dir) diff --git a/hloc/pipelines/4Seasons/prepare_reference.py b/hloc/pipelines/4Seasons/prepare_reference.py index 9074df8..f47aee7 100644 --- a/hloc/pipelines/4Seasons/prepare_reference.py +++ b/hloc/pipelines/4Seasons/prepare_reference.py @@ -1,10 +1,8 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features -from ... import pairs_from_poses, triangulation -from .utils import get_timestamps, delete_unused_images -from .utils import build_empty_colmap_model +from ... 
import extract_features, match_features, pairs_from_poses, triangulation +from .utils import build_empty_colmap_model, delete_unused_images, get_timestamps parser = argparse.ArgumentParser() parser.add_argument( diff --git a/hloc/pipelines/4Seasons/utils.py b/hloc/pipelines/4Seasons/utils.py index 8def70f..e5aace9 100644 --- a/hloc/pipelines/4Seasons/utils.py +++ b/hloc/pipelines/4Seasons/utils.py @@ -1,11 +1,18 @@ -import os -import numpy as np +import glob import logging +import os from pathlib import Path -from ...utils.read_write_model import qvec2rotmat, rotmat2qvec -from ...utils.read_write_model import Image, write_model, Camera +import numpy as np + from ...utils.parsers import parse_retrieval +from ...utils.read_write_model import ( + Camera, + Image, + qvec2rotmat, + rotmat2qvec, + write_model, +) logger = logging.getLogger(__name__) @@ -28,10 +35,10 @@ def get_timestamps(files, idx): def delete_unused_images(root, timestamps): """Delete all images in root if they are not contained in timestamps.""" - images = list(root.glob("**/*.png")) + images = glob.glob((root / "**/*.png").as_posix(), recursive=True) deleted = 0 for image in images: - ts = image.stem + ts = Path(image).stem if ts not in timestamps: os.remove(image) deleted += 1 @@ -48,11 +55,7 @@ def camera_from_calibration_file(id_, path): model_name = "PINHOLE" params = [float(i) for i in [fx, fy, cx, cy]] camera = Camera( - id=id_, - model=model_name, - width=int(width), - height=int(height), - params=params, + id=id_, model=model_name, width=int(width), height=int(height), params=params ) return camera @@ -153,9 +156,7 @@ def generate_localization_pairs(sequence, reloc, num, ref_pairs, out_path): """ if "test" in sequence: # hard pairs will be overwritten by easy ones if available - relocs = [ - str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"] - ] + relocs = [str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"]] else: relocs = [reloc] query_to_ref_ts = {} @@ -213,12 +214,8 @@ def evaluate_submission(submission_dir, relocs, ths=[0.1, 0.2, 0.5]): """Compute the relocalization recall from predicted and ground truth poses.""" for reloc in relocs.parent.glob(relocs.name): poses_gt = parse_relocalization(reloc, has_poses=True) - poses_pred = parse_relocalization( - submission_dir / reloc.name, has_poses=True - ) - poses_pred = { - (ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred - } + poses_pred = parse_relocalization(submission_dir / reloc.name, has_poses=True) + poses_pred = {(ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred} error = [] for ref_ts, q_ts, R_gt, t_gt in poses_gt: diff --git a/hloc/pipelines/7Scenes/create_gt_sfm.py b/hloc/pipelines/7Scenes/create_gt_sfm.py index 2aca434..95dfa46 100644 --- a/hloc/pipelines/7Scenes/create_gt_sfm.py +++ b/hloc/pipelines/7Scenes/create_gt_sfm.py @@ -1,17 +1,17 @@ from pathlib import Path + import numpy as np -import torch import PIL.Image -from tqdm import tqdm import pycolmap +import torch +from tqdm import tqdm -from ...utils.read_write_model import write_model, read_model +from ...utils.read_write_model import read_model, write_model def scene_coordinates(p2D, R_w2c, t_w2c, depth, camera): assert len(depth) == len(p2D) - ret = pycolmap.image_to_world(p2D, camera._asdict()) - p2D_norm = np.asarray(ret["world_points"]) + p2D_norm = np.stack(pycolmap.Camera(camera._asdict()).image_to_world(p2D)) p2D_h = np.concatenate([p2D_norm, np.ones_like(p2D_norm[:, :1])], 1) p3D_c = p2D_h * depth[:, None] p3D_w = (p3D_c - t_w2c) @ R_w2c @@ 
-28,9 +28,7 @@ def interpolate_depth(depth, kp): # To maximize the number of points that have depth: # do bilinear interpolation first and then nearest for the remaining points - interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[ - 0, :, 0 - ] + interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[0, :, 0] interp_nn = torch.nn.functional.grid_sample( depth, kp, align_corners=True, mode="nearest" )[0, :, 0] @@ -54,8 +52,7 @@ def project_to_image(p3D, R, t, camera, eps: float = 1e-4, pad: int = 1): p3D = (p3D @ R.T) + t visible = p3D[:, -1] >= eps # keep points in front of the camera p2D_norm = p3D[:, :-1] / p3D[:, -1:].clip(min=eps) - ret = pycolmap.world_to_image(p2D_norm, camera._asdict()) - p2D = np.asarray(ret["image_points"]) + p2D = np.stack(pycolmap.Camera(camera._asdict()).world_to_image(p2D_norm)) size = np.array([camera.width - pad - 1, camera.height - pad - 1]) valid = np.all((p2D >= pad) & (p2D <= size), -1) valid &= visible @@ -129,15 +126,7 @@ def correct_sfm_with_gt_depth(sfm_path, depth_folder_path, output_path): dataset = Path("datasets/7scenes") outputs = Path("outputs/7Scenes") - SCENES = [ - "chess", - "fire", - "heads", - "office", - "pumpkin", - "redkitchen", - "stairs", - ] + SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"] for scene in SCENES: sfm_path = outputs / scene / "sfm_superpoint+superglue" depth_path = dataset / f"depth/7scenes_{scene}/train/depth" diff --git a/hloc/pipelines/7Scenes/pipeline.py b/hloc/pipelines/7Scenes/pipeline.py index 54d0e81..6fc28c6 100644 --- a/hloc/pipelines/7Scenes/pipeline.py +++ b/hloc/pipelines/7Scenes/pipeline.py @@ -1,11 +1,17 @@ -from pathlib import Path import argparse +from pathlib import Path -from .utils import create_reference_sfm -from .create_gt_sfm import correct_sfm_with_gt_depth +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + triangulation, +) from ..Cambridge.utils import create_query_list_with_intrinsics, evaluate -from ... import extract_features, match_features, pairs_from_covisibility -from ... 
import triangulation, localize_sfm, logger +from .create_gt_sfm import correct_sfm_with_gt_depth +from .utils import create_reference_sfm SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"] @@ -45,9 +51,7 @@ def run_scene( create_reference_sfm(gt_dir, ref_sfm_sift, test_list) create_query_list_with_intrinsics(gt_dir, query_list, test_list) - features = extract_features.main( - feature_conf, images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, images, outputs, as_half=True) sfm_pairs = outputs / f"pairs-db-covis{num_covis}.txt" pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis) @@ -114,9 +118,7 @@ def run_scene( results = ( args.outputs / scene - / "results_{}.txt".format( - "dense" if args.use_dense_depth else "sparse" - ) + / "results_{}.txt".format("dense" if args.use_dense_depth else "sparse") ) if args.overwrite or not results.exists(): run_scene( diff --git a/hloc/pipelines/7Scenes/utils.py b/hloc/pipelines/7Scenes/utils.py index 51070a0..1cb0212 100644 --- a/hloc/pipelines/7Scenes/utils.py +++ b/hloc/pipelines/7Scenes/utils.py @@ -1,4 +1,5 @@ import logging + import numpy as np from hloc.utils.read_write_model import read_model, write_model diff --git a/hloc/pipelines/Aachen/README.md b/hloc/pipelines/Aachen/README.md index 1aefdb7..57b66d6 100644 --- a/hloc/pipelines/Aachen/README.md +++ b/hloc/pipelines/Aachen/README.md @@ -6,7 +6,7 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio ```bash export dataset=datasets/aachen wget -r -np -nH -R "index.html*,aachen_v1_1.zip" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset -unzip $dataset/images/database_and_query_images.zip -d $dataset/images +unzip $dataset/images/database_and_query_images.zip -d $dataset ``` ## Pipeline diff --git a/hloc/pipelines/Aachen/pipeline.py b/hloc/pipelines/Aachen/pipeline.py index 1dbf8ab..e31ce72 100644 --- a/hloc/pipelines/Aachen/pipeline.py +++ b/hloc/pipelines/Aachen/pipeline.py @@ -1,102 +1,109 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_features -from ... import pairs_from_covisibility, pairs_from_retrieval -from ... import colmap_from_nvm, triangulation, localize_sfm +from ... 
import ( + colmap_from_nvm, + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" + outputs = args.outputs # where everything will be saved + sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses + reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -outputs = args.outputs # where everything will be saved -sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses -reference_sfm = ( - outputs / "sfm_superpoint+superglue" -) # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = ( - outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) + # list the standard configurations available + logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs)) + logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs)) -# list the standard configurations available -print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}") -print(f"Configs for feature matchers:\n{pformat(match_features.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_aachen"] + matcher_conf = match_features.confs["superglue"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_aachen"] -matcher_conf = match_features.confs["superglue"] + features = extract_features.main(feature_conf, images, outputs) -features = extract_features.main(feature_conf, images, outputs) + colmap_from_nvm.main( + dataset / "3D-models/aachen_cvpr2018_db.nvm", + dataset / "3D-models/database_intrinsics.txt", + dataset / "aachen.db", + sift_sfm, + ) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs + ) -colmap_from_nvm.main( - dataset / "3D-models/aachen_cvpr2018_db.nvm", - dataset / "3D-models/database_intrinsics.txt", - dataset / "aachen.db", - sift_sfm, -) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, 
num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + ) # not required with SuperPoint+SuperGlue -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with SuperPoint+SuperGlue +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() + run(args) diff --git a/hloc/pipelines/Aachen_v1_1/README.md b/hloc/pipelines/Aachen_v1_1/README.md index 33a310b..c17e751 100644 --- a/hloc/pipelines/Aachen_v1_1/README.md +++ b/hloc/pipelines/Aachen_v1_1/README.md @@ -6,9 +6,8 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio ```bash export dataset=datasets/aachen_v1.1 wget -r -np -nH -R "index.html*" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset -unzip $dataset/images/database_and_query_images.zip -d $dataset/images +unzip $dataset/images/database_and_query_images.zip -d $dataset unzip $dataset/aachen_v1_1.zip -d $dataset -rsync -a $dataset/images_upright/ $dataset/images/images_upright/ ``` ## Pipeline diff --git a/hloc/pipelines/Aachen_v1_1/pipeline.py b/hloc/pipelines/Aachen_v1_1/pipeline.py index fb8cc3f..0753604 100644 --- a/hloc/pipelines/Aachen_v1_1/pipeline.py +++ b/hloc/pipelines/Aachen_v1_1/pipeline.py @@ -1,95 +1,104 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_features, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... 
import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen_v1.1", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen_v1.1", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" + sift_sfm = dataset / "3D-models/aachen_v_1_1" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" -sift_sfm = dataset / "3D-models/aachen_v_1_1" + outputs = args.outputs # where everything will be saved + reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = ( + outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" + ) -outputs = args.outputs # where everything will be saved -reference_sfm = ( - outputs / "sfm_superpoint+superglue" -) # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = ( - outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) + # list the standard configurations available + logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs)) + logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs)) -# list the standard configurations available -print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}") -print(f"Configs for feature matchers:\n{pformat(match_features.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_max"] + matcher_conf = match_features.confs["superglue"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_max"] -matcher_conf = match_features.confs["superglue"] + features = extract_features.main(feature_conf, images, outputs) -features = extract_features.main(feature_conf, images, outputs) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs + ) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + 
pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + ) # not required with SuperPoint+SuperGlue -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with SuperPoint+SuperGlue +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen_v1.1", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen_v1.1", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() + run(args) diff --git a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py index ccfd62d..9c0a769 100644 --- a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py +++ b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py @@ -1,94 +1,104 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_dense, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... 
import ( + extract_features, + localize_sfm, + logger, + match_dense, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen_v1.1", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen_v1.1", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" + sift_sfm = dataset / "3D-models/aachen_v_1_1" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" -sift_sfm = dataset / "3D-models/aachen_v_1_1" + outputs = args.outputs # where everything will be saved + outputs.mkdir() + reference_sfm = outputs / "sfm_loftr" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt" -outputs = args.outputs # where everything will be saved -outputs.mkdir() -reference_sfm = outputs / "sfm_loftr" # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt" + # list the standard configurations available + logger.info("Configs for dense feature matchers:\n%s", pformat(match_dense.confs)) -# list the standard configurations available -print(f"Configs for dense feature matchers:\n{pformat(match_dense.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + matcher_conf = match_dense.confs["loftr_aachen"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -matcher_conf = match_dense.confs["loftr_aachen"] + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + features, sfm_matches = match_dense.main( + matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False + ) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -features, sfm_matches = match_dense.main( - matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + features, loc_matches = match_dense.main( + matcher_conf, + loc_pairs, + images, + outputs, + features=features, + max_kps=None, + matches=sfm_matches, + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + 
loc_matches, + results, + covisibility_clustering=False, + ) # not required with loftr -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -features, loc_matches = match_dense.main( - matcher_conf, - loc_pairs, - images, - outputs, - features=features, - max_kps=None, - matches=sfm_matches, -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with loftr +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen_v1.1", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen_v1.1", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() diff --git a/hloc/pipelines/CMU/pipeline.py b/hloc/pipelines/CMU/pipeline.py index 788dc7b..4706a05 100644 --- a/hloc/pipelines/CMU/pipeline.py +++ b/hloc/pipelines/CMU/pipeline.py @@ -1,8 +1,15 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features, triangulation, logger -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) TEST_SLICES = [2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 19, 20, 21] @@ -46,34 +53,20 @@ def run_slice(slice_, root, outputs, num_covis, num_loc): matcher_conf = match_features.confs["superglue"] pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=num_covis) - features = extract_features.main( - feature_conf, ref_images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, ref_images, outputs, as_half=True) sfm_matches = match_features.main( matcher_conf, sfm_pairs, feature_conf["output"], outputs ) - triangulation.main( - ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches - ) + triangulation.main(ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches) generate_query_list(root, query_list, slice_) - global_descriptors = extract_features.main( - retrieval_conf, ref_images, outputs - ) - global_descriptors = extract_features.main( - retrieval_conf, query_images, outputs - ) + global_descriptors = extract_features.main(retrieval_conf, ref_images, outputs) + global_descriptors = extract_features.main(retrieval_conf, query_images, outputs) pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - num_loc, - query_list=query_list, - db_model=ref_sfm, + global_descriptors, loc_pairs, num_loc, query_list=query_list, db_model=ref_sfm ) - features = extract_features.main( - feature_conf, query_images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, query_images, outputs, as_half=True) loc_matches = match_features.main( matcher_conf, loc_pairs, feature_conf["output"], outputs ) @@ -136,9 +129,5 @@ def run_slice(slice_, root, outputs, num_covis, num_loc): for slice_ in slices: 
logger.info("Working on slice %s.", slice_) run_slice( - f"slice{slice_}", - args.dataset, - args.outputs, - args.num_covis, - args.num_loc, + f"slice{slice_}", args.dataset, args.outputs, args.num_covis, args.num_loc ) diff --git a/hloc/pipelines/Cambridge/pipeline.py b/hloc/pipelines/Cambridge/pipeline.py index 7b9e5c5..3a676e5 100644 --- a/hloc/pipelines/Cambridge/pipeline.py +++ b/hloc/pipelines/Cambridge/pipeline.py @@ -1,17 +1,18 @@ -from pathlib import Path import argparse +from pathlib import Path -from .utils import create_query_list_with_intrinsics, scale_sfm_images, evaluate -from ... import extract_features, match_features, pairs_from_covisibility -from ... import triangulation, localize_sfm, pairs_from_retrieval, logger +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) +from .utils import create_query_list_with_intrinsics, evaluate, scale_sfm_images -SCENES = [ - "KingsCollege", - "OldHospital", - "ShopFacade", - "StMarysChurch", - "GreatCourt", -] +SCENES = ["KingsCollege", "OldHospital", "ShopFacade", "StMarysChurch", "GreatCourt"] def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): @@ -41,11 +42,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): retrieval_conf = extract_features.confs["netvlad"] create_query_list_with_intrinsics( - gt_dir / "empty_all", - query_list, - test_list, - ext=".txt", - image_dir=images, + gt_dir / "empty_all", query_list, test_list, ext=".txt", image_dir=images ) with open(test_list, "r") as f: query_seqs = {q.split("/")[0] for q in f.read().rstrip().split("\n")} @@ -59,9 +56,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): query_prefix=query_seqs, ) - features = extract_features.main( - feature_conf, images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, images, outputs, as_half=True) pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis) sfm_matches = match_features.main( matcher_conf, sfm_pairs, feature_conf["output"], outputs diff --git a/hloc/pipelines/Cambridge/utils.py b/hloc/pipelines/Cambridge/utils.py index 90215b4..36460f0 100644 --- a/hloc/pipelines/Cambridge/utils.py +++ b/hloc/pipelines/Cambridge/utils.py @@ -1,15 +1,16 @@ -import cv2 import logging + +import cv2 import numpy as np from hloc.utils.read_write_model import ( + qvec2rotmat, read_cameras_binary, + read_cameras_text, read_images_binary, + read_images_text, read_model, write_model, - qvec2rotmat, - read_images_text, - read_cameras_text, ) logger = logging.getLogger(__name__) @@ -42,9 +43,7 @@ def scale_sfm_images(full_model, scaled_model, image_dir): sy = h / camera.height assert sx == sy, (sx, sy) scaled_cameras[cam_id] = camera._replace( - width=w, - height=h, - params=camera.params * np.array([sx, sx, sy, 1.0]), + width=w, height=h, params=camera.params * np.array([sx, sx, sy, 1.0]) ) write_model(scaled_cameras, images, points3D, scaled_model) diff --git a/hloc/pipelines/RobotCar/colmap_from_nvm.py b/hloc/pipelines/RobotCar/colmap_from_nvm.py index 40d7e8b..e90ed72 100644 --- a/hloc/pipelines/RobotCar/colmap_from_nvm.py +++ b/hloc/pipelines/RobotCar/colmap_from_nvm.py @@ -1,29 +1,31 @@ import argparse +import logging import sqlite3 -from tqdm import tqdm from collections import defaultdict -import numpy as np from pathlib import Path -import logging + +import numpy as np +from tqdm import tqdm from ...colmap_from_nvm import ( - 
recover_database_images_and_ids, camera_center_to_translation, + recover_database_images_and_ids, +) +from ...utils.read_write_model import ( + CAMERA_MODEL_IDS, + Camera, + Image, + Point3D, + write_model, ) -from ...utils.read_write_model import Camera, Image, Point3D, CAMERA_MODEL_IDS -from ...utils.read_write_model import write_model logger = logging.getLogger(__name__) -def read_nvm_model( - nvm_path, database_path, image_ids, camera_ids, skip_points=False -): +def read_nvm_model(nvm_path, database_path, image_ids, camera_ids, skip_points=False): # Extract the intrinsics from the db file instead of the NVM model db = sqlite3.connect(str(database_path)) - ret = db.execute( - "SELECT camera_id, model, width, height, params FROM cameras;" - ) + ret = db.execute("SELECT camera_id, model, width, height, params FROM cameras;") cameras = {} for camera_id, camera_model, width, height, params in ret: params = np.fromstring(params, dtype=np.double).reshape(-1) diff --git a/hloc/pipelines/RobotCar/pipeline.py b/hloc/pipelines/RobotCar/pipeline.py index 20877cf..7a7ee31 100644 --- a/hloc/pipelines/RobotCar/pipeline.py +++ b/hloc/pipelines/RobotCar/pipeline.py @@ -1,10 +1,16 @@ -from pathlib import Path import argparse +import glob +from pathlib import Path +from ... import ( + extract_features, + localize_sfm, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) from . import colmap_from_nvm -from ... import extract_features, match_features, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm - CONDITIONS = [ "dawn", @@ -33,102 +39,105 @@ def generate_query_list(dataset, image_dir, path): params = ["SIMPLE_RADIAL", w, h, fx, cx, cy, 0.0] cameras[side] = [str(p) for p in params] - queries = sorted(image_dir.glob("**/*.jpg")) - queries = [str(q.relative_to(image_dir.parents[0])) for q in queries] + queries = glob.glob((image_dir / "**/*.jpg").as_posix(), recursive=True) + queries = [ + Path(q).relative_to(image_dir.parents[0]).as_posix() for q in sorted(queries) + ] out = [[q] + cameras[Path(q).parent.name] for q in queries] with open(path, "w") as f: f.write("\n".join(map(" ".join, out))) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/robotcar", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/robotcar", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=20, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() - -# Setup the paths -dataset = args.dataset -images = dataset / "images/" - -outputs = args.outputs # where everything will be saved -outputs.mkdir(exist_ok=True, parents=True) -query_list = outputs / "{condition}_queries_with_intrinsics.txt" -sift_sfm = outputs / "sfm_sift" -reference_sfm = outputs / "sfm_superpoint+superglue" -sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt" -loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt" -results = ( - outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) - -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_aachen"] -matcher_conf = 
match_features.confs["superglue"] - -for condition in CONDITIONS: - generate_query_list( - dataset, images / condition, str(query_list).format(condition=condition) +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images/" + + outputs = args.outputs # where everything will be saved + outputs.mkdir(exist_ok=True, parents=True) + query_list = outputs / "{condition}_queries_with_intrinsics.txt" + sift_sfm = outputs / "sfm_sift" + reference_sfm = outputs / "sfm_superpoint+superglue" + sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt" + loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt" + results = outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" + + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_aachen"] + matcher_conf = match_features.confs["superglue"] + + for condition in CONDITIONS: + generate_query_list( + dataset, images / condition, str(query_list).format(condition=condition) + ) + + features = extract_features.main(feature_conf, images, outputs, as_half=True) + + colmap_from_nvm.main( + dataset / "3D-models/all-merged/all.nvm", + dataset / "3D-models/overcast-reference.db", + sift_sfm, + ) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs ) -features = extract_features.main(feature_conf, images, outputs, as_half=True) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -colmap_from_nvm.main( - dataset / "3D-models/all-merged/all.nvm", - dataset / "3D-models/overcast-reference.db", - sift_sfm, -) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + # TODO: do per location and per camera + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix=CONDITIONS, + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + localize_sfm.main( + reference_sfm, + Path(str(query_list).format(condition="*")), + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + prepend_camera_name=True, + ) -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -# TODO: do per location and per camera -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix=CONDITIONS, - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - Path(str(query_list).format(condition="*")), - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, - prepend_camera_name=True, -) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/robotcar", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/robotcar", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", 
+ type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=20, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() diff --git a/hloc/reconstruction.py b/hloc/reconstruction.py new file mode 100644 index 0000000..ff4a90a --- /dev/null +++ b/hloc/reconstruction.py @@ -0,0 +1,199 @@ +import argparse +import multiprocessing +import shutil +from pathlib import Path +from typing import Any, Dict, List, Optional + +import pycolmap + +from . import logger +from .triangulation import ( + OutputCapture, + estimation_and_geometric_verification, + import_features, + import_matches, + parse_option_args, +) +from .utils.database import COLMAPDatabase + + +def create_empty_db(database_path: Path): + if database_path.exists(): + logger.warning("The database already exists, deleting it.") + database_path.unlink() + logger.info("Creating an empty database...") + db = COLMAPDatabase.connect(database_path) + db.create_tables() + db.commit() + db.close() + + +def import_images( + image_dir: Path, + database_path: Path, + camera_mode: pycolmap.CameraMode, + image_list: Optional[List[str]] = None, + options: Optional[Dict[str, Any]] = None, +): + logger.info("Importing images into the database...") + if options is None: + options = {} + images = list(image_dir.iterdir()) + if len(images) == 0: + raise IOError(f"No images found in {image_dir}.") + with pycolmap.ostream(): + pycolmap.import_images( + database_path, + image_dir, + camera_mode, + image_list=image_list or [], + options=options, + ) + + +def get_image_ids(database_path: Path) -> Dict[str, int]: + db = COLMAPDatabase.connect(database_path) + images = {} + for name, image_id in db.execute("SELECT name, image_id FROM images;"): + images[name] = image_id + db.close() + return images + + +def run_reconstruction( + sfm_dir: Path, + database_path: Path, + image_dir: Path, + verbose: bool = False, + options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + models_path = sfm_dir / "models" + models_path.mkdir(exist_ok=True, parents=True) + logger.info("Running 3D reconstruction...") + if options is None: + options = {} + options = {"num_threads": min(multiprocessing.cpu_count(), 16), **options} + with OutputCapture(verbose): + with pycolmap.ostream(): + reconstructions = pycolmap.incremental_mapping( + database_path, image_dir, models_path, options=options + ) + + if len(reconstructions) == 0: + logger.error("Could not reconstruct any model!") + return None + logger.info(f"Reconstructed {len(reconstructions)} model(s).") + + largest_index = None + largest_num_images = 0 + for index, rec in reconstructions.items(): + num_images = rec.num_reg_images() + if num_images > largest_num_images: + largest_index = index + largest_num_images = num_images + assert largest_index is not None + logger.info( + f"Largest model is #{largest_index} " + f"with {largest_num_images} images." 
+ ) + + for filename in ["images.bin", "cameras.bin", "points3D.bin"]: + if (sfm_dir / filename).exists(): + (sfm_dir / filename).unlink() + shutil.move( + str(models_path / str(largest_index) / filename), str(sfm_dir) + ) + return reconstructions[largest_index] + + +def main( + sfm_dir: Path, + image_dir: Path, + pairs: Path, + features: Path, + matches: Path, + camera_mode: pycolmap.CameraMode = pycolmap.CameraMode.AUTO, + verbose: bool = False, + skip_geometric_verification: bool = False, + min_match_score: Optional[float] = None, + image_list: Optional[List[str]] = None, + image_options: Optional[Dict[str, Any]] = None, + mapper_options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + assert features.exists(), features + assert pairs.exists(), pairs + assert matches.exists(), matches + + sfm_dir.mkdir(parents=True, exist_ok=True) + database = sfm_dir / "database.db" + + create_empty_db(database) + import_images(image_dir, database, camera_mode, image_list, image_options) + image_ids = get_image_ids(database) + import_features(image_ids, database, features) + import_matches( + image_ids, + database, + pairs, + matches, + min_match_score, + skip_geometric_verification, + ) + if not skip_geometric_verification: + estimation_and_geometric_verification(database, pairs, verbose) + reconstruction = run_reconstruction( + sfm_dir, database, image_dir, verbose, mapper_options + ) + if reconstruction is not None: + logger.info( + f"Reconstruction statistics:\n{reconstruction.summary()}" + + f"\n\tnum_input_images = {len(image_ids)}" + ) + return reconstruction + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sfm_dir", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + + parser.add_argument( + "--camera_mode", + type=str, + default="AUTO", + choices=list(pycolmap.CameraMode.__members__.keys()), + ) + parser.add_argument("--skip_geometric_verification", action="store_true") + parser.add_argument("--min_match_score", type=float) + parser.add_argument("--verbose", action="store_true") + + parser.add_argument( + "--image_options", + nargs="+", + default=[], + help="List of key=value from {}".format( + pycolmap.ImageReaderOptions().todict() + ), + ) + parser.add_argument( + "--mapper_options", + nargs="+", + default=[], + help="List of key=value from {}".format( + pycolmap.IncrementalMapperOptions().todict() + ), + ) + args = parser.parse_args().__dict__ + + image_options = parse_option_args( + args.pop("image_options"), pycolmap.ImageReaderOptions() + ) + mapper_options = parse_option_args( + args.pop("mapper_options"), pycolmap.IncrementalMapperOptions() + ) + + main(**args, image_options=image_options, mapper_options=mapper_options) diff --git a/hloc/triangulation.py b/hloc/triangulation.py new file mode 100644 index 0000000..385fed9 --- /dev/null +++ b/hloc/triangulation.py @@ -0,0 +1,320 @@ +import argparse +import contextlib +import io +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +import numpy as np +import pycolmap +from tqdm import tqdm + +from . 
import logger +from .utils.database import COLMAPDatabase +from .utils.geometry import compute_epipolar_errors +from .utils.io import get_keypoints, get_matches +from .utils.parsers import parse_retrieval + + +class OutputCapture: + def __init__(self, verbose: bool): + self.verbose = verbose + + def __enter__(self): + if not self.verbose: + self.capture = contextlib.redirect_stdout(io.StringIO()) + self.out = self.capture.__enter__() + + def __exit__(self, exc_type, *args): + if not self.verbose: + self.capture.__exit__(exc_type, *args) + if exc_type is not None: + logger.error("Failed with output:\n%s", self.out.getvalue()) + sys.stdout.flush() + + +def create_db_from_model( + reconstruction: pycolmap.Reconstruction, database_path: Path +) -> Dict[str, int]: + if database_path.exists(): + logger.warning("The database already exists, deleting it.") + database_path.unlink() + + db = COLMAPDatabase.connect(database_path) + db.create_tables() + + for i, camera in reconstruction.cameras.items(): + db.add_camera( + camera.model.value, + camera.width, + camera.height, + camera.params, + camera_id=i, + prior_focal_length=True, + ) + + for i, image in reconstruction.images.items(): + db.add_image(image.name, image.camera_id, image_id=i) + + db.commit() + db.close() + return {image.name: i for i, image in reconstruction.images.items()} + + +def import_features( + image_ids: Dict[str, int], database_path: Path, features_path: Path +): + logger.info("Importing features into the database...") + db = COLMAPDatabase.connect(database_path) + + for image_name, image_id in tqdm(image_ids.items()): + keypoints = get_keypoints(features_path, image_name) + keypoints += 0.5 # COLMAP origin + db.add_keypoints(image_id, keypoints) + + db.commit() + db.close() + + +def import_matches( + image_ids: Dict[str, int], + database_path: Path, + pairs_path: Path, + matches_path: Path, + min_match_score: Optional[float] = None, + skip_geometric_verification: bool = False, +): + logger.info("Importing matches into the database...") + + with open(str(pairs_path), "r") as f: + pairs = [p.split() for p in f.readlines()] + + db = COLMAPDatabase.connect(database_path) + + matched = set() + for name0, name1 in tqdm(pairs): + id0, id1 = image_ids[name0], image_ids[name1] + if len({(id0, id1), (id1, id0)} & matched) > 0: + continue + matches, scores = get_matches(matches_path, name0, name1) + if min_match_score: + matches = matches[scores > min_match_score] + db.add_matches(id0, id1, matches) + matched |= {(id0, id1), (id1, id0)} + + if skip_geometric_verification: + db.add_two_view_geometry(id0, id1, matches) + + db.commit() + db.close() + + +def estimation_and_geometric_verification( + database_path: Path, pairs_path: Path, verbose: bool = False +): + logger.info("Performing geometric verification of the matches...") + with OutputCapture(verbose): + with pycolmap.ostream(): + pycolmap.verify_matches( + database_path, + pairs_path, + options=dict( + ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1) + ), + ) + + +def geometric_verification( + image_ids: Dict[str, int], + reference: pycolmap.Reconstruction, + database_path: Path, + features_path: Path, + pairs_path: Path, + matches_path: Path, + max_error: float = 4.0, +): + logger.info("Performing geometric verification of the matches...") + + pairs = parse_retrieval(pairs_path) + db = COLMAPDatabase.connect(database_path) + + inlier_ratios = [] + matched = set() + for name0 in tqdm(pairs): + id0 = image_ids[name0] + image0 = reference.images[id0] + cam0 = 
reference.cameras[image0.camera_id] + kps0, noise0 = get_keypoints( + features_path, name0, return_uncertainty=True + ) + noise0 = 1.0 if noise0 is None else noise0 + if len(kps0) > 0: + kps0 = np.stack(cam0.cam_from_img(kps0)) + else: + kps0 = np.zeros((0, 2)) + + for name1 in pairs[name0]: + id1 = image_ids[name1] + image1 = reference.images[id1] + cam1 = reference.cameras[image1.camera_id] + kps1, noise1 = get_keypoints( + features_path, name1, return_uncertainty=True + ) + noise1 = 1.0 if noise1 is None else noise1 + if len(kps1) > 0: + kps1 = np.stack(cam1.cam_from_img(kps1)) + else: + kps1 = np.zeros((0, 2)) + + matches = get_matches(matches_path, name0, name1)[0] + + if len({(id0, id1), (id1, id0)} & matched) > 0: + continue + matched |= {(id0, id1), (id1, id0)} + + if matches.shape[0] == 0: + db.add_two_view_geometry(id0, id1, matches) + continue + + cam1_from_cam0 = ( + image1.cam_from_world * image0.cam_from_world.inverse() + ) + errors0, errors1 = compute_epipolar_errors( + cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]] + ) + valid_matches = np.logical_and( + errors0 <= cam0.cam_from_img_threshold(noise0 * max_error), + errors1 <= cam1.cam_from_img_threshold(noise1 * max_error), + ) + # TODO: We could also add E to the database, but we need + # to reverse the transformations if id0 > id1 in utils/database.py. + db.add_two_view_geometry(id0, id1, matches[valid_matches, :]) + inlier_ratios.append(np.mean(valid_matches)) + logger.info( + "mean/med/min/max valid matches %.2f/%.2f/%.2f/%.2f%%.", + np.mean(inlier_ratios) * 100, + np.median(inlier_ratios) * 100, + np.min(inlier_ratios) * 100, + np.max(inlier_ratios) * 100, + ) + + db.commit() + db.close() + + +def run_triangulation( + model_path: Path, + database_path: Path, + image_dir: Path, + reference_model: pycolmap.Reconstruction, + verbose: bool = False, + options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + model_path.mkdir(parents=True, exist_ok=True) + logger.info("Running 3D triangulation...") + if options is None: + options = {} + with OutputCapture(verbose): + with pycolmap.ostream(): + reconstruction = pycolmap.triangulate_points( + reference_model, + database_path, + image_dir, + model_path, + options=options, + ) + return reconstruction + + +def main( + sfm_dir: Path, + reference_model: Path, + image_dir: Path, + pairs: Path, + features: Path, + matches: Path, + skip_geometric_verification: bool = False, + estimate_two_view_geometries: bool = False, + min_match_score: Optional[float] = None, + verbose: bool = False, + mapper_options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + assert reference_model.exists(), reference_model + assert features.exists(), features + assert pairs.exists(), pairs + assert matches.exists(), matches + + sfm_dir.mkdir(parents=True, exist_ok=True) + database = sfm_dir / "database.db" + reference = pycolmap.Reconstruction(reference_model) + + image_ids = create_db_from_model(reference, database) + import_features(image_ids, database, features) + import_matches( + image_ids, + database, + pairs, + matches, + min_match_score, + skip_geometric_verification, + ) + if not skip_geometric_verification: + if estimate_two_view_geometries: + estimation_and_geometric_verification(database, pairs, verbose) + else: + geometric_verification( + image_ids, reference, database, features, pairs, matches + ) + reconstruction = run_triangulation( + sfm_dir, database, image_dir, reference, verbose, mapper_options + ) + logger.info( + "Finished the triangulation 
with statistics:\n%s", + reconstruction.summary(), + ) + return reconstruction + + +def parse_option_args(args: List[str], default_options) -> Dict[str, Any]: + options = {} + for arg in args: + idx = arg.find("=") + if idx == -1: + raise ValueError("Options format: key1=value1 key2=value2 etc.") + key, value = arg[:idx], arg[idx + 1 :] + if not hasattr(default_options, key): + raise ValueError( + f'Unknown option "{key}", allowed options and default values' + f" for {default_options.summary()}" + ) + value = eval(value) + target_type = type(getattr(default_options, key)) + if not isinstance(value, target_type): + raise ValueError( + f'Incorrect type for option "{key}":' + f" {type(value)} vs {target_type}" + ) + options[key] = value + return options + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sfm_dir", type=Path, required=True) + parser.add_argument("--reference_sfm_model", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + + parser.add_argument("--skip_geometric_verification", action="store_true") + parser.add_argument("--min_match_score", type=float) + parser.add_argument("--verbose", action="store_true") + args = parser.parse_args().__dict__ + + mapper_options = parse_option_args( + args.pop("mapper_options"), pycolmap.IncrementalMapperOptions() + ) + + main(**args, mapper_options=mapper_options) diff --git a/hloc/utils/database.py b/hloc/utils/database.py index 050f5ec..683c250 100644 --- a/hloc/utils/database.py +++ b/hloc/utils/database.py @@ -31,10 +31,10 @@ # This script is based on an original implementation by True Price. 
-import sys import sqlite3 -import numpy as np +import sys +import numpy as np IS_PYTHON3 = sys.version_info[0] >= 3 @@ -100,9 +100,7 @@ cols INTEGER NOT NULL, data BLOB)""" -CREATE_NAME_INDEX = ( - "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" -) +CREATE_NAME_INDEX = "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" CREATE_ALL = "; ".join( [ @@ -152,34 +150,20 @@ def __init__(self, *args, **kwargs): super(COLMAPDatabase, self).__init__(*args, **kwargs) self.create_tables = lambda: self.executescript(CREATE_ALL) - self.create_cameras_table = lambda: self.executescript( - CREATE_CAMERAS_TABLE - ) + self.create_cameras_table = lambda: self.executescript(CREATE_CAMERAS_TABLE) self.create_descriptors_table = lambda: self.executescript( CREATE_DESCRIPTORS_TABLE ) - self.create_images_table = lambda: self.executescript( - CREATE_IMAGES_TABLE - ) + self.create_images_table = lambda: self.executescript(CREATE_IMAGES_TABLE) self.create_two_view_geometries_table = lambda: self.executescript( CREATE_TWO_VIEW_GEOMETRIES_TABLE ) - self.create_keypoints_table = lambda: self.executescript( - CREATE_KEYPOINTS_TABLE - ) - self.create_matches_table = lambda: self.executescript( - CREATE_MATCHES_TABLE - ) + self.create_keypoints_table = lambda: self.executescript(CREATE_KEYPOINTS_TABLE) + self.create_matches_table = lambda: self.executescript(CREATE_MATCHES_TABLE) self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) def add_camera( - self, - model, - width, - height, - params, - prior_focal_length=False, - camera_id=None, + self, model, width, height, params, prior_focal_length=False, camera_id=None ): params = np.asarray(params, np.float64) cursor = self.execute( diff --git a/hloc/utils/geometry.py b/hloc/utils/geometry.py index 1f33e2e..5995ccc 100644 --- a/hloc/utils/geometry.py +++ b/hloc/utils/geometry.py @@ -6,28 +6,11 @@ def to_homogeneous(p): return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1) -def vector_to_cross_product_matrix(v): - return np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) - - -def compute_epipolar_errors(qvec_r2t, tvec_r2t, p2d_r, p2d_t): - T_r2t = pose_matrix_from_qvec_tvec(qvec_r2t, tvec_r2t) - # Compute errors in normalized plane to avoid distortion. 
- E = vector_to_cross_product_matrix(T_r2t[:3, -1]) @ T_r2t[:3, :3] - l2d_r2t = (E @ to_homogeneous(p2d_r).T).T - l2d_t2r = (E.T @ to_homogeneous(p2d_t).T).T - errors_r = np.abs( - np.sum(to_homogeneous(p2d_r) * l2d_t2r, axis=1) - ) / np.linalg.norm(l2d_t2r[:, :2], axis=1) - errors_t = np.abs( - np.sum(to_homogeneous(p2d_t) * l2d_r2t, axis=1) - ) / np.linalg.norm(l2d_r2t[:, :2], axis=1) - return E, errors_r, errors_t - - -def pose_matrix_from_qvec_tvec(qvec, tvec): - pose = np.zeros((4, 4)) - pose[:3, :3] = pycolmap.qvec_to_rotmat(qvec) - pose[:3, -1] = tvec - pose[-1, -1] = 1 - return pose +def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j): + j_E_i = j_from_i.essential_matrix() + l2d_j = to_homogeneous(p2d_i) @ j_E_i.T + l2d_i = to_homogeneous(p2d_j) @ j_E_i + dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1)) + errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1) + errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1) + return errors_i, errors_j diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py index faaa8f2..9407dcf 100644 --- a/hloc/utils/parsers.py +++ b/hloc/utils/parsers.py @@ -1,7 +1,8 @@ -from pathlib import Path import logging -import numpy as np from collections import defaultdict +from pathlib import Path + +import numpy as np import pycolmap logger = logging.getLogger(__name__) @@ -18,7 +19,9 @@ def parse_image_list(path, with_intrinsics=False): if with_intrinsics: model, width, height, *params = data params = np.array(params, float) - cam = pycolmap.Camera(model, int(width), int(height), params) + cam = pycolmap.Camera( + model=model, width=int(width), height=int(height), params=params + ) images.append((name, cam)) else: images.append(name) diff --git a/hloc/utils/read_write_model.py b/hloc/utils/read_write_model.py index 65bed51..197921d 100644 --- a/hloc/utils/read_write_model.py +++ b/hloc/utils/read_write_model.py @@ -29,12 +29,13 @@ # # Author: Johannes L. 
Schoenberger (jsch-at-demuc-dot-de) -import os -import collections -import numpy as np -import struct import argparse +import collections import logging +import os +import struct + +import numpy as np logger = logging.getLogger(__name__) @@ -42,9 +43,7 @@ CameraModel = collections.namedtuple( "CameraModel", ["model_id", "model_name", "num_params"] ) -Camera = collections.namedtuple( - "Camera", ["id", "model", "width", "height", "params"] -) +Camera = collections.namedtuple("Camera", ["id", "model", "width", "height", "params"]) BaseImage = collections.namedtuple( "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"] ) @@ -128,11 +127,7 @@ def read_cameras_text(path): height = int(elems[3]) params = np.array(tuple(map(float, elems[4:]))) cameras[camera_id] = Camera( - id=camera_id, - model=model, - width=width, - height=height, - params=params, + id=camera_id, model=model, width=width, height=height, params=params ) return cameras @@ -157,9 +152,7 @@ def read_cameras_binary(path_to_model_file): height = camera_properties[3] num_params = CAMERA_MODEL_IDS[model_id].num_params params = read_next_bytes( - fid, - num_bytes=8 * num_params, - format_char_sequence="d" * num_params, + fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params ) cameras[camera_id] = Camera( id=camera_id, @@ -230,10 +223,7 @@ def read_images_text(path): image_name = elems[9] elems = fid.readline().split() xys = np.column_stack( - [ - tuple(map(float, elems[0::3])), - tuple(map(float, elems[1::3])), - ] + [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))] ) point3D_ids = np.array(tuple(map(int, elems[2::3]))) images[image_id] = Image( @@ -270,19 +260,16 @@ def read_images_binary(path_to_model_file): while current_char != b"\x00": # look for the ASCII 0 entry image_name += current_char.decode("utf-8") current_char = read_next_bytes(fid, 1, "c")[0] - num_points2D = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] x_y_id_s = read_next_bytes( fid, num_bytes=24 * num_points2D, format_char_sequence="ddq" * num_points2D, ) xys = np.column_stack( - [ - tuple(map(float, x_y_id_s[0::3])), - tuple(map(float, x_y_id_s[1::3])), - ] + [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] ) point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) images[image_id] = Image( @@ -321,13 +308,7 @@ def write_images_text(images, path): with open(path, "w") as fid: fid.write(HEADER) for _, img in images.items(): - image_header = [ - img.id, - *img.qvec, - *img.tvec, - img.camera_id, - img.name, - ] + image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] first_line = " ".join(map(str, image_header)) fid.write(first_line + "\n") @@ -407,9 +388,9 @@ def read_points3D_binary(path_to_model_file): xyz = np.array(binary_point_line_properties[1:4]) rgb = np.array(binary_point_line_properties[4:7]) error = np.array(binary_point_line_properties[7]) - track_length = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] track_elems = read_next_bytes( fid, num_bytes=8 * track_length, @@ -442,7 +423,7 @@ def write_points3D_text(points3D, path): ) / len(points3D) HEADER = ( "# 3D point list with one line of data per point:\n" - + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, 
POINT2D_IDX)\n" # noqa: E501 + "# Number of points: {}, mean track length: {}\n".format( len(points3D), mean_track_length ) @@ -498,12 +479,8 @@ def read_model(path, ext=""): ext = ".txt" else: try: - cameras, images, points3D = read_model( - os.path.join(path, "model/") - ) - logger.warning( - "This SfM file structure was deprecated in hloc v1.1" - ) + cameras, images, points3D = read_model(os.path.join(path, "model/")) + logger.warning("This SfM file structure was deprecated in hloc v1.1") return cameras, images, points3D except FileNotFoundError: raise FileNotFoundError( @@ -595,9 +572,7 @@ def main(): ) args = parser.parse_args() - cameras, images, points3D = read_model( - path=args.input_model, ext=args.input_format - ) + cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) print("num_cameras:", len(cameras)) print("num_images:", len(images)) @@ -605,11 +580,7 @@ def main(): if args.output_model is not None: write_model( - cameras, - images, - points3D, - path=args.output_model, - ext=args.output_format, + cameras, images, points3D, path=args.output_model, ext=args.output_format ) diff --git a/hloc/utils/viz.py b/hloc/utils/viz.py index 21d3807..360466b 100644 --- a/hloc/utils/viz.py +++ b/hloc/utils/viz.py @@ -20,7 +20,7 @@ def cm_RdGn(x): def plot_images( - imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True + imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True, figsize=4.5 ): """Plot a set of images horizontally. Args: @@ -37,23 +37,19 @@ def plot_images( ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H else: ratios = [4 / 3] * n - figsize = [sum(ratios) * 4.5, 4.5] - fig, ax = plt.subplots( + figsize = [sum(ratios) * figsize, figsize] + fig, axs = plt.subplots( 1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios} ) if n == 1: - ax = [ax] - for i in range(n): - ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) - ax[i].get_yaxis().set_ticks([]) - ax[i].get_xaxis().set_ticks([]) - ax[i].set_axis_off() - for spine in ax[i].spines.values(): # remove frame - spine.set_visible(False) + axs = [axs] + for i, (img, ax) in enumerate(zip(imgs, axs)): + ax.imshow(img, cmap=plt.get_cmap(cmaps[i])) + ax.set_axis_off() if titles: - ax[i].set_title(titles[i]) + ax.set_title(titles[i]) fig.tight_layout(pad=pad) - + return fig def plot_keypoints(kpts, colors="lime", ps=4): """Plot keypoints for existing images. 
@@ -96,21 +92,19 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0): if lw > 0: # transform the points into the figure coordinate system - transFigure = fig.transFigure.inverted() - fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) - fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) - fig.lines += [ - matplotlib.lines.Line2D( - (fkpts0[i, 0], fkpts1[i, 0]), - (fkpts0[i, 1], fkpts1[i, 1]), - zorder=1, - transform=fig.transFigure, - c=color[i], - linewidth=lw, - alpha=a, + for i in range(len(kpts0)): + fig.add_artist( + matplotlib.patches.ConnectionPatch( + xyA=(kpts0[i, 0], kpts0[i, 1]), + coordsA=ax0.transData, + xyB=(kpts1[i, 0], kpts1[i, 1]), + coordsB=ax1.transData, + zorder=1, + color=color[i], + linewidth=lw, + alpha=a, + ) ) - for i in range(len(kpts0)) - ] # freeze the axes to prevent the transform to change ax0.autoscale(enable=False) @@ -134,13 +128,7 @@ def add_text( ): ax = plt.gcf().axes[idx] t = ax.text( - *pos, - text, - fontsize=fs, - ha=ha, - va=va, - color=color, - transform=ax.transAxes + *pos, text, fontsize=fs, ha=ha, va=va, color=color, transform=ax.transAxes ) if lcolor is not None: t.set_path_effects( diff --git a/hloc/utils/viz_3d.py b/hloc/utils/viz_3d.py index fbd6505..e608f78 100644 --- a/hloc/utils/viz_3d.py +++ b/hloc/utils/viz_3d.py @@ -9,9 +9,10 @@ """ from typing import Optional + import numpy as np -import pycolmap import plotly.graph_objects as go +import pycolmap def to_homogeneous(points): @@ -46,9 +47,7 @@ def init_figure(height: int = 800) -> go.Figure: dragmode="orbit", ), margin=dict(l=0, r=0, b=0, t=0, pad=0), - legend=dict( - orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1 - ), + legend=dict(orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1), ) return fig @@ -70,9 +69,7 @@ def plot_points( mode="markers", name=name, legendgroup=name, - marker=dict( - size=ps, color=color, line_width=0.0, colorscale=colorscale - ), + marker=dict(size=ps, color=color, line_width=0.0, colorscale=colorscale), ) fig.add_trace(tr) @@ -85,7 +82,9 @@ def plot_camera( color: str = "rgb(0, 0, 255)", name: Optional[str] = None, legendgroup: Optional[str] = None, + fill: bool = False, size: float = 1.0, + text: Optional[str] = None, ): """Plot a camera frustum from pose and intrinsic matrix.""" W, H = K[0, 2] * 2, K[1, 2] * 2 @@ -98,43 +97,34 @@ def plot_camera( scale = 1.0 corners = to_homogeneous(corners) @ np.linalg.inv(K).T corners = (corners / 2 * scale) @ R.T + t - - x, y, z = corners.T - rect = go.Scatter3d( - x=x, - y=y, - z=z, - line=dict(color=color), - legendgroup=legendgroup, - name=name, - marker=dict(size=0.0001), - showlegend=False, - ) - fig.add_trace(rect) + legendgroup = legendgroup if legendgroup is not None else name x, y, z = np.concatenate(([t], corners)).T i = [0, 0, 0, 0] j = [1, 2, 3, 4] k = [2, 3, 4, 1] - pyramid = go.Mesh3d( - x=x, - y=y, - z=z, - color=color, - i=i, - j=j, - k=k, - legendgroup=legendgroup, - name=name, - showlegend=False, - ) - fig.add_trace(pyramid) + if fill: + pyramid = go.Mesh3d( + x=x, + y=y, + z=z, + color=color, + i=i, + j=j, + k=k, + legendgroup=legendgroup, + name=name, + showlegend=False, + hovertemplate=text.replace("\n", "
"), + ) + fig.add_trace(pyramid) + triangles = np.vstack((i, j, k)).T vertices = np.concatenate(([t], corners)) tri_points = np.array([vertices[i] for i in triangles.reshape(-1)]) - x, y, z = tri_points.T + pyramid = go.Scatter3d( x=x, y=y, @@ -144,6 +134,7 @@ def plot_camera( name=name, line=dict(color=color, width=1), showlegend=False, + hovertemplate=text.replace("\n", "
"), ) fig.add_trace(pyramid) @@ -156,19 +147,19 @@ def plot_camera_colmap( **kwargs ): """Plot a camera frustum from PyCOLMAP objects""" + world_t_camera = image.cam_from_world.inverse() plot_camera( fig, - image.rotmat().T, - image.projection_center(), + world_t_camera.rotation.matrix(), + world_t_camera.translation, camera.calibration_matrix(), name=name or str(image.image_id), + text=str(image), **kwargs ) -def plot_cameras( - fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs -): +def plot_cameras(fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs): """Plot a camera as a cone with camera frustum.""" for image_id, image in reconstruction.images.items(): plot_camera_colmap( @@ -185,13 +176,14 @@ def plot_reconstruction( min_track_length: int = 2, points: bool = True, cameras: bool = True, + points_rgb: bool = True, cs: float = 1.0, ): # Filter outliers bbs = rec.compute_bounding_box(0.001, 0.999) # Filter points, use original reproj error here - xyzs = [ - p3D.xyz + p3Ds = [ + p3D for _, p3D in rec.points3D.items() if ( (p3D.xyz >= bbs[0]).all() @@ -200,7 +192,12 @@ def plot_reconstruction( and p3D.track.length() >= min_track_length ) ] + xyzs = [p3D.xyz for p3D in p3Ds] + if points_rgb: + pcolor = [p3D.color for p3D in p3Ds] + else: + pcolor = color if points: - plot_points(fig, np.array(xyzs), color=color, ps=1, name=name) + plot_points(fig, np.array(xyzs), color=pcolor, ps=1, name=name) if cameras: plot_cameras(fig, rec, color=color, legendgroup=name, size=cs) diff --git a/hloc/visualization.py b/hloc/visualization.py new file mode 100644 index 0000000..77369ef --- /dev/null +++ b/hloc/visualization.py @@ -0,0 +1,182 @@ +import pickle +import random + +import numpy as np +import pycolmap +from matplotlib import cm + +from .utils.io import read_image +from .utils.viz import ( + add_text, + cm_RdGn, + plot_images, + plot_keypoints, + plot_matches, +) + + +def visualize_sfm_2d( + reconstruction, + image_dir, + color_by="visibility", + selected=[], + n=1, + seed=0, + dpi=75, +): + assert image_dir.exists() + if not isinstance(reconstruction, pycolmap.Reconstruction): + reconstruction = pycolmap.Reconstruction(reconstruction) + + if not selected: + image_ids = reconstruction.reg_image_ids() + selected = random.Random(seed).sample(image_ids, min(n, len(image_ids))) + + for i in selected: + image = reconstruction.images[i] + keypoints = np.array([p.xy for p in image.points2D]) + visible = np.array([p.has_point3D() for p in image.points2D]) + + if color_by == "visibility": + color = [(0, 0, 1) if v else (1, 0, 0) for v in visible] + text = f"visible: {np.count_nonzero(visible)}/{len(visible)}" + elif color_by == "track_length": + tl = np.array( + [ + ( + reconstruction.points3D[p.point3D_id].track.length() + if p.has_point3D() + else 1 + ) + for p in image.points2D + ] + ) + max_, med_ = np.max(tl), np.median(tl[tl > 1]) + tl = np.log(tl) + color = cm.jet(tl / tl.max()).tolist() + text = f"max/median track length: {max_}/{med_}" + elif color_by == "depth": + p3ids = [p.point3D_id for p in image.points2D if p.has_point3D()] + z = np.array( + [ + (image.cam_from_world * reconstruction.points3D[j].xyz)[-1] + for j in p3ids + ] + ) + z -= z.min() + color = cm.jet(z / np.percentile(z, 99.9)) + text = f"visible: {np.count_nonzero(visible)}/{len(visible)}" + keypoints = keypoints[visible] + else: + raise NotImplementedError(f"Coloring not implemented: {color_by}.") + + name = image.name + fig = plot_images([read_image(image_dir / name)], dpi=dpi) + 
plot_keypoints([keypoints], colors=[color], ps=4) + add_text(0, text) + add_text(0, name, pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom") + return fig + + +def visualize_loc( + results, + image_dir, + reconstruction=None, + db_image_dir=None, + selected=[], + n=1, + seed=0, + prefix=None, + **kwargs, +): + assert image_dir.exists() + + with open(str(results) + "_logs.pkl", "rb") as f: + logs = pickle.load(f) + + if not selected: + queries = list(logs["loc"].keys()) + if prefix: + queries = [q for q in queries if q.startswith(prefix)] + selected = random.Random(seed).sample(queries, min(n, len(queries))) + + if reconstruction is not None: + if not isinstance(reconstruction, pycolmap.Reconstruction): + reconstruction = pycolmap.Reconstruction(reconstruction) + + for qname in selected: + loc = logs["loc"][qname] + visualize_loc_from_log( + image_dir, qname, loc, reconstruction, db_image_dir, **kwargs + ) + + +def visualize_loc_from_log( + image_dir, + query_name, + loc, + reconstruction=None, + db_image_dir=None, + top_k_db=2, + dpi=75, +): + q_image = read_image(image_dir / query_name) + if loc.get("covisibility_clustering", False): + # select the first, largest cluster if the localization failed + loc = loc["log_clusters"][loc["best_cluster"] or 0] + + inliers = np.array(loc["PnP_ret"]["inliers"]) + mkp_q = loc["keypoints_query"] + n = len(loc["db"]) + if reconstruction is not None: + # for each pair of query keypoint and its matched 3D point, + # we need to find its corresponding keypoint in each database image + # that observes it. We also count the number of inliers in each. + kp_idxs, kp_to_3D_to_db = loc["keypoint_index_to_db"] + counts = np.zeros(n) + dbs_kp_q_db = [[] for _ in range(n)] + inliers_dbs = [[] for _ in range(n)] + for i, (inl, (p3D_id, db_idxs)) in enumerate( + zip(inliers, kp_to_3D_to_db) + ): + track = reconstruction.points3D[p3D_id].track + track = {el.image_id: el.point2D_idx for el in track.elements} + for db_idx in db_idxs: + counts[db_idx] += inl + kp_db = track[loc["db"][db_idx]] + dbs_kp_q_db[db_idx].append((i, kp_db)) + inliers_dbs[db_idx].append(inl) + else: + # for inloc the database keypoints are already in the logs + assert "keypoints_db" in loc + assert "indices_db" in loc + counts = np.array( + [np.sum(loc["indices_db"][inliers] == i) for i in range(n)] + ) + + # display the database images with the most inlier matches + db_sort = np.argsort(-counts) + for db_idx in db_sort[:top_k_db]: + if reconstruction is not None: + db = reconstruction.images[loc["db"][db_idx]] + db_name = db.name + db_kp_q_db = np.array(dbs_kp_q_db[db_idx]) + kp_q = mkp_q[db_kp_q_db[:, 0]] + kp_db = np.array([db.points2D[i].xy for i in db_kp_q_db[:, 1]]) + inliers_db = inliers_dbs[db_idx] + else: + db_name = loc["db"][db_idx] + kp_q = mkp_q[loc["indices_db"] == db_idx] + kp_db = loc["keypoints_db"][loc["indices_db"] == db_idx] + inliers_db = inliers[loc["indices_db"] == db_idx] + + db_image = read_image((db_image_dir or image_dir) / db_name) + color = cm_RdGn(inliers_db).tolist() + text = f"inliers: {sum(inliers_db)}/{len(inliers_db)}" + + plot_images([q_image, db_image], dpi=dpi) + plot_matches(kp_q, kp_db, color, a=0.1) + add_text(0, text) + opts = dict(pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom") + add_text(0, query_name, **opts) + add_text(1, db_name, **opts) diff --git a/requirements.txt b/requirements.txt index ad5e055..5bc1a74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ opencv-python==4.6.0.66 pandas==2.0.3 plotly==5.15.0 protobuf==4.23.2 
-pycolmap==0.5.0 +pycolmap==0.6.0 pytlsd==0.0.2 pytorch-lightning==1.4.9 PyYAML==6.0 @@ -34,4 +34,5 @@ onnxruntime poselib roma #dust3r huggingface_hub -psutil \ No newline at end of file +psutil +easydict \ No newline at end of file diff --git a/third_party/mast3r b/third_party/mast3r new file mode 160000 index 0000000..6d6d485 --- /dev/null +++ b/third_party/mast3r @@ -0,0 +1 @@ +Subproject commit 6d6d4858f03f1736ca21928fa3eda0511fbb0a8d diff --git a/ui/app_class.py b/ui/app_class.py index 50917a7..f1c9805 100644 --- a/ui/app_class.py +++ b/ui/app_class.py @@ -3,7 +3,10 @@ import gradio as gr import numpy as np +from easydict import EasyDict as edict +from omegaconf import OmegaConf +from ui.sfm import SfmEngine from ui.utils import ( GRADIO_VERSION, gen_examples, @@ -115,7 +118,7 @@ def init_interface(self): label="Match thres.", value=0.1, ) - match_setting_max_features = gr.Slider( + match_setting_max_keypoints = gr.Slider( minimum=10, maximum=10000, step=10, @@ -199,7 +202,7 @@ def init_interface(self): input_image0, input_image1, match_setting_threshold, - match_setting_max_features, + match_setting_max_keypoints, detect_keypoints_threshold, matcher_list, ransac_method, @@ -314,7 +317,7 @@ def init_interface(self): input_image0, input_image1, match_setting_threshold, - match_setting_max_features, + match_setting_max_keypoints, detect_keypoints_threshold, matcher_list, input_image0, @@ -377,31 +380,15 @@ def init_interface(self): ], outputs=[output_wrapped, geometry_result], ) - with gr.Tab("Under construction"): - self.init_tab_sfm() - - def init_tab_sfm(self): - with gr.Row(): - with gr.Column(): - with gr.Row(): - gr.Textbox("Under construction", label="A", visible=True) - gr.Textbox("Under construction", label="B", visible=True) - gr.Textbox("Under construction", label="C", visible=True) - with gr.Row(): - with gr.Accordion("Open for More", open=False): - gr.Textbox( - "Under construction", label="A1", visible=True - ) - gr.Textbox( - "Under construction", label="B1", visible=True - ) - gr.Textbox( - "Under construction", label="C1", visible=True - ) - with gr.Column(): - gr.Textbox("Under construction", label="D", visible=True) - gr.Textbox("Under construction", label="E", visible=True) - gr.Textbox("Under construction", label="F", visible=True) + with gr.Tab("Structure from Motion(under-dev)"): + sfm_ui = AppSfmUI( # noqa: F841 + { + **self.cfg, + "matcher_zoo": self.matcher_zoo, + "outputs": "experiments/sfm", + } + ) + sfm_ui.call_empty() def run(self): self.app.queue().launch( @@ -559,3 +546,256 @@ def get_link(link, tag="Link"): # height=1000, ) return tab + + +class AppBaseUI: + def __init__(self, cfg: Dict[str, Any] = {}): + self.cfg = OmegaConf.create(cfg) + self.inputs = edict({}) + self.outputs = edict({}) + self.ui = edict({}) + + def _init_ui(self): + NotImplemented + + def call(self, **kwargs): + NotImplemented + + def info(self): + gr.Info("SFM is under construction.") + + +class AppSfmUI(AppBaseUI): + def __init__(self, cfg: Dict[str, Any] = None): + super().__init__(cfg) + assert "matcher_zoo" in self.cfg + self.matcher_zoo = self.cfg["matcher_zoo"] + self.sfm_engine = SfmEngine(cfg) + self._init_ui() + + def init_retrieval_dropdown(self): + algos = [] + for k, v in self.cfg["retrieval_zoo"].items(): + if v.get("enable", True): + algos.append(k) + return algos + + def _update_options(self, option): + if option == "sparse": + return gr.Textbox("sparse", visible=True) + elif option == "dense": + return gr.Textbox("dense", visible=True) + else: + return 
gr.Textbox("not set", visible=True) + + def _on_select_custom_params(self, value: bool = False): + return gr.Textbox( + label="Camera Params", + value="0,0,0,0", + interactive=value, + visible=value, + ) + + def _init_ui(self): + with gr.Row(): + # data settting and camera settings + with gr.Column(): + self.inputs.input_images = gr.File( + label="SfM", + interactive=True, + file_count="multiple", + min_width=300, + ) + # camera setting + with gr.Accordion("Camera Settings", open=True): + with gr.Column(): + with gr.Row(): + with gr.Column(): + self.inputs.camera_model = gr.Dropdown( + choices=[ + "PINHOLE", + "SIMPLE_RADIAL", + "OPENCV", + ], + value="PINHOLE", + label="Camera Model", + interactive=True, + ) + with gr.Column(): + gr.Checkbox( + label="Shared Params", + value=True, + interactive=True, + ) + camera_custom_params_cb = gr.Checkbox( + label="Custom Params", + value=False, + interactive=True, + ) + with gr.Row(): + self.inputs.camera_params = gr.Textbox( + label="Camera Params", + value="0,0,0,0", + interactive=False, + visible=False, + ) + camera_custom_params_cb.select( + fn=self._on_select_custom_params, + inputs=camera_custom_params_cb, + outputs=self.inputs.camera_params, + ) + + with gr.Accordion("Matching Settings", open=True): + # feature extraction and matching setting + with gr.Row(): + # matcher setting + self.inputs.matcher_key = gr.Dropdown( + choices=self.matcher_zoo.keys(), + value="disk+lightglue", + label="Matching Model", + interactive=True, + ) + with gr.Row(): + with gr.Accordion("Advanced Settings", open=False): + with gr.Column(): + with gr.Row(): + # matching setting + self.inputs.max_keypoints = gr.Slider( + label="Max Keypoints", + minimum=100, + maximum=10000, + value=1000, + interactive=True, + ) + self.inputs.keypoint_threshold = gr.Slider( + label="Keypoint Threshold", + minimum=0, + maximum=1, + value=0.01, + ) + with gr.Row(): + self.inputs.match_threshold = gr.Slider( + label="Match Threshold", + minimum=0.01, + maximum=12.0, + value=0.2, + ) + self.inputs.ransac_threshold = gr.Slider( + label="Ransac Threshold", + minimum=0.01, + maximum=12.0, + value=4.0, + step=0.01, + interactive=True, + ) + + with gr.Row(): + self.inputs.ransac_confidence = gr.Slider( + label="Ransac Confidence", + minimum=0.01, + maximum=1.0, + value=0.9999, + step=0.0001, + interactive=True, + ) + self.inputs.ransac_max_iter = gr.Slider( + label="Ransac Max Iter", + minimum=1, + maximum=100, + value=100, + step=1, + interactive=True, + ) + with gr.Accordion("Scene Graph Settings", open=True): + # mapping setting + self.inputs.scene_graph = gr.Dropdown( + choices=["all", "swin", "oneref"], + value="all", + label="Scene Graph", + interactive=True, + ) + + # global feature setting + self.inputs.global_feature = gr.Dropdown( + choices=self.init_retrieval_dropdown(), + value="netvlad", + label="Global features", + interactive=True, + ) + self.inputs.top_k = gr.Slider( + label="Number of Images per Image to Match", + minimum=1, + maximum=100, + value=10, + step=1, + ) + # button_match = gr.Button("Run Matching", variant="primary") + + # mapping setting + with gr.Column(): + with gr.Accordion("Mapping Settings", open=True): + with gr.Row(): + with gr.Accordion("Buddle Settings", open=True): + with gr.Row(): + self.inputs.mapper_refine_focal_length = ( + gr.Checkbox( + label="Refine Focal Length", + value=False, + interactive=True, + ) + ) + self.inputs.mapper_refine_principle_points = ( + gr.Checkbox( + label="Refine Principle Points", + value=False, + interactive=True, + ) + ) + 
self.inputs.mapper_refine_extra_params = ( + gr.Checkbox( + label="Refine Extra Params", + value=False, + interactive=True, + ) + ) + with gr.Accordion("Retriangluation Settings", open=True): + gr.Textbox( + label="Retriangluation Details", + ) + self.ui.button_sfm = gr.Button("Run SFM", variant="primary") + self.outputs.model_3d = gr.Model3D( + interactive=True, + ) + self.outputs.output_image = gr.Image( + label="SFM Visualize", + type="numpy", + image_mode="RGB", + interactive=False, + ) + + def call_empty(self): + self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[]) + + def call(self): + self.ui.button_sfm.click( + fn=self.sfm_engine.call, + inputs=[ + self.inputs.matcher_key, + self.inputs.input_images, # images + self.inputs.camera_model, + self.inputs.camera_params, + self.inputs.max_keypoints, + self.inputs.keypoint_threshold, + self.inputs.match_threshold, + self.inputs.ransac_threshold, + self.inputs.ransac_confidence, + self.inputs.ransac_max_iter, + self.inputs.scene_graph, + self.inputs.global_feature, + self.inputs.top_k, + self.inputs.mapper_refine_focal_length, + self.inputs.mapper_refine_principle_points, + self.inputs.mapper_refine_extra_params, + ], + outputs=[self.outputs.model_3d, self.outputs.output_image], + ) diff --git a/ui/config.yaml b/ui/config.yaml index dff927f..63fe9d1 100644 --- a/ui/config.yaml +++ b/ui/config.yaml @@ -27,6 +27,17 @@ matcher_zoo: paper: https://arxiv.org/abs/2405.12979 project: https://hwjiang1510.github.io/OmniGlue display: true + Mast3R: + enable: false + matcher: mast3r + dense: true + info: + name: Mast3R #dispaly name + source: "CVPR 2024" + github: https://github.com/naver/mast3r + paper: https://arxiv.org/abs/2406.09756 + project: https://dust3r.europe.naverlabs.com + display: true DUSt3R: # TODO: duster is under development enable: true @@ -395,4 +406,12 @@ matcher_zoo: github: https://github.com/feixue94/sfd2 paper: https://arxiv.org/abs/2304.14845 project: https://feixue94.github.io/ - display: true \ No newline at end of file + display: true + +retrieval_zoo: + netvlad: + enable: true + openibl: + enable: true + cosplace: + enable: true diff --git a/ui/sfm.py b/ui/sfm.py new file mode 100644 index 0000000..b13ac3d --- /dev/null +++ b/ui/sfm.py @@ -0,0 +1,164 @@ +import shutil +import tempfile +from pathlib import Path +from typing import Any, Dict, List + +import pycolmap + +from hloc import ( + extract_features, + logger, + match_features, + pairs_from_retrieval, + reconstruction, + visualization, +) + +from .viz import fig2im + + +class SfmEngine: + def __init__(self, cfg: Dict[str, Any] = None): + self.cfg = cfg + if "outputs" in cfg and Path(cfg["outputs"]): + outputs = Path(cfg["outputs"]) + outputs.mkdir(parents=True, exist_ok=True) + else: + outputs = tempfile.mkdtemp() + self.outputs = Path(outputs) + + def call( + self, + key: str, + images: Path, + camera_model: str, + camera_params: List[float], + max_keypoints: int, + keypoint_threshold: float, + match_threshold: float, + ransac_threshold: int, + ransac_confidence: float, + ransac_max_iter: int, + scene_graph: bool, + global_feature: str, + top_k: int = 10, + mapper_refine_focal_length: bool = False, + mapper_refine_principle_points: bool = False, + mapper_refine_extra_params: bool = False, + ): + """ + Call a list of functions to perform feature extraction, matching, and reconstruction. + + Args: + key (str): The key to retrieve the matcher and feature models. + images (Path): The directory containing the images. 
+ outputs (Path): The directory to store the outputs. + camera_model (str): The camera model. + camera_params (List[float]): The camera parameters. + max_keypoints (int): The maximum number of features. + match_threshold (float): The match threshold. + ransac_threshold (int): The RANSAC threshold. + ransac_confidence (float): The RANSAC confidence. + ransac_max_iter (int): The maximum number of RANSAC iterations. + scene_graph (bool): Whether to compute the scene graph. + global_feature (str): Whether to compute the global feature. + top_k (int): The number of image-pair to use. + mapper_refine_focal_length (bool): Whether to refine the focal length. + mapper_refine_principle_points (bool): Whether to refine the principle points. + mapper_refine_extra_params (bool): Whether to refine the extra parameters. + + Returns: + Path: The directory containing the SfM results. + """ + if len(images) == 0: + logger.error(f"{images} does not exist.") + + temp_images = Path(tempfile.mkdtemp()) + # copy images + logger.info(f"Copying images to {temp_images}.") + for image in images: + shutil.copy(image, temp_images) + + matcher_zoo = self.cfg["matcher_zoo"] + model = matcher_zoo[key] + match_conf = model["matcher"] + match_conf["model"]["max_keypoints"] = max_keypoints + match_conf["model"]["match_threshold"] = match_threshold + + feature_conf = model["feature"] + feature_conf["model"]["max_keypoints"] = max_keypoints + feature_conf["model"]["keypoint_threshold"] = keypoint_threshold + + # retrieval + retrieval_name = self.cfg.get("retrieval_name", "netvlad") + retrieval_conf = extract_features.confs[retrieval_name] + + mapper_options = { + "ba_refine_extra_params": mapper_refine_extra_params, + "ba_refine_focal_length": mapper_refine_focal_length, + "ba_refine_principal_point": mapper_refine_principle_points, + "ba_local_max_num_iterations": 40, + "ba_local_max_refinements": 3, + "ba_global_max_num_iterations": 100, + # below 3 options are for individual/video data, for internet photos, they should be left + # default + "min_focal_length_ratio": 0.1, + "max_focal_length_ratio": 10, + "max_extra_param": 1e15, + } + + sfm_dir = self.outputs / "sfm_{}".format(key) + sfm_pairs = self.outputs / "pairs-sfm.txt" + sfm_dir.mkdir(exist_ok=True, parents=True) + + # extract features + retrieval_path = extract_features.main( + retrieval_conf, temp_images, self.outputs + ) + pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k) + + feature_path = extract_features.main( + feature_conf, temp_images, self.outputs + ) + # match features + match_path = match_features.main( + match_conf, sfm_pairs, feature_conf["output"], self.outputs + ) + # reconstruction + already_sfm = False + if sfm_dir.exists(): + try: + model = pycolmap.Reconstruction(str(sfm_dir)) + already_sfm = True + except ValueError: + logger.info(f"sfm_dir not exists model: {sfm_dir}") + if not already_sfm: + model = reconstruction.main( + sfm_dir, + temp_images, + sfm_pairs, + feature_path, + match_path, + mapper_options=mapper_options, + ) + + vertices = [] + for point3D_id, point3D in model.points3D.items(): + vertices.append([point3D.xyz, point3D.color]) + + model_3d = sfm_dir / "points3D.obj" + with open(model_3d, "w") as f: + for p, c in vertices: + # Write vertex position + f.write("v {} {} {}\n".format(p[0], p[1], p[2])) + # Write vertex normal (color) + f.write( + "vn {} {} {}\n".format( + c[0] / 255.0, c[1] / 255.0, c[2] / 255.0 + ) + ) + viz_2d = visualization.visualize_sfm_2d( + model, temp_images, color_by="visibility", n=2, 
dpi=300
+        )
+        return model_3d, fig2im(viz_2d) / 255.0
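
For reference, the reworked compute_epipolar_errors in hloc/utils/geometry.py now takes a single pycolmap.Rigid3d relative pose (pycolmap>=0.6) instead of a qvec/tvec pair, and returns only the per-point epipolar errors. A minimal usage sketch follows; the reconstruction path, image ids, and keypoints are placeholders, and it assumes pycolmap's Camera.cam_from_img for lifting keypoints to the normalized image plane and Rigid3d composition with `*` for building the relative pose.

import numpy as np
import pycolmap

from hloc.utils.geometry import compute_epipolar_errors

# Placeholder reconstruction and image ids; any two registered views will do.
rec = pycolmap.Reconstruction("outputs/sfm")
image_i, image_j = rec.images[1], rec.images[2]
cam_i = rec.cameras[image_i.camera_id]
cam_j = rec.cameras[image_j.camera_id]

# Placeholder matched keypoints in pixels, lifted to the normalized image plane.
kpts_i = np.random.rand(50, 2) * [cam_i.width, cam_i.height]
kpts_j = np.random.rand(50, 2) * [cam_j.width, cam_j.height]
p2d_i = np.array([cam_i.cam_from_img(kp) for kp in kpts_i])
p2d_j = np.array([cam_j.cam_from_img(kp) for kp in kpts_j])

# Relative pose taking camera-i coordinates into camera-j coordinates.
j_from_i = image_j.cam_from_world * image_i.cam_from_world.inverse()

errors_i, errors_j = compute_epipolar_errors(j_from_i, p2d_i, p2d_j)
inliers = (errors_i < 1e-3) & (errors_j < 1e-3)  # threshold is illustrative

This is the same pycolmap-0.6 pose convention that plot_camera_colmap in hloc/utils/viz_3d.py relies on when it recovers the world-from-camera pose via image.cam_from_world.inverse().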