From b1baa6d60678ede66ebb532a6d022dd36f76e515 Mon Sep 17 00:00:00 2001
From: Realcat
Date: Wed, 17 Jul 2024 00:13:25 +0800
Subject: [PATCH] Add: mast3r (#48)

* add: sfm

* add: mast3r
---
 .gitignore                                   |   5 +
 .gitmodules                                  |   3 +
 Dockerfile                                   |   2 +-
 README.md                                    |   1 +
 hloc/__init__.py                             |  24 +-
 hloc/colmap_from_nvm.py                      | 220 +++++++
 hloc/extract_features.py                     |   5 +
 hloc/extractors/eigenplaces.py               |  57 ++
 hloc/localize_inloc.py                       | 183 ++++++
 hloc/localize_sfm.py                         | 247 ++++++++
 hloc/match_dense.py                          | 584 +++++++++++++++++++
 hloc/matchers/mast3r.py                      | 111 ++++
 hloc/matchers/superglue.py                   |   4 +-
 hloc/pairs_from_covisibility.py              |  60 ++
 hloc/pairs_from_exhaustive.py                |  66 +++
 hloc/pairs_from_poses.py                     |  70 +++
 hloc/pairs_from_retrieval.py                 | 137 +++++
 hloc/pipelines/4Seasons/localize.py          |  27 +-
 hloc/pipelines/4Seasons/prepare_reference.py |   8 +-
 hloc/pipelines/4Seasons/utils.py             |  37 +-
 hloc/pipelines/7Scenes/create_gt_sfm.py      |  27 +-
 hloc/pipelines/7Scenes/pipeline.py           |  24 +-
 hloc/pipelines/7Scenes/utils.py              |   1 +
 hloc/pipelines/Aachen/README.md              |   2 +-
 hloc/pipelines/Aachen/pipeline.py            | 183 +++---
 hloc/pipelines/Aachen_v1_1/README.md         |   3 +-
 hloc/pipelines/Aachen_v1_1/pipeline.py       | 171 +++---
 hloc/pipelines/Aachen_v1_1/pipeline_loftr.py | 172 +++---
 hloc/pipelines/CMU/pipeline.py               |  45 +-
 hloc/pipelines/Cambridge/pipeline.py         |  33 +-
 hloc/pipelines/Cambridge/utils.py            |  13 +-
 hloc/pipelines/RobotCar/colmap_from_nvm.py   |  26 +-
 hloc/pipelines/RobotCar/pipeline.py          | 191 +++---
 hloc/reconstruction.py                       | 199 +++++++
 hloc/triangulation.py                        | 320 ++++++++++
 hloc/utils/database.py                       |  32 +-
 hloc/utils/geometry.py                       |  33 +-
 hloc/utils/parsers.py                        |   9 +-
 hloc/utils/read_write_model.py               |  73 +--
 hloc/utils/viz.py                            |  56 +-
 hloc/utils/viz_3d.py                         |  81 ++-
 hloc/visualization.py                        | 182 ++++++
 requirements.txt                             |   5 +-
 third_party/mast3r                           |   1 +
 ui/app_class.py                              | 296 +++++++++-
 ui/config.yaml                               |  21 +-
 ui/sfm.py                                    | 164 ++++++
 47 files changed, 3515 insertions(+), 699 deletions(-)
 create mode 100644 hloc/colmap_from_nvm.py
 create mode 100644 hloc/extractors/eigenplaces.py
 create mode 100644 hloc/localize_inloc.py
 create mode 100644 hloc/localize_sfm.py
 create mode 100644 hloc/matchers/mast3r.py
 create mode 100644 hloc/pairs_from_covisibility.py
 create mode 100644 hloc/pairs_from_exhaustive.py
 create mode 100644 hloc/pairs_from_poses.py
 create mode 100644 hloc/pairs_from_retrieval.py
 create mode 100644 hloc/reconstruction.py
 create mode 100644 hloc/triangulation.py
 create mode 100644 hloc/visualization.py
 create mode 160000 third_party/mast3r
 create mode 100644 ui/sfm.py

diff --git a/.gitignore b/.gitignore
index 908ae94..371e266 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,8 @@ third_party/QuadTreeAttention
 desktop.ini
 *.egg-info
 output.pkl
+experiments*
+gen_example.py
+datasets/lines/terrace0.JPG
+datasets/lines/terrace1.JPG
+datasets/South-Building*
diff --git a/.gitmodules b/.gitmodules
index 336af93..e4ee749 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -64,3 +64,6 @@
 [submodule "third_party/DarkFeat"]
 	path = third_party/DarkFeat
 	url = https://github.com/agipro/DarkFeat.git
+[submodule "third_party/mast3r"]
+	path = third_party/mast3r
+	url = https://github.com/naver/mast3r
diff --git a/Dockerfile b/Dockerfile
index 9501dec..863cdf8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ ENV PATH /opt/conda/envs/imw/bin:$PATH
 # Make RUN commands use the new environment
 SHELL ["conda", "run", "-n", "imw", "/bin/bash", "-c"]
 RUN pip install --upgrade pip
-RUN pip install -r env-docker.txt
+RUN pip install -r requirements.txt
 RUN apt-get update && 
apt-get install ffmpeg libsm6 libxext6 -y # Export port diff --git a/README.md b/README.md index bd02d4b..2a3ca15 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Here is a demo of the tool: https://github.com/Vincentqyw/image-matching-webui/assets/18531182/263534692-c3484d1b-cc00-4fdc-9b31-e5b7af07ecd9 The tool currently supports various popular image matching algorithms, namely: +- [x] [MASt3R](https://github.com/naver/mast3r), CVPR 2024 - [x] [DUSt3R](https://github.com/naver/dust3r), CVPR 2024 - [x] [OmniGlue](https://github.com/Vincentqyw/omniglue-onnx), CVPR 2024 - [x] [XFeat](https://github.com/verlab/accelerated_features), CVPR 2024 diff --git a/hloc/__init__.py b/hloc/__init__.py index 7c2e3dd..cf64b08 100644 --- a/hloc/__init__.py +++ b/hloc/__init__.py @@ -3,7 +3,7 @@ import torch from packaging import version -__version__ = "1.3" +__version__ = "1.5" formatter = logging.Formatter( fmt="[%(asctime)s %(name)s %(levelname)s] %(message)s", @@ -23,14 +23,18 @@ except ImportError: logger.warning("pycolmap is not installed, some features may not work.") else: - minimal_version = version.parse("0.3.0") - found_version = version.parse(getattr(pycolmap, "__version__")) - if found_version < minimal_version: - logger.warning( - "hloc now requires pycolmap>=%s but found pycolmap==%s, " - "please upgrade with `pip install --upgrade pycolmap`", - minimal_version, - found_version, - ) + min_version = version.parse("0.6.0") + found_version = pycolmap.__version__ + if found_version != "dev": + version = version.parse(found_version) + if version < min_version: + s = f"pycolmap>={min_version}" + logger.warning( + "hloc requires %s but found pycolmap==%s, " + 'please upgrade with `pip install --upgrade "%s"`', + s, + found_version, + s, + ) DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") diff --git a/hloc/colmap_from_nvm.py b/hloc/colmap_from_nvm.py new file mode 100644 index 0000000..1f3ad89 --- /dev/null +++ b/hloc/colmap_from_nvm.py @@ -0,0 +1,220 @@ +import argparse +import sqlite3 +from collections import defaultdict +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from . import logger +from .utils.read_write_model import ( + CAMERA_MODEL_NAMES, + Camera, + Image, + Point3D, + write_model, +) + + +def recover_database_images_and_ids(database_path): + images = {} + cameras = {} + db = sqlite3.connect(str(database_path)) + ret = db.execute("SELECT name, image_id, camera_id FROM images;") + for name, image_id, camera_id in ret: + images[name] = image_id + cameras[name] = camera_id + db.close() + logger.info( + f"Found {len(images)} images and {len(cameras)} cameras in database." 
+ ) + return images, cameras + + +def quaternion_to_rotation_matrix(qvec): + qvec = qvec / np.linalg.norm(qvec) + w, x, y, z = qvec + R = np.array( + [ + [ + 1 - 2 * y * y - 2 * z * z, + 2 * x * y - 2 * z * w, + 2 * x * z + 2 * y * w, + ], + [ + 2 * x * y + 2 * z * w, + 1 - 2 * x * x - 2 * z * z, + 2 * y * z - 2 * x * w, + ], + [ + 2 * x * z - 2 * y * w, + 2 * y * z + 2 * x * w, + 1 - 2 * x * x - 2 * y * y, + ], + ] + ) + return R + + +def camera_center_to_translation(c, qvec): + R = quaternion_to_rotation_matrix(qvec) + return (-1) * np.matmul(R, c) + + +def read_nvm_model( + nvm_path, intrinsics_path, image_ids, camera_ids, skip_points=False +): + with open(intrinsics_path, "r") as f: + raw_intrinsics = f.readlines() + + logger.info(f"Reading {len(raw_intrinsics)} cameras...") + cameras = {} + for intrinsics in raw_intrinsics: + intrinsics = intrinsics.strip("\n").split(" ") + name, camera_model, width, height = intrinsics[:4] + params = [float(p) for p in intrinsics[4:]] + camera_model = CAMERA_MODEL_NAMES[camera_model] + assert len(params) == camera_model.num_params + camera_id = camera_ids[name] + camera = Camera( + id=camera_id, + model=camera_model.model_name, + width=int(width), + height=int(height), + params=params, + ) + cameras[camera_id] = camera + + nvm_f = open(nvm_path, "r") + line = nvm_f.readline() + while line == "\n" or line.startswith("NVM_V3"): + line = nvm_f.readline() + num_images = int(line) + assert num_images == len(cameras) + + logger.info(f"Reading {num_images} images...") + image_idx_to_db_image_id = [] + image_data = [] + i = 0 + while i < num_images: + line = nvm_f.readline() + if line == "\n": + continue + data = line.strip("\n").split(" ") + image_data.append(data) + image_idx_to_db_image_id.append(image_ids[data[0]]) + i += 1 + + line = nvm_f.readline() + while line == "\n": + line = nvm_f.readline() + num_points = int(line) + + if skip_points: + logger.info(f"Skipping {num_points} points.") + num_points = 0 + else: + logger.info(f"Reading {num_points} points...") + points3D = {} + image_idx_to_keypoints = defaultdict(list) + i = 0 + pbar = tqdm(total=num_points, unit="pts") + while i < num_points: + line = nvm_f.readline() + if line == "\n": + continue + + data = line.strip("\n").split(" ") + x, y, z, r, g, b, num_observations = data[:7] + obs_image_ids, point2D_idxs = [], [] + for j in range(int(num_observations)): + s = 7 + 4 * j + img_index, kp_index, kx, ky = data[s : s + 4] + image_idx_to_keypoints[int(img_index)].append( + (int(kp_index), float(kx), float(ky), i) + ) + db_image_id = image_idx_to_db_image_id[int(img_index)] + obs_image_ids.append(db_image_id) + point2D_idxs.append(kp_index) + + point = Point3D( + id=i, + xyz=np.array([x, y, z], float), + rgb=np.array([r, g, b], int), + error=1.0, # fake + image_ids=np.array(obs_image_ids, int), + point2D_idxs=np.array(point2D_idxs, int), + ) + points3D[i] = point + + i += 1 + pbar.update(1) + pbar.close() + + logger.info("Parsing image data...") + images = {} + for i, data in enumerate(image_data): + # Skip the focal length. Skip the distortion and terminal 0. 
+ name, _, qw, qx, qy, qz, cx, cy, cz, _, _ = data + qvec = np.array([qw, qx, qy, qz], float) + c = np.array([cx, cy, cz], float) + t = camera_center_to_translation(c, qvec) + + if i in image_idx_to_keypoints: + # NVM only stores triangulated 2D keypoints: add dummy ones + keypoints = image_idx_to_keypoints[i] + point2D_idxs = np.array([d[0] for d in keypoints]) + tri_xys = np.array([[x, y] for _, x, y, _ in keypoints]) + tri_ids = np.array([i for _, _, _, i in keypoints]) + + num_2Dpoints = max(point2D_idxs) + 1 + xys = np.zeros((num_2Dpoints, 2), float) + point3D_ids = np.full(num_2Dpoints, -1, int) + xys[point2D_idxs] = tri_xys + point3D_ids[point2D_idxs] = tri_ids + else: + xys = np.zeros((0, 2), float) + point3D_ids = np.full(0, -1, int) + + image_id = image_ids[name] + image = Image( + id=image_id, + qvec=qvec, + tvec=t, + camera_id=camera_ids[name], + name=name, + xys=xys, + point3D_ids=point3D_ids, + ) + images[image_id] = image + + return cameras, images, points3D + + +def main(nvm, intrinsics, database, output, skip_points=False): + assert nvm.exists(), nvm + assert intrinsics.exists(), intrinsics + assert database.exists(), database + + image_ids, camera_ids = recover_database_images_and_ids(database) + + logger.info("Reading the NVM model...") + model = read_nvm_model( + nvm, intrinsics, image_ids, camera_ids, skip_points=skip_points + ) + + logger.info("Writing the COLMAP model...") + output.mkdir(exist_ok=True, parents=True) + write_model(*model, path=str(output), ext=".bin") + logger.info("Done.") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--nvm", required=True, type=Path) + parser.add_argument("--intrinsics", required=True, type=Path) + parser.add_argument("--database", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--skip_points", action="store_true") + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/extract_features.py b/hloc/extract_features.py index 1e66f03..62eeb45 100644 --- a/hloc/extract_features.py +++ b/hloc/extract_features.py @@ -329,6 +329,11 @@ "model": {"name": "cosplace"}, "preprocessing": {"resize_max": 1024}, }, + "eigenplaces": { + "output": "global-feats-eigenplaces", + "model": {"name": "eigenplaces"}, + "preprocessing": {"resize_max": 1024}, + }, } diff --git a/hloc/extractors/eigenplaces.py b/hloc/extractors/eigenplaces.py new file mode 100644 index 0000000..fd9953b --- /dev/null +++ b/hloc/extractors/eigenplaces.py @@ -0,0 +1,57 @@ +""" +Code for loading models trained with EigenPlaces (or CosPlace) as a global +features extractor for geolocalization through image retrieval. +Multiple models are available with different backbones. Below is a summary of +models available (backbone : list of available output descriptors +dimensionality). For example you can use a model based on a ResNet50 with +descriptors dimensionality 1024. 
+ +EigenPlaces trained models: + ResNet18: [ 256, 512] + ResNet50: [128, 256, 512, 2048] + ResNet101: [128, 256, 512, 2048] + VGG16: [ 512] + +CosPlace trained models: + ResNet18: [32, 64, 128, 256, 512] + ResNet50: [32, 64, 128, 256, 512, 1024, 2048] + ResNet101: [32, 64, 128, 256, 512, 1024, 2048] + ResNet152: [32, 64, 128, 256, 512, 1024, 2048] + VGG16: [ 64, 128, 256, 512] + +EigenPlaces paper (ICCV 2023): https://arxiv.org/abs/2308.10832 +CosPlace paper (CVPR 2022): https://arxiv.org/abs/2204.02287 +""" + +import torch +import torchvision.transforms as tvf + +from ..utils.base_model import BaseModel + + +class EigenPlaces(BaseModel): + default_conf = { + "variant": "EigenPlaces", + "backbone": "ResNet101", + "fc_output_dim": 2048, + } + required_inputs = ["image"] + + def _init(self, conf): + self.net = torch.hub.load( + "gmberton/" + conf["variant"], + "get_trained_model", + backbone=conf["backbone"], + fc_output_dim=conf["fc_output_dim"], + ).eval() + + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + self.norm_rgb = tvf.Normalize(mean=mean, std=std) + + def _forward(self, data): + image = self.norm_rgb(data["image"]) + desc = self.net(image) + return { + "global_descriptor": desc, + } diff --git a/hloc/localize_inloc.py b/hloc/localize_inloc.py new file mode 100644 index 0000000..1e003b1 --- /dev/null +++ b/hloc/localize_inloc.py @@ -0,0 +1,183 @@ +import argparse +import pickle +from pathlib import Path + +import cv2 +import h5py +import numpy as np +import pycolmap +import torch +from scipy.io import loadmat +from tqdm import tqdm + +from . import logger +from .utils.parsers import names_to_pair, parse_retrieval + + +def interpolate_scan(scan, kp): + h, w, c = scan.shape + kp = kp / np.array([[w - 1, h - 1]]) * 2 - 1 + assert np.all(kp > -1) and np.all(kp < 1) + scan = torch.from_numpy(scan).permute(2, 0, 1)[None] + kp = torch.from_numpy(kp)[None, None] + grid_sample = torch.nn.functional.grid_sample + + # To maximize the number of points that have depth: + # do bilinear interpolation first and then nearest for the remaining points + interp_lin = grid_sample(scan, kp, align_corners=True, mode="bilinear")[ + 0, :, 0 + ] + interp_nn = torch.nn.functional.grid_sample( + scan, kp, align_corners=True, mode="nearest" + )[0, :, 0] + interp = torch.where(torch.isnan(interp_lin), interp_nn, interp_lin) + valid = ~torch.any(torch.isnan(interp), 0) + + kp3d = interp.T.numpy() + valid = valid.numpy() + return kp3d, valid + + +def get_scan_pose(dataset_dir, rpath): + split_image_rpath = rpath.split("/") + floor_name = split_image_rpath[-3] + scan_id = split_image_rpath[-2] + image_name = split_image_rpath[-1] + building_name = image_name[:3] + + path = Path( + dataset_dir, + "database/alignments", + floor_name, + f"transformations/{building_name}_trans_{scan_id}.txt", + ) + with open(path) as f: + raw_lines = f.readlines() + + P_after_GICP = np.array( + [ + np.fromstring(raw_lines[7], sep=" "), + np.fromstring(raw_lines[8], sep=" "), + np.fromstring(raw_lines[9], sep=" "), + np.fromstring(raw_lines[10], sep=" "), + ] + ) + + return P_after_GICP + + +def pose_from_cluster( + dataset_dir, q, retrieved, feature_file, match_file, skip=None +): + height, width = cv2.imread(str(dataset_dir / q)).shape[:2] + cx = 0.5 * width + cy = 0.5 * height + focal_length = 4032.0 * 28.0 / 36.0 + + all_mkpq = [] + all_mkpr = [] + all_mkp3d = [] + all_indices = [] + kpq = feature_file[q]["keypoints"].__array__() + num_matches = 0 + + for i, r in enumerate(retrieved): + kpr = 
feature_file[r]["keypoints"].__array__() + pair = names_to_pair(q, r) + m = match_file[pair]["matches0"].__array__() + v = m > -1 + + if skip and (np.count_nonzero(v) < skip): + continue + + mkpq, mkpr = kpq[v], kpr[m[v]] + num_matches += len(mkpq) + + scan_r = loadmat(Path(dataset_dir, r + ".mat"))["XYZcut"] + mkp3d, valid = interpolate_scan(scan_r, mkpr) + Tr = get_scan_pose(dataset_dir, r) + mkp3d = (Tr[:3, :3] @ mkp3d.T + Tr[:3, -1:]).T + + all_mkpq.append(mkpq[valid]) + all_mkpr.append(mkpr[valid]) + all_mkp3d.append(mkp3d[valid]) + all_indices.append(np.full(np.count_nonzero(valid), i)) + + all_mkpq = np.concatenate(all_mkpq, 0) + all_mkpr = np.concatenate(all_mkpr, 0) + all_mkp3d = np.concatenate(all_mkp3d, 0) + all_indices = np.concatenate(all_indices, 0) + + cfg = { + "model": "SIMPLE_PINHOLE", + "width": width, + "height": height, + "params": [focal_length, cx, cy], + } + ret = pycolmap.absolute_pose_estimation(all_mkpq, all_mkp3d, cfg, 48.00) + ret["cfg"] = cfg + return ret, all_mkpq, all_mkpr, all_mkp3d, all_indices, num_matches + + +def main(dataset_dir, retrieval, features, matches, results, skip_matches=None): + assert retrieval.exists(), retrieval + assert features.exists(), features + assert matches.exists(), matches + + retrieval_dict = parse_retrieval(retrieval) + queries = list(retrieval_dict.keys()) + + feature_file = h5py.File(features, "r", libver="latest") + match_file = h5py.File(matches, "r", libver="latest") + + poses = {} + logs = { + "features": features, + "matches": matches, + "retrieval": retrieval, + "loc": {}, + } + logger.info("Starting localization...") + for q in tqdm(queries): + db = retrieval_dict[q] + ret, mkpq, mkpr, mkp3d, indices, num_matches = pose_from_cluster( + dataset_dir, q, db, feature_file, match_file, skip_matches + ) + + poses[q] = (ret["qvec"], ret["tvec"]) + logs["loc"][q] = { + "db": db, + "PnP_ret": ret, + "keypoints_query": mkpq, + "keypoints_db": mkpr, + "3d_points": mkp3d, + "indices_db": indices, + "num_matches": num_matches, + } + + logger.info(f"Writing poses to {results}...") + with open(results, "w") as f: + for q in queries: + qvec, tvec = poses[q] + qvec = " ".join(map(str, qvec)) + tvec = " ".join(map(str, tvec)) + name = q.split("/")[-1] + f.write(f"{name} {qvec} {tvec}\n") + + logs_path = f"{results}_logs.pkl" + logger.info(f"Writing logs to {logs_path}...") + with open(logs_path, "wb") as f: + pickle.dump(logs, f) + logger.info("Done!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_dir", type=Path, required=True) + parser.add_argument("--retrieval", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + parser.add_argument("--results", type=Path, required=True) + parser.add_argument("--skip_matches", type=int) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/localize_sfm.py b/hloc/localize_sfm.py new file mode 100644 index 0000000..a1cb672 --- /dev/null +++ b/hloc/localize_sfm.py @@ -0,0 +1,247 @@ +import argparse +import pickle +from collections import defaultdict +from pathlib import Path +from typing import Dict, List, Union + +import numpy as np +import pycolmap +from tqdm import tqdm + +from . 
import logger +from .utils.io import get_keypoints, get_matches +from .utils.parsers import parse_image_lists, parse_retrieval + + +def do_covisibility_clustering( + frame_ids: List[int], reconstruction: pycolmap.Reconstruction +): + clusters = [] + visited = set() + for frame_id in frame_ids: + # Check if already labeled + if frame_id in visited: + continue + + # New component + clusters.append([]) + queue = {frame_id} + while len(queue): + exploration_frame = queue.pop() + + # Already part of the component + if exploration_frame in visited: + continue + visited.add(exploration_frame) + clusters[-1].append(exploration_frame) + + observed = reconstruction.images[exploration_frame].points2D + connected_frames = { + obs.image_id + for p2D in observed + if p2D.has_point3D() + for obs in reconstruction.points3D[ + p2D.point3D_id + ].track.elements + } + connected_frames &= set(frame_ids) + connected_frames -= visited + queue |= connected_frames + + clusters = sorted(clusters, key=len, reverse=True) + return clusters + + +class QueryLocalizer: + def __init__(self, reconstruction, config=None): + self.reconstruction = reconstruction + self.config = config or {} + + def localize(self, points2D_all, points2D_idxs, points3D_id, query_camera): + points2D = points2D_all[points2D_idxs] + points3D = [self.reconstruction.points3D[j].xyz for j in points3D_id] + ret = pycolmap.absolute_pose_estimation( + points2D, + points3D, + query_camera, + estimation_options=self.config.get("estimation", {}), + refinement_options=self.config.get("refinement", {}), + ) + return ret + + +def pose_from_cluster( + localizer: QueryLocalizer, + qname: str, + query_camera: pycolmap.Camera, + db_ids: List[int], + features_path: Path, + matches_path: Path, + **kwargs, +): + kpq = get_keypoints(features_path, qname) + kpq += 0.5 # COLMAP coordinates + + kp_idx_to_3D = defaultdict(list) + kp_idx_to_3D_to_db = defaultdict(lambda: defaultdict(list)) + num_matches = 0 + for i, db_id in enumerate(db_ids): + image = localizer.reconstruction.images[db_id] + if image.num_points3D == 0: + logger.debug(f"No 3D points found for {image.name}.") + continue + points3D_ids = np.array( + [p.point3D_id if p.has_point3D() else -1 for p in image.points2D] + ) + + matches, _ = get_matches(matches_path, qname, image.name) + matches = matches[points3D_ids[matches[:, 1]] != -1] + num_matches += len(matches) + for idx, m in matches: + id_3D = points3D_ids[m] + kp_idx_to_3D_to_db[idx][id_3D].append(i) + # avoid duplicate observations + if id_3D not in kp_idx_to_3D[idx]: + kp_idx_to_3D[idx].append(id_3D) + + idxs = list(kp_idx_to_3D.keys()) + mkp_idxs = [i for i in idxs for _ in kp_idx_to_3D[i]] + mp3d_ids = [j for i in idxs for j in kp_idx_to_3D[i]] + ret = localizer.localize(kpq, mkp_idxs, mp3d_ids, query_camera, **kwargs) + if ret is not None: + ret["camera"] = query_camera + + # mostly for logging and post-processing + mkp_to_3D_to_db = [ + (j, kp_idx_to_3D_to_db[i][j]) for i in idxs for j in kp_idx_to_3D[i] + ] + log = { + "db": db_ids, + "PnP_ret": ret, + "keypoints_query": kpq[mkp_idxs], + "points3D_ids": mp3d_ids, + "points3D_xyz": None, # we don't log xyz anymore because of file size + "num_matches": num_matches, + "keypoint_index_to_db": (mkp_idxs, mkp_to_3D_to_db), + } + return ret, log + + +def main( + reference_sfm: Union[Path, pycolmap.Reconstruction], + queries: Path, + retrieval: Path, + features: Path, + matches: Path, + results: Path, + ransac_thresh: int = 12, + covisibility_clustering: bool = False, + prepend_camera_name: bool = 
False, + config: Dict = None, +): + assert retrieval.exists(), retrieval + assert features.exists(), features + assert matches.exists(), matches + + queries = parse_image_lists(queries, with_intrinsics=True) + retrieval_dict = parse_retrieval(retrieval) + + logger.info("Reading the 3D model...") + if not isinstance(reference_sfm, pycolmap.Reconstruction): + reference_sfm = pycolmap.Reconstruction(reference_sfm) + db_name_to_id = {img.name: i for i, img in reference_sfm.images.items()} + + config = { + "estimation": {"ransac": {"max_error": ransac_thresh}}, + **(config or {}), + } + localizer = QueryLocalizer(reference_sfm, config) + + cam_from_world = {} + logs = { + "features": features, + "matches": matches, + "retrieval": retrieval, + "loc": {}, + } + logger.info("Starting localization...") + for qname, qcam in tqdm(queries): + if qname not in retrieval_dict: + logger.warning( + f"No images retrieved for query image {qname}. Skipping..." + ) + continue + db_names = retrieval_dict[qname] + db_ids = [] + for n in db_names: + if n not in db_name_to_id: + logger.warning(f"Image {n} was retrieved but not in database") + continue + db_ids.append(db_name_to_id[n]) + + if covisibility_clustering: + clusters = do_covisibility_clustering(db_ids, reference_sfm) + best_inliers = 0 + best_cluster = None + logs_clusters = [] + for i, cluster_ids in enumerate(clusters): + ret, log = pose_from_cluster( + localizer, qname, qcam, cluster_ids, features, matches + ) + if ret is not None and ret["num_inliers"] > best_inliers: + best_cluster = i + best_inliers = ret["num_inliers"] + logs_clusters.append(log) + if best_cluster is not None: + ret = logs_clusters[best_cluster]["PnP_ret"] + cam_from_world[qname] = ret["cam_from_world"] + logs["loc"][qname] = { + "db": db_ids, + "best_cluster": best_cluster, + "log_clusters": logs_clusters, + "covisibility_clustering": covisibility_clustering, + } + else: + ret, log = pose_from_cluster( + localizer, qname, qcam, db_ids, features, matches + ) + if ret is not None: + cam_from_world[qname] = ret["cam_from_world"] + else: + closest = reference_sfm.images[db_ids[0]] + cam_from_world[qname] = closest.cam_from_world + log["covisibility_clustering"] = covisibility_clustering + logs["loc"][qname] = log + + logger.info(f"Localized {len(cam_from_world)} / {len(queries)} images.") + logger.info(f"Writing poses to {results}...") + with open(results, "w") as f: + for query, t in cam_from_world.items(): + qvec = " ".join(map(str, t.rotation.quat[[3, 0, 1, 2]])) + tvec = " ".join(map(str, t.translation)) + name = query.split("/")[-1] + if prepend_camera_name: + name = query.split("/")[-2] + "/" + name + f.write(f"{name} {qvec} {tvec}\n") + + logs_path = f"{results}_logs.pkl" + logger.info(f"Writing logs to {logs_path}...") + # TODO: Resolve pickling issue with pycolmap objects. 
+ with open(logs_path, "wb") as f: + pickle.dump(logs, f) + logger.info("Done!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--reference_sfm", type=Path, required=True) + parser.add_argument("--queries", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + parser.add_argument("--retrieval", type=Path, required=True) + parser.add_argument("--results", type=Path, required=True) + parser.add_argument("--ransac_thresh", type=float, default=12.0) + parser.add_argument("--covisibility_clustering", action="store_true") + parser.add_argument("--prepend_camera_name", action="store_true") + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/match_dense.py b/hloc/match_dense.py index ac95d93..e1253d7 100644 --- a/hloc/match_dense.py +++ b/hloc/match_dense.py @@ -1,11 +1,25 @@ +import argparse +import pprint +from collections import Counter, defaultdict +from itertools import chain +from pathlib import Path from types import SimpleNamespace +from typing import Dict, Iterable, List, Optional, Set, Tuple, Union import cv2 +import h5py import numpy as np import torch import torchvision.transforms.functional as F +from scipy.spatial import KDTree +from tqdm import tqdm +from . import logger, matchers from .extract_features import read_image, resize_image +from .match_features import find_unique_new_pairs +from .utils.base_model import dynamic_load +from .utils.io import list_h5_names +from .utils.parsers import names_to_pair, parse_retrieval device = "cuda" if torch.cuda.is_available() else "cpu" @@ -144,6 +158,20 @@ "dfactor": 16, }, }, + "mast3r": { + "output": "matches-mast3r", + "model": { + "name": "mast3r", + "weights": "vit_large", + "max_keypoints": 2000, + "match_threshold": 0.2, + }, + "preprocessing": { + "grayscale": False, + "resize_max": 512, + "dfactor": 16, + }, + }, "xfeat_dense": { "output": "matches-xfeat_dense", "model": { @@ -262,12 +290,483 @@ } +def to_cpts(kpts, ps): + if ps > 0.0: + kpts = np.round(np.round((kpts + 0.5) / ps) * ps - 0.5, 2) + return [tuple(cpt) for cpt in kpts] + + +def assign_keypoints( + kpts: np.ndarray, + other_cpts: Union[List[Tuple], np.ndarray], + max_error: float, + update: bool = False, + ref_bins: Optional[List[Counter]] = None, + scores: Optional[np.ndarray] = None, + cell_size: Optional[int] = None, +): + if not update: + # Without update this is just a NN search + if len(other_cpts) == 0 or len(kpts) == 0: + return np.full(len(kpts), -1) + dist, kpt_ids = KDTree(np.array(other_cpts)).query(kpts) + valid = dist <= max_error + kpt_ids[~valid] = -1 + return kpt_ids + else: + ps = cell_size if cell_size is not None else max_error + ps = max(ps, max_error) + # With update we quantize and bin (optionally) + assert isinstance(other_cpts, list) + kpt_ids = [] + cpts = to_cpts(kpts, ps) + bpts = to_cpts(kpts, int(max_error)) + cp_to_id = {val: i for i, val in enumerate(other_cpts)} + for i, (cpt, bpt) in enumerate(zip(cpts, bpts)): + try: + kid = cp_to_id[cpt] + except KeyError: + kid = len(cp_to_id) + cp_to_id[cpt] = kid + other_cpts.append(cpt) + if ref_bins is not None: + ref_bins.append(Counter()) + if ref_bins is not None: + score = scores[i] if scores is not None else 1 + ref_bins[cp_to_id[cpt]][bpt] += score + kpt_ids.append(kid) + return np.array(kpt_ids) + + +def get_grouped_ids(array): + # Group array indices based on its values + # all duplicates are grouped as a set + idx_sort = 
np.argsort(array) + sorted_array = array[idx_sort] + _, ids, _ = np.unique(sorted_array, return_counts=True, return_index=True) + res = np.split(idx_sort, ids[1:]) + return res + + +def get_unique_matches(match_ids, scores): + if len(match_ids.shape) == 1: + return [0] + + isets1 = get_grouped_ids(match_ids[:, 0]) + isets2 = get_grouped_ids(match_ids[:, 1]) + uid1s = [ids[scores[ids].argmax()] for ids in isets1 if len(ids) > 0] + uid2s = [ids[scores[ids].argmax()] for ids in isets2 if len(ids) > 0] + uids = list(set(uid1s).intersection(uid2s)) + return match_ids[uids], scores[uids] + + +def matches_to_matches0(matches, scores): + if len(matches) == 0: + return np.zeros(0, dtype=np.int32), np.zeros(0, dtype=np.float16) + n_kps0 = np.max(matches[:, 0]) + 1 + matches0 = -np.ones((n_kps0,)) + scores0 = np.zeros((n_kps0,)) + matches0[matches[:, 0]] = matches[:, 1] + scores0[matches[:, 0]] = scores + return matches0.astype(np.int32), scores0.astype(np.float16) + + +def kpids_to_matches0(kpt_ids0, kpt_ids1, scores): + valid = (kpt_ids0 != -1) & (kpt_ids1 != -1) + matches = np.dstack([kpt_ids0[valid], kpt_ids1[valid]]) + matches = matches.reshape(-1, 2) + scores = scores[valid] + + # Remove n-to-1 matches + matches, scores = get_unique_matches(matches, scores) + return matches_to_matches0(matches, scores) + + def scale_keypoints(kpts, scale): if np.any(scale != 1.0): kpts *= kpts.new_tensor(scale) return kpts +class ImagePairDataset(torch.utils.data.Dataset): + default_conf = { + "grayscale": True, + "resize_max": 1024, + "dfactor": 8, + "cache_images": False, + } + + def __init__(self, image_dir, conf, pairs): + self.image_dir = image_dir + self.conf = conf = SimpleNamespace(**{**self.default_conf, **conf}) + self.pairs = pairs + if self.conf.cache_images: + image_names = set(sum(pairs, ())) # unique image names in pairs + logger.info( + f"Loading and caching {len(image_names)} unique images." 
+ ) + self.images = {} + self.scales = {} + for name in tqdm(image_names): + image = read_image(self.image_dir / name, self.conf.grayscale) + self.images[name], self.scales[name] = self.preprocess(image) + + def preprocess(self, image: np.ndarray): + image = image.astype(np.float32, copy=False) + size = image.shape[:2][::-1] + scale = np.array([1.0, 1.0]) + + if self.conf.resize_max: + scale = self.conf.resize_max / max(size) + if scale < 1.0: + size_new = tuple(int(round(x * scale)) for x in size) + image = resize_image(image, size_new, "cv2_area") + scale = np.array(size) / np.array(size_new) + + if self.conf.grayscale: + assert image.ndim == 2, image.shape + image = image[None] + else: + image = image.transpose((2, 0, 1)) # HxWxC to CxHxW + image = torch.from_numpy(image / 255.0).float() + + # assure that the size is divisible by dfactor + size_new = tuple( + map( + lambda x: int(x // self.conf.dfactor * self.conf.dfactor), + image.shape[-2:], + ) + ) + image = F.resize(image, size=size_new) + scale = np.array(size) / np.array(size_new)[::-1] + return image, scale + + def __len__(self): + return len(self.pairs) + + def __getitem__(self, idx): + name0, name1 = self.pairs[idx] + if self.conf.cache_images: + image0, scale0 = self.images[name0], self.scales[name0] + image1, scale1 = self.images[name1], self.scales[name1] + else: + image0 = read_image(self.image_dir / name0, self.conf.grayscale) + image1 = read_image(self.image_dir / name1, self.conf.grayscale) + image0, scale0 = self.preprocess(image0) + image1, scale1 = self.preprocess(image1) + return image0, image1, scale0, scale1, name0, name1 + + +@torch.no_grad() +def match_dense( + conf: Dict, + pairs: List[Tuple[str, str]], + image_dir: Path, + match_path: Path, # out + existing_refs: Optional[List] = [], +): + device = "cuda" if torch.cuda.is_available() else "cpu" + Model = dynamic_load(matchers, conf["model"]["name"]) + model = Model(conf["model"]).eval().to(device) + + dataset = ImagePairDataset(image_dir, conf["preprocessing"], pairs) + loader = torch.utils.data.DataLoader( + dataset, num_workers=16, batch_size=1, shuffle=False + ) + + logger.info("Performing dense matching...") + with h5py.File(str(match_path), "a") as fd: + for data in tqdm(loader, smoothing=0.1): + # load image-pair data + image0, image1, scale0, scale1, (name0,), (name1,) = data + scale0, scale1 = scale0[0].numpy(), scale1[0].numpy() + image0, image1 = image0.to(device), image1.to(device) + + # match semi-dense + # for consistency with pairs_from_*: refine kpts of image0 + if name0 in existing_refs: + # special case: flip to enable refinement in query image + pred = model({"image0": image1, "image1": image0}) + pred = { + **pred, + "keypoints0": pred["keypoints1"], + "keypoints1": pred["keypoints0"], + } + else: + # usual case + pred = model({"image0": image0, "image1": image1}) + + # Rescale keypoints and move to cpu + kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"] + kpts0 = scale_keypoints(kpts0 + 0.5, scale0) - 0.5 + kpts1 = scale_keypoints(kpts1 + 0.5, scale1) - 0.5 + kpts0 = kpts0.cpu().numpy() + kpts1 = kpts1.cpu().numpy() + scores = pred["scores"].cpu().numpy() + + # Write matches and matching scores in hloc format + pair = names_to_pair(name0, name1) + if pair in fd: + del fd[pair] + grp = fd.create_group(pair) + + # Write dense matching output + grp.create_dataset("keypoints0", data=kpts0) + grp.create_dataset("keypoints1", data=kpts1) + grp.create_dataset("scores", data=scores) + del model, loader + + +# default: quantize all! 
+def load_keypoints( + conf: Dict, feature_paths_refs: List[Path], quantize: Optional[set] = None +): + name2ref = { + n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p) + } + + existing_refs = set(name2ref.keys()) + if quantize is None: + quantize = existing_refs # quantize all + if len(existing_refs) > 0: + logger.info(f"Loading keypoints from {len(existing_refs)} images.") + + # Load query keypoints + cpdict = defaultdict(list) + bindict = defaultdict(list) + for name in existing_refs: + with h5py.File(str(feature_paths_refs[name2ref[name]]), "r") as fd: + kps = fd[name]["keypoints"].__array__() + if name not in quantize: + cpdict[name] = kps + else: + if "scores" in fd[name].keys(): + kp_scores = fd[name]["scores"].__array__() + else: + # we set the score to 1.0 if not provided + # increase for more weight on reference keypoints for + # stronger anchoring + kp_scores = [1.0 for _ in range(kps.shape[0])] + # bin existing keypoints of reference images for association + assign_keypoints( + kps, + cpdict[name], + conf["max_error"], + True, + bindict[name], + kp_scores, + conf["cell_size"], + ) + return cpdict, bindict + + +def aggregate_matches( + conf: Dict, + pairs: List[Tuple[str, str]], + match_path: Path, + feature_path: Path, + required_queries: Optional[Set[str]] = None, + max_kps: Optional[int] = None, + cpdict: Dict[str, Iterable] = defaultdict(list), + bindict: Dict[str, List[Counter]] = defaultdict(list), +): + if required_queries is None: + required_queries = set(sum(pairs, ())) + # default: do not overwrite existing features in feature_path! + required_queries -= set(list_h5_names(feature_path)) + + # if an entry in cpdict is provided as np.ndarray we assume it is fixed + required_queries -= set( + [k for k, v in cpdict.items() if isinstance(v, np.ndarray)] + ) + + # sort pairs for reduced RAM + pairs_per_q = Counter(list(chain(*pairs))) + pairs_score = [min(pairs_per_q[i], pairs_per_q[j]) for i, j in pairs] + pairs = [p for _, p in sorted(zip(pairs_score, pairs))] + + if len(required_queries) > 0: + logger.info( + f"Aggregating keypoints for {len(required_queries)} images." + ) + n_kps = 0 + with h5py.File(str(match_path), "a") as fd: + for name0, name1 in tqdm(pairs, smoothing=0.1): + pair = names_to_pair(name0, name1) + grp = fd[pair] + kpts0 = grp["keypoints0"].__array__() + kpts1 = grp["keypoints1"].__array__() + scores = grp["scores"].__array__() + + # Aggregate local features + update0 = name0 in required_queries + update1 = name1 in required_queries + + # in localization we do not want to bin the query kp + # assumes that the query is name0! 
+ if update0 and not update1 and max_kps is None: + max_error0 = cell_size0 = 0.0 + else: + max_error0 = conf["max_error"] + cell_size0 = conf["cell_size"] + + # Get match ids and extend query keypoints (cpdict) + mkp_ids0 = assign_keypoints( + kpts0, + cpdict[name0], + max_error0, + update0, + bindict[name0], + scores, + cell_size0, + ) + mkp_ids1 = assign_keypoints( + kpts1, + cpdict[name1], + conf["max_error"], + update1, + bindict[name1], + scores, + conf["cell_size"], + ) + + # Build matches from assignments + matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores) + + assert kpts0.shape[0] == scores.shape[0] + grp.create_dataset("matches0", data=matches0) + grp.create_dataset("matching_scores0", data=scores0) + + # Convert bins to kps if finished, and store them + for name in (name0, name1): + pairs_per_q[name] -= 1 + if pairs_per_q[name] > 0 or name not in required_queries: + continue + kp_score = [c.most_common(1)[0][1] for c in bindict[name]] + cpdict[name] = [c.most_common(1)[0][0] for c in bindict[name]] + cpdict[name] = np.array(cpdict[name], dtype=np.float32) + + # Select top-k query kps by score (reassign matches later) + if max_kps: + top_k = min(max_kps, cpdict[name].shape[0]) + top_k = np.argsort(kp_score)[::-1][:top_k] + cpdict[name] = cpdict[name][top_k] + kp_score = np.array(kp_score)[top_k] + + # Write query keypoints + with h5py.File(feature_path, "a") as kfd: + if name in kfd: + del kfd[name] + kgrp = kfd.create_group(name) + kgrp.create_dataset("keypoints", data=cpdict[name]) + kgrp.create_dataset("score", data=kp_score) + n_kps += cpdict[name].shape[0] + del bindict[name] + + if len(required_queries) > 0: + avg_kp_per_image = round(n_kps / len(required_queries), 1) + logger.info( + f"Finished assignment, found {avg_kp_per_image} " + f"keypoints/image (avg.), total {n_kps}." 
+ ) + return cpdict + + +def assign_matches( + pairs: List[Tuple[str, str]], + match_path: Path, + keypoints: Union[List[Path], Dict[str, np.array]], + max_error: float, +): + if isinstance(keypoints, list): + keypoints = load_keypoints({}, keypoints, kpts_as_bin=set([])) + assert len(set(sum(pairs, ())) - set(keypoints.keys())) == 0 + with h5py.File(str(match_path), "a") as fd: + for name0, name1 in tqdm(pairs): + pair = names_to_pair(name0, name1) + grp = fd[pair] + kpts0 = grp["keypoints0"].__array__() + kpts1 = grp["keypoints1"].__array__() + scores = grp["scores"].__array__() + + # NN search across cell boundaries + mkp_ids0 = assign_keypoints(kpts0, keypoints[name0], max_error) + mkp_ids1 = assign_keypoints(kpts1, keypoints[name1], max_error) + + matches0, scores0 = kpids_to_matches0(mkp_ids0, mkp_ids1, scores) + + # overwrite matches0 and matching_scores0 + del grp["matches0"], grp["matching_scores0"] + grp.create_dataset("matches0", data=matches0) + grp.create_dataset("matching_scores0", data=scores0) + + +@torch.no_grad() +def match_and_assign( + conf: Dict, + pairs_path: Path, + image_dir: Path, + match_path: Path, # out + feature_path_q: Path, # out + feature_paths_refs: Optional[List[Path]] = [], + max_kps: Optional[int] = 8192, + overwrite: bool = False, +) -> Path: + for path in feature_paths_refs: + if not path.exists(): + raise FileNotFoundError(f"Reference feature file {path}.") + pairs = parse_retrieval(pairs_path) + pairs = [(q, r) for q, rs in pairs.items() for r in rs] + pairs = find_unique_new_pairs(pairs, None if overwrite else match_path) + required_queries = set(sum(pairs, ())) + + name2ref = { + n: i for i, p in enumerate(feature_paths_refs) for n in list_h5_names(p) + } + existing_refs = required_queries.intersection(set(name2ref.keys())) + + # images which require feature extraction + required_queries = required_queries - existing_refs + + if feature_path_q.exists(): + existing_queries = set(list_h5_names(feature_path_q)) + feature_paths_refs.append(feature_path_q) + existing_refs = set.union(existing_refs, existing_queries) + if not overwrite: + required_queries = required_queries - existing_queries + + if len(pairs) == 0 and len(required_queries) == 0: + logger.info("All pairs exist. 
Skipping dense matching.") + return + + # extract semi-dense matches + match_dense(conf, pairs, image_dir, match_path, existing_refs=existing_refs) + + logger.info("Assigning matches...") + + # Pre-load existing keypoints + cpdict, bindict = load_keypoints( + conf, feature_paths_refs, quantize=required_queries + ) + + # Reassign matches by aggregation + cpdict = aggregate_matches( + conf, + pairs, + match_path, + feature_path=feature_path_q, + required_queries=required_queries, + max_kps=max_kps, + cpdict=cpdict, + bindict=bindict, + ) + + # Invalidate matches that are far from selected bin by reassignment + if max_kps is not None: + logger.info(f'Reassign matches with max_error={conf["max_error"]}.') + assign_matches(pairs, match_path, cpdict, max_error=conf["max_error"]) + + def scale_lines(lines, scale): if np.any(scale != 1.0): lines *= lines.new_tensor(scale) @@ -483,3 +982,88 @@ def preprocess(image: np.ndarray): del pred torch.cuda.empty_cache() return ret + + +@torch.no_grad() +def main( + conf: Dict, + pairs: Path, + image_dir: Path, + export_dir: Optional[Path] = None, + matches: Optional[Path] = None, # out + features: Optional[Path] = None, # out + features_ref: Optional[Path] = None, + max_kps: Optional[int] = 8192, + overwrite: bool = False, +) -> Path: + logger.info( + "Extracting semi-dense features with configuration:" + f"\n{pprint.pformat(conf)}" + ) + + if features is None: + features = "feats_" + + if isinstance(features, Path): + features_q = features + if matches is None: + raise ValueError( + "Either provide both features and matches as Path" + " or both as names." + ) + else: + if export_dir is None: + raise ValueError( + "Provide an export_dir if features and matches" + f" are not file paths: {features}, {matches}." + ) + features_q = Path(export_dir, f'{features}{conf["output"]}.h5') + if matches is None: + matches = Path(export_dir, f'{conf["output"]}_{pairs.stem}.h5') + + if features_ref is None: + features_ref = [] + elif isinstance(features_ref, list): + features_ref = list(features_ref) + elif isinstance(features_ref, Path): + features_ref = [features_ref] + else: + raise TypeError(str(features_ref)) + + match_and_assign( + conf, + pairs, + image_dir, + matches, + features_q, + features_ref, + max_kps, + overwrite, + ) + + return features_q, matches + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + parser.add_argument("--export_dir", type=Path, required=True) + parser.add_argument( + "--matches", type=Path, default=confs["loftr"]["output"] + ) + parser.add_argument( + "--features", type=str, default="feats_" + confs["loftr"]["output"] + ) + parser.add_argument( + "--conf", type=str, default="loftr", choices=list(confs.keys()) + ) + args = parser.parse_args() + main( + confs[args.conf], + args.pairs, + args.image_dir, + args.export_dir, + args.matches, + args.features, + ) diff --git a/hloc/matchers/mast3r.py b/hloc/matchers/mast3r.py new file mode 100644 index 0000000..46489cc --- /dev/null +++ b/hloc/matchers/mast3r.py @@ -0,0 +1,111 @@ +import os +import sys +import urllib.request +from pathlib import Path + +import numpy as np +import torch +import torchvision.transforms as tfm + +from .. 
import logger + +mast3r_path = Path(__file__).parent / "../../third_party/mast3r" +sys.path.append(str(mast3r_path)) + +dust3r_path = Path(__file__).parent / "../../third_party/dust3r" +sys.path.append(str(dust3r_path)) + +from dust3r.image_pairs import make_pairs +from dust3r.inference import inference +from mast3r.fast_nn import fast_reciprocal_NNs +from mast3r.model import AsymmetricMASt3R + +from hloc.matchers.duster import Duster + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + +class Mast3r(Duster): + default_conf = { + "name": "Mast3r", + "model_path": mast3r_path + / "model_weights/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth", + "max_keypoints": 2000, + "vit_patch_size": 16, + } + + def _init(self, conf): + self.normalize = tfm.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + self.model_path = self.conf["model_path"] + self.download_weights() + self.net = AsymmetricMASt3R.from_pretrained(self.model_path).to(device) + logger.info("Loaded Mast3r model") + + def download_weights(self): + url = "https://download.europe.naverlabs.com/ComputerVision/MASt3R/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" + + self.model_path.parent.mkdir(parents=True, exist_ok=True) + if not os.path.isfile(self.model_path): + logger.info("Downloading Mast3r(ViT large)... (takes a while)") + urllib.request.urlretrieve(url, self.model_path) + logger.info("Downloading Mast3r(ViT large)... done!") + + def _forward(self, data): + img0, img1 = data["image0"], data["image1"] + mean = torch.tensor([0.5, 0.5, 0.5]).to(device) + std = torch.tensor([0.5, 0.5, 0.5]).to(device) + + img0 = (img0 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1) + img1 = (img1 - mean.view(1, 3, 1, 1)) / std.view(1, 3, 1, 1) + + images = [ + {"img": img0, "idx": 0, "instance": 0}, + {"img": img1, "idx": 1, "instance": 1}, + ] + pairs = make_pairs( + images, scene_graph="complete", prefilter=None, symmetrize=True + ) + output = inference(pairs, self.net, device, batch_size=1) + + # at this stage, you have the raw dust3r predictions + _, pred1 = output["view1"], output["pred1"] + _, pred2 = output["view2"], output["pred2"] + + desc1, desc2 = ( + pred1["desc"][1].squeeze(0).detach(), + pred2["desc"][1].squeeze(0).detach(), + ) + + # find 2D-2D matches between the two images + matches_im0, matches_im1 = fast_reciprocal_NNs( + desc1, + desc2, + subsample_or_initxy1=2, + device=device, + dist="dot", + block_size=2**13, + ) + + mkpts0 = matches_im0.copy() + mkpts1 = matches_im1.copy() + + if len(mkpts0) == 0: + pred = { + "keypoints0": torch.zeros([0, 2]), + "keypoints1": torch.zeros([0, 2]), + } + logger.warning(f"Matched {0} points") + else: + + top_k = self.conf["max_keypoints"] + if top_k is not None and len(mkpts0) > top_k: + keep = np.round(np.linspace(0, len(mkpts0) - 1, top_k)).astype( + int + ) + mkpts0 = mkpts0[keep] + mkpts1 = mkpts1[keep] + pred = { + "keypoints0": torch.from_numpy(mkpts0), + "keypoints1": torch.from_numpy(mkpts1), + } + return pred diff --git a/hloc/matchers/superglue.py b/hloc/matchers/superglue.py index 7e427f4..6fae344 100644 --- a/hloc/matchers/superglue.py +++ b/hloc/matchers/superglue.py @@ -4,7 +4,9 @@ from ..utils.base_model import BaseModel sys.path.append(str(Path(__file__).parent / "../../third_party")) -from SuperGluePretrainedNetwork.models.superglue import SuperGlue as SG +from SuperGluePretrainedNetwork.models.superglue import ( # noqa: E402 + SuperGlue as SG, +) class SuperGlue(BaseModel): diff --git a/hloc/pairs_from_covisibility.py 
b/hloc/pairs_from_covisibility.py new file mode 100644 index 0000000..49f3e57 --- /dev/null +++ b/hloc/pairs_from_covisibility.py @@ -0,0 +1,60 @@ +import argparse +from collections import defaultdict +from pathlib import Path + +import numpy as np +from tqdm import tqdm + +from . import logger +from .utils.read_write_model import read_model + + +def main(model, output, num_matched): + logger.info("Reading the COLMAP model...") + cameras, images, points3D = read_model(model) + + logger.info("Extracting image pairs from covisibility info...") + pairs = [] + for image_id, image in tqdm(images.items()): + matched = image.point3D_ids != -1 + points3D_covis = image.point3D_ids[matched] + + covis = defaultdict(int) + for point_id in points3D_covis: + for image_covis_id in points3D[point_id].image_ids: + if image_covis_id != image_id: + covis[image_covis_id] += 1 + + if len(covis) == 0: + logger.info(f"Image {image_id} does not have any covisibility.") + continue + + covis_ids = np.array(list(covis.keys())) + covis_num = np.array([covis[i] for i in covis_ids]) + + if len(covis_ids) <= num_matched: + top_covis_ids = covis_ids[np.argsort(-covis_num)] + else: + # get covisible image ids with top k number of common matches + ind_top = np.argpartition(covis_num, -num_matched) + ind_top = ind_top[-num_matched:] # unsorted top k + ind_top = ind_top[np.argsort(-covis_num[ind_top])] + top_covis_ids = [covis_ids[i] for i in ind_top] + assert covis_num[ind_top[0]] == np.max(covis_num) + + for i in top_covis_ids: + pair = (image.name, images[i].name) + pairs.append(pair) + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--num_matched", required=True, type=int) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_exhaustive.py b/hloc/pairs_from_exhaustive.py new file mode 100644 index 0000000..438b814 --- /dev/null +++ b/hloc/pairs_from_exhaustive.py @@ -0,0 +1,66 @@ +import argparse +import collections.abc as collections +from pathlib import Path +from typing import List, Optional, Union + +from . 
import logger +from .utils.io import list_h5_names +from .utils.parsers import parse_image_lists + + +def main( + output: Path, + image_list: Optional[Union[Path, List[str]]] = None, + features: Optional[Path] = None, + ref_list: Optional[Union[Path, List[str]]] = None, + ref_features: Optional[Path] = None, +): + if image_list is not None: + if isinstance(image_list, (str, Path)): + names_q = parse_image_lists(image_list) + elif isinstance(image_list, collections.Iterable): + names_q = list(image_list) + else: + raise ValueError(f"Unknown type for image list: {image_list}") + elif features is not None: + names_q = list_h5_names(features) + else: + raise ValueError("Provide either a list of images or a feature file.") + + self_matching = False + if ref_list is not None: + if isinstance(ref_list, (str, Path)): + names_ref = parse_image_lists(ref_list) + elif isinstance(image_list, collections.Iterable): + names_ref = list(ref_list) + else: + raise ValueError( + f"Unknown type for reference image list: {ref_list}" + ) + elif ref_features is not None: + names_ref = list_h5_names(ref_features) + else: + self_matching = True + names_ref = names_q + + pairs = [] + for i, n1 in enumerate(names_q): + for j, n2 in enumerate(names_ref): + if self_matching and j <= i: + continue + pairs.append((n1, n2)) + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--image_list", type=Path) + parser.add_argument("--features", type=Path) + parser.add_argument("--ref_list", type=Path) + parser.add_argument("--ref_features", type=Path) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_poses.py b/hloc/pairs_from_poses.py new file mode 100644 index 0000000..b6b4f88 --- /dev/null +++ b/hloc/pairs_from_poses.py @@ -0,0 +1,70 @@ +import argparse +from pathlib import Path + +import numpy as np +import scipy.spatial + +from . import logger +from .pairs_from_retrieval import pairs_from_score_matrix +from .utils.read_write_model import read_images_binary + +DEFAULT_ROT_THRESH = 30 # in degrees + + +def get_pairwise_distances(images): + ids = np.array(list(images.keys())) + Rs = [] + ts = [] + for id_ in ids: + image = images[id_] + R = image.qvec2rotmat() + t = image.tvec + Rs.append(R) + ts.append(t) + Rs = np.stack(Rs, 0) + ts = np.stack(ts, 0) + + # Invert the poses from world-to-camera to camera-to-world. + Rs = Rs.transpose(0, 2, 1) + ts = -(Rs @ ts[:, :, None])[:, :, 0] + + dist = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(ts)) + + # Instead of computing the angle between two camera orientations, + # we compute the angle between the principal axes, as two images rotated + # around their principal axis still observe the same scene. 
+ axes = Rs[:, :, -1] + dots = np.einsum("mi,ni->mn", axes, axes, optimize=True) + dR = np.rad2deg(np.arccos(np.clip(dots, -1.0, 1.0))) + + return ids, dist, dR + + +def main(model, output, num_matched, rotation_threshold=DEFAULT_ROT_THRESH): + logger.info("Reading the COLMAP model...") + images = read_images_binary(model / "images.bin") + + logger.info(f"Obtaining pairwise distances between {len(images)} images...") + ids, dist, dR = get_pairwise_distances(images) + scores = -dist + + invalid = dR >= rotation_threshold + np.fill_diagonal(invalid, True) + pairs = pairs_from_score_matrix(scores, invalid, num_matched) + pairs = [(images[ids[i]].name, images[ids[j]].name) for i, j in pairs] + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join(p) for p in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--model", required=True, type=Path) + parser.add_argument("--output", required=True, type=Path) + parser.add_argument("--num_matched", required=True, type=int) + parser.add_argument( + "--rotation_threshold", default=DEFAULT_ROT_THRESH, type=float + ) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pairs_from_retrieval.py b/hloc/pairs_from_retrieval.py new file mode 100644 index 0000000..6948fe6 --- /dev/null +++ b/hloc/pairs_from_retrieval.py @@ -0,0 +1,137 @@ +import argparse +import collections.abc as collections +from pathlib import Path +from typing import Optional + +import h5py +import numpy as np +import torch + +from . import logger +from .utils.io import list_h5_names +from .utils.parsers import parse_image_lists +from .utils.read_write_model import read_images_binary + + +def parse_names(prefix, names, names_all): + if prefix is not None: + if not isinstance(prefix, str): + prefix = tuple(prefix) + names = [n for n in names_all if n.startswith(prefix)] + if len(names) == 0: + raise ValueError( + f"Could not find any image with the prefix `{prefix}`." + ) + elif names is not None: + if isinstance(names, (str, Path)): + names = parse_image_lists(names) + elif isinstance(names, collections.Iterable): + names = list(names) + else: + raise ValueError( + f"Unknown type of image list: {names}." + "Provide either a list or a path to a list file." 
+ ) + else: + names = names_all + return names + + +def get_descriptors(names, path, name2idx=None, key="global_descriptor"): + if name2idx is None: + with h5py.File(str(path), "r", libver="latest") as fd: + desc = [fd[n][key].__array__() for n in names] + else: + desc = [] + for n in names: + with h5py.File(str(path[name2idx[n]]), "r", libver="latest") as fd: + desc.append(fd[n][key].__array__()) + return torch.from_numpy(np.stack(desc, 0)).float() + + +def pairs_from_score_matrix( + scores: torch.Tensor, + invalid: np.array, + num_select: int, + min_score: Optional[float] = None, +): + assert scores.shape == invalid.shape + if isinstance(scores, np.ndarray): + scores = torch.from_numpy(scores) + invalid = torch.from_numpy(invalid).to(scores.device) + if min_score is not None: + invalid |= scores < min_score + scores.masked_fill_(invalid, float("-inf")) + + topk = torch.topk(scores, num_select, dim=1) + indices = topk.indices.cpu().numpy() + valid = topk.values.isfinite().cpu().numpy() + + pairs = [] + for i, j in zip(*np.where(valid)): + pairs.append((i, indices[i, j])) + return pairs + + +def main( + descriptors, + output, + num_matched, + query_prefix=None, + query_list=None, + db_prefix=None, + db_list=None, + db_model=None, + db_descriptors=None, +): + logger.info("Extracting image pairs from a retrieval database.") + + # We handle multiple reference feature files. + # We only assume that names are unique among them and map names to files. + if db_descriptors is None: + db_descriptors = descriptors + if isinstance(db_descriptors, (Path, str)): + db_descriptors = [db_descriptors] + name2db = { + n: i for i, p in enumerate(db_descriptors) for n in list_h5_names(p) + } + db_names_h5 = list(name2db.keys()) + query_names_h5 = list_h5_names(descriptors) + + if db_model: + images = read_images_binary(db_model / "images.bin") + db_names = [i.name for i in images.values()] + else: + db_names = parse_names(db_prefix, db_list, db_names_h5) + if len(db_names) == 0: + raise ValueError("Could not find any database image.") + query_names = parse_names(query_prefix, query_list, query_names_h5) + + device = "cuda" if torch.cuda.is_available() else "cpu" + db_desc = get_descriptors(db_names, db_descriptors, name2db) + query_desc = get_descriptors(query_names, descriptors) + sim = torch.einsum("id,jd->ij", query_desc.to(device), db_desc.to(device)) + + # Avoid self-matching + self = np.array(query_names)[:, None] == np.array(db_names)[None] + pairs = pairs_from_score_matrix(sim, self, num_matched, min_score=0) + pairs = [(query_names[i], db_names[j]) for i, j in pairs] + + logger.info(f"Found {len(pairs)} pairs.") + with open(output, "w") as f: + f.write("\n".join(" ".join([i, j]) for i, j in pairs)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--descriptors", type=Path, required=True) + parser.add_argument("--output", type=Path, required=True) + parser.add_argument("--num_matched", type=int, required=True) + parser.add_argument("--query_prefix", type=str, nargs="+") + parser.add_argument("--query_list", type=Path) + parser.add_argument("--db_prefix", type=str, nargs="+") + parser.add_argument("--db_list", type=Path) + parser.add_argument("--db_model", type=Path) + parser.add_argument("--db_descriptors", type=Path) + args = parser.parse_args() + main(**args.__dict__) diff --git a/hloc/pipelines/4Seasons/localize.py b/hloc/pipelines/4Seasons/localize.py index 0451130..50ed957 100644 --- a/hloc/pipelines/4Seasons/localize.py +++ 
b/hloc/pipelines/4Seasons/localize.py @@ -1,16 +1,21 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features, localize_sfm, logger -from .utils import get_timestamps, delete_unused_images -from .utils import generate_query_lists, generate_localization_pairs -from .utils import prepare_submission, evaluate_submission +from ... import extract_features, localize_sfm, logger, match_features +from .utils import ( + delete_unused_images, + evaluate_submission, + generate_localization_pairs, + generate_query_lists, + get_timestamps, + prepare_submission, +) relocalization_files = { - "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt", - "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt", - "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt", - "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt", + "training": "RelocalizationFilesTrain//relocalizationFile_recording_2020-03-24_17-36-22.txt", # noqa: E501 + "validation": "RelocalizationFilesVal/relocalizationFile_recording_2020-03-03_12-03-23.txt", # noqa: E501 + "test0": "RelocalizationFilesTest/relocalizationFile_recording_2020-03-24_17-45-31_*.txt", # noqa: E501 + "test1": "RelocalizationFilesTest/relocalizationFile_recording_2020-04-23_19-37-00_*.txt", # noqa: E501 } parser = argparse.ArgumentParser() @@ -67,9 +72,7 @@ generate_query_lists(timestamps, seq_dir, query_list) # Generate the localization pairs from the given reference frames. -generate_localization_pairs( - sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs -) +generate_localization_pairs(sequence, reloc, num_loc_pairs, ref_pairs, loc_pairs) # Extract, match, amd localize. ffile = extract_features.main(fconf, seq_images, output_dir) diff --git a/hloc/pipelines/4Seasons/prepare_reference.py b/hloc/pipelines/4Seasons/prepare_reference.py index 9074df8..f47aee7 100644 --- a/hloc/pipelines/4Seasons/prepare_reference.py +++ b/hloc/pipelines/4Seasons/prepare_reference.py @@ -1,10 +1,8 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features -from ... import pairs_from_poses, triangulation -from .utils import get_timestamps, delete_unused_images -from .utils import build_empty_colmap_model +from ... 
import extract_features, match_features, pairs_from_poses, triangulation +from .utils import build_empty_colmap_model, delete_unused_images, get_timestamps parser = argparse.ArgumentParser() parser.add_argument( diff --git a/hloc/pipelines/4Seasons/utils.py b/hloc/pipelines/4Seasons/utils.py index 8def70f..e5aace9 100644 --- a/hloc/pipelines/4Seasons/utils.py +++ b/hloc/pipelines/4Seasons/utils.py @@ -1,11 +1,18 @@ -import os -import numpy as np +import glob import logging +import os from pathlib import Path -from ...utils.read_write_model import qvec2rotmat, rotmat2qvec -from ...utils.read_write_model import Image, write_model, Camera +import numpy as np + from ...utils.parsers import parse_retrieval +from ...utils.read_write_model import ( + Camera, + Image, + qvec2rotmat, + rotmat2qvec, + write_model, +) logger = logging.getLogger(__name__) @@ -28,10 +35,10 @@ def get_timestamps(files, idx): def delete_unused_images(root, timestamps): """Delete all images in root if they are not contained in timestamps.""" - images = list(root.glob("**/*.png")) + images = glob.glob((root / "**/*.png").as_posix(), recursive=True) deleted = 0 for image in images: - ts = image.stem + ts = Path(image).stem if ts not in timestamps: os.remove(image) deleted += 1 @@ -48,11 +55,7 @@ def camera_from_calibration_file(id_, path): model_name = "PINHOLE" params = [float(i) for i in [fx, fy, cx, cy]] camera = Camera( - id=id_, - model=model_name, - width=int(width), - height=int(height), - params=params, + id=id_, model=model_name, width=int(width), height=int(height), params=params ) return camera @@ -153,9 +156,7 @@ def generate_localization_pairs(sequence, reloc, num, ref_pairs, out_path): """ if "test" in sequence: # hard pairs will be overwritten by easy ones if available - relocs = [ - str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"] - ] + relocs = [str(reloc).replace("*", d) for d in ["hard", "moderate", "easy"]] else: relocs = [reloc] query_to_ref_ts = {} @@ -213,12 +214,8 @@ def evaluate_submission(submission_dir, relocs, ths=[0.1, 0.2, 0.5]): """Compute the relocalization recall from predicted and ground truth poses.""" for reloc in relocs.parent.glob(relocs.name): poses_gt = parse_relocalization(reloc, has_poses=True) - poses_pred = parse_relocalization( - submission_dir / reloc.name, has_poses=True - ) - poses_pred = { - (ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred - } + poses_pred = parse_relocalization(submission_dir / reloc.name, has_poses=True) + poses_pred = {(ref_ts, q_ts): (R, t) for ref_ts, q_ts, R, t in poses_pred} error = [] for ref_ts, q_ts, R_gt, t_gt in poses_gt: diff --git a/hloc/pipelines/7Scenes/create_gt_sfm.py b/hloc/pipelines/7Scenes/create_gt_sfm.py index 2aca434..95dfa46 100644 --- a/hloc/pipelines/7Scenes/create_gt_sfm.py +++ b/hloc/pipelines/7Scenes/create_gt_sfm.py @@ -1,17 +1,17 @@ from pathlib import Path + import numpy as np -import torch import PIL.Image -from tqdm import tqdm import pycolmap +import torch +from tqdm import tqdm -from ...utils.read_write_model import write_model, read_model +from ...utils.read_write_model import read_model, write_model def scene_coordinates(p2D, R_w2c, t_w2c, depth, camera): assert len(depth) == len(p2D) - ret = pycolmap.image_to_world(p2D, camera._asdict()) - p2D_norm = np.asarray(ret["world_points"]) + p2D_norm = np.stack(pycolmap.Camera(camera._asdict()).image_to_world(p2D)) p2D_h = np.concatenate([p2D_norm, np.ones_like(p2D_norm[:, :1])], 1) p3D_c = p2D_h * depth[:, None] p3D_w = (p3D_c - t_w2c) @ R_w2c @@ 
-28,9 +28,7 @@ def interpolate_depth(depth, kp): # To maximize the number of points that have depth: # do bilinear interpolation first and then nearest for the remaining points - interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[ - 0, :, 0 - ] + interp_lin = grid_sample(depth, kp, align_corners=True, mode="bilinear")[0, :, 0] interp_nn = torch.nn.functional.grid_sample( depth, kp, align_corners=True, mode="nearest" )[0, :, 0] @@ -54,8 +52,7 @@ def project_to_image(p3D, R, t, camera, eps: float = 1e-4, pad: int = 1): p3D = (p3D @ R.T) + t visible = p3D[:, -1] >= eps # keep points in front of the camera p2D_norm = p3D[:, :-1] / p3D[:, -1:].clip(min=eps) - ret = pycolmap.world_to_image(p2D_norm, camera._asdict()) - p2D = np.asarray(ret["image_points"]) + p2D = np.stack(pycolmap.Camera(camera._asdict()).world_to_image(p2D_norm)) size = np.array([camera.width - pad - 1, camera.height - pad - 1]) valid = np.all((p2D >= pad) & (p2D <= size), -1) valid &= visible @@ -129,15 +126,7 @@ def correct_sfm_with_gt_depth(sfm_path, depth_folder_path, output_path): dataset = Path("datasets/7scenes") outputs = Path("outputs/7Scenes") - SCENES = [ - "chess", - "fire", - "heads", - "office", - "pumpkin", - "redkitchen", - "stairs", - ] + SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"] for scene in SCENES: sfm_path = outputs / scene / "sfm_superpoint+superglue" depth_path = dataset / f"depth/7scenes_{scene}/train/depth" diff --git a/hloc/pipelines/7Scenes/pipeline.py b/hloc/pipelines/7Scenes/pipeline.py index 54d0e81..6fc28c6 100644 --- a/hloc/pipelines/7Scenes/pipeline.py +++ b/hloc/pipelines/7Scenes/pipeline.py @@ -1,11 +1,17 @@ -from pathlib import Path import argparse +from pathlib import Path -from .utils import create_reference_sfm -from .create_gt_sfm import correct_sfm_with_gt_depth +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + triangulation, +) from ..Cambridge.utils import create_query_list_with_intrinsics, evaluate -from ... import extract_features, match_features, pairs_from_covisibility -from ... 
import triangulation, localize_sfm, logger +from .create_gt_sfm import correct_sfm_with_gt_depth +from .utils import create_reference_sfm SCENES = ["chess", "fire", "heads", "office", "pumpkin", "redkitchen", "stairs"] @@ -45,9 +51,7 @@ def run_scene( create_reference_sfm(gt_dir, ref_sfm_sift, test_list) create_query_list_with_intrinsics(gt_dir, query_list, test_list) - features = extract_features.main( - feature_conf, images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, images, outputs, as_half=True) sfm_pairs = outputs / f"pairs-db-covis{num_covis}.txt" pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis) @@ -114,9 +118,7 @@ def run_scene( results = ( args.outputs / scene - / "results_{}.txt".format( - "dense" if args.use_dense_depth else "sparse" - ) + / "results_{}.txt".format("dense" if args.use_dense_depth else "sparse") ) if args.overwrite or not results.exists(): run_scene( diff --git a/hloc/pipelines/7Scenes/utils.py b/hloc/pipelines/7Scenes/utils.py index 51070a0..1cb0212 100644 --- a/hloc/pipelines/7Scenes/utils.py +++ b/hloc/pipelines/7Scenes/utils.py @@ -1,4 +1,5 @@ import logging + import numpy as np from hloc.utils.read_write_model import read_model, write_model diff --git a/hloc/pipelines/Aachen/README.md b/hloc/pipelines/Aachen/README.md index 1aefdb7..57b66d6 100644 --- a/hloc/pipelines/Aachen/README.md +++ b/hloc/pipelines/Aachen/README.md @@ -6,7 +6,7 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio ```bash export dataset=datasets/aachen wget -r -np -nH -R "index.html*,aachen_v1_1.zip" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset -unzip $dataset/images/database_and_query_images.zip -d $dataset/images +unzip $dataset/images/database_and_query_images.zip -d $dataset ``` ## Pipeline diff --git a/hloc/pipelines/Aachen/pipeline.py b/hloc/pipelines/Aachen/pipeline.py index 1dbf8ab..e31ce72 100644 --- a/hloc/pipelines/Aachen/pipeline.py +++ b/hloc/pipelines/Aachen/pipeline.py @@ -1,102 +1,109 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_features -from ... import pairs_from_covisibility, pairs_from_retrieval -from ... import colmap_from_nvm, triangulation, localize_sfm +from ... 
import ( + colmap_from_nvm, + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" + outputs = args.outputs # where everything will be saved + sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses + reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -outputs = args.outputs # where everything will be saved -sift_sfm = outputs / "sfm_sift" # from which we extract the reference poses -reference_sfm = ( - outputs / "sfm_superpoint+superglue" -) # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = ( - outputs / f"Aachen_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) + # list the standard configurations available + logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs)) + logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs)) -# list the standard configurations available -print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}") -print(f"Configs for feature matchers:\n{pformat(match_features.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_aachen"] + matcher_conf = match_features.confs["superglue"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_aachen"] -matcher_conf = match_features.confs["superglue"] + features = extract_features.main(feature_conf, images, outputs) -features = extract_features.main(feature_conf, images, outputs) + colmap_from_nvm.main( + dataset / "3D-models/aachen_cvpr2018_db.nvm", + dataset / "3D-models/database_intrinsics.txt", + dataset / "aachen.db", + sift_sfm, + ) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs + ) -colmap_from_nvm.main( - dataset / "3D-models/aachen_cvpr2018_db.nvm", - dataset / "3D-models/database_intrinsics.txt", - dataset / "aachen.db", - sift_sfm, -) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, 
num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + ) # not required with SuperPoint+SuperGlue -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with SuperPoint+SuperGlue +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() + run(args) diff --git a/hloc/pipelines/Aachen_v1_1/README.md b/hloc/pipelines/Aachen_v1_1/README.md index 33a310b..c17e751 100644 --- a/hloc/pipelines/Aachen_v1_1/README.md +++ b/hloc/pipelines/Aachen_v1_1/README.md @@ -6,9 +6,8 @@ Download the dataset from [visuallocalization.net](https://www.visuallocalizatio ```bash export dataset=datasets/aachen_v1.1 wget -r -np -nH -R "index.html*" --cut-dirs=4 https://data.ciirc.cvut.cz/public/projects/2020VisualLocalization/Aachen-Day-Night/ -P $dataset -unzip $dataset/images/database_and_query_images.zip -d $dataset/images +unzip $dataset/images/database_and_query_images.zip -d $dataset unzip $dataset/aachen_v1_1.zip -d $dataset -rsync -a $dataset/images_upright/ $dataset/images/images_upright/ ``` ## Pipeline diff --git a/hloc/pipelines/Aachen_v1_1/pipeline.py b/hloc/pipelines/Aachen_v1_1/pipeline.py index fb8cc3f..0753604 100644 --- a/hloc/pipelines/Aachen_v1_1/pipeline.py +++ b/hloc/pipelines/Aachen_v1_1/pipeline.py @@ -1,95 +1,104 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_features, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... 
import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen_v1.1", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen_v1.1", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" + sift_sfm = dataset / "3D-models/aachen_v_1_1" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" -sift_sfm = dataset / "3D-models/aachen_v_1_1" + outputs = args.outputs # where everything will be saved + reference_sfm = outputs / "sfm_superpoint+superglue" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = ( + outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" + ) -outputs = args.outputs # where everything will be saved -reference_sfm = ( - outputs / "sfm_superpoint+superglue" -) # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = ( - outputs / f"Aachen-v1.1_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) + # list the standard configurations available + logger.info("Configs for feature extractors:\n%s", pformat(extract_features.confs)) + logger.info("Configs for feature matchers:\n%s", pformat(match_features.confs)) -# list the standard configurations available -print(f"Configs for feature extractors:\n{pformat(extract_features.confs)}") -print(f"Configs for feature matchers:\n{pformat(match_features.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_max"] + matcher_conf = match_features.confs["superglue"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_max"] -matcher_conf = match_features.confs["superglue"] + features = extract_features.main(feature_conf, images, outputs) -features = extract_features.main(feature_conf, images, outputs) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs + ) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + 
pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + ) # not required with SuperPoint+SuperGlue -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with SuperPoint+SuperGlue +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen_v1.1", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen_v1.1", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() + run(args) diff --git a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py index ccfd62d..9c0a769 100644 --- a/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py +++ b/hloc/pipelines/Aachen_v1_1/pipeline_loftr.py @@ -1,94 +1,104 @@ +import argparse from pathlib import Path from pprint import pformat -import argparse -from ... import extract_features, match_dense, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... 
import ( + extract_features, + localize_sfm, + logger, + match_dense, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/aachen_v1.1", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/aachen_v1.1", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=50, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images_upright/" + sift_sfm = dataset / "3D-models/aachen_v_1_1" -# Setup the paths -dataset = args.dataset -images = dataset / "images/images_upright/" -sift_sfm = dataset / "3D-models/aachen_v_1_1" + outputs = args.outputs # where everything will be saved + outputs.mkdir() + reference_sfm = outputs / "sfm_loftr" # the SfM model we will build + sfm_pairs = ( + outputs / f"pairs-db-covis{args.num_covis}.txt" + ) # top-k most covisible in SIFT model + loc_pairs = ( + outputs / f"pairs-query-netvlad{args.num_loc}.txt" + ) # top-k retrieved by NetVLAD + results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt" -outputs = args.outputs # where everything will be saved -outputs.mkdir() -reference_sfm = outputs / "sfm_loftr" # the SfM model we will build -sfm_pairs = ( - outputs / f"pairs-db-covis{args.num_covis}.txt" -) # top-k most covisible in SIFT model -loc_pairs = ( - outputs / f"pairs-query-netvlad{args.num_loc}.txt" -) # top-k retrieved by NetVLAD -results = outputs / f"Aachen-v1.1_hloc_loftr_netvlad{args.num_loc}.txt" + # list the standard configurations available + logger.info("Configs for dense feature matchers:\n%s", pformat(match_dense.confs)) -# list the standard configurations available -print(f"Configs for dense feature matchers:\n{pformat(match_dense.confs)}") + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + matcher_conf = match_dense.confs["loftr_aachen"] -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -matcher_conf = match_dense.confs["loftr_aachen"] + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + features, sfm_matches = match_dense.main( + matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False + ) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -features, sfm_matches = match_dense.main( - matcher_conf, sfm_pairs, images, outputs, max_kps=8192, overwrite=False -) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix="query", + db_model=reference_sfm, + ) + features, loc_matches = match_dense.main( + matcher_conf, + loc_pairs, + images, + outputs, + features=features, + max_kps=None, + matches=sfm_matches, + ) + + localize_sfm.main( + reference_sfm, + dataset / "queries/*_time_queries_with_intrinsics.txt", + loc_pairs, + features, + 
loc_matches, + results, + covisibility_clustering=False, + ) # not required with loftr -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix="query", - db_model=reference_sfm, -) -features, loc_matches = match_dense.main( - matcher_conf, - loc_pairs, - images, - outputs, - features=features, - max_kps=None, - matches=sfm_matches, -) -localize_sfm.main( - reference_sfm, - dataset / "queries/*_time_queries_with_intrinsics.txt", - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, -) # not required with loftr +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/aachen_v1.1", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/aachen_v1.1", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", + type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=50, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() diff --git a/hloc/pipelines/CMU/pipeline.py b/hloc/pipelines/CMU/pipeline.py index 788dc7b..4706a05 100644 --- a/hloc/pipelines/CMU/pipeline.py +++ b/hloc/pipelines/CMU/pipeline.py @@ -1,8 +1,15 @@ -from pathlib import Path import argparse +from pathlib import Path -from ... import extract_features, match_features, triangulation, logger -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) TEST_SLICES = [2, 3, 4, 5, 6, 13, 14, 15, 16, 17, 18, 19, 20, 21] @@ -46,34 +53,20 @@ def run_slice(slice_, root, outputs, num_covis, num_loc): matcher_conf = match_features.confs["superglue"] pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=num_covis) - features = extract_features.main( - feature_conf, ref_images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, ref_images, outputs, as_half=True) sfm_matches = match_features.main( matcher_conf, sfm_pairs, feature_conf["output"], outputs ) - triangulation.main( - ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches - ) + triangulation.main(ref_sfm, sift_sfm, ref_images, sfm_pairs, features, sfm_matches) generate_query_list(root, query_list, slice_) - global_descriptors = extract_features.main( - retrieval_conf, ref_images, outputs - ) - global_descriptors = extract_features.main( - retrieval_conf, query_images, outputs - ) + global_descriptors = extract_features.main(retrieval_conf, ref_images, outputs) + global_descriptors = extract_features.main(retrieval_conf, query_images, outputs) pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - num_loc, - query_list=query_list, - db_model=ref_sfm, + global_descriptors, loc_pairs, num_loc, query_list=query_list, db_model=ref_sfm ) - features = extract_features.main( - feature_conf, query_images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, query_images, outputs, as_half=True) loc_matches = match_features.main( matcher_conf, loc_pairs, feature_conf["output"], outputs ) @@ -136,9 +129,5 @@ def run_slice(slice_, root, outputs, num_covis, num_loc): for slice_ in slices: 
logger.info("Working on slice %s.", slice_) run_slice( - f"slice{slice_}", - args.dataset, - args.outputs, - args.num_covis, - args.num_loc, + f"slice{slice_}", args.dataset, args.outputs, args.num_covis, args.num_loc ) diff --git a/hloc/pipelines/Cambridge/pipeline.py b/hloc/pipelines/Cambridge/pipeline.py index 7b9e5c5..3a676e5 100644 --- a/hloc/pipelines/Cambridge/pipeline.py +++ b/hloc/pipelines/Cambridge/pipeline.py @@ -1,17 +1,18 @@ -from pathlib import Path import argparse +from pathlib import Path -from .utils import create_query_list_with_intrinsics, scale_sfm_images, evaluate -from ... import extract_features, match_features, pairs_from_covisibility -from ... import triangulation, localize_sfm, pairs_from_retrieval, logger +from ... import ( + extract_features, + localize_sfm, + logger, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) +from .utils import create_query_list_with_intrinsics, evaluate, scale_sfm_images -SCENES = [ - "KingsCollege", - "OldHospital", - "ShopFacade", - "StMarysChurch", - "GreatCourt", -] +SCENES = ["KingsCollege", "OldHospital", "ShopFacade", "StMarysChurch", "GreatCourt"] def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): @@ -41,11 +42,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): retrieval_conf = extract_features.confs["netvlad"] create_query_list_with_intrinsics( - gt_dir / "empty_all", - query_list, - test_list, - ext=".txt", - image_dir=images, + gt_dir / "empty_all", query_list, test_list, ext=".txt", image_dir=images ) with open(test_list, "r") as f: query_seqs = {q.split("/")[0] for q in f.read().rstrip().split("\n")} @@ -59,9 +56,7 @@ def run_scene(images, gt_dir, outputs, results, num_covis, num_loc): query_prefix=query_seqs, ) - features = extract_features.main( - feature_conf, images, outputs, as_half=True - ) + features = extract_features.main(feature_conf, images, outputs, as_half=True) pairs_from_covisibility.main(ref_sfm_sift, sfm_pairs, num_matched=num_covis) sfm_matches = match_features.main( matcher_conf, sfm_pairs, feature_conf["output"], outputs diff --git a/hloc/pipelines/Cambridge/utils.py b/hloc/pipelines/Cambridge/utils.py index 90215b4..36460f0 100644 --- a/hloc/pipelines/Cambridge/utils.py +++ b/hloc/pipelines/Cambridge/utils.py @@ -1,15 +1,16 @@ -import cv2 import logging + +import cv2 import numpy as np from hloc.utils.read_write_model import ( + qvec2rotmat, read_cameras_binary, + read_cameras_text, read_images_binary, + read_images_text, read_model, write_model, - qvec2rotmat, - read_images_text, - read_cameras_text, ) logger = logging.getLogger(__name__) @@ -42,9 +43,7 @@ def scale_sfm_images(full_model, scaled_model, image_dir): sy = h / camera.height assert sx == sy, (sx, sy) scaled_cameras[cam_id] = camera._replace( - width=w, - height=h, - params=camera.params * np.array([sx, sx, sy, 1.0]), + width=w, height=h, params=camera.params * np.array([sx, sx, sy, 1.0]) ) write_model(scaled_cameras, images, points3D, scaled_model) diff --git a/hloc/pipelines/RobotCar/colmap_from_nvm.py b/hloc/pipelines/RobotCar/colmap_from_nvm.py index 40d7e8b..e90ed72 100644 --- a/hloc/pipelines/RobotCar/colmap_from_nvm.py +++ b/hloc/pipelines/RobotCar/colmap_from_nvm.py @@ -1,29 +1,31 @@ import argparse +import logging import sqlite3 -from tqdm import tqdm from collections import defaultdict -import numpy as np from pathlib import Path -import logging + +import numpy as np +from tqdm import tqdm from ...colmap_from_nvm import ( - 
recover_database_images_and_ids, camera_center_to_translation, + recover_database_images_and_ids, +) +from ...utils.read_write_model import ( + CAMERA_MODEL_IDS, + Camera, + Image, + Point3D, + write_model, ) -from ...utils.read_write_model import Camera, Image, Point3D, CAMERA_MODEL_IDS -from ...utils.read_write_model import write_model logger = logging.getLogger(__name__) -def read_nvm_model( - nvm_path, database_path, image_ids, camera_ids, skip_points=False -): +def read_nvm_model(nvm_path, database_path, image_ids, camera_ids, skip_points=False): # Extract the intrinsics from the db file instead of the NVM model db = sqlite3.connect(str(database_path)) - ret = db.execute( - "SELECT camera_id, model, width, height, params FROM cameras;" - ) + ret = db.execute("SELECT camera_id, model, width, height, params FROM cameras;") cameras = {} for camera_id, camera_model, width, height, params in ret: params = np.fromstring(params, dtype=np.double).reshape(-1) diff --git a/hloc/pipelines/RobotCar/pipeline.py b/hloc/pipelines/RobotCar/pipeline.py index 20877cf..7a7ee31 100644 --- a/hloc/pipelines/RobotCar/pipeline.py +++ b/hloc/pipelines/RobotCar/pipeline.py @@ -1,10 +1,16 @@ -from pathlib import Path import argparse +import glob +from pathlib import Path +from ... import ( + extract_features, + localize_sfm, + match_features, + pairs_from_covisibility, + pairs_from_retrieval, + triangulation, +) from . import colmap_from_nvm -from ... import extract_features, match_features, triangulation -from ... import pairs_from_covisibility, pairs_from_retrieval, localize_sfm - CONDITIONS = [ "dawn", @@ -33,102 +39,105 @@ def generate_query_list(dataset, image_dir, path): params = ["SIMPLE_RADIAL", w, h, fx, cx, cy, 0.0] cameras[side] = [str(p) for p in params] - queries = sorted(image_dir.glob("**/*.jpg")) - queries = [str(q.relative_to(image_dir.parents[0])) for q in queries] + queries = glob.glob((image_dir / "**/*.jpg").as_posix(), recursive=True) + queries = [ + Path(q).relative_to(image_dir.parents[0]).as_posix() for q in sorted(queries) + ] out = [[q] + cameras[Path(q).parent.name] for q in queries] with open(path, "w") as f: f.write("\n".join(map(" ".join, out))) -parser = argparse.ArgumentParser() -parser.add_argument( - "--dataset", - type=Path, - default="datasets/robotcar", - help="Path to the dataset, default: %(default)s", -) -parser.add_argument( - "--outputs", - type=Path, - default="outputs/robotcar", - help="Path to the output directory, default: %(default)s", -) -parser.add_argument( - "--num_covis", - type=int, - default=20, - help="Number of image pairs for SfM, default: %(default)s", -) -parser.add_argument( - "--num_loc", - type=int, - default=20, - help="Number of image pairs for loc, default: %(default)s", -) -args = parser.parse_args() - -# Setup the paths -dataset = args.dataset -images = dataset / "images/" - -outputs = args.outputs # where everything will be saved -outputs.mkdir(exist_ok=True, parents=True) -query_list = outputs / "{condition}_queries_with_intrinsics.txt" -sift_sfm = outputs / "sfm_sift" -reference_sfm = outputs / "sfm_superpoint+superglue" -sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt" -loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt" -results = ( - outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" -) - -# pick one of the configurations for extraction and matching -retrieval_conf = extract_features.confs["netvlad"] -feature_conf = extract_features.confs["superpoint_aachen"] -matcher_conf = 
match_features.confs["superglue"] - -for condition in CONDITIONS: - generate_query_list( - dataset, images / condition, str(query_list).format(condition=condition) +def run(args): + # Setup the paths + dataset = args.dataset + images = dataset / "images/" + + outputs = args.outputs # where everything will be saved + outputs.mkdir(exist_ok=True, parents=True) + query_list = outputs / "{condition}_queries_with_intrinsics.txt" + sift_sfm = outputs / "sfm_sift" + reference_sfm = outputs / "sfm_superpoint+superglue" + sfm_pairs = outputs / f"pairs-db-covis{args.num_covis}.txt" + loc_pairs = outputs / f"pairs-query-netvlad{args.num_loc}.txt" + results = outputs / f"RobotCar_hloc_superpoint+superglue_netvlad{args.num_loc}.txt" + + # pick one of the configurations for extraction and matching + retrieval_conf = extract_features.confs["netvlad"] + feature_conf = extract_features.confs["superpoint_aachen"] + matcher_conf = match_features.confs["superglue"] + + for condition in CONDITIONS: + generate_query_list( + dataset, images / condition, str(query_list).format(condition=condition) + ) + + features = extract_features.main(feature_conf, images, outputs, as_half=True) + + colmap_from_nvm.main( + dataset / "3D-models/all-merged/all.nvm", + dataset / "3D-models/overcast-reference.db", + sift_sfm, + ) + pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) + sfm_matches = match_features.main( + matcher_conf, sfm_pairs, feature_conf["output"], outputs ) -features = extract_features.main(feature_conf, images, outputs, as_half=True) + triangulation.main( + reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches + ) -colmap_from_nvm.main( - dataset / "3D-models/all-merged/all.nvm", - dataset / "3D-models/overcast-reference.db", - sift_sfm, -) -pairs_from_covisibility.main(sift_sfm, sfm_pairs, num_matched=args.num_covis) -sfm_matches = match_features.main( - matcher_conf, sfm_pairs, feature_conf["output"], outputs -) + global_descriptors = extract_features.main(retrieval_conf, images, outputs) + # TODO: do per location and per camera + pairs_from_retrieval.main( + global_descriptors, + loc_pairs, + args.num_loc, + query_prefix=CONDITIONS, + db_model=reference_sfm, + ) + loc_matches = match_features.main( + matcher_conf, loc_pairs, feature_conf["output"], outputs + ) -triangulation.main( - reference_sfm, sift_sfm, images, sfm_pairs, features, sfm_matches -) + localize_sfm.main( + reference_sfm, + Path(str(query_list).format(condition="*")), + loc_pairs, + features, + loc_matches, + results, + covisibility_clustering=False, + prepend_camera_name=True, + ) -global_descriptors = extract_features.main(retrieval_conf, images, outputs) -# TODO: do per location and per camera -pairs_from_retrieval.main( - global_descriptors, - loc_pairs, - args.num_loc, - query_prefix=CONDITIONS, - db_model=reference_sfm, -) -loc_matches = match_features.main( - matcher_conf, loc_pairs, feature_conf["output"], outputs -) -localize_sfm.main( - reference_sfm, - Path(str(query_list).format(condition="*")), - loc_pairs, - features, - loc_matches, - results, - covisibility_clustering=False, - prepend_camera_name=True, -) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=Path, + default="datasets/robotcar", + help="Path to the dataset, default: %(default)s", + ) + parser.add_argument( + "--outputs", + type=Path, + default="outputs/robotcar", + help="Path to the output directory, default: %(default)s", + ) + parser.add_argument( + "--num_covis", 
+ type=int, + default=20, + help="Number of image pairs for SfM, default: %(default)s", + ) + parser.add_argument( + "--num_loc", + type=int, + default=20, + help="Number of image pairs for loc, default: %(default)s", + ) + args = parser.parse_args() diff --git a/hloc/reconstruction.py b/hloc/reconstruction.py new file mode 100644 index 0000000..ff4a90a --- /dev/null +++ b/hloc/reconstruction.py @@ -0,0 +1,199 @@ +import argparse +import multiprocessing +import shutil +from pathlib import Path +from typing import Any, Dict, List, Optional + +import pycolmap + +from . import logger +from .triangulation import ( + OutputCapture, + estimation_and_geometric_verification, + import_features, + import_matches, + parse_option_args, +) +from .utils.database import COLMAPDatabase + + +def create_empty_db(database_path: Path): + if database_path.exists(): + logger.warning("The database already exists, deleting it.") + database_path.unlink() + logger.info("Creating an empty database...") + db = COLMAPDatabase.connect(database_path) + db.create_tables() + db.commit() + db.close() + + +def import_images( + image_dir: Path, + database_path: Path, + camera_mode: pycolmap.CameraMode, + image_list: Optional[List[str]] = None, + options: Optional[Dict[str, Any]] = None, +): + logger.info("Importing images into the database...") + if options is None: + options = {} + images = list(image_dir.iterdir()) + if len(images) == 0: + raise IOError(f"No images found in {image_dir}.") + with pycolmap.ostream(): + pycolmap.import_images( + database_path, + image_dir, + camera_mode, + image_list=image_list or [], + options=options, + ) + + +def get_image_ids(database_path: Path) -> Dict[str, int]: + db = COLMAPDatabase.connect(database_path) + images = {} + for name, image_id in db.execute("SELECT name, image_id FROM images;"): + images[name] = image_id + db.close() + return images + + +def run_reconstruction( + sfm_dir: Path, + database_path: Path, + image_dir: Path, + verbose: bool = False, + options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + models_path = sfm_dir / "models" + models_path.mkdir(exist_ok=True, parents=True) + logger.info("Running 3D reconstruction...") + if options is None: + options = {} + options = {"num_threads": min(multiprocessing.cpu_count(), 16), **options} + with OutputCapture(verbose): + with pycolmap.ostream(): + reconstructions = pycolmap.incremental_mapping( + database_path, image_dir, models_path, options=options + ) + + if len(reconstructions) == 0: + logger.error("Could not reconstruct any model!") + return None + logger.info(f"Reconstructed {len(reconstructions)} model(s).") + + largest_index = None + largest_num_images = 0 + for index, rec in reconstructions.items(): + num_images = rec.num_reg_images() + if num_images > largest_num_images: + largest_index = index + largest_num_images = num_images + assert largest_index is not None + logger.info( + f"Largest model is #{largest_index} " + f"with {largest_num_images} images." 
+ ) + + for filename in ["images.bin", "cameras.bin", "points3D.bin"]: + if (sfm_dir / filename).exists(): + (sfm_dir / filename).unlink() + shutil.move( + str(models_path / str(largest_index) / filename), str(sfm_dir) + ) + return reconstructions[largest_index] + + +def main( + sfm_dir: Path, + image_dir: Path, + pairs: Path, + features: Path, + matches: Path, + camera_mode: pycolmap.CameraMode = pycolmap.CameraMode.AUTO, + verbose: bool = False, + skip_geometric_verification: bool = False, + min_match_score: Optional[float] = None, + image_list: Optional[List[str]] = None, + image_options: Optional[Dict[str, Any]] = None, + mapper_options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + assert features.exists(), features + assert pairs.exists(), pairs + assert matches.exists(), matches + + sfm_dir.mkdir(parents=True, exist_ok=True) + database = sfm_dir / "database.db" + + create_empty_db(database) + import_images(image_dir, database, camera_mode, image_list, image_options) + image_ids = get_image_ids(database) + import_features(image_ids, database, features) + import_matches( + image_ids, + database, + pairs, + matches, + min_match_score, + skip_geometric_verification, + ) + if not skip_geometric_verification: + estimation_and_geometric_verification(database, pairs, verbose) + reconstruction = run_reconstruction( + sfm_dir, database, image_dir, verbose, mapper_options + ) + if reconstruction is not None: + logger.info( + f"Reconstruction statistics:\n{reconstruction.summary()}" + + f"\n\tnum_input_images = {len(image_ids)}" + ) + return reconstruction + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sfm_dir", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + + parser.add_argument( + "--camera_mode", + type=str, + default="AUTO", + choices=list(pycolmap.CameraMode.__members__.keys()), + ) + parser.add_argument("--skip_geometric_verification", action="store_true") + parser.add_argument("--min_match_score", type=float) + parser.add_argument("--verbose", action="store_true") + + parser.add_argument( + "--image_options", + nargs="+", + default=[], + help="List of key=value from {}".format( + pycolmap.ImageReaderOptions().todict() + ), + ) + parser.add_argument( + "--mapper_options", + nargs="+", + default=[], + help="List of key=value from {}".format( + pycolmap.IncrementalMapperOptions().todict() + ), + ) + args = parser.parse_args().__dict__ + + image_options = parse_option_args( + args.pop("image_options"), pycolmap.ImageReaderOptions() + ) + mapper_options = parse_option_args( + args.pop("mapper_options"), pycolmap.IncrementalMapperOptions() + ) + + main(**args, image_options=image_options, mapper_options=mapper_options) diff --git a/hloc/triangulation.py b/hloc/triangulation.py new file mode 100644 index 0000000..385fed9 --- /dev/null +++ b/hloc/triangulation.py @@ -0,0 +1,320 @@ +import argparse +import contextlib +import io +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional + +import numpy as np +import pycolmap +from tqdm import tqdm + +from . 
import logger +from .utils.database import COLMAPDatabase +from .utils.geometry import compute_epipolar_errors +from .utils.io import get_keypoints, get_matches +from .utils.parsers import parse_retrieval + + +class OutputCapture: + def __init__(self, verbose: bool): + self.verbose = verbose + + def __enter__(self): + if not self.verbose: + self.capture = contextlib.redirect_stdout(io.StringIO()) + self.out = self.capture.__enter__() + + def __exit__(self, exc_type, *args): + if not self.verbose: + self.capture.__exit__(exc_type, *args) + if exc_type is not None: + logger.error("Failed with output:\n%s", self.out.getvalue()) + sys.stdout.flush() + + +def create_db_from_model( + reconstruction: pycolmap.Reconstruction, database_path: Path +) -> Dict[str, int]: + if database_path.exists(): + logger.warning("The database already exists, deleting it.") + database_path.unlink() + + db = COLMAPDatabase.connect(database_path) + db.create_tables() + + for i, camera in reconstruction.cameras.items(): + db.add_camera( + camera.model.value, + camera.width, + camera.height, + camera.params, + camera_id=i, + prior_focal_length=True, + ) + + for i, image in reconstruction.images.items(): + db.add_image(image.name, image.camera_id, image_id=i) + + db.commit() + db.close() + return {image.name: i for i, image in reconstruction.images.items()} + + +def import_features( + image_ids: Dict[str, int], database_path: Path, features_path: Path +): + logger.info("Importing features into the database...") + db = COLMAPDatabase.connect(database_path) + + for image_name, image_id in tqdm(image_ids.items()): + keypoints = get_keypoints(features_path, image_name) + keypoints += 0.5 # COLMAP origin + db.add_keypoints(image_id, keypoints) + + db.commit() + db.close() + + +def import_matches( + image_ids: Dict[str, int], + database_path: Path, + pairs_path: Path, + matches_path: Path, + min_match_score: Optional[float] = None, + skip_geometric_verification: bool = False, +): + logger.info("Importing matches into the database...") + + with open(str(pairs_path), "r") as f: + pairs = [p.split() for p in f.readlines()] + + db = COLMAPDatabase.connect(database_path) + + matched = set() + for name0, name1 in tqdm(pairs): + id0, id1 = image_ids[name0], image_ids[name1] + if len({(id0, id1), (id1, id0)} & matched) > 0: + continue + matches, scores = get_matches(matches_path, name0, name1) + if min_match_score: + matches = matches[scores > min_match_score] + db.add_matches(id0, id1, matches) + matched |= {(id0, id1), (id1, id0)} + + if skip_geometric_verification: + db.add_two_view_geometry(id0, id1, matches) + + db.commit() + db.close() + + +def estimation_and_geometric_verification( + database_path: Path, pairs_path: Path, verbose: bool = False +): + logger.info("Performing geometric verification of the matches...") + with OutputCapture(verbose): + with pycolmap.ostream(): + pycolmap.verify_matches( + database_path, + pairs_path, + options=dict( + ransac=dict(max_num_trials=20000, min_inlier_ratio=0.1) + ), + ) + + +def geometric_verification( + image_ids: Dict[str, int], + reference: pycolmap.Reconstruction, + database_path: Path, + features_path: Path, + pairs_path: Path, + matches_path: Path, + max_error: float = 4.0, +): + logger.info("Performing geometric verification of the matches...") + + pairs = parse_retrieval(pairs_path) + db = COLMAPDatabase.connect(database_path) + + inlier_ratios = [] + matched = set() + for name0 in tqdm(pairs): + id0 = image_ids[name0] + image0 = reference.images[id0] + cam0 = 
reference.cameras[image0.camera_id] + kps0, noise0 = get_keypoints( + features_path, name0, return_uncertainty=True + ) + noise0 = 1.0 if noise0 is None else noise0 + if len(kps0) > 0: + kps0 = np.stack(cam0.cam_from_img(kps0)) + else: + kps0 = np.zeros((0, 2)) + + for name1 in pairs[name0]: + id1 = image_ids[name1] + image1 = reference.images[id1] + cam1 = reference.cameras[image1.camera_id] + kps1, noise1 = get_keypoints( + features_path, name1, return_uncertainty=True + ) + noise1 = 1.0 if noise1 is None else noise1 + if len(kps1) > 0: + kps1 = np.stack(cam1.cam_from_img(kps1)) + else: + kps1 = np.zeros((0, 2)) + + matches = get_matches(matches_path, name0, name1)[0] + + if len({(id0, id1), (id1, id0)} & matched) > 0: + continue + matched |= {(id0, id1), (id1, id0)} + + if matches.shape[0] == 0: + db.add_two_view_geometry(id0, id1, matches) + continue + + cam1_from_cam0 = ( + image1.cam_from_world * image0.cam_from_world.inverse() + ) + errors0, errors1 = compute_epipolar_errors( + cam1_from_cam0, kps0[matches[:, 0]], kps1[matches[:, 1]] + ) + valid_matches = np.logical_and( + errors0 <= cam0.cam_from_img_threshold(noise0 * max_error), + errors1 <= cam1.cam_from_img_threshold(noise1 * max_error), + ) + # TODO: We could also add E to the database, but we need + # to reverse the transformations if id0 > id1 in utils/database.py. + db.add_two_view_geometry(id0, id1, matches[valid_matches, :]) + inlier_ratios.append(np.mean(valid_matches)) + logger.info( + "mean/med/min/max valid matches %.2f/%.2f/%.2f/%.2f%%.", + np.mean(inlier_ratios) * 100, + np.median(inlier_ratios) * 100, + np.min(inlier_ratios) * 100, + np.max(inlier_ratios) * 100, + ) + + db.commit() + db.close() + + +def run_triangulation( + model_path: Path, + database_path: Path, + image_dir: Path, + reference_model: pycolmap.Reconstruction, + verbose: bool = False, + options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + model_path.mkdir(parents=True, exist_ok=True) + logger.info("Running 3D triangulation...") + if options is None: + options = {} + with OutputCapture(verbose): + with pycolmap.ostream(): + reconstruction = pycolmap.triangulate_points( + reference_model, + database_path, + image_dir, + model_path, + options=options, + ) + return reconstruction + + +def main( + sfm_dir: Path, + reference_model: Path, + image_dir: Path, + pairs: Path, + features: Path, + matches: Path, + skip_geometric_verification: bool = False, + estimate_two_view_geometries: bool = False, + min_match_score: Optional[float] = None, + verbose: bool = False, + mapper_options: Optional[Dict[str, Any]] = None, +) -> pycolmap.Reconstruction: + assert reference_model.exists(), reference_model + assert features.exists(), features + assert pairs.exists(), pairs + assert matches.exists(), matches + + sfm_dir.mkdir(parents=True, exist_ok=True) + database = sfm_dir / "database.db" + reference = pycolmap.Reconstruction(reference_model) + + image_ids = create_db_from_model(reference, database) + import_features(image_ids, database, features) + import_matches( + image_ids, + database, + pairs, + matches, + min_match_score, + skip_geometric_verification, + ) + if not skip_geometric_verification: + if estimate_two_view_geometries: + estimation_and_geometric_verification(database, pairs, verbose) + else: + geometric_verification( + image_ids, reference, database, features, pairs, matches + ) + reconstruction = run_triangulation( + sfm_dir, database, image_dir, reference, verbose, mapper_options + ) + logger.info( + "Finished the triangulation 
with statistics:\n%s", + reconstruction.summary(), + ) + return reconstruction + + +def parse_option_args(args: List[str], default_options) -> Dict[str, Any]: + options = {} + for arg in args: + idx = arg.find("=") + if idx == -1: + raise ValueError("Options format: key1=value1 key2=value2 etc.") + key, value = arg[:idx], arg[idx + 1 :] + if not hasattr(default_options, key): + raise ValueError( + f'Unknown option "{key}", allowed options and default values' + f" for {default_options.summary()}" + ) + value = eval(value) + target_type = type(getattr(default_options, key)) + if not isinstance(value, target_type): + raise ValueError( + f'Incorrect type for option "{key}":' + f" {type(value)} vs {target_type}" + ) + options[key] = value + return options + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sfm_dir", type=Path, required=True) + parser.add_argument("--reference_sfm_model", type=Path, required=True) + parser.add_argument("--image_dir", type=Path, required=True) + + parser.add_argument("--pairs", type=Path, required=True) + parser.add_argument("--features", type=Path, required=True) + parser.add_argument("--matches", type=Path, required=True) + + parser.add_argument("--skip_geometric_verification", action="store_true") + parser.add_argument("--min_match_score", type=float) + parser.add_argument("--verbose", action="store_true") + args = parser.parse_args().__dict__ + + mapper_options = parse_option_args( + args.pop("mapper_options"), pycolmap.IncrementalMapperOptions() + ) + + main(**args, mapper_options=mapper_options) diff --git a/hloc/utils/database.py b/hloc/utils/database.py index 050f5ec..683c250 100644 --- a/hloc/utils/database.py +++ b/hloc/utils/database.py @@ -31,10 +31,10 @@ # This script is based on an original implementation by True Price. 
-import sys import sqlite3 -import numpy as np +import sys +import numpy as np IS_PYTHON3 = sys.version_info[0] >= 3 @@ -100,9 +100,7 @@ cols INTEGER NOT NULL, data BLOB)""" -CREATE_NAME_INDEX = ( - "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" -) +CREATE_NAME_INDEX = "CREATE UNIQUE INDEX IF NOT EXISTS index_name ON images(name)" CREATE_ALL = "; ".join( [ @@ -152,34 +150,20 @@ def __init__(self, *args, **kwargs): super(COLMAPDatabase, self).__init__(*args, **kwargs) self.create_tables = lambda: self.executescript(CREATE_ALL) - self.create_cameras_table = lambda: self.executescript( - CREATE_CAMERAS_TABLE - ) + self.create_cameras_table = lambda: self.executescript(CREATE_CAMERAS_TABLE) self.create_descriptors_table = lambda: self.executescript( CREATE_DESCRIPTORS_TABLE ) - self.create_images_table = lambda: self.executescript( - CREATE_IMAGES_TABLE - ) + self.create_images_table = lambda: self.executescript(CREATE_IMAGES_TABLE) self.create_two_view_geometries_table = lambda: self.executescript( CREATE_TWO_VIEW_GEOMETRIES_TABLE ) - self.create_keypoints_table = lambda: self.executescript( - CREATE_KEYPOINTS_TABLE - ) - self.create_matches_table = lambda: self.executescript( - CREATE_MATCHES_TABLE - ) + self.create_keypoints_table = lambda: self.executescript(CREATE_KEYPOINTS_TABLE) + self.create_matches_table = lambda: self.executescript(CREATE_MATCHES_TABLE) self.create_name_index = lambda: self.executescript(CREATE_NAME_INDEX) def add_camera( - self, - model, - width, - height, - params, - prior_focal_length=False, - camera_id=None, + self, model, width, height, params, prior_focal_length=False, camera_id=None ): params = np.asarray(params, np.float64) cursor = self.execute( diff --git a/hloc/utils/geometry.py b/hloc/utils/geometry.py index 1f33e2e..5995ccc 100644 --- a/hloc/utils/geometry.py +++ b/hloc/utils/geometry.py @@ -6,28 +6,11 @@ def to_homogeneous(p): return np.pad(p, ((0, 0),) * (p.ndim - 1) + ((0, 1),), constant_values=1) -def vector_to_cross_product_matrix(v): - return np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]]) - - -def compute_epipolar_errors(qvec_r2t, tvec_r2t, p2d_r, p2d_t): - T_r2t = pose_matrix_from_qvec_tvec(qvec_r2t, tvec_r2t) - # Compute errors in normalized plane to avoid distortion. 
- E = vector_to_cross_product_matrix(T_r2t[:3, -1]) @ T_r2t[:3, :3] - l2d_r2t = (E @ to_homogeneous(p2d_r).T).T - l2d_t2r = (E.T @ to_homogeneous(p2d_t).T).T - errors_r = np.abs( - np.sum(to_homogeneous(p2d_r) * l2d_t2r, axis=1) - ) / np.linalg.norm(l2d_t2r[:, :2], axis=1) - errors_t = np.abs( - np.sum(to_homogeneous(p2d_t) * l2d_r2t, axis=1) - ) / np.linalg.norm(l2d_r2t[:, :2], axis=1) - return E, errors_r, errors_t - - -def pose_matrix_from_qvec_tvec(qvec, tvec): - pose = np.zeros((4, 4)) - pose[:3, :3] = pycolmap.qvec_to_rotmat(qvec) - pose[:3, -1] = tvec - pose[-1, -1] = 1 - return pose +def compute_epipolar_errors(j_from_i: pycolmap.Rigid3d, p2d_i, p2d_j): + j_E_i = j_from_i.essential_matrix() + l2d_j = to_homogeneous(p2d_i) @ j_E_i.T + l2d_i = to_homogeneous(p2d_j) @ j_E_i + dist = np.abs(np.sum(to_homogeneous(p2d_i) * l2d_i, axis=1)) + errors_i = dist / np.linalg.norm(l2d_i[:, :2], axis=1) + errors_j = dist / np.linalg.norm(l2d_j[:, :2], axis=1) + return errors_i, errors_j diff --git a/hloc/utils/parsers.py b/hloc/utils/parsers.py index faaa8f2..9407dcf 100644 --- a/hloc/utils/parsers.py +++ b/hloc/utils/parsers.py @@ -1,7 +1,8 @@ -from pathlib import Path import logging -import numpy as np from collections import defaultdict +from pathlib import Path + +import numpy as np import pycolmap logger = logging.getLogger(__name__) @@ -18,7 +19,9 @@ def parse_image_list(path, with_intrinsics=False): if with_intrinsics: model, width, height, *params = data params = np.array(params, float) - cam = pycolmap.Camera(model, int(width), int(height), params) + cam = pycolmap.Camera( + model=model, width=int(width), height=int(height), params=params + ) images.append((name, cam)) else: images.append(name) diff --git a/hloc/utils/read_write_model.py b/hloc/utils/read_write_model.py index 65bed51..197921d 100644 --- a/hloc/utils/read_write_model.py +++ b/hloc/utils/read_write_model.py @@ -29,12 +29,13 @@ # # Author: Johannes L. 
Schoenberger (jsch-at-demuc-dot-de) -import os -import collections -import numpy as np -import struct import argparse +import collections import logging +import os +import struct + +import numpy as np logger = logging.getLogger(__name__) @@ -42,9 +43,7 @@ CameraModel = collections.namedtuple( "CameraModel", ["model_id", "model_name", "num_params"] ) -Camera = collections.namedtuple( - "Camera", ["id", "model", "width", "height", "params"] -) +Camera = collections.namedtuple("Camera", ["id", "model", "width", "height", "params"]) BaseImage = collections.namedtuple( "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"] ) @@ -128,11 +127,7 @@ def read_cameras_text(path): height = int(elems[3]) params = np.array(tuple(map(float, elems[4:]))) cameras[camera_id] = Camera( - id=camera_id, - model=model, - width=width, - height=height, - params=params, + id=camera_id, model=model, width=width, height=height, params=params ) return cameras @@ -157,9 +152,7 @@ def read_cameras_binary(path_to_model_file): height = camera_properties[3] num_params = CAMERA_MODEL_IDS[model_id].num_params params = read_next_bytes( - fid, - num_bytes=8 * num_params, - format_char_sequence="d" * num_params, + fid, num_bytes=8 * num_params, format_char_sequence="d" * num_params ) cameras[camera_id] = Camera( id=camera_id, @@ -230,10 +223,7 @@ def read_images_text(path): image_name = elems[9] elems = fid.readline().split() xys = np.column_stack( - [ - tuple(map(float, elems[0::3])), - tuple(map(float, elems[1::3])), - ] + [tuple(map(float, elems[0::3])), tuple(map(float, elems[1::3]))] ) point3D_ids = np.array(tuple(map(int, elems[2::3]))) images[image_id] = Image( @@ -270,19 +260,16 @@ def read_images_binary(path_to_model_file): while current_char != b"\x00": # look for the ASCII 0 entry image_name += current_char.decode("utf-8") current_char = read_next_bytes(fid, 1, "c")[0] - num_points2D = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + num_points2D = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] x_y_id_s = read_next_bytes( fid, num_bytes=24 * num_points2D, format_char_sequence="ddq" * num_points2D, ) xys = np.column_stack( - [ - tuple(map(float, x_y_id_s[0::3])), - tuple(map(float, x_y_id_s[1::3])), - ] + [tuple(map(float, x_y_id_s[0::3])), tuple(map(float, x_y_id_s[1::3]))] ) point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3]))) images[image_id] = Image( @@ -321,13 +308,7 @@ def write_images_text(images, path): with open(path, "w") as fid: fid.write(HEADER) for _, img in images.items(): - image_header = [ - img.id, - *img.qvec, - *img.tvec, - img.camera_id, - img.name, - ] + image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name] first_line = " ".join(map(str, image_header)) fid.write(first_line + "\n") @@ -407,9 +388,9 @@ def read_points3D_binary(path_to_model_file): xyz = np.array(binary_point_line_properties[1:4]) rgb = np.array(binary_point_line_properties[4:7]) error = np.array(binary_point_line_properties[7]) - track_length = read_next_bytes( - fid, num_bytes=8, format_char_sequence="Q" - )[0] + track_length = read_next_bytes(fid, num_bytes=8, format_char_sequence="Q")[ + 0 + ] track_elems = read_next_bytes( fid, num_bytes=8 * track_length, @@ -442,7 +423,7 @@ def write_points3D_text(points3D, path): ) / len(points3D) HEADER = ( "# 3D point list with one line of data per point:\n" - + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + + "# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, 
POINT2D_IDX)\n" # noqa: E501 + "# Number of points: {}, mean track length: {}\n".format( len(points3D), mean_track_length ) @@ -498,12 +479,8 @@ def read_model(path, ext=""): ext = ".txt" else: try: - cameras, images, points3D = read_model( - os.path.join(path, "model/") - ) - logger.warning( - "This SfM file structure was deprecated in hloc v1.1" - ) + cameras, images, points3D = read_model(os.path.join(path, "model/")) + logger.warning("This SfM file structure was deprecated in hloc v1.1") return cameras, images, points3D except FileNotFoundError: raise FileNotFoundError( @@ -595,9 +572,7 @@ def main(): ) args = parser.parse_args() - cameras, images, points3D = read_model( - path=args.input_model, ext=args.input_format - ) + cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format) print("num_cameras:", len(cameras)) print("num_images:", len(images)) @@ -605,11 +580,7 @@ def main(): if args.output_model is not None: write_model( - cameras, - images, - points3D, - path=args.output_model, - ext=args.output_format, + cameras, images, points3D, path=args.output_model, ext=args.output_format ) diff --git a/hloc/utils/viz.py b/hloc/utils/viz.py index 21d3807..360466b 100644 --- a/hloc/utils/viz.py +++ b/hloc/utils/viz.py @@ -20,7 +20,7 @@ def cm_RdGn(x): def plot_images( - imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True + imgs, titles=None, cmaps="gray", dpi=100, pad=0.5, adaptive=True, figsize=4.5 ): """Plot a set of images horizontally. Args: @@ -37,23 +37,19 @@ def plot_images( ratios = [i.shape[1] / i.shape[0] for i in imgs] # W / H else: ratios = [4 / 3] * n - figsize = [sum(ratios) * 4.5, 4.5] - fig, ax = plt.subplots( + figsize = [sum(ratios) * figsize, figsize] + fig, axs = plt.subplots( 1, n, figsize=figsize, dpi=dpi, gridspec_kw={"width_ratios": ratios} ) if n == 1: - ax = [ax] - for i in range(n): - ax[i].imshow(imgs[i], cmap=plt.get_cmap(cmaps[i])) - ax[i].get_yaxis().set_ticks([]) - ax[i].get_xaxis().set_ticks([]) - ax[i].set_axis_off() - for spine in ax[i].spines.values(): # remove frame - spine.set_visible(False) + axs = [axs] + for i, (img, ax) in enumerate(zip(imgs, axs)): + ax.imshow(img, cmap=plt.get_cmap(cmaps[i])) + ax.set_axis_off() if titles: - ax[i].set_title(titles[i]) + ax.set_title(titles[i]) fig.tight_layout(pad=pad) - + return fig def plot_keypoints(kpts, colors="lime", ps=4): """Plot keypoints for existing images. 
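A short sketch of the updated viz helpers: plot_images now accepts a per-image figsize and returns the figure, and plot_keypoints draws onto the axes it created. The image paths and keypoints below are placeholders:

import numpy as np
from hloc.utils.io import read_image
from hloc.utils.viz import add_text, plot_images, plot_keypoints

img0 = read_image("datasets/sacre_coeur/query.jpg")  # placeholder paths
img1 = read_image("datasets/sacre_coeur/ref.jpg")
kpts0 = np.random.rand(200, 2) * img0.shape[1::-1]   # placeholder keypoints
kpts1 = np.random.rand(200, 2) * img1.shape[1::-1]

fig = plot_images([img0, img1], titles=["query", "reference"], figsize=3.0)
plot_keypoints([kpts0, kpts1], colors="lime", ps=4)
add_text(0, "random keypoints")
fig.savefig("keypoints.png")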
@@ -96,21 +92,19 @@ def plot_matches(kpts0, kpts1, color=None, lw=1.5, ps=4, indices=(0, 1), a=1.0): if lw > 0: # transform the points into the figure coordinate system - transFigure = fig.transFigure.inverted() - fkpts0 = transFigure.transform(ax0.transData.transform(kpts0)) - fkpts1 = transFigure.transform(ax1.transData.transform(kpts1)) - fig.lines += [ - matplotlib.lines.Line2D( - (fkpts0[i, 0], fkpts1[i, 0]), - (fkpts0[i, 1], fkpts1[i, 1]), - zorder=1, - transform=fig.transFigure, - c=color[i], - linewidth=lw, - alpha=a, + for i in range(len(kpts0)): + fig.add_artist( + matplotlib.patches.ConnectionPatch( + xyA=(kpts0[i, 0], kpts0[i, 1]), + coordsA=ax0.transData, + xyB=(kpts1[i, 0], kpts1[i, 1]), + coordsB=ax1.transData, + zorder=1, + color=color[i], + linewidth=lw, + alpha=a, + ) ) - for i in range(len(kpts0)) - ] # freeze the axes to prevent the transform to change ax0.autoscale(enable=False) @@ -134,13 +128,7 @@ def add_text( ): ax = plt.gcf().axes[idx] t = ax.text( - *pos, - text, - fontsize=fs, - ha=ha, - va=va, - color=color, - transform=ax.transAxes + *pos, text, fontsize=fs, ha=ha, va=va, color=color, transform=ax.transAxes ) if lcolor is not None: t.set_path_effects( diff --git a/hloc/utils/viz_3d.py b/hloc/utils/viz_3d.py index fbd6505..e608f78 100644 --- a/hloc/utils/viz_3d.py +++ b/hloc/utils/viz_3d.py @@ -9,9 +9,10 @@ """ from typing import Optional + import numpy as np -import pycolmap import plotly.graph_objects as go +import pycolmap def to_homogeneous(points): @@ -46,9 +47,7 @@ def init_figure(height: int = 800) -> go.Figure: dragmode="orbit", ), margin=dict(l=0, r=0, b=0, t=0, pad=0), - legend=dict( - orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1 - ), + legend=dict(orientation="h", yanchor="top", y=0.99, xanchor="left", x=0.1), ) return fig @@ -70,9 +69,7 @@ def plot_points( mode="markers", name=name, legendgroup=name, - marker=dict( - size=ps, color=color, line_width=0.0, colorscale=colorscale - ), + marker=dict(size=ps, color=color, line_width=0.0, colorscale=colorscale), ) fig.add_trace(tr) @@ -85,7 +82,9 @@ def plot_camera( color: str = "rgb(0, 0, 255)", name: Optional[str] = None, legendgroup: Optional[str] = None, + fill: bool = False, size: float = 1.0, + text: Optional[str] = None, ): """Plot a camera frustum from pose and intrinsic matrix.""" W, H = K[0, 2] * 2, K[1, 2] * 2 @@ -98,43 +97,34 @@ def plot_camera( scale = 1.0 corners = to_homogeneous(corners) @ np.linalg.inv(K).T corners = (corners / 2 * scale) @ R.T + t - - x, y, z = corners.T - rect = go.Scatter3d( - x=x, - y=y, - z=z, - line=dict(color=color), - legendgroup=legendgroup, - name=name, - marker=dict(size=0.0001), - showlegend=False, - ) - fig.add_trace(rect) + legendgroup = legendgroup if legendgroup is not None else name x, y, z = np.concatenate(([t], corners)).T i = [0, 0, 0, 0] j = [1, 2, 3, 4] k = [2, 3, 4, 1] - pyramid = go.Mesh3d( - x=x, - y=y, - z=z, - color=color, - i=i, - j=j, - k=k, - legendgroup=legendgroup, - name=name, - showlegend=False, - ) - fig.add_trace(pyramid) + if fill: + pyramid = go.Mesh3d( + x=x, + y=y, + z=z, + color=color, + i=i, + j=j, + k=k, + legendgroup=legendgroup, + name=name, + showlegend=False, + hovertemplate=text.replace("\n", "
"), + ) + fig.add_trace(pyramid) + triangles = np.vstack((i, j, k)).T vertices = np.concatenate(([t], corners)) tri_points = np.array([vertices[i] for i in triangles.reshape(-1)]) - x, y, z = tri_points.T + pyramid = go.Scatter3d( x=x, y=y, @@ -144,6 +134,7 @@ def plot_camera( name=name, line=dict(color=color, width=1), showlegend=False, + hovertemplate=text.replace("\n", "
"), ) fig.add_trace(pyramid) @@ -156,19 +147,19 @@ def plot_camera_colmap( **kwargs ): """Plot a camera frustum from PyCOLMAP objects""" + world_t_camera = image.cam_from_world.inverse() plot_camera( fig, - image.rotmat().T, - image.projection_center(), + world_t_camera.rotation.matrix(), + world_t_camera.translation, camera.calibration_matrix(), name=name or str(image.image_id), + text=str(image), **kwargs ) -def plot_cameras( - fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs -): +def plot_cameras(fig: go.Figure, reconstruction: pycolmap.Reconstruction, **kwargs): """Plot a camera as a cone with camera frustum.""" for image_id, image in reconstruction.images.items(): plot_camera_colmap( @@ -185,13 +176,14 @@ def plot_reconstruction( min_track_length: int = 2, points: bool = True, cameras: bool = True, + points_rgb: bool = True, cs: float = 1.0, ): # Filter outliers bbs = rec.compute_bounding_box(0.001, 0.999) # Filter points, use original reproj error here - xyzs = [ - p3D.xyz + p3Ds = [ + p3D for _, p3D in rec.points3D.items() if ( (p3D.xyz >= bbs[0]).all() @@ -200,7 +192,12 @@ def plot_reconstruction( and p3D.track.length() >= min_track_length ) ] + xyzs = [p3D.xyz for p3D in p3Ds] + if points_rgb: + pcolor = [p3D.color for p3D in p3Ds] + else: + pcolor = color if points: - plot_points(fig, np.array(xyzs), color=color, ps=1, name=name) + plot_points(fig, np.array(xyzs), color=pcolor, ps=1, name=name) if cameras: plot_cameras(fig, rec, color=color, legendgroup=name, size=cs) diff --git a/hloc/visualization.py b/hloc/visualization.py new file mode 100644 index 0000000..77369ef --- /dev/null +++ b/hloc/visualization.py @@ -0,0 +1,182 @@ +import pickle +import random + +import numpy as np +import pycolmap +from matplotlib import cm + +from .utils.io import read_image +from .utils.viz import ( + add_text, + cm_RdGn, + plot_images, + plot_keypoints, + plot_matches, +) + + +def visualize_sfm_2d( + reconstruction, + image_dir, + color_by="visibility", + selected=[], + n=1, + seed=0, + dpi=75, +): + assert image_dir.exists() + if not isinstance(reconstruction, pycolmap.Reconstruction): + reconstruction = pycolmap.Reconstruction(reconstruction) + + if not selected: + image_ids = reconstruction.reg_image_ids() + selected = random.Random(seed).sample(image_ids, min(n, len(image_ids))) + + for i in selected: + image = reconstruction.images[i] + keypoints = np.array([p.xy for p in image.points2D]) + visible = np.array([p.has_point3D() for p in image.points2D]) + + if color_by == "visibility": + color = [(0, 0, 1) if v else (1, 0, 0) for v in visible] + text = f"visible: {np.count_nonzero(visible)}/{len(visible)}" + elif color_by == "track_length": + tl = np.array( + [ + ( + reconstruction.points3D[p.point3D_id].track.length() + if p.has_point3D() + else 1 + ) + for p in image.points2D + ] + ) + max_, med_ = np.max(tl), np.median(tl[tl > 1]) + tl = np.log(tl) + color = cm.jet(tl / tl.max()).tolist() + text = f"max/median track length: {max_}/{med_}" + elif color_by == "depth": + p3ids = [p.point3D_id for p in image.points2D if p.has_point3D()] + z = np.array( + [ + (image.cam_from_world * reconstruction.points3D[j].xyz)[-1] + for j in p3ids + ] + ) + z -= z.min() + color = cm.jet(z / np.percentile(z, 99.9)) + text = f"visible: {np.count_nonzero(visible)}/{len(visible)}" + keypoints = keypoints[visible] + else: + raise NotImplementedError(f"Coloring not implemented: {color_by}.") + + name = image.name + fig = plot_images([read_image(image_dir / name)], dpi=dpi) + 
plot_keypoints([keypoints], colors=[color], ps=4) + add_text(0, text) + add_text(0, name, pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom") + return fig + + +def visualize_loc( + results, + image_dir, + reconstruction=None, + db_image_dir=None, + selected=[], + n=1, + seed=0, + prefix=None, + **kwargs, +): + assert image_dir.exists() + + with open(str(results) + "_logs.pkl", "rb") as f: + logs = pickle.load(f) + + if not selected: + queries = list(logs["loc"].keys()) + if prefix: + queries = [q for q in queries if q.startswith(prefix)] + selected = random.Random(seed).sample(queries, min(n, len(queries))) + + if reconstruction is not None: + if not isinstance(reconstruction, pycolmap.Reconstruction): + reconstruction = pycolmap.Reconstruction(reconstruction) + + for qname in selected: + loc = logs["loc"][qname] + visualize_loc_from_log( + image_dir, qname, loc, reconstruction, db_image_dir, **kwargs + ) + + +def visualize_loc_from_log( + image_dir, + query_name, + loc, + reconstruction=None, + db_image_dir=None, + top_k_db=2, + dpi=75, +): + q_image = read_image(image_dir / query_name) + if loc.get("covisibility_clustering", False): + # select the first, largest cluster if the localization failed + loc = loc["log_clusters"][loc["best_cluster"] or 0] + + inliers = np.array(loc["PnP_ret"]["inliers"]) + mkp_q = loc["keypoints_query"] + n = len(loc["db"]) + if reconstruction is not None: + # for each pair of query keypoint and its matched 3D point, + # we need to find its corresponding keypoint in each database image + # that observes it. We also count the number of inliers in each. + kp_idxs, kp_to_3D_to_db = loc["keypoint_index_to_db"] + counts = np.zeros(n) + dbs_kp_q_db = [[] for _ in range(n)] + inliers_dbs = [[] for _ in range(n)] + for i, (inl, (p3D_id, db_idxs)) in enumerate( + zip(inliers, kp_to_3D_to_db) + ): + track = reconstruction.points3D[p3D_id].track + track = {el.image_id: el.point2D_idx for el in track.elements} + for db_idx in db_idxs: + counts[db_idx] += inl + kp_db = track[loc["db"][db_idx]] + dbs_kp_q_db[db_idx].append((i, kp_db)) + inliers_dbs[db_idx].append(inl) + else: + # for inloc the database keypoints are already in the logs + assert "keypoints_db" in loc + assert "indices_db" in loc + counts = np.array( + [np.sum(loc["indices_db"][inliers] == i) for i in range(n)] + ) + + # display the database images with the most inlier matches + db_sort = np.argsort(-counts) + for db_idx in db_sort[:top_k_db]: + if reconstruction is not None: + db = reconstruction.images[loc["db"][db_idx]] + db_name = db.name + db_kp_q_db = np.array(dbs_kp_q_db[db_idx]) + kp_q = mkp_q[db_kp_q_db[:, 0]] + kp_db = np.array([db.points2D[i].xy for i in db_kp_q_db[:, 1]]) + inliers_db = inliers_dbs[db_idx] + else: + db_name = loc["db"][db_idx] + kp_q = mkp_q[loc["indices_db"] == db_idx] + kp_db = loc["keypoints_db"][loc["indices_db"] == db_idx] + inliers_db = inliers[loc["indices_db"] == db_idx] + + db_image = read_image((db_image_dir or image_dir) / db_name) + color = cm_RdGn(inliers_db).tolist() + text = f"inliers: {sum(inliers_db)}/{len(inliers_db)}" + + plot_images([q_image, db_image], dpi=dpi) + plot_matches(kp_q, kp_db, color, a=0.1) + add_text(0, text) + opts = dict(pos=(0.01, 0.01), fs=5, lcolor=None, va="bottom") + add_text(0, query_name, **opts) + add_text(1, db_name, **opts) diff --git a/requirements.txt b/requirements.txt index ad5e055..5bc1a74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ opencv-python==4.6.0.66 pandas==2.0.3 plotly==5.15.0 protobuf==4.23.2 
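A usage sketch tying together the new hloc.visualization module and the updated viz_3d helpers (per-point RGB and hoverable camera frusta); the reconstruction and image paths are placeholders:

from pathlib import Path

import pycolmap
from hloc import visualization
from hloc.utils import viz_3d

rec = pycolmap.Reconstruction("outputs/sfm")  # placeholder reconstruction
images = Path("datasets/images")              # placeholder image directory

# 2D: color the keypoints of two random registered images by track length.
visualization.visualize_sfm_2d(rec, images, color_by="track_length", n=2)

# 3D: plot the point cloud with its original colors plus camera frusta.
fig = viz_3d.init_figure()
viz_3d.plot_reconstruction(fig, rec, points_rgb=True, cameras=True, name="sfm")
fig.show()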
-pycolmap==0.5.0 +pycolmap==0.6.0 pytlsd==0.0.2 pytorch-lightning==1.4.9 PyYAML==6.0 @@ -34,4 +34,5 @@ onnxruntime poselib roma #dust3r huggingface_hub -psutil \ No newline at end of file +psutil +easydict \ No newline at end of file diff --git a/third_party/mast3r b/third_party/mast3r new file mode 160000 index 0000000..6d6d485 --- /dev/null +++ b/third_party/mast3r @@ -0,0 +1 @@ +Subproject commit 6d6d4858f03f1736ca21928fa3eda0511fbb0a8d diff --git a/ui/app_class.py b/ui/app_class.py index 50917a7..f1c9805 100644 --- a/ui/app_class.py +++ b/ui/app_class.py @@ -3,7 +3,10 @@ import gradio as gr import numpy as np +from easydict import EasyDict as edict +from omegaconf import OmegaConf +from ui.sfm import SfmEngine from ui.utils import ( GRADIO_VERSION, gen_examples, @@ -115,7 +118,7 @@ def init_interface(self): label="Match thres.", value=0.1, ) - match_setting_max_features = gr.Slider( + match_setting_max_keypoints = gr.Slider( minimum=10, maximum=10000, step=10, @@ -199,7 +202,7 @@ def init_interface(self): input_image0, input_image1, match_setting_threshold, - match_setting_max_features, + match_setting_max_keypoints, detect_keypoints_threshold, matcher_list, ransac_method, @@ -314,7 +317,7 @@ def init_interface(self): input_image0, input_image1, match_setting_threshold, - match_setting_max_features, + match_setting_max_keypoints, detect_keypoints_threshold, matcher_list, input_image0, @@ -377,31 +380,15 @@ def init_interface(self): ], outputs=[output_wrapped, geometry_result], ) - with gr.Tab("Under construction"): - self.init_tab_sfm() - - def init_tab_sfm(self): - with gr.Row(): - with gr.Column(): - with gr.Row(): - gr.Textbox("Under construction", label="A", visible=True) - gr.Textbox("Under construction", label="B", visible=True) - gr.Textbox("Under construction", label="C", visible=True) - with gr.Row(): - with gr.Accordion("Open for More", open=False): - gr.Textbox( - "Under construction", label="A1", visible=True - ) - gr.Textbox( - "Under construction", label="B1", visible=True - ) - gr.Textbox( - "Under construction", label="C1", visible=True - ) - with gr.Column(): - gr.Textbox("Under construction", label="D", visible=True) - gr.Textbox("Under construction", label="E", visible=True) - gr.Textbox("Under construction", label="F", visible=True) + with gr.Tab("Structure from Motion(under-dev)"): + sfm_ui = AppSfmUI( # noqa: F841 + { + **self.cfg, + "matcher_zoo": self.matcher_zoo, + "outputs": "experiments/sfm", + } + ) + sfm_ui.call_empty() def run(self): self.app.queue().launch( @@ -559,3 +546,256 @@ def get_link(link, tag="Link"): # height=1000, ) return tab + + +class AppBaseUI: + def __init__(self, cfg: Dict[str, Any] = {}): + self.cfg = OmegaConf.create(cfg) + self.inputs = edict({}) + self.outputs = edict({}) + self.ui = edict({}) + + def _init_ui(self): + NotImplemented + + def call(self, **kwargs): + NotImplemented + + def info(self): + gr.Info("SFM is under construction.") + + +class AppSfmUI(AppBaseUI): + def __init__(self, cfg: Dict[str, Any] = None): + super().__init__(cfg) + assert "matcher_zoo" in self.cfg + self.matcher_zoo = self.cfg["matcher_zoo"] + self.sfm_engine = SfmEngine(cfg) + self._init_ui() + + def init_retrieval_dropdown(self): + algos = [] + for k, v in self.cfg["retrieval_zoo"].items(): + if v.get("enable", True): + algos.append(k) + return algos + + def _update_options(self, option): + if option == "sparse": + return gr.Textbox("sparse", visible=True) + elif option == "dense": + return gr.Textbox("dense", visible=True) + else: + return 
gr.Textbox("not set", visible=True) + + def _on_select_custom_params(self, value: bool = False): + return gr.Textbox( + label="Camera Params", + value="0,0,0,0", + interactive=value, + visible=value, + ) + + def _init_ui(self): + with gr.Row(): + # data settting and camera settings + with gr.Column(): + self.inputs.input_images = gr.File( + label="SfM", + interactive=True, + file_count="multiple", + min_width=300, + ) + # camera setting + with gr.Accordion("Camera Settings", open=True): + with gr.Column(): + with gr.Row(): + with gr.Column(): + self.inputs.camera_model = gr.Dropdown( + choices=[ + "PINHOLE", + "SIMPLE_RADIAL", + "OPENCV", + ], + value="PINHOLE", + label="Camera Model", + interactive=True, + ) + with gr.Column(): + gr.Checkbox( + label="Shared Params", + value=True, + interactive=True, + ) + camera_custom_params_cb = gr.Checkbox( + label="Custom Params", + value=False, + interactive=True, + ) + with gr.Row(): + self.inputs.camera_params = gr.Textbox( + label="Camera Params", + value="0,0,0,0", + interactive=False, + visible=False, + ) + camera_custom_params_cb.select( + fn=self._on_select_custom_params, + inputs=camera_custom_params_cb, + outputs=self.inputs.camera_params, + ) + + with gr.Accordion("Matching Settings", open=True): + # feature extraction and matching setting + with gr.Row(): + # matcher setting + self.inputs.matcher_key = gr.Dropdown( + choices=self.matcher_zoo.keys(), + value="disk+lightglue", + label="Matching Model", + interactive=True, + ) + with gr.Row(): + with gr.Accordion("Advanced Settings", open=False): + with gr.Column(): + with gr.Row(): + # matching setting + self.inputs.max_keypoints = gr.Slider( + label="Max Keypoints", + minimum=100, + maximum=10000, + value=1000, + interactive=True, + ) + self.inputs.keypoint_threshold = gr.Slider( + label="Keypoint Threshold", + minimum=0, + maximum=1, + value=0.01, + ) + with gr.Row(): + self.inputs.match_threshold = gr.Slider( + label="Match Threshold", + minimum=0.01, + maximum=12.0, + value=0.2, + ) + self.inputs.ransac_threshold = gr.Slider( + label="Ransac Threshold", + minimum=0.01, + maximum=12.0, + value=4.0, + step=0.01, + interactive=True, + ) + + with gr.Row(): + self.inputs.ransac_confidence = gr.Slider( + label="Ransac Confidence", + minimum=0.01, + maximum=1.0, + value=0.9999, + step=0.0001, + interactive=True, + ) + self.inputs.ransac_max_iter = gr.Slider( + label="Ransac Max Iter", + minimum=1, + maximum=100, + value=100, + step=1, + interactive=True, + ) + with gr.Accordion("Scene Graph Settings", open=True): + # mapping setting + self.inputs.scene_graph = gr.Dropdown( + choices=["all", "swin", "oneref"], + value="all", + label="Scene Graph", + interactive=True, + ) + + # global feature setting + self.inputs.global_feature = gr.Dropdown( + choices=self.init_retrieval_dropdown(), + value="netvlad", + label="Global features", + interactive=True, + ) + self.inputs.top_k = gr.Slider( + label="Number of Images per Image to Match", + minimum=1, + maximum=100, + value=10, + step=1, + ) + # button_match = gr.Button("Run Matching", variant="primary") + + # mapping setting + with gr.Column(): + with gr.Accordion("Mapping Settings", open=True): + with gr.Row(): + with gr.Accordion("Buddle Settings", open=True): + with gr.Row(): + self.inputs.mapper_refine_focal_length = ( + gr.Checkbox( + label="Refine Focal Length", + value=False, + interactive=True, + ) + ) + self.inputs.mapper_refine_principle_points = ( + gr.Checkbox( + label="Refine Principle Points", + value=False, + interactive=True, + ) + ) + 
self.inputs.mapper_refine_extra_params = ( + gr.Checkbox( + label="Refine Extra Params", + value=False, + interactive=True, + ) + ) + with gr.Accordion("Retriangluation Settings", open=True): + gr.Textbox( + label="Retriangluation Details", + ) + self.ui.button_sfm = gr.Button("Run SFM", variant="primary") + self.outputs.model_3d = gr.Model3D( + interactive=True, + ) + self.outputs.output_image = gr.Image( + label="SFM Visualize", + type="numpy", + image_mode="RGB", + interactive=False, + ) + + def call_empty(self): + self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[]) + + def call(self): + self.ui.button_sfm.click( + fn=self.sfm_engine.call, + inputs=[ + self.inputs.matcher_key, + self.inputs.input_images, # images + self.inputs.camera_model, + self.inputs.camera_params, + self.inputs.max_keypoints, + self.inputs.keypoint_threshold, + self.inputs.match_threshold, + self.inputs.ransac_threshold, + self.inputs.ransac_confidence, + self.inputs.ransac_max_iter, + self.inputs.scene_graph, + self.inputs.global_feature, + self.inputs.top_k, + self.inputs.mapper_refine_focal_length, + self.inputs.mapper_refine_principle_points, + self.inputs.mapper_refine_extra_params, + ], + outputs=[self.outputs.model_3d, self.outputs.output_image], + ) diff --git a/ui/config.yaml b/ui/config.yaml index dff927f..63fe9d1 100644 --- a/ui/config.yaml +++ b/ui/config.yaml @@ -27,6 +27,17 @@ matcher_zoo: paper: https://arxiv.org/abs/2405.12979 project: https://hwjiang1510.github.io/OmniGlue display: true + Mast3R: + enable: false + matcher: mast3r + dense: true + info: + name: Mast3R #dispaly name + source: "CVPR 2024" + github: https://github.com/naver/mast3r + paper: https://arxiv.org/abs/2406.09756 + project: https://dust3r.europe.naverlabs.com + display: true DUSt3R: # TODO: duster is under development enable: true @@ -395,4 +406,12 @@ matcher_zoo: github: https://github.com/feixue94/sfd2 paper: https://arxiv.org/abs/2304.14845 project: https://feixue94.github.io/ - display: true \ No newline at end of file + display: true + +retrieval_zoo: + netvlad: + enable: true + openibl: + enable: true + cosplace: + enable: true diff --git a/ui/sfm.py b/ui/sfm.py new file mode 100644 index 0000000..b13ac3d --- /dev/null +++ b/ui/sfm.py @@ -0,0 +1,164 @@ +import shutil +import tempfile +from pathlib import Path +from typing import Any, Dict, List + +import pycolmap + +from hloc import ( + extract_features, + logger, + match_features, + pairs_from_retrieval, + reconstruction, + visualization, +) + +from .viz import fig2im + + +class SfmEngine: + def __init__(self, cfg: Dict[str, Any] = None): + self.cfg = cfg + if "outputs" in cfg and Path(cfg["outputs"]): + outputs = Path(cfg["outputs"]) + outputs.mkdir(parents=True, exist_ok=True) + else: + outputs = tempfile.mkdtemp() + self.outputs = Path(outputs) + + def call( + self, + key: str, + images: Path, + camera_model: str, + camera_params: List[float], + max_keypoints: int, + keypoint_threshold: float, + match_threshold: float, + ransac_threshold: int, + ransac_confidence: float, + ransac_max_iter: int, + scene_graph: bool, + global_feature: str, + top_k: int = 10, + mapper_refine_focal_length: bool = False, + mapper_refine_principle_points: bool = False, + mapper_refine_extra_params: bool = False, + ): + """ + Call a list of functions to perform feature extraction, matching, and reconstruction. + + Args: + key (str): The key to retrieve the matcher and feature models. + images (Path): The directory containing the images. 
+ outputs (Path): The directory to store the outputs. + camera_model (str): The camera model. + camera_params (List[float]): The camera parameters. + max_keypoints (int): The maximum number of features. + match_threshold (float): The match threshold. + ransac_threshold (int): The RANSAC threshold. + ransac_confidence (float): The RANSAC confidence. + ransac_max_iter (int): The maximum number of RANSAC iterations. + scene_graph (bool): Whether to compute the scene graph. + global_feature (str): Whether to compute the global feature. + top_k (int): The number of image-pair to use. + mapper_refine_focal_length (bool): Whether to refine the focal length. + mapper_refine_principle_points (bool): Whether to refine the principle points. + mapper_refine_extra_params (bool): Whether to refine the extra parameters. + + Returns: + Path: The directory containing the SfM results. + """ + if len(images) == 0: + logger.error(f"{images} does not exist.") + + temp_images = Path(tempfile.mkdtemp()) + # copy images + logger.info(f"Copying images to {temp_images}.") + for image in images: + shutil.copy(image, temp_images) + + matcher_zoo = self.cfg["matcher_zoo"] + model = matcher_zoo[key] + match_conf = model["matcher"] + match_conf["model"]["max_keypoints"] = max_keypoints + match_conf["model"]["match_threshold"] = match_threshold + + feature_conf = model["feature"] + feature_conf["model"]["max_keypoints"] = max_keypoints + feature_conf["model"]["keypoint_threshold"] = keypoint_threshold + + # retrieval + retrieval_name = self.cfg.get("retrieval_name", "netvlad") + retrieval_conf = extract_features.confs[retrieval_name] + + mapper_options = { + "ba_refine_extra_params": mapper_refine_extra_params, + "ba_refine_focal_length": mapper_refine_focal_length, + "ba_refine_principal_point": mapper_refine_principle_points, + "ba_local_max_num_iterations": 40, + "ba_local_max_refinements": 3, + "ba_global_max_num_iterations": 100, + # below 3 options are for individual/video data, for internet photos, they should be left + # default + "min_focal_length_ratio": 0.1, + "max_focal_length_ratio": 10, + "max_extra_param": 1e15, + } + + sfm_dir = self.outputs / "sfm_{}".format(key) + sfm_pairs = self.outputs / "pairs-sfm.txt" + sfm_dir.mkdir(exist_ok=True, parents=True) + + # extract features + retrieval_path = extract_features.main( + retrieval_conf, temp_images, self.outputs + ) + pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=top_k) + + feature_path = extract_features.main( + feature_conf, temp_images, self.outputs + ) + # match features + match_path = match_features.main( + match_conf, sfm_pairs, feature_conf["output"], self.outputs + ) + # reconstruction + already_sfm = False + if sfm_dir.exists(): + try: + model = pycolmap.Reconstruction(str(sfm_dir)) + already_sfm = True + except ValueError: + logger.info(f"sfm_dir not exists model: {sfm_dir}") + if not already_sfm: + model = reconstruction.main( + sfm_dir, + temp_images, + sfm_pairs, + feature_path, + match_path, + mapper_options=mapper_options, + ) + + vertices = [] + for point3D_id, point3D in model.points3D.items(): + vertices.append([point3D.xyz, point3D.color]) + + model_3d = sfm_dir / "points3D.obj" + with open(model_3d, "w") as f: + for p, c in vertices: + # Write vertex position + f.write("v {} {} {}\n".format(p[0], p[1], p[2])) + # Write vertex normal (color) + f.write( + "vn {} {} {}\n".format( + c[0] / 255.0, c[1] / 255.0, c[2] / 255.0 + ) + ) + viz_2d = visualization.visualize_sfm_2d( + model, temp_images, color_by="visibility", n=2, 
dpi=300 + ) + + return model_3d, fig2im(viz_2d) / 255.0
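For reference, the pipeline that SfmEngine.call wires together can also be driven directly from hloc. A hedged end-to-end sketch; the dataset path is a placeholder and the feature/matcher conf names are assumptions to be replaced with entries from your hloc configs:

from pathlib import Path

from hloc import (
    extract_features,
    match_features,
    pairs_from_retrieval,
    reconstruction,
    visualization,
)

images = Path("datasets/South-Building/images")  # placeholder image folder
outputs = Path("experiments/sfm")
outputs.mkdir(parents=True, exist_ok=True)
sfm_pairs = outputs / "pairs-sfm.txt"
sfm_dir = outputs / "sfm_superpoint+superglue"

retrieval_conf = extract_features.confs["netvlad"]
feature_conf = extract_features.confs["superpoint_aachen"]  # assumed conf name
matcher_conf = match_features.confs["superglue"]            # assumed conf name

# Image retrieval -> pairs, then local features, matches, and reconstruction.
retrieval_path = extract_features.main(retrieval_conf, images, outputs)
pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=10)
feature_path = extract_features.main(feature_conf, images, outputs)
match_path = match_features.main(
    matcher_conf, sfm_pairs, feature_conf["output"], outputs
)
model = reconstruction.main(sfm_dir, images, sfm_pairs, feature_path, match_path)
visualization.visualize_sfm_2d(model, images, color_by="visibility", n=2)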