diff --git a/src/main/java/net/ripe/rpki/rsyncit/rsync/RsyncWriter.java b/src/main/java/net/ripe/rpki/rsyncit/rsync/RsyncWriter.java index 35dc73d..a9c6ba0 100644 --- a/src/main/java/net/ripe/rpki/rsyncit/rsync/RsyncWriter.java +++ b/src/main/java/net/ripe/rpki/rsyncit/rsync/RsyncWriter.java @@ -9,18 +9,17 @@ import java.io.File; import java.io.IOException; import java.io.UncheckedIOException; +import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.attribute.FileTime; +import java.nio.file.attribute.PosixFilePermission; import java.nio.file.attribute.PosixFilePermissions; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.ForkJoinPool; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -36,6 +35,14 @@ public class RsyncWriter { // directory names (`tmp-2021-04-26T10:09:06.023Z-4352054854289820810`). public static final Pattern PUBLICATION_DIRECTORY_PATTERN = Pattern.compile("^(tmp|published)-\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d+(-\\d+)?$"); + // Internal directories (used to store all the RPKI objects per CA, etc) are set to this modification time so + // that rsync does not see the directories as modified every time we fully write the repository. The RPKI objects + // have their creation time as last modified time, so rsync will copy these as needed. + public static final FileTime INTERNAL_DIRECTORY_LAST_MODIFIED_TIME = FileTime.fromMillis(0); + public static final Set FILE_PERMISSIONS = PosixFilePermissions.fromString("rw-r--r--"); + public static final Set DIRECTORY_PERMISSIONS = PosixFilePermissions.fromString("rwxr-xr-x"); + + private final ForkJoinPool fileWriterPool = new ForkJoinPool(2 * Runtime.getRuntime().availableProcessors()); @Getter @@ -58,6 +65,8 @@ public Path writeObjects(List objects) { } } + record ObjectTarget(Path targetPath, byte[] content, FileTime modificationTime){} + private Path writeObjectToNewDirectory(List objects, Instant now) throws IOException { // Since we don't know anything about URLs of the objects // they are grouped by the host name of the URL @@ -66,63 +75,71 @@ private Path writeObjectToNewDirectory(List objects, Instant now) th final String formattedNow = DateTimeFormatter.ISO_LOCAL_DATE_TIME.withZone(ZoneId.of("UTC")).format(now); - final Path targetDirectory = Paths.get(config.rsyncPath()).resolve("published-" + formattedNow); final Path temporaryDirectory = Files.createTempDirectory(Paths.get(config.rsyncPath()), "tmp-" + formattedNow + "-"); try { groupedByHost.forEach((hostName, os) -> { // create a directory per hostname (in realistic cases there will be just one) - var hostBasedPath = temporaryDirectory.resolve(hostName); - try { - Files.createDirectories(hostBasedPath); - } catch (IOException e) { - log.error("Could not create {}", hostBasedPath); - } - - // Filter out objects with potentially insecure URLs - var wellBehavingObjects = filterOutBadUrls(hostBasedPath, os); - - // Create directories in "shortest first" order. - // Use canonical path to avoid potential troubles with relative ".." paths - wellBehavingObjects - .stream() - .map(o -> { - // remove the filename, i.e. /foo/bar/object.cer -> /foo/bar - var objectParentDir = Paths.get(relativePath(o.url().getPath())).getParent(); - return hostBasedPath.resolve(objectParentDir).normalize(); - }) - .sorted() - .distinct() - .forEach(dir -> { - try { - Files.createDirectories(dir); - } catch (IOException ex) { - log.error("Could not create directory {}", dir, ex); - } - }); - - fileWriterPool.submit(() -> wellBehavingObjects.stream() - .parallel() - .forEach(o -> { - var path = Paths.get(relativePath(o.url().getPath())); - try { - var normalizedPath = hostBasedPath.resolve(path).normalize(); - Files.write(normalizedPath, o.bytes()); - // rsync relies on the correct timestamp for fast synchronization - Files.setLastModifiedTime(normalizedPath, FileTime.from(o.modificationTime())); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }) + var hostDirectory = temporaryDirectory.resolve(hostName); + var hostUrl = URI.create("rsync://" + hostName); + + // Gather the relative paths of files with legal names + var writableContent = filterOutBadUrls(hostDirectory, os).stream() + .map(rpkiObject -> { + var relativeUriPath = hostUrl.relativize(rpkiObject.url()).getPath(); + var targetPath = hostDirectory.resolve(relativeUriPath).normalize(); + + assert targetPath.normalize().startsWith(hostDirectory.normalize()); + + return new ObjectTarget(targetPath, rpkiObject.bytes(), FileTime.from(rpkiObject.modificationTime())); + }).toList(); + + // Create directories + // Since createDirectories is idempotent, we do not worry about the order in which it is actually + // executed. However, we do want a stable sort for .distinct() + var targetDirectories = writableContent.stream().map(o -> o.targetPath.getParent()) + .sorted(Comparator.comparing(Path::getNameCount).thenComparing(Path::toString)) + .distinct().toList(); + + var t0 = System.currentTimeMillis(); + fileWriterPool.submit(() -> targetDirectories.parallelStream() + .forEach(dir -> { + try { + Files.createDirectories(dir); + Files.setPosixFilePermissions(dir, DIRECTORY_PERMISSIONS); + Files.setLastModifiedTime(dir, INTERNAL_DIRECTORY_LAST_MODIFIED_TIME); + } catch (IOException e) { + log.error("Could not create directory {}", dir, e); + throw new UncheckedIOException(e); + } + }) ).join(); + + var t1 = System.currentTimeMillis(); + fileWriterPool.submit(() -> writableContent.parallelStream().forEach(content -> { + try { + Files.write(content.targetPath, content.content); + Files.setPosixFilePermissions(content.targetPath, FILE_PERMISSIONS); + Files.setLastModifiedTime(content.targetPath, content.modificationTime); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + })).join(); + + log.info("Wrote {} directories ({} ms) and {} files ({} ms) for host {}", + targetDirectories.size(), t1 - t0, + writableContent.size(), System.currentTimeMillis() - t1, + hostName); }); + // Calculate target directory after writing phase, to be sure variable is not used beforehand. + final Path targetDirectory = Paths.get(config.rsyncPath()).resolve("published-" + formattedNow); + // Directory write is fully complete, rename temporary to target directory name Files.setLastModifiedTime(temporaryDirectory, FileTime.from(now)); - Files.setPosixFilePermissions(temporaryDirectory, PosixFilePermissions.fromString("rwxr-xr-x")); + Files.setPosixFilePermissions(temporaryDirectory, DIRECTORY_PERMISSIONS); Files.move(temporaryDirectory, targetDirectory, ATOMIC_MOVE); return targetDirectory; - } finally { try { FileUtils.deleteDirectory(temporaryDirectory.toFile());