Skip to content

Commit

Permalink
MavenSupport: Add cache for remote artifacts
Browse files Browse the repository at this point in the history
Store remote artifact information in a disk cache backed by
DiskLruCache [1]. This brings a big performance boost for Gradle, Maven
and SBT dependency resolution.

The cached metadata contains the URLs and checksums of Maven artifacts, which
are retrieved without actually downloading the remote artifacts. This
information is not available from the local Maven repository, especially
when the remote artifacts have not been downloaded.

Currently the maximum cache size is hardcoded to 1 GB and the expiry to six
hours; both will be made configurable in a later commit.

[1] https://github.com/JakeWharton/DiskLruCache
  • Loading branch information
mnonnenmacher committed Dec 11, 2017
1 parent 27a7cb9 commit 6c26437
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 2 deletions.
29 changes: 27 additions & 2 deletions analyzer/src/main/kotlin/MavenSupport.kt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ import ch.frankel.slf4k.*
import com.here.ort.model.Package
import com.here.ort.model.RemoteArtifact
import com.here.ort.model.VcsInfo
import com.here.ort.util.DiskCache
import com.here.ort.util.getUserConfigDirectory
import com.here.ort.util.log
import com.here.ort.util.yamlMapper

import org.apache.maven.artifact.repository.LegacyLocalRepositoryManager
import org.apache.maven.bridge.MavenRepositorySystem
Expand Down Expand Up @@ -66,7 +69,13 @@ fun Artifact.identifier() = "$groupId:$artifactId:$version"

class MavenSupport(localRepositoryManagerConverter: (LocalRepositoryManager) -> LocalRepositoryManager) {
companion object {
    /**
     * Number of bytes in one gigabyte (1024^3). The previous value of 1024 * 1024 was only one megabyte, which
     * limited the cache to a fraction of the intended size.
     */
    private const val GIGABYTE = 1024L * 1024L * 1024L

    /**
     * Number of seconds in one hour.
     */
    private const val HOUR = 60 * 60

    val SCM_REGEX = Regex("scm:(?<provider>[^:]+):(?<url>.+)")

    /**
     * Disk cache for remote artifact metadata, limited to one gigabyte, with entries expiring after six hours.
     */
    private val remoteArtifactCache = DiskCache(File(getUserConfigDirectory(), "analyzer/remote_artifacts"),
            GIGABYTE, 6 * HOUR)
}

val container = createContainer()
Expand Down Expand Up @@ -125,6 +134,16 @@ class MavenSupport(localRepositoryManagerConverter: (LocalRepositoryManager) ->
}

fun requestRemoteArtifact(artifact: Artifact, repositories: List<RemoteRepository>): RemoteArtifact {
val cacheKey = artifact.toString()
.replace(".", "-")
.replace(":", "_")
.toLowerCase()

remoteArtifactCache.read(cacheKey)?.let {
log.debug { "Reading remote artifact for $artifact from disk cache." }
return yamlMapper.readValue(it, RemoteArtifact::class.java)
}

val repoSystem = container.lookup(RepositorySystem::class.java, "default")
val remoteRepositoryManager = container.lookup(RemoteRepositoryManager::class.java, "default")
val repositoryLayoutProvider = container.lookup(RepositoryLayoutProvider::class.java, "default")
Expand Down Expand Up @@ -201,7 +220,10 @@ class MavenSupport(localRepositoryManagerConverter: (LocalRepositoryManager) ->
}

val downloadUrl = "${repository.url}/$remoteLocation"
return RemoteArtifact(downloadUrl, actualChecksum, checksum.algorithm)
return RemoteArtifact(downloadUrl, actualChecksum, checksum.algorithm).also {
log.debug { "Writing remote artifact for $artifact to disk cache." }
remoteArtifactCache.write(cacheKey, yamlMapper.writeValueAsString(it))
}
} else {
if (Main.stacktrace) {
artifactDownload.exception.printStackTrace()
Expand All @@ -213,7 +235,10 @@ class MavenSupport(localRepositoryManagerConverter: (LocalRepositoryManager) ->

log.info { "Could not receive data about remote artifact '$artifact'." }

return RemoteArtifact.EMPTY
return RemoteArtifact.EMPTY.also {
log.debug { "Writing empty remote artifact for $artifact to disk cache." }
remoteArtifactCache.write(cacheKey, yamlMapper.writeValueAsString(it))
}
}

/**
Expand Down
1 change: 1 addition & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
disklrucacheVersion = 2.0.2
gradleToolingApiVersion = 4.3
jacksonVersion = 2.9.2
jcommanderVersion = 1.72
Expand Down
1 change: 1 addition & 0 deletions util/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ dependencies {
compile "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:$jacksonVersion"
compile "com.fasterxml.jackson.module:jackson-module-kotlin:$jacksonVersion"

compile "com.jakewharton:disklrucache:$disklrucacheVersion"
compile "com.squareup.okhttp3:okhttp:$okhttpVersion"
compile "com.vdurmont:semver4j:$semverVersion"

Expand Down
126 changes: 126 additions & 0 deletions util/src/main/kotlin/DiskCache.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2017 HERE Europe B.V.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/

package com.here.ort.util

import ch.frankel.slf4k.*

import com.jakewharton.disklrucache.DiskLruCache

import java.io.File
import java.io.IOException

/**
 * Wrapper around [DiskLruCache] that adds a workaround for the 64 character key length limit and treats entries
 * older than [timeToLive] seconds as expired.
 */
class DiskCache(
    /**
     * The directory to store the cache data, must be exclusive to the cache.
     */
    val directory: File,

    /**
     * The maximum size of the disk cache, passed on unchanged to [DiskLruCache.open].
     */
    val maxSize: Long,

    /**
     * Duration in seconds that cache entries are valid.
     */
    val timeToLive: Int
) {
    companion object {
        /**
         * Index of the value that stores the full (unshortened) key of an entry.
         */
        const val INDEX_FULL_KEY = 0

        /**
         * Index of the value that stores the time in seconds at which an entry was written.
         */
        const val INDEX_TIMESTAMP = 1

        /**
         * Index of the value that stores the cached data of an entry.
         */
        const val INDEX_DATA = 2

        /**
         * Number of values stored per cache entry.
         */
        const val VALUE_COUNT = 3

        /**
         * Maximum length for a key in [DiskLruCache].
         */
        const val MAX_KEY_LENGTH = 64

        /**
         * Length of the suffix appended to long keys: a dash followed by a zero-padded serial number.
         */
        const val KEY_SUFFIX_LENGTH = 6
    }

    private val diskLruCache = DiskLruCache.open(directory, 0, VALUE_COUNT, maxSize)

    /**
     * Shorten the string to be usable as a key for [DiskLruCache] which has a maximum length of [MAX_KEY_LENGTH].
     * Shortening the keys will lead to conflicts, so append a serial number to the key and store the full key in the
     * cache, so that it is possible to store multiple entries for the same shortened key, and to check which of those
     * belongs to which full key.
     * If the string is shorter or equal to [MAX_KEY_LENGTH] - [KEY_SUFFIX_LENGTH] chars take it as is, otherwise
     * shorten it and append the serial number.
     *
     * Note that the first free or matching serial number wins, so if an entry with a lower serial number disappears
     * from the cache, entries for other full keys stored under higher serial numbers are no longer found.
     *
     * @throws IOException if all serial numbers for the shortened key are taken by other full keys.
     */
    private fun String.asKey(): String {
        val maxPrefixLength = MAX_KEY_LENGTH - KEY_SUFFIX_LENGTH
        if (length <= maxPrefixLength) {
            // String is short enough to be unique, use it as is.
            return this
        }

        // String is too long to be unique, append it with a serial number. Keep exactly maxPrefixLength characters
        // so that the prefix plus the suffix never exceeds MAX_KEY_LENGTH. The previous inclusive range kept one
        // character too many, which produced keys of 65 characters.
        val fullKey = this
        val prefix = take(maxPrefixLength)
        for (index in 0..99999) {
            val tryKey = "$prefix-" + "$index".padStart(KEY_SUFFIX_LENGTH - 1, '0')

            // A missing entry means the serial number is free.
            val snapshot = diskLruCache.get(tryKey) ?: return tryKey

            // Close the snapshot after comparing the stored full key to not leak the underlying streams.
            val belongsToFullKey = snapshot.use { it.getString(INDEX_FULL_KEY) == fullKey }
            if (belongsToFullKey) {
                return tryKey
            }
        }

        throw IOException(
                "Cannot generate key for '$this' because all possible keys starting with '$prefix' are taken.")
    }

    /**
     * Read the data stored for [key] from the cache.
     *
     * @return The cached data, or null if there is no entry for [key], the entry is older than [timeToLive]
     * seconds (in which case it is removed from the cache), or the cache could not be read.
     */
    fun read(key: String): String? {
        try {
            val shortKey = key.asKey()
            diskLruCache.get(shortKey)?.use { entry ->
                val time = entry.getString(INDEX_TIMESTAMP).toLong()
                if (time + timeToLive >= timeInSeconds()) {
                    return entry.getString(INDEX_DATA)
                }

                // The entry is expired. It has to be removed by the shortened key it was stored under, the full key
                // is unknown to the underlying cache.
                diskLruCache.remove(shortKey)
            }
        } catch (e: IOException) {
            log.error { "Could not read cache entry for key '$key': ${e.message}" }
        }
        return null
    }

    /**
     * Write [data] to the cache using [key], together with the full key and the current time.
     *
     * @return True if the entry was written, false if the entry is currently being edited or an I/O error occurred.
     */
    fun write(key: String, data: String): Boolean {
        try {
            // DiskLruCache.edit() returns null if another edit of the same entry is in progress.
            val editor = diskLruCache.edit(key.asKey())
            if (editor == null) {
                log.error { "Could not write to disk cache for key '$key': the entry is currently being edited." }
                return false
            }

            try {
                editor.set(INDEX_FULL_KEY, key)
                editor.set(INDEX_TIMESTAMP, timeInSeconds().toString())
                editor.set(INDEX_DATA, data)
                editor.commit()
            } finally {
                // Release the editor if any of the calls above failed, otherwise the entry would stay locked.
                editor.abortUnlessCommitted()
            }
            return true
        } catch (e: IOException) {
            log.error { "Could not write to disk cache for key '$key': ${e.message}" }
        }
        return false
    }

    /**
     * Return the current time in seconds since the epoch.
     */
    private fun timeInSeconds() = System.currentTimeMillis() / 1000L
}

0 comments on commit 6c26437

Please sign in to comment.