Skip to content

Commit

Permalink
RetrievedAggregator Implementation (#110)
Browse files Browse the repository at this point in the history
* Added aggregator impl

* Refactor dependencies and improve document retrieval

Updated the build scripts to use `api` instead of `implementation` for key dependencies to ensure better visibility and reusability. Enhanced `AggregatorDemo.kt` to display more detailed document retrieval information and handle errors more gracefully.

* Added logging and enhanced test coverage

Added logging to the provider resolution process to facilitate debugging. Updated test cases to include additional scenarios, ensuring comprehensive coverage.
  • Loading branch information
milux authored Nov 18, 2024
1 parent 14a0e40 commit d24381b
Show file tree
Hide file tree
Showing 22 changed files with 288 additions and 164 deletions.
2 changes: 1 addition & 1 deletion csaf-cvss/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ mavenPublishing {
}

dependencies {
implementation(project(":csaf-schema"))
api(project(":csaf-schema"))
}
9 changes: 6 additions & 3 deletions csaf-retrieval/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@ mavenPublishing {

dependencies {
api(project(":csaf-schema"))
implementation(project(":csaf-validation"))
implementation(libs.kotlinx.coroutines)
implementation(libs.kotlinx.json)
api(project(":csaf-validation"))
api(libs.kotlinx.coroutines)
api(libs.kotlinx.json)
implementation(libs.bundles.ktor.client)
implementation(libs.ktor.kotlinx.json)
implementation(libs.kotlin.logging)
implementation(libs.bundles.slf4j)
testImplementation(libs.kotlinx.coroutines.test)
testImplementation(libs.ktor.client.mock)
testImplementation(libs.mockk)
testImplementation(testFixtures(project(":csaf-validation")))
}
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class CsafLoader(engine: HttpClientEngine = Java.create()) {
securityTxt
.lineSequence()
.mapNotNull { line ->
SecurityTxt.csafEntry.matchEntire(line)?.let { it.groupValues[1] }
CSAF_ENTRY_REGEX.matchEntire(line)?.let { it.groupValues[1] }
}
.toList()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,6 @@ package io.github.csaf.sbom.retrieval

import io.ktor.client.statement.HttpResponse

/**
 * Specifies the data source of the "provider-metadata.json", i.e. the way by which the metadata
 * document was located. The hierarchy is sealed; the three objects declared below are its members.
 */
sealed class ProviderMetaDataSource

/** Marker: provider-metadata.json was fetched from a well-known location. */
object WellKnownPath : ProviderMetaDataSource()

/** Marker: provider-metadata.json was fetched from a location specified in a security.txt. */
data object SecurityTxt : ProviderMetaDataSource() {
/**
 * Pattern for a `CSAF: https://...` line; capture group 1 is the URL part starting at
 * `https://`.
 */
val csafEntry = Regex("CSAF: (https://.*)")
}

/** Marker: provider-metadata.json was fetched from a special DNS path. */
object DNSPath : ProviderMetaDataSource()

/**
* This [RetrievalContext] holds all the necessary information that is needed to validate a
* validatable object. According to the requirements in the specification we probably need access to
Expand All @@ -43,14 +29,11 @@ object DNSPath : ProviderMetaDataSource()
* - The HTTP headers used in the HTTP communication to check for redirects; or the complete HTTP
* request; see [RetrievalContext.httpResponse])
*/
open class RetrievalContext() {
open class RetrievalContext {

/** The document to validate. */
var json: Any? = null

/** If this validates a provider, this will be the data source of the provider-metadata.json. */
var dataSource: ProviderMetaDataSource? = null

/** The HTTP response used to retrieve the [json]. */
var httpResponse: HttpResponse? = null
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package io.github.csaf.sbom.retrieval

import io.github.csaf.sbom.retrieval.CsafLoader.Companion.lazyLoader
import io.github.csaf.sbom.retrieval.roles.CSAFAggregatorRole
import io.github.csaf.sbom.retrieval.roles.CSAFListerRole
import io.github.csaf.sbom.schema.generated.Aggregator
Expand All @@ -40,6 +41,49 @@ class RetrievedAggregator(val json: Aggregator) : Validatable {
Aggregator.Category.aggregator -> CSAFAggregatorRole
}

/**
 * Resolves every entry of `csaf_providers` in the aggregator JSON into a [RetrievedProvider].
 *
 * For each entry the provider metadata is fetched from the entry's metadata URL via [loader]. The
 * fetched document is then wrapped and validated against a [RetrievalContext] that was created
 * for this entry only. Entries are handled in iteration order.
 *
 * @param loader The [CsafLoader] used for fetching. Defaults to [lazyLoader].
 * @return One [Result] per entry. A result is a failure if fetching failed or if wrapping or
 *   validating the fetched document threw.
 */
suspend fun fetchProviders(loader: CsafLoader = lazyLoader): List<Result<RetrievedProvider>> =
    json.csaf_providers.map { entry ->
        val context = RetrievalContext()
        val metadataUrl = entry.metadata.url.toString()
        loader.fetchProvider(metadataUrl, context).mapCatching { provider ->
            val retrieved = RetrievedProvider(provider)
            retrieved.validate(context)
            retrieved
        }
    }

/**
 * Resolves every entry of `csaf_publishers` in the aggregator JSON into a [RetrievedProvider].
 *
 * A missing (`null`) publisher list is treated like an empty one. For each entry the metadata is
 * fetched from the entry's metadata URL via [loader]. The fetched document is then wrapped and
 * validated against a [RetrievalContext] that was created for this entry only.
 *
 * @param loader The [CsafLoader] used for fetching. Defaults to [lazyLoader].
 * @return One [Result] per entry (publishers are represented as [RetrievedProvider] as well). A
 *   result is a failure if fetching failed or if wrapping or validating the document threw.
 */
suspend fun fetchPublishers(loader: CsafLoader = lazyLoader): List<Result<RetrievedProvider>> {
    val results =
        json.csaf_publishers?.map { entry ->
            val context = RetrievalContext()
            val metadataUrl = entry.metadata.url.toString()
            loader.fetchProvider(metadataUrl, context).mapCatching { provider ->
                val retrieved = RetrievedProvider(provider)
                retrieved.validate(context)
                retrieved
            }
        }
    return results ?: emptyList()
}

/**
 * Convenience wrapper around [fetchProviders] and [fetchPublishers].
 *
 * Providers are fetched first, publishers afterwards; the returned list keeps that order.
 *
 * @param loader The [CsafLoader] passed on to both fetch calls. Defaults to [lazyLoader].
 * @return The provider results followed by the publisher results.
 */
suspend fun fetchAll(loader: CsafLoader = lazyLoader): List<Result<RetrievedProvider>> {
    val providerResults = fetchProviders(loader)
    val publisherResults = fetchPublishers(loader)
    return providerResults + publisherResults
}

companion object {
/**
* Retrieves an [Aggregator] from a given [url].
Expand All @@ -51,15 +95,12 @@ class RetrievedAggregator(val json: Aggregator) : Validatable {
*/
suspend fun from(
url: String,
loader: CsafLoader = CsafLoader.lazyLoader
loader: CsafLoader = lazyLoader
): Result<RetrievedAggregator> {
val ctx = RetrievalContext()
val mapAndValidateAggregator = { a: Aggregator ->
RetrievedAggregator(a).also { it.validate(ctx) }
}
return loader
.fetchAggregator(url, ctx)
.mapCatching(mapAndValidateAggregator)
.mapCatching { a -> RetrievedAggregator(a).also { it.validate(ctx) } }
.recoverCatching { e ->
throw Exception("Failed to load CSAF Aggregator from $url", e)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import io.github.csaf.sbom.retrieval.roles.CSAFTrustedProviderRole
import io.github.csaf.sbom.schema.generated.Provider
import io.github.csaf.sbom.schema.generated.Provider.Feed
import io.github.csaf.sbom.schema.generated.ROLIEFeed
import io.github.oshai.kotlinlogging.KotlinLogging
import java.util.*
import java.util.concurrent.CompletableFuture
import java.util.stream.Stream
Expand Down Expand Up @@ -300,6 +301,7 @@ class RetrievedProvider(val json: Provider) : Validatable {
companion object {
const val DEFAULT_CHANNEL_CAPACITY = 256
private val ioScope = CoroutineScope(Dispatchers.IO + SupervisorJob())
private val log = KotlinLogging.logger {}

@JvmStatic
@JvmOverloads
Expand Down Expand Up @@ -333,36 +335,48 @@ class RetrievedProvider(val json: Provider) : Validatable {
): Result<RetrievedProvider> {
val ctx = RetrievalContext()
val mapAndValidateProvider = { p: Provider ->
// TODO: Add some more logging when any implemented tests can fail
RetrievedProvider(p).also { it.validate(ctx) }
}
// TODO: Only the last error will be available in result. We should do some logging.
// First, we need to check if a .well-known URL exists.
val wellKnownPath = "https://$domain/.well-known/csaf/provider-metadata.json"
return loader
.fetchProvider(wellKnownPath, ctx)
.onSuccess { ctx.dataSource = WellKnownPath }
.mapCatching(mapAndValidateProvider)
.recoverCatching {
log.info(it) {
"Failed to fetch and validate provider via .well-known, trying security.txt..."
}
// If failure, we fetch CSAF fields from security.txt and try observed URLs
// one-by-one.
loader.fetchSecurityTxtCsafUrls(domain).getOrThrow().firstNotNullOf { entry ->
loader
.fetchProvider(entry, ctx)
.onSuccess { ctx.dataSource = SecurityTxt }
.mapCatching(mapAndValidateProvider)
.getOrNull()
}
}
.recoverCatching {
log.info(it) {
"Failed to fetch and validate provider via security.txt, trying DNS..."
}
// If still failure, we try to fetch the provider directly via HTTPS request to
// "csaf.data.security.domain.tld", see
// https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#7110-requirement-10-dns-path.
loader
.fetchProvider("https://csaf.data.security.$domain", ctx)
.onSuccess { ctx.dataSource = DNSPath }
.mapCatching(mapAndValidateProvider)
.getOrThrow()
}
.recoverCatching {
log.info(it) {
"Failed to fetch and validate provider via DNS, resolution finally failed."
}
throw Exception(
"Failed to resolve provider for $domain via .well-known, security.txt or DNS.",
it
)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope

/**
 * Pattern for a `security.txt` line of the form `CSAF: https://...`. Capture group 1 holds the
 * URL, i.e. everything from `https://` up to the end of the matched text.
 */
val CSAF_ENTRY_REGEX = "CSAF: (https://.*)".toRegex()

/**
* An async replacement for `Iterable.map()`, which processes all elements in parallel using
* coroutines. The function preserves the order of the `Iterable` it is applied on.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2024, The Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package io.github.csaf.sbom.retrieval.demo

import io.github.csaf.sbom.retrieval.RetrievedAggregator
import kotlinx.coroutines.channels.toList
import kotlinx.coroutines.runBlocking

/**
 * Demo entry point: loads the aggregator.json of wid.cert-bund.de, fetches all providers and
 * publishers listed in it and prints a short report for each of them.
 *
 * @param args command line arguments; not evaluated by this demo
 */
fun main(args: Array<String>) {
    // Keep the URL in one place so the failure message reports what was actually requested.
    val aggregatorUrl = "https://wid.cert-bund.de/.well-known/csaf-aggregator/aggregator.json"
    runBlocking {
        // Create a new "RetrievedAggregator" from the aggregator.json of wid.cert-bund.de.
        RetrievedAggregator.from(aggregatorUrl)
            .onSuccess { aggregator ->
                println("Loaded aggregator.json @ ${aggregator.json.canonical_url}")
                val providers = aggregator.fetchProviders()
                val publishers = aggregator.fetchPublishers()
                println(
                    "Found ${providers.count { it.isSuccess }} providers and " +
                        "${publishers.count { it.isSuccess }} publishers."
                )
                // Walk through every provider/publisher result and report on its document feeds.
                // Note (kept from the original comment): we currently only support index.txt
                for (result in providers + publishers) {
                    result.onSuccess { provider ->
                        println("Provider @ ${provider.json.canonical_url}")
                        println(
                            "Estimated number of documents: ${provider.countExpectedDocuments()}"
                        )
                        // Only failed entries are reported; successfully listed URLs are not
                        // printed.
                        val failures =
                            provider.fetchAllDocumentUrls().toList().filter { it.isFailure }
                        for (failed in failures) {
                            failed.onFailure { cause ->
                                println(
                                    "Could not fetch index/feed: ${cause.message}, ${cause.cause}"
                                )
                            }
                        }
                        println("---")
                    }
                    result.onFailure { cause ->
                        // A failure here means the provider/publisher itself could not be
                        // fetched.
                        println("Could not fetch provider: ${cause.message}, ${cause.cause}")
                        println("---")
                    }
                }
            }
            .onFailure { cause ->
                // Must not index into args: the demo is normally started without arguments, and
                // an ArrayIndexOutOfBoundsException here would hide the real error.
                println("Could not fetch aggregator from $aggregatorUrl")
                cause.printStackTrace()
            }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
* limitations under the License.
*
*/
package io.github.csaf.sbom.retrieval
package io.github.csaf.sbom.retrieval.demo

import io.github.csaf.sbom.retrieval.RetrievedProvider
import kotlinx.coroutines.runBlocking

fun main(args: Array<String>) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,7 @@
*/
package io.github.csaf.sbom.retrieval.requirements

import io.github.csaf.sbom.retrieval.DNSPath
import io.github.csaf.sbom.retrieval.RetrievalContext
import io.github.csaf.sbom.retrieval.SecurityTxt
import io.github.csaf.sbom.retrieval.WellKnownPath
import io.github.csaf.sbom.schema.generated.Csaf
import io.github.csaf.sbom.schema.generated.Csaf.Label
import io.github.csaf.sbom.validation.*
Expand Down Expand Up @@ -79,10 +76,7 @@ object Requirement2ValidFilename : Requirement {
*/
object Requirement3UsageOfTls : Requirement {
override fun check(ctx: RetrievalContext): ValidationResult {
var response = ctx.httpResponse
if (response == null) {
return ValidationNotApplicable
}
val response = ctx.httpResponse ?: return ValidationNotApplicable

return if (response.request.url.protocol == URLProtocol.HTTPS) {
ValidationSuccessful
Expand Down Expand Up @@ -145,54 +139,6 @@ object Requirement7 : Requirement {
}
}

/**
 * Represents
 * [Requirement 8: security.txt](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#718-requirement-8-securitytxt).
 *
 * No lookup is performed here. The check only inspects [RetrievalContext.dataSource] and succeeds
 * if it equals [SecurityTxt]; any other value (including `null`) yields a failed validation.
 */
object Requirement8SecurityTxt : Requirement {
    override fun check(ctx: RetrievalContext) =
        when (ctx.dataSource) {
            SecurityTxt -> ValidationSuccessful
            else -> ValidationFailed(listOf("Not resolved via security.txt"))
        }
}

/**
 * Represents
 * [Requirement 9: Well-known URL](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#719-requirement-9-well-known-url-for-provider-metadatajson).
 *
 * No lookup is performed here. The check only inspects [RetrievalContext.dataSource] and succeeds
 * if it equals [WellKnownPath]; any other value (including `null`) yields a failed validation.
 */
object Requirement9WellKnownURL : Requirement {
    override fun check(ctx: RetrievalContext) =
        when (ctx.dataSource) {
            WellKnownPath -> ValidationSuccessful
            else -> ValidationFailed(listOf("Not resolved via .well-known"))
        }
}

/**
 * Represents
 * [Requirement 10: DNS path](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#7110-requirement-10-dns-path).
 *
 * No lookup is performed here. The check only inspects [RetrievalContext.dataSource] and succeeds
 * if it equals [DNSPath]; any other value (including `null`) yields a failed validation.
 */
object Requirement10DNSPath : Requirement {
    override fun check(ctx: RetrievalContext) =
        when (ctx.dataSource) {
            DNSPath -> ValidationSuccessful
            else ->
                ValidationFailed(
                    listOf("Not resolved via CSAF domain (csaf.data.security.domain.tld)")
                )
        }
}

// TODO(oxisto): This is actually a document requirement, but it is part of an OR clause in the role
// requirement :(
object Requirement11YearInFolder : Requirement {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,18 @@ object CSAFPublisherRole : Role {
/**
* The "CSAF provider" role. See
* https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#722-role-csaf-provider.
*
* Requirements 8, 9 and 10 need to be implicitly fulfilled by the domain-based fetching algorithm.
* They are therefore not explicitly checked. For reference, see these links:
* [Requirement 8: security.txt](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#718-requirement-8-securitytxt)
* [Requirement 9: Well-known
* URL](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#719-requirement-9-well-known-url-for-provider-metadatajson)
* [Requirement 10: DNS path](https://docs.oasis-open.org/csaf/csaf/v2.0/os/csaf-v2.0-os.html#7110-requirement-10-dns-path)
*/
object CSAFProviderRole : Role {
override val roleRequirements =
CSAFPublisherRole.roleRequirements +
allOf(Requirement6, Requirement7) +
oneOf(Requirement8SecurityTxt, Requirement9WellKnownURL, Requirement10DNSPath) +
(allOf(Requirement11YearInFolder, Requirement12, Requirement13, Requirement14) or
allOf(Requirement15, Requirement16, Requirement17))

Expand Down
Loading

0 comments on commit d24381b

Please sign in to comment.