Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset link simplification #15

Merged
merged 5 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@
import javax.persistence.Column;
import javax.persistence.MappedSuperclass;
import javax.persistence.Transient;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

//Represents a link in a document
@MappedSuperclass
Expand Down Expand Up @@ -83,25 +86,48 @@ public DocumentLink() {

//--

public static List<String> validProtocols = Arrays.asList(new String[]{
public static List<String> validViewProtocols = Arrays.asList(new String[]{
"wms",
"http://www.opengis.net/def/serviceType/ogc/wms".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(),
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
"OGC Web Feature Service".toLowerCase(),
"OGC Web Map Service".toLowerCase(),
"OGC Web Map Tile Service".toLowerCase(),
"wms",
"Web Map Service (WMS)".toLowerCase(),
"OGC:WMS".toLowerCase(),
"http://www.opengeospatial.org/standards/wms",
"wmts",
"http://www.opengis.net/def/serviceType/ogc/wmts".toLowerCase(),
"OGC Web Map Tile Service".toLowerCase(),
"OGC:WMTS".toLowerCase(),
"http://www.opengeospatial.org/standards/wmts"
});

public static List<String> validDownloadProtocols = Arrays.asList(new String[]{
"wfs",
"atom",
"http://www.opengeospatial.org/standards/wms",
"http://www.opengeospatial.org/standards/wmts",
"http://www.opengis.net/def/serviceType/ogc/wfs".toLowerCase(),
"OGC Web Feature Service".toLowerCase(),
"Web Feature Service (WFS)".toLowerCase(),
"OGC:WFS".toLowerCase(),
"http://www.opengeospatial.org/standards/wfs",
"INSPIRE Atom".toLowerCase()
"atom",
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
"INSPIRE Atom".toLowerCase(),
"wcs",
"OGC:WCS".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/wcs".toLowerCase(),
"api features",
"OGC - API Features".toLowerCase(),
"OGC:OGC-API-Features-items".toLowerCase(),
"HTTP:OGC:API-Features".toLowerCase(),
"http://www.opengis.net/def/interface/ogcapi-features".toLowerCase(),
"SensorThings".toLowerCase(),
"sos",
"OGC:SOS".toLowerCase(),
"http://www.opengis.net/def/serviceType/ogc/sos".toLowerCase()
});

// Every accepted protocol identifier: the view protocols first, followed by the download protocols.
public static List<String> validProtocols = Stream.of(validViewProtocols, validDownloadProtocols)
        .flatMap(List::stream)
        .collect(Collectors.toList());

public static List<String> validAtomProtocols = Arrays.asList(new String[]{
"https://tools.ietf.org/html/rfc4287".toLowerCase(),
"ATOM Syndication Format".toLowerCase(),
Expand All @@ -116,20 +142,48 @@ public DocumentLink() {
"http://inspire.ec.europa.eu/metadata-codelist/SpatialDataServiceType/view".toLowerCase()
});

/**
 * Loose pattern for "view" protocols: matches any string that contains
 * "wms", "wmts" or "web map service" anywhere in it.
 * The pattern is lowercase-only, so callers lowercase the protocol before matching.
 */
public static final String VALID_PROTOCOLS_VIEW_REGEX = "(.*wms.*|.*wmts.*|.*web map service.*)";

/**
 * Loose pattern for "download" protocols: matches any string that contains
 * "wfs", "atom", "wcs", "sos", "sensorthings", "web feature service",
 * or "api" followed (anywhere later) by "feature".
 * Being substring-based, short tokens such as "sos" also match inside longer, unrelated words.
 */
public static final String VALID_PROTOCOLS_DOWNLOAD_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*web feature service.*)";

/**
 * Union of the alternatives of {@code VALID_PROTOCOLS_VIEW_REGEX} and
 * {@code VALID_PROTOCOLS_DOWNLOAD_REGEX}; keep the three patterns in sync when adding a protocol.
 */
public static final String VALID_PROTOCOLS_REGEX = "(.*wfs.*|.*atom.*|.*wcs.*|.*sos.*|.*api.*feature.*|.*sensorthings.*|.*wms.*|.*wmts.*|.*web map service.*|.*web feature service.*)";

public boolean isInspireSimplifiedLink() {
if ((rawURL == null) || (protocol == null) || (applicationProfile == null))
// Relax the check to process links with the applicationProfile information
if ((rawURL == null) || (protocol == null))
return false;
if (rawURL.isEmpty() || protocol.isEmpty() || applicationProfile.isEmpty())
if (rawURL.isEmpty() || protocol.isEmpty())
return false;

if (!validProtocols.contains(protocol.toLowerCase())) {
// Check protocol match "simple" values instead of exact match
if (!protocol.toLowerCase().matches(VALID_PROTOCOLS_REGEX)) {
return false;
}
}


return true;
}


/*public boolean isInspireSimplifiedLink() {
if ((rawURL == null) || (protocol == null) || (applicationProfile == null))
if ((rawURL == null) || (protocol == null))
return false;
if (rawURL.isEmpty() || protocol.isEmpty() || applicationProfile.isEmpty())
if (rawURL.isEmpty() || protocol.isEmpty())
return false;

if (!validProtocols.contains(protocol.toLowerCase()))
return false;

if (!validAppProfiles.contains(applicationProfile.toLowerCase()))
return false;

return true;
}
}*/



//--
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,68 @@ public DatasetDocumentLink create(DatasetMetadataRecord datasetMetadataRecord, O
result.setFunction(onlineResource.getFunction());
result.setOperationName(onlineResource.getOperationName());
result.setRawURL(onlineResource.getRawURL());
result.setProtocol(onlineResource.getProtocol());

String protocolFromUrl = inferProtocolFromUrl(onlineResource.getRawURL());

if ((onlineResource.getProtocol() == null) && (protocolFromUrl != null)) {
// If no protocol defined, try to infer the protocol from the URL
result.setProtocol(protocolFromUrl);
} else {
result.setProtocol(onlineResource.getProtocol());

// if the XML document's protocol isn't compatible with the actual URL
// then use the inferred URL protocol.
// Example;
// xml protocol is WMS (view)
// but, url is "...?service=WFS" (inferred url protocol is download and not view)
// then, set the protocol to Download (ignore the XML)
if (protocolFromUrl != null) {
boolean isDownloadProtocol = ServiceDocumentLink.validDownloadProtocols.contains(onlineResource.getProtocol().toLowerCase());
boolean isDownloadUrlProtocol = ServiceDocumentLink.validDownloadProtocols.contains(protocolFromUrl.toLowerCase());
boolean isViewProtocol = ServiceDocumentLink.validViewProtocols.contains(onlineResource.getProtocol().toLowerCase());
boolean isViewUrlProtocol = ServiceDocumentLink.validViewProtocols.contains(protocolFromUrl);

if (isDownloadProtocol) {
josegar74 marked this conversation as resolved.
Show resolved Hide resolved
if (!isDownloadUrlProtocol && isViewUrlProtocol) {
result.setProtocol(protocolFromUrl);
}
} else if (isViewProtocol) {
if (!isViewUrlProtocol && isDownloadUrlProtocol) {
result.setProtocol(protocolFromUrl);
}
}
}
}

result.setApplicationProfile(onlineResource.getApplicationProfile());

result.setLinkCheckJobId(datasetMetadataRecord.getLinkCheckJobId());

return result;
}


/**
 * Guesses a protocol token from the text of a URL.
 *
 * <p>The URL is lowercased and searched for a fixed list of tokens; the first
 * token found (in the order listed below) is returned. Matching is a plain
 * substring search, so a token occurring anywhere in the URL - host, path or
 * query string - counts as a match.
 *
 * @param url the raw URL of the online resource; may be {@code null}
 * @return one of "wms", "wmts", "wfs", "atom", "wcs", "sos", "api features" or
 *         "sensorthings", or {@code null} when {@code url} is {@code null} or
 *         contains none of these tokens
 */
private String inferProtocolFromUrl(String url) {
    // An online resource without a URL gives nothing to infer from; previously this threw a NullPointerException.
    if (url == null) {
        return null;
    }

    // Locale.ROOT keeps the lowercasing independent of the JVM default locale
    // (e.g. under a Turkish locale "SENSORTHINGS" would not lowercase to "sensorthings").
    String normalizedUrl = url.toLowerCase(java.util.Locale.ROOT);

    // Order matters: it is the precedence used when a URL contains more than one token.
    // NOTE(review): "api features" contains a literal space, which is unlikely to appear
    // in a real URL - confirm whether another spelling should be detected instead.
    String[] knownTokens = {"wms", "wmts", "wfs", "atom", "wcs", "sos", "api features", "sensorthings"};

    for (String token : knownTokens) {
        if (normalizedUrl.contains(token)) {
            return token;
        }
    }

    return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -323,13 +323,40 @@ private void process() {
.filter(x -> (x.getCapabilitiesDocumentType() == CapabilitiesType.WFS) || (x.getCapabilitiesDocumentType() == CapabilitiesType.Atom))
.collect(Collectors.toList());

if (!viewLinks.isEmpty())

if (!viewLinks.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS);
if (!downloadLinks.isEmpty())
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size());
} else {
// Dataset link simplification
if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) {
List<DocumentLink> viewLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream()
.filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validViewProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_VIEW_REGEX)))
.collect(Collectors.toList());

if (!viewLinksMetadataOnlineResources.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_VIEW_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinksMetadataOnlineResources.size());
}
}
}

localDatasetMetadataRecord.setNumberOfViewDataLinks(viewLinks.size());
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size());
if (!downloadLinks.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinks.size());
} else {
// Dataset link simplification
if (!localDatasetMetadataRecord.getDocumentLinks().isEmpty()) {
List<DocumentLink> downloadLinksMetadataOnlineResources = localDatasetMetadataRecord.getDocumentLinks().stream()
.filter(x -> (x.getLinkState().equals(LinkState.Complete) && x.getLinkHTTPStatusCode() == 200) && (DocumentLink.validDownloadProtocols.contains(x.getProtocol().toLowerCase()) || x.getProtocol().toLowerCase().matches(DocumentLink.VALID_PROTOCOLS_DOWNLOAD_REGEX)))
.collect(Collectors.toList());

if (!downloadLinksMetadataOnlineResources.isEmpty()) {
localDatasetMetadataRecord.setINDICATOR_DOWNLOAD_LINK_TO_DATA(IndicatorStatus.PASS);
localDatasetMetadataRecord.setNumberOfDownloadDataLinks(downloadLinksMetadataOnlineResources.size());
}
}
}

// List<ServiceDocSearchResult> serviceLinks = new ArrayList<>();
// List<CapabilitiesLinkResult> capLinks = new ArrayList<>();
Expand Down
Loading