
Commit

Merge pull request #563 from pdowler/vos2
integrate ArtifactSyncWorker
pdowler authored Apr 3, 2024
2 parents 1853e77 + 4780012 commit bf7c302
Showing 43 changed files with 781 additions and 359 deletions.
16 changes: 4 additions & 12 deletions .github/workflows/gradle.yml
@@ -66,19 +66,11 @@ jobs:

- name: java build -- raven
run: cd raven && ../gradlew --info clean build javadoc checkstyleMain

- name: java build -- ringhold
run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain

- name: java build -- ratik
run: cd ratik && ../gradlew --info clean build javadoc checkstyleMain

## TODO: docker build depends on cadc-tomcat base image from docker-base.git
# - name: docker build -- baldur
# run: cd baldur && docker build . --file Dockerfile --tag baldur:$(date +%s)
# - name: docker build -- minoc
# run: cd minoc && docker build . --file Dockerfile --tag minoc:$(date +%s)
# - name: docker build -- luskan
# run: cd luskan && docker build . --file Dockerfile --tag luskan:$(date +%s)
# - name: docker build -- raven
# run: cd raven && docker build . --file Dockerfile --tag raven:$(date +%s)
## disabled until updated for cadc-inventory-db API changes
# - name: java build -- ringhold
# run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain

70 changes: 70 additions & 0 deletions ChangeLog.md
@@ -0,0 +1,70 @@
# Change Log

This is a summary of changes to the various storage-inventory components; see the README
in each module for details.

## minoc:1.0.0
```
added optional `org.opencadc.minoc.trust.preauth` config
removed optional `org.opencadc.minoc.publicKeyFile` config
```
A `minoc` instance will download a public key from each trusted service and
use the public key(s) to validate URLs that include a _preauth_ token.

```
added optional `org.opencadc.minoc.readable` and `org.opencadc.minoc.writable` config
```
A `minoc` service will advertise (via the `inventory.StorageSite` record in the database) its
_readable_ and _writable_ status; this information is synced to global inventory and
used by `raven` to determine whether it should generate PUT or GET URLs that use the `minoc`
service(s) at that site. The configuration of _readGrantProvider_(s) and
_writeGrantProvider_(s) implicitly determines the status (_readable_ and _writable_
respectively); configuring any _trust.preauth_ will also implicitly make the
status _readable_ and _writable_.

The explicit _readable_ and _writable_ configuration options will override the above
implicit logic and set the status accordingly. This is currently optional but may be required
in a future version.
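The precedence is: an explicit flag always wins; otherwise the status is inferred from the
presence of grant providers or a _trust.preauth_ entry. The following is a minimal Java
sketch of that decision logic only; the class and method names are invented for this
illustration and are not part of the `minoc` code.
```
// Hypothetical sketch of the readable/writable precedence described above.
// SiteStatusSketch and resolve(...) are invented names, not the minoc API.
public class SiteStatusSketch {

    // explicitFlag: org.opencadc.minoc.readable / .writable if configured, else null
    // hasGrantProviders: any readGrantProvider / writeGrantProvider configured
    // hasTrustPreauth: any org.opencadc.minoc.trust.preauth configured
    static boolean resolve(Boolean explicitFlag, boolean hasGrantProviders, boolean hasTrustPreauth) {
        if (explicitFlag != null) {
            return explicitFlag; // explicit config overrides the implicit logic
        }
        return hasGrantProviders || hasTrustPreauth; // implicit: either one enables the status
    }

    public static void main(String[] args) {
        // no explicit flag, a readGrantProvider configured -> readable
        System.out.println("readable=" + resolve(null, true, false));          // true
        // explicit writable=false overrides any implicit inference
        System.out.println("writable=" + resolve(Boolean.FALSE, true, true));  // false
    }
}
```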

```
added optional config file: cadc-log.properties
added optional config file: cadc-vosi.properties
```

## raven:1.0.0
```
added org.opencadc.raven.inventory connection pool
```
A `raven` service uses this pool to perform database initialization. This pool is
configured in the `catalina.properties` file.

```
added optional `org.opencadc.raven.keys.preauth` config
removed optional `org.opencadc.raven.publicKeyFile` and `org.opencadc.minoc.privateKeyFile` config
```
When configured to do so, a `raven` service will generate its own public/private key pair
and use the private key to _sign_ `minoc` URLs. All the `minoc` services known to the global
`raven` service must also be configured to _trust_ `raven`.
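The trust relationship is ordinary asymmetric signing: `raven` signs with its private key and
each `minoc` verifies with the public key it downloaded from `raven`. Below is a
self-contained sketch of that sign/verify round trip using the standard `java.security` API;
the path, token encoding, and class name are illustrative assumptions, not the actual
preauth token format or the `raven`/`minoc` implementation.
```
import java.nio.charset.StandardCharsets;
import java.security.KeyPair;
import java.security.KeyPairGenerator;
import java.security.Signature;
import java.util.Base64;

public class PreauthSketch {
    public static void main(String[] args) throws Exception {
        // "raven" side: generate a key pair and sign the URL path it hands out
        KeyPairGenerator kpg = KeyPairGenerator.getInstance("RSA");
        kpg.initialize(2048);
        KeyPair keyPair = kpg.generateKeyPair();

        String artifactPath = "/minoc/files/test:FOO/bar.fits"; // illustrative path only
        Signature signer = Signature.getInstance("SHA256withRSA");
        signer.initSign(keyPair.getPrivate());
        signer.update(artifactPath.getBytes(StandardCharsets.UTF_8));
        String token = Base64.getUrlEncoder().withoutPadding().encodeToString(signer.sign());

        // "minoc" side: verify the token with the public key obtained from the signer
        Signature verifier = Signature.getInstance("SHA256withRSA");
        verifier.initVerify(keyPair.getPublic());
        verifier.update(artifactPath.getBytes(StandardCharsets.UTF_8));
        boolean trusted = verifier.verify(Base64.getUrlDecoder().decode(token));
        System.out.println("preauth token valid: " + trusted); // true
    }
}
```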

```
added optional config file: cadc-log.properties
added optional config file: cadc-vosi.properties
```

## luskan:1.0.0
```
changed config file: cadc-tap-tmp.properties
```
A `luskan` service now uses the `DelegatingStorageManager`, so this config file must
specify which storage manager implementation to use along with the existing
implementation-specific configuration options.

```
added optional config file: cadc-log.properties
added optional config file: cadc-vosi.properties
```

## vault:0.5.0 (NEW)
This is a new service that implements the IVOA VOSpace 2.1 REST API to provide user-managed
hierarchical storage. `vault` is deployed with its own database and an associated inventory
database, and uses inventory services (`minoc`) for file storage and management.
6 changes: 3 additions & 3 deletions cadc-inventory-db/build.gradle
@@ -17,7 +17,7 @@ sourceCompatibility = 1.8

group = 'org.opencadc'

version = '0.15.0'
version = '1.0.0'

description = 'OpenCADC Storage Inventory database library'
def git_url = 'https://github.com/opencadc/storage-inventory'
@@ -27,8 +27,8 @@ mainClassName = 'org.opencadc.inventory.db.version.Main'
dependencies {
compile 'org.opencadc:cadc-util:[1.11.0,2.0)'
compile 'org.opencadc:cadc-gms:[1.0.0,)'
compile 'org.opencadc:cadc-inventory:[0.9.4,)'
compile 'org.opencadc:cadc-vos:[2.0.5,3.0)'
compile 'org.opencadc:cadc-inventory:[1.0.0,)'
compile 'org.opencadc:cadc-vos:[2.0.6,3.0)'

testCompile 'junit:junit:[4.0,)'

@@ -84,7 +84,7 @@
import org.junit.Before;
import org.junit.Test;
import org.opencadc.inventory.Artifact;
import org.opencadc.vospace.db.ArtifactSyncWorker;
import org.opencadc.vospace.db.DataNodeSizeWorker;
import org.opencadc.inventory.Namespace;
import org.opencadc.inventory.db.ArtifactDAO;
import org.opencadc.inventory.db.HarvestState;
@@ -101,8 +101,8 @@
*
* @author adriand
*/
public class ArtifactSyncWorkerTest {
private static final Logger log = Logger.getLogger(ArtifactSyncWorkerTest.class);
public class DataNodeSizeWorkerTest {
private static final Logger log = Logger.getLogger(DataNodeSizeWorkerTest.class);

static {
Log4jInit.setLevel("org.opencadc.inventory", Level.INFO);
@@ -117,7 +117,7 @@ public class ArtifactSyncWorkerTest {
ArtifactDAO artifactDAO;


public ArtifactSyncWorkerTest() throws Exception {
public DataNodeSizeWorkerTest() throws Exception {
DBConfig dbrc = new DBConfig();
ConnectionConfig cc = dbrc.getConnectionConfig(TestUtil.SERVER, TestUtil.DATABASE);
DBUtil.PoolConfig pool = new DBUtil.PoolConfig(cc, 1, 6000L, "select 123");
@@ -219,7 +219,7 @@ public void testSyncArtifact() throws Exception {
harvestStateDAO.put(hs);
hs = harvestStateDAO.get(hsName, resourceID);

ArtifactSyncWorker asWorker = new ArtifactSyncWorker(harvestStateDAO, hs, artifactDAO, siNamespace);
DataNodeSizeWorker asWorker = new DataNodeSizeWorker(harvestStateDAO, hs, artifactDAO, siNamespace);
asWorker.run();

actual = (DataNode)nodeDAO.get(orig.getID());
@@ -68,6 +68,7 @@
package org.opencadc.inventory.db;

import java.net.URI;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.UUID;
import org.apache.log4j.Logger;
@@ -194,13 +195,15 @@ public void put(HarvestState val, boolean forceTimestampUpdate) {
if (curMaintCount == maintCount) {
String sql = "VACUUM " + gen.getTable(HarvestState.class);
log.warn("maintenance: " + curMaintCount + "==" + maintCount + " " + sql);
//JdbcTemplate jdbc = new JdbcTemplate(dataSource);
//jdbc.execute(sql);
try {
dataSource.getConnection().createStatement().execute(sql);
} catch (SQLException ex) {
log.error("ERROR: " + sql + " FAILED", ex);
// yes, log and proceed
try (Connection c = dataSource.getConnection()) {
c.createStatement().execute(sql);
} catch (SQLException ex) {
log.error("maintenance failed: " + sql, ex);
// yes, log and proceed
} // auto-close to return to pool
} catch (Exception ex) {
log.error("failed to close connection after maintenance: " + sql, ex);
}
curMaintCount = 0;
} else {
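The hunk above replaces a bare `dataSource.getConnection().createStatement().execute(sql)`
call, which never returned the borrowed connection to the pool, with a try-with-resources
block that closes (and thus returns) the connection even when the statement fails. A
standalone sketch of the same pattern, reusing the `dataSource` and `sql` names from the
method above:
```
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import javax.sql.DataSource;

class MaintenanceSketch {
    // Run a maintenance statement and always return the connection to the pool.
    static void runMaintenance(DataSource dataSource, String sql) {
        try (Connection c = dataSource.getConnection();
                Statement s = c.createStatement()) {
            s.execute(sql); // e.g. "VACUUM <HarvestState table>"
        } catch (SQLException ex) {
            // log and proceed: a failed VACUUM should not abort the caller
            System.err.println("maintenance failed: " + sql + " " + ex);
        }
    }
}
```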
@@ -80,7 +80,7 @@ public class InitDatabaseSI extends InitDatabase {
private static final Logger log = Logger.getLogger(InitDatabaseSI.class);

public static final String MODEL_NAME = "storage-inventory";
public static final String MODEL_VERSION = "0.15";
public static final String MODEL_VERSION = "1.0.0";
public static final String PREV_MODEL_VERSION = "0.14";
//public static final String PREV_MODEL_VERSION = "DO-NOT_UPGRADE-BY-ACCIDENT";

@@ -98,7 +98,7 @@ public class InitDatabaseSI extends InitDatabase {
};

static String[] UPGRADE_SQL = new String[] {
"inventory.upgrade-0.15.sql",
"inventory.upgrade-1.0.0.sql",
"generic.PreauthKeyPair.sql",
"generic.permissions.sql"
};
@@ -67,51 +67,92 @@

package org.opencadc.vospace.db;

import ca.nrc.cadc.date.DateUtil;
import ca.nrc.cadc.db.TransactionManager;
import ca.nrc.cadc.io.ResourceIterator;
import java.io.IOException;
import java.text.DateFormat;
import java.util.Date;
import org.apache.log4j.Logger;
import org.opencadc.inventory.Artifact;
import org.opencadc.inventory.Namespace;
import org.opencadc.inventory.db.ArtifactDAO;
import org.opencadc.inventory.db.HarvestState;
import org.opencadc.inventory.db.HarvestStateDAO;
import org.opencadc.vospace.DataNode;
import org.opencadc.vospace.db.NodeDAO;

/**
* This class performs the work of synchronizing the size of Data Nodes between backend storage and Node Persistence
* This class performs the work of synchronizing the size of Data Nodes from
* inventory (Artifact) to vospace (Node).
*
* @author adriand
*/
public class ArtifactSyncWorker implements Runnable {
private static final Logger log = Logger.getLogger(ArtifactSyncWorker.class);
public class DataNodeSizeWorker implements Runnable {
private static final Logger log = Logger.getLogger(DataNodeSizeWorker.class);

// lookback when doing incremental harvest because head of sequence is
// not monotonic over short timescales (events arrive out of sequence)
private static final long LOOKBACK_TIME_MS = 60 * 1000L;

private final HarvestState harvestState;
private final NodeDAO nodeDAO;
private final ArtifactDAO artifactDAO;
private final HarvestStateDAO harvestStateDAO;
private final Namespace storageNamespace;

private long numArtifactsProcessed;

public ArtifactSyncWorker(HarvestStateDAO harvestStateDAO, HarvestState harvestState, ArtifactDAO artifactDAO,
Namespace namespace) {
/**
* Worker constructor.
*
* @param harvestStateDAO DAO class to persist progress in the vospace database
* @param harvestState current HarvestState instance
* @param artifactDAO DAO class to query for artifacts
* @param namespace artifact namespace
*/
public DataNodeSizeWorker(HarvestStateDAO harvestStateDAO, HarvestState harvestState,
ArtifactDAO artifactDAO, Namespace namespace) {
this.harvestState = harvestState;
this.harvestStateDAO = harvestStateDAO;
this.nodeDAO = new NodeDAO(harvestStateDAO);
this.artifactDAO = artifactDAO;
this.storageNamespace = namespace;
}

public long getNumArtifactsProcessed() {
return numArtifactsProcessed;
}

@Override
public void run() {
log.debug("Start harvesting " + harvestState.toString() + " at " + harvestState.curLastModified);
this.numArtifactsProcessed = 0L;
String opName = DataNodeSizeWorker.class.getSimpleName() + ".artifactQuery";
DateFormat df = DateUtil.getDateFormat(DateUtil.IVOA_DATE_FORMAT, DateUtil.UTC);
if (harvestState.curLastModified != null) {
log.debug(opName + " source=" + harvestState.getResourceID()
+ " instance=" + harvestState.instanceID
+ " start=" + df.format(harvestState.curLastModified));
} else {
log.debug(opName + " source=" + harvestState.getResourceID()
+ " instance=" + harvestState.instanceID
+ " start=null");
}

final Date now = new Date();
final Date lookBack = new Date(now.getTime() - LOOKBACK_TIME_MS);
Date startTime = getQueryLowerBound(lookBack, harvestState.curLastModified);
if (lookBack != null && harvestState.curLastModified != null) {
log.debug("lookBack=" + df.format(lookBack) + " curLastModified=" + df.format(harvestState.curLastModified)
+ " -> " + df.format(startTime));
}

TransactionManager tm = nodeDAO.getTransactionManager();
try (final ResourceIterator<Artifact> iter = artifactDAO.iterator(storageNamespace, null,
harvestState.curLastModified, true)) {
String uriBucket = null; // process all artifacts in a single thread
try (final ResourceIterator<Artifact> iter = artifactDAO.iterator(storageNamespace, uriBucket, startTime, true)) {
TransactionManager tm = nodeDAO.getTransactionManager();
while (iter.hasNext()) {
Artifact artifact = iter.next();
DataNode node = nodeDAO.getDataNode(artifact.getURI());
log.debug(artifact.getURI() + " len=" + artifact.getContentLength() + " -> " + node.getName());
if (node != null && !artifact.getContentLength().equals(node.bytesUsed)) {
tm.startTransaction();
try {
@@ -122,7 +163,8 @@ public void run() {
node.bytesUsed = artifact.getContentLength();
nodeDAO.put(node);
tm.commitTransaction();
log.debug("Updated size of data node " + node.getName());
log.debug("ArtifactSyncWorker.updateDataNode id=" + node.getID()
+ " bytesUsed=" + node.bytesUsed + " artifact.lastModified=" + df.format(artifact.getLastModified()));
} catch (Exception ex) {
log.debug("Failed to update data node size for " + node.getName(), ex);
tm.rollbackTransaction();
@@ -137,13 +179,38 @@
}
}
harvestState.curLastModified = artifact.getLastModified();
harvestState.curID = node.getID();
harvestState.curID = artifact.getID();
harvestStateDAO.put(harvestState);
numArtifactsProcessed++;
}
} catch (IOException ex) {
log.error("Error closing iterator", ex);
throw new RuntimeException("error while closing ResourceIterator", ex);
}
log.debug("End harvesting " + harvestState.toString() + " at " + harvestState.curLastModified);
if (harvestState.curLastModified != null) {
log.debug(opName + " source=" + harvestState.getResourceID()
+ " instance=" + harvestState.instanceID
+ " end=" + df.format(harvestState.curLastModified));
} else {
log.debug(opName + " source=" + harvestState.getResourceID()
+ " instance=" + harvestState.instanceID
+ " end=null");
}
}

private Date getQueryLowerBound(Date lookBack, Date lastModified) {
if (lookBack == null) {
// feature not enabled
return lastModified;
}
if (lastModified == null) {
// first harvest
return null;
}
if (lookBack.before(lastModified)) {
return lookBack;
}
return lastModified;

}
}
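The `LOOKBACK_TIME_MS` window exists because artifact `lastModified` timestamps are not
strictly monotonic over short timescales, so each run deliberately re-scans the last minute
of events. Below is a small standalone restatement of the `getQueryLowerBound` rules with
concrete timestamps; it mirrors the method above for clarity and adds no new behavior.
```
import java.util.Date;

class LookbackSketch {
    // Same rules as DataNodeSizeWorker.getQueryLowerBound above:
    // no lookback -> resume at lastModified; first harvest -> null (scan everything);
    // otherwise start from whichever of (now - lookback) and lastModified is earlier.
    static Date lowerBound(Date lookBack, Date lastModified) {
        if (lookBack == null) {
            return lastModified;
        }
        if (lastModified == null) {
            return null;
        }
        return lookBack.before(lastModified) ? lookBack : lastModified;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        Date lookBack = new Date(now - 60 * 1000L);   // 60s lookback, as in the worker
        Date recent = new Date(now - 10 * 1000L);     // harvest state updated 10s ago
        Date stale = new Date(now - 3600 * 1000L);    // harvest state updated an hour ago

        // recent progress: re-scan the full 60s window to catch out-of-order events
        System.out.println(lowerBound(lookBack, recent).equals(lookBack)); // true
        // stale progress: resume from the saved timestamp; no need to go further back
        System.out.println(lowerBound(lookBack, stale).equals(stale));     // true
    }
}
```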
@@ -79,9 +79,9 @@
public class InitDatabaseVOS extends ca.nrc.cadc.db.version.InitDatabase {
private static final Logger log = Logger.getLogger(InitDatabaseVOS.class);

public static final String MODEL_NAME = "vospace-inventory";
public static final String MODEL_VERSION = "0.15";
public static final String PREV_MODEL_VERSION = "0.3";
public static final String MODEL_NAME = "storage-vospace";
public static final String MODEL_VERSION = "1.0.0";
public static final String PREV_MODEL_VERSION = "n/a";

static String[] CREATE_SQL = new String[] {
"generic.ModelVersion.sql",
@@ -93,7 +93,6 @@ public class InitDatabaseVOS extends ca.nrc.cadc.db.version.InitDatabase {
};

static String[] UPGRADE_SQL = new String[] {
"vospace.upgrade-0.15.sql",
"generic.permissions.sql"
};

7 changes: 0 additions & 7 deletions cadc-inventory-db/src/main/resources/vospace.upgrade-0.15.sql

This file was deleted.

