Skip to content

Commit

Permalink
#1 factoring data profiler
Browse files Browse the repository at this point in the history
  • Loading branch information
michael-conway committed Dec 29, 2017
1 parent 3595a89 commit c721e0d
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 77 deletions.
6 changes: 5 additions & 1 deletion irodsext-data-profiler/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
<groupId>org.irods.jargon</groupId>
<version>4.2.1.0-SNAPSHOT</version>
</parent>
<groupId>org.irods.jargon</groupId>
<artifactId>irodsext-data-profiler</artifactId>
<name>irodsext-data-profiler</name>
<dependencies>
Expand All @@ -22,6 +21,11 @@
<artifactId>jargon-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.irods.jargon</groupId>
<artifactId>irodsext-data-typer</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<description>Represents a high-level summary of a data object or collection, gathering metadata, ACL information, and other facets of a data object or collection, suitable for presentation in an interface. This includes awareness of applicable special metadata types as well as any rules or operations associated with a collection or file</description>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
import org.irods.jargon.core.connection.IRODSAccount;
import org.irods.jargon.core.exception.DataNotFoundException;
import org.irods.jargon.core.exception.JargonException;
import org.irods.jargon.core.pub.CollectionAndDataObjectListAndSearchAO;
import org.irods.jargon.core.pub.IRODSAccessObjectFactory;
import org.irods.jargon.core.pub.domain.Collection;
import org.irods.jargon.core.pub.domain.DataObject;
import org.irods.jargon.core.pub.domain.ObjStat;
import org.irods.jargon.extensions.dataprofiler.DataProfile;
import org.irods.jargon.extensions.dataprofiler.DataProfileService;
import org.irods.jargon.extensions.dataprofiler.DataProfilerSettings;
Expand Down Expand Up @@ -42,10 +44,10 @@ public IrodsextDataProfilerService(DataProfilerSettings defaultDataProfilerSetti
* @see org.irods.jargon.extensions.dataprofiler.DataProfileService#
* retrieveDataProfile(java.lang.String)
*/
@SuppressWarnings("rawtypes")
@Override
public DataProfile retrieveDataProfile(String irodsAbsolutePath) throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
return retrieveDataProfile(irodsAbsolutePath, this.getDefaultDataProfilerSettings());
}

/*
Expand All @@ -55,64 +57,57 @@ public DataProfile retrieveDataProfile(String irodsAbsolutePath) throws DataNotF
* retrieveDataProfile(java.lang.String,
* org.irods.jargon.extensions.dataprofiler.DataProfilerSettings)
*/
@SuppressWarnings("rawtypes")
@Override
public DataProfile retrieveDataProfile(String irodsAbsolutePath, DataProfilerSettings dataProfilerSettings)
throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
}
log.info("retrieveDataProfile()");

/*
* (non-Javadoc)
*
* @see org.irods.jargon.extensions.dataprofiler.DataProfileService#
* retrieveDataProfileForCollection(java.lang.String)
*/
@Override
public DataProfile<Collection> retrieveDataProfileForCollection(String irodsAbsolutePath)
throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
}
if (irodsAbsolutePath == null || irodsAbsolutePath.isEmpty()) {
throw new IllegalArgumentException("null or empty irodsAbsolutePath");
}

if (dataProfilerSettings == null) {
throw new IllegalArgumentException("null dataProfilerSettings");
}

log.info("irodsAbsolutePath:{}", irodsAbsolutePath);
log.info("dataProfilerSettings:{}", dataProfilerSettings);

CollectionAndDataObjectListAndSearchAO collectionAndDataObjectListAndSearchAO = this
.getIrodsAccessObjectFactory().getCollectionAndDataObjectListAndSearchAO(getIrodsAccount());
log.info("getting objStat...");

ObjStat objStat = collectionAndDataObjectListAndSearchAO.retrieveObjectStatForPath(irodsAbsolutePath);

if (objStat.isSomeTypeOfCollection()) {
return retrieveDataProfileForCollection(irodsAbsolutePath, objStat, dataProfilerSettings);
} else {
return retrieveDataProfileForDataObject(irodsAbsolutePath, objStat, dataProfilerSettings);
}

/*
* (non-Javadoc)
*
* @see org.irods.jargon.extensions.dataprofiler.DataProfileService#
* retrieveDataProfileForCollection(java.lang.String,
* org.irods.jargon.extensions.dataprofiler.DataProfilerSettings)
*/
@Override
public DataProfile<Collection> retrieveDataProfileForCollection(String irodsAbsolutePath,
DataProfilerSettings dataProfilerSettings) throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
}

/*
* (non-Javadoc)
*
* @see org.irods.jargon.extensions.dataprofiler.DataProfileService#
* retrieveDataProfileForDataObject(java.lang.String)
*/
@Override
public DataProfile<DataObject> retrieveDataProfileForDataObject(String irodsAbsolutePath)
throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
/**
 * Assembles the data profile for a path that the caller resolved to something
 * other than a collection.
 * <p>
 * The base profile is obtained from {@code retrieveBaseDataObjectProfile} and
 * then handed, in order, to {@code checkIfStarred}, {@code checkIfShared},
 * {@code extractTags} and {@code establishDataType}. The starred/shared steps
 * receive the user name of this service's iRODS account.
 * <p>
 * NOTE(review): the helper methods are defined outside this view; they
 * presumably enrich the profile in place since their results are not captured
 * here -- confirm against their implementations.
 *
 * @param irodsAbsolutePath
 *            {@code String} path passed on to
 *            {@code retrieveBaseDataObjectProfile}
 * @param objStat
 *            {@link ObjStat} already retrieved by the caller; only logged here
 * @param dataProfilerSettings
 *            {@link DataProfilerSettings} passed on to
 *            {@code retrieveBaseDataObjectProfile}
 * @return {@link DataProfile} of {@link DataObject} after all helper steps ran
 */
private DataProfile<DataObject> retrieveDataProfileForDataObject(String irodsAbsolutePath, ObjStat objStat,
        DataProfilerSettings dataProfilerSettings) {
    // Message previously read "retrive..." which made the entry hard to grep for.
    log.info("retrieveDataProfileForDataObject()");
    log.info("objStat:{}", objStat);
    DataProfile<DataObject> dataProfile = retrieveBaseDataObjectProfile(irodsAbsolutePath, dataProfilerSettings);

    log.info("look for special attributes");

    // Both the starred and shared checks are evaluated for the same account user.
    final String userName = this.getIrodsAccount().getUserName();
    checkIfStarred(dataProfile, userName);
    checkIfShared(dataProfile, userName);
    extractTags(dataProfile);
    establishDataType(dataProfile);

    return dataProfile;
}

/*
* (non-Javadoc)
*
* @see org.irods.jargon.extensions.dataprofiler.DataProfileService#
* retrieveDataProfileForDataObject(java.lang.String,
* org.irods.jargon.extensions.dataprofiler.DataProfilerSettings)
*/
@Override
public DataProfile<DataObject> retrieveDataProfileForDataObject(String irodsAbsolutePath,
DataProfilerSettings dataProfilerSettings) throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
/**
 * Placeholder for building the data profile of a collection.
 * <p>
 * NOTE(review): this is not implemented yet. It only logs and returns
 * {@code null}, so any caller that routes a collection path here receives a
 * null profile -- confirm callers tolerate that until the real implementation
 * lands.
 *
 * @param irodsAbsolutePath
 *            {@code String} path of the collection; currently unused
 * @param objStat
 *            {@link ObjStat} already retrieved by the caller; only logged here
 * @param dataProfilerSettings
 *            {@link DataProfilerSettings} for the request; currently unused
 * @return always {@code null} in the current state of the code
 */
private DataProfile<Collection> retrieveDataProfileForCollection(String irodsAbsolutePath, ObjStat objStat,
        DataProfilerSettings dataProfilerSettings) {
    // Message previously read "retrive..." which made the entry hard to grep for.
    log.info("retrieveDataProfileForCollection()");
    log.info("objStat:{}", objStat);
    // Collection profiling has not been written yet.
    return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,18 @@
import org.irods.jargon.core.connection.SettableJargonProperties;
import org.irods.jargon.core.pub.IRODSAccessObjectFactory;
import org.irods.jargon.core.pub.IRODSFileSystem;
import org.irods.jargon.extensions.dataprofiler.DataProfile;
import org.irods.jargon.extensions.dataprofiler.DataProfileService;
import org.irods.jargon.extensions.dataprofiler.DataProfilerSettings;
import org.irods.jargon.extensions.datatyper.DataTypeResolutionService;
import org.irods.jargon.extensions.datatyper.DataTyperSettings;
import org.irods.jargon.testutils.IRODSTestSetupUtilities;
import org.irods.jargon.testutils.TestingPropertiesHelper;
import org.irods.jargon.testutils.filemanip.ScratchFileUtils;
import org.irodsext.datatyper.IrodsextDataTypeResolutionService;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
Expand Down Expand Up @@ -84,6 +90,17 @@ public void testBasicDataProfileWithCollection() throws Exception {
dataProfilerSettings.setRetrieveShared(false);
dataProfilerSettings.setRetrieveStarred(false);
dataProfilerSettings.setRetrieveTickets(false);
DataTyperSettings dataTyperSettings = new DataTyperSettings();
dataTyperSettings.setDetailedDetermination(false);
dataTyperSettings.setPersistDataTypes(false);
DataTypeResolutionService dataTyperService = new IrodsextDataTypeResolutionService(accessObjectFactory,
irodsAccount, dataTyperSettings);

DataProfileService dataProfilerService = new IrodsextDataProfilerService(dataProfilerSettings, dataTyperService,
accessObjectFactory, irodsAccount);
@SuppressWarnings("rawtypes")
DataProfile dataProfile = dataProfilerService.retrieveDataProfile(targetIrodsCollection);
Assert.assertNotNull("null data profile returned", dataProfile);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,38 @@ public IrodsextDataTypeResolutionService(IRODSAccessObjectFactory irodsAccessObj
public DataType resolveDataType(String irodsAbsolutePath) throws DataNotFoundException, JargonException {
    log.info("resolveDataType()");

    // Single-argument convenience form: resolve using the settings this
    // service was configured with, via the two-argument variant.
    final DataTyperSettings defaultSettings = getDefaultDataTyperSettings();
    return resolveDataType(irodsAbsolutePath, defaultSettings);
}

/**
 * Asks the detector of a Tika {@code AutoDetectParser} for the media type of
 * the given path. No input stream is supplied; the only hint handed to the
 * detector is the last path component, registered as the resource name.
 *
 * @param irodsAbsolutePath
 *            {@code String} iRODS absolute path whose last component is used
 *            as the resource name
 * @return {@code String} rendering of the detected {@code MediaType}
 * @throws JargonException
 *             wrapping an {@code IOException} raised by the detector
 */
private String determineMimeTypeViaTika(String irodsAbsolutePath) throws JargonException {
    final Detector tikaDetector = new AutoDetectParser().getDetector();

    final Metadata resourceHints = new Metadata();
    resourceHints.add(Metadata.RESOURCE_NAME_KEY,
            MiscIRODSUtils.getLastPathComponentForGivenAbsolutePath(irodsAbsolutePath));

    try {
        // null stream: the detector is given the metadata only
        return tikaDetector.detect(null, resourceHints).toString();
    } catch (IOException e) {
        throw new JargonException("io exception determining file type by extension", e);
    }
}

@Override
public DataType resolveDataType(String irodsAbsolutePath, DataTyperSettings dataTyperSettings)
throws DataNotFoundException, JargonException {
log.info("resolveDataType()");

if (irodsAbsolutePath == null || irodsAbsolutePath.isEmpty()) {
throw new IllegalArgumentException("null or empty irodsAbsolutePath");
}

log.info("irodsAbsolutePath:{}", irodsAbsolutePath);

if (this.getDefaultDataTyperSettings().isDetailedDetermination()) {
if (dataTyperSettings.isDetailedDetermination()) {
log.warn("detailedDetermination not yet implemented, will default to check of file path");
}

Expand All @@ -73,30 +98,6 @@ public DataType resolveDataType(String irodsAbsolutePath) throws DataNotFoundExc
dataType.setMimeType(mimeType);
log.info("dataType:{}", dataType);
return dataType;

}

/**
 * Asks the detector of a Tika {@code AutoDetectParser} for the media type of
 * the given path. No input stream is supplied; the only hint handed to the
 * detector is the last path component, registered as the resource name.
 *
 * @param irodsAbsolutePath
 *            {@code String} iRODS absolute path whose last component is used
 *            as the resource name
 * @return {@code String} rendering of the detected {@code MediaType}
 * @throws JargonException
 *             wrapping an {@code IOException} raised by the detector
 */
private String determineMimeTypeViaTika(String irodsAbsolutePath) throws JargonException {
    final Detector tikaDetector = new AutoDetectParser().getDetector();

    final Metadata resourceHints = new Metadata();
    resourceHints.add(Metadata.RESOURCE_NAME_KEY,
            MiscIRODSUtils.getLastPathComponentForGivenAbsolutePath(irodsAbsolutePath));

    try {
        // null stream: the detector is given the metadata only
        return tikaDetector.detect(null, resourceHints).toString();
    } catch (IOException e) {
        throw new JargonException("io exception determining file type by extension", e);
    }
}

@Override
public DataType resolveDataType(String irodsAbsolutePath, DataTyperSettings dataTyperSettings)
throws DataNotFoundException, JargonException {
// TODO Auto-generated method stub
return null;
}

/**
Expand Down

0 comments on commit c721e0d

Please sign in to comment.