-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5289b69
commit 3595a89
Showing
5 changed files
with
184 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
...xt-data-typer/src/main/java/org/irodsext/datatyper/IrodsextDataTypeResolutionService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
/** | ||
* | ||
*/ | ||
package org.irodsext.datatyper; | ||
|
||
import java.io.IOException; | ||
|
||
import org.apache.tika.detect.Detector; | ||
import org.apache.tika.metadata.Metadata; | ||
import org.apache.tika.mime.MediaType; | ||
import org.apache.tika.parser.AutoDetectParser; | ||
import org.irods.jargon.core.connection.IRODSAccount; | ||
import org.irods.jargon.core.exception.DataNotFoundException; | ||
import org.irods.jargon.core.exception.JargonException; | ||
import org.irods.jargon.core.pub.IRODSAccessObjectFactory; | ||
import org.irods.jargon.core.utils.LocalFileUtils; | ||
import org.irods.jargon.core.utils.MiscIRODSUtils; | ||
import org.irods.jargon.extensions.datatyper.DataType; | ||
import org.irods.jargon.extensions.datatyper.DataTypeResolutionService; | ||
import org.irods.jargon.extensions.datatyper.DataTyperSettings; | ||
import org.irods.jargon.extensions.datatyper.IrodsMimeTypes; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* Data type resolution service to determine MIME and info types of a file. Note | ||
* that this is at first a very basic service that will need to evolve over | ||
* time. | ||
* | ||
* @author Mike Conway - NIEHS | ||
* | ||
*/ | ||
public class IrodsextDataTypeResolutionService extends DataTypeResolutionService { | ||
|
||
public static final Logger log = LoggerFactory.getLogger(IrodsextDataTypeResolutionService.class); | ||
|
||
public IrodsextDataTypeResolutionService(IRODSAccessObjectFactory irodsAccessObjectFactory, | ||
IRODSAccount irodsAccount, DataTyperSettings dataTyperSettings) { | ||
super(irodsAccessObjectFactory, irodsAccount, dataTyperSettings); | ||
} | ||
|
||
@Override | ||
public DataType resolveDataType(String irodsAbsolutePath) throws DataNotFoundException, JargonException { | ||
log.info("resolveDataType()"); | ||
|
||
if (irodsAbsolutePath == null || irodsAbsolutePath.isEmpty()) { | ||
throw new IllegalArgumentException("null or empty irodsAbsolutePath"); | ||
} | ||
|
||
log.info("irodsAbsolutePath:{}", irodsAbsolutePath); | ||
|
||
if (this.getDefaultDataTyperSettings().isDetailedDetermination()) { | ||
log.warn("detailedDetermination not yet implemented, will default to check of file path"); | ||
} | ||
|
||
log.info("checking for known irods types - interim code..."); | ||
|
||
String mimeType = determimeMimeTypeOfIrodsObjects(irodsAbsolutePath); | ||
|
||
log.info("use Tika to derive based on file extenstion"); | ||
|
||
if (mimeType == null) { | ||
log.info("not a known irods type, try tika"); | ||
mimeType = determineMimeTypeViaTika(irodsAbsolutePath); | ||
} | ||
|
||
if (mimeType == null) { | ||
log.info("no mime type found via tika"); | ||
mimeType = ""; | ||
} | ||
|
||
DataType dataType = new DataType(); | ||
dataType.setMimeType(mimeType); | ||
log.info("dataType:{}", dataType); | ||
return dataType; | ||
|
||
} | ||
|
||
private String determineMimeTypeViaTika(String irodsAbsolutePath) throws JargonException { | ||
AutoDetectParser parser = new AutoDetectParser(); | ||
Detector detector = parser.getDetector(); | ||
Metadata md = new Metadata(); | ||
String fileName = MiscIRODSUtils.getLastPathComponentForGivenAbsolutePath(irodsAbsolutePath); | ||
|
||
md.add(Metadata.RESOURCE_NAME_KEY, fileName); | ||
MediaType mediaType; | ||
try { | ||
mediaType = detector.detect(null, md); | ||
} catch (IOException e) { | ||
throw new JargonException("io exception determining file type by extension", e); | ||
} | ||
return mediaType.toString(); | ||
} | ||
|
||
@Override | ||
public DataType resolveDataType(String irodsAbsolutePath, DataTyperSettings dataTyperSettings) | ||
throws DataNotFoundException, JargonException { | ||
// TODO Auto-generated method stub | ||
return null; | ||
} | ||
|
||
/** | ||
* front-load detection of special irods file types | ||
* | ||
* @param dataObject | ||
* @return | ||
*/ | ||
private String determimeMimeTypeOfIrodsObjects(final String irodsAbsolutePath) { | ||
|
||
String extension = LocalFileUtils.getFileExtension(irodsAbsolutePath); | ||
if (extension == null || extension.isEmpty()) { | ||
return null; | ||
} | ||
|
||
if (extension.equals(".r")) { | ||
log.info("irods rule detected in:{}", irodsAbsolutePath); | ||
return IrodsMimeTypes.APPLICATION_IRODS_RULE; | ||
} else { | ||
return null; | ||
} | ||
|
||
} | ||
|
||
} |
8 changes: 8 additions & 0 deletions
8
irodsext-data-typer/src/main/java/org/irodsext/datatyper/package-info.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
|
||
/** | ||
* Default data typer service to determine MIME and info types of a file | ||
* | ||
* @author Mike Conway - NIEHS | ||
* | ||
*/ | ||
package org.irodsext.datatyper; |
30 changes: 30 additions & 0 deletions
30
...ata-typer/src/test/java/org/irodsext/datatyper/IrodsextDataTypeResolutionServiceTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package org.irodsext.datatyper; | ||
|
||
import org.irods.jargon.core.connection.IRODSAccount; | ||
import org.irods.jargon.core.pub.IRODSAccessObjectFactory; | ||
import org.irods.jargon.extensions.datatyper.DataType; | ||
import org.irods.jargon.extensions.datatyper.DataTypeResolutionService; | ||
import org.irods.jargon.extensions.datatyper.DataTyperSettings; | ||
import org.junit.Assert; | ||
import org.junit.Test; | ||
import org.mockito.Mockito; | ||
|
||
public class IrodsextDataTypeResolutionServiceTest { | ||
|
||
@Test | ||
public void testResolveDataTypeString() throws Exception { | ||
String testName = "/a/path/file.txt"; | ||
IRODSAccount dummyAccount = Mockito.mock(IRODSAccount.class); | ||
IRODSAccessObjectFactory irodsAccessObjectFactory = Mockito.mock(IRODSAccessObjectFactory.class); | ||
DataTyperSettings dataTyperSettings = new DataTyperSettings(); | ||
dataTyperSettings.setDetailedDetermination(false); | ||
dataTyperSettings.setPersistDataTypes(false); | ||
DataTypeResolutionService dtrs = new IrodsextDataTypeResolutionService(irodsAccessObjectFactory, dummyAccount, | ||
dataTyperSettings); | ||
DataType actual = dtrs.resolveDataType(testName); | ||
Assert.assertNotNull("no type returned", actual); | ||
Assert.assertEquals("text/plain", actual.getMimeType()); | ||
|
||
} | ||
|
||
} |