Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom filename for content-disposition #1010

Merged
merged 10 commits into from
Mar 28, 2024
4 changes: 3 additions & 1 deletion docs/user-docs/annotation.md
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,7 @@ Supported JSON payload patterns:
- `{`... `"sha256": ` _column_ | `true` ...`}`: If _column_, then the _column_ stores the checksum generated by the 'sha256' cryptographic hash function. It MUST be ASCII/UTF-8 hexadecimal encoded. If `true`, then the client SHOULD generate a 'sha256' checksum and communicate it to the asset storage service according to its protocol. See implementation notes below.
- `{`... `"filename_ext_filter": [` { _filename extension_ [`,` _filename extension_ ]\* } `]` ...`}`: This property specifies a set of _filename extension_ filters for use by upload agents to indicate to the user the acceptable filename patterns (`.jpg`, `.png`, `.pdf`, ...). For example, `.jpg` would indicate that only JPEG files should be selected by the user.
- `{`... `"filename_ext_regexp": [` { _filename extension regexp_ [`,` _filename extension regexp_ ]\* } `]` ...`}`: This property specifies a set of _filename extension regexp_ patterns that upload agents use to populate the `filename_ext` property, which is available under the annotated column object when writing the `url_pattern` value.
- `{`... `"stored_filename_pattern": ` _pattern_ ...`}`: A preferred filename can be derived by [Pattern Expansion](#pattern-expansion) on _pattern_. This attribute allows for changing the `content-disposition` of the file uploaded to hatrac and the value stored in the filename column. See implementation notes below.
- `{` ... `"display": {` _context_`:` _displayoption_ ...`}` ... `}`: Apply each _displayoption_ to the asset for any number of _context_ names. See [Context Names](#context-names) section for the list of supported _context_ names.

Supported display _displayoption_ JSON payload patterns:
Expand All @@ -738,7 +739,8 @@ Default heuristics:
- `md5_base64` for base64
- `size` for size in bytes
- `mimetype` for mimetype of the selected file.
- `filename` for filename
- `filename` for the filename. Note: this is the original name of the selected file, i.e. the name before `stored_filename_pattern` is applied.
- `filename_basename` for the filename without the extension. Note: concatenating `filename_basename` and `filename_ext` yields `filename`.
- `filename_ext` for the file extension based on the filename. This value is derived based on the optionally defined `filename_ext_filter` and `filename_ext_regexp`. If these annotations are missing, the last part of the filename after the last dot will be returned (also includes the `.` e.g. `.png`).
- If we cannot find matches, this property will return `null`. So make sure you're doing null checking while using this property (otherwise, the whole `url_pattern` might result in an empty string).
- Nothing may be inferred without additional payload patterns present.
Expand Down
12 changes: 12 additions & 0 deletions js/column.js
Original file line number Diff line number Diff line change
Expand Up @@ -2729,6 +2729,18 @@ Object.defineProperty(AssetPseudoColumn.prototype, "sha256", {
}
});

/**
 * The pattern read from the `stored_filename_pattern` property of the
 * asset annotation.
 *
 * The annotation is consulted on access and the result is kept on
 * `_filenamePattern`; once a defined value has been stored, later reads
 * return it without touching the annotation again. When the annotation does
 * not define the property, `undefined` is returned.
 * @member {string} filenamePattern
 */
Object.defineProperty(AssetPseudoColumn.prototype, "filenamePattern", {
    get: function () {
        var cached = this._filenamePattern;
        if (cached !== undefined) {
            return cached;
        }

        // nothing stored yet: take the value straight from the annotation
        this._filenamePattern = this._annotation.stored_filename_pattern;
        return this._filenamePattern;
    }
});

/**
* The column object that file extension is stored in.
* @member {string[]} filenameExtFilter
Expand Down
29 changes: 24 additions & 5 deletions js/hatrac.js
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,9 @@ var ERMrest = (function(module) {
this.CHUNK_QUEUE_SIZE = otherInfo.chunkQueueSize || 4;

this.file = file;
if (!this.file) throw new Error("No file provided while creating hatrac file object");

this.storedFilename = file.name; // the name that will be used for content-disposition and filename column
if (isNode) this.file.buffer = require('fs').readFileSync(file.path);

this.column = otherInfo.column;
Expand Down Expand Up @@ -445,10 +447,10 @@ var ERMrest = (function(module) {

// check if filename in content disposition is different from filename being uploaded
// if it is, create an update metadata request for updating the content-disposition
if (contentDisposition.substring(filenameIndex, contentDisposition.length) != self.file.name.replace(FILENAME_REGEXP, '_')) {
if (contentDisposition.substring(filenameIndex, contentDisposition.length) != self.storedFilename.replace(FILENAME_REGEXP, '_')) {
// Prepend the url with server uri if it is relative
var url = self._getAbsoluteUrl(self.url + ";metadata/content-disposition");
var data = "filename*=UTF-8''" + self.file.name.replace(FILENAME_REGEXP, '_');
var data = "filename*=UTF-8''" + self.storedFilename.replace(FILENAME_REGEXP, '_');
contextHeaderParams.action = "upload/metadata/update"

var config = {
Expand Down Expand Up @@ -528,7 +530,7 @@ var ERMrest = (function(module) {
"content-length": self.file.size,
"content-type": self.file.type,
"content-md5": self.hash.md5_base64,
"content-disposition": "filename*=UTF-8''" + self.file.name.replace(FILENAME_REGEXP, '_')
"content-disposition": "filename*=UTF-8''" + self.storedFilename.replace(FILENAME_REGEXP, '_')
};

if (!contextHeaderParams || !_isObject(contextHeaderParams)) {
Expand Down Expand Up @@ -828,13 +830,19 @@ var ERMrest = (function(module) {
if (this.column.md5 && typeof this.column.md5 === 'object') row[this.column.md5.name] = this.hash.md5_hex;
if (this.column.sha256 && typeof this.column.sha256 === 'object') row[this.column.sha256.name] = this.hash.sha256;

row[this.column.name].filename = this.file.name;
row[this.column.name].size = this.file.size;
row[this.column.name].mimetype = this.file.type;
row[this.column.name].md5_hex = this.hash.md5_hex;
row[this.column.name].md5_base64 = this.hash.md5_base64;
row[this.column.name].sha256 = this.hash.sha256;
row[this.column.name].filename_ext = _getFilenameExtension(this.file.name, this.column.filenameExtFilter, this.column.filenameExtRegexp);
row[this.column.name].filename = this.file.name;
var filename_ext = _getFilenameExtension(this.file.name, this.column.filenameExtFilter, this.column.filenameExtRegexp);
row[this.column.name].filename_ext = filename_ext
// filename_basename is everything from the file name except the last ext
// For example if we have a file named "file.tar.zip"
// => "file.tar" is the basename
// => ".zip" is the extension
row[this.column.name].filename_basename = filename_ext ? this.file.name.substring(0, this.file.name.length - filename_ext.length) : this.file.name;

// Generate url

Expand All @@ -843,6 +851,16 @@ var ERMrest = (function(module) {
var keyValues = module._getFormattedKeyValues(this.reference.table, this.reference._context, row);

var url = module._renderTemplate(template, keyValues, this.reference.table.schema.catalog, { avoidValidation: true, templateEngine: this.column.templateEngine });

if (this.column.filenamePattern) {
var filename = module._renderTemplate(this.column.filenamePattern, keyValues, this.reference.table.schema.catalog, { avoidValidation: true, templateEngine: this.column.templateEngine });

if (filename && filename.trim() !== '') {
this.storedFilename = filename;
// update the filename column value on the row being submitted
if (this.column.filenameColumn) row[this.column.filenameColumn.name] = this.storedFilename;
}
}

// If the template is null then throw an error
if (url === null || url.trim() === '') {
Expand Down Expand Up @@ -872,6 +890,7 @@ var ERMrest = (function(module) {

this.url = url;

// NOTE: url is returned but not used in either place this function is called
return this.url;
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
83 changes: 82 additions & 1 deletion test/specs/upload/conf/upload/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,93 @@
}
],
"annotations" : {}
},
"file_custom_filename": {
"comment": "asset/object with stored_filename_pattern in asset annotation",
"kind": "table",
"keys": [
{
"comment": null,
"annotations": {},
"unique_columns": [
"id"
]
}
],
"foreign_keys": [],
"table_name": "file_custom_filename",
"schema_name": "upload",
"column_definitions": [
{
"name": "id",
"nullok": false,
"type": {
"typename": "serial4"
},
"annotations": {}
},
{
"name": "timestamp",
"nullok": true,
"type": {
"typename": "timestamp"
},
"annotations": {}
},
{
"name": "filename",
"nullok": true,
"type": {
"typename": "text"
},
"annotations": {}
},
{
"comment": "asset/reference",
"name": "uri",
"nullok": true,
"type": {
"typename": "text"
},
"annotations": {
"tag:isrd.isi.edu,2017:asset": {
"url_pattern" : "/hatrac/js/ermrestjs/{{{_uri.filename_basename}}}/{{{_uri.md5_hex}}}",
"filename_column" : "filename",
"byte_count_column" : "bytes",
"md5" : "checksum",
"stored_filename_pattern": "{{{_timestamp}}}{{{_uri.filename_ext}}}"
}
}
},
{
"name": "bytes",
"nullok": true,
"type": {
"typename": "int8"
},
"annotations": {}
},
{
"name": "checksum",
"nullok": true,
"type": {
"typename": "text"
},
"annotations": {}
}
],
"annotations": {
"tag:isrd.isi.edu,2016:visible-columns" : {
"*" : ["id", "uri"]
}
}
}
},
"table_names": [
"file",
"file_update_table",
"referee"
"referee",
"file_custom_filename"
],
"comment": null,
"schema_name": "upload"
Expand Down
113 changes: 106 additions & 7 deletions test/specs/upload/tests/01.checksum.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ exports.execute = function (options) {
var schemaName = "upload",
schema,
tableName = "file",
table,
columnName = "uri",
column,
columnNameWOHatrac = "uri_wo_hatrac",
Expand Down Expand Up @@ -59,9 +58,8 @@ exports.execute = function (options) {
f.file = new File(filePath);
});

options.ermRest.resolve(baseUri, { cid: "test" }).then(function (response) {
reference = response;
reference = reference.contextualize.entryCreate;
ermRest.resolve(baseUri, { cid: "test" }).then(function (response) {
reference = response.contextualize.entryCreate;
column = reference.columns.find(function(c) { return c.name == columnName; });
columnWOHatrac = reference.columns.find(function(c) { return c.name == columnNameWOHatrac; });

Expand Down Expand Up @@ -127,10 +125,7 @@ exports.execute = function (options) {
expect(uploadObj.validateURL(validRow)).toBe(true);
});


it("should show progress on calculation of checksum as well as calculate correct hash in hex and base64 format with correct url", function(done) {
var chunkSize = uploadObj.PART_SIZE;

uploadObj.calculateChecksum(validRow).then(function(url) {
expect(uploadObj.hash instanceof ermRest.Checksum).toBeTruthy("Upload object hash is not of type ermRest.Checksum");

Expand Down Expand Up @@ -177,4 +172,108 @@ exports.execute = function (options) {
})

});

describe('For a file with a "custom filename" to be generated and used in place of the initial file name, ', function () {
var schemaName = "upload",
tableName = "file_custom_filename",
columnName = "uri",
column, reference, uploadObj, ermRest;

var file = {
name: "testfile500kb.png.zip",
size: 512000,
displaySize: "500KB",
type: "application/zip",
hash: "4b178700e5f3b15ce799f2c6c1465741",
hash_64: "SxeHAOXzsVznmfLGwUZXQQ=="
};

var time = Date.now();
var validRow = {
timestamp: time,
uri: { md5_hex: file.hash }
};

var baseUri = options.url + "/catalog/" + process.env.DEFAULT_CATALOG + "/entity/"
+ schemaName + ":" + tableName;

beforeAll(function (done) {
ermRest = options.ermRest;

var filePath = "test/specs/upload/files/" + file.name

exec("perl -e 'print \"\1\" x " + file.size + "' > " + filePath);

file.file = new File(filePath);

ermRest.resolve(baseUri, { cid: "test" }).then(function (response) {
reference = response.contextualize.entryCreate;
column = reference.columns.find(function(c) { return c.name == columnName; });

if (!column) throw new Error("Unable to find column " + columnName);
done();
}, function (err) {
console.dir(err);
done.fail();
});
});

it("should create an upload object", function(done) {

try {
uploadObj = new ermRest.Upload(file.file, {
column: column,
reference: reference,
chunkSize: 5 * 1024 * 1024
});

expect(uploadObj instanceof ermRest.Upload).toBe(true);
done();
} catch(e) {
console.dir(e);
done.fail();
}
});

it("should contain properties of file in `file` property of uploadObj (Size: " + file.size + " (" + file.displaySize + "), type: " + file.type + ", name: " + file.name + ")", function() {
expect(uploadObj.file.size).toBe(file.size);
// the file object should continue to inform us about the file being uploaded
// the name won't be updated when we readt the annotation and calculated the filename for content-disposition and the database
expect(uploadObj.file.name).toBe(file.name);
expect(uploadObj.file.type).toBe(file.type);
});

it("should calculate correct hash in hex and base64 format with correct url and generated filename", function(done) {
uploadObj.calculateChecksum(validRow).then(function(url) {
expect(uploadObj.hash instanceof ermRest.Checksum).toBeTruthy("Upload object hash is not of type ermRest.Checksum");

expect(url).toBe("/hatrac/js/ermrestjs/testfile500kb.png/" + file.hash, "File generated url is not the same");

// values that are attached to the row
expect(validRow.filename).not.toBe(file.name, "valid row filename is the same as original file's name");
expect(validRow.filename).toBe(time + ".zip", "valid row filename was not generated properly");
expect(validRow.bytes).toBe(file.size, "valid row bytes is incorrect");
expect(validRow.checksum).toBe(file.hash, "valid row checksum is incorrect");

done();

}, function(e) {
console.dir(e);
expect(file).toBe("");
done.fail();
}, function(uploadedSize) {
uploaded = uploadedSize;
});

});

it("should have the checksum properly defined.", function () {
var checksum = uploadObj.hash;

expect(checksum.file).toEqual(uploadObj.file, "file is incorrect");
expect(checksum.md5_hex).toBe(file.hash, "md5 hex is incorrect");
expect(checksum.md5_base64).toBe(file.hash_64, "md5 base64 is incorrect");
});

})
}