Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/cldsrv 546 post object #5601

Open
wants to merge 23 commits into
base: epic/RING-45960-postObject-api
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ const constants = {
'initiateMultipartUpload',
'objectPutPart',
'completeMultipartUpload',
'objectPost',
],
};

Expand Down
85 changes: 41 additions & 44 deletions lib/api/api.js
BourgoisMickael marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const objectGetRetention = require('./objectGetRetention');
const objectGetTagging = require('./objectGetTagging');
const objectHead = require('./objectHead');
const objectPut = require('./objectPut');
const objectPost = require('./objectPost');
const objectPutACL = require('./objectPutACL');
const objectPutLegalHold = require('./objectPutLegalHold');
const objectPutTagging = require('./objectPutTagging');
Expand All @@ -67,7 +68,9 @@ const writeContinue = require('../utilities/writeContinue');
const validateQueryAndHeaders = require('../utilities/validateQueryAndHeaders');
const parseCopySource = require('./apiUtils/object/parseCopySource');
const { tagConditionKeyAuth } = require('./apiUtils/authorization/tagConditionKeys');
const { checkAuthResults } = require('./apiUtils/authorization/permissionChecks');
const checkHttpHeadersSize = require('./apiUtils/object/checkHttpHeadersSize');
const { processPostForm } = require('./apiUtils/apiCallers/callPostObject');

const monitoringMap = policies.actionMaps.actionMonitoringMapS3;

Expand Down Expand Up @@ -142,49 +145,6 @@ const api = {
// eslint-disable-next-line no-param-reassign
request.apiMethods = apiMethods;

function checkAuthResults(authResults) {
let returnTagCount = true;
const isImplicitDeny = {};
let isOnlyImplicitDeny = true;
if (apiMethod === 'objectGet') {
// first item checks s3:GetObject(Version) action
if (!authResults[0].isAllowed && !authResults[0].isImplicit) {
log.trace('get object authorization denial from Vault');
return errors.AccessDenied;
}
// TODO add support for returnTagCount in the bucket policy
// checks
isImplicitDeny[authResults[0].action] = authResults[0].isImplicit;
// second item checks s3:GetObject(Version)Tagging action
if (!authResults[1].isAllowed) {
log.trace('get tagging authorization denial ' +
'from Vault');
returnTagCount = false;
}
} else {
for (let i = 0; i < authResults.length; i++) {
isImplicitDeny[authResults[i].action] = true;
if (!authResults[i].isAllowed && !authResults[i].isImplicit) {
// Any explicit deny rejects the current API call
log.trace('authorization denial from Vault');
return errors.AccessDenied;
}
if (authResults[i].isAllowed) {
// If the action is allowed, the result is not implicit
// Deny.
isImplicitDeny[authResults[i].action] = false;
isOnlyImplicitDeny = false;
}
}
}
// These two APIs cannot use ACLs or Bucket Policies, hence, any
// implicit deny from vault must be treated as an explicit deny.
if ((apiMethod === 'bucketPut' || apiMethod === 'serviceGet') && isOnlyImplicitDeny) {
return errors.AccessDenied;
}
return { returnTagCount, isImplicitDeny };
}

return async.waterfall([
next => auth.server.doAuth(
request, log, (err, userInfo, authorizationResults, streamingV4Params) => {
Expand Down Expand Up @@ -256,7 +216,7 @@ const api = {
return callback(err);
}
if (authorizationResults) {
const checkedResults = checkAuthResults(authorizationResults);
const checkedResults = checkAuthResults(apiMethod, authorizationResults, log);
if (checkedResults instanceof Error) {
return callback(checkedResults);
}
Expand Down Expand Up @@ -286,6 +246,42 @@ const api = {
return this[apiMethod](userInfo, request, log, callback);
});
},
callPostObject(apiMethod, request, response, log, callback) {
request.apiMethod = apiMethod;

const requestContexts = prepareRequestContexts('objectPost', request,
undefined, undefined, undefined);
// Extract all the _apiMethods and store them in an array
const apiMethods = requestContexts ? requestContexts.map(context => context._apiMethod) : [];
// Attach the names to the current request
// eslint-disable-next-line no-param-reassign
request.apiMethods = apiMethods;

return processPostForm(request, response, requestContexts, log,
(err, userInfo, authorizationResults, streamingV4Params) => {
if (err) {
return callback(err);
}
if (authorizationResults) {
const checkedResults = checkAuthResults(authorizationResults);
if (checkedResults instanceof Error) {
return callback(checkedResults);
}
request.actionImplicitDenies = checkedResults.isImplicitDeny;
} else {
// create an object of keys apiMethods with all values to false:
// for backward compatibility, all apiMethods are allowed by default
// thus it is explicitly allowed, so implicit deny is false
request.actionImplicitDenies = apiMethods.reduce((acc, curr) => {
acc[curr] = false;
return acc;
}, {});
}
request._response = response;
return objectPost(userInfo, request, streamingV4Params,
log, callback, authorizationResults);
});
},
bucketDelete,
bucketDeleteCors,
bucketDeleteEncryption,
Expand Down Expand Up @@ -337,6 +333,7 @@ const api = {
objectCopy,
objectHead,
objectPut,
objectPost,
objectPutACL,
objectPutLegalHold,
objectPutTagging,
Expand Down
210 changes: 210 additions & 0 deletions lib/api/apiUtils/apiCallers/callPostObject.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
const { auth, errors } = require('arsenal');
const busboy = require('@fastify/busboy');
const writeContinue = require('../../../utilities/writeContinue');
const fs = require('fs');
const path = require('path');
const os = require('os');

/** @see doc: https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-HTTPPOSTForms.html#HTTPPOSTFormDeclaration */
const MAX_FIELD_SIZE = 20 * 1024; // 20KB
/** @see doc: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html */
const MAX_KEY_SIZE = 1024;
// Form fields (other than 'key' and 'file') recognized on a POST object
// request and copied into request.formData. 'x-amz-meta-' is a prefix for
// arbitrary user metadata keys.
const POST_OBJECT_OPTIONAL_FIELDS = [
    'acl',
    'awsaccesskeyid',
    'bucket',
    'cache-control',
    'content-disposition',
    'content-encoding',
    'content-type',
    'expires',
    'policy',
    'redirect',
    'tagging',
    'success_action_redirect',
    'success_action_status',
    'x-amz-meta-',
    'x-amz-storage-class',
    'x-amz-security-token',
    // fixed typo: was 'x-amz-signgnature', which would never match the
    // real 'x-amz-signature' form field
    'x-amz-signature',
    'x-amz-website-redirect-location',
];

/**
 * Run Vault authentication for the POST object request.
 * Always resolves (never rejects): any auth error is deliberately dropped
 * for now, per the TODO below.
 * @param {object} request - http request object
 * @param {object[]} requestContexts - prepared request contexts
 * @param {object} log - request logger
 * @returns {Promise<{userInfo, authorizationResults, streamingV4Params}>} auth outcome
 */
async function authenticateRequest(request, requestContexts, log) {
    // TODO RING-45960 remove ignore auth check for POST object here
    return new Promise(done => {
        const onAuthDone = (err, userInfo, authorizationResults, streamingV4Params) => {
            // NOTE: err is intentionally ignored here (see TODO above)
            done({ userInfo, authorizationResults, streamingV4Params });
        };
        auth.server.doAuth(request, log, onAuthDone, 's3', requestContexts);
    });
}

/**
 * Parse a POST object multipart/form-data body with busboy.
 *
 * Recognized form fields are collected into `request.formData`; the single
 * `file` part is streamed to a temporary file on disk so that the remaining
 * form fields can still be validated before the object is actually stored.
 * Authentication is performed when the file part is reached, before its
 * content is consumed.
 *
 * @param {object} request - incoming http request (will be mutated:
 *   `formData` and `fileEventData` are attached)
 * @param {object} response - http response, used to send 100-continue
 * @param {object[]} requestContexts - prepared request contexts for auth
 * @param {object} log - request logger
 * @returns {Promise} resolves with the auth response
 *   ({ userInfo, authorizationResults, streamingV4Params }); rejects with an
 *   arsenal error on any parsing/validation failure
 */
async function parseFormData(request, response, requestContexts, log) {
    /* eslint-disable no-param-reassign */
    let formDataParser;
    try {
        formDataParser = busboy({ headers: request.headers });
    } catch (err) {
        // busboy throws when the content-type is not multipart/form-data
        log.trace('Error creating form data parser', { error: err.toString() });
        return Promise.reject(errors.PreconditionFailed
            .customizeDescription('Bucket POST must be of the enclosure-type multipart/form-data'));
    }

    writeContinue(request, response);

    return new Promise((resolve, reject) => {
        request.formData = {};
        let totalFieldSize = 0;
        let fileEventData = null;
        let tempFileStream;
        let tempFilePath;
        let authResponse;
        let fileWrittenPromiseResolve;
        let formParserFinishedPromiseResolve;

        // Resolve only once both the file is fully written to the temp
        // location AND busboy has finished parsing the whole body.
        const fileWrittenPromise = new Promise((res) => { fileWrittenPromiseResolve = res; });
        const formParserFinishedPromise = new Promise((res) => { formParserFinishedPromiseResolve = res; });

        formDataParser.on('field', (fieldname, val) => {
            // A part can arrive without a `name` directive in its
            // Content-Disposition header; reject instead of crashing on
            // fieldname.toLowerCase() below.
            if (fieldname === undefined) {
                return reject(errors.MalformedPOSTRequest);
            }
            // Check if we have exceeded the max size allowed for all fields
            totalFieldSize += Buffer.byteLength(val, 'utf8');
            if (totalFieldSize > MAX_FIELD_SIZE) {
                return reject(errors.MaxPostPreDataLengthExceeded);
            }

            // validate the fieldname
            const lowerFieldname = fieldname.toLowerCase();
            // special handling for key field
            if (lowerFieldname === 'key') {
                if (val.length > MAX_KEY_SIZE) {
                    return reject(errors.KeyTooLong);
                } else if (val.length === 0) {
                    return reject(errors.InvalidArgument
                        .customizeDescription('User key must have a length greater than 0.'));
                }
                request.formData[lowerFieldname] = val;
            }
            // add only the recognized fields to the formData object
            // TODO(review): startsWith over-matches (e.g. 'aclfoo' matches
            // 'acl'); known fields should be compared with strict equality and
            // only the 'x-amz-meta-' prefix checked with startsWith
            if (POST_OBJECT_OPTIONAL_FIELDS.some(field => lowerFieldname.startsWith(field))) {
                request.formData[lowerFieldname] = val;
            }
            return undefined;
        });

        formDataParser.on('file', async (fieldname, file, filename, encoding, mimetype) => {
            if (fileEventData) {
                file.resume(); // Resume the stream to drain and discard the file
                if (tempFilePath) {
                    fs.unlink(tempFilePath, unlinkErr => {
                        if (unlinkErr) {
                            log.error('Failed to delete temp file', { error: unlinkErr });
                        }
                    });
                }
                return reject(errors.InvalidArgument
                    .customizeDescription('POST requires exactly one file upload per request.'));
            }

            fileEventData = { fieldname, file, filename, encoding, mimetype };
            // The 'key' field must appear before the file part in the form
            if (!('key' in request.formData)) {
                return reject(errors.InvalidArgument
                    .customizeDescription('Bucket POST must contain a field named '
                        + "'key'. If it is specified, please check the order of the fields."));
            }
            // Replace `$(unknown)` with the actual filename
            request.formData.key = request.formData.key.replace('$(unknown)', filename);
            try {
                // Authenticate request before streaming file
                // TODO RING-45960 auth to be properly implemented
                authResponse = await authenticateRequest(request, requestContexts, log);

                // Create a temporary file to stream the file data
                // This is to finalize validation on form data before storing the file
                tempFilePath = path.join(os.tmpdir(), filename);
                tempFileStream = fs.createWriteStream(tempFilePath);

                file.pipe(tempFileStream);

                tempFileStream.on('finish', () => {
                    request.fileEventData = { ...fileEventData, file: tempFilePath };
                    fileWrittenPromiseResolve();
                });

                tempFileStream.on('error', (err) => {
                    log.trace('Error streaming file to temporary location', { error: err.toString() });
                    reject(errors.InternalError);
                });

                // Wait for both file writing and form parsing to finish
                return Promise.all([fileWrittenPromise, formParserFinishedPromise])
                    .then(() => resolve(authResponse))
                    .catch(reject);
            } catch (err) {
                return reject(err);
            }
        });

        formDataParser.on('finish', () => {
            if (!fileEventData) {
                return reject(errors.InvalidArgument
                    .customizeDescription('POST requires exactly one file upload per request.'));
            }
            return formParserFinishedPromiseResolve();
        });

        formDataParser.on('error', (err) => {
            log.trace('Error processing form data:', { error: err.toString() });
            request.unpipe(formDataParser);
            // Following observed AWS behaviour
            reject(errors.MalformedPOSTRequest);
        });

        request.pipe(formDataParser);
        return undefined;
    });
}

/**
 * Promise wrapper around fs.stat for the temporary upload file.
 * @param {string} filePath - path of the file to stat
 * @param {object} log - request logger
 * @returns {Promise<fs.Stats>} resolves with the stats object, rejects with
 *   errors.InternalError when the stat call fails
 */
function getFileStat(filePath, log) {
    return new Promise((resolve, reject) => {
        fs.stat(filePath, (statErr, stats) => {
            if (!statErr) {
                return resolve(stats);
            }
            log.trace('Error getting file size', { error: statErr.toString() });
            return reject(errors.InternalError);
        });
    });
}

/**
 * Orchestrate POST object pre-processing: parse/validate the form data,
 * stat the temp file to get the content length, open a read stream on it,
 * normalize the content-type header, and add auth names to the logger.
 * @param {object} request - http request object (mutated in place)
 * @param {object} response - http response object
 * @param {object[]} requestContexts - prepared request contexts for auth
 * @param {object} log - request logger
 * @param {function} callback - callback(err, userInfo,
 *   authorizationResults, streamingV4Params)
 * @returns {undefined} calls back asynchronously
 */
async function processPostForm(request, response, requestContexts, log, callback) {
    try {
        const formResult = await parseFormData(request, response, requestContexts, log);
        const { userInfo, authorizationResults, streamingV4Params } = formResult;

        const fileStat = await getFileStat(request.fileEventData.file, log);
        request.parsedContentLength = fileStat.size;
        request.fileEventData.file = fs.createReadStream(request.fileEventData.file);
        // honor the form-supplied content-type, falling back to the default
        request.headers['content-type'] =
            request.formData['content-type'] || 'binary/octet-stream';

        const authNames = { accountName: userInfo.getAccountDisplayName() };
        if (userInfo.isRequesterAnIAMUser()) {
            authNames.userName = userInfo.getIAMdisplayName();
        }
        log.addDefaultFields(authNames);

        return callback(null, userInfo, authorizationResults, streamingV4Params);
    } catch (err) {
        return callback(err);
    }
}

// processPostForm is the entry point consumed by lib/api/api.js; the other
// helpers are exported to allow isolated unit testing.
module.exports = {
    authenticateRequest,
    parseFormData,
    processPostForm,
    getFileStat,
};
Loading
Loading