diff --git a/CHANGES.rst b/CHANGES.rst index 75d31a882c..042dd1ec89 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -26,6 +26,15 @@ https://github.com/zopefoundation/Zope/blob/4.x/CHANGES.rst - Update to newest compatible versions of dependencies. +- Support the (non standard) ``charset`` parameter for + content type ``application/x-www-form-urlencoded``. + This is required (e.g. for ``Plone``) because + ``jquery`` constructs content types of the form + ``application/x-www-form-urlencoded; charset=utf-8``. + For details see + `plone/buildout.coredev#844 + <https://github.com/plone/buildout.coredev/pull/844>`_. + 5.8 (2023-01-10) ---------------- diff --git a/src/ZPublisher/HTTPRequest.py b/src/ZPublisher/HTTPRequest.py index 04cd98b034..c974b87b9c 100644 --- a/src/ZPublisher/HTTPRequest.py +++ b/src/ZPublisher/HTTPRequest.py @@ -547,23 +547,24 @@ def processInputs( # problems when surrogates reach the application because # they cannot be encoded with a standard error handler. # We might want to prevent this. - character_encoding = '' # currently used encoding key = item.name if key is None: continue - key = item.name.encode("latin-1").decode(self.charset) + character_encoding = "" + key = item.name.encode("latin-1").decode( + item.name_charset or self.charset) if hasattr(item, 'file') and \ hasattr(item, 'filename') and \ hasattr(item, 'headers'): item = FileUpload(item, self.charset) else: - character_encoding = self.charset + character_encoding = item.value_charset or self.charset item = item.value.decode( character_encoding, "surrogateescape") # from here on, `item` contains the field value # either as `FileUpload` or `str` with - # `character_encoding` as encoding. + # `character_encoding` as encoding, # `key` the field name (`str`) flags = 0 @@ -1382,6 +1383,9 @@ class FormField(SimpleNamespace, ValueAccessor): the field name value the field value (`bytes`) + name_charset, value_charset + the charset for the name and value, respectively, or ``None`` + if no charset has been specified. 
File fields additionally have the attributes: file @@ -1396,17 +1400,21 @@ class FormField(SimpleNamespace, ValueAccessor): are used to represent textual data. """ + name_charset = value_charset = None + class ZopeFieldStorage(ValueAccessor): def __init__(self, fp, environ): self.file = fp method = environ.get("REQUEST_METHOD", "GET").upper() - qs = environ.get("QUERY_STRING", "") + url_qs = environ.get("QUERY_STRING", "") + post_qs = "" hl = [] content_type = environ.get("CONTENT_TYPE", "application/x-www-form-urlencoded") - content_type = content_type hl.append(("content-type", content_type)) + content_type, options = parse_options_header(content_type) + content_type = content_type.lower() content_disposition = environ.get("CONTENT_DISPOSITION") if content_disposition is not None: hl.append(("content-disposition", content_disposition)) @@ -1417,8 +1425,7 @@ def __init__(self, fp, environ): fpos = fp.tell() except Exception: fpos = None - if content_type.startswith("multipart/form-data"): - ct, options = parse_options_header(content_type) + if content_type == "multipart/form-data": parts = MultipartParser( fp, options["boundary"], mem_limit=FORM_MEMORY_LIMIT, @@ -1426,31 +1433,28 @@ def __init__(self, fp, environ): memfile_limit=FORM_MEMFILE_LIMIT, charset="latin-1").parts() elif content_type == "application/x-www-form-urlencoded": - if qs: - qs += "&" - qs += fp.read(FORM_MEMORY_LIMIT).decode("latin-1") + post_qs = fp.read(FORM_MEMORY_LIMIT).decode("latin-1") if fp.read(1): raise BadRequest("form data processing " "requires too much memory") - else: - # `processInputs` currently expects either - # form values or a response body, not both. - # reset `qs` to fulfill this expectation. - qs = "" + elif url_qs: + raise NotImplementedError("request parameters and body") if fpos is not None: fp.seek(fpos) - elif method not in ("GET", "HEAD"): - # `processInputs` currently expects either - # form values or a response body, not both. 
- # reset `qs` to fulfill this expectation. - qs = "" + elif url_qs and content_type != "application/x-www-form-urlencoded": + raise NotImplementedError("request parameters and body") fl = [] add_field = fl.append - for name, val in parse_qsl( - qs, # noqa: E121 - keep_blank_values=True, encoding="latin-1"): - add_field(FormField( - name=name, value=val.encode("latin-1"))) + post_opts = {} + if options.get("charset"): + post_opts["name_charset"] = post_opts["value_charset"] = \ + options["charset"] + for qs, opts in ((url_qs, {}), (post_qs, post_opts)): + for name, val in parse_qsl( + qs, # noqa: E121 + keep_blank_values=True, encoding="latin-1"): + add_field(FormField( + name=name, value=val.encode("latin-1"), **opts)) for part in parts: if part.filename: # a file @@ -1460,12 +1464,21 @@ def __init__(self, fp, environ): filename=part.filename, headers=part.headers) else: - field = FormField(name=part.name, value=part.raw) + field = FormField( + name=part.name, value=part.raw, + value_charset=_mp_charset(part)) add_field(field) if fl: self.list = fl +def _mp_charset(part): + """the charset of *part*.""" + content_type = part.headers.get("Content-Type", "") + _, options = parse_options_header(content_type) + return options.get("charset") + + # Original version: zope.publisher.browser.FileUpload class FileUpload: '''File upload objects diff --git a/src/ZPublisher/tests/testHTTPRequest.py b/src/ZPublisher/tests/testHTTPRequest.py index 2a148478f5..71e9e204d1 100644 --- a/src/ZPublisher/tests/testHTTPRequest.py +++ b/src/ZPublisher/tests/testHTTPRequest.py @@ -17,6 +17,7 @@ from contextlib import contextmanager from io import BytesIO from unittest.mock import patch +from urllib.parse import quote_plus from AccessControl.tainted import TaintedString from AccessControl.tainted import should_be_tainted @@ -144,8 +145,6 @@ def _makeOne(self, stdin=None, environ=None, response=None, clean=1): class HTTPRequestTests(unittest.TestCase, HTTPRequestFactoryMixin): def 
_processInputs(self, inputs): - from urllib.parse import quote_plus - # Have the inputs processed, and return a HTTPRequest object # holding the result. # inputs is expected to be a list of (key, value) tuples, no CGI @@ -1346,6 +1345,26 @@ def test_issue_1095(self): self.assertIsInstance(r[0].x, FileUpload) self.assertIsInstance(r[1].x, TaintedString) + def test_field_charset(self): + body = TEST_FIELD_CHARSET_DATA + env = self._makePostEnviron(body) + env["QUERY_STRING"] = "y=" + quote_plus("äöü") + req = self._makeOne(BytesIO(body), env) + req.processInputs() + self.assertEqual(req["x"], "äöü") + self.assertEqual(req["y"], "äöü") + + def test_form_charset(self): + body = ("x=" + quote_plus("äöü", encoding="latin-1")).encode("ASCII") + env = self._makePostEnviron(body) + env["CONTENT_TYPE"] = \ + "application/x-www-form-urlencoded; charset=latin-1" + env["QUERY_STRING"] = "y=" + quote_plus("äöü") + req = self._makeOne(BytesIO(body), env) + req.processInputs() + self.assertEqual(req["x"], "äöü") + self.assertEqual(req["y"], "äöü") + class TestHTTPRequestZope3Views(TestRequestViewsBase): @@ -1444,3 +1463,12 @@ def test_special(self): --12345-- ''' + +TEST_FIELD_CHARSET_DATA = b''' +--12345 +Content-Disposition: form-data; name="x" +Content-Type: text/plain; charset=latin-1 + +%s +--12345-- +''' % 'äöü'.encode("latin-1")