diff --git a/src/lib/utils.ts b/src/lib/utils.ts index c12603f..814feda 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -49,9 +49,19 @@ export function getSurt(url: string) { surt += urlObj.search; for (const [key, value] of urlObj.searchParams.entries()) { if (!value) { + // if no value set, by default the surt contains 'key=' + // however, for compatibility, only want to add a trailing '=' + // if original URL has it. + const keyEncoded = encodeURIComponent(key); const rx = new RegExp(`(?<=[&?])${rxEscape(key)}=(?=&|$)`); + // if original URL does *not* have trailing '=', attempt to remove it below if (!rx.exec(urlLower)) { - surt = surt.replace(rx, key); + // use URI encoded version to match the query arg if key is %-encoded + const rxEncoded = + key === keyEncoded + ? rx + : new RegExp(`(?<=[&?])${rxEscape(keyEncoded)}=(?=&|$)`); + surt = surt.replace(rxEncoded, keyEncoded); } } } diff --git a/test/testUtils.test.ts b/test/testUtils.test.ts index 0685a22..a96992f 100644 --- a/test/testUtils.test.ts +++ b/test/testUtils.test.ts @@ -139,4 +139,15 @@ describe("utils", () => { "com,example)/some/path?*&a=b&c=d&z", ); }); + + test("surt with %-encoded query, trailing = param", () => { + expect(getSurt("https://www.example.com/some/path?a=b&c=d&e^=&z")).toBe( + "com,example)/some/path?a=b&c=d&e%5E=&z", + ); + }); + test("surt with %-encoded query, no trailing = param", () => { + expect(getSurt("https://www.example.com/some/path?a=b&c=d&e^&z")).toBe( + "com,example)/some/path?a=b&c=d&e%5E&z", + ); + }); }); diff --git a/test/testWARCParser.test.ts b/test/testWARCParser.test.ts index 76e6480..e930e17 100644 --- a/test/testWARCParser.test.ts +++ b/test/testWARCParser.test.ts @@ -52,8 +52,8 @@ Multi-Line: Value1\r\n\ Also This\r\n\ \r\n\ Body", - ]) - ) + ]), + ), ); expect(result?.toString()).toBe(`\ HTTP/1.0 200 OK\r @@ -76,8 +76,8 @@ Content-Type: Value\r\n\ Content-Length: 0\r\n\ Bad: multi\nline\r\n\ \r\n", - ]) - ) + ]), + ), ); expect(result?.toString()).toBe(`HTTP/1.0 204 Empty\r Content-Type: Value\r @@ -89,7 +89,7 @@ Bad: multi\r test("StatusAndHeaders test 3", async () => { const parser = new StatusAndHeadersParser(); const result = await parser.parse( - new AsyncIterReader(getReader(["HTTP/1.0 204 None\r\n\r\n"])) + new AsyncIterReader(getReader(["HTTP/1.0 204 None\r\n\r\n"])), ); expect(result?.toString()).toBe("HTTP/1.0 204 None\r\n"); }); @@ -97,7 +97,7 @@ test("StatusAndHeaders test 3", async () => { test("StatusAndHeaders test empty", async () => { const parser = new StatusAndHeadersParser(); const result = await parser.parse( - new AsyncIterReader(getReader(["\r\n\r\n"])) + new AsyncIterReader(getReader(["\r\n\r\n"])), ); expect(result).toBe(null); }); @@ -178,7 +178,7 @@ text\r\n\ software: recorder test\r\n\ format: WARC File Format 1.0\r\n\ json-metadata: {"foo": "bar"}\r\n\ -' +', ); // eslint-disable-next-line @typescript-eslint/no-non-null-assertion -- checked in expect @@ -252,7 +252,7 @@ Content-Length: 0\r\n\ expect(record.warcHeaders.protocol).toBe("WARC/1.0"); expect(record.warcHeader("WARC-Record-ID")).toBe( - "" + "", ); expect(record.warcType).toBe("revisit"); expect(record.warcTargetURI).toBe("http://example.com/"); @@ -260,7 +260,7 @@ Content-Length: 0\r\n\ expect(record.warcRefersToTargetURI).toBe("http://example.com/foo"); expect(record.warcRefersToDate).toBe("1999-01-01T00:00:00Z"); expect(record.warcPayloadDigest).toBe( - "sha1:B6QJ6BNJ3R4B23XXMRKZKHLPGJY2VE4O" + "sha1:B6QJ6BNJ3R4B23XXMRKZKHLPGJY2VE4O", ); expect(record.warcContentType).toBe("application/http; msgtype=response"); expect(record.warcContentLength).toBe(0); @@ -307,7 +307,7 @@ Foo: Bar\r\n\ expect(record).not.toBeNull(); expect(record.warcHeaders.protocol).toBe("WARC/1.0"); expect(record.warcHeader("WARC-Record-ID")).toBe( - "" + "", ); expect(record.warcType).toBe("revisit"); expect(record.warcTargetURI).toBe("http://example.com/"); @@ -315,7 +315,7 @@ Foo: Bar\r\n\ expect(record.warcRefersToTargetURI).toBe("http://example.com/foo"); expect(record.warcRefersToDate).toBe("1999-01-01T00:00:00Z"); expect(record.warcPayloadDigest).toBe( - "sha1:B6QJ6BNJ3R4B23XXMRKZKHLPGJY2VE4O" + "sha1:B6QJ6BNJ3R4B23XXMRKZKHLPGJY2VE4O", ); expect(record.warcContentType).toBe("application/http; msgtype=response"); expect(record.warcContentLength).toBe(54); @@ -374,7 +374,7 @@ Foo: Bar\r\n\ expect(record.warcHeaders.protocol).toBe("WARC/1.0"); expect(record.warcHeader("WARC-Record-ID")).toBe( - "" + "", ); expect(record.warcType).toBe("revisit"); expect(record.warcContentLength).toBe(82); @@ -542,7 +542,7 @@ test("warc1.1 serialize records match", async () => { test("chunked warc read", async () => { const input = fs.createReadStream( - get_warc_path("data/example-iana.org-chunked.warc") + get_warc_path("data/example-iana.org-chunked.warc"), ); const parser = new WARCParser(input); @@ -558,7 +558,7 @@ test("chunked warc read", async () => { // can't read raw data anymore await expect(async () => await record.readFully(false)).rejects.toThrow( - "WARC Record decoding already started, but requesting raw payload" + "WARC Record decoding already started, but requesting raw payload", ); const text = await record.contentText(); @@ -568,7 +568,7 @@ test("chunked warc read", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any -- checking invalid type const busyRecord = record as any as { reader: LimitReader }; await expect(async () => await busyRecord.reader.readFully()).rejects.toThrow( - "WARC Record decoding already started, but requesting raw payload" + "WARC Record decoding already started, but requesting raw payload", ); expect(await record.readFully(true)).not.toBeNull(); @@ -576,7 +576,7 @@ test("chunked warc read", async () => { test("no await catch errors", async () => { const input = fs.createReadStream( - get_warc_path("data/example-iana.org-chunked.warc") + get_warc_path("data/example-iana.org-chunked.warc"), ); const parser = new WARCParser(input); @@ -597,10 +597,10 @@ test("no await catch errors", async () => { const record1 = (await parser.parse())!; expect(record1).not.toBeNull(); await expect(async () => await iter.next()).rejects.toThrow( - "Record already consumed.. Perhaps a promise was not awaited?" + "Record already consumed.. Perhaps a promise was not awaited?", ); await expect(async () => await record0.readline()).rejects.toThrow( - "Record already consumed.. Perhaps a promise was not awaited?" + "Record already consumed.. Perhaps a promise was not awaited?", ); let count = 0; @@ -685,7 +685,7 @@ text\r\n\ ["custom-header", "somevalue"], ["set-cookie", "greeting=hello"], ["set-cookie", "name=world"], - ]) + ]), ); } else { expect(JSON.stringify(headerEntries)).toBe( @@ -693,7 +693,7 @@ text\r\n\ ["content-type", 'text/plain; charset="UTF-8"'], ["custom-header", "somevalue"], ["set-cookie", "greeting=hello, name=world"], - ]) + ]), ); } @@ -718,7 +718,7 @@ text\r\n\ ["content-type", 'text/plain; charset="UTF-8"'], ["custom-header", "somevalue"], ["unicode-header", "%F0%9F%93%81%20text%20%F0%9F%97%84%EF%B8%8F"], - ]) + ]), ); expect(decoder.decode(await record2.readFully())).toBe("more\ntext");