From 6eeef70dd8adeda64fb9884522eca755ed02e6f9 Mon Sep 17 00:00:00 2001 From: Paul Pilone Date: Tue, 23 Aug 2022 22:23:58 -0400 Subject: [PATCH] Hacky fix for DOI searches. --- api/lambdas/search-by-keywords.test.ts | 23 ++++++++++++----------- api/lib/search-query-builder.ts | 14 +++++++++++--- lib/documents-mapping.ts | 3 +++ lib/open-search-client.test.ts | 3 +++ 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/api/lambdas/search-by-keywords.test.ts b/api/lambdas/search-by-keywords.test.ts index 07d87bf9..48e62913 100644 --- a/api/lambdas/search-by-keywords.test.ts +++ b/api/lambdas/search-by-keywords.test.ts @@ -62,8 +62,7 @@ describe('search-by-keywords.handler', () => { const uuid1 = randomUUID(); const uuid2 = randomUUID(); const uuid3 = randomUUID(); - - const doiUUID = randomUUID(); + const uuid4 = randomUUID(); beforeAll(async () => { const doc1 = { @@ -81,28 +80,30 @@ describe('search-by-keywords.handler', () => { uuid: uuid3, dc_title: 'This is a document with author text.', dc_contributor_author: 'Ocean Sea', - dc_identifier_uri: [ - 'http://hdl.handle.net/11329/1029', + dc_identifier_doi: [ 'http://dx.doi.org/10.25607/OBP-561', ], }; + const doc4 = { + uuid: uuid4, + dc_identifier_doi: [ + 'http://dx.doi.org/10.25607/OBP-765', + ], + }; + await osClient.addDocument(esUrl, documentsIndexName, doc1); await osClient.addDocument(esUrl, documentsIndexName, doc2); await osClient.addDocument(esUrl, documentsIndexName, doc3); + await osClient.addDocument(esUrl, documentsIndexName, doc4); await osClient.refreshIndex(esUrl, documentsIndexName); }); afterAll(async () => { - await osClient.deleteByQuery( - esUrl, - documentsIndexName, - { match: { uuid: doiUUID } } - ); - await osClient.deleteByQuery(esUrl, documentsIndexName, { match: { uuid: uuid1 } }); await osClient.deleteByQuery(esUrl, documentsIndexName, { match: { uuid: uuid2 } }); await osClient.deleteByQuery(esUrl, documentsIndexName, { match: { uuid: uuid3 } }); + await osClient.deleteByQuery(esUrl, documentsIndexName, { match: { uuid: uuid4 } }); await osClient.refreshIndex(esUrl, documentsIndexName); }); @@ -199,7 +200,7 @@ describe('search-by-keywords.handler', () => { test('should find documents with the DOI metadata field', (done) => { const proxyEvent = { queryStringParameters: { - keywords: ':dc_identifier_uri:10.25607/OBP-561', + keywords: ':dc_identifier_doi:10.25607/OBP-561', }, }; diff --git a/api/lib/search-query-builder.ts b/api/lib/search-query-builder.ts index 11836812..bb16919f 100644 --- a/api/lib/search-query-builder.ts +++ b/api/lib/search-query-builder.ts @@ -36,7 +36,12 @@ export const nestedQuery = (termPhrase: unknown) => ({ // All special characters: + - = && || > < ! ( ) { } [ ] ^ " ~ * ? : \ / // Characters we currently want to escape: + - = && || ! ( ) { } [ ] : \ / const queryStringSpecialCharacters = /\+|-|=|&{2}|\|{2}|!|\(|\)|{|}|\[|]|:|\/|\\/g; -const encodeQueryStringTermComp = (term: string): string => term.replace(queryStringSpecialCharacters, '\\$&'); +const encodeQueryStringTerm = (term: string, field: string): string => { + const encodedTerm = term.replace(queryStringSpecialCharacters, '\\$&'); + + // FIXME: This is a hack to make doi searches work. Fix this better please. + return field === 'dc_identifier_doi' ? `*${encodedTerm}` : encodedTerm; +}; const formatKeywordComp = (keywordComp: SearchKeywordComps) => { let openSearchOperator; @@ -52,8 +57,11 @@ const formatKeywordComp = (keywordComp: SearchKeywordComps) => { openSearchOperator = 'OR'; } - const escapedKeywordCompTerm = encodeQueryStringTermComp(keywordComp.term); - return `${openSearchOperator} ${keywordComp.field}:(${escapedKeywordCompTerm})`; + const encodedKeywordCompTerm = encodeQueryStringTerm( + keywordComp.term, + keywordComp.field + ); + return `${openSearchOperator} ${keywordComp.field}:(${encodedKeywordCompTerm})`; }; /** diff --git a/lib/documents-mapping.ts b/lib/documents-mapping.ts index cc03ca1b..6995bb57 100644 --- a/lib/documents-mapping.ts +++ b/lib/documents-mapping.ts @@ -85,6 +85,9 @@ export const documentsMapping = { type: 'text', analyzer: 'english', }, + dc_identifier_doi: { + type: 'keyword', + }, }, }, }; diff --git a/lib/open-search-client.test.ts b/lib/open-search-client.test.ts index d8666d21..32cfe512 100644 --- a/lib/open-search-client.test.ts +++ b/lib/open-search-client.test.ts @@ -404,6 +404,9 @@ describe('open-search-client', () => { uuid: { type: 'keyword', }, + dc_identifier_doi: { + type: 'keyword', + }, }, }); });