Skip to content

Commit

Permalink
Merge pull request #412 from NYPL/qa2-refactor-searchathon-fixes
Browse files Browse the repository at this point in the history
QA2: Searchathon fixes
  • Loading branch information
nonword authored Oct 18, 2024
2 parents 75eada0 + 48772b2 commit 2173ce7
Show file tree
Hide file tree
Showing 111 changed files with 411,931 additions and 10,003 deletions.
1 change: 0 additions & 1 deletion config/production.env
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAA
NYPL_CORE_VERSION=v2.21

LOG_LEVEL=info
NAME_QUERIES=true
FEATURES=on-site-edd

SEARCH_ITEMS_SIZE=3
Expand Down
2 changes: 1 addition & 1 deletion config/qa.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Greg built self-hosted qa domain:
ENCRYPTED_ELASTICSEARCH_URI=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJYwgZMGCSqGSIb3DQEHBqCBhTCBggIBADB9BgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMIkDoQ9C/cCDCAq1wIBEIBQ+L3OgUGeOW9rs1CWkhpBjwM4LbbVRFIWedqew4UXIeSNMJ8cO9SNe4YGCUIoKwCDYt7W7ip3VtDRRRMVvz6QJw+Eg8ugTMVs2pbNFGNvaAQ=
ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHIwcAYJKoZIhvcNAQcGoGMwYQIBADBcBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDEQrZlNp930LqpZVywIBEIAvpxx39QAHfdY1+Lxv54TnDVW3M5cSHiyTbuhaJgDK/3y+5iHyw6Y0MmtGdgnl5jQ=
ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHIwcAYJKoZIhvcNAQcGoGMwYQIBADBcBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDGjIeRRI3cN5LwfcmgIBEIAvkB3K4sxagpqspNa3b3BnJQgbw6Ic0jDhuXzk8gmrCiGNMR90YjTZmyLj9aVUN3M=
ENCRYPTED_ELASTICSEARCH_API_KEY=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJ4wgZsGCSqGSIb3DQEHBqCBjTCBigIBADCBhAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAx+kryf2KUmGdBYD9sCARCAV3ygz3eXIdq8JX/wpG9JRWlTNMRcpNE1qT0zNlN4t+ZvXEoedLQa/3p1YjgHw06GIAdA9xtkMV4eH9a1K8uCvjP8XxxNKekcMj59TlResnu9QF3r7pGXuQ==

ENCRYPTED_SCSB_URL=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAH8wfQYJKoZIhvcNAQcGoHAwbgIBADBpBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBKllElmWYLxGOGopQIBEIA8JJyKde/8m8iCJGKR5D8HoTJhXHeyvw9eIDeuUNKiXLfJwoVz+PDAZSxkCQtM9O91zGhXbe3l6Bk1RlYJ
Expand Down
8 changes: 6 additions & 2 deletions lib/api-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
* This class wraps request params to ease interpretting the query
**/

// Build regex pattern for matching a phrase fully enclosed in quotes (or smart quotes):
const QUOTE_CHARS = '"\u201C\u201D\u201E\u201F\u2033\u2036'
const IN_QUOTES_PATTERN = new RegExp(`^[${QUOTE_CHARS}][^${QUOTE_CHARS}]+[${QUOTE_CHARS}]$`)

class ApiRequest {
static ADVANCED_SEARCH_PARAMS = ['title', 'subject', 'contributor']
static IDENTIFIER_NUMBER_PARAMS = ['isbn', 'issn', 'lccn', 'oclc']
Expand Down Expand Up @@ -43,10 +47,10 @@ class ApiRequest {
}

/**
* Returns true if query is fully wrapped in quotes
* Returns true if query is fully wrapped in quotes (or smart quotes)
**/
queryIsFullyQuoted () {
return /^"[^"]+"$/.test(this.params.q)
return IN_QUOTES_PATTERN.test(this.params.q)
}

/**
Expand Down
3 changes: 3 additions & 0 deletions lib/elasticsearch/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ const SEARCH_SCOPES = {
series: {
fields: ['seriesStatement.folded']
},
journal_title: {
fields: null
},
callnumber: {
// We do custom field matching for this search-scope
},
Expand Down
78 changes: 49 additions & 29 deletions lib/elasticsearch/elastic-query-builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class ElasticQueryBuilder {
case 'title':
this.buildTitleQuery()
break
case 'journal_title':
this.buildTitleQuery()
this.requireIssuance('urn:biblevel:s')
break
case 'subject':
this.buildSubjectQuery()
break
Expand All @@ -36,6 +40,9 @@ class ElasticQueryBuilder {
this.buildAllQuery()
}

// Add user filters:
this.applyFilters()

// Apply global clauses:
// Hide specific nypl-sources when configured to do so:
this.applyHiddenNyplSources()
Expand Down Expand Up @@ -83,9 +90,6 @@ class ElasticQueryBuilder {
}
}

// Add user filters:
this.applyFilters()

// Lastly, if any identifier-number params are present (lccn=, isbn=, etc),
// add those clauses:
if (this.request.hasIdentifierNumberParam()) {
Expand Down Expand Up @@ -187,6 +191,18 @@ class ElasticQueryBuilder {
this.boostOnItemShelfmarkPrefix()
}

/**
* Require a specific issuance (e.g. require 'urn:biblevel:s' for journal-
* title searches)
*/
requireIssuance (issuance) {
this.query.addFilter({
term: {
'issuance.id': issuance
}
})
}

/**
* Require exact phrase-match on named fields
*/
Expand All @@ -213,8 +229,9 @@ class ElasticQueryBuilder {
prefixMatch('identifierV2.value', q),
termMatch('uri', q),
termMatch('items.idBarcode', q),
termMatch('idIsbn_clean', q),
prefixMatch('items.shelfMark.raw', q)
termMatch('idIsbn.clean', q),
termMatch('idIssn.clean', q),
prefixMatch('items.shelfMark.keywordLowercased', q)
]
}
})
Expand All @@ -233,17 +250,9 @@ class ElasticQueryBuilder {
}
})
} else if (this.request.params.issn) {
this.query.addMust({ term: { idIssn: this.request.params.issn } })
this.query.addMust({ term: { 'idIssn.clean': this.request.params.issn } })
} else if (this.request.params.isbn) {
this.query.addMust({
bool: {
should: [
{ term: { idIsbn: this.request.params.isbn } },
{ term: { idIsbn_clean: this.request.params.isbn } }
],
minimum_should_match: 1
}
})
this.query.addMust({ term: { 'idIsbn.clean': this.request.params.isbn } })
} else if (this.request.params.oclc) {
this.query.addMust({ term: { idOclc: this.request.params.oclc } })
}
Expand All @@ -258,8 +267,8 @@ class ElasticQueryBuilder {
this.query.addMust({
bool: {
should: [
prefixMatch('shelfMark.raw', q),
prefixMatch('items.shelfMark.raw', q)
prefixMatch('shelfMark.keywordLowercased', q),
prefixMatch('items.shelfMark.keywordLowercased', q)
]
}
})
Expand Down Expand Up @@ -350,14 +359,18 @@ class ElasticQueryBuilder {
* Boost bibs with items having exact callnumber match
**/
boostOnItemShelfmarkExact (boost = 50) {
this.query.addShould(termMatch('items.shelfMark.raw', this.request.querySansQuotes(), 50))
this.query.addShould(termMatch('items.shelfMark.keywordLowercased', this.request.querySansQuotes(), boost))
// Specially boost case-sensitive match:
this.query.addShould(termMatch('items.shelfMark.raw', this.request.querySansQuotes(), boost * 2))
}

/**
* Boost bibs with items having prefix callnumber match
**/
boostOnItemShelfmarkPrefix (boost = 10) {
this.query.addShould(prefixMatch('items.shelfMark.raw', boost))
this.query.addShould(prefixMatch('items.shelfMark.keywordLowercased', this.request.querySansQuotes(), boost))
// Specially boost case-sensitive match:
this.query.addShould(prefixMatch('items.shelfMark.raw', this.request.querySansQuotes(), boost * 2))
}

/**
Expand All @@ -367,20 +380,27 @@ class ElasticQueryBuilder {
const q = this.request.querySansQuotes()

this.query.addShoulds([
prefixMatch('identifierV2.value', q, 10),
termMatch('uri', q, 10),
prefixMatch('idIsbn', q, 10),
prefixMatch('identifierV2.value', q, 5),
termMatch('uri', q, 5),

// Tiered matches on isbn:
prefixMatch('idIsbn.clean', q, 10),
termMatch('idIsbn.clean', q, 20),
termMatch('idIsbn', q, 20),

// Tiered matches on issn:
prefixMatch('idIssn.clean', q, 10),
termMatch('idIssn.clean', q, 20),
termMatch('idIssn', q, 20),

prefixMatch('idLccn', q, 10),
prefixMatch('idOclc', q, 10),
prefixMatch('shelfMark.raw', q, 10),
prefixMatch('shelfMark.keywordLowercased', q, 10),
prefixMatch('items.shelfMark.keywordLowercased', q, 10),

// I would like this to be a prefix, but doesn't seem to work:
termMatch('items.identifierV2.value', q),
prefixMatch('items.shelfMark.raw', q)
termMatch('items.identifierV2.value', q)
])

if (/\d{6,}/.test(this.request.querySansQuotes())) {
this.query.addShould(prefixMatch('idIsbn_clean', q))
}
}

/**
Expand Down
2 changes: 1 addition & 1 deletion lib/elasticsearch/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ const termMatch = (field, value, boost = 1) => {
* For example:
* const body = { query: phraseMatch('title', 'Importance of Being' }
*/
const phraseMatch = (field, value, boost = 1) => {
const phraseMatch = (field, value, boost = 0) => {
return {
match_phrase: {
[field]: namedQuery({
Expand Down
7 changes: 5 additions & 2 deletions lib/jsonld_serializers.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ class JsonLdItemSerializer extends JsonLdSerializer {
}
})

// If updatedAt (int) found, add updatedAtDate (Date string)
if (stmts.updatedAt && /^\d+$/.test(stmts.updatedAt)) {
stmts.updatedAtDate = (new Date(stmts.updatedAt)).toISOString()
}

return stmts
}

Expand Down Expand Up @@ -228,8 +233,6 @@ class ResourceSerializer extends JsonLdItemSerializer {
})
}

stmts.suppressed = this.body.suppressed === true

if (this.body.items) {
stmts.items = this.body.items
// Amend items to include source identifier (e.g. urn:SierraNypl:1234, urn:RecapCul:4567)
Expand Down
6 changes: 4 additions & 2 deletions lib/load-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ module.exports.loadConfig = async () => {

// ECS task definition is using NODE_ENV, so we'll support that too for now
const envTag = (process.env.ENV || process.env.NODE_ENV)
const env = envTag &&
['local', 'qa', 'qa-new-domain', 'production'].includes(envTag)
let env = envTag &&
['local', 'qa', 'qa2', 'production'].includes(envTag)
? envTag
: 'production'
// ECS task definition for qa2 has NODE_ENV=qa2, so translate that to qa:
env = env === 'qa2' ? 'qa' : env
const envPath = `config/${env}.env`

// Load env vars
Expand Down
20 changes: 18 additions & 2 deletions lib/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,17 @@ const SORT_FIELDS = {
}

// The following fields can be excluded from ES responses because we don't pass them to client:
const EXCLUDE_FIELDS = ['uris', '*_packed', '*_sort', 'items.*_packed', 'contentsTitle']
const EXCLUDE_FIELDS = [
'uris',
'*_packed',
'*_sort',
'items.*_packed',
'contentsTitle',
'suppressed',
// Hide contributor and creator transformed fields:
'*WithoutDates',
'*Normalized'
]

// Configure controller-wide parameter parsing:
const parseSearchParams = function (params) {
Expand Down Expand Up @@ -111,6 +121,12 @@ module.exports = function (app, _private = null) {
include_item_aggregations: { type: 'boolean', default: true }
})

// Validate uri:
const { id, nyplSource } = NyplSourceMapper.instance().splitIdentifier(params.uri)
if (!id || !nyplSource) {
throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`)
}

// If we need to return itemAggregations or filter on item_status,
// then we need to pre-retrieve SCSB item statuses to incorporate them into
// aggregations and filters.
Expand Down Expand Up @@ -211,7 +227,7 @@ module.exports = function (app, _private = null) {
.then((resp) => {
// need to check that the query actually found an entry
if (!resp.data) {
throw new errors.NotFoundError('Record not found')
throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`)
} else {
return resp.data
}
Expand Down
19 changes: 16 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"sinon": "^17.0.1",
"standard": "^17.0.1",
"supertest": "^7.0.0",
"table": "^6.8.2"
"table": "^6.8.2",
"yaml": "^2.6.0"
},
"scripts": {
"test": "./node_modules/.bin/standard --env mocha && NODE_ENV=test ./node_modules/.bin/mocha test --exit",
Expand Down Expand Up @@ -58,7 +59,7 @@
"/logs-to-tsv/*"
]
},
"version": "1.1.3",
"version": "1.1.4",
"repository": {
"type": "git",
"url": "https://github.com/NYPL/discovery-api.git"
Expand Down
2 changes: 2 additions & 0 deletions swagger.v1.1.x.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"enum": [
"all",
"title",
"journal_title",
"contributor",
"subject",
"series",
Expand Down Expand Up @@ -287,6 +288,7 @@
"enum": [
"all",
"title",
"journal_title",
"contributor",
"subject",
"series",
Expand Down
4 changes: 4 additions & 0 deletions test/api-request.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ describe('ApiRequest', function () {
request = ApiRequest.fromParams({ q: '"toast"' })
expect(request.queryIsFullyQuoted()).to.eq(true)

// Test smart-quotes, the great scorge
request = ApiRequest.fromParams({ q: '“toast“' })
expect(request.queryIsFullyQuoted()).to.eq(true)

request = ApiRequest.fromParams({ q: '"toast" and jam' })
expect(request.queryIsFullyQuoted()).to.eq(false)
})
Expand Down
Loading

0 comments on commit 2173ce7

Please sign in to comment.