Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QA2: Searchathon fixes #412

Merged
merged 11 commits into from
Oct 18, 2024
1 change: 0 additions & 1 deletion config/production.env
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ ENCRYPTED_NYPL_OAUTH_SECRET=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAA
NYPL_CORE_VERSION=v2.21

LOG_LEVEL=info
NAME_QUERIES=true
FEATURES=on-site-edd

SEARCH_ITEMS_SIZE=3
Expand Down
2 changes: 1 addition & 1 deletion config/qa.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Greg built self-hosted qa domain:
ENCRYPTED_ELASTICSEARCH_URI=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJYwgZMGCSqGSIb3DQEHBqCBhTCBggIBADB9BgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDMIkDoQ9C/cCDCAq1wIBEIBQ+L3OgUGeOW9rs1CWkhpBjwM4LbbVRFIWedqew4UXIeSNMJ8cO9SNe4YGCUIoKwCDYt7W7ip3VtDRRRMVvz6QJw+Eg8ugTMVs2pbNFGNvaAQ=
ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHIwcAYJKoZIhvcNAQcGoGMwYQIBADBcBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDEQrZlNp930LqpZVywIBEIAvpxx39QAHfdY1+Lxv54TnDVW3M5cSHiyTbuhaJgDK/3y+5iHyw6Y0MmtGdgnl5jQ=
ENCRYPTED_RESOURCES_INDEX=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAHIwcAYJKoZIhvcNAQcGoGMwYQIBADBcBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDGjIeRRI3cN5LwfcmgIBEIAvkB3K4sxagpqspNa3b3BnJQgbw6Ic0jDhuXzk8gmrCiGNMR90YjTZmyLj9aVUN3M=
ENCRYPTED_ELASTICSEARCH_API_KEY=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAJ4wgZsGCSqGSIb3DQEHBqCBjTCBigIBADCBhAYJKoZIhvcNAQcBMB4GCWCGSAFlAwQBLjARBAx+kryf2KUmGdBYD9sCARCAV3ygz3eXIdq8JX/wpG9JRWlTNMRcpNE1qT0zNlN4t+ZvXEoedLQa/3p1YjgHw06GIAdA9xtkMV4eH9a1K8uCvjP8XxxNKekcMj59TlResnu9QF3r7pGXuQ==

ENCRYPTED_SCSB_URL=AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAH8wfQYJKoZIhvcNAQcGoHAwbgIBADBpBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBKllElmWYLxGOGopQIBEIA8JJyKde/8m8iCJGKR5D8HoTJhXHeyvw9eIDeuUNKiXLfJwoVz+PDAZSxkCQtM9O91zGhXbe3l6Bk1RlYJ
Expand Down
8 changes: 6 additions & 2 deletions lib/api-request.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
* This class wraps request params to ease interpretting the query
**/

// Build regex pattern for matching a phrase fully enclosed in quotes (or smart quotes):
const QUOTE_CHARS = '"\u201C\u201D\u201E\u201F\u2033\u2036'
const IN_QUOTES_PATTERN = new RegExp(`^[${QUOTE_CHARS}][^${QUOTE_CHARS}]+[${QUOTE_CHARS}]$`)

class ApiRequest {
static ADVANCED_SEARCH_PARAMS = ['title', 'subject', 'contributor']
static IDENTIFIER_NUMBER_PARAMS = ['isbn', 'issn', 'lccn', 'oclc']
Expand Down Expand Up @@ -43,10 +47,10 @@ class ApiRequest {
}

/**
* Returns true if query is fully wrapped in quotes
* Returns true if query is fully wrapped in quotes (or smart quotes)
**/
queryIsFullyQuoted () {
return /^"[^"]+"$/.test(this.params.q)
return IN_QUOTES_PATTERN.test(this.params.q)
}

/**
Expand Down
3 changes: 3 additions & 0 deletions lib/elasticsearch/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ const SEARCH_SCOPES = {
series: {
fields: ['seriesStatement.folded']
},
journal_title: {
fields: null
},
callnumber: {
fields: null
},
Expand Down
78 changes: 49 additions & 29 deletions lib/elasticsearch/elastic-query-builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ class ElasticQueryBuilder {
case 'title':
this.buildTitleQuery()
break
case 'journal_title':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great. Should we ticket the work to remove that transformation from RC?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, although no rush.

this.buildTitleQuery()
this.requireIssuance('urn:biblevel:s')
break
case 'subject':
this.buildSubjectQuery()
break
Expand All @@ -36,6 +40,9 @@ class ElasticQueryBuilder {
this.buildAllQuery()
}

// Add user filters:
this.applyFilters()

// Apply global clauses:
// Hide specific nypl-sources when configured to do so:
this.applyHiddenNyplSources()
Expand Down Expand Up @@ -83,9 +90,6 @@ class ElasticQueryBuilder {
}
}

// Add user filters:
this.applyFilters()

// Lastly, if any identifier-number params are present (lccn=, isbn=, etc),
// add those clauses:
if (this.request.hasIdentifierNumberParam()) {
Expand Down Expand Up @@ -187,6 +191,18 @@ class ElasticQueryBuilder {
this.boostOnItemShelfmarkPrefix()
}

/**
* Require a specific issuance (e.g. require 'urn:biblevel:s' for journal-
* title searches)
*/
requireIssuance (issuance) {
this.query.addFilter({
term: {
'issuance.id': issuance
}
})
}

/**
* Require exact phrase-match on named fields
*/
Expand All @@ -213,8 +229,9 @@ class ElasticQueryBuilder {
prefixMatch('identifierV2.value', q),
termMatch('uri', q),
termMatch('items.idBarcode', q),
termMatch('idIsbn_clean', q),
prefixMatch('items.shelfMark.raw', q)
termMatch('idIsbn.clean', q),
termMatch('idIssn.clean', q),
prefixMatch('items.shelfMark.keywordLowercased', q)
]
}
})
Expand All @@ -233,17 +250,9 @@ class ElasticQueryBuilder {
}
})
} else if (this.request.params.issn) {
this.query.addMust({ term: { idIssn: this.request.params.issn } })
this.query.addMust({ term: { 'idIssn.clean': this.request.params.issn } })
} else if (this.request.params.isbn) {
this.query.addMust({
bool: {
should: [
{ term: { idIsbn: this.request.params.isbn } },
{ term: { idIsbn_clean: this.request.params.isbn } }
],
minimum_should_match: 1
}
})
this.query.addMust({ term: { 'idIsbn.clean': this.request.params.isbn } })
} else if (this.request.params.oclc) {
this.query.addMust({ term: { idOclc: this.request.params.oclc } })
}
Expand All @@ -258,8 +267,8 @@ class ElasticQueryBuilder {
this.query.addMust({
bool: {
should: [
prefixMatch('shelfMark.raw', q),
prefixMatch('items.shelfMark.raw', q)
prefixMatch('shelfMark.keywordLowercased', q),
prefixMatch('items.shelfMark.keywordLowercased', q)
]
}
})
Expand Down Expand Up @@ -350,14 +359,18 @@ class ElasticQueryBuilder {
* Boost bibs with items having exact callnumber match
**/
boostOnItemShelfmarkExact (boost = 50) {
this.query.addShould(termMatch('items.shelfMark.raw', this.request.querySansQuotes(), 50))
this.query.addShould(termMatch('items.shelfMark.keywordLowercased', this.request.querySansQuotes(), boost))
// Specially boost case-sensitive match:
this.query.addShould(termMatch('items.shelfMark.raw', this.request.querySansQuotes(), boost * 2))
}

/**
* Boost bibs with items having prefix callnumber match
**/
boostOnItemShelfmarkPrefix (boost = 10) {
this.query.addShould(prefixMatch('items.shelfMark.raw', boost))
this.query.addShould(prefixMatch('items.shelfMark.keywordLowercased', this.request.querySansQuotes(), boost))
// Specially boost case-sensitive match:
this.query.addShould(prefixMatch('items.shelfMark.raw', this.request.querySansQuotes(), boost * 2))
}

/**
Expand All @@ -367,20 +380,27 @@ class ElasticQueryBuilder {
const q = this.request.querySansQuotes()

this.query.addShoulds([
prefixMatch('identifierV2.value', q, 10),
termMatch('uri', q, 10),
prefixMatch('idIsbn', q, 10),
prefixMatch('identifierV2.value', q, 5),
termMatch('uri', q, 5),

// Tiered matches on isbn:
prefixMatch('idIsbn.clean', q, 10),
termMatch('idIsbn.clean', q, 20),
termMatch('idIsbn', q, 20),

// Tiered matches on issn:
prefixMatch('idIssn.clean', q, 10),
termMatch('idIssn.clean', q, 20),
termMatch('idIssn', q, 20),

prefixMatch('idLccn', q, 10),
prefixMatch('idOclc', q, 10),
prefixMatch('shelfMark.raw', q, 10),
prefixMatch('shelfMark.keywordLowercased', q, 10),
prefixMatch('items.shelfMark.keywordLowercased', q, 10),

// I would like this to be a prefix, but doesn't seem to work:
termMatch('items.identifierV2.value', q),
prefixMatch('items.shelfMark.raw', q)
termMatch('items.identifierV2.value', q)
])

if (/\d{6,}/.test(this.request.querySansQuotes())) {
this.query.addShould(prefixMatch('idIsbn_clean', q))
}
}

/**
Expand Down
2 changes: 1 addition & 1 deletion lib/elasticsearch/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ const termMatch = (field, value, boost = 1) => {
* Given a field name and a value, returns a plainobject representing a ES
* `match_phrase` clause that can be inserted into a ES query
*/
const phraseMatch = (field, value, boost = 1) => {
const phraseMatch = (field, value, boost = 0) => {
return {
match_phrase: {
[field]: namedQuery({
Expand Down
7 changes: 5 additions & 2 deletions lib/jsonld_serializers.js
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ class JsonLdItemSerializer extends JsonLdSerializer {
}
})

// If updatedAt (int) found, add updatedAtDate (Date string)
if (stmts.updatedAt && /^\d+$/.test(stmts.updatedAt)) {
stmts.updatedAtDate = (new Date(stmts.updatedAt)).toISOString()
}

return stmts
}

Expand Down Expand Up @@ -228,8 +233,6 @@ class ResourceSerializer extends JsonLdItemSerializer {
})
}

stmts.suppressed = this.body.suppressed === true

if (this.body.items) {
stmts.items = this.body.items
// Amend items to include source identifier (e.g. urn:SierraNypl:1234, urn:RecapCul:4567)
Expand Down
6 changes: 4 additions & 2 deletions lib/load-config.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ module.exports.loadConfig = async () => {

// ECS task definition is using NODE_ENV, so we'll support that too for now
const envTag = (process.env.ENV || process.env.NODE_ENV)
const env = envTag &&
['local', 'qa', 'qa-new-domain', 'production'].includes(envTag)
let env = envTag &&
['local', 'qa', 'qa2', 'production'].includes(envTag)
? envTag
: 'production'
// ECS task definition for qa2 has NODE_ENV=qa2, so translate that to qa:
env = env === 'qa2' ? 'qa' : env
const envPath = `config/${env}.env`

// Load env vars
Expand Down
20 changes: 18 additions & 2 deletions lib/resources.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,17 @@ const SORT_FIELDS = {
}

// The following fields can be excluded from ES responses because we don't pass them to client:
const EXCLUDE_FIELDS = ['uris', '*_packed', '*_sort', 'items.*_packed', 'contentsTitle']
const EXCLUDE_FIELDS = [
'uris',
'*_packed',
'*_sort',
'items.*_packed',
'contentsTitle',
'suppressed',
// Hide contributor and creator transformed fields:
'*WithoutDates',
'*Normalized'
]

// Configure controller-wide parameter parsing:
const parseSearchParams = function (params) {
Expand Down Expand Up @@ -111,6 +121,12 @@ module.exports = function (app, _private = null) {
include_item_aggregations: { type: 'boolean', default: true }
})

// Validate uri:
const { id, nyplSource } = NyplSourceMapper.instance().splitIdentifier(params.uri)
if (!id || !nyplSource) {
throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`)
}

// If we need to return itemAggregations or filter on item_status,
// then we need to pre-retrieve SCSB item statuses to incorporate them into
// aggregations and filters.
Expand Down Expand Up @@ -211,7 +227,7 @@ module.exports = function (app, _private = null) {
.then((resp) => {
// need to check that the query actually found an entry
if (!resp.data) {
throw new errors.NotFoundError('Record not found')
throw new errors.NotFoundError(`Record not found: bibs/${nyplSource}/${id}`)
} else {
return resp.data
}
Expand Down
19 changes: 16 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
"sinon": "^17.0.1",
"standard": "^17.0.1",
"supertest": "^7.0.0",
"table": "^6.8.2"
"table": "^6.8.2",
"yaml": "^2.6.0"
},
"scripts": {
"test": "./node_modules/.bin/standard --env mocha && NODE_ENV=test ./node_modules/.bin/mocha test --exit",
Expand Down Expand Up @@ -58,7 +59,7 @@
"/logs-to-tsv/*"
]
},
"version": "1.1.3",
"version": "1.1.4",
"repository": {
"type": "git",
"url": "https://github.com/NYPL/discovery-api.git"
Expand Down
2 changes: 2 additions & 0 deletions swagger.v1.1.x.json
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"enum": [
"all",
"title",
"journal_title",
"contributor",
"subject",
"series",
Expand Down Expand Up @@ -287,6 +288,7 @@
"enum": [
"all",
"title",
"journal_title",
"contributor",
"subject",
"series",
Expand Down
4 changes: 4 additions & 0 deletions test/api-request.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ describe('ApiRequest', function () {
request = ApiRequest.fromParams({ q: '"toast"' })
expect(request.queryIsFullyQuoted()).to.eq(true)

// Test smart-quotes, the great scorge
request = ApiRequest.fromParams({ q: '“toast“' })
expect(request.queryIsFullyQuoted()).to.eq(true)

request = ApiRequest.fromParams({ q: '"toast" and jam' })
expect(request.queryIsFullyQuoted()).to.eq(false)
})
Expand Down
Loading
Loading