Skip to content

Commit

Permalink
Package updates, use RCI source-mapper
Browse files Browse the repository at this point in the history
Update vulnerable packages. Also change where the nypl-source-mapper util is
pulled from: previously discovery-store-models (deprecated), now
research-catalog-indexer (RCI). The RCI implementation is more secure and has
a major API change: it works async, so the integration here had to change in
several places (sync functions become async, plus related fallout).

https://newyorkpubliclibrary.atlassian.net/browse/SCC-4328
  • Loading branch information
nonword committed Oct 25, 2024
1 parent c2eb46e commit 920798e
Show file tree
Hide file tree
Showing 101 changed files with 447,499 additions and 403,658 deletions.
7 changes: 4 additions & 3 deletions lib/availability_resolver.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const scsbClient = require('./scsb-client')
const logger = require('./logger')
const ResourceSerializer = require('./jsonld_serializers').ResourceSerializer
const NyplSourceMapper = require('discovery-store-models/lib/nypl-source-mapper')
const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper')
const { nonRecapItemStatusAggregation } = require('./elasticsearch/client')
const { deepValue, isInRecap } = require('./util')

Expand Down Expand Up @@ -82,7 +82,7 @@ class AvailabilityResolver {
* If response contains an item status aggregation and appears to cover some
* ReCAP items, updates response to accurately reflect ReCAP statuses
*/
_fixItemStatusAggregation (options) {
async _fixItemStatusAggregation (options) {
const resp = this.elasticSearchResponse

// Return early if there are no item aggregations to fix:
Expand All @@ -97,7 +97,8 @@ class AvailabilityResolver {
const bnum = resp.hits.hits[0]?._id
if (!bnum) return Promise.resolve()

const { nyplSource } = NyplSourceMapper.instance().splitIdentifier(bnum)
const nyplSourceMapper = await NyplSourceMapper.instance()
const { nyplSource } = nyplSourceMapper.splitIdentifier(bnum)

// Get total number of items:
const numItems = (resp.hits.hits[0]._source.numItemsTotal || resp.hits.hits[0]._source.numItems)[0]
Expand Down
154 changes: 42 additions & 112 deletions lib/jsonld_serializers.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict'

const locations = require('@nypl/nypl-core-objects')('by-sierra-location')
const NyplSourceMapper = require('discovery-store-models/lib/nypl-source-mapper')
const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper')

const util = require('./util.js')
const logger = require('./logger')
Expand All @@ -14,15 +14,16 @@ class JsonLdSerializer {
this.options = options || {}
}

format () {
async format () {
const base = {}
if (this.options.root) {
// If expandContext, dump full context doc into result, otherwise just give URL
if (this.options.expandContext) base['@context'] = util.buildJsonLdContext({})
else base['@context'] = `${this.options.baseUrl}/context_all.jsonld`
}
base['@type'] = this.type
Object.assign(base, this.statements())
const statements = await this.statements()
Object.assign(base, statements)
return base
}
}
Expand All @@ -45,7 +46,7 @@ class JsonLdListSerializer extends JsonLdSerializer {
this.type = 'itemList'
}

statements () {
async statements () {
return Object.assign({}, {
itemListElement: this.items.map(this.itemSerializer.bind(this))
}, (this.options.extraRootProperties || {}))
Expand All @@ -67,7 +68,7 @@ class JsonLdItemSerializer extends JsonLdSerializer {
return `res:${this.body.uri}`
}

statements () {
async statements () {
const stmts = { '@id': this.resultId() }

// Takes any kind of value (array, object, string) and ensures .id props are formatted as '@id'
Expand Down Expand Up @@ -221,8 +222,8 @@ class ResourceSerializer extends JsonLdItemSerializer {
this.type = [...new Set([item.type].concat('nypl:Resource').flat())]
}

statements () {
const stmts = JsonLdItemSerializer.prototype.statements.call(this)
async statements () {
const stmts = await JsonLdItemSerializer.prototype.statements.call(this)

if (this.body.parentUri) stmts.memberOf = [util.eachValue(this.body.parentUri, (id) => ({ '@type': 'nypl:Resource', '@id': `res:${id}` }))].flat()

Expand All @@ -234,12 +235,16 @@ class ResourceSerializer extends JsonLdItemSerializer {
}

if (this.body.items) {
stmts.items = this.body.items
// Amend items to include source identifier (e.g. urn:SierraNypl:1234, urn:RecapCul:4567)
.map(ItemResourceSerializer.addSourceIdentifier)
.map((item) => {
return (new ItemResourceSerializer(item)).statements()
})
stmts.items = await Promise.all(
this.body.items
// Amend items to include source identifier (e.g. urn:SierraNypl:1234, urn:RecapCul:4567)
.map(ItemResourceSerializer.addSourceIdentifier)
)
stmts.items = await Promise.all(
stmts.items
.map((item) => new ItemResourceSerializer(item))
.map((itemSerializer) => itemSerializer.statements())
)
}

if (this.body.itemAggregations) {
Expand Down Expand Up @@ -321,8 +326,8 @@ class ItemResourceSerializer extends JsonLdItemSerializer {
this.type = 'bf:Item'
}

statements () {
const stmts = JsonLdItemSerializer.prototype.statements.call(this)
async statements () {
const stmts = await JsonLdItemSerializer.prototype.statements.call(this)

if (stmts.identifier) {
// Add idNyplSourceId convenience property by parsing identifiers that match urn:[source]:[id]
Expand Down Expand Up @@ -354,11 +359,11 @@ class ItemResourceSerializer extends JsonLdItemSerializer {
// urn:RecapCul:4567
// urn:RecapPul:6789
// urn:RecapHl:87654321
static addSourceIdentifier (item) {
static async addSourceIdentifier (item) {
// Ensure identifiers array exists:
item.identifier = item.identifier || []

const { id, nyplSource, type } = NyplSourceMapper.instance().splitIdentifier(item.uri)
const nyplSourceMapper = await NyplSourceMapper.instance()
const { id, nyplSource, type } = nyplSourceMapper.splitIdentifier(item.uri)
if (type === 'item') {
// Build prefix nyplSource as camel case
const sourceIdentifierPrefix = ItemResourceSerializer.sourceIdentifierPrefixByNyplSource(nyplSource)
Expand Down Expand Up @@ -410,8 +415,15 @@ class ResourceResultsSerializer extends SearchResultsSerializer {
return `resources:${result.uri}`
}

static serialize (resp, opts) {
const results = resp.hits.hits.map((h) => ({ score: h._score, record: ResourceSerializer.serialize(h._source), matched_queries: h.matched_queries }))
static async serialize (resp, opts) {
const results = await Promise.all(
resp.hits.hits.map((h) => {
// Serialize the bib record:
return ResourceSerializer.serialize(h._source)
// Serialize the "result" record wrapping the bib record:
.then((record) => ({ score: h._score, record, matched_queries: h.matched_queries }))
})
)
const totalResults = typeof resp.hits.total?.value === 'number' ? resp.hits.total.value : resp.hits.total
opts = Object.assign({ extraRootProperties: { totalResults } }, opts)
return (new ResourceResultsSerializer(results, opts)).format()
Expand All @@ -432,27 +444,31 @@ class AggregationsSerializer extends JsonLdListSerializer {
return `field:${result.field}`
}

static serialize (resp, options) {
static async serialize (resp, options) {
if ((typeof options) === 'undefined') options = {}

const items = Object.keys(resp.aggregations).map((field) => AggregationSerializer.serialize(Object.assign({ id: field }, resp.aggregations[field]), options))
const items = await Promise.all(
Object.entries(resp.aggregations)
// Add id property to body of aggregation:
.map(([id, agg]) => Object.assign({ id }, agg))
.map((agg) => AggregationSerializer.serialize(agg, options))
)
return (new AggregationsSerializer(items, Object.assign({ extraRootProperties: { totalResults: resp.hits.total } }, options))).format()
}
}

class AggregationSerializer extends JsonLdItemSerializer {
constructor (item, options) {
super(item, options)
// Serialize both the most general type (Resource) as well as any resource-specific type (Collection, Component, Capture, etc)
this.type = 'nypl:Aggregation'
}

resultId () {
return `res:${this.body.id}`
}

statements () {
const stmts = JsonLdItemSerializer.prototype.statements.call(this)
async statements () {
const stmts = await JsonLdItemSerializer.prototype.statements.call(this)

stmts.field = this.body.id
const field = this.body.id
Expand Down Expand Up @@ -486,90 +502,4 @@ class AggregationSerializer extends JsonLdItemSerializer {
}
}

/*
* Search Results: Agents
*/

/**
 * List serializer for agent search results.
 *
 * Tags the list with resultType 'nypl:Person' and identifies each result by
 * its uri under the "agents:" prefix.
 */
class AgentResultsSerializer extends SearchResultsSerializer {
  constructor (items, opts) {
    super(items, opts)
    this.resultType = 'nypl:Person'
  }

  // Identifier for a single result entry, derived from the result's uri
  resultId (result) {
    return `agents:${result.uri}`
  }

  /**
   * Serialize a search response holding agent documents.
   *
   * @param {object} resp - Search response; reads `resp.hits.hits[]._source`
   *   and `resp.hits.total`
   * @returns Whatever `format()` returns for the assembled list
   */
  static serialize (resp) {
    // Serialize each hit's source document, preserving hit order:
    const serializedAgents = resp.hits.hits.map((hit) => AgentSerializer.serialize(hit._source))
    const extraRootProperties = { totalResults: resp.hits.total }
    return new AgentResultsSerializer(serializedAgents, { extraRootProperties }).format()
  }
}
/*
* Agents
*/

/**
 * Serializer for a single agent record.
 *
 * The serialized type is always ['edm:Agent', <foaf type>], where the foaf
 * type is chosen from `item.type` (defaulting to 'foaf:Person').
 */
class AgentSerializer extends JsonLdItemSerializer {
  constructor (item, options) {
    super(item, options)

    let foafType = 'foaf:Person'
    if (item.type === 'Meeting') foafType = 'foaf:Group'
    // The original check only matched the misspelling 'Coporation', so a
    // correctly spelled 'Corporation' fell through to foaf:Person. Accept both
    // spellings so any existing data using the misspelled value keeps working.
    if (['Corporation', 'Coporation', 'Organization'].includes(item.type)) foafType = 'foaf:Organization'
    this.type = ['edm:Agent', foafType]
  }

  // Identifier for this agent, derived from the record's uri
  resultId () {
    return `agents:${this.body.uri}`
  }

  /**
   * Build the statements for this agent: the base statements from
   * JsonLdItemSerializer plus agent-specific properties copied from the
   * parsed body.
   *
   * NOTE(review): sibling serializers in this file `await` the parent
   * `statements()` call. If the parent implementation is async, this method
   * needs the same treatment - confirm against JsonLdItemSerializer.
   *
   * @returns {object} Statements object
   */
  statements () {
    const h = JsonLdItemSerializer.parsePackedStatements(this.body)

    const stmts = JsonLdItemSerializer.prototype.statements.call(this)

    if (h.label) stmts.prefLabel = h.label

    // Birth / death dates at several granularities:
    if (h.dobString) stmts.birthDate = h.dobString
    if (h.dobYear) stmts.birthYear = h.dobYear
    if (h.dobDecade) stmts.birthDecade = h.dobDecade
    if (h.dodString) stmts.deathDate = h.dodString
    if (h.dodYear) stmts.deathYear = h.dodYear
    if (h.dodDecade) stmts.deathDecade = h.dodDecade

    // These two are always written, even when the source value is undefined:
    stmts.topFiveTermsString = h.topFiveTerms
    stmts.topFiveRolesString = h.topFiveRoles

    if (h.description) stmts.description = h.description

    // External authority identifiers, emitted with a scheme prefix:
    if (h.viaf) stmts.uriViaf = 'viaf:' + h.viaf
    if (h.wikidata) stmts.uriWikidata = 'wikidata:' + h.wikidata
    if (h.lc) stmts.uriLc = 'lc:' + h.lc
    if (h.dbpedia) stmts.uriDbpedia = 'dbpedia:' + h.dbpedia
    if (h.depiction) stmts.depiction = h.depiction
    if (h.wikipedia) stmts.wikipedia = 'https://wikipedia.org/wiki/' + h.wikipedia
    if (h.useCount) stmts.useCount = h.useCount
    if (h.score) stmts.searchResultScore = h.score

    return stmts
  }

  /**
   * Serialize a single agent record.
   *
   * When the record has a `urn:bnum:` identifier, sets `resp.depiction`
   * (on the passed-in object, as before) to a book cover URL built from the
   * last ':'-separated segment of that identifier.
   *
   * @param {object} resp - Agent record
   * @param {object} [options] - Passed through to the serializer constructor
   * @returns Whatever `format()` returns for this record
   */
  static serialize (resp, options) {
    if (resp.identifier) {
      // Use the first bnum identifier; non-string entries are skipped:
      const bnumUrn = resp.identifier.find((identifier) => typeof identifier === 'string' && identifier.startsWith('urn:bnum:'))
      const bnum = bnumUrn?.split(':').pop()
      if (bnum) {
        resp.depiction = `https://s3.amazonaws.com/data.nypl.org/bookcovers/${bnum}_ol.jpg`
      }
    }
    return new AgentSerializer(resp, options).format()
  }
}

module.exports = { JsonLdSerializer, ResourceSerializer, ItemResourceSerializer, ItemResultsSerializer, ResourceResultsSerializer, AggregationsSerializer, AgentResultsSerializer, AgentSerializer, AggregationSerializer }
module.exports = { JsonLdSerializer, ResourceSerializer, ItemResourceSerializer, ItemResultsSerializer, ResourceResultsSerializer, AggregationsSerializer, AggregationSerializer }
8 changes: 3 additions & 5 deletions lib/ownership_determination.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
const NyplSourceMapper = require('discovery-store-models/lib/nypl-source-mapper')
// NYPL item ids start with an 'i'
const NYPL_ITEM_ID_PATTERN = /^i\d+/

// This is based on uri now but should be changed to use the 'item.owner' field once it's more reliably serialized.
// This just talks about ownership, not recap vs non-recap
/**
 * Report whether an item is NYPL-owned, judged by whether its uri starts with
 * 'i' followed by at least one digit.
 *
 * Synchronous by design: it matches the uri against a pattern rather than
 * calling the source mapper. A missing item or uri yields false.
 *
 * @param {object} [item] - Item with a `uri` property
 * @returns {boolean} true when the item's uri matches the NYPL item id pattern
 */
const isItemNyplOwned = (item) => {
  return NYPL_ITEM_ID_PATTERN.test(item?.uri)
}

module.exports = { isItemNyplOwned }
21 changes: 7 additions & 14 deletions lib/resources.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
const NyplSourceMapper = require('discovery-store-models/lib/nypl-source-mapper')
const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper')
const scsbClient = require('./scsb-client')

const ResourceResultsSerializer = require('./jsonld_serializers.js').ResourceResultsSerializer
Expand Down Expand Up @@ -104,7 +104,7 @@ module.exports = function (app, _private = null) {
app.resources = {}

// Get a single resource:
app.resources.findByUri = function (params, opts = {}, request) {
app.resources.findByUri = async function (params, opts = {}, request) {
// Parse all params we support:
params = parseParams(params, {
all_items: { type: 'boolean', default: false },
Expand All @@ -122,7 +122,8 @@ module.exports = function (app, _private = null) {
})

// Validate uri:
const { id, nyplSource } = NyplSourceMapper.instance().splitIdentifier(params.uri)
const nyplSourceMapper = await NyplSourceMapper.instance()
const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri)
if (!id || !nyplSource) {
throw new errors.InvalidParameterError(`Invalid bnum: ${params.uri}`)
}
Expand Down Expand Up @@ -194,8 +195,6 @@ module.exports = function (app, _private = null) {
app.logger.debug('Resources#findByUri', body)
return app.esClient.search(body)
.then((resp) => {
resp = resp.body

// Mindfully throw errors for known issues:
if (!resp || !resp.hits) {
throw new Error('Error connecting to index')
Expand All @@ -218,9 +217,10 @@ module.exports = function (app, _private = null) {
}

// Get a single raw annotated-marc resource:
app.resources.annotatedMarc = function (params, opts) {
app.resources.annotatedMarc = async function (params, opts) {
// Convert discovery id to nyplSource and un-prefixed id:
const { id, nyplSource } = NyplSourceMapper.instance().splitIdentifier(params.uri)
const nyplSourceMapper = await NyplSourceMapper.instance()
const { id, nyplSource } = nyplSourceMapper.splitIdentifier(params.uri)

app.logger.debug('Resources#annotatedMarc', { id, nyplSource })
return makeNyplDataApiClient().get(`bibs/${nyplSource}/${id}`)
Expand Down Expand Up @@ -259,8 +259,6 @@ module.exports = function (app, _private = null) {
app.logger.debug('Resources#itemsByFilter', body)
return app.esClient.search(body)
.then((resp) => {
resp = resp.body

if (!resp || !resp.hits || resp.hits.total === 0) return Promise.reject(new Error('No matching items'))
resp = new LocationLabelUpdater(resp).responseWithUpdatedLabels()
// Convert this ES bibs response into an array of flattened items:
Expand Down Expand Up @@ -634,7 +632,6 @@ module.exports = function (app, _private = null) {

return app.esClient.search(body)
.then((resp) => {
resp = resp.body
const massagedResponse = new ResponseMassager(resp)
return massagedResponse.massagedResponse(request)
.catch((e) => {
Expand Down Expand Up @@ -689,8 +686,6 @@ module.exports = function (app, _private = null) {
app.logger.debug('Resources#aggregations:', body)
return app.esClient.search(body)
.then((resp) => {
resp = resp.body

// Transform response slightly before serialization:
resp.aggregations = Object.keys(resp.aggregations)
.reduce((aggs, field) => {
Expand Down Expand Up @@ -735,8 +730,6 @@ module.exports = function (app, _private = null) {
app.logger.debug('Resources#aggregation:', body)
return app.esClient.search(body)
.then((resp) => {
resp = resp.body

// If it's nested, it will be in our special '_nested' prop:
resp = resp.aggregations[params.field]._nested || resp.aggregations[params.field]
resp.id = params.field
Expand Down
7 changes: 4 additions & 3 deletions lib/scsb-client.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
const scsbRestClient = require('@nypl/scsb-rest-client')
const NyplSourceMapper = require('discovery-store-models/lib/nypl-source-mapper')
const NyplSourceMapper = require('research-catalog-indexer/lib/utils/nypl-source-mapper')
const logger = require('./logger')
const { bNumberWithCheckDigit } = require('./util')

Expand Down Expand Up @@ -72,9 +72,10 @@ clientWrapper.recapCustomerCodeByBarcode = (barcode) => {
clientWrapper.getItemsAvailabilityForBarcodes = (barcodes) => scsbClient().getItemsAvailabilityForBarcodes(barcodes)

// bnum is a plain bnum without padding such as we use in the DiscoveryAPI
clientWrapper.getItemsAvailabilityForBnum = (bnum) => {
clientWrapper.getItemsAvailabilityForBnum = async (bnum) => {
// Identify nypl-source and unprefixed id:
const { nyplSource, id } = NyplSourceMapper.instance().splitIdentifier(bnum)
const nyplSourceMapper = await NyplSourceMapper.instance()
const { nyplSource, id } = nyplSourceMapper.splitIdentifier(bnum)
// Determine SCSB "institutionId" (e.g. NYPL, HL, CUL, PUL):
const institutionId = nyplSource.split('-').pop().toUpperCase()
// The "bibliographicId" in SCSB for our items is padded and prefixed;
Expand Down
Loading

0 comments on commit 920798e

Please sign in to comment.