Skip to content

Commit

Permalink
Adds publish robot.
Browse files Browse the repository at this point in the history
refs #5197
  • Loading branch information
justinlittman committed Oct 30, 2024
1 parent 06fca57 commit 344a604
Show file tree
Hide file tree
Showing 17 changed files with 174 additions and 107 deletions.
2 changes: 2 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ workflows:
name: validate
- ruby-rails/lint:
name: lint
context: dlss
- ruby-rails/test-rails:
name: test
api-only: true
context: dlss
- ruby-rails/docker-publish:
context: dlss
name: publish-latest
Expand Down
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ gem 'dor-workflow-client', '~> 7.3'
gem 'druid-tools', '~> 2.2'
gem 'folio_client', '~> 0.8'
gem 'graphql'
# gem 'lyber-core' # For robots
gem 'lyber-core', github: 'sul-dlss/lyber-core', branch: 'retries2'
gem 'mais_orcid_client'
gem 'marc'
gem 'marc-vocab', '~> 0.3.0' # for indexing
Expand All @@ -21,6 +23,10 @@ gem 'purl_fetcher-client', '~> 2.1'
gem 'stanford-mods' # for indexing
gem 'sul_orcid_client', '~> 0.3'

source 'https://gems.contribsys.com/' do
gem 'sidekiq-pro'
end

# Ruby general dependencies
gem 'bootsnap', '>= 1.4.2', require: false
gem 'bunny', '~> 2.17' # Send messages to RabbitMQ
Expand Down
30 changes: 30 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
GIT
remote: https://github.com/sul-dlss/lyber-core.git
revision: 52578ca63fe139636efa7751d57561e3ab4c10ff
branch: retries2
specs:
lyber-core (7.6.0)
activesupport
config
dor-services-client (~> 15.0)
dor-workflow-client (>= 7.4)
druid-tools
honeybadger
sidekiq (~> 7.0)
zeitwerk

GEM
remote: https://gems.contribsys.com/
specs:
sidekiq-pro (7.3.2)
sidekiq (>= 7.3.0, < 8)

GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -173,6 +194,13 @@ GEM
ed25519
docile (1.4.1)
domain_name (0.6.20240107)
dor-services-client (15.2.0)
activesupport (>= 4.2, < 8)
cocina-models (~> 0.99.0)
deprecation
faraday (~> 2.0)
faraday-retry
zeitwerk (~> 2.1)
dor-workflow-client (7.5.0)
activesupport (>= 3.2.1, < 8)
deprecation (>= 0.99.0)
Expand Down Expand Up @@ -610,6 +638,7 @@ DEPENDENCIES
jsonpath (~> 1.1)
jwt
lograge
lyber-core!
mais_orcid_client
marc
marc-vocab (~> 0.3.0)
Expand All @@ -636,6 +665,7 @@ DEPENDENCIES
rubocop-rspec_rails
ruby-cache (~> 0.3.0)
sidekiq (~> 7.0)
sidekiq-pro!
simplecov
sneakers (~> 2.11)
stanford-mods
Expand Down
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,21 @@ $ sudo systemctl restart rolling-index

**NOTE 3**: The rolling indexer logs to `{capistrano_shared_dir}/log/rolling_indexer.log`

## Robots

DSA hosts robots that perform DSA actions. This replaces the previous pattern, in which a common accessioning robot would invoke a DSA endpoint that started a DSA job; that job would perform the action and then update the workflow status.

Robots are in `app/jobs/robots/*`. All DSA robots must be added to Workflow Server Rails' `QueueService` so that the workflow jobs are handled by DSA robots (instead of normal robots).

There also must be a sidekiq process to handle the DSA robot queues. For example:
```
:labels:
- robot
:concurrency: 5
:queues:
- [accessionWF_default_dsa, 2]
- accessionWF_low_dsa
```

## Other tools

Expand Down
2 changes: 1 addition & 1 deletion app/controllers/objects_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def accession
def publish
result = BackgroundJobResult.create
EventFactory.create(druid: params[:id], event_type: 'publish_request_received', data: { background_job_result_id: result.id })
PublishJob.set(queue: publish_queue).perform_later(druid: params[:id], background_job_result: result, workflow: params[:workflow])
PublishJob.set(queue: publish_queue).perform_later(druid: params[:id], background_job_result: result)
head :created, location: result
end

Expand Down
24 changes: 6 additions & 18 deletions app/jobs/publish_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,33 +8,21 @@ class PublishJob < ApplicationJob
# @param [String] druid the identifier of the item to be published
# @param [Integer,nil] user_version the version of the item to be published. If nil, the latest version will be published.
# @param [BackgroundJobResult] background_job_result identifier of a background job result to store status info
# @param [String,nil] workflow workflow to report to. If nil, no workflow will be reported to.
# @param [Boolean] log_success whether success should be logged
def perform(druid:, background_job_result:, workflow: nil, user_version: nil, log_success: true)
def perform(druid:, background_job_result:, user_version: nil)
background_job_result.processing!
cocina_object = CocinaObjectStore.find(druid)

# Note that LogFailureJob / LogSuccessJob will update the BackgroundJobResult.
# If workflow is nil, no workflow will be reported to.
if cocina_object.admin_policy?
return LogFailureJob.perform_later(druid:,
background_job_result:,
workflow:,
workflow_process: workflow_process_for(workflow),
output: { errors: [{ title: 'Publishing error', detail: 'Cannot publish an admin policy' }] })
background_job_result.output = { errors: [{ title: 'Publishing error', detail: 'Cannot publish an admin policy' }] }
background_job_result.complete!
return
end

Publish::MetadataTransferService.publish(druid:, user_version:, workflow:)
Publish::MetadataTransferService.publish(druid:, user_version:)
EventFactory.create(druid:, event_type: 'publishing_complete', data: { background_job_result_id: background_job_result.id })
return unless log_success

LogSuccessJob.perform_later(druid:,
background_job_result:,
workflow:,
workflow_process: workflow_process_for(workflow))
end

def workflow_process_for(workflow)
workflow == 'releaseWF' ? 'release-publish' : 'publish'
background_job_result.complete!
end
end
23 changes: 23 additions & 0 deletions app/jobs/robots/dor_repo/accession/publish.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# frozen_string_literal: true

module Robots
  module DorRepo
    module Accession
      # Robot step that publishes metadata and shelves files for an object
      # as part of the accession workflow.
      class Publish < Robots::Robot
        sidekiq_options retry: true

        # Registers this robot for the accessionWF 'publish' step; errors from
        # the PURL fetcher client are considered transient and retried.
        def initialize
          super('accessionWF', 'publish', retriable_exceptions: [PurlFetcher::Client::Error])
        end

        # Publishes the object and records a completion event. Admin policy
        # objects are never published, so the step is skipped for them.
        def perform_work
          if cocina_object.admin_policy?
            return LyberCore::ReturnState.new(status: :skipped, note: 'Admin policy objects are not published')
          end

          ::Publish::MetadataTransferService.publish(druid:)
          EventFactory.create(druid:, event_type: 'publishing_complete', data: {})
        end
      end
    end
  end
end
14 changes: 14 additions & 0 deletions app/jobs/robots/robot.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

module Robots
  # Common base class for robots that run inside DSA itself.
  class Robot < LyberCore::Robot
    # The Cocina object for the druid this robot is working on,
    # fetched from the local store once and memoized for the job.
    def cocina_object
      @cocina_object = CocinaObjectStore.find(druid) if @cocina_object.nil?
      @cocina_object
    end

    # DSA robots act on local state directly; calling back into DSA through
    # dor-services-client from here would be a mistake, so fail loudly.
    def object_client
      raise 'Object Client should not be used from a DSA robot'
    end
  end
end
12 changes: 5 additions & 7 deletions app/services/publish/metadata_transfer_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@ module Publish
class MetadataTransferService
# @param [String] druid for the object to be published
# @param [Integer] user_version if a specific version is to be published
# @param [String] workflow (optional) the workflow used for reporting back status to (defaults to 'accessionWF')
def self.publish(druid:, user_version: nil, workflow: 'accessionWF')
new(druid:, workflow:, user_version:).publish
def self.publish(druid:, user_version: nil)
new(druid:, user_version:).publish
end

def initialize(druid:, workflow:, user_version:)
@workflow = workflow
def initialize(druid:, user_version:)
@public_cocina = PublicCocina.new(druid:, user_version:)
end

Expand All @@ -39,13 +37,13 @@ def republish_collection_members!
Array.wrap(
MemberService.for(druid, publishable: true)
).each do |member_druid|
PublishJob.set(queue: :publish_low).perform_later(druid: member_druid, background_job_result: BackgroundJobResult.create, workflow:, log_success: false)
PublishJob.set(queue: :publish_low).perform_later(druid: member_druid, background_job_result: BackgroundJobResult.create)
end
end

def republish_virtual_object_constituents!
VirtualObjectService.constituents(cocina_object, publishable: true).each do |constituent_druid|
PublishJob.set(queue: :publish_low).perform_later(druid: constituent_druid, background_job_result: BackgroundJobResult.create, workflow:, log_success: false)
PublishJob.set(queue: :publish_low).perform_later(druid: constituent_druid, background_job_result: BackgroundJobResult.create)
end
end

Expand Down
11 changes: 10 additions & 1 deletion config/initializers/sidekiq.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# frozen_string_literal: true

Sidekiq.configure_server do |config|
config.redis = { url: Settings.redis_url }
# Add the following to a sidekiq.yml to have it handle robot jobs.
# :labels:
# - robot
if config[:labels].include?('robot')
config.redis = { url: Settings.robots_redis_url }
# For Sidekiq Pro
config.super_fetch!
else
config.redis = { url: Settings.redis_url }
end
end

Sidekiq.configure_client do |config|
Expand Down
6 changes: 3 additions & 3 deletions config/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ suri:

# Workflow
workflow:
url: 'https://workflow.example.com/workflow'
logfile: 'log/workflow_service.log'
shift_age: 'weekly'
timeout: 60
Expand All @@ -44,10 +45,9 @@ solr:
dor_indexing:
url: 'https://dor-indexing-app.example.edu/dor'
redis_url: 'redis://localhost:6379/'
# This is the redis used by all robots, including the robots running inside DSA.
robots_redis_url: 'redis://localhost:6379/1'

workflow:
url: 'https://workflow.example.com/workflow'
timeout: 60
sdr:
local_workspace_root: /dor/workspace
local_export_home: /dor/export
Expand Down
9 changes: 0 additions & 9 deletions openapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,6 @@ paths:
required: true
schema:
$ref: "#/components/schemas/Druid"
- name: workflow
in: query
description: which workflow should this be reported to
schema:
type: string
enum:
- accessionWF
- releaseWF
example: releaseWF
- name: lane-id
in: query
description: Lane for prioritizing the work
Expand Down
49 changes: 12 additions & 37 deletions spec/jobs/publish_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,25 @@

RSpec.describe PublishJob do
subject(:perform) do
described_class.perform_now(druid:, background_job_result: result, workflow:)
described_class.perform_now(druid:, background_job_result: result)
end

let(:druid) { 'druid:mk420bs7601' }
let(:result) { create(:background_job_result) }
let(:item) { instance_double(Cocina::Models::DRO, admin_policy?: false) }
let(:workflow) { 'accessionWF' }
let(:valid) { true }
let(:invalid_filenames) { [] }

before do
allow(CocinaObjectStore).to receive(:find).with(druid).and_return(item)
allow(result).to receive(:processing!)
allow(result).to receive(:complete!)
allow(EventFactory).to receive(:create)
allow(Publish::MetadataTransferService).to receive(:publish)
end

context 'with no errors' do
before do
allow(Publish::MetadataTransferService).to receive(:publish)
allow(LogSuccessJob).to receive(:perform_later)
perform
end

Expand All @@ -32,55 +31,31 @@
end

it 'invokes the Publish::MetadataTransferService' do
expect(Publish::MetadataTransferService).to have_received(:publish).with(druid:, workflow:, user_version: nil).once
expect(Publish::MetadataTransferService).to have_received(:publish).with(druid:, user_version: nil).once
end

it 'marks the job as complete' do
expect(EventFactory).to have_received(:create)

expect(LogSuccessJob).to have_received(:perform_later)
.with(druid:, background_job_result: result, workflow: 'accessionWF', workflow_process: 'publish')
end

context 'when log_success is set to false' do
subject(:perform) do
described_class.perform_now(druid:, user_version: 4, background_job_result: result, workflow:, log_success: false)
end

it 'does not mark the job as complete' do
expect(Publish::MetadataTransferService).to have_received(:publish).with(druid:, workflow:, user_version: 4).once
expect(EventFactory).to have_received(:create)

expect(LogSuccessJob).not_to have_received(:perform_later)
.with(druid:, background_job_result: result, workflow: 'accessionWF', workflow_process: 'publish')
end
end

context 'when log_success is set to true' do
subject(:perform) do
described_class.perform_now(druid:, background_job_result: result, workflow:, log_success: true)
end

it 'mark the job as complete' do
expect(EventFactory).to have_received(:create)

expect(LogSuccessJob).to have_received(:perform_later)
.with(druid:, background_job_result: result, workflow: 'accessionWF', workflow_process: 'publish')
end
expect(result).to have_received(:complete!).once
end
end

context 'when an AdminPolicy' do
let(:item) { instance_double(Cocina::Models::AdminPolicy, admin_policy?: true) }

before do
allow(LogFailureJob).to receive(:perform_later)
allow(result).to receive(:output=)
perform
end

it 'marks the job as a failure' do
expect(LogFailureJob).to have_received(:perform_later)
.with(druid:, background_job_result: result, workflow: 'accessionWF', workflow_process: 'publish', output: Hash)
expect(result).to have_received(:output=).with({ errors: [{ title: 'Publishing error', detail: 'Cannot publish an admin policy' }] })
expect(result).to have_received(:complete!).once
end

it 'does not transfer' do
expect(Publish::MetadataTransferService).not_to have_received(:publish)
end
end
end
Loading

0 comments on commit 344a604

Please sign in to comment.