diff --git a/lib/dor/text_extraction/speech_to_text.rb b/lib/dor/text_extraction/speech_to_text.rb index 5b693c7d..b646e7f0 100644 --- a/lib/dor/text_extraction/speech_to_text.rb +++ b/lib/dor/text_extraction/speech_to_text.rb @@ -71,6 +71,12 @@ def output_location "#{job_id}/output" end + # given a filename, look in the list of files that can be sent for speech to text, examine the cocina structural + # and return the languageTag for the file (or nil if no language is set) + def language_tag(filename) + stt_files.find { |file| file.filename == filename }&.languageTag + end + private # iterate through cocina structural contains and return all File objects for files that need to be stt'd diff --git a/lib/robots/dor_repo/speech_to_text/stt_create.rb b/lib/robots/dor_repo/speech_to_text/stt_create.rb index ac69c5dd..10ad007f 100644 --- a/lib/robots/dor_repo/speech_to_text/stt_create.rb +++ b/lib/robots/dor_repo/speech_to_text/stt_create.rb @@ -43,18 +43,23 @@ def message_body end def job_id - @job_id ||= Dor::TextExtraction::SpeechToText.new(cocina_object:).job_id + stt.job_id end # array of media files in the bucket folder for this job (excluding s3 folders) def media - aws_provider.client.list_objects(bucket: aws_provider.bucket_name, prefix: job_id).contents.map(&:key).reject { |key| key.end_with?('/') } + filenames = aws_provider.client.list_objects(bucket: aws_provider.bucket_name, prefix: job_id).contents.map(&:key).reject { |key| key.end_with?('/') } + filenames.map { |filename| { name: filename, options: { language: stt.language_tag(File.basename(filename)) } } } end # pulled from config, could later be overriden by settings in the workflow context def whisper_options Settings.speech_to_text.whisper.to_h end + + def stt + @stt ||= Dor::TextExtraction::SpeechToText.new(cocina_object:) + end end end end diff --git a/spec/lib/dor/text_extraction/speech_to_text_spec.rb b/spec/lib/dor/text_extraction/speech_to_text_spec.rb index 1ae8a4fd..66ea8c9b 100644 --- a/spec/lib/dor/text_extraction/speech_to_text_spec.rb +++ b/spec/lib/dor/text_extraction/speech_to_text_spec.rb @@ -19,7 +19,7 @@ let(:second_fileset_structural) { instance_double(Cocina::Models::FileSetStructural, contains: [mp4_file, mp4_file_not_shelved, mp4_file_not_preserved]) } let(:third_fileset_structural) { instance_double(Cocina::Models::FileSetStructural, contains: [text_file2]) } let(:m4a_file) { build_file('file1.m4a') } - let(:mp4_file) { build_file('file1.mp4') } + let(:mp4_file) { build_file('file1.mp4', language_tag: 'es') } let(:mp4_file_not_shelved) { build_file('file2.mp4', shelve: false) } let(:mp4_file_not_preserved) { build_file('file3.mp4', preserve: false) } let(:text_file) { build_file('file1.txt') } @@ -45,7 +45,7 @@ context 'when the object has no files that can be STTed' do let(:first_fileset_structural) { instance_double(Cocina::Models::FileSetStructural, contains: [text_file]) } - let(:second_fileset_structural) { instance_double(Cocina::Models::FileSetStructural, contains: [text_file, text_file]) } + let(:second_fileset_structural) { instance_double(Cocina::Models::FileSetStructural, contains: [text_file, text_file2]) } it 'returns false' do expect(stt.possible?).to be false @@ -88,6 +88,35 @@ end end + describe '#language_tag' do + context 'when the file cannot be found' do + let(:filename) { 'bogus.mp4' } + + it 'returns nil' do + expect(stt.filenames_to_stt).not_to include(filename) + expect(stt.language_tag(filename)).to be_nil + end + end + + context 'when the file is found and there is no language tag in cocina' do + let(:filename) { 'file1.m4a' } + + it 'returns nil' do + expect(stt.filenames_to_stt).to include(filename) + expect(stt.language_tag(filename)).to be_nil + end + end + + context 'when the file is found and there is a language tag in cocina' do + let(:filename) { 'file1.mp4' } + + it 'returns the language tag' do + expect(stt.filenames_to_stt).to include(filename) + expect(stt.language_tag(filename)).to eq 'es' + end + end + end + describe '#cleanup' do let(:client) { instance_double(Aws::S3::Client, list_objects:) } let(:list_objects) { instance_double(Aws::S3::Types::ListObjectsOutput, contents: [m4a_object, mp4_object]) } diff --git a/spec/robots/dor_repo/speech_to_text/stt_create_spec.rb b/spec/robots/dor_repo/speech_to_text/stt_create_spec.rb index c86ebbad..d3e04db2 100644 --- a/spec/robots/dor_repo/speech_to_text/stt_create_spec.rb +++ b/spec/robots/dor_repo/speech_to_text/stt_create_spec.rb @@ -10,7 +10,8 @@ let(:robot) { described_class.new } let(:aws_client) { instance_double(Aws::SQS::Client) } let(:aws_s3_client) { instance_double(Aws::S3::Client) } - let(:stt) { instance_double(Dor::TextExtraction::SpeechToText, job_id:, filenames_to_stt: ['file1.mov', 'file2.mp3']) } + let(:filenames_to_stt) { ['file1.mov', 'file2.mp3'] } + let(:stt) { instance_double(Dor::TextExtraction::SpeechToText, job_id:, filenames_to_stt:) } let(:cocina_model) { build(:dro, id: druid).new(structural: {}, type: object_type, access: { view: 'world' }) } let(:object_type) { Cocina::Models::ObjectType.media } let(:dsa_object_client) do @@ -23,10 +24,10 @@ instance_double(Dor::Workflow::Response::Process, lane_id: 'lane1', context: { 'runSpeechToText' => true }) end let(:job_id) { "#{bare_druid}-v1" } - let(:media) { ["#{job_id}/file1.mov", "#{job_id}/file2.mp3"] } + let(:media) { [{ name: "#{job_id}/#{filenames_to_stt[0]}", options: { language: 'en' } }, { name: "#{job_id}/#{filenames_to_stt[1]}", options: { language: 'es' } }] } let(:list_objects) { instance_double(Aws::S3::Types::ListObjectsOutput, contents: [mov_object, mp3_object]) } - let(:mov_object) { instance_double(Aws::S3::Types::Object, key: media[0]) } - let(:mp3_object) { instance_double(Aws::S3::Types::Object, key: media[1]) } + let(:mov_object) { instance_double(Aws::S3::Types::Object, key: media[0][:name]) } + let(:mp3_object) { instance_double(Aws::S3::Types::Object, key: media[1][:name]) } before do allow(Aws::S3::Client).to receive(:new).and_return(aws_s3_client) @@ -35,6 +36,8 @@ allow(Dor::TextExtraction::SpeechToText).to receive(:new).and_return(stt) allow(LyberCore::WorkflowClientFactory).to receive(:build).and_return(workflow_client) allow(aws_s3_client).to receive(:list_objects).and_return(list_objects) + allow(stt).to receive(:language_tag).with(filenames_to_stt[0]).and_return('en') + allow(stt).to receive(:language_tag).with(filenames_to_stt[1]).and_return('es') end context 'when the message is sent successfully' do diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 781b0a84..3ab7ce98 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -40,11 +40,13 @@ def clone_test_input(destination) system "rsync -rqOlt --delete #{source}/ #{destination}/" end -def build_file(filename, preserve: true, shelve: true, corrected: false, sdr_generated: false) +# rubocop:disable Metrics/ParameterLists +def build_file(filename, preserve: true, shelve: true, corrected: false, sdr_generated: false, language_tag: nil) extension = File.extname(filename) mimetype = { '.pdf' => 'application/pdf', '.tif' => 'image/tiff', '.jpg' => 'image/jpeg', '.txt' => 'text/plain', '.m4a' => 'audio/mp4', '.mp4' => 'video/mp4', '.vtt' => 'text/vtt', '.xml' => 'application/xml' } sdr_value = instance_double(Cocina::Models::FileAdministrative, sdrPreserve: preserve, shelve:) - instance_double(Cocina::Models::File, administrative: sdr_value, hasMimeType: mimetype[extension], + instance_double(Cocina::Models::File, administrative: sdr_value, hasMimeType: mimetype[extension], languageTag: language_tag, filename:, correctedForAccessibility: corrected, sdrGeneratedText: sdr_generated) end +# rubocop:enable Metrics/ParameterLists