Skip to content

Commit

Permalink
Merge pull request #475 from psu-libraries/176-punctuation-fix
Browse files Browse the repository at this point in the history
right to left language formatting
  • Loading branch information
Smullz622 authored May 22, 2023
2 parents 363cc7d + e3ebdb3 commit 15ec846
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 3 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ config/environments/*.local.yml
.bash_history
.cache
.local
.envrc
data/
vendor/**/*
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ RSpec/NestedGroups:
- 'spec/lib/psulib_traject/processors/call_number/lc_spec.rb'
- 'spec/lib/psulib_traject/processors/call_number/dewey_spec.rb'
- 'spec/lib/psulib_traject/processors/title_display_spec.rb'
- 'spec/lib/psulib_traject/processors/pub_display_spec.rb'

RSpec/ExpectActual:
Exclude:
Expand Down
1 change: 1 addition & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ PLATFORMS
universal-java-14
universal-java-15
universal-java-17
universal-java-19
x86_64-darwin-20
x86_64-linux

Expand Down
12 changes: 9 additions & 3 deletions config/traject.rb
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,19 @@
to_field 'pub_date_itsi', process_publication_date

## Publication fields for display
to_field 'publication_display_ssm', extract_marc('260abcefg3:264|*1|abc3') # display in search results
to_field 'overall_imprint_display_ssm', extract_marc('260abcefg3:264|*0|abc3:264|*1|abc3:264|*2|abc3:264|*3|abc3') # display on single item page
to_field 'copyright_display_ssm', extract_marc('264|*4|c')
to_field 'edition_display_ssm', extract_marc('250ab3')
to_field 'cartographic_mathematical_data_ssm', extract_marc('255abcdefg')
to_field 'other_edition_ssm', extract_marc('775|0*|iabcdefghkmnor')
to_field 'collection_facet', extract_marc('793a')
to_field 'publication_display_ssm', extract_marc('260abcefg3:264|*1|abc3') # display in search results
to_field 'overall_imprint_display_ssm', extract_marc('260abcefg3:264|*0|abc3:264|*1|abc3:264|*2|abc3:264|*3|abc3') # display on single item page
to_field 'edition_display_ssm', extract_marc('250ab3')
# Runs PubDisplay over each of the display fields extracted above so their
# vernacular values are formatted for display.
each_record do |_record, context|
  %w[publication overall_imprint edition].each do |display_field|
    PsulibTraject::Processors::PubDisplay.new(display_field, context).call
  end
end

## Publication fields for Illiad and Aeon
to_field 'pub_date_illiad_ssm', extract_marc('260c:264|*1|c'), trim_punctuation
Expand Down
1 change: 1 addition & 0 deletions lib/psulib_traject.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ module PsulibTraject
require 'psulib_traject/processors/record_type'
require 'psulib_traject/processors/summary_holdings'
require 'psulib_traject/processors/title_display'
require 'psulib_traject/processors/pub_display'
require 'psulib_traject/processors/oclc_extract'
require 'psulib_traject/shelf_key'
require 'psulib_traject/solr_manager'
Expand Down
35 changes: 35 additions & 0 deletions lib/psulib_traject/processors/pub_display.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module PsulibTraject
  module Processors
    # Adjusts the vernacular entry of an already-extracted display field so
    # that a value ending in Arabic letters followed by a period carries that
    # period at the start of the string instead of the end.
    #
    # The processor reads context.output_hash["#{field}_display_ssm"] and
    # treats the element at index 1 as the vernacular value; index 0 and any
    # later elements are never touched. The array is modified in place.
    class PubDisplay
      # One or more Arabic letters (U+0621..U+064A) immediately followed by a
      # period that ends the whole string (\z, not end-of-line).
      # NOTE(review): the range excludes the extended Arabic letters used by
      # Persian/Urdu (e.g. U+06CC), so values whose last letter is one of
      # those are left unchanged - confirm this is intended.
      ARABIC_TRAILING_PERIOD = /[\u0621-\u064A]+\.\z/
      private_constant :ARABIC_TRAILING_PERIOD

      # @param field [String] prefix of the output field, e.g. 'edition' for
      #   'edition_display_ssm'
      # @param context [#output_hash] object exposing the output hash
      def initialize(field, context)
        @context = context
        @field = field
      end

      # Rewrites the vernacular value in place when it needs reformatting.
      #
      # @return [String, nil] the value stored at index 1, or nil when the
      #   field is missing or has no vernacular entry
      def call
        return if final.nil?

        final[1] = vern_clean unless vern.nil?
      end

      private

      attr_reader :context, :field

      # The vernacular value: the second element, present only when the field
      # holds more than one value.
      def vern
        final[1] if final.length > 1
      end

      # The current values of the display field, or nil when it was not set.
      def final
        context.output_hash["#{field}_display_ssm"]
      end

      # Moves the closing period to the front of the value. Only that final
      # period is removed; periods elsewhere in the value are preserved.
      def vern_clean
        return vern unless ARABIC_TRAILING_PERIOD.match?(vern)

        ".#{vern.chomp('.')}"
      end
    end
  end
end
72 changes: 72 additions & 0 deletions spec/integration/publication_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# frozen_string_literal: true

RSpec::Matchers.define_negated_matcher :not_include, :include

# Integration examples for the edition display field: each example maps a
# small MARC record through the indexer and inspects 'edition_display_ssm'.
RSpec.describe 'Publication' do
  let(:leader) { '1234567890' }
  let(:field) { 'edition_display_ssm' }
  let(:result) { indexer.map_record(MARC::Record.new_from_hash('fields' => fields, 'leader' => leader)) }

  # Only the display field itself is expected in the output; no separate
  # vernacular/latin keys should appear alongside it.
  shared_examples 'no separate vern or latin edition fields' do
    it 'does not include edition_vern or edition_latin fields' do
      expect(result).not_to include('edition_vern')
      expect(result).not_to include('edition_latin')
    end
  end

  describe 'Record with a right to left vernacular edition' do
    let(:edition_250) do
      { '250' => { 'subfields' => [{ '6' => '880-03' },
                                   { 'a' => 'Chāp-i avval.' }] } }
    end
    let(:edition_vern_250) do
      { '880' => { 'subfields' => [{ '6' => '250-03' },
                                   { 'a' => 'چاپ اول.' }] } }
    end
    let(:fields) { [edition_250, edition_vern_250] }

    it 'has the vernacular edition with a period on the left in the edition statement' do
      expect(result[field]).to eq ['Chāp-i avval.', '.چاپ اول']
    end

    include_examples 'no separate vern or latin edition fields'
  end

  describe 'Record with a left to right vernacular edition' do
    let(:edition_250) do
      { '250' => { 'subfields' => [{ '6' => '880-03' },
                                   { 'a' => 'Tōkyō : Hayakawa Shobō, 1999.' }] } }
    end
    let(:edition_vern_250) do
      { '880' => { 'subfields' => [{ '6' => '250-03' },
                                   { 'a' => '東京 : 早川書房, 1999.' }] } }
    end
    let(:fields) { [edition_250, edition_vern_250] }

    it 'has the vernacular edition with a period on the right in the edition statement' do
      expect(result[field]).to eq ['Tōkyō : Hayakawa Shobō, 1999.', '東京 : 早川書房, 1999.']
    end

    include_examples 'no separate vern or latin edition fields'
  end

  describe 'Record with no vernacular edition' do
    let(:edition_250) do
      { '250' => { 'subfields' => [{ '6' => '880-03' },
                                   { 'a' => 'First Edition.' }] } }
    end
    let(:fields) { [edition_250] }

    it 'has only the unchanged latin edition statement' do
      expect(result[field]).to eq ['First Edition.']
    end

    include_examples 'no separate vern or latin edition fields'
  end
end
59 changes: 59 additions & 0 deletions spec/lib/psulib_traject/processors/pub_display_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# frozen_string_literal: true

# Unit examples for PubDisplay#call. The context is a double whose
# output_hash returns the hash under test, which #call modifies in place.
RSpec.describe PsulibTraject::Processors::PubDisplay do
  let(:context) { instance_double 'Context' }

  before do
    allow(context).to receive(:output_hash).and_return(output_hash)
  end

  describe '#call' do
    context 'when there is not a vernacular version of the field' do
      let(:output_hash) do
        { 'edition_display_ssm' => ['thing'] }
      end

      it 'leaves field_display_ssm unchanged' do
        described_class.new('edition', context).call
        expect(output_hash['edition_display_ssm']).to eq ['thing']
      end
    end

    context 'when there is a vernacular version of the field' do
      context 'when the vernacular value is in right to left arabic script' do
        context 'when there is a period on the right end of the vernacular value' do
          let(:output_hash) do
            { 'edition_display_ssm' => ['Chāp-i avval.', 'چاپ اول.'] }
          end

          it 'moves the period of the vernacular value to the left and leaves the latin value unchanged' do
            described_class.new('edition', context).call
            expect(output_hash['edition_display_ssm']).to eq ['Chāp-i avval.', '.چاپ اول']
          end
        end

        context 'when there is not a period on the right end of the vernacular value' do
          let(:output_hash) do
            { 'edition_display_ssm' => ['Chāp-i avval.', 'چاپ اول'] }
          end

          it 'does not change the vernacular or latin value' do
            described_class.new('edition', context).call
            expect(output_hash['edition_display_ssm']).to eq ['Chāp-i avval.', 'چاپ اول']
          end
        end
      end

      context 'when the vernacular value is not in right to left arabic script' do
        let(:output_hash) do
          { 'edition_display_ssm' => ['Tōkyō : Hayakawa Shobō, 1999.', '東京 : 早川書房, 1999.'] }
        end

        it 'does not change the vernacular or latin value' do
          described_class.new('edition', context).call
          expect(output_hash['edition_display_ssm']).to eq ['Tōkyō : Hayakawa Shobō, 1999.', '東京 : 早川書房, 1999.']
        end
      end
    end
  end
end

0 comments on commit 15ec846

Please sign in to comment.