require 'logger'

# This class is responsible for listing the
# genres present in a given MARC record
#
# Sources consulted, in the order their values appear in the result:
#   1. 6xx $x (2nd indicator 0), kept only when the value passes the
#      GENRES / GENRE_STARTS_WITH filter
#   2. 650 $v and 655 $a/$v (2nd indicator 7), kept only when the field's
#      $2 names a vocabulary listed in SUBJECT_GENRE_VOCABULARIES
#   3. 6xx $v and 655 $a/$v (2nd indicator 0), kept unconditionally
class Genre
  # $x values that count as a genre when they match exactly.
  GENRES = [
    'Bibliography',
    'Biography',
    'Catalogs',
    'Catalogues raisonnes',
    'Commentaries',
    'Congresses',
    'Diaries',
    'Dictionaries',
    'Drama',
    'Encyclopedias',
    'Exhibitions',
    'Fiction',
    'Guidebooks',
    'In art',
    'Indexes',
    'Librettos',
    'Manuscripts',
    'Newspapers',
    'Periodicals',
    'Pictorial works',
    'Poetry',
    'Portraits',
    'Scores',
    'Songs and music',
    'Sources',
    'Statistics',
    'Texts',
    'Translations'
  ].freeze

  # $x values that count as a genre when they contain one of these strings.
  # NOTE(review): the name says "starts with", but the match below is
  # `genre[g]` (substring anywhere). Left unchanged because tightening it
  # would change the facet output - confirm the intended rule.
  GENRE_STARTS_WITH = [
    'Census',
    'Maps',
    'Methods',
    'Parts',
    'Personal narratives',
    'Scores and parts',
    'Study and teaching',
    'Translations into '
  ].freeze

  # $2 source codes whose headings are accepted when the 2nd indicator is 7.
  SUBJECT_GENRE_VOCABULARIES = ['sk', 'aat', 'lcgft', 'rbbin', 'rbgenr', 'rbmscv',
                                'rbpap', 'rbpri', 'rbprov', 'rbpub', 'rbtyp', 'homoit'].freeze

  # Extractor specs for the three passes described in the class comment.
  FILTERED_SUBDIVISION_SPEC = '600|*0|x:610|*0|x:611|*0|x:630|*0|x:650|*0|x:651|*0|x:655|*0|x'.freeze
  VOCABULARY_HEADING_SPEC = '650|*7|v:655|*7|a:655|*7|v'.freeze
  FORM_HEADING_SPEC = '600|*0|v:610|*0|v:611|*0|v:630|*0|v:650|*0|v:651|*0|v:655|*0|a:655|*0|v'.freeze

  # Whole-string match for "nothing but whitespace", including the empty
  # string. The previous pattern (/^\s+$/) let an empty value through.
  BLANK_VALUE = /\A\s*\z/.freeze

  # @param record [#fields, #[]] the MARC record to read genres from
  # @param logger [#error, nil] receives "Blank genre field" messages;
  #   when omitted a standard-library Logger writing to $stderr is used
  def initialize(record, logger: nil)
    @record = record
    @logger = logger
  end

  # 600/610/650/651 $v, $x filtered
  # 655 $a, $v, $x filtered
  #
  # @return [Array<String>] the distinct genre values, in first-seen order
  def to_a
    (filtered_subdivisions + vocabulary_headings + form_headings).uniq
  end

  private

  attr_reader :record

  # The class used to call an undefined `logger`, which raised NameError the
  # first time a blank genre was met. It is now always available.
  def logger
    @logger ||= Logger.new($stderr)
  end

  # Pass 1: $x subdivisions that are on the genre allow-lists.
  def filtered_subdivisions
    values = trimmed_values(FILTERED_SUBDIVISION_SPEC).map(&:last)
    values.select do |genre|
      GENRES.include?(genre) || GENRE_STARTS_WITH.any? { |g| genre[g] }
    end
  end

  # Pass 2: headings from fields whose $2 vocabulary is approved.
  # Blank values are logged before the vocabulary check, so they are
  # reported even for fields that would have been skipped anyway.
  def vocabulary_headings
    trimmed_values(VOCABULARY_HEADING_SPEC)
      .reject { |_field, genre| log_if_blank(genre) }
      .select { |field, _genre| approved_vocabulary?(field) }
      .map(&:last)
  end

  # Pass 3: every non-blank $v (and 655 $a) with a 2nd indicator of 0.
  def form_headings
    trimmed_values(FORM_HEADING_SPEC)
      .reject { |_field, genre| log_if_blank(genre) }
      .map(&:last)
  end

  # Runs one extractor spec over the record.
  #
  # @param spec_string [String] a Traject::MarcExtractor specification
  # @return [Array<Array>] [field, value] pairs; value is the first
  #   extracted subfield after Traject's trim_punctuation. Lines that
  #   yield no subfield are skipped.
  def trimmed_values(spec_string)
    pairs = []
    Traject::MarcExtractor.cached(spec_string).collect_matching_lines(record) do |field, spec, extractor|
      value = extractor.collect_subfields(field, spec).first
      next if value.nil?

      pairs << [field, Traject::Macros::Marc21.trim_punctuation(value)]
    end
    pairs
  end

  # only include heading if it is part of the vocabulary
  # When a field repeats $2, the last occurrence decides.
  def approved_vocabulary?(field)
    source = field.subfields.select { |s_field| s_field.code == '2' }.last
    !source.nil? && SUBJECT_GENRE_VOCABULARIES.include?(source.value)
  end

  # Logs an error for a blank genre value.
  #
  # @return [Boolean] true when the value is blank and must be dropped
  def log_if_blank(genre)
    blank = genre.match?(BLANK_VALUE)
    logger.error "#{record['001']} - Blank genre field" if blank
    blank
  end
end
'Study and teaching', - 'Translations into ' -] - -SUBJECT_GENRE_VOCABULARIES = ['sk', 'aat', 'lcgft', 'rbbin', 'rbgenr', 'rbmscv', - 'rbpap', 'rbpri', 'rbprov', 'rbpub', 'rbtyp', 'homoit'] - -# 600/610/650/651 $v, $x filtered -# 655 $a, $v, $x filtered -def process_genre_facet record - genres = [] - Traject::MarcExtractor.cached('600|*0|x:610|*0|x:611|*0|x:630|*0|x:650|*0|x:651|*0|x:655|*0|x').collect_matching_lines(record) do |field, spec, extractor| - genre = extractor.collect_subfields(field, spec).first - unless genre.nil? - genre = Traject::Macros::Marc21.trim_punctuation(genre) - genres << genre if GENRES.include?(genre) || GENRE_STARTS_WITH.any? { |g| genre[g] } - end - end - Traject::MarcExtractor.cached('650|*7|v:655|*7|a:655|*7|v').collect_matching_lines(record) do |field, spec, extractor| - should_include = false - field.subfields.each do |s_field| - # only include heading if it is part of the vocabulary - should_include = SUBJECT_GENRE_VOCABULARIES.include?(s_field.value) if s_field.code == '2' - end - genre = extractor.collect_subfields(field, spec).first - unless genre.nil? - genre = Traject::Macros::Marc21.trim_punctuation(genre) - if genre.match?(/^\s+$/) - logger.error "#{record['001']} - Blank genre field" - elsif should_include - genres << genre - end - end - end - Traject::MarcExtractor.cached('600|*0|v:610|*0|v:611|*0|v:630|*0|v:650|*0|v:651|*0|v:655|*0|a:655|*0|v').collect_matching_lines(record) do |field, spec, extractor| - genre = extractor.collect_subfields(field, spec).first - unless genre.nil? 
require 'rails_helper'

# Specs for Genre#to_a, driven by one hand-built MARC record.
RSpec.describe Genre do
  describe '#to_a' do
    # Fixture fields:
    #   g600   - 600, ind2 0: $a (never a genre), $v, and an unlisted $x
    #   g630   - 630, ind2 0: $x "Fiction." (listed, with trailing punctuation)
    #   g655   - 655, ind2 0: $a, an unlisted $x, and $v
    #   g655_2 - 655, ind2 7 without $2 (no vocabulary given)
    #   g655_3 - 655, ind2 7 with $2 "rbgenr" (approved vocabulary)
    let(:genres) do
      g600 = { "600" => { "ind1" => "", "ind2" => "0", "subfields" => [{ "a" => "Exclude" }, { "v" => "John" }, { "x" => "Join" }] } }
      g630 = { "630" => { "ind1" => "", "ind2" => "0", "subfields" => [{ "x" => "Fiction." }] } }
      g655 = { "655" => { "ind1" => "", "ind2" => "0", "subfields" => [{ "a" => "Culture." }, { "x" => "Dramatic rendition" }, { "v" => "Awesome" }] } }
      g655_2 = { "655" => { "ind1" => "", "ind2" => "7", "subfields" => [{ "a" => "Poetry" }, { "x" => "Translations into French" }, { "v" => "Maps" }] } }
      g655_3 = { "655" => { "ind1" => "", "ind2" => "7", "subfields" => [{ "a" => "Manuscript" }, { "x" => "Translations into French" }, { "v" => "Genre" }, { "2" => "rbgenr" }] } }
      sample_marc = MARC::Record.new_from_hash('fields' => [g600, g630, g655, g655_2, g655_3])
      described_class.new(sample_marc).to_a
    end

    it 'trims punctuation' do
      expect(genres).to include("Culture")
    end

    it 'excludes $a when not 655' do
      expect(genres).not_to include("Exclude")
    end

    it 'excludes 2nd indicator of 7 if vocab type is not in approved list' do
      expect(genres).not_to include("Maps")
      expect(genres).not_to include("Poetry")
    end

    it 'includes 2nd indicator of 7 if vocab type is in approved list' do
      expect(genres).to include("Manuscript")
      expect(genres).to include("Genre")
    end

    it 'includes 6xx $v and 655 $a' do
      expect(genres).to include("John")
      expect(genres).to include("Awesome")
    end

    it 'includes 6xx $x from filtered in terms' do
      expect(genres).to include("Fiction")
    end

    it 'excludes $x terms that do not match filter list' do
      expect(genres).not_to include("Join")
      # Must match the fixture's 655 $x exactly; the earlier misspelling
      # ("renditon") made this expectation impossible to fail.
      expect(genres).not_to include("Dramatic rendition")
    end
  end
end
}] } } - @g655 = { "655" => { "ind1" => "", "ind2" => "0", "subfields" => [{ "a" => "Culture." }, { "x" => "Dramatic rendition" }, { "v" => "Awesome" }] } } - @g655_2 = { "655" => { "ind1" => "", "ind2" => "7", "subfields" => [{ "a" => "Poetry" }, { "x" => "Translations into French" }, { "v" => "Maps" }] } } - @g655_3 = { "655" => { "ind1" => "", "ind2" => "7", "subfields" => [{ "a" => "Manuscript" }, { "x" => "Translations into French" }, { "v" => "Genre" }, { "2" => "rbgenr" }] } } - @sample_marc = MARC::Record.new_from_hash('fields' => [@g600, @g630, @g655, @g655_2, @g655_3]) - @genres = process_genre_facet(@sample_marc) - end - - it 'trims punctuation' do - expect(@genres).to include("Culture") - end - - it 'excludes $a when not 655' do - expect(@genres).not_to include("Exclude") - end - - it 'excludes 2nd indicator of 7 if vocab type is not in approved list' do - expect(@genres).not_to include("Maps") - expect(@genres).not_to include("Poetry") - end - - it 'includes 2nd indicator of 7 if vocab type is in approved list' do - expect(@genres).to include("Manuscript") - expect(@genres).to include("Genre") - end - - it 'includes 6xx $v and 655 $a' do - expect(@genres).to include("John") - expect(@genres).to include("Awesome") - end - - it 'includes 6xx $x from filtered in terms' do - expect(@genres).to include("Fiction") - end - - it 'excludes $x terms that do not match filter list' do - expect(@genres).not_to include("Join") - expect(@genres).not_to include("Dramatic renditon") - end - end - describe 'process_hierarchy function' do before(:all) do @s610_ind2_5 = { "600" => { "ind1" => "", "ind2" => "5", "subfields" => [{ "a" => "Exclude" }] } }