Merge pull request #2513 from pulibrary/collect_matching_lines

Refactor use of #collect_matching_lines
pulibrary · Oct 8, 2024 · commit d37d361
2 parents: 23ecd39 + da7f0df
Show file tree

Hide file tree

Showing 2 changed files with 16 additions and 25 deletions.
diff --git a/marc_to_solr/lib/princeton_marc.rb b/marc_to_solr/lib/princeton_marc.rb
@@ -158,7 +158,6 @@ def other_versions record
 
 # only includes values before $t
 def process_names record
-  names = []
   Traject::MarcExtractor.cached('100aqbcdk:110abcdfgkln:111abcdfgklnpq:700aqbcdk:710abcdfgkln:711abcdfgklnpq').collect_matching_lines(record) do |field, spec, extractor|
     name = extractor.collect_subfields(field, spec).first
     unless name.nil?
@@ -169,10 +168,9 @@ def process_names record
         after_t = true if s_field.code == 't'
       end
       name = name.chomp(remove)
-      names << Traject::Macros::Marc21.trim_punctuation(name)
+      Traject::Macros::Marc21.trim_punctuation(name)
     end
-  end
-  names.uniq
+  end.compact.uniq
 end
 
 # only includes values before $t
@@ -249,7 +247,6 @@ def process_author_roles record
 # @param [MARC::Record]
 # @return [Array] pub info strings from fields 260 and 264.
 def set_pub_citation(record)
-  pub_citation = []
   Traject::MarcExtractor.cached('260:264').collect_matching_lines(record) do |field, _spec, _extractor|
     a_pub_info = nil
     b_pub_info = nil
@@ -263,9 +260,8 @@ def set_pub_citation(record)
     pub_info += a_pub_info unless a_pub_info.nil?
     pub_info += ": " if !a_pub_info.nil? && !b_pub_info.nil?
     pub_info += b_pub_info unless b_pub_info.nil?
-    pub_citation << pub_info if !pub_info.empty?
-  end
-  pub_citation
+    pub_info if !pub_info.empty?
+  end.compact
 end
 
 SEPARATOR = '—'
@@ -276,36 +272,33 @@ def set_pub_citation(record)
 # For example, if you only want subject headings from the Bilindex vocabulary,
 # you could use `process_hierarchy(record, '650|*7|abcvxyz') { |field| field['2'] == 'bidex' }`
 # @param record [MARC::Record] record handed to the extractor
 # @param fields [String] Traject MarcExtractor spec string, e.g. '650|*7|abcvxyz'
 # @yield [field] optional filter; when it returns a falsy value the field is skipped
 # @return [Array] the non-nil block results gathered by collect_matching_lines
 def process_hierarchy(record, fields)
-  headings = []
   # subfield codes handed to HierarchicalHeading as its split points
   split_on_subfield = ['t', 'v', 'x', 'y', 'z']
   Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
     # with no block given, every matching field is included
     include_heading = block_given? ? yield(field) : true
     # bare `next` makes the block evaluate to nil for this field;
     # it fires when the filter rejected the field or no subfield value was extracted
     next unless include_heading && extractor.collect_subfields(field, spec).first
-    headings << HierarchicalHeading.new(field:, spec:, split_on_subfield:).to_s
-  end
-  headings.compact
+    HierarchicalHeading.new(field:, spec:, split_on_subfield:).to_s
     # the trailing .compact presumably removes the nils left by skipped fields
     # (depends on collect_matching_lines keeping nil block results - confirm)
+  end.compact
 end
 
 # for the split subject facet
 # split with em dash along x,z
 # @param record [MARC::Record] record handed to both extractors
 # @return [Array] lcsh_subjects followed by other_thesaurus_subjects
 def process_subject_topic_facet record
-  subjects = []
-  Traject::MarcExtractor.cached('600|*0|abcdfklmnopqrtxz:610|*0|abfklmnoprstxz:611|*0|abcdefgklnpqstxz:630|*0|adfgklmnoprstxz:650|*0|abcxz:651|*0|axz').collect_matching_lines(record) do |field, spec, extractor|
   # first pass: 600/610/611/630/650/651 fields matching the `|*0|` indicator pattern
+  lcsh_subjects = Traject::MarcExtractor.cached('600|*0|abcdfklmnopqrtxz:610|*0|abfklmnoprstxz:611|*0|abcdefgklnpqstxz:630|*0|adfgklmnoprstxz:650|*0|abcxz:651|*0|axz').collect_matching_lines(record) do |field, spec, extractor|
     subject = extractor.collect_subfields(field, spec).first
     # when nothing was extracted the `unless` evaluates to nil, so the block yields nil
     unless subject.nil?
       # build the heading with x and z as split points, then break it apart on SEPARATOR
       hierarchical_string = HierarchicalHeading.new(field:, spec:, split_on_subfield: %w[x z]).to_s
-      subjects << hierarchical_string.split(SEPARATOR)
+      hierarchical_string.split(SEPARATOR)
     end
-  end
-  Traject::MarcExtractor.cached('650|*7|abcxz').collect_matching_lines(record) do |field, spec, extractor|
+  end.compact
   # second pass: 650 fields matching the `|*7|` indicator pattern
+  other_thesaurus_subjects = Traject::MarcExtractor.cached('650|*7|abcxz').collect_matching_lines(record) do |field, spec, extractor|
     subject = extractor.collect_subfields(field, spec).first
     # a field is kept only when at least one of these three predicates accepts it
     should_include = siku_heading?(field) || local_heading?(field) || any_thesaurus_match?(field, %w[homoit])
     if should_include && !subject.nil?
       hierarchical_string = HierarchicalHeading.new(field:, spec:, split_on_subfield: %w[x z]).to_s
-      subjects << hierarchical_string.split(SEPARATOR)
+      hierarchical_string.split(SEPARATOR)
     end
   # NOTE(review): the first collection ends in `.compact` but this one in `.flatten.compact`.
   # If collect_matching_lines already flattens block results (Traject docs suggest so - confirm),
   # `.flatten` here is redundant; if it does not, lcsh_subjects stays nested, unlike the removed
   # `subjects.flatten`. Either way the two chains should match.
-  end
-  subjects.flatten
+  end.flatten.compact
+  lcsh_subjects + other_thesaurus_subjects
 end
 
 def strip_non_numeric num_str

diff --git a/marc_to_solr/lib/traject_config.rb b/marc_to_solr/lib/traject_config.rb
@@ -1279,8 +1279,7 @@
 # Process location code once
 # 852|b and 852|c
 each_record do |record, context|
-  location_codes = []
-  MarcExtractor.cached("852").collect_matching_lines(record) do |field, _spec, _extractor|
+  location_codes = MarcExtractor.cached("852").collect_matching_lines(record) do |field, _spec, _extractor|
     holding_b = nil
     is_alma = alma_code_start_22?(field['8'])
     is_scsb = scsb_doc?(record['001'].value)
@@ -1294,9 +1293,8 @@
         holding_b += "$#{field['c']}" if field['c'] && is_alma
       end
     end
-    location_codes << holding_b
-    location_codes.compact!
-  end
+    holding_b
+  end.compact
   if location_codes.any?
     location_codes.uniq!
     ## need to go through any location code that isn't from voyager, thesis, or graphic arts