From b3075e0fde60f580b86ebb4fffc8e92002211587 Mon Sep 17 00:00:00 2001
From: Nick Nicholas
Date: Sat, 19 Oct 2024 18:52:19 +1100
Subject: [PATCH] boilerplate copyright appended content processing:
https://github.com/metanorma/metanorma-iso/issues/730
---
Gemfile.devel | 4 +
lib/isodoc/iso/html/word_iso_intro-dis.html | 6 +-
lib/isodoc/iso/html/word_iso_intro.html | 5 +-
lib/isodoc/iso/word_cleanup.rb | 50 +++--
lib/metanorma/iso/boilerplate-fr.adoc | 4 +
lib/metanorma/iso/boilerplate-ru.adoc | 3 +
lib/metanorma/iso/boilerplate.adoc | 4 +
spec/isodoc/postproc_spec.rb | 93 +++++---
spec/isodoc/word_dis_spec.rb | 226 +++++++++++++-------
9 files changed, 266 insertions(+), 129 deletions(-)
create mode 100644 Gemfile.devel
diff --git a/Gemfile.devel b/Gemfile.devel
new file mode 100644
index 00000000..1cb46944
--- /dev/null
+++ b/Gemfile.devel
@@ -0,0 +1,4 @@
+gem "metanorma-standoc", git: "https://github.com/metanorma/metanorma-standoc", branch: "features/boilerplate-append"
+
+
+
diff --git a/lib/isodoc/iso/html/word_iso_intro-dis.html b/lib/isodoc/iso/html/word_iso_intro-dis.html
index aade9f23..f2593287 100644
--- a/lib/isodoc/iso/html/word_iso_intro-dis.html
+++ b/lib/isodoc/iso/html/word_iso_intro-dis.html
@@ -1,7 +1,9 @@
{% if doctype == "Amendment" or doctype == "Technical Corrigendum" %}
{% else %}
-
-
+
{% endif %}
diff --git a/lib/isodoc/iso/html/word_iso_intro.html b/lib/isodoc/iso/html/word_iso_intro.html
index e8630172..e2c273f3 100644
--- a/lib/isodoc/iso/html/word_iso_intro.html
+++ b/lib/isodoc/iso/html/word_iso_intro.html
@@ -1,8 +1,11 @@
+
{% if doctype == "Amendment" or doctype == "Technical Corrigendum" %}
diff --git a/lib/isodoc/iso/word_cleanup.rb b/lib/isodoc/iso/word_cleanup.rb
index 0424fd75..5521c504 100644
--- a/lib/isodoc/iso/word_cleanup.rb
+++ b/lib/isodoc/iso/word_cleanup.rb
@@ -110,26 +110,44 @@ def authority_hdr_cleanup(docxml)
end
def authority_cleanup(docxml)
- insert = docxml.at("//div[@id = 'boilerplate-license-destination']")
- auth = docxml.at("//div[@class = 'boilerplate-license']")&.remove
- auth&.xpath(".//p[not(@class)]")&.each { |p| p["class"] = "zzWarning" }
- auth and insert and insert.children = auth
- insert = docxml.at("//div[@id = 'boilerplate-copyright-destination']")
- auth = docxml.at("//div[@class = 'boilerplate-copyright']")&.remove
- auth&.xpath(".//p[not(@class)]")&.each do |p|
- p["class"] = "zzCopyright"
- end
- auth&.xpath(".//p[@id = 'boilerplate-message']")&.each do |p|
+ authority_license_cleanup(docxml)
+ authority_copyright_cleanup(docxml)
+ coverpage_note_cleanup(docxml)
+ end
+
+ def authority_copyright_cleanup(docxml)
+ auth = docxml.at("//div[@class = 'boilerplate-copyright']") or return
+ authority_copyright_style(auth)
+ authority_copyright_populate(docxml, auth)
+ end
+
+ def authority_copyright_style(auth)
+ auth.xpath(".//p[not(@class)]").each { |p| p["class"] = "zzCopyright" }
+ auth.xpath(".//p[@id = 'boilerplate-message']").each do |p|
p["class"] = "zzCopyright1"
end
- auth&.xpath(".//p[@id = 'boilerplate-address']")&.each do |p|
+ auth.xpath(".//p[@id = 'boilerplate-address']").each do |p|
p["class"] = "zzAddress"
end
- auth&.xpath(".//p[@id = 'boilerplate-place']")&.each do |p|
+ auth.xpath(".//p[@id = 'boilerplate-place']").each do |p|
p["class"] = "zzCopyright1"
end
- auth and insert and insert.children = auth
- coverpage_note_cleanup(docxml)
+ end
+
+ def authority_copyright_populate(doc, auth)
+ i = doc.at("//div[@id = 'boilerplate-copyright-default-destination']")
+ j = doc.at("//div[@id = 'boilerplate-copyright-append-destination']")
+ default = auth.at(".//div[@id = 'boilerplate-copyright-default']")
+ default and i and i.children = default.remove
+ j and j.children = auth.remove
+ end
+
+ def authority_license_cleanup(docxml)
+ dest = docxml.at("//div[@id = 'boilerplate-license-destination']") or
+ return
+ auth = docxml.at("//div[@class = 'boilerplate-license']") or return
+ auth.xpath(".//p[not(@class)]").each { |p| p["class"] = "zzWarning" }
+ dest.children = auth.remove
end
def word_cleanup(docxml)
@@ -142,9 +160,7 @@ def word_cleanup(docxml)
# supply missing annex title
def make_WordToC(docxml, level)
toc = ""
- if source = docxml.at("//div[@class = 'TOC']")
- toc = to_xml(source.children)
- end
+ s = docxml.at("//div[@class = 'TOC']") and toc = to_xml(s.children)
xpath = (1..level).each.map { |i| "//h#{i}" }.join (" | ")
docxml.xpath(xpath).each do |h|
x = ""
diff --git a/lib/metanorma/iso/boilerplate-fr.adoc b/lib/metanorma/iso/boilerplate-fr.adoc
index b00fbd4b..4307bc18 100644
--- a/lib/metanorma/iso/boilerplate-fr.adoc
+++ b/lib/metanorma/iso/boilerplate-fr.adoc
@@ -1,6 +1,7 @@
== copyright-statement
{% if document_scheme == "1951" %}
+[[boilerplate-copyright-default]]
=== REPRODUCTION INTERDITE
[[boilerplate-message]]
@@ -20,6 +21,7 @@ nationales de normalisation.
{% elsif document_scheme == "1972" or document_scheme == "1987" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -31,6 +33,7 @@ Publié en Suisse
{% elsif document_scheme == "1989" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -53,6 +56,7 @@ Imprimé en Suisse
{% else %}
+[[boilerplate-copyright-default]]
=== {% if stage_int >= 40 %}DOCUMENT PROTÉGÉ PAR COPYRIGHT{%else%}{blank}{%endif%}
[[boilerplate-year]]
diff --git a/lib/metanorma/iso/boilerplate-ru.adoc b/lib/metanorma/iso/boilerplate-ru.adoc
index 2528d432..aaf23c4b 100644
--- a/lib/metanorma/iso/boilerplate-ru.adoc
+++ b/lib/metanorma/iso/boilerplate-ru.adoc
@@ -2,6 +2,7 @@
{% if document_scheme == "1972" or document_scheme == "1987" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -12,6 +13,7 @@
{% elsif document_scheme == "1989" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -34,6 +36,7 @@ X.400 c=ch; a=400net;
{% else %}
+[[boilerplate-copyright-default]]
=== {% if stage_int >= 40 %}ДОКУМЕНТ, ОХРАНЯЕМЫЙ АВТОРСКИМ ПРАВОМ{% else %}{blank}{% endif %}
[[boilerplate-year]]
diff --git a/lib/metanorma/iso/boilerplate.adoc b/lib/metanorma/iso/boilerplate.adoc
index 5b971943..5292d1cc 100644
--- a/lib/metanorma/iso/boilerplate.adoc
+++ b/lib/metanorma/iso/boilerplate.adoc
@@ -2,6 +2,7 @@
{% if document_scheme == "1951" %}
+[[boilerplate-copyright-default]]
=== COPYRIGHT RESERVED
[[boilerplate-message]]
@@ -20,6 +21,7 @@ Also issued in French and Russian. Copies to be obtained through the national st
{% elsif document_scheme == "1972" or document_scheme == "1987" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -31,6 +33,7 @@ Printed in Switzerland
{% elsif document_scheme == "1989" %}
+[[boilerplate-copyright-default]]
=== {blank}
[[boilerplate-year]]
@@ -53,6 +56,7 @@ Printed in Switzerland
{% else %}
+[[boilerplate-copyright-default]]
=== {% if stage_int >= 40 %}COPYRIGHT PROTECTED DOCUMENT{% else %}{blank}{% endif %}
[[boilerplate-year]]
diff --git a/spec/isodoc/postproc_spec.rb b/spec/isodoc/postproc_spec.rb
index 1d216fff..a72cc311 100644
--- a/spec/isodoc/postproc_spec.rb
+++ b/spec/isodoc/postproc_spec.rb
@@ -819,7 +819,7 @@
-
+
© ISO 2019, Published in Switzerland
I am the Walrus.
ISO copyright office
@@ -835,6 +835,9 @@
www.iso.org
+
+ Is there anybody out there?
+
@@ -862,7 +865,7 @@
-
+
© ISO 2019, Published in Switzerland
I am the Walrus.
@@ -880,6 +883,9 @@
www.iso.org
+
+ Is there anybody out there?
+
@@ -897,7 +903,8 @@
OUTPUT
- expect(Xml::C14n.format(IsoDoc::Iso::PresentationXMLConvert.new(WORD_HTML_CSS.dup
+ expect(Xml::C14n.format(IsoDoc::Iso::PresentationXMLConvert
+ .new(WORD_HTML_CSS.dup
.merge(presxml_options))
.convert("test", input, true))
.sub(%r{.*}m, ""))
@@ -909,45 +916,65 @@
.convert("test", presxml, false)
word = File.read("test.html", encoding: "UTF-8")
- expect(strip_guid(word)).to include('Warning for Stuff
')
+ expect(strip_guid(word))
+ .to include('Warning for Stuff
')
expect(word).to include("I am the Walrus.")
FileUtils.rm_rf "test.doc"
IsoDoc::Iso::WordConvert.new(WORD_HTML_CSS.dup)
.convert("test", presxml, false)
word = File.read("test.doc", encoding: "UTF-8")
- expect(Xml::C14n.format(word
- .sub(%r{^.*}m,
- '
')
- .sub(%r{
.*$}m, "
")))
+ contents = word.sub(%r{^.*.*$}m, "")
+ contents = Nokogiri::XML(contents)
+ .at("//div[a/@id = 'boilerplate-copyright-destination']")
+ expect(Xml::C14n.format(contents.to_xml))
.to be_equivalent_to Xml::C14n.format(<<~"OUTPUT")
-
+
+
+
+
+
+
+
+
+
+ © ISO 2019, Published in Switzerland
+
+
+
+ I am the Walrus.
+
+
+
+ ISO copyright office
+
+
+
+ ISO copyright office
+
+ Ch. de Blandonnet 8 ?~@? CP 401
+
+ CH-1214 Vernier, Geneva, Switzerland
+
+ Phone: +41 22 749 01 11
+
+ Email: copyright@iso.org
+
+ www.iso.org
+
+
+
+
-
- © ISO 2019, Published in Switzerland#{' '}
-
- I am the Walrus.#{' '}
-
- ISO copyright office
-
- ISO copyright office
-
-
- Ch. de Blandonnet 8 ?~@? CP 401
-
-
- CH-1214 Vernier, Geneva, Switzerland
-
-
- Phone: +41 22 749 01 11
-
-
- Email: copyright@iso.org
-
-
- www.iso.org#{' '}
+
+
+
+
+
Is there anybody out there?
+
+
-
+
OUTPUT
expect(word).to include('This document is not ' \
"an ISO International Standard")
diff --git a/spec/isodoc/word_dis_spec.rb b/spec/isodoc/word_dis_spec.rb
index 2fd50163..3ac3712c 100644
--- a/spec/isodoc/word_dis_spec.rb
+++ b/spec/isodoc/word_dis_spec.rb
@@ -164,27 +164,33 @@
INPUT
word = <<~OUTPUT
-
-
-
-
-
-
-
-
-
-
-
+
OUTPUT
FileUtils.rm_f "test.doc"
@@ -213,6 +219,12 @@
@@ -308,6 +320,12 @@
@@ -392,6 +410,12 @@
@@ -1805,7 +1829,8 @@
- COPYRIGHT PROTECTED DOCUMENT
+
+ COPYRIGHT PROTECTED DOCUMENT
© ISO 2019, Published in Switzerland
I am the Walrus.
@@ -1823,6 +1848,9 @@
www.iso.org
+
+ Is there anybody out there?
+
@@ -1844,36 +1872,59 @@
IsoDoc::Iso::WordConvert.new(wordstylesheet: "spec/assets/word.css")
.convert("test", presxml, false)
word = File.read("test.doc", encoding: "UTF-8")
- expect(Xml::C14n.format(word
- .sub(%r{^.*}m,
- '
')
- .sub(%r{
.*$}m, "
")))
+ contents = word.sub(%r{^.*.*$}m, "")
+ contents = Nokogiri::XML(contents)
+ .at("//div[a/@id = 'boilerplate-copyright-destination']")
+ expect(Xml::C14n.format(contents.to_xml))
.to be_equivalent_to Xml::C14n.format(<<~OUTPUT)
-
+
+
-
-
- © ISO 2019, Published in Switzerland
-
-
-
- I am the Walrus.
-
-
-
- ISO copyright office
-
-
-
- ISO copyright office
-
-
Ch. de Blandonnet 8 ?~@? CP 401
-
CH-1214 Vernier, Geneva, Switzerland
-
Phone: +41 22 749 01 11
-
Email: copyright@iso.org
-
www.iso.org
+
+
+
+
+
+ © ISO 2019, Published in Switzerland
+
+
+
+ I am the Walrus.
+
+
+
+ ISO copyright office
+
+
+
+ ISO copyright office
+
+
+ Ch. de Blandonnet 8 ?~@? CP 401
+
+
+ CH-1214 Vernier, Geneva, Switzerland
+
+
+ Phone: +41 22 749 01 11
+
+
+ Email: copyright@iso.org
+
+
+ www.iso.org
+
-
+
+
+
+
+
+
Is there anybody out there?
+
+
+
+
OUTPUT
FileUtils.rm_f "test.doc"
@@ -1882,36 +1933,59 @@
presxml.sub(%r{
50},
"
6000"), false)
word = File.read("test.doc", encoding: "UTF-8")
- expect(Xml::C14n.format(word
- .sub(%r{^.*
}m,
- '
')
- .sub(%r{
.*$}m, "
")))
+ contents = word.sub(%r{^.*.*$}m, "")
+ contents = Nokogiri::XML(contents)
+ .at("//div[a/@id = 'boilerplate-copyright-destination']")
+ expect(Xml::C14n.format(contents.to_xml))
.to be_equivalent_to Xml::C14n.format(<<~OUTPUT)
-
+
+
-
-
- © ISO 2019, Published in Switzerland
-
-
-
- I am the Walrus.
-
-
-
- ISO copyright office
-
-
-
- ISO copyright office
-
-
Ch. de Blandonnet 8 ?~@? CP 401
-
CH-1214 Vernier, Geneva, Switzerland
-
Phone: +41 22 749 01 11
-
Email: copyright@iso.org
-
www.iso.org
+
+
+
+
+
+ © ISO 2019, Published in Switzerland
+
+
+
+ I am the Walrus.
+
+
+
+ ISO copyright office
+
+
+
+ ISO copyright office
+
+
+ Ch. de Blandonnet 8 ?~@? CP 401
+
+
+ CH-1214 Vernier, Geneva, Switzerland
+
+
+ Phone: +41 22 749 01 11
+
+
+ Email: copyright@iso.org
+
+
+ www.iso.org
+
-
+
+
+
+
+
+
Is there anybody out there?
+
+
+
+
OUTPUT
end