Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional Comment accommodations #2141

Merged
merged 3 commits into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions lib/LaTeXML/Core/Document.pm
Original file line number Diff line number Diff line change
Expand Up @@ -678,12 +678,19 @@ sub insertComment {
my ($self, $text) = @_;
chomp($text);
$text =~ s/\-\-+/__/g;
$self->closeText_internal; # Close any open text node.
my $comment;
my $prev = $$self{node}->lastChild;
my $prevtype = $prev && $prev->nodeType;
if ($$self{node}->nodeType == XML_DOCUMENT_NODE) {
push(@{ $$self{pending} }, $comment = $$self{document}->createComment(' ' . $text . ' ')); }
elsif (($comment = $$self{node}->lastChild) && ($comment->nodeType == XML_COMMENT_NODE)) {
elsif ($prevtype && ($prevtype == XML_COMMENT_NODE)) {
$comment = $prev;
$comment->setData($comment->data . "\n " . $text . ' '); }
elsif ($prevtype && ($prevtype == XML_TEXT_NODE)) { # Put comment BEFORE text node
if (($comment = $prev->previousSibling) && ($comment->nodeType == XML_COMMENT_NODE)) {
$comment = $$self{node}->appendChild($$self{document}->createComment(' ' . $text . ' ')); }
else {
$comment = $$self{node}->insertBefore($$self{document}->createComment(' ' . $text . ' '), $prev); } }
else {
$comment = $$self{node}->appendChild($$self{document}->createComment(' ' . $text . ' ')); }
return $comment; }
Expand Down
2 changes: 1 addition & 1 deletion lib/LaTeXML/Core/Parameter.pm
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ sub digest {
my ($igullet) = @_;
$igullet->unread($value);
my @tokens = ();
while (defined(my $token = $igullet->getPendingComment || $igullet->readXToken(1, 1))) {
while (defined(my $token = $igullet->getPendingComment || $igullet->readXToken(1))) {
push(@tokens, $token); }
$value = Tokens(@tokens);
$value = $value->neutralize; }); } }
Expand Down
4 changes: 2 additions & 2 deletions lib/LaTeXML/Core/Stomach.pm
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ sub digestNextBody {
my $alignment = $STATE->lookupValue('Alignment');
my @aug = ();

while (defined($token = $$self{gullet}->getPendingComment || $$self{gullet}->readXToken(1, 1))) {
while (defined($token = $$self{gullet}->getPendingComment || $$self{gullet}->readXToken(1))) {
if ($alignment && scalar(@LaTeXML::LIST) && (Equals($token, T_ALIGN) ||
Equals($token, T_CS('\cr')) || Equals($token, T_CS('\hidden@cr')) ||
Equals($token, T_CS('\hidden@crcr')))) {
Expand Down Expand Up @@ -132,7 +132,7 @@ sub digest {
my $initdepth = scalar(@{ $$self{boxing} });
local @LaTeXML::LIST = ();
while (defined(my $token =
$$self{gullet}->getPendingComment || $$self{gullet}->readXToken(1, 1))) {
$$self{gullet}->getPendingComment || $$self{gullet}->readXToken(1))) {
push(@LaTeXML::LIST, $self->invokeToken($token));
last if $initdepth > scalar(@{ $$self{boxing} }); } # if we've closed the initial mode.
List(@LaTeXML::LIST, mode => ($ismath ? 'math' : 'text'));
Expand Down
12 changes: 8 additions & 4 deletions lib/LaTeXML/Core/Tokens.pm
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,21 @@ sub revert {
# NOT for creating valid TeX (use revert or UnTeX for that!)
# Concatenate the string form of every token in this list into one string.
# NOTE(review): this text is a diff view with the +/- markers lost, so two
# versions of the return statement appear back to back (apparently the
# removed line followed by its replacement); confirm against the real file.
sub toString {
my ($self) = @_;
# First version: joins the toString of each token, with no filtering.
return join('', map { $_->toString } @$self); }
# Second version: a token whose element [1] equals CC_COMMENT contributes
# the empty string, so such tokens are left out of the result.
return join('', map { ($$_[1] == CC_COMMENT ? '' : $_->toString) } @$self); }

# Methods for overloaded ops.

# Compare two Tokens lists, ignoring comments & markers
# Returns 0 right away when $b is undefined or is not of the same class as $a.
# NOTE(review): this text is a diff view with the +/- markers lost, so two
# comparison loops appear one after the other (apparently the removed loop
# followed by its replacement); confirm against the real file.
sub equals {
my ($a, $b) = @_;
return 0 unless defined $b && (ref $a) eq (ref $b);
# Work on copies so the shifts below do not modify the callers' lists.
my @a = @$a;
my @b = @$b;
# First version: drop the leading tokens pairwise while they are equal;
# the lists are equal only if both are exhausted at the same time.
while (@a && @b && ($a[0]->equals($b[0]))) {
shift(@a); shift(@b); }
return !(@a || @b); }
# Second version: loop until both copies are empty.
while (@a || @b) {
# Discard a leading token from either side when its element [1] is
# CC_COMMENT or CC_MARKER, then re-test; such tokens never take part
# in the comparison.
if (@a && (($a[0]->[1] == CC_COMMENT) || ($a[0]->[1] == CC_MARKER))) { shift(@a); next; }
if (@b && (($b[0]->[1] == CC_COMMENT) || ($b[0]->[1] == CC_MARKER))) { shift(@b); next; }
# Both sides must still have a token and those tokens must be equal;
# otherwise return false (a bare return).
return unless @a && @b && shift(@a)->equals(shift(@b)); }
return 1; }

sub stringify {
my ($self) = @_;
Expand Down
45 changes: 25 additions & 20 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,9 @@ DefParameterType('DefToken', sub {
my ($gullet) = @_;
my $token = $gullet->readToken;
while ($token && ($token->getCatcode == CC_BEGIN)) {
my @toks = grep { !$_->equals(T_SPACE) } $gullet->readBalanced->unlist;
my $cc;
my @toks = grep { ($cc = $$_[1]) && ($cc != CC_SPACE) && ($cc != CC_COMMENT); }
$gullet->readBalanced->unlist;
$token = shift(@toks);
$gullet->unread(@toks); }
$token; },
Expand Down Expand Up @@ -3609,8 +3611,8 @@ sub pruneEmpty {
my ($document, $node) = @_;
# In some cases we could have e.g. a \noindent followed by a {table},
# in which case we end up with an empty ltx:para which we can prune.
if (!scalar($node->childNodes)) {
my $prev = $node->previousSibling;
if (!scalar(element_nodes($node))) {
my $prev = element_prev($node);
if (!$prev || ($document->getNodeQName($prev) ne 'ltx:para')) { # If $node WAS the 1st child
$document->addClass($node->parentNode, 'ltx_pruned_first'); }
$node->unlinkNode; }
Expand Down Expand Up @@ -3899,7 +3901,9 @@ sub cleanup_Math {
push(@texts, $space); } } }
else { # is XMText
foreach my $child ($xmnode->childNodes) {
if ($child->nodeType != XML_ELEMENT_NODE) { # Make sure we've got an element
my $t = $child->nodeType;
if ($t == XML_COMMENT_NODE) { }
elsif ($t != XML_ELEMENT_NODE) { # Make sure we've got an element
push(@texts, ['ltx:text', { class => 'ltx_markedasmath' }, $child]); }
else {
$document->addClass($child, 'ltx_markedasmath');
Expand Down Expand Up @@ -4327,7 +4331,8 @@ sub scriptHandler {
# and whether there are conflicting preceding scripts, which is an error
# Parsing is too late!
while (my $prev = pop(@LaTeXML::LIST)) {
if ($prev->getProperty('isSpace')) {
if (($prev->getProperty('isSpace'))
|| (ref $prev eq 'LaTeXML::Core::Comment')) {
$prevspace = 1; # a space avoids double-scripts
unshift(@putback, $prev); # put back? assuming it will add rpadding to previous???
next; }
Expand Down Expand Up @@ -4772,8 +4777,9 @@ DefMathLigature(matcher => sub { my ($document, $node) = @_;
&& ((($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN')
|| (($node->getAttribute('role') || 'UNKNOWN') eq 'NUMBER'))
&& (($s = $node->textContent . $s) =~ /^[0-9a-zA-Z]+$/)) {
$n++; $string = $s;
$node = $node->previousSibling; }
$string = $s;
do { $node = $node->previousSibling; $n++;
} while $node && ($node->nodeType == XML_COMMENT_NODE); }
(($string =~ /^[a-zA-Z]/) && ($n > 1) ? ($n, $string, role => 'UNKNOWN', meaning => undef) : undef);
} });

Expand Down Expand Up @@ -4831,16 +4837,15 @@ DefMathLigature(matcher => sub { my ($document, $node) = @_;
last; } }
# OR if XMHint with 0 <= width <= thickmuskip (5mu == ?)
elsif ($qn eq 'ltx:XMHint') {
## if (($w = $node->getAttribute('width')) && ($w=Dimension($w)->valueOf) && ($w >= 0) && ($w <= $skip)) {
## $string = $text . $string; } # Add to string, but omit from number
my $s;
if (($s = $node->getAttribute('name')) && ($s = $space_chars{$s})) {
$string = $s . $string; }
else {
last; } }
else {
last; }
$n++; $node = $node->previousSibling; }
do { $node = $node->previousSibling; $n++;
} while $node && ($node->nodeType == XML_COMMENT_NODE); }
if (($n > 1) && ($number =~ /\d/)) {
($n, $string, meaning => $number, role => 'NUMBER'); } });

Expand Down Expand Up @@ -4922,22 +4927,22 @@ DefPrimitive('\wlog{}', sub {
return; },
locked => 1);
# From plain.tex
# Allocation commands (\newcount, \newdimen, ...): each one passes its argument
# token to DefRegisterI together with an initial value.
# NOTE(review): this text is a diff view with the +/- markers lost, so most
# definitions below appear twice, differing only in the parameter type
# (`Token` vs `DefToken`); apparently the `Token` line is the removed one
# and the `DefToken` line its replacement. Confirm against the real file.
# \newcount: initial value Number(0), allocate => '\count'.
DefPrimitive('\newcount Token', sub {
DefPrimitive('\newcount DefToken', sub {
DefRegisterI($_[1], undef, Number(0), allocate => '\count'); });
# \newdimen: initial value Dimension(0), allocate => '\dimen'.
DefPrimitive('\newdimen Token', sub {
DefPrimitive('\newdimen DefToken', sub {
DefRegisterI($_[1], undef, Dimension(0), allocate => '\dimen'); });
# \newskip: initial value Glue(0), allocate => '\skip'.
DefPrimitive('\newskip Token', sub {
DefPrimitive('\newskip DefToken', sub {
DefRegisterI($_[1], undef, Glue(0), allocate => '\skip'); });
# \newmuskip: initial value MuGlue(0), allocate => '\muskip'.
DefPrimitive('\newmuskip Token', sub {
DefPrimitive('\newmuskip DefToken', sub {
DefRegisterI($_[1], undef, MuGlue(0), allocate => '\muskip'); });
# Boxes are numbered from a counter kept in the value 'allocated_boxes'.
AssignValue(allocated_boxes => 0);
# \newbox: take the current counter value $n, store $n + 1 back globally,
# assign an empty List() to "box$n", and define the token as a read-only
# register whose value is Number($n).
DefPrimitive('\newbox DefToken', sub {
my $n = LookupValue('allocated_boxes');
AssignValue(allocated_boxes => $n + 1, 'global');
AssignValue("box$n", List());
DefRegisterI($_[1], undef, Number($n), readonly => 1); });
# \newhelp: store the second argument under the string form of the first.
DefPrimitive('\newhelp Token {}', sub { AssignValue(ToString($_[1]) => $_[2]); });
# \newtoks: initial value is an empty Tokens().
DefPrimitive('\newtoks Token', sub { DefRegisterI($_[1], undef, Tokens()); });
DefPrimitive('\newhelp DefToken {}', sub { AssignValue(ToString($_[1]) => $_[2]); });
DefPrimitive('\newtoks DefToken', sub { DefRegisterI($_[1], undef, Tokens()); });
# the next 4 actually work by doing a \chardef instead of \countdef, etc.
# which means they actually work quite differently
DefPrimitive('\alloc@@ {}', sub {
Expand All @@ -4947,10 +4952,10 @@ DefPrimitive('\alloc@@ {}', sub {
$n = $n->valueOf if ref $n;
AssignValue($c => $n + 1, 'global');
AssignRegister('\allocationnumber' => Number($n), 'global'); });
# \newread, \newwrite, \newfam, \newlanguage: each expands to \alloc@@{<kind>}
# followed by a global \chardef of the argument token to \allocationnumber.
# NOTE(review): this text is a diff view with the +/- markers lost, so every
# macro appears twice, differing only in the parameter type (`Token` vs
# `DefToken`); apparently the first four lines are the removed versions and
# the last four their replacements. Confirm against the real file.
DefMacro('\newread Token', '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite Token', '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam Token', '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage Token', '\alloc@@{language}\global\chardef#1=\allocationnumber');
DefMacro('\newread DefToken', '\alloc@@{read}\global\chardef#1=\allocationnumber');
DefMacro('\newwrite DefToken', '\alloc@@{write}\global\chardef#1=\allocationnumber');
DefMacro('\newfam DefToken', '\alloc@@{fam}\global\chardef#1=\allocationnumber');
DefMacro('\newlanguage DefToken', '\alloc@@{language}\global\chardef#1=\allocationnumber');

DefMacro('\e@alloc{}{}{}{}{}{}',
'\global\advance#3\@ne
Expand Down
Binary file modified t/tokenize/ligatures.pdf
Binary file not shown.
23 changes: 23 additions & 0 deletions t/tokenize/ligatures.tex
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,29 @@ \section{Text Ligatures}
In interjection --- like this --- gets em-dash.

A ``quote'' like this.

\section{Ignore comments}
%foo
`%bar
`Hopefully Quoted%baz
'%qux
'

A number
\ensuremath{%foo
12345.%bar
%baz
67890%qux
}
?

An --- emdash,
%foo
-%bar
-%baz
-%qux
perhaps?

\section{Typewriter non-Ligatures}
\texttt{LDots\ldots, versus dots ...}

Expand Down
30 changes: 27 additions & 3 deletions t/tokenize/ligatures.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,39 @@ In interjection — like this — gets em-dash.</p>
<tag role="refnum">2</tag>
<tag role="typerefnum">§2</tag>
</tags>
<title><tag close=" ">2</tag>Typewriter non-Ligatures</title>
<title><tag close=" ">2</tag>Ignore comments</title>
<para xml:id="S2.p1">
<p><text font="typewriter">LDots…, versus dots ...</text></p>
<p>“Hopefully Quoted”</p>
</para>
<para xml:id="S2.p2">
<p>A number
<Math mode="inline" tex="12345.67890" text="12345.67890" xml:id="S2.p2.m1">
<XMath>
<XMTok meaning="12345.67890" role="NUMBER">12345.67890</XMTok>
</XMath>
</Math>
?</p>
</para>
<para xml:id="S2.p3">
<p>An — emdash,
—perhaps?</p>
</para>
</section>
<section inlist="toc" xml:id="S3">
<tags>
<tag>3</tag>
<tag role="refnum">3</tag>
<tag role="typerefnum">§3</tag>
</tags>
<title><tag close=" ">3</tag>Typewriter non-Ligatures</title>
<para xml:id="S3.p1">
<p><text font="typewriter">LDots…, versus dots ...</text></p>
</para>
<para xml:id="S3.p2">
<p><text font="typewriter">A range: 1--10 gets en-dash.</text>
<text font="typewriter">In interjection --- like this --- gets em-dash.</text></p>
</para>
<para xml:id="S2.p3">
<para xml:id="S3.p3">
<p><text font="typewriter">A ‘‘quote’’ like this.</text></p>
</para>
</section>
Expand Down