From ce153598da456e142e7856bd492b68cc5ea56d13 Mon Sep 17 00:00:00 2001 From: Bruce Miller Date: Tue, 4 Jul 2023 01:40:38 -0400 Subject: [PATCH] Evolve readBalanced to replace new readTokens as scan_toks analog; it may return comments; Cleanup API of readXTokens; it never returns comments; it has option for special case handling of \if arguments; packParameters no longer unwraps \noexpand'd tokens; Use thse new gullet methods to rewrite parameter types: DefPlain, GeneralText, XUntil, DefExpanded, ExpandedIfToken --- lib/LaTeXML/Core/Gullet.pm | 107 ++++++++++++++--------------- lib/LaTeXML/Core/Tokens.pm | 3 - lib/LaTeXML/Package/TeX.pool.ltxml | 26 +++---- 3 files changed, 61 insertions(+), 75 deletions(-) diff --git a/lib/LaTeXML/Core/Gullet.pm b/lib/LaTeXML/Core/Gullet.pm index 381ae50eb..141653fdb 100644 --- a/lib/LaTeXML/Core/Gullet.pm +++ b/lib/LaTeXML/Core/Gullet.pm @@ -314,30 +314,28 @@ sub unread { # Note that most tokens pass through here, so be Fast & Clean! readToken is folded in. # `Toplevel' processing, (if $toplevel is true), used at the toplevel processing by Stomach, # will step to the next input stream (Mouth) if one is available, -# If $commentsok is true, will also pass comments. # $toplevel is doing TWO distinct things. When true: # * If a mouth is exhausted, move on to the containing mouth to continue reading # * expand even protected defns, essentially this means expand "for execution" -# Note that, unlike readTokens, this does NOT defer expansion of \the & friends. +# Note that, unlike readBalanced, this does NOT defer expansion of \the & friends. # Also, \noexpand'd tokens effectively act ilke \relax +# For arguments to \if,\ifx, etc use $for_conditional true, +# which handles \noexpand and CS which have been \let to tokens specially. sub readXToken { - my ($self, $toplevel, $commentsok) = @_; + my ($self, $toplevel, $for_conditional) = @_; $toplevel = 1 unless defined $toplevel; my $autoclose = $toplevel; # Potentially, these should have distinct controls? my $for_evaluation = $toplevel; - return shift(@{ $$self{pending_comments} }) if $commentsok && @{ $$self{pending_comments} }; my ($token, $cc, $defn, $atoken, $atype, $ahidden); while (1) { while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { - return $token if $commentsok; push(@{ $$self{pending_comments} }, $token); } elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } if (!defined $token) { # Else read from current mouth while (($token = $$self{mouth}->readToken()) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { - return $token if $commentsok; push(@{ $$self{pending_comments} }, $token); } elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } } @@ -346,7 +344,8 @@ sub readXToken { return unless $autoclose && $$self{autoclose} && @{ $$self{mouthstack} }; $self->closeMouth; } # Next input stream. elsif (my $unexpanded = $$token[2]) { # Handle \noexpand; Inline get_dont_expand - return $token; } # returns something that acts like \relax + return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\relax')); + } ## Wow!!!!! See TeX the Program \S 309 elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested) && $LaTeXML::READING_ALIGNMENT @@ -354,21 +353,23 @@ sub readXToken { $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } ## Note: use general-purpose lookup, since we may reexamine $defn below elsif ($LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc] - && defined($defn = $STATE->lookupMeaning($token)) - && ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition - && $$defn{isExpandable} - && ($for_evaluation || !$$defn{isProtected})) { # is this the right logic here? don't expand unless di - local $LaTeXML::CURRENT_TOKEN = $token; - my $r; - no warnings 'recursion'; - my @expansion = map { (($r = ref $_) eq 'LaTeXML::Core::Token' ? $_ - : ($r eq 'LaTeXML::Core::Tokens' ? @$_ - : Error('misdefined', $r, undef, "Expected a Token, got " . Stringify($_), - "in " . ToString($defn)) || T_OTHER(Stringify($_)))) } - $defn->invoke($self); - next unless @expansion; - # add the newly expanded tokens back into the gullet stream, in the ordinary case. - unshift(@{ $$self{pushback} }, @expansion); } + && defined($defn = $STATE->lookupMeaning($token))) { + if ((ref $defn) eq 'LaTeXML::Core::Token') { # \let to a token? Return it! + return ($for_conditional ? $defn : $token); } + elsif (!$$defn{isExpandable} # Not expandable or is protected + || ($$defn{isProtected} && !$for_evaluation)) { + return $token; } + else { + local $LaTeXML::CURRENT_TOKEN = $token; + my $r; + no warnings 'recursion'; + my @expansion = map { (($r = ref $_) eq 'LaTeXML::Core::Token' ? $_ + : ($r eq 'LaTeXML::Core::Tokens' ? @$_ + : Error('misdefined', $r, undef, "Expected a Token, got " . Stringify($_), + "in " . ToString($defn)) || T_OTHER(Stringify($_)))) } + $defn->invoke($self); + # add the newly expanded tokens back into the gullet stream, in the ordinary case. + unshift(@{ $$self{pushback} }, @expansion); } } elsif ($$token[1] == CC_CS && !(defined $defn)) { $STATE->generateErrorStub($self, $token); # cs SHOULD have defn by now; report early! return $token; } @@ -377,10 +378,12 @@ sub readXToken { } return; } # never get here. -# readTokens approximates TeX's scan_toks (but doesn't parse \def parameter lists) -# It expects to read a balanced {...} (requiring the opening { -# and optionally ($expand) expands while reading, but deferring \the and related. +# readBalanced approximates TeX's scan_toks (but doesn't parse \def parameter lists) +# and only optionally requires the openning "{". +# It may return comments in the token lists. +# it optionally ($expand) expands while reading, but deferring \the and related. # The $macrodef flag affects whether # parameters are "packed" for macro bodies. +# If $require_open is true, the opening T_BEGIN has not yet been read, and is required. our $DEFERRED_COMMANDS = { '\the' => 1, '\showthe' => 1, @@ -388,21 +391,24 @@ our $DEFERRED_COMMANDS = { '\detokenize' => 1 }; -sub readTokens { - my ($self, $macrodef, $expand) = @_; +sub readBalanced { + my ($self, $expanded, $macrodef, $require_open) = @_; local $LaTeXML::ALIGN_STATE = 1000000; my $startloc = ($$self{verbosity} > 0) && $self->getLocator; # Does we need to expand to get the { ??? - my $token = ($expand ? $self->readXToken(0, 1) : $self->readToken()); - if ((!$token) || ($$token[1] != CC_BEGIN)) { - Error('expected', '{', $self, "Expected opening '{'"); - return Tokens(); } + if ($require_open) { + my $token = ($expanded ? $self->readXToken(0) : $self->readToken()); + if ((!$token) || ($$token[1] != CC_BEGIN)) { + Error('expected', '{', $self, "Expected opening '{'"); + return Tokens(); } } my @tokens = (); my $level = 1; - my ($cc, $defn, $atoken, $atype, $ahidden); + my ($token, $cc, $defn, $atoken, $atype, $ahidden); # Inlined readToken (we'll keep comments in the result) while (1) { - push(@tokens, shift(@{ $$self{pending_comments} })) if @{ $$self{pending_comments} }; + if (@{ $$self{pending_comments} }) { + push(@tokens, @{ $$self{pending_comments} }); + $$self{pending_comments} = []; } # Examine pushback first while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { push(@tokens, $token); } @@ -432,7 +438,7 @@ sub readTokens { && (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) { $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } ## Note: use general-purpose lookup, since we may reexamine $defn below - elsif ($expand && + elsif ($expanded && $LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc] && defined($defn = $STATE->lookupMeaning($token)) && ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition @@ -448,14 +454,20 @@ sub readTokens { $defn->invoke($self); next unless @expansion; # If a special \the type command, push the expansion directly into the result + # Well, almost directly: handle any MARKER tokens now, and possibly un-pack T_PARAM if ($$DEFERRED_COMMANDS{ $$defn{cs}[0] }) { - if ($macrodef) { # "unpack" # to cover the packParameters at end! - @expansion = map { ($$_[1] == CC_PARAM ? ($_, $_) : $_); } @expansion } - push(@tokens, @expansion); } - else { # otherwise, prepend to pushback to be expanded further. + foreach my $t (@expansion) { + my $cc = $$t[1]; + if ($cc == CC_MARKER) { $self->handleMarker($t); } + elsif (($cc == CC_PARAM) && $macrodef) { + push(@tokens, $t, $t); } # "unpack" to cover the packParameters at end! + else { + push(@tokens, $t); } } + } + else { # otherwise, prepend to pushback to be expanded further. unshift(@{ $$self{pushback} }, @expansion); } } else { - if ($expand && ($$token[1] == CC_CS) && !(defined $defn)) { + if ($expanded && ($$token[1] == CC_CS) && !(defined $defn)) { $STATE->generateErrorStub($self, $token); } # cs SHOULD have defn by now; report early! push(@tokens, $token); } # just return it } @@ -463,7 +475,7 @@ sub readTokens { # TODO: The current implementation has a limitation where if the balancing end is in a different mouth, # it will not be recognized. my $loc_message = $startloc ? ("Started at " . ToString($startloc)) : ("Ended at " . ToString($self->getLocator)); - Error('expected', "}", $self, "Gullet->readTokens ran out of input in an unbalanced state.", + Error('expected', "}", $self, "Gullet->readBalanced ran out of input in an unbalanced state.", $loc_message); } return ($macrodef ? Tokens(@tokens)->packParameters : Tokens(@tokens)); } @@ -537,21 +549,6 @@ sub skipFiller { } return; } -# Read a sequence of tokens balanced in {} -# assuming the { has already been read. -# Returns a Tokens list of the balanced sequence, omitting the closing } -our @CATCODE_BALANCED_INTERESTING = ( - 0, 1, 1, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 1, 0, 0); - -sub readBalanced { - my ($self, $expanded) = @_; - $self->unread(T_BEGIN); # Since we've already read the {, but readTokens requires it. - return $self->readTokens(undef, $expanded); } - sub ifNext { my ($self, $token) = @_; if (my $tok = $self->readToken()) { diff --git a/lib/LaTeXML/Core/Tokens.pm b/lib/LaTeXML/Core/Tokens.pm index 5ad7c2b40..2c2d5f053 100644 --- a/lib/LaTeXML/Core/Tokens.pm +++ b/lib/LaTeXML/Core/Tokens.pm @@ -134,9 +134,6 @@ sub packParameters { # e.g. \detokenize{#,} is legal, while \textbf{#,} is not Error('misdefined', 'expansion', undef, "Parameter has a malformed arg, should be #1-#9 or ##. ", "In expansion " . ToString($self)); } } - elsif (my $inner = $$t[2]) { # unwrap \noexpand tokens - $repacked = 1; - push(@rescanned, ($$inner[2] || $inner)); } else { push(@rescanned, $t); } } return ($repacked ? bless [@rescanned], 'LaTeXML::Core::Tokens' : $self); } diff --git a/lib/LaTeXML/Package/TeX.pool.ltxml b/lib/LaTeXML/Package/TeX.pool.ltxml index 3a349f82e..87ea02c3b 100644 --- a/lib/LaTeXML/Package/TeX.pool.ltxml +++ b/lib/LaTeXML/Package/TeX.pool.ltxml @@ -117,7 +117,7 @@ DefParameterType('Plain', sub { DefParameterType('DefPlain', sub { my ($gullet, $inner) = @_; - my $value = $gullet->readTokens(1, 0); + my $value = $gullet->readBalanced(0, 1, 1); if ($inner) { ($value) = $inner->reparseArgument($gullet, $value); } return $value; }, @@ -152,7 +152,8 @@ DefParameterType('Optional', sub { DefParameterType('GeneralText', sub { my ($gullet) = @_; $gullet->unread($gullet->readXToken); # Force expansion to skip before required { - return $gullet->readTokens(0, 0); }); + + return $gullet->readBalanced(0, 0, 1); }); DefParameterType('Until', sub { my ($gullet, $until) = @_; @@ -231,7 +232,7 @@ DefParameterType('XUntil', sub { # but expanding \the-like commands only once. DefParameterType('Expanded', sub { my ($gullet) = @_; - $gullet->readTokens(0, 1); }, + $gullet->readBalanced(1, 0, 1); }, reversion => sub { my ($arg) = @_; (T_BEGIN, Revert($arg), T_END); }); @@ -242,7 +243,7 @@ DefParameterType('Expanded', sub { # and also packing # parameters DefParameterType('DefExpanded', sub { my ($gullet) = @_; - return $gullet->readTokens(1, 1); }, + return $gullet->readBalanced(1, 1, 1); }, reversion => sub { my ($arg) = @_; (T_BEGIN, Revert($arg), T_END); }); @@ -773,21 +774,12 @@ DefConditionalI('\ifmmode', undef, sub { LookupValue('IN_MATH'); }); DefParameterType('ExpandedIfToken', sub { my ($gullet) = @_; - my $token = $gullet->readXToken(0); - # Also resolve \let variants: - my $meaning = $STATE->lookupMeaning($token); - if ($meaning && ref $meaning eq 'LaTeXML::Core::Token') { - $token = $meaning; } + my $token = $gullet->readXToken(0, 1); if (!$token) { - Error('expected', 'ExpandedIfToken', $gullet, "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty"); + Error('expected', 'ExpandedIfToken', $gullet, + "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty"); $token = T_CS('\@empty'); } - if ($$token[2]) { # marked dont_expand - if ($$token[2][1] == CC_ACTIVE) { # treat as active character, if originally such - return $$token[2]; } - else { # otherwise, treat as relax for comparisons - return T_CS('\relax'); } } - else { # normal case, treat token as-is - return $token; } }); + return $token; }); DefConditional('\if ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCharcode == $_[2]->getCharcode; }); DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCatcode == $_[2]->getCatcode; });