Skip to content

Commit

Permalink
Evolve readBalanced to replace new readTokens as scan_toks analog; it…
Browse files Browse the repository at this point in the history
… may return comments; Cleanup API of readXTokens; it never returns comments; it has an option for special case handling of \if arguments; packParameters no longer unwraps \noexpand'd tokens; Use these new gullet methods to rewrite parameter types: DefPlain, GeneralText, XUntil, DefExpanded, ExpandedIfToken
  • Loading branch information
brucemiller committed Jul 4, 2023
1 parent b325879 commit ce15359
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 75 deletions.
107 changes: 52 additions & 55 deletions lib/LaTeXML/Core/Gullet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -314,30 +314,28 @@ sub unread {
# Note that most tokens pass through here, so be Fast & Clean! readToken is folded in.
# `Toplevel' processing, (if $toplevel is true), used at the toplevel processing by Stomach,
# will step to the next input stream (Mouth) if one is available,
# If $commentsok is true, will also pass comments.
# $toplevel is doing TWO distinct things. When true:
# * If a mouth is exhausted, move on to the containing mouth to continue reading
# * expand even protected defns, essentially this means expand "for execution"
# Note that, unlike readTokens, this does NOT defer expansion of \the & friends.
# Note that, unlike readBalanced, this does NOT defer expansion of \the & friends.
# Also, \noexpand'd tokens effectively act like \relax
# For arguments to \if,\ifx, etc use $for_conditional true,
# which handles \noexpand and CS which have been \let to tokens specially.
sub readXToken {
my ($self, $toplevel, $commentsok) = @_;
my ($self, $toplevel, $for_conditional) = @_;
$toplevel = 1 unless defined $toplevel;
my $autoclose = $toplevel; # Potentially, these should have distinct controls?
my $for_evaluation = $toplevel;
return shift(@{ $$self{pending_comments} }) if $commentsok && @{ $$self{pending_comments} };
my ($token, $cc, $defn, $atoken, $atype, $ahidden);
while (1) {
while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) {
if ($cc == CC_COMMENT) {
return $token if $commentsok;
push(@{ $$self{pending_comments} }, $token); }
elsif ($cc == CC_MARKER) {
$self->handleMarker($token); } }
if (!defined $token) { # Else read from current mouth
while (($token = $$self{mouth}->readToken()) && $CATCODE_HOLD[$cc = $$token[1]]) {
if ($cc == CC_COMMENT) {
return $token if $commentsok;
push(@{ $$self{pending_comments} }, $token); }
elsif ($cc == CC_MARKER) {
$self->handleMarker($token); } } }
Expand All @@ -346,29 +344,32 @@ sub readXToken {
return unless $autoclose && $$self{autoclose} && @{ $$self{mouthstack} };
$self->closeMouth; } # Next input stream.
elsif (my $unexpanded = $$token[2]) { # Handle \noexpand; Inline get_dont_expand
return $token; } # returns something that acts like \relax
return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\relax'));
}
## Wow!!!!! See TeX the Program \S 309
elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested)
&& $LaTeXML::READING_ALIGNMENT
&& (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) {
$self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); }
## Note: use general-purpose lookup, since we may reexamine $defn below
elsif ($LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc]
&& defined($defn = $STATE->lookupMeaning($token))
&& ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition
&& $$defn{isExpandable}
&& ($for_evaluation || !$$defn{isProtected})) { # is this the right logic here? don't expand unless di
local $LaTeXML::CURRENT_TOKEN = $token;
my $r;
no warnings 'recursion';
my @expansion = map { (($r = ref $_) eq 'LaTeXML::Core::Token' ? $_
: ($r eq 'LaTeXML::Core::Tokens' ? @$_
: Error('misdefined', $r, undef, "Expected a Token, got " . Stringify($_),
"in " . ToString($defn)) || T_OTHER(Stringify($_)))) }
$defn->invoke($self);
next unless @expansion;
# add the newly expanded tokens back into the gullet stream, in the ordinary case.
unshift(@{ $$self{pushback} }, @expansion); }
&& defined($defn = $STATE->lookupMeaning($token))) {
if ((ref $defn) eq 'LaTeXML::Core::Token') { # \let to a token? Return it!
return ($for_conditional ? $defn : $token); }
elsif (!$$defn{isExpandable} # Not expandable or is protected
|| ($$defn{isProtected} && !$for_evaluation)) {
return $token; }
else {
local $LaTeXML::CURRENT_TOKEN = $token;
my $r;
no warnings 'recursion';
my @expansion = map { (($r = ref $_) eq 'LaTeXML::Core::Token' ? $_
: ($r eq 'LaTeXML::Core::Tokens' ? @$_
: Error('misdefined', $r, undef, "Expected a Token, got " . Stringify($_),
"in " . ToString($defn)) || T_OTHER(Stringify($_)))) }
$defn->invoke($self);
# add the newly expanded tokens back into the gullet stream, in the ordinary case.
unshift(@{ $$self{pushback} }, @expansion); } }
elsif ($$token[1] == CC_CS && !(defined $defn)) {
$STATE->generateErrorStub($self, $token); # cs SHOULD have defn by now; report early!
return $token; }
Expand All @@ -377,32 +378,37 @@ sub readXToken {
}
return; } # never get here.

# readTokens approximates TeX's scan_toks (but doesn't parse \def parameter lists)
# It expects to read a balanced {...} (requiring the opening {
# and optionally ($expand) expands while reading, but deferring \the and related.
# readBalanced approximates TeX's scan_toks (but doesn't parse \def parameter lists)
# and only optionally requires the opening "{".
# It may return comments in the token lists.
# it optionally ($expand) expands while reading, but deferring \the and related.
# The $macrodef flag affects whether # parameters are "packed" for macro bodies.
# If $require_open is true, the opening T_BEGIN has not yet been read, and is required.
# Lookup table (keyed by control-sequence name) of the "\the-like" commands.
# When reading with expansion, the tokens produced by invoking one of these
# are pushed directly into the result instead of being put back on the
# pushback for further expansion.
our $DEFERRED_COMMANDS = {
'\the' => 1,
'\showthe' => 1,
'\unexpanded' => 1,
'\detokenize' => 1
};

sub readTokens {
my ($self, $macrodef, $expand) = @_;
sub readBalanced {
my ($self, $expanded, $macrodef, $require_open) = @_;
local $LaTeXML::ALIGN_STATE = 1000000;
my $startloc = ($$self{verbosity} > 0) && $self->getLocator;
# Do we need to expand to get the { ???
my $token = ($expand ? $self->readXToken(0, 1) : $self->readToken());
if ((!$token) || ($$token[1] != CC_BEGIN)) {
Error('expected', '{', $self, "Expected opening '{'");
return Tokens(); }
if ($require_open) {
my $token = ($expanded ? $self->readXToken(0) : $self->readToken());
if ((!$token) || ($$token[1] != CC_BEGIN)) {
Error('expected', '{', $self, "Expected opening '{'");
return Tokens(); } }
my @tokens = ();
my $level = 1;
my ($cc, $defn, $atoken, $atype, $ahidden);
my ($token, $cc, $defn, $atoken, $atype, $ahidden);
# Inlined readToken (we'll keep comments in the result)
while (1) {
push(@tokens, shift(@{ $$self{pending_comments} })) if @{ $$self{pending_comments} };
if (@{ $$self{pending_comments} }) {
push(@tokens, @{ $$self{pending_comments} });
$$self{pending_comments} = []; }
# Examine pushback first
while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) {
if ($cc == CC_COMMENT) { push(@tokens, $token); }
Expand Down Expand Up @@ -432,7 +438,7 @@ sub readTokens {
&& (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) {
$self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); }
## Note: use general-purpose lookup, since we may reexamine $defn below
elsif ($expand &&
elsif ($expanded &&
$LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc]
&& defined($defn = $STATE->lookupMeaning($token))
&& ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition
Expand All @@ -448,22 +454,28 @@ sub readTokens {
$defn->invoke($self);
next unless @expansion;
# If a special \the type command, push the expansion directly into the result
# Well, almost directly: handle any MARKER tokens now, and possibly un-pack T_PARAM
if ($$DEFERRED_COMMANDS{ $$defn{cs}[0] }) {
if ($macrodef) { # "unpack" # to cover the packParameters at end!
@expansion = map { ($$_[1] == CC_PARAM ? ($_, $_) : $_); } @expansion }
push(@tokens, @expansion); }
else { # otherwise, prepend to pushback to be expanded further.
foreach my $t (@expansion) {
my $cc = $$t[1];
if ($cc == CC_MARKER) { $self->handleMarker($t); }
elsif (($cc == CC_PARAM) && $macrodef) {
push(@tokens, $t, $t); } # "unpack" to cover the packParameters at end!
else {
push(@tokens, $t); } }
}
else { # otherwise, prepend to pushback to be expanded further.
unshift(@{ $$self{pushback} }, @expansion); } }
else {
if ($expand && ($$token[1] == CC_CS) && !(defined $defn)) {
if ($expanded && ($$token[1] == CC_CS) && !(defined $defn)) {
$STATE->generateErrorStub($self, $token); } # cs SHOULD have defn by now; report early!
push(@tokens, $token); } # just return it
}
if ($level > 0) {
# TODO: The current implementation has a limitation where if the balancing end is in a different mouth,
# it will not be recognized.
my $loc_message = $startloc ? ("Started at " . ToString($startloc)) : ("Ended at " . ToString($self->getLocator));
Error('expected', "}", $self, "Gullet->readTokens ran out of input in an unbalanced state.",
Error('expected', "}", $self, "Gullet->readBalanced ran out of input in an unbalanced state.",
$loc_message); }
return ($macrodef ? Tokens(@tokens)->packParameters : Tokens(@tokens)); }

Expand Down Expand Up @@ -537,21 +549,6 @@ sub skipFiller {
}
return; }

# Read a sequence of tokens balanced in {}
# assuming the { has already been read.
# Returns a Tokens list of the balanced sequence, omitting the closing }
our @CATCODE_BALANCED_INTERESTING = (
0, 1, 1, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 1, 0, 0);

# Thin wrapper around readTokens for callers that have already consumed the opening {.
#   $expanded : passed through as readTokens' expand flag.
# The macrodef flag of readTokens is passed as undef.
# Returns whatever readTokens returns.
sub readBalanced {
my ($self, $expanded) = @_;
$self->unread(T_BEGIN); # Since we've already read the {, but readTokens requires it.
return $self->readTokens(undef, $expanded); }

sub ifNext {
my ($self, $token) = @_;
if (my $tok = $self->readToken()) {
Expand Down
3 changes: 0 additions & 3 deletions lib/LaTeXML/Core/Tokens.pm
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,6 @@ sub packParameters {
# e.g. \detokenize{#,} is legal, while \textbf{#,} is not
Error('misdefined', 'expansion', undef, "Parameter has a malformed arg, should be #1-#9 or ##. ",
"In expansion " . ToString($self)); } }
elsif (my $inner = $$t[2]) { # unwrap \noexpand tokens
$repacked = 1;
push(@rescanned, ($$inner[2] || $inner)); }
else {
push(@rescanned, $t); } }
return ($repacked ? bless [@rescanned], 'LaTeXML::Core::Tokens' : $self); }
Expand Down
26 changes: 9 additions & 17 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ DefParameterType('Plain', sub {

DefParameterType('DefPlain', sub {
my ($gullet, $inner) = @_;
my $value = $gullet->readTokens(1, 0);
my $value = $gullet->readBalanced(0, 1, 1);
if ($inner) {
($value) = $inner->reparseArgument($gullet, $value); }
return $value; },
Expand Down Expand Up @@ -152,7 +152,8 @@ DefParameterType('Optional', sub {
DefParameterType('GeneralText', sub {
my ($gullet) = @_;
$gullet->unread($gullet->readXToken); # Force expansion to skip <filler> before required {
return $gullet->readTokens(0, 0); });

return $gullet->readBalanced(0, 0, 1); });

DefParameterType('Until', sub {
my ($gullet, $until) = @_;
Expand Down Expand Up @@ -231,7 +232,7 @@ DefParameterType('XUntil', sub {
# but expanding \the-like commands only once.
DefParameterType('Expanded', sub {
my ($gullet) = @_;
$gullet->readTokens(0, 1); },
$gullet->readBalanced(1, 0, 1); },
reversion => sub {
my ($arg) = @_;
(T_BEGIN, Revert($arg), T_END); });
Expand All @@ -242,7 +243,7 @@ DefParameterType('Expanded', sub {
# and also packing # parameters
DefParameterType('DefExpanded', sub {
my ($gullet) = @_;
return $gullet->readTokens(1, 1); },
return $gullet->readBalanced(1, 1, 1); },
reversion => sub {
my ($arg) = @_;
(T_BEGIN, Revert($arg), T_END); });
Expand Down Expand Up @@ -773,21 +774,12 @@ DefConditionalI('\ifmmode', undef, sub { LookupValue('IN_MATH'); });

DefParameterType('ExpandedIfToken', sub {
my ($gullet) = @_;
my $token = $gullet->readXToken(0);
# Also resolve \let variants:
my $meaning = $STATE->lookupMeaning($token);
if ($meaning && ref $meaning eq 'LaTeXML::Core::Token') {
$token = $meaning; }
my $token = $gullet->readXToken(0, 1);
if (!$token) {
Error('expected', 'ExpandedIfToken', $gullet, "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty");
Error('expected', 'ExpandedIfToken', $gullet,
"conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty");
$token = T_CS('\@empty'); }
if ($$token[2]) { # marked dont_expand
if ($$token[2][1] == CC_ACTIVE) { # treat as active character, if originally such
return $$token[2]; }
else { # otherwise, treat as relax for comparisons
return T_CS('\relax'); } }
else { # normal case, treat token as-is
return $token; } });
return $token; });

# \if: true when the two ExpandedIfToken arguments report the same getCharcode.
DefConditional('\if ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCharcode == $_[2]->getCharcode; });
# \ifcat: true when the two ExpandedIfToken arguments report the same getCatcode.
DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCatcode == $_[2]->getCatcode; });
Expand Down

0 comments on commit ce15359

Please sign in to comment.