diff --git a/lib/LaTeXML/Common/Error.pm b/lib/LaTeXML/Common/Error.pm index 228fa32b1..7ef70758f 100644 --- a/lib/LaTeXML/Common/Error.pm +++ b/lib/LaTeXML/Common/Error.pm @@ -15,7 +15,6 @@ use warnings; use LaTeXML::Global; use LaTeXML::Common::Object; use LaTeXML::Util::Pathname; -use LaTeXML::Core::Token qw(T_CS); use Time::HiRes; use Term::ANSIColor qw(colored colorstrip); diff --git a/lib/LaTeXML/Core/Definition/Conditional.pm b/lib/LaTeXML/Core/Definition/Conditional.pm index 0b12fa784..50f84eedb 100644 --- a/lib/LaTeXML/Core/Definition/Conditional.pm +++ b/lib/LaTeXML/Core/Definition/Conditional.pm @@ -120,7 +120,6 @@ sub skipConditionalBody { while (1) { my ($t, $cond_type); while ($t = shift(@{ $$gullet{pushback} }) || $$gullet{mouth}->readToken()) { - $t = $$t[2] if $$t[1] == CC_SMUGGLE_THE; if ($LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$$t[1]] && ($cond_type = $STATE->lookupConditional($t))) { last; } } diff --git a/lib/LaTeXML/Core/Definition/Expandable.pm b/lib/LaTeXML/Core/Definition/Expandable.pm index aaa965a51..72bc4cf39 100644 --- a/lib/LaTeXML/Core/Definition/Expandable.pm +++ b/lib/LaTeXML/Core/Definition/Expandable.pm @@ -29,9 +29,7 @@ sub new { if (ref $expansion eq 'LaTeXML::Core::Tokens') { Fatal('misdefined', $cs, $source, "Expansion of '" . ToString($cs) . "' has unbalanced {}", "Expansion is " . ToString($expansion)) unless $expansion->isBalanced; - # rescan for match tokens and unwrap dont_expand... - $expansion = $expansion->packParameters unless $traits{nopackParameters}; - } + $expansion = $expansion->packParameters unless $traits{nopackParameters}; } elsif (!ref $expansion) { $expansion = TokenizeInternal($expansion)->packParameters; } @@ -55,9 +53,9 @@ sub invoke { no warnings 'recursion'; my ($self, $gullet, $onceonly) = @_; # shortcut for "trivial" macros; but only if not tracing & profiling!!!! - my $_tracing = $STATE->lookupValue('TRACING') || 0; - my $tracing = ($_tracing & TRACE_MACROS); - my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs}); + my $_tracing = $STATE->lookupValue('TRACING') || 0; + my $tracing = ($_tracing & TRACE_MACROS); + my $profiled = ($_tracing & TRACE_PROFILE) && ($LaTeXML::CURRENT_TOKEN || $$self{cs}); my $expansion = $$self{expansion}; my $etype = ref $expansion; my $result; diff --git a/lib/LaTeXML/Core/Gullet.pm b/lib/LaTeXML/Core/Gullet.pm index f46bbc8c4..d1448f0a1 100644 --- a/lib/LaTeXML/Core/Gullet.pm +++ b/lib/LaTeXML/Core/Gullet.pm @@ -101,16 +101,6 @@ sub flush { $$self{mouthstack} = []; return; } -sub setup_scan { - my ($self) = @_; - if ($$self{pushback_has_smuggled_the}) { - $$self{pushback_has_smuggled_the} = 0; - # setup new scan by removing any smuggle CCs - for my $token (@{ $$self{pushback} }) { - if ($$token[1] == CC_SMUGGLE_THE) { - $token = $$token[2]; } } } - return; } - # Do something, while reading stuff from a specific Mouth. # This reads ONLY from that mouth (or any mouth openned by code in that source), # and the mouth should end up empty afterwards, and only be closed here. @@ -277,7 +267,6 @@ sub readToken { my ($token, $cc, $atoken, $atype, $ahidden); while (1) { while (($token = shift(@{ $$self{pushback} })) - && (($$token[1] != CC_SMUGGLE_THE) || ($token = $$token[2])) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { push(@{ $$self{pending_comments} }, $token); } @@ -305,6 +294,9 @@ sub readToken { && $LaTeXML::READING_ALIGNMENT && (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) { $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } + elsif ((defined $token) && ($$token[1] == CC_CS) && ($$token[0] eq '\dont_expand')) { + my $unexpanded = $self->readToken; # Replace next token with a special \relax + return T_CS('\special_relax'); } else { last; } } return $token; } @@ -325,29 +317,28 @@ sub unread { # Note that most tokens pass through here, so be Fast & Clean! readToken is folded in. # `Toplevel' processing, (if $toplevel is true), used at the toplevel processing by Stomach, # will step to the next input stream (Mouth) if one is available, -# If $commentsok is true, will also pass comments. # $toplevel is doing TWO distinct things. When true: # * If a mouth is exhausted, move on to the containing mouth to continue reading # * expand even protected defns, essentially this means expand "for execution" +# Note that, unlike readBalanced, this does NOT defer expansion of \the & friends. +# Also, \noexpand'd tokens effectively act ilke \relax +# For arguments to \if,\ifx, etc use $for_conditional true, +# which handles \noexpand and CS which have been \let to tokens specially. sub readXToken { - my ($self, $toplevel, $commentsok) = @_; + my ($self, $toplevel, $for_conditional) = @_; $toplevel = 1 unless defined $toplevel; my $autoclose = $toplevel; # Potentially, these should have distinct controls? my $for_evaluation = $toplevel; - return shift(@{ $$self{pending_comments} }) if $commentsok && @{ $$self{pending_comments} }; my ($token, $cc, $defn, $atoken, $atype, $ahidden); while (1) { - # NOTE: CC_SMUGGLE_THE should ONLY appear in pushback! while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { - return $token if $commentsok; push(@{ $$self{pending_comments} }, $token); } elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } if (!defined $token) { # Else read from current mouth while (($token = $$self{mouth}->readToken()) && $CATCODE_HOLD[$cc = $$token[1]]) { if ($cc == CC_COMMENT) { - return $token if $commentsok; push(@{ $$self{pending_comments} }, $token); } elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } } @@ -355,9 +346,9 @@ sub readXToken { if (!defined $token) { return unless $autoclose && $$self{autoclose} && @{ $$self{mouthstack} }; $self->closeMouth; } # Next input stream. - # Handle \noexpand and smuggled tokens; either expand to $$token[2] or defer till later - elsif (my $unexpanded = $$token[2]) { # Inline get_dont_expand - return ($cc != CC_SMUGGLE_THE) || $LaTeXML::SMUGGLE_THE ? $token : $unexpanded; } + elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) { + my $unexpanded = $self->readToken; + return ($for_conditional && ($$unexpanded[1] == CC_ACTIVE) ? $unexpanded : T_CS('\special_relax')); } ## Wow!!!!! See TeX the Program \S 309 elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested) && $LaTeXML::READING_ALIGNMENT @@ -365,10 +356,97 @@ sub readXToken { $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } ## Note: use general-purpose lookup, since we may reexamine $defn below elsif ($LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc] + && defined($defn = $STATE->lookupMeaning($token))) { + if ((ref $defn) eq 'LaTeXML::Core::Token') { # \let to a token? Return it! + return ($for_conditional ? $defn : $token); } + elsif (!$$defn{isExpandable} # Not expandable or is protected + || ($$defn{isProtected} && !$for_evaluation)) { + return $token; } + else { + local $LaTeXML::CURRENT_TOKEN = $token; + my $r; + no warnings 'recursion'; + my @expansion = map { (($r = ref $_) eq 'LaTeXML::Core::Token' ? $_ + : ($r eq 'LaTeXML::Core::Tokens' ? @$_ + : Error('misdefined', $r, undef, "Expected a Token, got " . Stringify($_), + "in " . ToString($defn)) || T_OTHER(Stringify($_)))) } + $defn->invoke($self); + # add the newly expanded tokens back into the gullet stream, in the ordinary case. + unshift(@{ $$self{pushback} }, @expansion); } } + elsif ($$token[1] == CC_CS && !(defined $defn)) { + $STATE->generateErrorStub($self, $token); # cs SHOULD have defn by now; report early! + return $token; } + else { + return $token; } # just return it + } + return; } # never get here. + +# readBalanced approximates TeX's scan_toks (but doesn't parse \def parameter lists) +# and only optionally requires the openning "{". +# It may return comments in the token lists. +# it optionally ($expand) expands while reading, but deferring \the and related. +# The $macrodef flag affects whether # parameters are "packed" for macro bodies. +# If $require_open is true, the opening T_BEGIN has not yet been read, and is required. +our $DEFERRED_COMMANDS = { + '\the' => 1, + '\showthe' => 1, + '\unexpanded' => 1, + '\detokenize' => 1 +}; + +sub readBalanced { + my ($self, $expanded, $macrodef, $require_open) = @_; + local $LaTeXML::ALIGN_STATE = 1000000; + my $startloc = ($$self{verbosity} > 0) && $self->getLocator; + # Does we need to expand to get the { ??? + if ($require_open) { + my $token = ($expanded ? $self->readXToken(0) : $self->readToken()); + if ((!$token) || ($$token[1] != CC_BEGIN)) { + Error('expected', '{', $self, "Expected opening '{'"); + return Tokens(); } } + my @tokens = (); + my $level = 1; + my ($token, $cc, $defn, $atoken, $atype, $ahidden); + # Inlined readToken (we'll keep comments in the result) + while (1) { + if (@{ $$self{pending_comments} }) { + push(@tokens, @{ $$self{pending_comments} }); + $$self{pending_comments} = []; } + # Examine pushback first + while (($token = shift(@{ $$self{pushback} })) && $CATCODE_HOLD[$cc = $$token[1]]) { + if ($cc == CC_COMMENT) { push(@tokens, $token); } + elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } + if (!defined $token) { # Else read from current mouth + while (($token = $$self{mouth}->readToken()) && $CATCODE_HOLD[$cc = $$token[1]]) { + if ($cc == CC_COMMENT) { push(@tokens, $token); } + elsif ($cc == CC_MARKER) { $self->handleMarker($token); } } } + ProgressStep() if ($$self{progress}++ % $TOKEN_PROGRESS_QUANTUM) == 0; + if (!defined $token) { + # What's the right error handling now? + last; } + elsif (($cc == CC_CS) && ($$token[0] eq '\dont_expand')) { + push(@tokens, readToken($self)); } # Pass on NEXT token, unchanged. + elsif ($cc == CC_END) { + $level--; + if (!$level) { + last; } + push(@tokens, $token); } + elsif ($cc == CC_BEGIN) { + $level++; + push(@tokens, $token); } + ## Wow!!!!! See TeX the Program \S 309 + # Not sure if this code still applies within scan_toks??? + elsif (!$LaTeXML::ALIGN_STATE # SHOULD count nesting of { }!!! when SCANNED (not digested) + && $LaTeXML::READING_ALIGNMENT + && (($atoken, $atype, $ahidden) = $self->isColumnEnd($token))) { + $self->handleTemplate($LaTeXML::READING_ALIGNMENT, $token, $atype, $ahidden); } + ## Note: use general-purpose lookup, since we may reexamine $defn below + elsif ($expanded && + $LaTeXML::Core::State::CATCODE_ACTIVE_OR_CS[$cc] && defined($defn = $STATE->lookupMeaning($token)) && ((ref $defn) ne 'LaTeXML::Core::Token') # an actual definition && $$defn{isExpandable} - && ($for_evaluation || !$$defn{isProtected})) { # is this the right logic here? don't expand unless di + && (!$$defn{isProtected})) { # is this the right logic here? don't expand unless di local $LaTeXML::CURRENT_TOKEN = $token; my $r; no warnings 'recursion'; @@ -378,24 +456,33 @@ sub readXToken { "in " . ToString($defn)) || T_OTHER(Stringify($_)))) } $defn->invoke($self); next unless @expansion; - if ($$LaTeXML::Core::Token::SMUGGLE_THE_COMMANDS{ $$defn{cs}[0] }) { - # magic THE_TOKS handling, add to pushback with a single-use noexpand flag only valid - # at the exact time the token leaves the pushback. - # This is *required to be different* from the noexpand flag, as per the B Book - @expansion = map { ($LaTeXML::Core::Token::CATCODE_CAN_SMUGGLE_THE[$$_[1]] ? bless ["SMUGGLE_THE", CC_SMUGGLE_THE, $_], 'LaTeXML::Core::Token' : $_) } @expansion; - # PERFORMANCE: - # explicitly flag that we've seen this case, so that higher levels know to - # unset the flag from the entire {pushback} - $$self{pushback_has_smuggled_the} = 1; } - # add the newly expanded tokens back into the gullet stream, in the ordinary case. - unshift(@{ $$self{pushback} }, @expansion); } - elsif ($$token[1] == CC_CS && !(defined $defn)) { - $STATE->generateErrorStub($self, $token); # cs SHOULD have defn by now; report early! - return $token; } + # If a special \the type command, push the expansion directly into the result + # Well, almost directly: handle any MARKER tokens now, and possibly un-pack T_PARAM + if ($$DEFERRED_COMMANDS{ $$defn{cs}[0] }) { + foreach my $t (@expansion) { + my $cc = $$t[1]; + if ($cc == CC_MARKER) { $self->handleMarker($t); } + elsif (($cc == CC_PARAM) && $macrodef) { + push(@tokens, $t, $t); } # "unpack" to cover the packParameters at end! + else { + push(@tokens, $t); } } + } + else { # otherwise, prepend to pushback to be expanded further. + unshift(@{ $$self{pushback} }, @expansion); } } else { - return $token; } # just return it + if ($expanded && ($$token[1] == CC_CS) && !(defined $defn)) { + $STATE->generateErrorStub($self, $token); } # cs SHOULD have defn by now; report early! + push(@tokens, $token); } # just return it } - return; } # never get here. + if ($level > 0) { + # TODO: The current implementation has a limitation where if the balancing end is in a different mouth, + # it will not be recognized. + my $loc_message = $startloc ? ("Started at " . ToString($startloc)) : ("Ended at " . ToString($self->getLocator)); + Error('expected', "}", $self, "Gullet->readBalanced ran out of input in an unbalanced state.", + $loc_message); } + return ($macrodef ? Tokens(@tokens)->packParameters : Tokens(@tokens)); } + +#====================================================================== # Read the next raw line (string); # primarily to read from the Mouth, but keep any unread input! @@ -403,7 +490,7 @@ sub readRawLine { my ($self) = @_; # If we've got unread tokens, they presumably should come before the Mouth's raw data # but we'll convert them back to string. - my @tokens = map { ($$_[1] == CC_SMUGGLE_THE ? $$_[2] : $_) } @{ $$self{pushback} }; + my @tokens = @{ $$self{pushback} }; my @markers = grep { $_->getCatcode == CC_MARKER } @tokens; if (@markers) { # Whoops, profiling markers! @tokens = grep { $_->getCatcode != CC_MARKER } @tokens; # Remove @@ -465,45 +552,6 @@ sub skipFiller { } return; } -# Read a sequence of tokens balanced in {} -# assuming the { has already been read. -# Returns a Tokens list of the balanced sequence, omitting the closing } -our @CATCODE_BALANCED_INTERESTING = ( - 0, 1, 1, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 1, 0, 0); - -sub readBalanced { - my ($self, $expanded) = @_; - local $LaTeXML::ALIGN_STATE = 1000000; - my @tokens = (); - my ($token, $level) = (undef, 1); - my $startloc = ($$self{verbosity} > 0) && $self->getLocator; - # Inlined readToken (we'll keep comments in the result) - while ($token = ($expanded ? $self->readXToken(0, 1) : $self->readToken())) { - my $cc = $$token[1]; - if (!$CATCODE_BALANCED_INTERESTING[$cc]) { - push(@tokens, $token); } - elsif ($cc == CC_END) { - $level--; - if (!$level) { - last; } - push(@tokens, $token); } - elsif ($cc == CC_BEGIN) { - $level++; - push(@tokens, $token); } - elsif ($cc == CC_MARKER) { # Really should already have been handled by read(X)Token - LaTeXML::Core::Definition::stopProfiling($token, 'expand'); } } - if ($level > 0) { - # TODO: The current implementation has a limitation where if the balancing end is in a different mouth, - # it will not be recognized. - my $loc_message = $startloc ? ("Started at " . ToString($startloc)) : ("Ended at " . ToString($self->getLocator)); - Error('expected', "}", $self, "Gullet->readBalanced ran out of input in an unbalanced state.", - $loc_message); } - return Tokens(@tokens); } - sub ifNext { my ($self, $token) = @_; if (my $tok = $self->readToken()) { @@ -565,7 +613,6 @@ sub readUntil { my $want = $want[0]; # while(($token = $self->readToken) && !$token->equals($want)){ while (($token = shift(@{ $$self{pushback} }) || $$self{mouth}->readToken()) - && (($$token[1] != CC_SMUGGLE_THE) || ($token = $$token[2])) && !$token->equals($want)) { my $cc = $$token[1]; if ($cc == CC_MARKER) { # would have been handled by readToken, but we're bypassing diff --git a/lib/LaTeXML/Core/Parameter.pm b/lib/LaTeXML/Core/Parameter.pm index 8988b3e0a..7d987f949 100644 --- a/lib/LaTeXML/Core/Parameter.pm +++ b/lib/LaTeXML/Core/Parameter.pm @@ -88,7 +88,6 @@ sub read { my $value = &{ $$self{reader} }($gullet, @{ $$self{extra} || [] }); $value = $value->neutralize(@{ $$self{semiverbatim} }) if $$self{semiverbatim} && (ref $value) && $value->can('neutralize'); - $value = $value->packParameters if $value && $$self{packParameters}; if ($$self{semiverbatim}) { # Open coded revertCatcodes $STATE->endSemiverbatim(); } if ((!defined $value) && !$$self{optional}) { @@ -106,7 +105,6 @@ sub reparse { my ($self, $gullet, $tokens) = @_; # Needs neutralization, since the keyvals may have been tokenized already??? # perhaps a better test would involve whether $tokens is, in fact, Tokens? - $tokens = $tokens->packParameters if $tokens && $$self{packParameters}; if (($$self{type} eq 'Plain') || $$self{undigested}) { # Gack! return $tokens; } elsif ($$self{semiverbatim}) { # Needs neutralization diff --git a/lib/LaTeXML/Core/Parameters.pm b/lib/LaTeXML/Core/Parameters.pm index 9f1fe9c21..bc0454d1e 100644 --- a/lib/LaTeXML/Core/Parameters.pm +++ b/lib/LaTeXML/Core/Parameters.pm @@ -59,7 +59,6 @@ sub revertArguments { sub readArguments { my ($self, $gullet, $fordefn) = @_; my @args = (); - $gullet->setup_scan(); my ($p, $v); return map { $p = $_; $v = $p && $p->read($gullet, $fordefn); ($$p{novalue} ? () : $v); } @$self; } @@ -67,7 +66,6 @@ sub readArgumentsAndDigest { my ($self, $stomach, $fordefn) = @_; my @args = (); my $gullet = $stomach->getGullet; - $gullet->setup_scan(); foreach my $parameter (@$self) { my $value = $parameter->read($gullet, $fordefn); if (!$$parameter{novalue}) { diff --git a/lib/LaTeXML/Core/State.pm b/lib/LaTeXML/Core/State.pm index bc6dc8574..3d8f7c496 100644 --- a/lib/LaTeXML/Core/State.pm +++ b/lib/LaTeXML/Core/State.pm @@ -347,7 +347,6 @@ sub lookupMeaning { my ($self, $token) = @_; if (my $cs = $token && $CATCODE_ACTIVE_OR_CS[$$token[1]] - && !$$token[2] # return token itself, if \noexpand && $$token[0]) { my $e = $$self{meaning}{$cs}; return $e && $$e[0]; } else { return $token; } } @@ -422,7 +421,7 @@ sub lookupExpandable { return $defn; } return; } -# Whether token must be wrapped as dont_expand +# Whether token is affected by \noexpand sub isDontExpandable { my ($self, $token) = @_; # Basically: a CS or Active token that is either not defined, or is expandable @@ -466,7 +465,7 @@ sub lookupDigestableDefinition { # If a cs has been let to an executable token, lookup ITS defn. if (((ref $defn) eq 'LaTeXML::Core::Token') # If we're digesting an unexpanded, act like \relax - && ($lookupname = ($$defn[2] ? '\relax' : $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]])) + && ($lookupname = $CATCODE_EXECUTABLE_PRIMITIVE_NAME[$$defn[1]]) && ($entry = $$self{meaning}{$lookupname})) { $defn = $$entry[0]; } return $defn; } diff --git a/lib/LaTeXML/Core/Token.pm b/lib/LaTeXML/Core/Token.pm index 343dffa01..7dbb99970 100644 --- a/lib/LaTeXML/Core/Token.pm +++ b/lib/LaTeXML/Core/Token.pm @@ -29,11 +29,11 @@ our @EXPORT = ( CC_ALIGN CC_EOL CC_PARAM CC_SUPER CC_SUB CC_IGNORE CC_SPACE CC_LETTER CC_OTHER CC_ACTIVE CC_COMMENT CC_INVALID - CC_CS CC_MARKER CC_ARG CC_SMUGGLE_THE), + CC_CS CC_MARKER CC_ARG), # Token constructors qw( T_BEGIN T_END T_MATH T_ALIGN T_PARAM T_SUB T_SUPER T_SPACE &T_LETTER &T_OTHER &T_ACTIVE &T_COMMENT &T_CS - T_CR &T_MARKER T_ARG T_SMUGGLE_THE + T_CR &T_MARKER T_ARG &Token), # String exploders qw(&Explode &ExplodeText &UnTeX) @@ -59,10 +59,9 @@ use constant CC_ACTIVE => 13; use constant CC_COMMENT => 14; use constant CC_INVALID => 15; # Extended Catcodes for expanded output. -use constant CC_CS => 16; -use constant CC_MARKER => 17; # non TeX extension! -use constant CC_ARG => 18; # "out_param" in B Book -use constant CC_SMUGGLE_THE => 19; # defered expansion once +use constant CC_CS => 16; +use constant CC_MARKER => 17; # non TeX extension! +use constant CC_ARG => 18; # "out_param" in B Book # [The documentation for constant is a bit confusing about subs, # but these apparently DO generate constants; you always get the same one] @@ -95,22 +94,6 @@ sub T_ARG { Fatal('malformed', 'T_ARG', 'value should be #1-#9', "Illegal: " . $v->stringify); } } return bless ["$int", CC_ARG], 'LaTeXML::Core::Token'; } -# This hides tokens coming from \the (-like) primitives from expansion; CC_CS,CC_ACTIVE, but also CC_PARAM and CC_ARG -our @CATCODE_CAN_SMUGGLE_THE = ( - 0, 0, 0, 0, - 0, 0, 1, 0, - 0, 0, 0, 0, - 0, 1, 0, 0, - 1, 0, 1, 0); - -sub T_SMUGGLE_THE { - my ($t) = @_; - my $cc = $$t[1]; - if ($cc == CC_SMUGGLE_THE) { - # LaTeXML Bug, we haven't correctly emulated scan_toks! Offending token was: - Fatal('unexpected', 'CC_SMUGGLE_THE', 'We are masking a \the-produced token twice, this must Never happen.', "Illegal: " . $t->stringify); } - return ($CATCODE_CAN_SMUGGLE_THE[$cc] ? bless ["SMUGGLE_THE", CC_SMUGGLE_THE, $t], 'LaTeXML::Core::Token' : $t); } - sub Token { my ($string, $cc) = @_; return bless [$string, (defined $cc ? $cc : CC_OTHER)], 'LaTeXML::Core::Token'; } @@ -230,16 +213,9 @@ our @CATCODE_SHORT_NAME = #[CONSTANT] T_ALIGN T_EOL T_PARAM T_SUPER T_SUB T_IGNORE T_SPACE T_LETTER T_OTHER T_ACTIVE T_COMMENT T_INVALID - T_CS T_MARKER T_ARG T_SMUGGLE_THE + T_CS T_MARKER T_ARG ); -our $SMUGGLE_THE_COMMANDS = { - '\the' => 1, - '\showthe' => 1, - '\unexpanded' => 1, - '\detokenize' => 1 -}; - #====================================================================== # Accessors. @@ -314,30 +290,6 @@ sub substituteParameters { sub packParameters { return $_[0]; } -# Mark a token as not to be expanded (\noexpand) by hiding itself as the 3rd element of a new token. -# Wonder if this should only have effect on expandable tokens? -sub with_dont_expand { - my ($self) = @_; - my $cc = $$self[1]; - if ($cc == CC_SMUGGLE_THE) { - # LaTeXML Bug, we haven't correctly emulated scan_toks! Offending token was: - Fatal('unexpected', 'CC_SMUGGLE_THE', 'We are marking as \noexpand a masked \the-produced token, this must Never happen.', "Illegal: " . $self->stringify); } - return ((($cc == CC_CS) || ($cc == CC_ACTIVE)) && $STATE->isDontExpandable($self)) - ? bless ['\relax', CC_CS, $self], 'LaTeXML::Core::Token' - : $self; } - -# Return the original token of a not-expanded token, -# or undef if it isn't marked as such. -sub get_dont_expand { - my ($self) = @_; - return $$self[2]; } - -sub without_dont_expand { - my ($self) = @_; - # Remove dont_expand flag, remove SMUGGLE_THE wrapper - my $inner = $$self[2]; - return $inner ? ($$inner[2] || $inner) : $self; } - #====================================================================== # Note that this converts the string to a more `user readable' form using `standard' chars for catcodes. # We'll need to be careful about using string instead of reverting for internal purposes where the @@ -371,18 +323,14 @@ sub equals { (defined $b && (ref $a) eq (ref $b)) && ($$a[1] == $$b[1]) - && (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0])) - && ((!$$a[2]) == (!$$b[2])) # must have same dont-expand-edness - ; } + && (($$a[1] == CC_SPACE) || ($$a[0] eq $$b[0])); } -my @CONTROLNAME = ( #[CONSTANT] +my @CONTROLNAME = ( #[CONSTANT] qw( NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US)); # Primarily for error reporting. sub stringify { my ($self) = @_; - if ($$self[2]) { - return $$self[2]->stringify() . ($$self[1] == CC_SMUGGLE_THE ? " (defer expand once)" : " (dont expand)"); } my $string = $self->toString; # Make the token's char content more printable, since this is for error messages. if (length($string) == 1) { diff --git a/lib/LaTeXML/Core/Tokens.pm b/lib/LaTeXML/Core/Tokens.pm index 88ead1910..2c2d5f053 100644 --- a/lib/LaTeXML/Core/Tokens.pm +++ b/lib/LaTeXML/Core/Tokens.pm @@ -54,7 +54,7 @@ sub clone { # Return a string containing the TeX form of the Tokens sub revert { my ($self) = @_; - return map { ($$_[1] == CC_SMUGGLE_THE ? $$_[2] : $_); } @$self; } + return @$self; } # toString is used often, and for more keyword-like reasons, # NOT for creating valid TeX (use revert or UnTeX for that!) @@ -86,13 +86,8 @@ sub beDigested { sub neutralize { my ($self, @extraspecials) = @_; - # Remove dont_expand, but preserve SMUGGLE_THE return Tokens(map { $_->neutralize(@extraspecials) } @$self); } -sub without_dont_expand { - my ($self) = @_; - return Tokens(map { $_->without_dont_expand } @$self); } - sub isBalanced { my ($self) = @_; my $level = 0; @@ -117,11 +112,9 @@ sub substituteParameters { push(@result, (ref $arg eq 'LaTeXML::Core::Token' ? $arg : @$arg)); } } } # ->unlist return bless [@result], 'LaTeXML::Core::Tokens'; } -# Process the CC_PARAM tokens for use as a macro body (and other token lists) -# Groups PARAM+OTHER token pair into match tokens. -# Collapses PARAM+PARAM token pair into a single PARAM -# B book suggests running this -# and remove dont_expand markers. +# Packs repeated CC_PARAM tokens into CC_ARG tokens for use as a macro body (and other token lists) +# Also unwraps \noexpand tokens, since that is also needed for macro bodies +# (but not strictly part of packing parameters) sub packParameters { my ($self) = @_; my @rescanned = (); @@ -130,7 +123,6 @@ sub packParameters { while (my $t = shift @toks) { if ($$t[1] == CC_PARAM && @toks) { $repacked = 1; - # NOTE for future cleanup: Only CC_CS & CC_ACTIVE should ever get with_dont_expand! my $next_t = shift @toks; my $next_cc = $next_t && $$next_t[1]; if ($next_cc == CC_OTHER) { @@ -142,9 +134,6 @@ sub packParameters { # e.g. \detokenize{#,} is legal, while \textbf{#,} is not Error('misdefined', 'expansion', undef, "Parameter has a malformed arg, should be #1-#9 or ##. ", "In expansion " . ToString($self)); } } - elsif (my $inner = $$t[2]) { # Open-coded $t->without_dont_expand - $repacked = 1; - push(@rescanned, ($$inner[2] || $inner)); } else { push(@rescanned, $t); } } return ($repacked ? bless [@rescanned], 'LaTeXML::Core::Tokens' : $self); } diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 033827e3d..57b1501ec 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -384,8 +384,7 @@ sub Let { # If strings are given, assume CS tokens (most common case) $token1 = T_CS($token1) unless ref $token1; $token2 = T_CS($token2) unless ref $token2; - $STATE->assignMeaning($token1, - ($token2->get_dont_expand ? $token2 : $STATE->lookupMeaning($token2)), $scope); + $STATE->assignMeaning($token1, $STATE->lookupMeaning($token2), $scope); AfterAssignment(); return; } @@ -565,7 +564,7 @@ sub ComposeURL { my $parameter_options = { # [CONSTANT] nargs => 1, reversion => 1, optional => 1, novalue => 1, beforeDigest => 1, afterDigest => 1, - semiverbatim => 1, undigested => 1, packParameters => 1 }; + semiverbatim => 1, undigested => 1 }; sub DefParameterType { my ($type, $reader, %options) = @_; diff --git a/lib/LaTeXML/Package/TeX.pool.ltxml b/lib/LaTeXML/Package/TeX.pool.ltxml index fd414127e..67fbad11e 100644 --- a/lib/LaTeXML/Package/TeX.pool.ltxml +++ b/lib/LaTeXML/Package/TeX.pool.ltxml @@ -117,12 +117,11 @@ DefParameterType('Plain', sub { DefParameterType('DefPlain', sub { my ($gullet, $inner) = @_; - my $value = $gullet->readArg; + my $value = $gullet->readBalanced(0, 1, 1); if ($inner) { ($value) = $inner->reparseArgument($gullet, $value); } return $value; }, - packParameters => 1, - reversion => sub { + reversion => sub { my ($arg, $inner) = @_; (T_BEGIN, ($inner ? $inner->revertArguments($arg) : Revert($arg)), @@ -152,13 +151,9 @@ DefParameterType('Optional', sub { # which IS required in contrast to a general argument; ie a single token is not correct. DefParameterType('GeneralText', sub { my ($gullet) = @_; - my $open = $gullet->readXToken; - if ($open && ($open->getCatcode == CC_BEGIN)) { - return $gullet->readBalanced; } - else { - Error('expected', '{', $gullet, - "Expected here"); - return $open; } }); + $gullet->unread($gullet->readXToken); # Force expansion to skip before required { + + return $gullet->readBalanced(0, 0, 1); }); DefParameterType('Until', sub { my ($gullet, $until) = @_; @@ -233,44 +228,23 @@ DefParameterType('XUntil', sub { push(@tokens, $token); } } Tokens(@tokens); }); -# This is sorta like readbalanced, but expands as it goes. -# This appears to be needed by certain primitives (eg. \noalign ?) -# and maybe what we should be using for some Digested ?? +# This reads a braced tokens list, expanding as it goes, +# but expanding \the-like commands only once. DefParameterType('Expanded', sub { my ($gullet) = @_; - my $token = $gullet->readXToken(0); - if (!$token) { - Error('expected', 'expanded', $gullet, - "Expected here"); - return $token; } - my @tokens = (); - if ($token->getCatcode == CC_BEGIN) { - return scalar($gullet->readBalanced(1))->without_dont_expand; } - else { - return $token->without_dont_expand; } }, + $gullet->readBalanced(1, 0, 1); }, reversion => sub { my ($arg) = @_; (T_BEGIN, Revert($arg), T_END); }); -# Set SMUGGLE_THE=1 whenever you want to handle special TeX neutralization of -# tokens created by \the-like primitives. -# -# IMPORTANTLY, call packParameters early on the tokens read from the Gullet -# to enact the neutralization and discard the temporary smuggle flag that is required -# -# Whenever possible, use this `DefExpanded` parameter type directly, rather than hand-crafting a new one. +# This reads an expanded definition body, +# a braced tokens list, expanding as it goes, +# but expanding \the-like commands only once, +# and also packing # parameters DefParameterType('DefExpanded', sub { my ($gullet) = @_; - local $LaTeXML::SMUGGLE_THE = 1; - my $token = $gullet->readXToken; - if (!$token) { - Error('expected', 'defexpanded', $gullet, - "Expected here"); - return $token; } - my $expanded = ($token->getCatcode == CC_BEGIN ? $gullet->readBalanced(1) : $token); - return $expanded; }, - packParameters => 1, - reversion => sub { + return $gullet->readBalanced(1, 1, 1); }, + reversion => sub { my ($arg) = @_; (T_BEGIN, Revert($arg), T_END); }); @@ -344,7 +318,8 @@ DefParameterType('OptionalUndigested', sub { $_[0]->readOptional; }, # Read a keyword value (KeyVals), that will not be digested. DefParameterType('UndigestedKey', sub { $_[0]->readArg; }, undigested => 1); -DefParameterType('UndigestedDefKey', sub { $_[0]->readArg; }, undigested => 1, packParameters => 1); +DefParameterType('UndigestedDefKey', sub { + $_[0]->readArg->packParameters; }, undigested => 1); # Read a token as used when defining it, ie. it may be enclosed in braces. DefParameterType('DefToken', sub { @@ -799,21 +774,12 @@ DefConditionalI('\ifmmode', undef, sub { LookupValue('IN_MATH'); }); DefParameterType('ExpandedIfToken', sub { my ($gullet) = @_; - my $token = $gullet->readXToken(0); - # Also resolve \let variants: - my $meaning = $STATE->lookupMeaning($token); - if ($meaning && ref $meaning eq 'LaTeXML::Core::Token') { - $token = $meaning; } + my $token = $gullet->readXToken(0, 1); if (!$token) { - Error('expected', 'ExpandedIfToken', $gullet, "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty"); + Error('expected', 'ExpandedIfToken', $gullet, + "conditional expected a token argument, readXToken came back empty. Falling back to \\\@empty"); $token = T_CS('\@empty'); } - if ($$token[2]) { # marked dont_expand - if ($$token[2][1] == CC_ACTIVE) { # treat as active character, if originally such - return $$token[2]; } - else { # otherwise, treat as relax for comparisons - return T_CS('\relax'); } } - else { # normal case, treat token as-is - return $token; } }); + return $token; }); DefConditional('\if ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCharcode == $_[2]->getCharcode; }); DefConditional('\ifcat ExpandedIfToken ExpandedIfToken', sub { $_[1]->getCatcode == $_[2]->getCatcode; }); @@ -843,10 +809,13 @@ DefConditionalI('\iffalse', undef, sub { 0; }); # This makes \relax disappear completely after digestion # (which seems most TeX like). DefPrimitive('\relax', sub { (); }); -## However, this keeps a box, so it can appear in UnTeX +### However, this keeps a box, so it can appear in UnTeX ### DefPrimitive('\relax',undef); ## But if you do that, you've got to watch out since it usually -## shouldn't be a box; See the isRelax code in handleScripts, below +### shouldn't be a box; See the isRelax code in handleScripts, below +# Internal token produced by Gullet in response to \dont_expand; +# Acts like \relax, but isn't equal to it. +DefPrimitiveI('\special_relax', undef, sub { (); }); DefMacro('\number Number', sub { Explode($_[1]->valueOf); }); # define it here (only approxmiately), since it's already useful. @@ -1012,11 +981,20 @@ DefMacro('\expandafter Token Token', sub { else { ($tok, $xtok); } }); -# Replace the next token with it's not-expanded variant +# If next token is expandable, prefix it with the internal marker \dont_expand +# That token is never defined, explicitly handled in Gullet & should never escape the Gullet DefMacroI('\noexpand', undef, sub { my $token = $_[0]->readToken; # Missing token likely the result of "{\noexpand}" for which TeX would be unperturbed - return ($token ? $token->with_dont_expand : ()); }); + return ($token + ? ((($$token[1] == CC_CS) || ($$token[1] == CC_ACTIVE)) && $STATE->isDontExpandable($token) + ? (T_CS('\dont_expand'), $token) + : $token) + : ()); }); + +DefPrimitiveI('\dont_expand', undef, sub { + Error('misdefined', '\dont_expand', $_[0], + "The token \\dont_expand should never reach Stomach!"); }); DefMacroI('\topmark', undef, Tokens()); DefMacroI('\firstmark', undef, Tokens()); @@ -2506,21 +2484,17 @@ DefPrimitive('\lowercase GeneralText', sub { # Converts $tokens to a string in the fashion of \message and others: # doubles #, converts to string; optionally adds spaces after control sequences # in the spirit of the B Book, "show_token_list" routine, in 292. +# [This could be a $tokens->unpackParameters, but for the curious space treatment] sub writableTokens { my ($tokens) = @_; my @tokens = $tokens->unlist; - # unwrap a \noexpand-created \relax to its actual content, - # to avoid confusing users with a \relax dontexpand @tokens = map { - my $t = ($$_[2] || $_); - my $cc = $$t[1]; - if ($cc == CC_CS) { ($t, T_SPACE); } + my $cc = $$_[1]; + if ($cc == CC_CS) { ($_, T_SPACE); } elsif ($cc == CC_SPACE) { (T_SPACE); } - elsif ($cc == CC_PARAM) { ($t, $t); } - elsif ($cc == CC_ARG) { - # B Book, 294. Reduce to param+integer - (T_PARAM, T_OTHER($$t[0])); } - else { $t; } + elsif ($cc == CC_PARAM) { ($_, $_); } + elsif ($cc == CC_ARG) { (T_PARAM, T_OTHER($$_[0])); } + else { $_; } } @tokens; return UnTeX(Tokens(@tokens), 1); } @@ -5716,7 +5690,7 @@ DefAccent('\lfhook', "\x{0326}", ",", below => 1); # COMBINING COMMA BELOW # We're given a number pointing into the font, from which we can derive the standalone char. # From that, we want to figure out the combining character, but there could be one for # both the above & below cases! We'll prefer the above case. -DefPrimitive('\accent Number Expanded', sub { +DefPrimitive('\accent Number {}', sub { my ($stomach, $num, $letter) = @_; my $n = $num->valueOf; my $fontinfo = lookupFontinfo(LookupValue('textfont_0')); diff --git a/lib/LaTeXML/Package/expl3.sty.ltxml b/lib/LaTeXML/Package/expl3.sty.ltxml index 63e4b4f2f..f33ca1532 100644 --- a/lib/LaTeXML/Package/expl3.sty.ltxml +++ b/lib/LaTeXML/Package/expl3.sty.ltxml @@ -33,7 +33,6 @@ DefMacroI(T_CS('\__expl_status_pop:w'), '{}', sub { my $token; my $nbraces = 0; while (($token = shift(@{ $$gullet{pushback} }) || $$gullet{mouth}->readToken()) - && (($$token[1] != CC_SMUGGLE_THE) || ($token = $$token[2])) && !$token->equals($want1) && !$token->equals($want2)) { push(@arg_until, $token); if ($$token[1] == CC_BEGIN) { # And if it's a BEGIN, copy till balanced END