Skip to content

Commit

Permalink
Intarray (#2151)
Browse files Browse the repository at this point in the history
* Have \font, \fontdimen, \hyphenchar,\skewchar actually connect to stored font metric data, since 2023's expl3 code is using it for intarray storage

* New expl3.lua for Perl analogues of (a subset of) lua extensions for expl3

* Cleanup binding only loading the style file, but preloading the lua extensions (in Perl)

* Refer to fontinfo hash, rather than copy it

* Use ->defined_as, and proper array bounds checking, per Deyan Ginev

* A more robust AutoLoad capability that undefines all triggers for a given file if (& before) it is loaded

* A safer approach to \@pushfilename book-keeping that cooperates better with expl3's \@expl@push@filename@aux@@ dangerous way of knowing the file that is about to be loaded
  • Loading branch information
brucemiller authored Jul 20, 2023
1 parent 43610f7 commit 202aced
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 67 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ lib/LaTeXML/Package/elsarticle.cls.ltxml
lib/LaTeXML/Package/ed.sty.ltxml
lib/LaTeXML/Package/expl3.sty.ltxml
lib/LaTeXML/Package/expl3.ltx.ltxml
lib/LaTeXML/Package/expl3.lua.ltxml
lib/LaTeXML/Package/expl3.pool.ltxml
lib/LaTeXML/Package/emulateapj.cls.ltxml
lib/LaTeXML/Package/emulateapj.sty.ltxml
Expand Down
31 changes: 24 additions & 7 deletions lib/LaTeXML/Package.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1032,24 +1032,32 @@ sub DefAutoload {
my $csname = (ref $cs ? ToString($cs) : $cs);
$csname = '\\' . $csname unless $cs =~ /^\\/;
$cs = T_CS($csname) unless ref $cs;
if ($defnfile =~ /^(.*?)\.(pool|sty|cls)\.ltxml$/) {
$defnfile =~ s/\.ltxml$//;
if ($defnfile =~ /^(.*?)\.(pool|sty|cls)$/) {
my ($name, $type) = ($1, $2);
# if already loaded, or set, DONT redefine!
if (!(
LookupValue($name . '.' . $type . '_loaded') ||
LookupValue($name . '.' . $type . '.ltxml_loaded') ||
LookupMeaning($cs))) {

AssignMapping('autoload_' . $defnfile, $csname => 1);
DefMacroI($cs, undef, sub {
$STATE->assign_internal('meaning', $csname => undef, 'global'); # UNDEFINE (no recurse)
if ($type eq 'pool') { LoadPool($name); } # Load appropriate definitions
ClearAutoLoad($defnfile);
if ($type eq 'pool') { LoadPool($name); } # Load appropriate definitions
elsif ($type eq 'cls') { LoadClass($name); }
else { RequirePackage($name); }
($cs); }); } } # Then return the original cs, so that it's be re-tried.
else {
Warning('unexpected', $defnfile, undef, "Don't know how to autoload $csname from $defnfile"); }
return; }

# Remove every autoload trigger registered for a definition file.
# $defnfile may be given with or without a trailing ".ltxml"; the suffix is
# stripped so the name matches the 'autoload_<file>' mapping key.
# Each control sequence name recorded in that mapping has its meaning
# globally reset to undef.
sub ClearAutoLoad {
  my ($defnfile) = @_;
  (my $basename = $defnfile) =~ s/\.ltxml$//;
  my @triggers = LookupMappingKeys('autoload_' . $basename);
  for my $csname (@triggers) {
    $STATE->assign_internal('meaning', $csname => undef, 'global'); }
  return; }
#======================================================================
# Defining Expandable Control Sequences.
#======================================================================
Expand Down Expand Up @@ -2227,6 +2235,7 @@ sub loadLTXML {
# Note (only!) that the ltxml version of this was loaded; still could load raw tex!
AssignValue($request . '_loaded' => 1, 'global');
AssignValue($ltxname . '_loaded' => 1, 'global') if $ltxname ne $request;
ClearAutoLoad($request);
$STATE->getStomach->getGullet->readingFromMouth(LaTeXML::Core::Mouth::Binding->new($pathname), sub {
do $pathname;
Fatal('die', $pathname, $STATE->getStomach->getGullet,
Expand Down Expand Up @@ -2254,7 +2263,8 @@ sub loadTeXDefinitions {
# Of course, now it will be marked and wont get reloaded!
#
return if LookupValue($request . '_loaded') && !$options{reloadable};
AssignValue($request . '_loaded' => 1, 'global'); }
AssignValue($request . '_loaded' => 1, 'global');
ClearAutoLoad($request); }

my $stomach = $STATE->getStomach;
# Note that we are reading definitions (and recursive input is assumed also definitions)
Expand Down Expand Up @@ -2483,8 +2493,15 @@ sub InputDefinitions {
my $pushpop = LookupDefinition(T_CS('\@pushfilename'))
&& LookupDefinition(T_CS('\@popfilename'));
if ($options{handleoptions}) {
# Note: this is trying to emulate the LaTeX 2 (latex.ltx) use of \@pushfilename. For expl3, see expl3.sty.ltxml
Digest(T_CS('\@pushfilename')) if $pushpop;
# Bookkeeping of what is being loaded so that LaTeX can run hooks.
# Tricky: expl3 wants to know the file CURRENTLY being read; \@currname,\@currext set LATER.
# Recent expl3 appends \@expl@push@filename@aux@@ which takes THREE arguments!!!
# These arguments mysteriously appear in \@onefilewith@ptions, MUCH later than \@pushfilename
# We place the needed data after \@pushfilename, but since we're Digesting in isolation,
# they'll disappear if they aren't consumed by expl3. Whew!
Digest(Tokens(T_CS('\@pushfilename'),
T_BEGIN, T_END, T_BEGIN, T_END, T_BEGIN, Explode($name), T_END))
if $pushpop;
# For \RequirePackageWithOptions, pass the options from the outer class/style to the inner one.
if (my $passoptions = $options{withoptions} && $prevname
&& LookupValue('opt@' . $prevname . "." . $prevext)) {
Expand Down
81 changes: 61 additions & 20 deletions lib/LaTeXML/Package/TeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1319,26 +1319,35 @@ sub lookupFontinfo {
# return LookupValue(($defn ? ToString($defn) : ToString($token)) . '_fontinfo'); }
return LookupValue('fontinfo_' . ($defn ? $defn->getCSName : ToString($token))); }

# Need to handle "at" too!!!
# This should eventually actually load the font metrics,
# and tie-in to the FontMetrics data used by Font.
DefPrimitive('\font Token SkipMatch:= SkipSpaces TeXFileName', sub {
my ($stomach, $cs, $name) = @_;
my $gullet = $stomach->getGullet;
$name = ToString($name);
my ($at, $scaled);
if ($gullet->readKeyword('at')) { $at = $gullet->readDimension; }
if ($gullet->readKeyword('scaled')) { $scaled = $gullet->readNumber; }
my %props = LaTeXML::Common::Font::decodeFontname($name,
($gullet->readKeyword('at')
? $gullet->readDimension->ptValue : undef),
($gullet->readKeyword('scaled')
? $gullet->readNumber->valueOf / 1000 : undef));
$at && $at->ptValue, $scaled && $scaled->valueOf / 1000);
if (!keys %props) { # Failed?
Info('unexpected', $name, $stomach, "Unrecognized font name '$name'",
"Font switch macro " . ToString($cs) . " will have no effect"); }
else {
$props{fontname} = $name; }
my $f = ($at ? $at->divide(Dimension('1em'))->valueOf
: ($scaled ? $scaled->valueOf / 1000
: 1));
my $fontinfo = \%props;
$$fontinfo{data} = [map { $_->multiply($f); }
Dimension(0), Dimension('0.5em'), Dimension(0),
Dimension(0), Dimension('1ex'), Dimension('1em')];
$gullet->skipSpaces;
AssignValue('fontinfo_' . ToString($cs) => {%props});
DefPrimitiveI($cs, undef, undef, font => {%props});

});
# Store the font info & metrics
AssignValue('fontinfo_' . ToString($cs) => $fontinfo);
# The font $cs should select the font
DefPrimitiveI($cs, undef, undef, font => $fontinfo);
return; });

# Not sure what this should be...
DefPrimitiveI('\nullfont', undef, undef, font => { family => 'nullfont' });
Expand Down Expand Up @@ -1499,9 +1508,6 @@ foreach my $letter (ord('A') .. ord('Z')) {
$STATE->assignUCcode(chr($letter + 0x20), $letter, 'global'); }

# Stub definitions ???
DefRegister('\hyphenchar{}', Number(ord('-')));
DefRegister('\skewchar{}', Number(0)); # no idea what the default is here

DefMacro('\hyphenation GeneralText', Tokens());
DefMacro('\patterns{}', Tokens());

Expand Down Expand Up @@ -1579,15 +1585,50 @@ DefRegister('\dp Number', Dimension(0),
DefParameterType('FontToken', sub {
my ($gullet) = @_;
my $token = $gullet->readToken;
if ($token->toString =~ /^\\(?:text|script|scriptscript)font$/) {
$gullet->readNumber; }
$token; }); #?
if ($token->toString =~ /^\\(text|script|scriptscript)font$/) {
my $type = $1;
if (my $fam = $gullet->readNumber) {
$token = LookupValue($type . 'font_' . $fam->valueOf); } }
elsif ($token->toString eq '\\font') {
$token = LookupValue('textfont_0'); } # ??? I assume shuld get current font?
$token; }); #?

# Access to the font parameters; Curiously, can be used as scratch arrays (eg LaTeX3)
DefRegister('\fontdimen Number FontToken' => Dimension(0),
getter => sub { my $p = ToString($_[0]);
if ($p == 2) { Dimension('0.5em'); } # interword space
elsif ($p == 5) { Dimension('1ex'); } # x-height
elsif ($p == 6) { Dimension('1em'); } # quad width
else { Dimension(0); } });
getter => sub {
my ($p, $font) = @_;
my $info = lookupFontinfo($font);
$p = ToString($p);
my $data = $info && $$info{data};
return ($data && $$data[$p - 1]) || Dimension(0); },
setter => sub {
my ($value, $scope, $p, $font) = @_;
my $info = lookupFontinfo($font);
$p = ToString($p);
if (my $data = $info && $$info{data}) {
$$data[$p - 1] = $value; } }
);

# \hyphenchar<font>: backed by the 'hyphenchar' entry of the font's stored fontinfo hash.
# Reading yields the stored value when there is one, else Number(ord('-')).
# Writing stores into the fontinfo hash; it is a no-op when the font has no fontinfo.
DefRegister('\hyphenchar FontToken' => Number(ord('-')),
  getter => sub {
    my ($fonttoken) = @_;
    if (my $fontinfo = lookupFontinfo($fonttoken)) {
      my $stored = $$fontinfo{hyphenchar};
      return $stored if $stored; }
    return Number(ord('-')); },
  setter => sub {
    my ($value, $scope, $fonttoken) = @_;
    my $fontinfo = lookupFontinfo($fonttoken);
    $$fontinfo{hyphenchar} = $value if $fontinfo; }
);
# \skewchar<font>: backed by the 'skewchar' entry of the font's stored fontinfo hash.
# Reading yields the stored value when there is one, else Number(0).
# Writing stores into the fontinfo hash; it is a no-op when the font has no fontinfo.
DefRegister('\skewchar FontToken' => Number(0),
  getter => sub {
    my ($fonttoken) = @_;
    if (my $fontinfo = lookupFontinfo($fonttoken)) {
      my $stored = $$fontinfo{skewchar};
      return $stored if $stored; }
    return Number(0); },
  setter => sub {
    my ($value, $scope, $fonttoken) = @_;
    my $fontinfo = lookupFontinfo($fonttoken);
    $$fontinfo{skewchar} = $value if $fontinfo; }
);

# Could be handled by setting dimensions whenever the box itself is set?

Expand Down
124 changes: 124 additions & 0 deletions lib/LaTeXML/Package/expl3.lua.ltxml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | expl3.lua | #
# | Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
# | Public domain software, produced as part of work done by the | #
# | United States Government & not subject to copyright in the US. | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <[email protected]> #_# | #
# | http://dlmf.nist.gov/LaTeXML/ (o o) | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Package::Pool;
use strict;
use warnings;
use LaTeXML::Package;
use List::Util qw(min max);

# Translation of the intarray portion of expl3.lua
# lua arrays are typically 1-based
# Apparently an expl3 intarray is encoded at the TeX level by \__intarray:w <Number>
# We store actual ints in the array, seems more efficient (could be Number or Tokens)
# Parameter type 'Intarray': reads an intarray identifier and returns the
# Perl array (reference) that holds its integers.
# The identifier is an (expanded) token defined as \__intarray:w followed by a Number.
# The backing array is stored globally under a key built from the token and
# number, and is created empty on first use.
# On anything else, the token is put back, an Error is reported, and a
# fresh (unstored) empty array is returned.
DefParameterType('Intarray', sub {
    my ($gullet) = @_;
    my $head = $gullet->readXToken;
    if (!($head && $head->defined_as(T_CS('\__intarray:w')))) {
      $gullet->unread($head);
      Error('expected', '__intarray:w', $gullet,
        "Expected an intarray identifier, got " . ToString($head));
      return []; }
    my $index = $gullet->readNumber;
    my $key   = join('_', '__intarray', ToString($head), $index->valueOf);
    my $store = LookupValue($key);
    if (!$store) {    # First reference: create the table
      $store = [];
      AssignValue($key => $store, 'global'); }
    return $store; });

# \__intarray:w <Number> is normally consumed by the 'Intarray' parameter type;
# if it is ever executed on its own, report an Error and produce nothing.
DefPrimitiveI('\__intarray:w', 'Number', sub {
    my ($stomach) = @_;
    Error('unexpected', '\__intarray:w', $stomach, "Unexpected isolated \\__intarray:w?");
    return; },
  protected => 1);

# \__intarray_gset_count:Nw <intarray> <number>
# Ensure the intarray has at least <number> entries, filling the newly
# added positions with 0.  Existing entries are never modified, and a
# requested size that is not larger than the current one (including 0 or
# a negative number) is a no-op; previously such a request would clobber an
# existing entry, or use a negative subscript.
DefPrimitiveI('\__intarray_gset_count:Nw', 'Intarray Number', sub {
    my ($stomach, $table, $newlength) = @_;
    my $length = scalar(@$table);
    $newlength = $newlength->valueOf;
    if ($newlength > $length) {
      # Append all the missing zeros in a single step.
      push(@$table, (0) x ($newlength - $length)); }
    return; },
  protected => 1);

# \intarray_count:N <intarray> expands to the number of entries, as digit tokens.
DefMacroI('\intarray_count:N', 'Intarray', sub {
    my ($gullet, $table) = @_;
    my $count = scalar(@$table);
    return Explode($count); });

# \__intarray_gset:wF <position> <intarray> <value> {<if-missing>}
# Store <value> at the 1-based <position> when it lies within the current
# bounds of the intarray (and expand to nothing);
# otherwise leave the array alone and expand to <if-missing>.
DefMacroI('\__intarray_gset:wF', 'Number Intarray Number {}', sub {
    my ($gullet, $pos, $table, $value, $ifmissing) = @_;
    my $index = $pos->valueOf;
    return $ifmissing if ($index <= 0) || ($index > scalar(@$table));
    $$table[$index - 1] = $value->valueOf;
    return; },
  protected => 1);

# \__intarray_gset:w <position> <intarray> <value>
# Store <value> at the 1-based <position>; only positivity of the position
# is checked here (no upper bound), and non-positive positions are ignored.
DefMacroI('\__intarray_gset:w', 'Number Intarray Number', sub {
    my ($gullet, $pos, $table, $value) = @_;
    my $index = $pos->valueOf;
    $$table[$index - 1] = $value->valueOf if $index > 0;
    return; },
  protected => 1);

# \intarray_gzero:N <intarray>: reset every existing entry to 0, in place
# (the length of the array is unchanged).
DefPrimitiveI('\intarray_gzero:N', 'Intarray', sub {
    my ($stomach, $table) = @_;
    foreach my $entry (@$table) {    # $entry aliases the array element
      $entry = 0; }
    return; },
  protected => 1);

# \__intarray_item:wF <position> <intarray> {<if-missing>}
# Expand to the entry at the 1-based <position> (as digit tokens) when the
# position is within bounds, otherwise expand to <if-missing>.
DefMacroI('\__intarray_item:wF', 'Number Intarray {}', sub {
    my ($gullet, $pos, $table, $ifmissing) = @_;
    my $index = $pos->valueOf;
    return $ifmissing unless ($index > 0) && ($index <= scalar(@$table));
    return Explode($$table[$index - 1]); });

# \__intarray_item:w <position> <intarray>
# Expand to the entry at the 1-based <position> (as digit tokens);
# expands to nothing when the position is out of bounds.
DefMacroI('\__intarray_item:w', 'Number Intarray', sub {
    my ($gullet, $pos, $table) = @_;
    my $index = $pos->valueOf;
    return unless ($index > 0) && ($index <= scalar(@$table));
    return Explode($$table[$index - 1]); });

# \__intarray_to_clist:Nn <intarray>
# Expand to all entries of the intarray, joined by ', '.
# NOTE(review): the ':Nn' in the name suggests a second (braced) argument,
# but only the intarray is declared as a parameter here, so any such
# argument would be left in the input -- confirm against the expl3 caller.
DefMacroI('\__intarray_to_clist:Nn', 'Intarray', sub {
    my ($gullet, $table) = @_;
    my $clist = join(', ', @$table);
    return Tokenize($clist); });

# \__intarray_range_to_clist:w <intarray> <from> <to>
# Expand to the entries from the 1-based position <from> through <to>,
# joined by ', '.  Both positions are clamped into the valid index range
# (and <to> is never allowed to precede <from>).
DefMacroI('\__intarray_range_to_clist:w', 'Intarray Number Number', sub {
    my ($stomach, $table, $from, $to) = @_;
    my $length = scalar(@$table);
    # An empty intarray has no valid index to clamp to: the clamping below
    # would yield -1 and slice out an undefined element (triggering an
    # "uninitialized value" warning in join), so produce the empty list directly.
    return Tokenize('') unless $length;
    $from = min(max($from->valueOf - 1, 0), $length - 1);
    $to   = min(max($to->valueOf - 1, $from), $length - 1);
    return Tokenize(join(', ', @$table[$from .. $to])); });

# \__intarray_gset_range:w <from> <intarray> ...
# Placeholder: reads its two declared arguments, but only reports that the
# command is unimplemented; nothing is stored.
DefPrimitiveI('\__intarray_gset_range:w', 'Number Intarray', sub {
    my ($stomach, $from, $table) = @_;
    # TODO: read commas, Numbers, store in successive positions starting at $from
    Error('unimplemented', '\__intarray_gset_range:w', $stomach,
      "This command is not yet implemented");
    return; },
  protected => 1);

1;
8 changes: 3 additions & 5 deletions lib/LaTeXML/Package/expl3.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ use LaTeXML::Package;

if (FindFile('expl3', type => 'ltx', noltxml => 1)) {
InputDefinitions('expl3', type => 'ltx', noltxml => 1);
RawTeX(<<'EoRawTeX');
if (!($LaTeXML::DEBUG{compiling} || $LaTeXML::DEBUG{compiled})) {
RawTeX(<<'EoRawTeX');
\ExplSyntaxOn
\cs_gset_eq:NN \@expl@cs@to@str@@N \cs_to_str:N
\cs_gset_eq:NN \@expl@str@if@eq@@nnTF \str_if_eq:nnTF
Expand Down Expand Up @@ -99,12 +100,9 @@ if (FindFile('expl3', type => 'ltx', noltxml => 1)) {
\prop_gput:Nnn \g_msg_module_type_prop { cmd } { LaTeX }
\ExplSyntaxOff
EoRawTeX
}
}
else {
Info('missing_file', 'expl3.ltx', undef, 'Recent versions of LaTeX expect expl3.ltx to be available, consider installing texlive.'); }

# Can we do these better? xparse tests fail without them.
DefMacroI('\@pushfilenameaux', undef, Tokens(), locked => 1);
DefMacroI('\@expl@push@filename@aux@@', undef, Tokens(), locked => 1);

1;
38 changes: 3 additions & 35 deletions lib/LaTeXML/Package/expl3.sty.ltxml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- mode: Perl -*-
# /=====================================================================\ #
# | lipsum | #
# | expl3 | #
# | Implementation for LaTeXML | #
# |=====================================================================| #
# | Part of LaTeXML: | #
Expand All @@ -15,40 +15,8 @@ use strict;
use warnings;
use LaTeXML::Package;

LoadPool('expl3');
InputDefinitions('expl3', type => 'sty', noltxml => 1);

# Redefine aux macro to avoid reliance on argument ordering artefacts in latex.ltx
# The purpose here was originally to set `\g_file_curr_name_str`,
# but I am unsure if it is used anywhere currently?
DefMacroI('\@pushfilenameaux', undef, Tokens(), locked => 1);
DefMacroI('\@expl@push@filename@aux@@', undef, Tokens(), locked => 1);
InputDefinitions('expl3', type => 'lua');

DefMacroI(T_CS('\__expl_status_pop:w'), '{}', sub {
my ($gullet, $arg1) = @_;
my @arg_until;
# Custom readUntil that can match either \q_stop or \s__expl_stop
my $want1 = T_CS('\q_stop');
my $want2 = T_CS('\s__expl_stop');
my $token;
my $nbraces = 0;
while (($token = shift(@{ $$gullet{pushback} }) || $$gullet{mouth}->readToken())
&& !$token->equals($want1) && !$token->equals($want2)) {
push(@arg_until, $token);
if ($$token[1] == CC_BEGIN) { # And if it's a BEGIN, copy till balanced END
$nbraces++;
push(@arg_until, $gullet->readBalanced, T_END); } }
# Notice that IFF the arg looks like {balanced}, the outer braces are stripped
# so that delimited arguments behave more similarly to simple, undelimited arguments.
if (($nbraces == 1) && ($arg_until[0][1] == CC_BEGIN) && ($arg_until[-1][1] == CC_END)) {
shift(@arg_until); pop(@arg_until); }

return (
T_CS('\tl_set:Nn'),
T_CS('\l__expl_status_stack_tl'), T_BEGIN, @arg_until, T_END,
T_CS('\int_if_odd:nTF'), T_BEGIN, $arg1, T_END,
T_BEGIN, T_CS('\ExplSyntaxOn'), T_END,
T_BEGIN, T_CS('\ExplSyntaxOff'), T_END); },
protected => 1, locked => 1);
InputDefinitions('expl3', type => 'sty', noltxml => 1);

1;

0 comments on commit 202aced

Please sign in to comment.