diff --git a/MANIFEST b/MANIFEST index 6dfd9301f..b1dd66ad6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -54,6 +54,7 @@ lib/LaTeXML/Core/Definition.pm lib/LaTeXML/Core/Definition/Expandable.pm lib/LaTeXML/Core/Definition/Conditional.pm lib/LaTeXML/Core/Definition/Primitive.pm +lib/LaTeXML/Core/Definition/FontDef.pm lib/LaTeXML/Core/Definition/Register.pm lib/LaTeXML/Core/Definition/CharDef.pm lib/LaTeXML/Core/Definition/Constructor.pm @@ -1352,6 +1353,9 @@ t/fonts/mixed.xml t/fonts/omencodings.pdf t/fonts/omencodings.tex t/fonts/omencodings.xml +t/fonts/plainfonts.pdf +t/fonts/plainfonts.tex +t/fonts/plainfonts.xml t/fonts/sizes.pdf t/fonts/sizes.tex t/fonts/sizes.xml diff --git a/lib/LaTeXML/Common/Font.pm b/lib/LaTeXML/Common/Font.pm index 76191349f..beecf51a2 100644 --- a/lib/LaTeXML/Common/Font.pm +++ b/lib/LaTeXML/Common/Font.pm @@ -52,6 +52,7 @@ my $FLAG_EMPH = 0x10; # Mappings from various forms of names or component names in TeX # Given a font, we'd like to map it to the "logical" names derived from LaTeX, # (w/ loss of fine grained control). +# and (importantly) the encoding needed to lookup unicode in a FontMap! # I'd like to use Karl Berry's font naming scheme # (See http://www.tug.org/fontname/html/) # but it seems to be a one-way mapping, and moreover, doesn't even fit CM fonts! @@ -60,61 +61,58 @@ my $FLAG_EMPH = 0x10; # NOTE: This probably doesn't really belong in here... my %font_family = ( - cmr => { family => 'serif' }, - cmss => { family => 'sansserif' }, - cmssq => { family => 'sansserif' }, # quote style? - cmssqi => { family => 'sansserif', shape => 'italic' }, # quote style? 
- cmtt => { family => 'typewriter' }, cmvtt => { family => 'typewriter' }, - cmt => { family => 'serif' }, # for cmti "text italic" - cmfib => { family => 'serif' }, - cmfr => { family => 'serif' }, - cm => { family => 'serif' }, - cmdh => { family => 'serif' }, - cmr => { family => 'serif' }, - cmdunh => { family => 'serif' }, # like cmr10 but with tall body heights - cmu => { family => 'serif' }, # unslanted italic ?? - ptm => { family => 'serif' }, ppl => { family => 'serif' }, - pnc => { family => 'serif' }, pbk => { family => 'serif' }, - phv => { family => 'sansserif' }, pag => { family => 'serif' }, - pcr => { family => 'typewriter' }, pzc => { family => 'script' }, - put => { family => 'serif' }, bch => { family => 'serif' }, - psy => { family => 'symbol' }, pzd => { family => 'dingbats' }, - ccr => { family => 'serif' }, ccy => { family => 'symbol' }, - cmbr => { family => 'sansserif' }, cmtl => { family => 'typewriter' }, - cmbrs => { family => 'symbol' }, ul9 => { family => 'typewriter' }, - txr => { family => 'serif' }, txss => { family => 'sansserif' }, - txtt => { family => 'typewriter' }, txms => { family => 'symbol' }, - txsya => { family => 'symbol' }, txsyb => { family => 'symbol' }, - pxr => { family => 'serif' }, pxms => { family => 'symbol' }, - pxsya => { family => 'symbol' }, pxsyb => { family => 'symbol' }, - futs => { family => 'serif' }, - uaq => { family => 'serif' }, ugq => { family => 'sansserif' }, - eur => { family => 'serif' }, eus => { family => 'script' }, - euf => { family => 'fraktur' }, euex => { family => 'symbol' }, - # The following are actually math fonts. - ms => { family => 'symbol' }, - ccm => { family => 'serif', shape => 'italic' }, - cmm => { family => 'math', shape => 'italic', encoding => 'OML' }, - cmex => { family => 'symbol', encoding => 'OMX' }, # Not really symbol, but... 
- cmsy => { family => 'symbol', encoding => 'OMS' }, - ccitt => { family => 'typewriter', shape => 'italic' }, - cmsltt => { family => 'typewriter', shape => 'slanted' }, - cmbrm => { family => 'sansserif', shape => 'italic' }, - futm => { family => 'serif', shape => 'italic' }, - futmi => { family => 'serif', shape => 'italic' }, - txmi => { family => 'serif', shape => 'italic' }, - pxmi => { family => 'serif', shape => 'italic' }, - bbm => { family => 'blackboard' }, - bbold => { family => 'blackboard' }, - bbmss => { family => 'blackboard' }, - # some ams fonts - cmmib => { family => 'italic', series => 'bold' }, - cmbsy => { family => 'symbol', series => 'bold' }, - msa => { family => 'symbol', encoding => 'AMSa' }, - msb => { family => 'symbol', encoding => 'AMSb' }, - # Are these really the same? - msx => { family => 'symbol', encoding => 'AMSa' }, - msy => { family => 'symbol', encoding => 'AMSb' }, + # Computer Modern + cm => { family => 'serif' }, # base for synthesizing cmbx, cmsl ... + cmr => { family => 'serif' }, + cmm => { family => 'math', shape => 'italic', encoding => 'OML' }, # cmmi + cmsy => { encoding => 'OMS' }, + cmex => { encoding => 'OMX' }, + cmss => { family => 'sansserif' }, + cmtt => { family => 'typewriter' }, + cmvtt => { family => 'typewriter' }, + cmssq => { family => 'sansserif' }, # quote style? + cmssqi => { family => 'sansserif', shape => 'italic' }, # quote style? + cmt => { family => 'serif' }, # for cmti "text italic" + cmmib => { family => 'italic', series => 'bold' }, + cmbsy => { series => 'bold', encoding => 'OMS' }, + cmfib => { family => 'serif' }, + cmfr => { family => 'serif' }, + cmdh => { family => 'serif' }, + cmdunh => { family => 'serif' }, # like cmr10 but with tall body heights + cmu => { family => 'serif' }, # unslanted italic ?? 
+ cmsltt => { family => 'typewriter', shape => 'slanted' }, + cmbrm => { family => 'sansserif', shape => 'italic' }, + # Some Blackboard Bold fonts + bbm => { family => 'blackboard' }, + bbold => { family => 'blackboard' }, + bbmss => { family => 'blackboard' }, + # Computer Concrete + ccr => { family => 'serif' }, + ccm => { family => 'serif', shape => 'italic' }, + cct => { family => 'serif' }, + ccitt => { family => 'typewriter', shape => 'italic' }, + # AMS fonts + msa => { encoding => 'AMSa' }, + msb => { encoding => 'AMSb' }, + msx => { encoding => 'AMSa' }, # Are these really the same? (or even real?) + msy => { encoding => 'AMSb' }, + # Euler + eur => { family => 'serif' }, + eus => { family => 'script' }, + euf => { family => 'fraktur' }, + euex => { encoding => 'OMX' }, + # TX Fonts (Times Roman) + txr => { family => 'serif' }, + txmi => { family => 'serif', shape => 'italic' }, + txss => { family => 'sansserif' }, + txtt => { family => 'typewriter' }, + txsya => { encoding => 'AMSa' }, + txsyb => { encoding => 'AMSb' }, + # PX Fonts (Palladio) + pxr => { family => 'serif' }, + pxmi => { family => 'serif', shape => 'italic' }, + pxsya => { encoding => 'AMSa' }, + pxsyb => { encoding => 'AMSb' }, # Pretend to recognize xy's fonts xydash => { family => 'graphic' }, xyatip => { family => 'graphic' }, @@ -125,17 +123,44 @@ my %font_family = ( xycmbt => { family => 'graphic' }, xyluat => { family => 'graphic' }, xylubt => { family => 'graphic' }, + # Fourier + futm => { family => 'serif', shape => 'italic' }, + futmi => { family => 'serif', shape => 'italic' }, + # More fonts that need to be better sorted, classified & labelled + # family symbol, dingbats are nonsense: We need an encoding and FontMap!!! 
+ ptm => { family => 'serif' }, ppl => { family => 'serif' }, + pnc => { family => 'serif' }, pbk => { family => 'serif' }, + phv => { family => 'sansserif' }, pag => { family => 'serif' }, + pcr => { family => 'typewriter' }, pzc => { family => 'script' }, + put => { family => 'serif' }, bch => { family => 'serif' }, + psy => { family => 'symbol' }, pzd => { family => 'dingbats' }, + cmbr => { family => 'sansserif' }, cmtl => { family => 'typewriter' }, + cmbrs => { family => 'symbol' }, ul9 => { family => 'typewriter' }, + futs => { family => 'serif' }, + uaq => { family => 'serif' }, ugq => { family => 'sansserif' }, ); # Maps the "series code" to an abstract font series name my %font_series = ( - '' => { series => 'medium' }, m => { series => 'medium' }, mc => { series => 'medium' }, - b => { series => 'bold' }, bc => { series => 'bold' }, bx => { series => 'bold' }, - sb => { series => 'bold' }, sbc => { series => 'bold' }, bm => { series => 'bold' }); + '' => {}, # default medium + m => { series => 'medium' }, + mc => { series => 'medium' }, + b => { series => 'bold' }, + bc => { series => 'bold' }, + bx => { series => 'bold' }, + sb => { series => 'bold' }, + sbc => { series => 'bold' }, + bm => { series => 'bold' }); # Maps the "shape code" to an abstract font shape name. -my %font_shape = ('' => { shape => 'upright' }, n => { shape => 'upright' }, i => { shape => 'italic' }, it => { shape => 'italic' }, - sl => { shape => 'slanted' }, sc => { shape => 'smallcaps' }, csc => { shape => 'smallcaps' }); +my %font_shape = ( + '' => {}, # default upright + n => { shape => 'upright' }, + i => { shape => 'italic' }, + it => { shape => 'italic' }, + sl => { shape => 'slanted' }, + sc => { shape => 'smallcaps' }, + csc => { shape => 'smallcaps' }); # These could be exported... 
sub lookupFontFamily { @@ -181,7 +206,7 @@ my $FONTREGEXP sub decodeFontname { my ($name, $at, $scaled) = @_; if ($name =~ /^$FONTREGEXP$/o) { - my %props; + my %props = (series => 'medium', shape => 'upright', encoding => 'OT1'); my ($fam, $ser, $shp, $size) = ($1, $2, $3, $4); if (my $ffam = lookupFontFamily($fam)) { map { $props{$_} = $$ffam{$_} } keys %$ffam; } if (my $fser = lookupFontSeries($ser)) { map { $props{$_} = $$fser{$_} } keys %$fser; } @@ -191,8 +216,6 @@ sub decodeFontname { $size = $size * $scaled if defined $scaled; $props{name} = $name; $props{size} = $size; - # Experimental Hack !?!?!? - $props{encoding} = 'OT1' unless defined $props{encoding}; return %props; } else { Info('unrecognized', 'font', undef, "Unrecognized fontname '$name'"); @@ -251,7 +274,7 @@ sub textDefault { sub mathDefault { my ($self) = @_; return $self->new_internal('math', $DEFSERIES, 'italic', DEFSIZE(), - $DEFCOLOR, $DEFBACKGROUND, $DEFOPACITY, undef, $DEFLANGUAGE, 'text', 0); } + $DEFCOLOR, $DEFBACKGROUND, $DEFOPACITY, 'OT1', $DEFLANGUAGE, 'text', 0); } # Accessors # Using an array here is getting ridiculous! diff --git a/lib/LaTeXML/Core/Definition.pm b/lib/LaTeXML/Core/Definition.pm index 36f771d5b..f44b13700 100644 --- a/lib/LaTeXML/Core/Definition.pm +++ b/lib/LaTeXML/Core/Definition.pm @@ -23,6 +23,7 @@ use base qw(LaTeXML::Common::Object); require LaTeXML::Core::Definition::Expandable; require LaTeXML::Core::Definition::Conditional; require LaTeXML::Core::Definition::Primitive; +require LaTeXML::Core::Definition::FontDef; require LaTeXML::Core::Definition::Register; require LaTeXML::Core::Definition::CharDef; require LaTeXML::Core::Definition::Constructor; @@ -52,6 +53,9 @@ sub isExpandable { sub isRegister { return ''; } +sub isFontDef { # ONLY FontDef handles this! 
+ return ''; } + sub isPrefix { return 0; } diff --git a/lib/LaTeXML/Core/Definition/CharDef.pm b/lib/LaTeXML/Core/Definition/CharDef.pm index d992ba8ec..999a6b581 100644 --- a/lib/LaTeXML/Core/Definition/CharDef.pm +++ b/lib/LaTeXML/Core/Definition/CharDef.pm @@ -54,10 +54,11 @@ sub invoke { ($local ? Tokens(T_CS('\mathchar'), $value->revert, T_CS('\relax')) : $$self{cs}), role => $$self{role}); } else { # else text; but note defered font/encoding till digestion! - my ($char, %props) = LaTeXML::Package::FontDecode($value->valueOf); - return Box($char, undef, undef, + # Decode the codepoint using current font & encoding + my ($glyph, $adjfont) = LaTeXML::Package::FontDecode($value->valueOf); + return Box($glyph, $adjfont, undef, ($local ? Tokens(T_CS('\char'), $value->revert, T_CS('\relax')) : $$self{cs}), - %props); } } + ); } } sub equals { my ($self, $other) = @_; diff --git a/lib/LaTeXML/Core/Definition/FontDef.pm b/lib/LaTeXML/Core/Definition/FontDef.pm new file mode 100644 index 000000000..23a7dc062 --- /dev/null +++ b/lib/LaTeXML/Core/Definition/FontDef.pm @@ -0,0 +1,73 @@ +# /=====================================================================\ # +# | LaTeXML::Core::Definition::FontDef | # +# | Representation of definitions of Fonts | # +# |=====================================================================| # +# | Part of LaTeXML: | # +# | Public domain software, produced as part of work done by the | # +# | United States Government & not subject to copyright in the US. 
| # +# |---------------------------------------------------------------------| # +# | Bruce Miller #_# | # +# | http://dlmf.nist.gov/LaTeXML/ (o o) | # +# \=========================================================ooo==U==ooo=/ # +package LaTeXML::Core::Definition::FontDef; +use strict; +use warnings; +use LaTeXML::Global; +use LaTeXML::Common::Object; +use LaTeXML::Common::Error; +use LaTeXML::Core::Token; +use LaTeXML::Core::Tokens; +use LaTeXML::Core::Box; +use base qw(LaTeXML::Core::Definition::Primitive); + +# A FontDef is a specialized primitive: a control sequence defined by \font. +# When you invoke the control sequence, it looks up the font info stored +# under $fontid and merges it into the current font (see invoke). +# $fontid is the key under which that font info is stored in $STATE +sub new { + my ($class, $cs, $fontid, %traits) = @_; + return bless { cs => $cs, parameters => undef, + fontID => $fontid, + locator => $STATE->getStomach->getGullet->getMouth->getLocator, + %traits }, $class; } + +# Return the "font info" associated with the (TeX) font that this command selects (See \font) +sub isFontDef { + my ($self) = @_; + return $STATE->lookupValue($$self{fontID}); } + +sub invoke { + my ($self, $stomach) = @_; + if (my $fontinfo = $STATE->lookupValue($$self{fontID})) { + # Temporary hack for \the\font; remember the last font def executed + $STATE->assignValue(current_FontDef => $$self{cs}, 'local'); + $STATE->assignValue(font => $STATE->lookupValue('font')->merge(%$fontinfo), 'local'); + } + return Box(undef, undef, undef, $$self{cs}); } + +#=============================================================================== +1; + +__END__ + +=pod + +=head1 NAME + +C<LaTeXML::Core::Definition::FontDef> - Control sequence definitions for font symbols defined by \font. + +=head1 DESCRIPTION + +Representation for control sequences defined by \font. +It extends L<LaTeXML::Core::Definition::Primitive>.
+ +=head1 AUTHOR + +Bruce Miller + +=head1 COPYRIGHT + +Public domain software, produced as part of work done by the +United States Government & not subject to copyright in the US. + +=cut diff --git a/lib/LaTeXML/Core/Stomach.pm b/lib/LaTeXML/Core/Stomach.pm index 068bda3f5..26c69c9ca 100644 --- a/lib/LaTeXML/Core/Stomach.pm +++ b/lib/LaTeXML/Core/Stomach.pm @@ -242,8 +242,13 @@ sub invokeToken_simple { return LaTeXML::Core::Comment->new($comment); } else { $STATE->clearPrefixes; # prefixes shouldn't apply here. - return Box(LaTeXML::Package::FontDecodeString($meaning->toString, undef, 1), - undef, undef, $meaning); } } + if (my $mathcode = $STATE->lookupValue('IN_MATH') + && $STATE->lookupMathcode($meaning->toString)) { + my ($role, $glyph, $f, $reversion) = LaTeXML::Package::decodeMathChar($mathcode, $meaning); + return Box($glyph, $f, undef, $reversion, role => $role); } + else { + return Box(LaTeXML::Package::FontDecodeString($meaning->toString, undef, 1), + undef, undef, $meaning); } } } # Regurgitate: steal the previously digested boxes from the current level. sub regurgitate { @@ -359,10 +364,13 @@ sub setMode { # and save the text font for any embedded text. $STATE->assignValue(savedfont => $curfont, 'local'); $STATE->assignValue(script_base_level => scalar(@{ $$self{boxing} })); # See getScriptLevel - $STATE->assignValue(font => $STATE->lookupValue('mathfont')->merge( - color => $curfont->getColor, background => $curfont->getBackground, - size => $curfont->getSize, - mathstyle => ($mode =~ /^display/ ? 'display' : 'text')), 'local'); } + my $mathfont = $STATE->lookupValue('mathfont')->merge( + color => $curfont->getColor, background => $curfont->getBackground, + size => $curfont->getSize, + mathstyle => ($mode =~ /^display/ ? 
'display' : 'text')); + $STATE->assignValue(font => $mathfont, 'local'); + $STATE->assignValue(initial_math_font => $mathfont, 'local'); + $STATE->assignValue(fontfamily => -1, 'local'); } else { # When entering text mode, we should set the font to the text font in use before the math # but inherit color and size diff --git a/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml b/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml index d9ba052a5..84fe2dd87 100644 --- a/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml +++ b/lib/LaTeXML/Engine/Base_ParameterTypes.pool.ltxml @@ -283,34 +283,6 @@ DefParameterType('Variable', sub { my $params = $defn->getParameters; return Tokens($defn->getCS, ($params ? $params->revertArguments(@args) : ())); }); -# Same, but not necessarily writable -DefParameterType('Register', sub { - my ($gullet) = @_; - my $token = $gullet->readXToken; - my $defn = $token && LookupDefinition($token); - if ((defined $defn) && $defn->isRegister) { - [$defn, ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ())]; } - else { - if ($token && ($token->getCatcode == CC_CS)) { - if ($token->getString eq '\font') { - # \font is a bit of a register-like exception - return [$defn]; } - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "Defining it now."); - DefRegisterI($token, undef, Dimension(0)); # Dimension, or what? - return [LookupDefinition($token)]; } - else { - Error('expected', '', $gullet, - "A was supposed to be here", "Got " . Stringify($token), - "But it is not even definable."); - return [LookupDefinition(T_CS('\lx@DUMMY@REGISTER'))]; } } }, - reversion => sub { - my ($var) = @_; - my ($defn, @args) = @$var; - my $params = $defn->getParameters; - return Tokens($defn->getCS, ($params ? 
$params->revertArguments(@args) : ())); }); - DefParameterType('TeXFileName', sub { my ($gullet) = @_; my ($token, $cc, @tokens) = (); diff --git a/lib/LaTeXML/Engine/LaTeX.pool.ltxml b/lib/LaTeXML/Engine/LaTeX.pool.ltxml index 9dfa64d81..ee10feccf 100644 --- a/lib/LaTeXML/Engine/LaTeX.pool.ltxml +++ b/lib/LaTeXML/Engine/LaTeX.pool.ltxml @@ -2763,7 +2763,7 @@ DefMacro('\ProvideTextCommandDefault DefToken', '\ProvideTextCommand{#1}{?}'); #------------------------------------------------------------ DefPrimitive('\DeclareTextSymbol DefToken {}{Number}', sub { - my ($gullet, $cs, $encoding, $code) = @_; + my ($stomach, $cs, $encoding, $code) = @_; $code = $code->valueOf; my $css = ToString($cs); $encoding = ToString(Expand($encoding)); @@ -2772,11 +2772,16 @@ DefPrimitive('\DeclareTextSymbol DefToken {}{Number}', sub { '\expandafter\ifx\csname\cf@encoding\string' . $css . '\endcsname\relax\csname?\string' . $css . '\endcsname' . '\else\csname\cf@encoding\string' . $css . '\endcsname\fi'); } my $ecs = T_CS('\\' . $encoding . $css); - DefPrimitiveI($ecs, undef, FontDecode($code, $encoding)); + DefPrimitiveI($ecs, undef, sub { + my ($glyph, $adjfont) = FontDecode($code, $encoding); + Box($glyph, $adjfont, undef, $cs); }); return; }); -# hmmm... what needs doing here; basically it means use this encoding as the default for the symbol -DefMacro('\DeclareTextSymbolDefault DefToken {}', ''); # '\DeclareTextSymbol{#1}{?}'); +DefPrimitive('\DeclareTextSymbolDefault DefToken {}', sub { + my ($stomach, $cs, $encoding) = @_; + $encoding = ToString(Expand($encoding)); + DefMacroI(T_CS('\\?' . ToString($cs)), undef, T_CS('\\' . $encoding . 
ToString($cs))); + return; }); #------------------------------------------------------------ DefPrimitive('\DeclareTextAccent DefToken {}{}', sub { @@ -2804,8 +2809,9 @@ DefMacro('\UseTextAccent{}{}', '{\fontencoding{#1}#2{#3}}'); DefPrimitive('\DeclareMathAccent DefToken {}{} {Number}', sub { my ($stomach, $cs, $kind, $class, $code) = @_; $class = ToString($class); - my $info = LookupValue('fontdeclaration@' . $class); - my $glyph = FontDecode($code->valueOf, ($info ? $$info{encoding} : $class)); + my $info = LookupValue('fontdeclaration@' . $class); + # my $glyph = FontDecode($code->valueOf, ($info ? $$info{encoding} : $class)); + my ($glyph) = FontDecode($code->valueOf, ($info ? $$info{encoding} : $class)); DefMathI($cs, 'Digested', $glyph, operator_role => 'OVERACCENT'); return AddToPreamble('\DeclareMathAccent', $cs, $kind, $class, $code); }); @@ -2853,12 +2859,12 @@ my $symboltype_roles = { '\mathord' => 'ID', '\mathop' => 'BIGOP', '\mathbin' => 'BINOP', '\mathrel' => 'RELOP', '\mathopen' => 'OPEN', '\mathclose' => 'CLOSE', '\mathpunct' => 'PUNCT' }; DefPrimitive('\DeclareMathSymbol DefToken SkipSpaces DefToken {}{Number}', sub { - my ($stomach, $cs, $type, $font, $code) = @_; - my $encoding = ToString($font); # Or maybe just a font name or class? + my ($stomach, $cs, $type, $fontkind, $code) = @_; + my $encoding = ToString($fontkind); # Or maybe just a font name or class? if (my $decl = LookupValue('fontdeclaration@' . 
$encoding)) { $encoding = $$decl{encoding} if $$decl{encoding}; } - my $glyph = FontDecode($code->valueOf, $encoding); - my $role = $$symboltype_roles{ ToString($type) }; + my ($glyph) = FontDecode($code->valueOf, $encoding); + my $role = $$symboltype_roles{ ToString($type) }; DefMathI($cs, undef, $glyph, role => $role); return; }); diff --git a/lib/LaTeXML/Engine/TeX_Character.pool.ltxml b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml index c727ed67d..b2d20a76b 100644 --- a/lib/LaTeXML/Engine/TeX_Character.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Character.pool.ltxml @@ -29,8 +29,8 @@ DefPrimitiveI('\ ', undef, sub { name => 'space', isSpace => 1, width => Dimension('0.5em')); }); DefPrimitive('\char Number', sub { - Box(FontDecode($_[1]->valueOf), undef, undef, - Tokens(T_CS('\char'), $_[1]->revert, T_CS('\relax'))); }); + my ($glyph, $adjfont) = FontDecode($_[1]->valueOf); + Box($glyph, $adjfont, undef, Tokens(T_CS('\char'), $_[1]->revert, T_CS('\relax'))); }); #====================================================================== # \accent c places an accent on a character. @@ -87,14 +87,13 @@ sub DefAccent { # Otherwise, use the Util::Unicode module to find the appropriate combining character DefPrimitive('\accent Number {}', sub { my ($stomach, $num, $letter) = @_; - my $n = $num->valueOf; - my $encoding = LookupValue('font')->getEncoding || 'OT1'; - my $char = ($encoding ? FontDecode($n, $encoding) : chr($n)); - if (my $entry = unicode_accent($char)) { + my $n = $num->valueOf; + my ($glyph, $adjfont) = FontDecode($n); + if (my $entry = unicode_accent($glyph)) { applyAccent($stomach, $letter, $$entry{combiner}, $$entry{standalone}, Invocation(T_CS('\accent'), $num, $letter)); } else { # Unknown accent ? 
Attempt to OVERLAY the accent on top of $letter - Digest(Tokens(T_CS('\lx@overlay'), T_BEGIN, $letter, T_END, T_BEGIN, T_OTHER($char), T_END)); } }); + Digest(Tokens(T_CS('\lx@overlay'), T_BEGIN, $letter, T_END, T_BEGIN, T_OTHER($glyph), T_END)); } }); #====================================================================== # \chardef iq provides an alternate way to define a control sequence that returns a character. diff --git a/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml index 6e466bab8..30b468e91 100644 --- a/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Debugging.pool.ltxml @@ -102,14 +102,15 @@ DefMacro('\meaning Token', sub { $type =~ s/^LaTeXML:://; # Pre-step: We can't extract the bodies of definitions which are defined via Perl subroutines. # So do the next best thing -- represent them as their tokens. - if ($type =~ /(primitive|conditional|constructor)$/i) { - $definition = $definition->getCSorAlias; - $type = ref $definition; - $type =~ s/^LaTeXML:://; - if (my $fontinfo = LookupValue('fontinfo_' . ToString($definition))) { - $meaning = 'select font ' . ($$fontinfo{fontname} || 'fontname'); + if ($type =~ /fontdef$/i) { + if (my $fontinfo = $definition->isFontDef) { + $meaning = 'select font ' . ($$fontinfo{name} || 'fontname'); $meaning .= ' at ' . $$fontinfo{at} if $$fontinfo{at}; $type = 'font'; } } + elsif ($type =~ /(primitive|conditional|constructor)$/i) { + $definition = $definition->getCSorAlias; + $type = ref $definition; + $type =~ s/^LaTeXML:://; } # The actual tests start here if ($type =~ /token$/i) { my $cc = $definition->getCatcode; diff --git a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml index b8cecb755..c924b3021 100644 --- a/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Fonts.pool.ltxml @@ -27,69 +27,78 @@ use LaTeXML::Package; # \fontdimen iq holds font parameters. 
# \nullfont iq is a predefined font with no characters. -sub lookupFontinfo { - my ($token) = @_; - my $defn = LookupDefinition($token); - # return LookupValue(($defn ? ToString($defn) : ToString($token)) . '_fontinfo'); } - return LookupValue('fontinfo_' . ($defn ? $defn->getCSName : ToString($token))); } - -DefParameterType('FontToken', sub { +# A Defined Font, or Font Identifier, +# defined by \font, OR \textfont, \scriptfont,\scriptscriptfont, or even \font itself +# Actually returns the fontinfo (hash) associated with the font. +DefParameterType('FontDef', sub { my ($gullet) = @_; my $token = $gullet->readToken; - if ($token->toString =~ /^\\(text|script|scriptscript)font$/) { + # Or use LookupRegister??? + if ($token && ($token->toString =~ /^\\(text|script|scriptscript)font$/)) { my $type = $1; if (my $fam = $gullet->readNumber) { $token = LookupValue($type . 'font_' . $fam->valueOf); } } - elsif ($token->toString eq '\\font') { - $token = LookupValue('textfont_0'); } # ??? I assume shuld get current font? - $token; }); #? + # Need the cs that selected the current font! (which we don't have!) + if ($token && ($token->toString eq '\font')) { # Current font ??? Return last pure TeX font used (HACK) + $token = LookupValue('current_FontDef') || T_CS('\tenrm'); } + if (my $defn = $token && $STATE->lookupDefinition($token)) { + return $defn->isFontDef; } + return; }); # This should eventually actually load the font metrics, -# and tie-in to the FontMetrics data used by Font. +# and tie-in to LaTeXML::Common::Font, as well as any FontMetrics data used by Font. 
DefPrimitive('\font SkipSpaces Token SkipSpaces SkipMatch:= SkipSpaces TeXFileName', sub { my ($stomach, $cs, $name) = @_; my $gullet = $stomach->getGullet; $name = ToString($name); - my ($at, $scaled); - if ($gullet->readKeyword('at')) { $at = $gullet->readDimension; } - if ($gullet->readKeyword('scaled')) { $scaled = $gullet->readNumber; } - my %props = LaTeXML::Common::Font::decodeFontname($name, - $at && $at->ptValue, $scaled && $scaled->valueOf / 1000); - if (!keys %props) { # Failed? - Info('unexpected', $name, $stomach, "Unrecognized font name '$name'", - "Font switch macro " . ToString($cs) . " will have no effect"); } - else { - $props{fontname} = $name; } - my $f = ($at ? $at->divide(Dimension('1em'))->valueOf - : ($scaled ? $scaled->valueOf / 1000 - : 1)); - my $fontinfo = \%props; -##### $$fontinfo{data} = [map { $_->multiply($f); } - $$fontinfo{data} = [map { $_->multiply($f)->valueOf; } - Dimension(0), Dimension('0.5em'), Dimension(0), - Dimension(0), Dimension('1ex'), Dimension('1em')]; + my %props = LaTeXML::Common::Font::decodeFontname($name); + my $size = Dimension($props{size} . 'pt'); + my $key = 'fontinfo_' . $name; # Unique key associated with the font's file+size+... + my ($at, $scale) = (undef, 1); + if ($gullet->readKeyword('at')) { + $at = $gullet->readDimension; + $scale = $at->divide($size)->valueOf; } + elsif ($gullet->readKeyword('scaled')) { + $scale = $gullet->readNumber->valueOf / 1000.0; + $at = $size->multiply($scale); } + if ($at) { + $props{at} = ToString($at); + $props{design_size} = $size; + $props{size} = $at->ptValue; + $key .= " at " . ToString($at); } $gullet->skipSpaces; - # Store the font info & metrics - AssignValue('fontinfo_' . 
ToString($cs) => $fontinfo); + my $fontinfo = LookupValue($key); + if (!$fontinfo) { # If we haven't already defined & stored this font + $props{key} = $key; + $props{skewchar} = LookupRegister('\defaultskewchar'); + $props{hyphenchar} = LookupRegister('\defaulthyphenchar'); + $fontinfo = \%props; + # These really should be read from a tfm file! + $$fontinfo{data} = [map { $_->multiply($scale)->valueOf; } + Dimension(0), Dimension('0.5em'), Dimension(0), + Dimension(0), Dimension('1ex'), Dimension('1em')]; + # Store the font info & metrics + AssignValue($key => $fontinfo); } # The font $cs should select the font - DefPrimitiveI($cs, undef, undef, font => $fontinfo); + $STATE->installDefinition(LaTeXML::Core::Definition::FontDef->new($cs, $key)); return; }); -DefMacroI('\fontname', undef, sub { Explode("fontname not implemented"); }); +DefMacro('\fontname FontDef', sub { + my ($gullet, $fontinfo) = @_; + my $name = $fontinfo && $$fontinfo{name}; + Explode($name || "fontname not available"); }); + # Access to the font parameters; Curiously, can be used as scratch arrays (eg LaTeX3) -DefRegister('\fontdimen Number FontToken' => Dimension(0), +DefRegister('\fontdimen Number FontDef' => Dimension(0), getter => sub { - my ($p, $font) = @_; - my $info = lookupFontinfo($font); + my ($p, $fontinfo) = @_; $p = ToString($p); - my $data = $info && $$info{data}; -#### return ($data && $$data[$p - 1]) || Dimension(0); }, + my $data = $fontinfo && $$fontinfo{data}; return Dimension(($data && $$data[$p - 1]) || 0); }, setter => sub { - my ($value, $scope, $p, $font) = @_; - my $info = lookupFontinfo($font); + my ($value, $scope, $p, $fontinfo) = @_; $p = ToString($p); - if (my $data = $info && $$info{data}) { + if (my $data = $fontinfo && $$fontinfo{data}) { my $l = scalar(@$data); if ($l < $p) { for (my $i = $l ; $i < $p ; $i++) { @@ -248,7 +257,8 @@ DeclareFontMap('OMS', # surd amalg nabla int sqcup sqcap sqsubseteq sqsupseteq "\x{221A}", "\x{2210}", "\x{2207}", "\x{222B}", 
"\x{2294}", "\x{2293}", "\x{2291}", "\x{2292}", # section dagger ddagger para clubsuit diam.suit heartsuit spadesuit - UTF(0xA7), "\x{2020}", "\x{2021}", UTF(0xB6), "\x{2663}", "\x{2662}", "\x{2661}", "\x{2660}"]); + UTF(0xA7), "\x{2020}", "\x{2021}", UTF(0xB6), "\x{2663}", "\x{2662}", "\x{2661}", "\x{2660}"], + uppercase_mathstyle => { family => 'caligraphic' }); DeclareFontMap('OMX', [ # ( ) [ ] lfloor rfloor lceil rceil diff --git a/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml b/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml index f89c7b0dd..b1d0d1ae2 100644 --- a/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Hyphenation.pool.ltxml @@ -55,15 +55,14 @@ DefPrimitive('\setlanguage Number', undef); # \righthyphenmin pi is the minimum number of characters that must appear after the last hyphen in an hyphenated word. # \uchyph pi prevents hyphenation of uppercase words unless this is positive. -DefRegister('\hyphenchar FontToken' => Number(ord('-')), +DefRegister('\hyphenchar FontDef' => Number(ord('-')), getter => sub { - my ($font) = @_; - my $info = lookupFontinfo($font); - return ($info && $$info{hyphenchar}) || Number(ord('-')); }, + my ($fontinfo) = @_; + return ($fontinfo && $$fontinfo{hyphenchar}) || Number(ord('-')); }, setter => sub { - my ($value, $scope, $font) = @_; - if (my $info = lookupFontinfo($font)) { - $$info{hyphenchar} = $value; } } + my ($value, $scope, $fontinfo) = @_; + if ($fontinfo) { + $$fontinfo{hyphenchar} = $value; } } ); DefRegister('\defaulthyphenchar' => Number(ord('-'))); diff --git a/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml b/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml index 314b64513..a4b195573 100644 --- a/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Macro.pool.ltxml @@ -239,24 +239,39 @@ DefPrimitiveI('\dont_expand', undef, sub { # \the #---------------------------------------------------------------------- # \the c returns character tokens for an internal quantity's or parameter's 
current value. - -# \the -DefMacro('\the Register', sub { - my ($gullet, $variable) = @_; - return () unless $variable; - my ($defn, @args) = @$variable; - if (!$defn || $defn eq 'missing') { - Error('expected', "", $gullet, "a register was expected to be here"); return (); } - my $type = $defn->isRegister; - if (!$type) { - my $cs = ToString($defn->getCS); - if ($cs eq '\font') { # what to do here? - return T_CS('\tenrm'); } - Error('unexpected', "\\the$cs", $gullet, "You can't use $cs after \\the"); return (); } - my $value = $defn->valueOf(@args); - ## In all cases, these should be OTHER, except for space. (!?) - my @tokens = ($type eq 'Tokens' ? ($value ? $value->unlist : ()) : Explode(ToString($value))); - return @tokens; }); +# The argument to \the is a variety of "Internal Quantities", being parameters, +# registers, internal registers, codenames, etc. See TeX Book, pp.214--215. +# [Since \the is expandable, perhaps should just be built into \the's code? Never need to revert] +DefMacro('\the', sub { + my ($gullet) = @_; + my $token = $gullet->readXToken; + if (!defined $token) { + Error('expected', '', $gullet, + "A was supposed to be here", "Got nothing."); + return T_OTHER('0'); } + my $defn = LookupDefinition($token); + if (!defined $defn) { # the token is Undefined + if ($token && ($token->getCatcode == CC_CS)) { # but IS a cs \something + Error('expected', '', $gullet, + "A was supposed to be here", "Got " . Stringify($token), + "Defining it now."); + # Hackery: to avoid potential repeated errors, define it now as a number register + DefRegisterI($token, undef, Number(0)); # Dimension, or what? + return T_OTHER('0'); } } + elsif (my $type = $defn->isRegister) { # SOME kind of register is acceptable + my @args = ($$defn{parameters} ? $$defn{parameters}->readArguments($gullet) : ()); + my $value = $defn->valueOf(@args); + return (($type eq 'Token') || $type eq 'Tokens' + ? ($value ? 
$value->unlist : ()) : Explode(ToString($value))); } + elsif ($defn->getCSName eq '\font') { + # HACK to get the \fontcmd that would have selected the current font (see FontDef) + return $STATE->lookupValue('current_FontDef') || T_CS('\tenrm'); } # ???? + elsif ($defn->isFontDef) { # Or a proper TeX \fontcmd defined by \font + return $defn->getCS; } + # If we fall through to here, whatever $token is shouldn't have been used with \the + my $t = ToString($token); + Error('unexpected', "\\the$t", $gullet, "You can't use $t after \\the"); + return T_OTHER('0'); }); #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 1; diff --git a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml index 5ebd644eb..be3e8d277 100644 --- a/lib/LaTeXML/Engine/TeX_Math.pool.ltxml +++ b/lib/LaTeXML/Engine/TeX_Math.pool.ltxml @@ -567,31 +567,11 @@ DefRegister('\everydisplay', Tokens()); # \delcode iq is -1 or the delimiter code for a character. # \mathcode iq holds the math character (15-bit number) for each of the 256 characters with which TeX works. -our @mathclassrole = (undef, 'BIGOP', 'BINOP', 'RELOP', 'OPEN', 'CLOSE', 'PUNCT', undef); -# Is this "fontinfo" stuff sufficient to maintain a math font "family" ?? -# What we're really after is a connectio nto a font encoding mapping. -sub decodeMathChar { - my ($n) = @_; - my $class = int($n / (16 * 256)); $n = $n % (16 * 256); - my $fam = int($n / 256); $n = $n % 256; - my $font = LookupValue('textfont_' . $fam) - || LookupValue('scriptfont_' . $fam) - || LookupValue('scriptscriptfont_' . $fam); - my $char = chr($n); - my $fontinfo = lookupFontinfo($font); # Map char(code) to Unicode, via font - my $glyph = ($fontinfo && $$fontinfo{encoding} && FontDecode($n, $$fontinfo{encoding}) // $char); - # If no specific class, Lookup properties from a DefMath? [Or better yet, Unicode data?] - my $charinfo = LookupValue('math_token_attributes_' . 
$glyph); - my $role = $mathclassrole[$class]; - $role = $$charinfo{role} if (!defined $role) && $charinfo; - return ($role, $glyph); } - DefPrimitive('\mathchar Number', sub { - my ($stomach, $code) = @_; - my ($role, $glyph) = decodeMathChar($code->valueOf); - Box($glyph, undef, undef, - Tokens(T_CS('\mathchar'), $_[1]->revert, T_CS('\relax')), - role => $role); }); + my ($stomach, $code) = @_; + my ($role, $glyph, $font, $reversion) = decodeMathChar($code, + Tokens(T_CS('\mathchar'), $_[1]->revert, T_CS('\relax'))); + return Box($glyph, $font, undef, $reversion, role => $role); }); DefConstructor('\delimiter Number', "?#glyph(?#isMath(#glyph)(#glyph))", @@ -655,9 +635,7 @@ DefRegister('\fam' => Number(-1), setter => sub { my ($fam, $scope) = @_; $STATE->assignValue('fontfamily' => $fam->valueOf, $scope); - if (my $font = $STATE->lookupValue('textfont_' . $fam->valueOf)) { - Digest($font); # Digest, since this is a font COMMAND (\font)!!!!! -} }); + }); #====================================================================== # TeX-level grammatical roles @@ -1131,16 +1109,16 @@ DefRegister('\displaywidowpenalty' => Number(50)); DefRegister('\predisplaypenalty' => Number(10000)); DefRegister('\postdisplaypenalty' => Number(0)); -DefRegister('\skewchar FontToken' => Number(0), +DefRegister('\skewchar FontDef' => Number(0), getter => sub { - my ($font) = @_; - my $info = lookupFontinfo($font); - return ($info && $$info{skewchar}) || Number(0); }, + my ($fontinfo) = @_; + return ($fontinfo && $$fontinfo{skewchar}) || Number(0); }, setter => sub { - my ($value, $scope, $font) = @_; - if (my $info = lookupFontinfo($font)) { - $$info{skewchar} = $value; } } + my ($value, $scope, $fontinfo) = @_; + if ($fontinfo) { + $$fontinfo{skewchar} = $value; } } ); + DefRegister('\defaultskewchar' => Number(-1)); # Dimen registers; TeXBook p. 
274 diff --git a/lib/LaTeXML/Engine/plain.pool.ltxml b/lib/LaTeXML/Engine/plain.pool.ltxml index 672d71c87..20b9b94d2 100644 --- a/lib/LaTeXML/Engine/plain.pool.ltxml +++ b/lib/LaTeXML/Engine/plain.pool.ltxml @@ -408,8 +408,12 @@ DefPrimitiveI('\sl', undef, undef, DefPrimitiveI('\sc', undef, undef, font => { shape => 'smallcaps', family => 'serif', series => 'medium' }); -DefPrimitiveI('\cal', undef, undef, - font => { family => 'caligraphic', series => 'medium', shape => 'upright' }); +DefPrimitiveI('\cal', undef, sub { + if (LookupValue('IN_MATH')) { + MergeFont(family => 'caligraphic', series => 'medium', shape => 'upright', encoding => 'OMS'); + return Box(undef, undef, undef, T_CS('\cal')); + } + return; }); # Ideally, we should set these sizes from class files AssignValue(NOMINAL_FONT_SIZE => 10); @@ -426,6 +430,11 @@ DefPrimitiveI('\Huge', undef, undef, font => { size => 29.8 }); DefPrimitiveI('\mit', undef, undef, requireMath => 1, font => { family => 'italic' }); +DefPrimitiveI('\mit', undef, sub { + if (LookupValue('IN_MATH')) { + MergeFont(family => 'math', shape => 'italic'); } + return; }); + DefPrimitiveI('\frenchspacing', undef, undef); DefPrimitiveI('\nonfrenchspacing', undef, undef); DefMacroI('\normalbaselines', undef, diff --git a/lib/LaTeXML/Package.pm b/lib/LaTeXML/Package.pm index 9146c7948..3c98930ca 100644 --- a/lib/LaTeXML/Package.pm +++ b/lib/LaTeXML/Package.pm @@ -48,6 +48,7 @@ use LaTeXML::Core::Rewrite; use LaTeXML::Util::Radix; use File::Which; use Unicode::Normalize; +use LaTeXML::Util::Unicode; use Text::Balanced; use Text::Unidecode; use base qw(Exporter); @@ -86,7 +87,7 @@ our @EXPORT = (qw(&DefAutoload &DefExpandable # Font encoding qw(&DeclareFontMap &FontDecode &FontDecodeString &LoadFontMap), - + qw(&decodeMathChar), # Color qw(&DefColor &DefColorModel &LookupColor), @@ -109,7 +110,6 @@ our @EXPORT = (qw(&DefAutoload &DefExpandable # Random low-level token or string operations. 
qw(&CleanID &CleanLabel &CleanIndexKey &CleanClassName &CleanBibKey &NormalizeBibKey &CleanURL &ComposeURL - &UTF &roman &Roman), # Math & font state. qw(&MergeFont), @@ -140,6 +140,7 @@ our @EXPORT = (qw(&DefAutoload &DefExpandable @LaTeXML::Core::Alignment::EXPORT, @LaTeXML::Common::XML::EXPORT, @LaTeXML::Util::Radix::EXPORT, + @LaTeXML::Util::Unicode::EXPORT, ); #********************************************************************** @@ -154,10 +155,6 @@ our @EXPORT = (qw(&DefAutoload &DefExpandable # Still, it would be nice if there were `compiled' forms of .ltxml files! #********************************************************************** -sub UTF { - my ($code) = @_; - return pack('U', $code); } - sub coerceCS { my ($cs) = @_; if ((ref $cs) && (ref $cs ne 'LaTeXML::Core::Token')) { @@ -2730,47 +2727,53 @@ sub AtEndDocument { #====================================================================== # my $fontmap_options = { # [CONSTANT] - family => 1 }; - + family => 1, uppercase_mathstyle => 1, lowercase_mathstyle => 1, digit_mathstyle => 1 }; + +# Define the font encoding which maps from input codepoints to actual Unicode glyphs. +# Sometimes, codepoints normally within the alphanumeric portion map to +# Exotically styled alphanumerics (eg. Caligraphic, Blackboard-bold). +# Ironically, these are usually well handled in Math postprocessors (to generate Unicode), +# but CSS generally doesn't have the fonts available for pure styling. +# The counter-intuitive pragmatic used here is that in Math, these remain as +# ASCII chars with (semantic) styling, while in text they get mapped to whatever unicode was given. +# The options (uppercase|lowercase|digit)_mathstyle specify font to merge when converting +# alphanumerics to math (see FontDecode, below) sub DeclareFontMap { my ($name, $map, %options) = @_; CheckOptions("DeclareFontMap", $fontmap_options, %options); - my $mapname = ToString($name) - . ($options{family} ? '_' . $options{family} : '') - . 
'_fontmap'; - AssignValue($mapname => $map, 'global'); + my $encname = ToString($name) . ($options{family} ? '_' . $options{family} : ''); + AssignValue($encname . '_fontmap' => $map, 'global'); + foreach my $style (qw(uppercase_mathstyle lowercase_mathstyle digit_mathstyle)) { + AssignValue($encname . '_' . $style => $options{$style}, 'global') if $options{$style}; } return; } # Decode a codepoint using the fontmap for a given font and/or fontencoding. # If $encoding not provided, then lookup according to the current font's # encoding; the font family may also be used to choose the fontmap (think tt fonts!). -# When $implicit is false, we are "explicitly" asking for a decoding, such as -# with \char, \mathchar, \symbol, DeclareTextSymbol and such cases. -# In such cases, only codepoints specifically within the map are covered; the rest are undef. -# If $implicit is true, we'll decode token content that has made it to the stomach: -# We're going to assume that SOME sort of handling of input encoding is taking place, -# so that if anything above 128 comes in, it must already be Unicode!. -# The lower half plane still needs to go through decoding, though, to deal -# with TeX's rearrangement of ASCII... sub FontDecode { - my ($code, $encoding, $implicit) = @_; + my ($code, $encoding, $font) = @_; return if !defined $code || ($code < 0); - my ($map, $font); + my $map; if (!$encoding) { - $font = LookupValue('font'); + $font = LookupValue('font') unless $font; $encoding = $font->getEncoding || 'OT1'; } if ($encoding && ($map = LoadFontMap($encoding))) { # OK got some map. my ($family, $fmap); if ($font && ($family = $font->getFamily) && ($fmap = LookupValue($encoding . '_' . $family . '_fontmap'))) { - $map = $fmap; } } # Use the family specific map, if any. - if ($implicit) { - if ($map && ($code < 128)) { - return $$map[$code]; } - else { - return pack('U', $code); } } - else { - return ($map ? $$map[$code] : undef); } } - + $encoding = $encoding . '_' . 
$family; + $map = $fmap; } } # Use the family specific map, if any. + my $glyph = ($map ? $$map[$code] : undef); + my $category = ((0x30 <= $code) && ($code <= 0x39) ? 'digit' + : ((0x41 <= $code) && ($code <= 0x5A) ? 'uppercase' + : ((0x61 <= $code) && ($code <= 0x7A) ? 'lowercase' : undef))); + if (my $mathstyle = $category && $STATE->lookupValue('IN_MATH') + && $STATE->lookupValue($encoding . '_' . $category . '_mathstyle')) { + $glyph = chr($code); # Keep as ASCII + $font = $font->merge(%$mathstyle) if $font; } # but record the (semantic) font change + return ($glyph, $font); } + +# If $implicit is true, assume that codepoints missing from the effective FontMap +# just decode to themselves (chr()). sub FontDecodeString { my ($string, $encoding, $implicit) = @_; return if !defined $string; @@ -2801,6 +2804,51 @@ sub LoadFontMap { AssignValue($encoding . '_fontmap_failed_to_load' => 1, 'global'); } } return $map; } +our @mathclassrole = (undef, 'BIGOP', 'BINOP', 'RELOP', 'OPEN', 'CLOSE', 'PUNCT', undef); + +sub decodeMathChar { + my ($mathcode, $reversion) = @_; + $mathcode = $mathcode->valueOf if ref $mathcode; + my $n = $mathcode; + my $class = int($n / (16 * 256)); $n = $n % (16 * 256); + my $fam = int($n / 256); $n = $n % 256; + my $char = chr($n); + my $curfont = $STATE->lookupValue('font'); + my $curfam = $STATE->lookupValue('fontfamily') // -1; + my $initfont = $STATE->lookupValue('initial_math_font') || $curfont; + my ($fontdef, $fontinfo); + my ($oclass, $ofam) = ($class, $fam); + # Special case: class 7 means use the \fam as the family code, if 0<=f<=15; + if ($class == 7) { + $fam = $curfam if (defined $curfam) && (0 <= $curfam) && ($curfam <= 15); } + # We MAY need to include the effective font change in the reversion! 
+ my $maybe_rev = ($curfam >= 0) && ($fam != 1); + # BUT if no raw/plain tex font selection occurred, use the current font + # [heuristic since raw TeX and abstract LaTeX(ML) font schemes aren't yet integrated] + if (($class == 7) && ($curfam < 0) && ($curfont ne $initfont)) { + $maybe_rev = 1; + $fontdef = T_CS('\font'); # Assume specified by \mathrm or something similar! + $fontinfo = $STATE->lookupValue('font')->asFontinfo; } + else { + $fontdef = LookupValue('textfont_' . $fam); + my $defn = $STATE->lookupDefinition($fontdef); + $fontinfo = $defn && $defn->isFontDef; } + my $font = $curfont->merge(%$fontinfo); + my $encoding = $fontinfo && $$fontinfo{encoding} || ''; + my ($glyph, $f) = ($encoding ? FontDecode($n, $encoding, $font) : ($char, $font)); + # If no specific class, Lookup properties from a DefMath? [Eventually: Unicode data!] + my $charinfo = (defined $glyph ? LookupValue('math_token_attributes_' . $glyph) : ()); + my $role = ($charinfo && $$charinfo{role}) || $mathclassrole[$class]; + my $size = $curfont->getSize; + $f = $f->merge(size => $size); + my %d = $f->relativeTo($curfont); + if ($reversion) { + %d = () if LookupValue('LaTeX.pool.ltxml_loaded'); + my $rev = ($maybe_rev && %d ? 
Tokens(T_BEGIN, $fontdef, $reversion, T_END) : $reversion); + return ($role, $glyph, $f, $rev); } + else { + return ($role, $glyph, $f); } } + #====================================================================== # Color sub LookupColor { diff --git a/lib/LaTeXML/Package/amsb.fontmap.ltxml b/lib/LaTeXML/Package/amsb.fontmap.ltxml index fbe8e42f4..34d7aa3e2 100644 --- a/lib/LaTeXML/Package/amsb.fontmap.ltxml +++ b/lib/LaTeXML/Package/amsb.fontmap.ltxml @@ -21,18 +21,18 @@ DeclareFontMap('AMSb', [ "\x{22E8}", "\x{22E9}", "\x{22E6}", "\x{22E7}", "\x{2266}\x{0338}", "\x{2267}\x{0338}", "\x{2AB5}", "\x{2AB6}", "\x{2AB9}", "\x{2ABA}", "\x{2A89}", "\x{2A8A}", "\x{2241}", "\x{2247}", "\x{2571}", "\x{2572}", "\x{228A}", "\x{228B}", "\x{2AC5}\x{0338}", "\x{2AC6}\x{0338}", "\x{2ACB}", "\x{2ACC}", "\x{2ACB}", "\x{2ACC}", - "\x{228A}", "\x{228B}", "\x{2288}", "\x{2289}", "\x{2226}", "\x{2224}", "\x{2224}", "\x{2226}", - "\x{22AC}", "\x{22AE}", "\x{22AD}", "\x{22AF}", "\x{22ED}", "\x{22EC}", "\x{22EB}", "\x{22EA}", - "\x{219A}", "\x{219B}", "\x{21CD}", "\x{21CF}", "\x{21CE}", "\x{21AE}", "\x{22C7}", "\x{2205}", + "\x{228A}", "\x{228B}", "\x{2288}", "\x{2289}", "\x{2226}", "\x{2224}", "\x{2224}", "\x{2226}", + "\x{22AC}", "\x{22AE}", "\x{22AD}", "\x{22AF}", "\x{22ED}", "\x{22EC}", "\x{22EB}", "\x{22EA}", + "\x{219A}", "\x{219B}", "\x{21CD}", "\x{21CF}", "\x{21CE}", "\x{21AE}", "\x{22C7}", "\x{2205}", "\x{2204}", "\x{1D538}", "\x{1D539}", "\x{2102}", "\x{1D53B}", "\x{1D53C}", "\x{1D53D}", "\x{1D53E}", "\x{210D}", "\x{1D540}", "\x{1D541}", "\x{1D542}", "\x{1D543}", "\x{1D544}", "\x{2115}", "\x{1D546}", "\x{2119}", "\x{211A}", "\x{211D}", "\x{1D54A}", "\x{1D54B}", "\x{1D54C}", "\x{1D54D}", "\x{1D54E}", - "\x{1D54F}", "\x{1D550}", "\x{2124}", UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), undef, - "\x{2132}", "\x{2141}", undef, undef, undef, undef, "\x{2127}", UTF(0xF0), - "\x{2242}", "\x{2136}", "\x{2137}", "\x{2138}", "\x{22D6}", "\x{22D7}", "\x{22C9}", "\x{22CA}", - "\x{2223}", 
"\x{2225}", "\x{2216}", "\x{223C}", "\x{2248}", "\x{224A}", "\x{2AB8}", "\x{2AB7}", - "\x{21B6}", "\x{21B7}", "\x{03DD}", "\x{03F0}", "\x{1D55C}", "\x{210F}", "\x{210F}", "\x{03F6}", -]); + "\x{1D54F}", "\x{1D550}", "\x{2124}", UTF(0x5E), UTF(0x5E), UTF(0x7E), UTF(0x7E), undef, + "\x{2132}", "\x{2141}", undef, undef, undef, undef, "\x{2127}", UTF(0xF0), + "\x{2242}", "\x{2136}", "\x{2137}", "\x{2138}", "\x{22D6}", "\x{22D7}", "\x{22C9}", "\x{22CA}", + "\x{2223}", "\x{2225}", "\x{2216}", "\x{223C}", "\x{2248}", "\x{224A}", "\x{2AB8}", "\x{2AB7}", + "\x{21B6}", "\x{21B7}", "\x{03DD}", "\x{03F0}", "\x{1D55C}", "\x{210F}", "\x{210F}", "\x{03F6}", + ], + uppercase_mathstyle => { family => 'blackboard' }); 1; - diff --git a/lib/LaTeXML/Package/pifont.sty.ltxml b/lib/LaTeXML/Package/pifont.sty.ltxml index 4a488346a..261f7721d 100644 --- a/lib/LaTeXML/Package/pifont.sty.ltxml +++ b/lib/LaTeXML/Package/pifont.sty.ltxml @@ -20,12 +20,14 @@ use LaTeXML::Package; DefPrimitive('\Pisymbol{}{Number}', sub { my ($document, $pifont, $code) = @_; - return Box(FontDecode($code->valueOf, ToString($pifont)), undef, undef, Invocation(T_CS('\char'), $code)); }); + my ($glyph, $font) = FontDecode($code->valueOf, ToString($pifont)); + return Box($glyph, $font, undef, Invocation(T_CS('\char'), $code)); }); DefPrimitive('\lx@Picountersymbol{}{}{Number}', sub { my ($document, $pifont, $counter, $codebase) = @_; my $code = $codebase->valueOf + LookupRegister('\c@' . 
ToString($counter))->valueOf - 1; - return Box(FontDecode($code, ToString($pifont))); }); + my ($glyph, $font) = FontDecode($code, ToString($pifont)); + return Box($glyph, $font); }); DefMacro('\Pilist{}{}', '\list{\Pisymbol{#1}{#2}}{}'); DefMacro('\endPilist', '\endlist'); diff --git a/lib/LaTeXML/Package/siunitx.sty.ltxml b/lib/LaTeXML/Package/siunitx.sty.ltxml index 4a5d30045..b0e35f24e 100644 --- a/lib/LaTeXML/Package/siunitx.sty.ltxml +++ b/lib/LaTeXML/Package/siunitx.sty.ltxml @@ -494,9 +494,8 @@ sub six_format_simplenumber { ? six_groupdigits($fraction, -1) : $fraction); push(@tokens, $f); } -## return Tokens(@tokens, @trailer); } return I_dual({ revert_as => 'presentation' }, - I_symbol({ role => 'NUMBER', meaning => T_OTHER(six_number_string($number)) }), + I_symbol({ role => 'NUMBER', meaning => six_number_string($number) }), I_wrap({}, Tokens(@tokens, @trailer))); } sub six_groupdigits { @@ -607,7 +606,7 @@ sub six_format_scinumber { # If mantissa is simple number, use scientific notation for the meaning # (all the dual cruft above formats appropriately, but is wasted) if ($arg1 && !$$arg1{operator} && (ToString(six_get('exponent-base')) eq '10')) { - $result = I_wrap({ meaning => T_OTHER(six_number_string($number)) }, $result); } + $result = I_wrap({ meaning => six_number_string($number) }, $result); } return $result; } sub six_format_compoundnumber { @@ -858,7 +857,7 @@ DefMacro('\ang OptionalKeyVals:SIX {}', sub { ($degrees ? six_number_string($degrees) . "\x{00B0}" : ''), ($minutes ? six_number_string($minutes) . "\x{2032}" : ''), ($seconds ? six_number_string($seconds) . 
"\x{2033}" : '')); - my $result = six_wrap(I_dual({}, I_symbol({ role => 'NUMBER', meaning => T_OTHER($string) }), + my $result = six_wrap(I_dual({}, I_symbol({ role => 'NUMBER', meaning => $string }), I_wrap({}, @punctuated))); six_end_processing(); return $result; }); diff --git a/t/complex/si.xml b/t/complex/si.xml index 61be96be1..9fbb3bcb5 100644 --- a/t/complex/si.xml +++ b/t/complex/si.xml @@ -244,14 +244,14 @@ Some text 3.430 - + - π + π - + - + 2 @@ -259,11 +259,11 @@ Some text + / - π + π 3 @@ -521,7 +521,7 @@ Some text × - + 1 . @@ -539,10 +539,10 @@ Some text 1234.1234 - + - + 3 @@ -550,10 +550,10 @@ Some text + - + 3 @@ -561,10 +561,10 @@ Some text + - + 3 @@ -572,10 +572,10 @@ Some text + - + 3 @@ -583,10 +583,10 @@ Some text + - + 3 @@ -636,7 +636,7 @@ Some text ( - π + π ) @@ -1718,7 +1718,7 @@ Some text -

+

× @@ -1734,10 +1734,10 @@ Some text π + π - + × @@ -1757,7 +1757,7 @@ Some text ) - π + π

@@ -7730,10 +7730,10 @@ uncertainty - + - + ± 3.76 @@ -8063,10 +8063,10 @@ uncertainty - + - + ± 3.76 diff --git a/t/daemon/fatals/fatal_100.xml b/t/daemon/fatals/fatal_100.xml index 66385276f..5d0d3545c 100644 --- a/t/daemon/fatals/fatal_100.xml +++ b/t/daemon/fatals/fatal_100.xml @@ -22,9 +22,8 @@ §2 2Last heading here - -

0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt 0.0pt

+ +

0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

- \ No newline at end of file + diff --git a/t/fonts/plainfonts.pdf b/t/fonts/plainfonts.pdf new file mode 100644 index 000000000..5bcce7a36 Binary files /dev/null and b/t/fonts/plainfonts.pdf differ diff --git a/t/fonts/plainfonts.tex b/t/fonts/plainfonts.tex new file mode 100644 index 000000000..c7149addf --- /dev/null +++ b/t/fonts/plainfonts.tex @@ -0,0 +1,89 @@ + +Testing low-level \TeX\ font manipulations. + +\def\testfont#1{% + \par -- Font \string#1(\fontname#1; \meaning#1):{X{\the#1X}X}; hyphen=\the\hyphenchar#1.} + +Stock plain fonts: +\testfont\font +\testfont\fiverm +\testfont\tenrm + +New 5pt fonts, new default {\tt hyphenchar=99}. +{\bf All} shared; {\tt scaled} mapped to {\tt at}: +\defaulthyphenchar=99 +\font\myrmfiveA=cmr10 at 5pt +\font\myrmfiveB=cmr10 at 5pt +\font\myrmfiveC=cmr10 scaled 500 +\testfont\myrmfiveA +\testfont\myrmfiveB +\testfont\myrmfiveC + +Bump {\tt hyphenchar=100} on myrmfiveA: +\hyphenchar\myrmfiveA=100 +\testfont\myrmfiveA +\testfont\myrmfiveB +\testfont\myrmfiveC + +Bump {\tt hyphenchar=101} on myrmfiveA, {\it as if} grouped: +{\hyphenchar\myrmfiveA=101} +\testfont\myrmfiveA +\testfont\myrmfiveB +\testfont\myrmfiveC + +While stock fonts should be unchanged +\testfont\fiverm +\testfont\tenrm + + +\string\font\ is weird; +it snapshots of the current font; +but compare meanings: +{\fiverm\expandafter\let\expandafter\xxxrm\the\font \global\let\xxxrm\xxxrm} +{\fiverm\testfont\font} +\testfont\fiverm +%{\expandafter\let\expandafter\xxxrm\the\font X\fiverm X \xxxrm X; + +% But compare meanings: \meaning\font\ vs \meaning\xxxrm. +% } + +\testfont{\textfont2} + +Testing plain math fonts: +\def\sample{abc123} +\def\tester#1{% + \par{\string#1: {#1 \sample} and ${#1 \sample}$}} + +\def\noop{} +\tester\noop +\tester\rm +\tester\mit +\tester\cal +\tester\it +\tester\sl +\tester\bf +\tester\tt + +Testing Text glyph lookup. +\font\tenbsy=cmbsy10 +\font\tenmsa=msam10 +\font\tenmsb=msbm10 + +Normal: +{\tenrm\char"10};{\tenrm\char"41}. 
+{\teni\char"10};{\teni\char"41}. +{\tenbf\char"10};{\tenbf\char"41}. +{\tentt\char"10};{\tentt\char"41}. +{\tensl\char"10};{\tensl\char"41}. +{\tenit\char"10};{\tenit\char"41}. + +Symbol: +{\tensy\char"10};{\tensy\char"41}. +{\tenbsy\char"10};{\tenbsy\char"41}. +{\tenex\char"10};{\tenex\char"41}. + +AMS: +{\tenmsa\char"10};{\tenmsa\char"41}. +{\tenmsb\char"10};{\tenmsb\char"41}. + +\bye diff --git a/t/fonts/plainfonts.xml b/t/fonts/plainfonts.xml new file mode 100644 index 000000000..1feff6752 --- /dev/null +++ b/t/fonts/plainfonts.xml @@ -0,0 +1,182 @@ + + + + + +

Testing low-level  font manipulations.

+
+ +

Stock plain fonts:

+
+ +

– Font “font(cmr10; “font):XXX; hyphen=45.

+
+ +

– Font “fiverm(cmr5; select font cmr5):XXX; hyphen=45.

+
+ +

– Font “tenrm(cmr10; select font cmr10):XXX; hyphen=45.

+
+ +

New 5pt fonts, new default hyphenchar=99. +All shared; scaled mapped to at:

+
+ +

– Font “myrmfiveA(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=99.

+
+ +

– Font “myrmfiveB(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=99.

+
+ +

– Font “myrmfiveC(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=99.

+
+ +

Bump hyphenchar=100 on myrmfiveA:

+
+ +

– Font “myrmfiveA(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=100.

+
+ +

– Font “myrmfiveB(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=100.

+
+ +

– Font “myrmfiveC(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=100.

+
+ +

Bump hyphenchar=101 on myrmfiveA, as if grouped:

+
+ +

– Font “myrmfiveA(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=101.

+
+ +

– Font “myrmfiveB(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=101.

+
+ +

– Font “myrmfiveC(cmr10; select font cmr10 at 5.0pt):XXX; hyphen=101.

+
+ +

While stock fonts should be unchanged

+
+ +

– Font “fiverm(cmr5; select font cmr5):XXX; hyphen=45.

+
+ +

– Font “tenrm(cmr10; select font cmr10):XXX; hyphen=45.

+
+ +

“font is weird; +it snapshots of the current font; +but compare meanings:

+
+ +

– Font “font(cmr5; “font):XXX; hyphen=45.

+
+ +

– Font “fiverm(cmr5; select font cmr5):XXX; hyphen=45.

+
+ +

– Font “textfont2(cmsy10; “textfont2):X𝒳X; hyphen=45.

+
+ +

Testing plain math fonts:

+
+ +

“noop: abc123 and + + + + a + b + c + 123 + + +

+
+ +

“rm: abc123 and + + abc123 + +

+
+ +

“mit: abc123 and + + + + a + b + c + 123 + + +

+
+ +

“cal: abc123 and + + + + + + + + + + + +

+
+ +

“it: abc123 and + + abc123 + +

+
+ +

“sl: abc123 and + + abc123 + +

+
+ +

“bf: abc123 and + + abc123 + +

+
+ +

“tt: abc123 and + + abc123 + +

+
+ +

Testing Text glyph lookup.

+
+ +

Normal: +ı;A. +ζ;A. +ı;A. +ı;A. +ı;A. +ı;A.

+
+ +

Symbol: +≍;𝒜. +;𝒜. +(;⎠.

+
+ +

AMS: +↠;⊐. +⋨;𝔸.

+
+