Skip to content

Commit

Permalink
implement verbatim*
Browse files Browse the repository at this point in the history
  • Loading branch information
teepeemm committed Aug 5, 2023
1 parent 207eb53 commit b569ec3
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 38 deletions.
86 changes: 51 additions & 35 deletions lib/LaTeXML/Package/LaTeX.pool.ltxml
Original file line number Diff line number Diff line change
Expand Up @@ -1863,8 +1863,8 @@ DefRegister('\@itemdepth' => Number(0));
#======================================================================

# NOTE: what's the best way to get verbatim material through?
DefEnvironment('{verbatim}', '<ltx:verbatim>#body</ltx:verbatim>');
DefEnvironment('{verbatim*}', '<ltx:verbatim>#body</ltx:verbatim>');
#DefEnvironment('{verbatim}', '<ltx:verbatim>#body</ltx:verbatim>');
#DefEnvironment('{verbatim*}', '<ltx:verbatim>#body</ltx:verbatim>');

# \@verbatim takes no arguments and expands to the token sequence
#   \par \aftergroup\lx@end@verbatim \lx@@verbatim
# i.e. \lx@end@verbatim is queued with \aftergroup before \lx@@verbatim is invoked.
DefMacroI('\@verbatim', undef,
'\par\aftergroup\lx@end@verbatim\lx@@verbatim'); # Close enough?
Expand All @@ -1888,40 +1888,53 @@ DefConstructorI('\lx@end@verbatim', undef,
# and also the usual environment capture.
DefConstructorI(T_CS('\begin{verbatim}'), undef,
"<ltx:verbatim font='#font'>#body</ltx:verbatim>",
# NOTE(review): the inline beforeDigest/afterDigest handlers that follow are succeeded, further
# down this same argument list, by a second beforeDigest/afterDigest pair that delegates to
# beforeDigestVerbatim/afterDigestVerbatim. Presumably the options are collected into a hash,
# in which case the later pair wins -- confirm the inline pair is meant to be removed.
beforeDigest => [sub { $_[0]->bgroup;
my @stuff = ();
# Digest any registered at-begin hook tokens and keep the result for the return value.
if (my $b = LookupValue('@environment@verbatim@atbegin')) {
push(@stuff, Digest(@$b)); }
AssignValue(current_environment => 'verbatim');
DefMacroI('\@currenvir', undef, 'verbatim');
MergeFont(family => 'typewriter');
# Digest(T_CS('\par')); # NO! See beforeConstruct!
@stuff; }],
afterDigest => [sub {
my ($stomach, $whatsit) = @_;
# $stomach->egroup;
my $font = $whatsit->getFont;
my $loc = $whatsit->getLocator;
my $end = "\\end{verbatim}";
my @lines = ();
my $gullet = $stomach->getGullet;
# Consume raw input lines until one contains the literal \end{verbatim}.
while (defined(my $line = $gullet->readRawLine)) {
# The raw chars will still have to be decoded (but not space!!)
$line = join('', map { ($_ eq ' ' ? ' ' : FontDecodeString($_, 'OT1_typewriter')) }
split(//, $line));
# Text before the terminator is kept; text after it is tokenized and pushed back, followed by T_CR.
if ($line =~ /^(.*?)\\end\{verbatim\}(.*?)$/) {
push(@lines, $1 . "\n"); $gullet->unread(Tokenize($2), T_CR);
last; }
push(@lines, $line . "\n"); }
# Drop a final captured line that consists of a newline only.
pop(@lines) if $lines[-1] eq "\n";
# Note last line ends up as Whatsit's "trailer"
if (my $b = LookupValue('@environment@verbatim@atend')) {
push(@lines, ToString(Digest(@$b))); }
$stomach->egroup;
# One Box per captured line, plus a final Box holding the \end{verbatim} text.
$whatsit->setBody(map { Box($_, $font, $loc, T_OTHER($_)) } @lines, $end);
return; }],
# Delegating handlers; the leading 0 is the $starred flag of the shared helpers.
beforeDigest => [sub { beforeDigestVerbatim(0, @_); }],
afterDigest => [sub { afterDigestVerbatim(0, @_); }],
beforeConstruct => sub { $_[0]->maybeCloseElement('ltx:p'); });

# Constructor for the \begin{verbatim*} control sequence.
# Uses the same XML template as \begin{verbatim}; the digestion handlers forward to the
# shared helpers with the $starred flag set to 1.
DefConstructorI(T_CS('\begin{verbatim*}'), undef,
  "<ltx:verbatim font='#font'>#body</ltx:verbatim>",
  beforeDigest => [
    sub {
      return beforeDigestVerbatim(1, @_); }
  ],
  afterDigest => [
    sub {
      return afterDigestVerbatim(1, @_); }
  ],
  beforeConstruct => sub {
    my ($document) = @_;
    return $document->maybeCloseElement('ltx:p'); });

# Shared beforeDigest handler for the \begin{verbatim} and \begin{verbatim*} constructors.
#
# Arguments:
#   $starred - flag passed by the caller (1 for verbatim*, 0 for verbatim); it is not
#              consulted in this body, so both forms use the 'verbatim' name and hook key.
#   $stomach - object on which bgroup is called to open a group.
# Returns whatever Digest produced for the '@environment@verbatim@atbegin' value,
# or an empty list when no such value is set.
sub beforeDigestVerbatim {
  my ($starred, $stomach) = @_;
  $stomach->bgroup;
  # The hook is digested before the environment name and font are changed.
  my $hook     = LookupValue('@environment@verbatim@atbegin');
  my @digested = ($hook ? (Digest(@$hook)) : ());
  AssignValue(current_environment => 'verbatim');
  DefMacroI('\@currenvir', undef, 'verbatim');
  MergeFont(family => 'typewriter');
  # Digest(T_CS('\par')); # NO! See beforeConstruct!
  return @digested; }

# Shared afterDigest handler for the \begin{verbatim} and \begin{verbatim*} constructors.
#
# Reads raw lines from the gullet until one contains the literal terminator
# (\end{verbatim} or \end{verbatim*}), and stores them as the whatsit's body.
#
# Arguments:
#   $starred - true for verbatim*: the terminator is \end{verbatim*} and each space in the
#              captured text is replaced by U+2423 (OPEN BOX, the visible-space glyph).
#   $stomach - supplies the gullet to read from; egroup is called on it before returning.
#   $whatsit - supplies the font and locator for the Boxes, and receives the body.
# Returns nothing.
#
# Only the verbatim text is decoded. Whatever follows the terminator on the same line is
# ordinary input again, so it is pushed back exactly as it was read; decoding it (or, for
# verbatim*, turning its spaces into U+2423) would corrupt the text after the environment.
sub afterDigestVerbatim {
  my ($starred, $stomach, $whatsit) = @_;
  # $stomach->egroup;
  my $font   = $whatsit->getFont;
  my $loc    = $whatsit->getLocator;
  my $end    = $starred ? '\end{verbatim*}' : '\end{verbatim}';
  my $space  = $starred ? "\x{2423}"        : ' ';
  my $gullet = $stomach->getGullet;
  # The raw chars still have to be decoded (but not space!!)
  my $decode = sub {
    my ($raw) = @_;
    return join('', map { ($_ eq ' ' ? $space : FontDecodeString($_, 'OT1_typewriter')) }
        split(//, $raw)); };
  my @lines = ();
  while (defined(my $line = $gullet->readRawLine)) {
    # Look for the terminator in the RAW line, so the remainder stays untouched.
    if (my ($body, $rest) = $line =~ /^(.*?)\Q$end\E(.*?)$/) {
      push(@lines, $decode->($body) . "\n");
      $gullet->unread(Tokenize($rest), T_CR);
      last; }
    push(@lines, $decode->($line) . "\n"); }
  # Drop a final newline-only line; guard against having captured nothing at all
  # (input ended right after \begin), where $lines[-1] would be undef.
  pop(@lines) if @lines && ($lines[-1] eq "\n");
  # Note last line ends up as Whatsit's "trailer"
  if (my $hook = LookupValue('@environment@verbatim@atend')) {
    push(@lines, ToString(Digest(@$hook))); }
  $stomach->egroup;
  # One Box per captured line, plus a final Box holding the terminator text.
  $whatsit->setBody(map { Box($_, $font, $loc, T_OTHER($_)) } @lines, $end);
  return; }

DefPrimitiveI('\@vobeyspaces', undef, sub {
AssignCatcode(" " => 13);
Let(T_ACTIVE(" "), '\nobreakspace');
Expand All @@ -1935,11 +1948,14 @@ DefMacroI('\verb', undef, sub {
StartSemiverbatim('%', '\\', '{', '}');
$STATE->assignCatcode(' ', CC_ACTIVE);
my $init;
my $skippedSpace = 0;
# As of texlive 2021, DO skip spaces before delimiter (even tho we've changed catcodes)
# but if we do skip spaces, * can be the delimiter
do { $init = $gullet->readToken();
$skippedSpace = 1 if (defined $init && $init->getString eq ' ');
} while (defined $init && $init->getString eq ' ');
my $starred = 0;
if (T_OTHER('*')->equals($init)) {
if (T_OTHER('*')->equals($init) && !$skippedSpace) {
$starred = 1;
do { $init = $gullet->readToken();
} while (defined $init && $init->getString eq ' '); }
Expand Down
Binary file modified t/tokenize/verb.pdf
Binary file not shown.
10 changes: 9 additions & 1 deletion t/tokenize/verb.tex
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ \section{Verbatim Environment}
keep this too \end{verbatim} outside verbatim
More Normal {\bf Bold} stuff.

Verbatim 4:
\begin{verbatim*} keep this
AbNormal {\bf NonBold} stuff.
keep this too \end{verbatim*} outside verbatim
More Normal {\bf Bold} stuff.

% NOT allowed
%\section{Inline \verb|\verb| verbatim.}
\section{Inline verb command}
Expand All @@ -47,7 +53,7 @@ \section{Inline verb command}
Note that spaces after a control-sequence are skipped on the \emph{next} read,
by which time catcodes may have been changed.
Prior to April, 2020, a space following \verb|\verb| would -- surprisingly -- have been treated as
the delimiter! Since then, spaces are skipped and the following char is used as delimiter.
the delimiter! Since then, spaces are skipped and the following char (even *) is used as delimiter.
We'll adopt the newer approach.

\verb |a}b#c^d_e$f| stuff
Expand All @@ -56,6 +62,8 @@ \section{Inline verb command}

{\verb* Trick-roll} T }

\verb * + i n * o u t +

% Another tricky case
\makeatletter
\def\verbatimlisting#1{%
Expand Down
16 changes: 14 additions & 2 deletions t/tokenize/verb.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ AbNormal {\bf NonBold} stuff.
keep this too
</verbatim>
<p>outside verbatim
More Normal <text font="bold">Bold</text> stuff.</p>
</para>
<para xml:id="S1.p5">
<p>Verbatim 4:</p>
<verbatim font="typewriter">␣keep␣this
AbNormal␣{\bf␣NonBold}␣stuff.
keep␣this␣too␣␣␣␣
</verbatim>
<p>␣outside␣verbatim
More Normal <text font="bold">Bold</text> stuff.</p>
</para>
</section>
Expand Down Expand Up @@ -78,7 +87,7 @@ More Normal <text font="bold">Bold</text> stuff.</p>
<p>Note that spaces after a control-sequence are skipped on the <emph font="italic">next</emph> read,
by which time catcodes may have been changed.
Prior to April, 2020, a space following <verbatim font="typewriter">\verb</verbatim> would – surprisingly – have been treated as
the delimiter! Since then, spaces are skipped and the following char is used as delimiter.
the delimiter! Since then, spaces are skipped and the following char (even *) is used as delimiter.
We’ll adopt the newer approach.</p>
</para>
<para xml:id="S2.p9">
Expand All @@ -91,9 +100,12 @@ We’ll adopt the newer approach.</p>
<p><verbatim font="typewriter">rick-roll}␣</verbatim></p>
</para>
<para xml:id="S2.p12">
<p>Input snippet</p>
<p><verbatim font="typewriter">+ i n </verbatim> o u t +</p>
</para>
<para xml:id="S2.p13">
<p>Input snippet</p>
</para>
<para xml:id="S2.p14">
<verbatim font="typewriter">|a}b#c^d_e$f|
</verbatim>
<p>after.</p>
Expand Down

0 comments on commit b569ec3

Please sign in to comment.