cur2html

#!/usr/bin/perl

# --- Site configuration ------------------------------------------------
# Relative path from the generated page to the site root.  It carries no
# trailing slash because every use below (and in printhead / printend)
# appends "/..." itself; a trailing slash produced "..//css/..." paths.
$level      = "..";
$stylesheet = $level . "/css/homepage.css";
$cursheet   = $level . "/css/curriculum.css";
$navsheet   = $level . "/css/navbar.css";
$mathjax    = $level . "/support/MathJax/es5/tex-mml-chtml.js";
$emailbase  = $level . "/scripts/email-base.jsc";

# Default document language; processDoc overwrites it when it meets
# \selectlanguage.
$lang       = "en";
$auth       = "Luca Giuzzi";
$email      = "luca.giuzzi\@unibs.it";

# Titles and anchor ids of the level-2 sections, filled by printsect and
# consumed by createbanner.  They start out genuinely empty (assigning ""
# would create a one-element array holding an empty string).
@divtit     = ();
@divid      = ();




# Translate LaTeX list markup on the current line ($_) into HTML.
#
# Recognises \begin / \end of the enumerate (-> <ol>) and itemize (-> <ul>)
# environments and, while inside a list, \item (-> <li>).
#
# State is kept in two package globals:
#   $enum       - current list nesting depth (0 outside any list)
#   $firstentry - true from the opening of a list until its first \item
#
# Returns 1 when the line was recognised (and $_ rewritten), 0 otherwise.
sub processList {

    # Checked in this order: enumerate begin/end, then itemize begin/end.
    my @environments = ( [ 'enumerate', 'ol' ], [ 'itemize', 'ul' ] );

    for my $environment (@environments) {
        my ( $name, $tag ) = @{$environment};

        # Opening a list replaces the whole line with the start tag.
        if (/\\begin\{\Q$name\E\}/) {
            $_          = "<$tag class='$name'>\n";
            $enum       += 1;
            $firstentry = 1;
            return 1;
        }

        # Closing a list also closes the pending <li>; when no \item was
        # seen there is no <li> to close, so none is emitted.
        if (/\\end\{\Q$name\E\}/) {
            my $close_item = $firstentry ? '' : "</li>\n";
            $_          = $close_item . "</$tag>\n";
            $enum       -= 1;
            $firstentry = 0;
            return 1;
        }
    }

    # \item inside a list.  The lookahead stops longer control words that
    # merely begin with "item" (e.g. \itemsep) from being taken for \item.
    if ( $enum && /\\item(?![A-Za-z])/ ) {
        my $li = $firstentry ? "<li>\n" : "</li><li>\n";
        $firstentry = 0;
        s/\\item(?![A-Za-z])/$li/;
        return 1;
    }

    return 0;
}

# Expand an \input{name} found on the current line ($_): every line of
# "name.tex" is run through processDoc as if it were part of the main file.
#
# processDoc abandons a line with `next`; while the included file is being
# read that `next` resumes the loop below.
#
# The line holding the \input is consumed: on return $_ is the empty string,
# so the caller prints nothing more for it.  A file that cannot be opened is
# reported on STDERR and otherwise ignored (the conversion carries on).
#
# Lines without \input are left untouched.  No useful return value.
sub processInput {
    return unless /\\input\{([^{}]*)\}/;

    my $path = $1 . '.tex';
    my $input_fh;

    if ( !open( $input_fh, '<', $path ) ) {
        warn "cur2html: cannot open input file '$path': $!\n";
        $_ = '';
        return;
    }

    # The included lines are deliberately placed in the global $_, which is
    # what processDoc and its helpers operate on.
    while (<$input_fh>) {
        processDoc();
    }
    close $input_fh;

    $_ = '';
    return;
}

# Convert the tabular environment to an HTML table, working on the current
# line ($_).  Only a flat, non-nested table is supported.
#
# The package global $intab is true while inside a tabular.
#
# The \begin{tabular} and \end{tabular} lines print their markup directly
# and then call `next`: this leaves the sub and makes the caller's enclosing
# loop move on to the following input line, so this sub must only be called
# (directly or indirectly) from inside a loop.  Any other line inside the
# table has its row and column separators rewritten in $_ and is handed back
# to the caller.
sub processTabular {

    # - begin tabular
    if (/\\begin\{tabular\}/) {
        $intab = 1;
        print "<table>\n<tr><td>\n";
        next;
    }

    return unless $intab;

    # - end tabular
    if (/\\end\{tabular\}/) {
        $intab = 0;
        print "</td></tr></table>\n";
        next;
    }

    # - replace TeX markers with the HTML ones
    # Drop the optional row spacing first: \\[2pt] -> \\
    s/\\\\\[([^\]]*)\]/\\\\/g;

    # Row separator.
    s/\\\\/\<\/td\>\<\/tr\>\<tr\>\<td\>/g;

    # Column separator.  Two kinds of "&" are not separators and are left
    # alone: the TeX escape \& and the "&" of the &lt; / &gt; entities that
    # the "<" / ">" escaping in processDoc has already put on the line.
    s/(?<!\\)&(?!(?:lt|gt);)/\<\/td\>\<td\>/g;

    return;
}

# Return a copy of the argument in which every parenthesis is preceded by a
# backslash, so that the text can be interpolated into a regular expression
# without "(" and ")" opening or closing a group.  Other regex
# metacharacters are not touched.
#
# The work is done on a lexical copy, so no package variable is modified.
sub quoterx {
    my ($text) = @_;
    $text =~ s/([()])/\\$1/g;
    return $text;
}

# Rewrite the TeX markup on the current line ($_) as HTML, in place.
#
# Applied in this order: math abbreviations, \par, accents, dashes and ties,
# quotes, font switches ({\em ..}, \emph{..}, {\bf ..}, {\tt ..}) and the
# link-like macros \url, \href, \doi, \papertitle, \subjtitle, \arxiv, \issn
# and \semmat.
#
# Takes no arguments and returns nothing useful.  Besides modifying $_ the
# only side effect is one diagnostic line on STDERR for every \papertitle or
# \subjtitle that is converted.  All scratch variables are lexical.
sub processTeX {

    # THIS HAS TO BE CALLED AFTER TABULAR
    # (presumably because the rules below emit "&...;" entities, and
    #  processTabular turns "&" into a cell separator)

    # Replace std abbreviations
    # PG looks better as it is
    s/\\PG/\{{PG\}\}/g;
    s/\\cO/\{\\mathcal O\}/g;
    s/\\cF/\{\\mathcal F\}/g;
    s/\\cH/\{\\mathcal H\}/g;
    s/\\fB/\{\\mathfrak B\}/g;
    s/\\cU/\{\\mathcal U\}/g;

    # \par becomes a line break.  The lookahead leaves longer control words
    # that merely start with "par" (\partial, \parallel, ...) intact.
    s/\\par(?![A-Za-z])/\<br\/\>/g;

    # Accents: \`e -> &egrave;  \'e -> &eacute;  \"o -> &ouml;
    s/\\\`(\w)/\&$1grave;/g;
    s/\\\'(\w)/\&$1acute;/g;
    s/\\\"(\w)/\&$1uml;/g;

    # Dashes (longest first) and ties
    s/---/\&mdash;/g;
    s/--/\&ndash;/g;
    s/\~/\&nbsp;/g;

    #inverted commas
    s/\`\`/"/g;
    s/\'\'/"/g;

    #URL indication (done after the tie rule so that this "~" survives)
    s/\\textasciitilde /\~/g;

    # Font switches.  Each rule rewrites every occurrence on the line; any
    # non-word characters captured in front of the switch are kept in front
    # of the generated <span>.

    #Italic processing
    s/\{([\W]*)\\em([ \w]*)\}/$1<span style='font-style:italic' class='em'>$2<\/span>/g;
    s/\\emph\{([\W]*)([ :\w-]*)\}/$1<span style='font-style:italic' class='emph'>$2<\/span>/g;

    #Boldface processing
    s/\{([\W]*)\\bf(series)? ([ \[\]\w]*)\}/$1<span style='font-weight:bold' class='bf'>$3<\/span>/g;

    #Typed font (used for email and web)
    # (convert in boldface)
    s/\{([\W]*)\\tt([ \~\:\/\@\.\w]*)\}/$1<span style='font-weight: bold' class='tt'>$2<\/span>/g;

### CONVERT MACROS

    # One brace-delimited macro argument without nested braces; it captures
    # the text between the braces.  The argument is only ever matched by
    # this pattern and copied into the output, never interpolated into a
    # regex, so "(", ")", "?", "+" ... inside a URL or DOI are harmless.
    my $arg = qr/\{([^{}]*)\}/;

    # std macros: url, href,doi
    s{\\url$arg}{<a class="url" href="$1">$1</a>}g;
    s{\\href$arg$arg}{<a class="href" href="$1">$2</a>}g;
    s{\\doi$arg}{
        '<span class="doi">'
          . 'doi: <a class="doi" href="http://dx.doi.org/'
          . $1 . '">'
          . $1 . '</a></span>'
    }ge;

    # \papertitle{...}, : the title may itself contain braces, so it runs
    # up to the last "}," on the line that is not followed by another "}".
    if (/(\\papertitle(?<!\{)\{)([^}].*)\},(?!})/xm) {
        my $title = $2;
        my $all   = $1 . $title . "\}";
        my $repl  = '<span class="papertitle">"' . $title . '"</span>';
        print STDERR $all . "\n";
        s/\Q$all\E/$repl/;
    }

    # \subjtitle{...} followed by a blank or one of ; . ,
    if (/(\\subjtitle(?<!\{)\{)([^}].*)\}[ ;.,](?!})/xm) {
        my $title = $2;
        my $all   = $1 . $title . "\}";
        my $repl  = '<span class="subj">"' . $title . '"</span>';
        print STDERR $all . "\n";
        s/\Q$all\E/$repl/;
    }

    #My macros: arxiv
    s{\\arxiv$arg}{
        '<span class="arxiv">'
          . '(arxiv: <a class="arxiv" '
          . 'href="http://arxiv.org/abs/'
          . $1 . '">'
          . $1 . '</a>)</span>'
    }ge;

    s{\\issn$arg}{<span class="issn">ISSN: $1</span>}g;

    # \semmat{number}{year}
    s{\\semmat$arg$arg}{
        'Quaderno del Seminario Matematico di Brescia n.'
          . '<a class="semmat" '
          . 'href="http://semmat.dmf.unicatt.it/cgi-bin/preprintserv/semmat/'
          . 'Quad'
          . $2 . 'n'
          . $1 . '">'
          . $1 . '/'
          . $2 . '</a>'
    }ge;

    return;
}

# Process one line of the LaTeX source, held in $_, and print the HTML that
# corresponds to it (possibly nothing).
#
# Takes no arguments.  State that must survive from one line to the next is
# kept in package globals: $lang, $indoc, $comment, $secnum (plus the ones
# used by the helpers called below).
#
# IMPORTANT: the sub uses `next` to abandon the current line.  `next` leaves
# the sub and resumes the innermost loop that is running in the caller, so
# processDoc must only be called from inside a line-reading loop.
sub processDoc() {

    # Escape the HTML-significant brackets before any markup is generated.
    s/</\&lt;/g;
    s/>/\&gt;/g;

    # look for babel: remember the language, mapped to the codes used for
    # the html lang attribute
    if (/\\selectlanguage\{([a-z]*)\}/) {
        $lang = $1;
        $lang =~ s/british/en/;
        $lang =~ s/italian/it/;
    }

    # Ignore the prologue (we should implement
    #  at least \newcommand ... then we would
    #  have a "common solution" of a sort, but
    # ...

    # \begin{document} ends the prologue: emit the page header once.
    if (/\\begin\{document\}/) {
        $indoc = 1;
        printhead( $auth, $email );
        next;
    }

    # Nothing before \begin{document} produces output.
    next if ( !$indoc );

    #ignore comments: lines starting with % and everything from a
    # \begin{comment} line up to (not including) the \end{comment} line
    next if (/^\%/);
    $comment = 1 if (/\\begin\{comment\}/);
    $comment = 0 if (/\\end\{comment\}/);
    next if $comment;

    #Tables (may `next` on the \begin / \end{tabular} lines)
    processTabular();

    #TeX markings+macros
    processTeX();
    processInput();

    #section -> DIV; level 2 heading
    # NOTE(review): $depth is assigned here but not read anywhere in the
    # code visible in this file.
    $depth = 0;
    if (/\\section/) {

        # Capture the first balanced {...} group on the line; (?0) recurses
        # into the whole pattern so nested braces are allowed.  Because of
        # /g in scalar context the search starts at pos($_) and leaves
        # pos($_) just after the group.
        /(\{(?: [^{}]* | (?0) )*\})/xgm;
        $tgt = $1;

        # strip the outer braces
        $tgt =~ s/^\{//;
        $tgt =~ s/\}$//;
        printsect( $tgt, 2 );

        # printsect looks at $secnum to decide whether an earlier <div>
        # has to be closed first.
        $secnum++;
        next;
    }

    #subsection -> level 3 heading
    if (/\\subsection/) {
        /(\{(?: [^{}]* | (?0) )*\})/xgm;
        $tgt = $1;
        $tgt =~ s/^\{//;
        $tgt =~ s/\}$//;
        printsect( $tgt, 3 );
        next;
    }

    # \ssubsect is given two brace groups: the first becomes a level 3
    # heading and the second a level 4 heading.
    if (/\\ssubsect/) {
        /(\{(?: [^{}]* | (?0) )*\})/xgm;
        $tgt = $1;
        $tgt =~ s/^\{//;
        $tgt =~ s/\}$//;
        printsect( $tgt, 3 );

        # Same pattern again: thanks to /g the match resumes after the
        # first group and therefore captures the next one.
     /(\{(?: [^{}]* | (?0) )*\})/xgm;
        $tgt = $1;
        $tgt =~ s/^\{//;
        $tgt =~ s/\}$//;
        printsect( $tgt, 4 );
       next;
    }

    #paragraph
    # NOTE(review): processTeX above has already rewritten every \par as
    # <br/>, so this test looks unreachable as the code stands - confirm
    # whether a <p /> was meant to be emitted here.
    if (/^\\par$/) {
        print "\<p /\>\n";
        next;
    }

    # List markup: processList rewrites $_ and reports whether it did.
    if ( processList() ) {
        print;
        next;
    }

    #skip all the other tex constructs (and the line they are
    # contained in
    # NOTE(review): the [^...] part is a character class, not a list of
    # alternatives, and the * lets it match nothing; as written the test
    # succeeds for every line whose first backslash is preceded only by
    # non-word characters, whatever the command is.
    if (/^\W*\\[^{begin|end|section|subsection|ssubsect|\[|\$}]*/) { next; }

    #print out the text
    print;
}

# Print the opening markup of a heading.
#
# Arguments: the heading text and the HTML heading level (2 for a section,
# 3 and 4 for the lower levels).
#
# When an earlier section exists ($secnum is true) the <div> it opened is
# closed first, after closing a list that is still open at depth 1 or 2
# according to $enum.  Level-2 headings are also recorded in @divtit /
# @divid so that createbanner can link to them.
sub printsect {
    my ( $heading, $rank ) = @_;

    # The anchor id is the heading with blanks, and then every other
    # non-word character, replaced by an underscore.
    my $anchor = $heading;
    $anchor =~ s/ /_/g;
    $anchor =~ s/[^\w]/_/g;

    if ($secnum) {
        if ( $enum == 1 ) {
            print "</li></ol>\n";
        }
        if ( $enum == 2 ) {
            print "</li></ul>\n";
        }
        print "<p/></div>\n";
    }

    if ( $rank == 2 ) {
        push @divtit, $heading;
        push @divid,  $anchor;
    }

    print qq{<div id="$anchor">\n},
      qq{<h$rank id="h$anchor">$heading</h$rank>\n};
}



#standard header
#
# Print the beginning of the page: doctype, <head> (style sheets, MathJax
# and script references), the PHP include of the navigation bar, the
# opening layout <div>s and the <h1> title.
#
# Arguments:
#   $author - name shown in <title>, in the Author meta tag and in the <h1>
#   $email  - address used for the rev="made" mailto link
#
# Reads the package globals $lang, $level, $stylesheet, $navsheet,
# $cursheet, $mathjax and $emailbase.  The connecting word of the title is
# "di" when $lang matches /it/ and "of" otherwise.
sub printhead {
    my $author = shift;
    my $email  = shift;
    my $of     = "of";
    if ( $lang =~ /it/ ) { $of = "di"; }

    # The <h1> uses the $author argument, like <title> and the meta tag,
    # rather than the global $auth.
    print <<END_HEADER;
<!DOCTYPE html>
<html lang="$lang">
<head>
   <title>Curriculum $of $author</title>
   <meta charset="utf-8" />
   <meta name="Author" lang="it" content="$author" />
   <meta name="viewport" content="width=device-width, initial-scale=1.0" /> 
   <link rev="made" href="mailto:$email" />
   <link rel="stylesheet" type="text/css" href="../support/bootstrap/dist/css/bootstrap.css" />
   <link rel="stylesheet" type="text/css" href="$stylesheet" />
   <link rel="stylesheet" type="text/css" href="$navsheet" />
   <link rel="stylesheet" type="text/css" href="$cursheet" />
   <link rel="shortcut icon" type="image/x-icon" href="favicon.ico" />
   <script type="text/javascript">
   MathJax = {
      tex: {
    inlineMath: [['\$', '\$'], ['\\\\(', '\\\\)']]
  }
   };
   </script> 
   <script src="$mathjax"
   type="text/javascript"> </script>
   <script src="/scripts/jquery.js" type="text/javascript"></script>
   <script src="/support/bootstrap/dist/js/bootstrap.js"
   type="text/javascript"></script>
   <script src="$emailbase" type="text/javascript">
   </script>
</head>
<body class="curr">

<?php
    include("$level/snippets/NavBar.en.inc");
?>
<div id="content">
<div class="container-fluid">
<div class="row">
<div class="col-xs-12 col-sm-9">
<hr />
<h1 class="title">Curriculum $of $author</h1>
<hr />
END_HEADER
}

# Print the end of the main column followed by the side banner: one link
# per level-2 section, in document order.
#
# Reads the parallel package arrays @divtit (section titles) and @divid
# (the matching anchor ids), which printsect fills.  The arrays are only
# read, not consumed.  Entries without a title are skipped.
#
# Every entry is written as a self-contained <ul>...</ul>, so the trailer
# closes only <nav> and the banner <div>.
sub createbanner {
    print <<END_BH;
<br /><hr />
</div>
</div>
<div class="col-12 col-sm-1"></div>
<div class="col-12 col-sm-2 banner" >
<nav id="banner" class="d-none d-md-block" style="padding:20px; padding-right:40px; position:fixed;">
END_BH

    for my $index ( 0 .. $#divtit ) {
        my $title = $divtit[$index];
        my $name  = $divid[$index];

        # Tested with length rather than truth so that a section called
        # "0" still gets its link.
        next unless defined $title && length $title;

        print <<ENDD;
<ul class="nav navbar-nav">
<li class="nav-link"><a href="#$name">$title</a></li></ul>
ENDD
    }

    print <<ENDBAN;
</nav>
</div>
ENDBAN
}

#standard epilogue
#
# Print the closing part of the page: the remaining layout </div>s, the
# "Last updated" line (UTC time of the run) and a script that fills in the
# PDF link and the link to the version in the other language.
#
# The file base names depend on the package global $lang: when it matches
# /it/ the page is the Italian one and the other-language link points to the
# English version, otherwise the other way round.
sub printend {
    my $is_italian = ( $lang =~ /it/ );

    my ( $this_base, $other_base, $other_label ) =
      $is_italian
      ? ( "curriculum-ita6", "curriculum-eng6", "English" )
      : ( "curriculum-eng6", "curriculum-ita6", "Italiano" );

    my $stamp = scalar( gmtime() );

    print <<EPILOGUE;

</div>
</div>
</div>
<div id="Update">
<pre class="update">Last updated: $stamp by <a href="$level/files/scripts/perl/cur2html.php">cur2html</a> </pre>
</div>

<script type="text/javascript">
 \$("ul#topright").replaceWith(\'<ul class="nav navbar-nav navbar-right"><li><a href="$this_base.pdf">PDF</a> </li></ul>\')
 \$("li.othlang").replaceWith(\'<li id="othlang2"><a href="$other_base.php">$other_label</a></li>\')
</script>
</body>
</html>
EPILOGUE
}




# Main program: every input line (from the files named on the command line,
# or from standard input) is placed in $_ and handed to processDoc.
# processDoc and the helpers it calls abandon a line with `next`, which
# resumes this loop - so this must stay the innermost loop around the call.
while (<>) {
    processDoc();
}

# After the last line: print the side banner (it links to the sections
# collected while converting) and then the closing markup of the page.
createbanner;
printend;