#!/usr/bin/perl
# converter from my diary.shtml to blosxom
# Copyright (C) 2004 Kengo Ichiki
# $Id: diary2blosxom.pl,v 1.7 2004/01/11 04:19:46 ichiki Exp $
#
# Usage: diary2blosxom.pl (HTML file)
#
# The input file is read line by line, converted to UTF-8 with Jcode and fed
# to an HTML::Parser object.  The tag handlers accumulate a title and a body
# for each list item and pass them to print_entry(), which writes one
# "NAME.txt" file per entry (under a category directory when the title is
# categorised).
#
# NOTE(review): the copy of this script that was reviewed had lost its line
# breaks and part of its text.  What could not be recovered is left as found
# and marked below; please restore it from the original before relying on
# this file:
#   * the `text' handler registered with the parser is not defined here;
#   * the end of start()'s `ul' branch and the beginning of end() (anything
#     before its `a' branch) are missing;
#   * nothing visible updates $ullevel, $year, $month or $num, and nothing
#     visible reads $h1flag, $h2flag or $preflag;
#   * many appended string literals are empty ("") -- presumably HTML markup
#     that was stripped from the copy; TODO confirm.

use strict;
use warnings;

use HTML::Parser;
use Jcode;

my $topurl  = "http://kichiki.com/cgi-bin/blosxom.cgi/diary";
my $flavour = "html";

# check for command-line argument
die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;

# get the command-line argument
my $file = shift;

# Create HTML Parser object
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [\&start, "tagname,attr"],
    text_h      => [\&text,  "dtext"],
    end_h       => [\&end,   "tagname"],
);

die "File $file doesn't exist.\n" unless -e $file;
open(my $html_fh, '<', $file) or die "Cannot open $file: $!";

# Parser state shared by the handlers.
my $h1flag  = 0;    # h1 tag
my $h2flag  = 0;    # h2 tag
my $preflag = 0;    # pre tag
my $nsaflag = 0;    # not-shown anchor tag flag -- local copy, no-href tags
my $ullevel = 0;    # ul level

my $year  = 0;
my $month = 0;
my $day   = 0;
my $num   = 0;
my $title = "";
my $body  = "";

# Stack of nested sub-entries; $subn is its depth and index $subn-1 is the
# innermost sub-entry currently open.
my $subn    = 0;
my $subday  = 0;
my $subname = "";
my @subfile;
my @subtitle;
my @subbody;
my @subullevel;

while (my $line = <$html_fh>) {
    # convert input-line into utf8
    Jcode::convert(\$line, "utf8");
    $p->parse($line);
}
# Tell the parser the document is complete so buffered text is flushed.
$p->eof;
close($html_fh);

exit;

# start($tagname, $attr)
#
# Start-tag handler.  $tagname is the tag name and $attr a hash reference of
# the tag's attributes (argspec "tagname,attr").
sub start {
    my ($tagname, $attr) = @_;

    # h1 tag
    if ($tagname eq "h1") {
        $h1flag = 1;
    }
    # h2 tag
    elsif ($tagname eq "h2") {
        $h2flag = 1;
    }
    # a tag
    elsif ($tagname eq "a") {
        # Named copies instead of assigning to the global $_; a missing
        # attribute is treated as the empty string.
        my $id   = defined $attr->{id}   ? $attr->{id}   : "";
        my $href = defined $attr->{href} ? $attr->{href} : "";

        # Outside any list, inside an h2: an id containing "d<digits>"
        # sets the current day.
        if ($ullevel == 0 && $h2flag == 1) {
            if ($id =~ /d(\d+)/) {
                $day = $1;
                $num = 0;    # reset counter
            }
        }

        if ($ullevel >= 1) {
            if ($href eq "") {
                # do nothing
                $nsaflag = 1;
            }
            else {
                # Rewrite site-relative links.
                $href =~ s/^images\/AMAZON/http:\/\/images.amazon.com\/images\/P/;
                $href =~ s/^theorems.shtml/http:\/\/kichiki.hp.infoseek.co.jp\/diary\/theorems.shtml/;
                $href =~ s/^\#/diary$year\_$month.shtml\#/;

                # Map old diary URLs onto the blosxom URL layout.
                if ($href =~ /^diary(\d{4})\_(\d{2}).shtml\#d(\d+)-(.+)/) {
                    $href = sprintf("$topurl/%4d%02d%02d-%s.$flavour",
                                    $1, $2, $3, $4);
                }
                elsif ($href =~ /^diary(\d{4})\_(\d{2}).shtml\#d(\d+)/) {
                    $href = "$topurl/$1/$2/$3";
                }
                elsif ($href =~ /^diary(\d{4})\_(\d{2}).shtml/) {
                    $href = "$topurl/$1/$2";
                }

                if ($href eq "") {
                    # do nothing
                    $nsaflag = 1;
                }
                elsif ($href =~ /^LOCAL\//) {
                    # do nothing
                    $nsaflag = 1;
                }
                else {
                    $nsaflag = 0;    # to show (for sure)
                    # NOTE(review): the appended literals are empty in the
                    # reviewed copy, so the rewritten $href is never used;
                    # presumably an opening anchor was emitted here --
                    # TODO confirm against the original.
                    if ($subn == 0) {
                        if ($ullevel == 1) {
                            $title .= "";
                        }
                        else {
                            $body .= "";
                        }
                    }
                    else {
                        if ($subullevel[$subn-1] == $ullevel) {
                            $subtitle[$subn-1] .= "";
                        }
                        else {
                            $subbody[$subn-1] .= "";
                        }
                    }
                }
            }

            # An id of the form "d<day>-<name>" opens a new sub-entry.
            if ($id ne "") {
                if ($id =~ /d(\d+)-(.+)/) {
                    $subn++;
                    $subday  = $1;
                    $subname = $2;
                    if ($subday != $day) {
                        print "WRONG!!\n";
                    }
                    $subfile[$subn-1] = sprintf("%4d%02d%02d-%s",
                                                $year, $month, $day, $subname);
                    $subullevel[$subn-1] = $ullevel;
                    $subtitle[$subn-1]   = "";
                    $subbody[$subn-1]    = "";
                }
            }
        }
    }
    # ul tag
    elsif ($tagname eq "ul") {
        if ($ullevel >= 1) {
            if ($subn == 0) {
                $body .= "\n";
                for my $i (1 .. $ullevel - 1) {
                    $body .= " ";
                }
                $body .= "\n";
            }
        }
        # FIXME: the remainder of this branch was lost in the reviewed copy
        # (presumably including the increment of $ullevel and any further
        # start-tag branches) -- restore from the original.
    }

    return;
}

# FIXME: the `text' handler registered with the parser is not present in the
# reviewed copy.  It is deliberately not stubbed out here: until it is
# restored the script dies at the first text event instead of silently
# producing empty entries.

# end($tagname)
#
# End-tag handler (argspec "tagname").
#
# NOTE(review): the `sub' header and everything before the `a' branch were
# lost in the reviewed copy.  The header is reconstructed from the end_h
# registration above; branches for other closing tags (h1, h2, ul, ...) are
# presumably missing -- TODO restore from the original.
sub end {
    my ($tagname) = @_;

    # a tag
    if ($tagname eq "a") {
        if ($ullevel >= 1 && $nsaflag == 0) {
            # NOTE(review): empty literals in the reviewed copy; presumably
            # the closing anchor markup -- TODO confirm.
            if ($subn == 0) {
                if ($ullevel == 1) {
                    $title .= "";
                }
                elsif ($ullevel > 1) {
                    $body .= "";
                }
            }
            else {
                if ($ullevel == $subullevel[$subn-1]) {
                    $subtitle[$subn-1] .= "";
                }
                elsif ($ullevel > $subullevel[$subn-1]) {
                    $subbody[$subn-1] .= "";
                }
            }
        }
        else {
            $nsaflag = 0;
        }
    }
    # li tag
    elsif ($tagname eq "li") {
        if ($subn == 0) {
            if ($ullevel == 1) {
                my $blosxomfile = sprintf("%4d%02d%02d%02d",
                                          $year, $month, $day, $num);
                # end of the entry
                print_entry($blosxomfile, $title, $body,
                            $year, $month, $day, $num);
                # for sure
                $title = "";
                $body  = "";
            }
            elsif ($ullevel > 1) {
                $body .= "\n";
                for my $i (1 .. $ullevel - 1) {
                    $body .= " ";
                }
                $body .= "\n";
            }
        }
        else {
            if ($ullevel == $subullevel[$subn-1]) {
                # end of the entry
                print_entry($subfile[$subn-1], $subtitle[$subn-1],
                            $subbody[$subn-1], $year, $month, $day, $num);

                # make a link
                # categorize: use the same rule print_entry() applied to
                # this title (the original tested an unset variable here,
                # so the category was always empty).
                my $category
                    = _category_of(_normalize_title($subtitle[$subn-1]));

                # The link goes into the enclosing entry: the top-level
                # body, or the parent sub-entry's body.
                # NOTE(review): both category branches append the same text
                # in the reviewed copy; presumably they differed in stripped
                # link markup -- TODO confirm.
                if ($subn == 1) {
                    if ($category eq "") {
                        $body .= "$subtitle[$subn-1]";
                    }
                    else {
                        $body .= "$subtitle[$subn-1]";
                    }
                }
                else {
                    if ($category eq "") {
                        $subbody[$subn-2] .= "$subtitle[$subn-1]";
                    }
                    else {
                        $subbody[$subn-2] .= "$subtitle[$subn-1]";
                    }
                }

                # for sure
                $subfile[$subn-1]    = "";
                $subtitle[$subn-1]   = "";
                $subbody[$subn-1]    = "";
                $subullevel[$subn-1] = 0;
                $subn--;
            }
            else {
                $subbody[$subn-1] .= "\n";
                # Indent relative to the sub-entry's own list level (the
                # original indexed with a misspelt, unset variable).
                for my $i (1 .. $ullevel - $subullevel[$subn-1] - 1) {
                    $subbody[$subn-1] .= " ";
                }
                $subbody[$subn-1] .= "\n";
            }
        }
    }
    # pre tag
    elsif ($tagname eq "pre") {
        $preflag = 0;
        # NOTE(review): empty literals in the reviewed copy -- TODO confirm.
        if ($subn == 0) {
            if ($ullevel == 1) {
                $title .= "";
            }
            elsif ($ullevel > 1) {
                $body .= "";
            }
        }
        else {
            if ($ullevel == $subullevel[$subn-1]) {
                $subtitle[$subn-1] .= "";
            }
            elsif ($ullevel > $subullevel[$subn-1]) {
                $subbody[$subn-1] .= "";
            }
        }
    }
    # other tags
    else {
        if ($ullevel >= 1) {
            # NOTE(review): empty literals in the reviewed copy -- TODO
            # confirm.
            if ($subn == 0) {
                if ($ullevel == 1) {
                    $title .= "";
                }
                else {
                    $body .= "";
                }
            }
            else {
                if ($ullevel == $subullevel[$subn-1]) {
                    $subtitle[$subn-1] .= "";
                }
                else {
                    $subbody[$subn-1] .= "";
                }
            }
        }
    }

    return;
}

# _normalize_title($title)
#
# Returns the title with newlines removed, leading and trailing spaces
# trimmed and every literal "()" removed.
sub _normalize_title {
    my ($t) = @_;

    $t =~ s/\n//g;
    $t =~ s/^ +//g;
    $t =~ s/ +$//g;
    # NOTE(review): as found this replaces a space with a space; the pattern
    # was presumably damaged in the reviewed copy -- TODO confirm.
    $t =~ s/ / /g;
    $t =~ s/\Q()\E//g;

    return $t;
}

# _category_of($title)
#
# Returns "phys" when the title ends with a bracketed, non-empty tag
# ("... [xxx]"), and "" otherwise.
sub _category_of {
    my ($t) = @_;

    return $t =~ /\Q[\E.+\Q]\E$/ ? "phys" : "";
}

# print_entry($f, $t, $b, $y, $m, $d, $n)
#
# Writes one blosxom entry file.
#   $f  file name without the ".txt" extension
#   $t  title
#   $b  body
#   $y  year (4 digits)
#   $m  month (1 - 12)
#   $d  day (1 - 31)
#   $n  number (1 - ), written as the minutes of the creation date
#
# The file goes to "./<category>/$f.txt" when the title is categorised (the
# directory is created if needed) and to "$f.txt" otherwise.  Dies when the
# directory or the file cannot be created or written.
sub print_entry {
    my ($f, $t, $entry_body, $y, $m, $d, $n) = @_;

    # The 13th element is what a month of 0 selects (index -1).
    my @txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                  "13");

    $t = _normalize_title($t);
    if ($t eq "") {
        $t = "-";
    }

    $entry_body =~ s/\Q()\E//g;

    # categorize
    my $category = _category_of($t);

    my $path;
    if ($category ne "") {
        unless (-e $category) {
            mkdir($category, 0755)
                or die "Cannot make dir $category\n";
        }
        $path = "./$category/$f.txt";
    }
    else {
        $path = "$f.txt";
    }

    open(my $out, '>', $path)
        or die "Cannot open $f.txt\n";
    print {$out} "$t\n";
    printf {$out} "meta-creation_date: %s %d, %d 00:%02d\n",
        $txtmon[int($m-1)], $d, $y, $n;
    print {$out} "$entry_body\n";
    # Buffered write errors only surface at close.
    close($out)
        or die "Cannot close $f.txt\n";

    return;
}