#!/usr/bin/perl
# converter from my diary.shtml to blosxom
# Copyright (C) 2004 Kengo Ichiki
# $Id: diary2blosxom.pl,v 1.1 2004/01/04 07:21:57 ichiki Exp $
#
# Usage: diary2blosxom.pl (HTML file)
#
# Reads one HTML file line by line, converts every line to UTF-8 with Jcode
# and feeds it to an HTML::Parser instance.  The parser callbacks collect the
# content of list items into "entries" and print each finished entry
# (file name, title, creation date, body, separator line) to standard output.
#
# NOTE(review): the copy of this file that was reviewed appears to have lost
# every "<...>" sequence.  Concretely:
#   * the read loop had an empty condition (restored below so that it reads
#     the file that is opened just before it);
#   * the string literals appended to $title/$body/@subtitle/@subbody are
#     empty or whitespace-only, so presumably the markup they carried is
#     gone -- they are kept exactly as seen and marked individually;
#   * the text between the "ul" branch of start() and the "a" branch of what
#     is evidently end() is missing, including the whole text() handler.
# Nothing in the visible code changes $ullevel, $year or $month after their
# initialisation, and nothing increments $num, so those updates presumably
# lived in the missing part.  Restore them from the original before relying
# on the output of this script.

use strict;
use warnings;

use HTML::Parser;
use Jcode;

# check for command-line argument
die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;

# get the command-line argument
my $file = shift;

# Month abbreviations, looked up with index (month - 1).  While $month is
# still 0 the index is -1, which selects the trailing "13" placeholder.
my @txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "13");

my $topurl  = "http://kichiki.com/cgi-bin/blosxom.cgi";  # not referenced by the visible code
my $flavour = "html";                                    # extension used for sub-entry file names

# line printed after every emitted entry
my $separator =
    "----------------------------------------------------------------------------\n";

# Create HTML Parser object
my $p = HTML::Parser->new(
    api_version => 3,
    start_h     => [\&start, "tagname,attr"],
    text_h      => [\&text,  "dtext"],
    end_h       => [\&end,   "tagname"],
);

die "File \"$file\" doesn't exist.\n" unless -e $file;
open(my $html, '<', $file) or die "Cannot open $file: $!";

# ---- parser state, shared by the handlers below ----
my $h1flag  = 0;    # h1 tag
my $h2flag  = 0;    # h2 tag
my $nsaflag = 0;    # not-shown anchor tag flag -- local copy, no-href tags
my $ullevel = 0;    # ul level

my $year  = 0;
my $month = 0;
my $day   = 0;
my $num   = 0;      # per-day entry counter (reset when a day anchor is seen)

# entry currently being collected
my $title = "";
my $body  = "";

# stack of sub-entries (entries opened by an <a id="dNN-name"> anchor);
# $subn is the number of open sub-entries, slot $subn-1 is the innermost one
my $subn    = 0;
my $subday  = 0;
my $subname = "";
my (@subfile, @subtitle, @subbody, @subullevel);

while (my $line = <$html>) {
    # convert input-line into utf8
    Jcode::convert(\$line, "utf8");
    $p->parse($line);
}
$p->eof;    # tell the parser that the document is complete
close($html);

exit;


# _append_markup($text)
#
# Appends $text to the buffer selected by the current nesting state:
#   no sub-entry open : $title at list level 1, otherwise $body
#   sub-entry open    : its title when the current list level equals the
#                       level the sub-entry was opened at, otherwise its body
sub _append_markup {
    my ($text) = @_;

    if ($subn == 0) {
        if ($ullevel == 1) {
            $title .= $text;
        }
        else {
            $body .= $text;
        }
    }
    elsif ($subullevel[$subn - 1] == $ullevel) {
        $subtitle[$subn - 1] .= $text;
    }
    else {
        $subbody[$subn - 1] .= $text;
    }
    return;
}


# start($tagname, $attr)
#
# Start-tag handler.  $tagname is the tag name and $attr a hash reference of
# the tag's attributes, as requested by the "tagname,attr" argspec above.
sub start {
    my ($tagname, $attr) = @_;

    # h1 tag
    if ($tagname eq "h1") {
        $h1flag = 1;
    }
    # h2 tag
    elsif ($tagname eq "h2") {
        $h2flag = 1;
    }
    # a tag
    elsif ($tagname eq "a") {
        # missing attributes are treated as empty strings
        my $id   = defined $attr->{id}   ? $attr->{id}   : '';
        my $href = defined $attr->{href} ? $attr->{href} : '';

        # an id of the form d<digits> outside any list, inside an h2,
        # selects the current day
        if ($ullevel == 0 && $h2flag == 1) {
            if ($id =~ /d([0-9]+)/) {
                $day = $1;
                $num = 0;    # reset counter
            }
        }

        if ($ullevel >= 1) {
            if ($href ne '') {
                # page-local fragments and relative diary links are made absolute
                $href =~ s{^#}{diary${year}_$month.shtml#};
                $href =~ s{^diary}{http://kichiki.hp.infoseek.co.jp/diary/diary};

                if ($href eq '' || $href =~ m{^LOCAL/}) {
                    # do nothing
                    $nsaflag = 1;
                }
                else {
                    $nsaflag = 0;    # to show (for sure)
                    # NOTE(review): the appended literal is empty in the
                    # reviewed copy, so the rewritten $href is never emitted;
                    # presumably the opening anchor markup was lost here.
                    _append_markup("");
                }
            }
            elsif ($id ne '') {
                # an id of the form d<digits>-<name> opens a sub-entry
                if ($id =~ /d([0-9]+)-([a-zA-Z0-9\-]+)/) {
                    $subn++;
                    $subday  = $1;
                    $subname = $2;
                    if ($subday != $day) {
                        print "WRONG!!\n";
                    }
                    # NEED TO CORRECT TO USE FORMAT ABOVE!
                    $subfile[$subn - 1]    = "$year$month$day-$subname.$flavour";
                    $subullevel[$subn - 1] = $ullevel;
                    $subtitle[$subn - 1]   = "";
                    $subbody[$subn - 1]    = "";
                }
            }
        }
    }
    # ul tag
    elsif ($tagname eq "ul") {
        if ($ullevel >= 1) {
            if ($subn == 0) {
                # newline, ($ullevel - 1) spaces of indentation, newline
                # NOTE(review): the last literal presumably also carried
                # list markup that is missing from the reviewed copy.
                $body .= "\n" . (" " x ($ullevel - 1)) . "\n";
            }
        }
    }
    # NOTE(review): the reviewed copy breaks off inside the "ul" branch; any
    # further start-tag handling is not visible and is therefore not
    # reproduced here.
    return;
}


# text($text)
#
# Text handler registered with the parser ("dtext" argspec).
# NOTE(review): its body is absent from the reviewed copy.  This placeholder
# only keeps the registered callback defined so that parsing does not abort;
# the real implementation has to be restored from the original.
sub text {
    my ($text) = @_;
    return;
}


# end($tagname)
#
# End-tag handler.  Closing a list item finishes the entry (or sub-entry)
# being collected and prints it.
# NOTE(review): in the reviewed copy these branches follow the "ul" branch of
# start() directly, although they clearly implement end-tag behaviour and
# end() is what the parser is configured to call.  Whatever preceded the "a"
# branch in the original is not visible.
sub end {
    my ($tagname) = @_;

    # a tag
    if ($tagname eq "a") {
        if ($nsaflag == 0) {
            # NOTE(review): the appended literals are empty in the reviewed
            # copy (presumably the closing anchor markup).  Also note that,
            # unlike the opening side, nothing is appended to a sub-entry
            # title here -- confirm against the original.
            if ($subn == 0) {
                if ($ullevel == 1) {
                    $title .= "";
                }
                elsif ($ullevel > 1) {
                    $body .= "";
                }
            }
            elsif ($subullevel[$subn - 1] > $ullevel) {
                $subbody[$subn - 1] .= "";
            }
        }
        else {
            $nsaflag = 0;
        }
    }
    # li tag
    elsif ($tagname eq "li") {
        if ($subn == 0) {
            if ($ullevel == 1) {
                # end of the entry
                $title =~ s/\n//g;
                $title =~ s/^ +//g;
                # NOTE(review): a no-op as written; runs of spaces in the
                # pattern may have been collapsed in the reviewed copy.
                $title =~ s/ / /g;

                printf "file = %s%s%s%02d.txt\n", $year, $month, $day, $num;
                print "$title\n";
                printf "meta-creation_date: %s %d, %d 00:%02d\n",
                    $txtmon[int($month - 1)], $day, $year, $num;
                # an entry without a body repeats its title as the body
                if ($body eq "") {
                    print "$title\n";
                }
                else {
                    print "$body\n";
                }
                print $separator;

                # for sure
                $title = "";
                $body  = "";
            }
            elsif ($ullevel > 1) {
                # newline, ($ullevel - 1) spaces of indentation, newline
                $body .= "\n" . (" " x ($ullevel - 1)) . "\n";
            }
        }
        else {
            my $top = $subn - 1;    # innermost open sub-entry

            if ($ullevel == $subullevel[$top]) {
                # end of the entry
                $subtitle[$top] =~ s/\n//g;
                $subtitle[$top] =~ s/^ +//g;
                $subtitle[$top] =~ s/ / /g;    # see the note on $title above
                if ($subbody[$top] eq "") {
                    $subbody[$top] = $subtitle[$top];
                }

                print "file = $subfile[$top]\n";
                print "$subtitle[$top]\n";
                printf "meta-creation_date: %s %d, %d 00:%02d\n",
                    $txtmon[int($month - 1)], $day, $year, $num;
                print "$subbody[$top]\n";
                print $separator;

                # make a link: the sub-entry title goes into the body of the
                # enclosing entry (or of the enclosing sub-entry)
                # NOTE(review): only the bare title is appended in the
                # reviewed copy; the link markup itself is not visible.
                if ($subn == 1) {
                    $body .= "$subtitle[$top]";
                }
                else {
                    $subbody[$top - 1] .= "$subtitle[$top]";
                }

                # for sure
                $subfile[$top]    = "";
                $subtitle[$top]   = "";
                $subbody[$top]    = "";
                $subullevel[$top] = 0;
                $subn--;
            }
            else {
                # Indentation is relative to the level of the open sub-entry.
                # (The original indexed @subullevel with the undeclared $sun
                # instead of $subn.)  A non-positive count yields no spaces.
                $subbody[$top] .= "\n"
                    . (" " x ($ullevel - $subullevel[$top] - 1))
                    . "\n";
            }
        }
    }
    # other tags
    else {
        # NOTE(review): the appended literal is empty in the reviewed copy.
        _append_markup("");
    }
    return;
}