#!/usr/bin/perl
# converter from my diary.shtml to blosxom
# Copyright (C) 2004 Kengo Ichiki
# $Id: diary2blosxom.pl,v 1.3 2004/01/06 05:20:40 ichiki Exp $
use HTML::Parser;
use Jcode;
# Month abbreviations, indexed by (month number - 1), used when writing the
# "meta-creation_date" header of each entry.
@txtmon = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "13");
# Base URL that in-diary links are rewritten against.
$topurl = "http://kichiki.com/cgi-bin/blosxom.cgi";

# check for command-line argument
die "Usage: diary2blosxom.pl (HTML file)\n" unless @ARGV == 1;
# get the command-line argument
my $file = shift;

# Create HTML Parser object; the three handlers below do all the work.
my $p = HTML::Parser->new
    ( api_version => 3,
      start_h => [\&start, "tagname,attr"],
      text_h  => [\&text,  "dtext"],
      end_h   => [\&end,   "tagname"],
    );

die "File \"$file\" does't exist.\n" unless -e $file;
# Three-argument open with a lexical handle: the mode can no longer be
# influenced by the (user supplied) file name.
open(my $html_fh, '<', $file) or die "Cannot open $file: $!";

# Parser state shared (as package globals) with start(), text() and end().
$h1flag = 0;    # inside an h1 tag
$h2flag = 0;    # inside an h2 tag
$nsaflag = 0;   # not-shown anchor tag flag -- local copy, no-href tags
$ullevel = 0;   # current ul nesting level
$year = 0;
$month = 0;
$day = 0;
$num = 0;       # running entry number within the current day
$title = "";
$body = "";
$subn = 0;      # number of currently open sub-entries
$subday = 0;
$subname = "";
# Per-sub-entry stacks, indexed by ($subn - 1).
@subfile = ();
@subtitle = ();
@subbody = ();
@subullevel = ();

# Feed the file to the parser line by line.  The line is held in a named
# lexical rather than $_ so that the handlers cannot disturb it.
while (my $line = <$html_fh>) {
    # convert input-line into utf8
    Jcode::convert(\$line, "utf8");
    $p->parse($line);
}
# Signal end of document so the parser flushes any buffered text.
$p->eof;
close($html_fh);
exit;
# HTML::Parser start-tag handler.
#
# Arguments:
#   $tagname - name of the tag being opened
#   $attr    - hash reference of the tag's attributes
#
# Updates the package-global parser state ($h1flag, $h2flag, $nsaflag,
# $ullevel, $day, $num, $title, $body and the @sub* stacks).  It works on
# named lexicals only and never assigns to the global $_, so the caller's
# current line is left untouched.
sub start
{
    my ($tagname, $attr) = @_;

    # h1 tag
    if ($tagname eq "h1") {
        $h1flag = 1;
    }
    # h2 tag
    elsif ($tagname eq "h2") {
        $h2flag = 1;
    }
    # a tag
    elsif ($tagname eq "a") {
        my $id   = defined $attr->{id}   ? $attr->{id}   : '';
        my $href = defined $attr->{href} ? $attr->{href} : '';

        # An anchor inside an h2 outside any list carries the day ("dNN").
        if ($ullevel == 0 && $h2flag == 1) {
            if ($id =~ /d(\d+)/) {
                $day = $1;
                $num = 0; # reset counter
            }
        }

        if ($ullevel >= 1) {
            if ($href eq '') {
                # anchor without href: its text is not shown
                $nsaflag = 1;
            }
            else {
                # Turn a same-page fragment into a full diary file reference,
                # then map diary references onto blosxom URLs.
                $href =~ s/^\#/diary${year}_${month}.shtml\#/;
                if ($href =~ /^diary(\d{4})_(\d{2})\.shtml\#d(\d+)-(.+)/) {
                    $href = "$topurl/$1$2$3-$4";
                }
                elsif ($href =~ /^diary(\d{4})_(\d{2})\.shtml\#d(\d+)/) {
                    $href = "$topurl/$1/$2/$3";
                }
                elsif ($href =~ /^diary(\d{4})_(\d{2})\.shtml/) {
                    $href = "$topurl/$1/$2";
                }

                if ($href eq '' || $href =~ /^LOCAL\//) {
                    # links to local copies are not shown
                    $nsaflag = 1;
                }
                else {
                    $nsaflag = 0; # to show (for sure)
                    # NOTE(review): the rewritten URL in $href is not emitted
                    # anywhere; the code here only appended empty strings to
                    # the title/body.  Presumably an opening anchor tag was
                    # intended -- confirm before restoring it (end() would
                    # need the matching closing tag).
                }
            }

            # An id of the form "dNN-name" opens a new sub-entry.
            if ($id =~ /d(\d+)-(.+)/) {
                $subn ++;
                $subday = $1;
                $subname = $2;
                if ($subday != $day) {
                    print "WRONG!!\n";
                }
                $subfile    [$subn - 1] = "$year$month$day-$subname";
                # NEED TO CORRECT TO USE FORMAT ABOVE!
                $subullevel [$subn - 1] = $ullevel;
                $subtitle   [$subn - 1] = "";
                $subbody    [$subn - 1] = "";
            }
        }
    }
    # ul tag
    elsif ($tagname eq "ul") {
        # A nested list adds a newline, an indent and another newline to the
        # body of whatever entry is currently open.
        if ($ullevel >= 1) {
            if ($subn == 0) {
                $body .= "\n";
                $body .= " " for 1 .. $ullevel - 1;
                $body .= "\n";
            }
            else {
                my $depth = $ullevel - $subullevel[$subn-1];
                $subbody[$subn-1] .= "\n";
                $subbody[$subn-1] .= " " for 1 .. $depth - 1;
                $subbody[$subn-1] .= "\n";
            }
        }
        $ullevel ++;
    }
    # li tag
    elsif ($tagname eq "li") {
        if ($subn == 0) {
            if ($ullevel == 1) {
                # start of the entry
                $num ++;
                $title = "";
                $body = "";
            }
            elsif ($ullevel > 1) {
                $body .= " " for 2 .. $ullevel - 1;
                $body .= "- \n";
            }
        }
        else {
            my $depth = $ullevel - $subullevel[$subn-1];
            $subbody[$subn-1] .= " " for 2 .. $depth - 1;
            $subbody[$subn-1] .= "\n- \n";
        }
    }
    # other tags: copied through as "<tagname>" (attributes are dropped)
    else {
        if ($ullevel >= 1) {
            if ($subn == 0) {
                if ($ullevel == 1) {
                    $title .= "<$tagname>";
                }
                else {
                    $body .= "<$tagname>";
                }
            }
            else {
                if ($subullevel[$subn-1] == $ullevel) {
                    $subtitle[$subn-1] .= "<$tagname>";
                }
                else {
                    $subbody[$subn-1] .= "<$tagname>";
                }
            }
        }
    }
}
# HTML::Parser text handler.
#
# Argument:
#   $dtext - text of the current chunk (registered with the "dtext" argspec,
#            i.e. with entities already decoded)
#
# Normalises the text, picks the year and month out of the h1 heading, and
# appends the text to the title or body of the entry currently being built
# unless it belongs to a not-shown anchor.  The global $_ is left untouched.
sub text
{
    my ($dtext) = shift;

    $dtext =~ s/ +/ /g;       # collapse runs of spaces
    $dtext =~ s/^\n//g;       # drop a leading newline
    # The text arrives decoded, so a bare "&" has to be escaped again before
    # it is written out as markup (this substitution used to replace "&"
    # with itself and therefore did nothing).
    $dtext =~ s/&/&amp;/g;

    if ($h1flag == 1) {
        # The heading carries "<year>年<month>月".
        if ($dtext =~ /(\d+)年(\d+)月/) {
            $year = $1;
            $month = $2;
        }
    }

    # Text inside a not-shown anchor is skipped.
    return unless $nsaflag == 0;

    if ($subn == 0) {
        if ($ullevel == 1) {
            $title .= $dtext;
        }
        elsif ($ullevel > 1) {
            $body .= $dtext;
        }
    }
    elsif ($subullevel[$subn-1] == $ullevel) {
        $subtitle[$subn-1] .= $dtext;
    }
    else {
        $subbody[$subn-1] .= $dtext;
    }
}
# Write one blosxom entry file: the title line, a "meta-creation_date" header
# built from the global $month/$day/$year/$num, and the body.
sub _write_blosxom_entry
{
    my ($path, $entry_title, $entry_body) = @_;

    open(my $fh, '>', $path) or die "Cannot open $path: $!\n";
    print  {$fh} "$entry_title\n";
    printf {$fh} "meta-creation_date: %s %d, %d 00:%02d\n",
        $txtmon[int($month-1)], $day, $year, $num;
    print  {$fh} "$entry_body\n";
    # Buffered write errors only surface at close.
    close($fh) or die "Cannot close $path: $!\n";
}

# HTML::Parser end-tag handler.
#
# Argument:
#   $tagname - name of the tag being closed
#
# Updates the package-global parser state and, when the <li> that started an
# entry (or sub-entry) closes, writes that entry to its own file in the
# current directory.
sub end
{
    my ($tagname) = shift;

    # h1 tag
    if ($tagname eq "h1") {
        $h1flag = 0;
    }
    # h2 tag
    elsif ($tagname eq "h2") {
        $h2flag = 0;
    }
    # ul tag
    elsif ($tagname eq "ul") {
        $ullevel --;
        if ($subn == 0) {
            if ($ullevel >= 1) {
                $body .= "\n";
                $body .= " " for 1 .. $ullevel - 1;
                $body .= "\n\n";
            }
        }
        else {
            my $depth = $ullevel - $subullevel[$subn-1];
            $subbody[$subn-1] .= "\n";
            $subbody[$subn-1] .= " " for 1 .. $depth - 1;
            $subbody[$subn-1] .= "\n\n";
        }
    }
    # a tag
    elsif ($tagname eq "a") {
        # The anchor is over, so the not-shown flag is cleared either way.
        # NOTE(review): for a shown anchor the code here only appended empty
        # strings to the title/body; presumably a closing anchor tag was
        # intended -- confirm together with the matching spot in start().
        $nsaflag = 0;
    }
    # li tag
    elsif ($tagname eq "li") {
        if ($subn == 0) {
            if ($ullevel == 1) {
                # end of the entry
                $title =~ s/\n//g;
                $title =~ s/^ +//g;
                $title =~ s/ +$//g;
                $title =~ s/ / /g;   # NOTE(review): no-op as written
                # No newline inside the file name: it was only tolerated
                # because two-argument open strips trailing whitespace.
                my $blosxomfile = sprintf ("%s%s%s%02d.txt", $year, $month, $day, $num);
                print "file = $blosxomfile\n";
                # An entry without a body repeats its title as the body.
                _write_blosxom_entry($blosxomfile, $title,
                                     ($body eq "") ? $title : $body);
                # for sure
                $title = "";
                $body = "";
            }
            elsif ($ullevel > 1) {
                $body .= "\n";
                $body .= " " for 1 .. $ullevel - 1;
                $body .= "\n";
            }
        }
        else {
            if ($ullevel == $subullevel[$subn-1]) {
                # end of the sub-entry
                $subtitle[$subn-1] =~ s/\n//g;
                $subtitle[$subn-1] =~ s/^ +//g;
                $subtitle[$subn-1] =~ s/ / /g;   # NOTE(review): no-op as written
                if ($subbody[$subn-1] eq "") {
                    $subbody[$subn-1] = $subtitle[$subn-1];
                }
                print "file = $subfile[$subn-1]\n";
                _write_blosxom_entry("$subfile[$subn-1].txt",
                                     $subtitle[$subn-1], $subbody[$subn-1]);
                # Leave the sub-entry's title in the enclosing entry.
                # NOTE(review): only the bare title is appended, no link
                # markup -- confirm whether a link was intended.
                if ($subn == 1) {
                    $body .= "$subtitle[$subn-1]";
                }
                else {
                    $subbody[$subn-2] .= "$subtitle[$subn-1]";
                }
                # for sure
                $subfile   [$subn-1] = "";
                $subtitle  [$subn-1] = "";
                $subbody   [$subn-1] = "";
                $subullevel[$subn-1] = 0;
                $subn --;
            }
            else {
                # Indentation is relative to the current sub-entry's level
                # (this used to read the misspelled, undefined $sun and so
                # looked at the last element of @subullevel instead).
                my $depth = $ullevel - $subullevel[$subn-1];
                $subbody[$subn-1] .= "\n";
                $subbody[$subn-1] .= " " for 1 .. $depth - 1;
                $subbody[$subn-1] .= "\n";
            }
        }
    }
    # other tags: emit the closing tag matching the "<tagname>" from start()
    else {
        if ($ullevel >= 1) {
            if ($subn == 0) {
                if ($ullevel == 1) {
                    $title .= "</$tagname>";
                }
                else {
                    $body .= "</$tagname>";
                }
            }
            else {
                if ($ullevel == $subullevel[$subn-1]) {
                    $subtitle[$subn-1] .= "</$tagname>";
                }
                else {
                    $subbody[$subn-1] .= "</$tagname>";
                }
            }
        }
    }
}