#!/usr/bin/perl
# Archive converter from standard mbox format to Sympa's archive format.
# The Sympa-compatible log.YYYYMM files are written to the input directory,
# or to the current directory when the input is a single file. When done
# here, copy the log.* files to the "archives" subdirectory of the list's
# directory under ~sympa/expl, and then run the Sympa distribution script
# ~sympa/bin/arc2webarc.pl.
#
# Note: Should work on listproc or majordomo archives too, maybe others.
#
# Usage:
# mbox2sympa.pl <input>
#
# <input> can be either a single mbox file or the name of a
# directory containing files in mbox format, with arbitrary names.
#
# Written by Adam Bernstein (adam@amberbug.org), 11/08/2002
# (pardon my beginner's Perl syntax -- I'm really a C/C-shell programmer)
# Expanded from mjarc2sympa.pl by Petr Prazak <prazak@grisoft.cz>
use File::Find;
# Three-letter month abbreviations, as they appear in mbox "From " lines,
# mapped to their month numbers.
my %months = (
    Jan => 1, Feb => 2,  Mar => 3,  Apr => 4,
    May => 5, Jun => 6,  Jul => 7,  Aug => 8,
    Sep => 9, Oct => 10, Nov => 11, Dec => 12,
);

# Text written between messages, and at the top of each newly created
# output file.
my $separator = "\n------- CUT --- CUT\n\n";
my $header = "\n------- THIS IS A RFC934 COMPLIANT DIGEST\n\n";

my $outdir = ".";   # where the log.* files go; replaced in directory mode
my $total = 0;      # total converted messages, summed over all input files

# Start by assuming the most standard From line date format, which seems
# to be like "From <...> Wed Jan 01 00:00:00 2000" (different than the
# standard format in the Date: line). Field positions count from 0;
# set_date_format() may overwrite all three values interactively.
my $month_index = 3;
my $year_index = 6;
my $num_fields = 7;

my $arg = $ARGV[0];
die "Usage: $0 <mbox file or directory>\n"
    unless defined $arg && length $arg;

if (-d $arg) {
    print "Processing the directory $arg\n";
    $outdir = $arg;

    # Read the directory directly instead of glob("$arg/*"): glob splits
    # its pattern on whitespace, so directory names with spaces would fail.
    opendir(my $dh, $arg) or die "Cannot read directory $arg: $!\n";
    my @names = sort { lc $a cmp lc $b or $a cmp $b } readdir($dh);
    closedir($dh);

    # Test the bare file name, not the full path: matching /^\./ against
    # the path would throw away every file when $arg is "." or "./dir".
    # Output files left by an earlier run (log.<digits>) are not mbox
    # input, so they are skipped as well.
    my @filelist =
        grep { -f $_ && -r _ }
        map  { "$arg/$_" }
        grep { !/^\./ && !/^log\.\d+$/ } @names;

    die "No readable mbox files found in $arg\n" unless @filelist;

    # The date format is established once, from the first input file.
    set_date_format($filelist[0]);
    foreach my $file (@filelist) {
        process_file($file);
    }
    print "\nConverted $total messages in total.\n";
}
elsif (-r $arg) {
    set_date_format($arg);
    process_file($arg);
}
else {
    # die writes to STDERR and gives a non-zero exit status.
    die "Bad argument $arg, not a file or directory\n";
}
# process_file($mbox_path)
#
# Reads one mbox file and appends its messages to per-month output files
# named "$outdir/log.YYYYMM". A message starts at a line that begins with
# "From -" and splits into exactly $num_fields whitespace-separated fields;
# its month and year are taken from fields $month_index and $year_index.
# A newly created output file starts with $header, and messages are
# separated (and the last one followed) by $separator.
#
# Adds the number of converted messages to the file-level $total.
# Dies if a file cannot be opened or closed, or if the month or year field
# of a From line cannot be interpreted.
#
# NOTE(review): the match was narrowed from /^From / to /^From -/
# ("modification CB"), while set_date_format() still looks for /^From /.
# Archives whose From lines carry a sender address instead of "-" will
# yield no messages here -- confirm this restriction is intended.
sub process_file {
    my $mj_file = shift;

    # Three-argument open with "or": in the old 'open FH, "<$f" || die'
    # form the || bound to the string, so a failed open went unnoticed.
    open(my $in, '<', $mj_file) or die "Cannot open $mj_file: $!";
    print "Converting mbox archive for file $mj_file\n";

    my ($year, $month);
    my ($out, $outfile);    # current output handle / path; undef until the
                            # first valid From line has been seen
    my $prev_month = 0;
    my $prev_year = 0;
    my $mailcount = 0;      # messages written to the current output file
    my $subtotal = 0;       # messages converted from this input file

    while (my $line = <$in>) {
        # Look for "From -" lines, doing a basic check against the number
        # of fields to avoid false hits (ie. message text starting with
        # "From "):
        if ($line =~ /^From -/) {
            my @fields = split ' ', $line;
            if (@fields == $num_fields) {
                $month = $months{ $fields[$month_index] };
                die "Sorry, month not ready correctly -- probably a date format problem.\nRun again and set date format interactively.\n"
                    unless defined $month;

                $year = $fields[$year_index];
                die "Sorry, year not read correctly -- probably a date format problem.\nRun again and set date format interactively.\n"
                    unless defined $year && $year =~ /^\d+$/;

                # Start a new output file when the month OR the year
                # changes; comparing the month alone would merge e.g.
                # Jan 2000 and Jan 2001 into the same file.
                if ($month != $prev_month || $year != $prev_year) {
                    if ($out) {
                        print {$out} $separator if $mailcount > 0;
                        close($out)
                            or die "Cannot close output file $outfile: $!";
                        print "Archived $mailcount messages for $prev_month/$prev_year.\n";
                    }
                    $mailcount = 0;

                    $outfile = sprintf "%s/log.%d%02d", $outdir, $year, $month;
                    # Only a file that does not exist yet gets the header;
                    # an existing file is simply appended to.
                    my $newfile = !-e $outfile;
                    open($out, '>>', $outfile)
                        or die "Cannot open output file $outfile: $!";
                    if ($newfile) {
                        print "print header\n";
                        print {$out} $header;
                    }
                }

                # Every message but the first one written since the file
                # was opened is preceded by a separator.
                print {$out} $separator if $mailcount > 0;

                ++$mailcount;
                ++$subtotal;
                $prev_month = $month;
                $prev_year = $year;
            }
        }

        # Text before the first From line has no month to be filed under,
        # so it is dropped rather than printed to an unopened handle.
        print {$out} $line if $out;
    }

    if ($out) {
        print {$out} $separator;
        close($out) or die "Cannot close output file $outfile: $!";
    }
    close($in);

    $total += $subtotal;
    if ($subtotal > 0) {
        print "Archived $mailcount messages for $month/$year,\n $subtotal messages total\n";
    }
    else {
        print "No messages found in $mj_file\n";
    }
}
# set_date_format($mbox_path)
#
# Optionally lets the user describe the layout of the "From " lines.
# If the user answers "y", the first line of $mbox_path starting with
# "From " is shown and the user is asked which whitespace-separated fields
# (counting from 0) hold the month and the year. The answers are stored in
# the file-level $month_index and $year_index, and $num_fields is set to
# the number of fields on that line. Any other answer leaves the defaults
# untouched.
#
# Dies if the file cannot be opened, contains no From line, the user says
# the line has no date, or standard input ends before all answers are read.
sub set_date_format {
    my $file = shift;

    print "Do you want to interactively set the date format? (y/n) [n]: ";
    my $answer = <STDIN>;
    return unless defined $answer && $answer =~ /y/;

    # Three-argument open with "or": with 'open FH, "<$file" || die' the
    # || bound to the string, so a failed open was never reported.
    open(my $fh, '<', $file) or die "***Cannot open $file: $!\n";
    my $from_line;
    while (my $line = <$fh>) {
        if ($line =~ /^From /) {
            $from_line = $line;
            last;
        }
    }
    close($fh);
    die "***No From line found in $file\n" unless defined $from_line;

    print "Here's the first From line in the file:\n";
    print "\n $from_line\n";
    print "Does this line contain a date? (y/n) [y]: ";
    $answer = <STDIN>;
    if (defined $answer && $answer =~ /n/) {
        die "***Can't proceed without dates in From lines\n";
    }

    # Split into a named array. Since Perl 5.12 a scalar-context split no
    # longer fills @_, so reading the fields from @_ gave empty values and
    # made the month prompt below loop forever.
    my @fields = split ' ', $from_line;
    $num_fields = scalar @fields;

    # Show the fields with the second one (the sender) masked.
    my @shown = @fields;
    $shown[1] = 'xxxxx' if @shown > 1;
    print "\n @shown\n\n";

    # Reads one answer and returns it as a field index, or undef when the
    # answer is not a whole number within the line's field count.
    my $read_index = sub {
        my $reply = <STDIN>;
        die "***Unexpected end of input while reading a field number\n"
            unless defined $reply;
        $reply =~ s/^\s+|\s+$//g;
        return unless $reply =~ /^\d+$/ && $reply < @fields;
        return $reply + 0;
    };

    print "Counting the first field as 0, which field in this line gives the month? ";
    my $month_choice = $read_index->();
    until (defined $month_choice && exists $months{ $fields[$month_choice] }) {
        print "\nThat doesn't seem right -- month must be \"Jan\", \"Feb\", etc.\n";
        print "Try again, counting the first field as 0: ";
        $month_choice = $read_index->();
    }
    $month_index = $month_choice;

    print "Which field in this line gives the year? ";
    my $year_choice = $read_index->();
    # The year ends up in the output file name, so it has to be numeric.
    until (defined $year_choice && $fields[$year_choice] =~ /^\d+$/) {
        print "\nThat doesn't seem right -- year must be a number.\n";
        print "Try again, counting the first field as 0: ";
        $year_choice = $read_index->();
    }
    $year_index = $year_choice;

    return;
}