#!/usr/bin/perl
# mbox2sympa.pl
#
# Archive converter from standard mbox format to Sympa's archive format.
# The Sympa compatible log.* files are written into the input directory
# when a directory is given, or into the current directory when a single
# file is given.  When done here, copy the log.* files to the "archives"
# subdirectory of the list's directory under ~sympa/expl, and then run
# the Sympa distribution script ~sympa/bin/arc2webarc.pl.
#
# Note: Should work on listproc or majordomo archives too, maybe others.
#
# Usage:
#     mbox2sympa.pl <mbox-file-or-directory>
#
# <mbox-file-or-directory> can be either a single mbox file or the name of
# a directory containing files in mbox format, with arbitrary names.
#
# Written by Adam Bernstein (adam@amberbug.org), 11/08/2002
# (pardon my beginner's Perl syntax -- I'm really a C/C-shell programmer)
# Expanded from mjarc2sympa.pl by Petr Prazak

use strict;
use warnings;
use File::Find;

# Month abbreviation (as found in From lines) => month number.
my %months = (
    Jan => 1, Feb => 2,  Mar => 3,  Apr => 4,
    May => 5, Jun => 6,  Jul => 7,  Aug => 8,
    Sep => 9, Oct => 10, Nov => 11, Dec => 12,
);

my $separator = "\n------- CUT --- CUT\n\n";
my $header    = "\n------- THIS IS A RFC934 COMPLIANT DIGEST\n\n";
my $outdir    = ".";
my $total     = 0;    # total converted messages

# Start by assuming the most standard From line date format, which seems
# to be like "From <...> Wed Jan 01 00:00:00 2000" (different than the
# standard format in the Date: line):
my $month_index = 3;
my $year_index  = 6;
my $num_fields  = 7;

my $arg = $ARGV[0];

if (!defined $arg || $arg eq '') {
    print STDERR "Usage: $0 <mbox-file-or-directory>\n";
    exit 1;
}

if (-d $arg) {
    print "Processing the directory $arg\n";
    $outdir = $arg;

    # Read the directory directly instead of glob()ing "$arg/*", which
    # splits its pattern on whitespace and so breaks on such paths.
    opendir(my $dh, $arg) or die "Cannot read directory $arg: $!\n";
    my @names = grep { !/^\./ } readdir($dh);
    closedir($dh);

    # The output goes into this same directory, so skip log.* files left
    # by an earlier run: reading a file while appending to it would
    # duplicate messages or never terminate.
    @names = grep { !/^log\.\d+$/ } @names;
    @names = sort @names;

    my @filelist = grep { -f $_ && -r $_ } map { "$arg/$_" } @names;
    if (!@filelist) {
        print STDERR "No readable files found in $arg\n";
        exit 1;
    }

    set_date_format($filelist[0]);
    foreach my $file (@filelist) {
        process_file($file);
    }
    print "\nConverted $total messages in total.\n";
}
elsif (-r $arg) {
    set_date_format($arg);
    process_file($arg);
}
else {
    print STDERR "Bad argument $arg, not a file or directory\n";
    exit 1;
}

# process_file($mbox_file)
#
# Splits one mbox file into per-month Sympa archive files named
# "$outdir/log.YYYYMM".  A message starts at every line that begins with
# "From " and has exactly $num_fields whitespace-separated fields; its
# month and year are taken from the fields at $month_index/$year_index.
# Output files are opened in append mode, get $header when newly created,
# and every message is followed by $separator.  Adds the number of
# converted messages to $total.
#
# Dies if the input or an output file cannot be opened, or if a From
# line does not carry a recognisable month or year.
sub process_file {
    my $mj_file = shift;
    my ($year, $month);

    open(my $in, '<', $mj_file) or die "Cannot open $mj_file: $!";
    print "Converting mbox archive for file $mj_file\n";

    my $prev_month = 0;
    my $prev_year  = 0;
    my $mailcount  = 0;    # messages written to the current month's file
    my $subtotal   = 0;    # messages converted from this input file
    my $first      = 0;    # true until the first message of a month is seen
    my $out;               # handle of the current month's file, if any

    while (my $line = <$in>) {
        # Look for "From " lines, doing a basic check against the number of
        # fields in the line to avoid false hits (ie. message text starting
        # with "From ").  /^From / also covers the "From - <date>" variant.
        my @fields = ($line =~ /^From /) ? split(' ', $line) : ();

        if (@fields && @fields == $num_fields) {
            my $month_name = $fields[$month_index];
            $month = defined $month_name ? $months{$month_name} : undef;
            $year  = $fields[$year_index];
            die "Sorry, month not read correctly -- probably a date format problem.\nRun again and set date format interactively.\n"
                unless defined $month;
            die "Sorry, year not read correctly -- probably a date format problem.\nRun again and set date format interactively.\n"
                unless defined $year && $year =~ /^\d+$/;

            # Reset things and make a new output file when a new month
            # starts.  The year is compared too, so that the same month of
            # two different years never shares a file.
            if ($month != $prev_month || $year != $prev_year) {
                if ($out) {
                    print {$out} $separator if $mailcount > 0;
                    close($out) or die "Cannot close output file: $!";
                    print "Archived $mailcount messages for $prev_month/$prev_year.\n";
                }
                $mailcount = 0;

                my $outfile = sprintf "%s/log.%d%02d", $outdir, $year, $month;
                my $newfile = (-e $outfile) ? 0 : 1;
                undef $out;
                open($out, '>>', $outfile)
                    or die "Cannot open output file $outfile: $!";
                if ($newfile) {
                    print "print header\n";
                    print {$out} $header;
                }
                $first = 1;
            }

            # Separate this message from the previous one in the same file.
            if ($first) {
                $first = 0;
            }
            else {
                print {$out} $separator;
            }

            ++$mailcount;
            ++$subtotal;
            $prev_month = $month;
            $prev_year  = $year;
        }

        # Anything before the first recognised From line has no output
        # file to go to and is dropped.
        print {$out} $line if $out;
    }
    close($in);

    if ($out) {
        print {$out} $separator;
        close($out) or die "Cannot close output file: $!";
    }

    $total += $subtotal;
    if ($subtotal > 0) {
        print "Archived $mailcount messages for $month/$year,\n $subtotal messages total\n";
    }
    else {
        print "No messages found in $mj_file -- the From lines may use a different date format.\n";
    }
    return;
}

# set_date_format($mbox_file)
#
# Optionally lets the user describe the layout of the From lines.  When
# the user answers "y", shows the first From line of $mbox_file (with the
# second field masked) and asks which fields hold the month and the year,
# then updates $num_fields, $month_index and $year_index accordingly.
# Any other answer, or end of input, keeps the defaults.
#
# Dies if the file cannot be opened, contains no From line, the user says
# the line has no date, or input ends while a field number is expected.
sub set_date_format {
    my $file = shift;

    print "Do you want to interactively set the date format? (y/n) [n]: ";
    return unless read_answer() =~ /y/i;

    open(my $in, '<', $file) or die "***Cannot open $file: $!\n";
    my $from_line;
    while (my $line = <$in>) {
        if ($line =~ /^From /) {
            $from_line = $line;
            last;
        }
    }
    close($in);
    die "***No From line found in $file\n" unless defined $from_line;

    print "Here's the first From line in the file:\n";
    print "\n $from_line\n";
    print "Does this line contain a date? (y/n) [y]: ";
    if (read_answer() =~ /n/i) {
        die "***Can't proceed without dates in From lines\n";
    }

    my @fields = split ' ', $from_line;
    $num_fields = scalar @fields;

    # Show the fields with the second one (the sender) masked out.
    my @shown = @fields;
    $shown[1] = 'xxxxx' if @shown > 1;
    print "\n @shown\n\n";

    print "Counting the first field as 0, which field in this line gives the month? ";
    my $index = read_field_index(scalar @fields);
    while (!defined $index || !exists $months{ $fields[$index] }) {
        print "\nThat doesn't seem right -- month must be \"Jan\", \"Feb\", etc.\n";
        print "Try again, counting the first field as 0: ";
        $index = read_field_index(scalar @fields);
    }
    $month_index = $index;

    print "Which field in this line gives the year? ";
    $index = read_field_index(scalar @fields);
    while (!defined $index || $fields[$index] !~ /^\d+$/) {
        print "\nThat doesn't seem right -- the year must be a number.\n";
        print "Try again, counting the first field as 0: ";
        $index = read_field_index(scalar @fields);
    }
    $year_index = $index;
    return;
}

# Reads one line from STDIN and returns it without the trailing newline;
# returns the empty string at end of input so that prompts fall back to
# their default answer.
sub read_answer {
    my $answer = <STDIN>;
    return '' unless defined $answer;
    chomp $answer;
    return $answer;
}

# read_field_index($field_count)
#
# Reads a field number from STDIN.  Returns it as a number when it is a
# non-negative integer below $field_count, or undef otherwise.  Dies at
# end of input, since the caller would otherwise prompt forever.
sub read_field_index {
    my $field_count = shift;

    my $answer = <STDIN>;
    die "***Unexpected end of input while reading a field number\n"
        unless defined $answer;
    $answer =~ s/^\s+//;
    $answer =~ s/\s+$//;

    return unless $answer =~ /^\d+$/ && $answer < $field_count;
    return $answer + 0;
}