#!/usr/local/bin/perl
#
# file: process-index
# auth: Brad Burdick
# desc: post-process SEC EDGAR SGML index file creating specified index
#       file type
#
# usage: process-index [-D YYMMDD] [-c] [-f] [-m] [-d datadir] [input_file]
#
##########################################################################
# Copyright (c) 1994, 1995 Internet Multicasting Service
#
# The SEC EDGAR Level 1 Dissemination processing software ("software")
# was developed by the Internet Multicasting Service and may
# be used for academic, research, government, and internal business
# purposes without charge. You may not resell this code or include it
# in a product that you are selling without prior permission of the
# Internet Multicasting Service.
#
# This software is provided ``as is'', without express or implied
# warranty, and with no support nor obligation to assist in its
# use, correction, modification or enhancement. We assume no liability
# with respect to the infringement of copyrights, trade secrets, or any
# patents, and are not responsible for consequential damages. Proper
# use of the software is entirely the responsibility of the user.
##########################################################################

eval 'exec /usr/bin/perl -s $0 ${1+"$@"}'
    if 0;

# who am i?
($prog = $0) =~ s#.*/##;

# where we find our local libraries
push(@INC, "/usr/local/ims/lib");

# for processing command line options
require 'getopts.pl';
# header values for index files
require 'index-hdr.pl';
# Edgar date manipulation routines
require 'edgar-date.pl';
# miscellaneous support routines
require 'edgar-util.pl';

# process command line options, if any
&Getopts('D:cd:fm');

# what type of processing?
$do_company  = defined($opt_c);
$do_formtype = defined($opt_f);

# true if master index, otherwise assume daily index
$do_master = defined($opt_m);

# where to place index files
$datadir = defined($opt_d) ? "$opt_d" : "/ftp/edgar";
&makepath($datadir, 0755);

# date stamp for file name
$date = defined($opt_D) ? "$opt_D" : "";

# data file's date
@today = &edgar_date;

# date stamp used in file name (YYMMDD); only computed when -D was not given
# NOTE(review): presumably &edgar_date returns a localtime-style list
# (0-based month, year offset) -- confirm against lib/edgar-date.pl.
$date = sprintf("%02d%02d%02d", $today[5], $today[4]+1, $today[3])
    unless $date;

# where to place daily index file(s)
$daily = "daily-index";

# where to place full index file(s)
$full = "full-index";

# message of the day
$motd = "\n\n" .
    "ATTENTION: Second quarter index data has been archived to the\n" .
    " following subdirectory: edgar/full-index/1995/QTR2/.\n\n" .
    " First quarter index data has been archived to the\n" .
    " following subdirectory: edgar/full-index/1995/QTR1/.\n";

# archive command lines
$compress = "/bin/compress -c";
$sit      = "/usr/local/bin/sit -u -C edgar -o";
$zip      = "/usr/local/bin/zip -b /tmp -jlq -u -9";

# file containing _no daily index file_ message
$nofile = '/usr/local/ims/lib/no-daily';

# take data from stdin if no file provided
if ($#ARGV < 0) {
    push(@ARGV, "<&STDIN");
}

if ($do_company) {
    if ($do_master) {
        $compfile = "$datadir/$full/company.idx";
    }
    else {
        $compfile = "$datadir/$daily/company.$date.idx";
    }
}

if ($do_formtype) {
    if ($do_master) {
        $formfile = "$datadir/$full/form.idx";
    }
    else {
        $formfile = "$datadir/$daily/form.$date.idx";
    }
}

#
# main processing loop
#
foreach $file (@ARGV) {
    open(IN, "$file") || die "$prog: error getting input: $!\n";

    while ($line = <IN>) {
        # only records that start with a digit (the CIK) are index data
        next unless ($line =~ /^[0-9]/);

        # strip the line terminator only; a bare chop() would eat the last
        # character of the file name when the final record has no newline
        $line =~ s/\n$//;

        ($cik, $cname, $type, $filedate, $filename) = split(/\|/, $line);

        if ($do_company) {
            push(@company,
                 sprintf("%-60.60s %-10.10s %-10.10s %-10.10s %-47.47s",
                         $cname, $type, $cik, $filedate, $filename));
        }

        if ($do_formtype) {
            push(@form,
                 sprintf("%-10.10s %-60.60s %-10.10s %-10.10s %-47.47s",
                         $type, $cname, $cik, $filedate, $filename));
        }
    }

    close(IN);
}

#
# company index: records sorted by company name
#
if ($do_company) {
    if (open(COMPANY, ">$compfile")) {
        local(@sortedindex) = sort(@company);
        local(@header);

        if ($do_master) {
            @header = @master_hdr;
        }
        else {
            @header = @daily_hdr;
        }

        &process_hdr('COMPANY', *header);
        print COMPANY join("\n", @header), "\n";

        # print message of the day if available
        print COMPANY $motd if ($do_master && $motd);

        print COMPANY sprintf("\n%-60.60s %-10.10s %-10.10s %-10.10s %-10.10s\n",
                              'Company Name', 'Form Type', 'CIK',
                              'Date Filed', 'File Name');
        print COMPANY "-" x 141, "\n";
        print COMPANY join("\n", @sortedindex), "\n";
        close(COMPANY);

        if ($do_master) {
            # create archive data files; "cd ... &&" so that nothing is
            # archived from the wrong directory if the cd fails
            system("cd $datadir/$full && $compress company.idx > company.Z");
            system("cd $datadir/$full && $sit company.sit company.idx");
            system("cd $datadir/$full && $zip company.zip company.idx");
        }
        else {
            # mail command for daily index file
            local($mail) = "/usr/ucb/mail";
            local($rcpt) = "edgar-daily@town.hall.org";
            local($subj) = "EDGAR daily index for ";

            # NOTE(review): the two-digit year is always mapped to 19xx, and
            # @Months is indexed by the 1-based month number -- confirm both
            # against lib/edgar-date.pl.
            $subj .= sprintf("%s %02d, %4d",
                             $Months[int(substr($date, 2, 2))],
                             substr($date, 4, 2),
                             1900+int(substr($date, 0, 2)));

            if ((-s $compfile) < 100000) {
                system("$mail -s \"$subj\" $rcpt < $compfile");
            }
            else {
                # send notice of file too big to normal daily list
                local($shortname, $notice);

                ($shortname = $compfile) =~ s#/ftp##;
                $notice = "$shortname -- Index too big";
                system("$mail -s \"$notice\" $rcpt < $nofile");
            }

            # edgar-daily-big always gets the index
            $rcpt = "edgar-daily-big@town.hall.org";
            system("$mail -s \"$subj\" $rcpt < $compfile");
        }
    }
    else {
        # nothing was written, so do not archive or mail a stale/missing file
        warn "$prog: unable to open $compfile: $!\n";
    }
}

#
# form type index: records sorted by form type
#
if ($do_formtype) {
    if (open(FORM, ">$formfile")) {
        local(@sortedindex) = sort(@form);
        local(@header);

        if ($do_master) {
            @header = @master_hdr;
        }
        else {
            @header = @daily_hdr;
        }

        &process_hdr('FORM', *header);
        print FORM join("\n", @header), "\n";

        # print message of the day if available
        print FORM $motd if ($do_master && $motd);

        print FORM sprintf("\n%-10.10s %-60.60s %-10.10s %-10.10s %-10.10s\n",
                           'Form Type', 'Company Name', 'CIK',
                           'Date Filed', 'File Name');
        print FORM "-" x 141, "\n";
        print FORM join("\n", @sortedindex), "\n";
        close(FORM);

        # create archive data files
        if ($do_master) {
            system("cd $datadir/$full && $compress form.idx > form.Z");
            system("cd $datadir/$full && $sit form.sit form.idx");
            system("cd $datadir/$full && $zip form.zip form.idx");
        }
    }
    else {
        # nothing was written, so do not archive a stale/missing file
        warn "$prog: unable to open $formfile: $!\n";
    }
}

exit 0;

#
# process index header
# expects fixed format from header - see lib/index-hdr.pl
#
# usage: &process_hdr($hdr_type, *header);
#
#   $hdr_type  'COMPANY' or 'FORM'; any other value only fills in the date
#   *header    typeglob of the caller's @header array, modified in place
#
# Replaces the first "%s" placeholder in three header lines:
#   $header[0]  index description ("by Company Name" / "by Form Type")
#   $header[1]  date taken from the global $date (YYMMDD)
#   $header[6]  index file path relative to the data directory
#
# Reads the globals $date, $do_master, $daily, $full and @Months.
#
sub process_hdr {
    local($hdr_type) = shift;
    local(*header)   = shift;

    # data file's date
    # NOTE(review): two-digit year is always mapped to 19xx -- confirm this
    # is still acceptable for the dates being processed.
    local($recv) = sprintf("%s %02d, %04d",
                           $Months[int(substr($date, 2, 2))],
                           substr($date, 4, 2),
                           1900+int(substr($date, 0, 2)));

    $header[1] =~ s/%s/$recv/;

    if ($hdr_type eq 'COMPANY') {
        $header[0] =~ s|%s|by Company Name|;
        if ($do_master) {
            $header[6] =~ s|%s|$full/company.idx|;
        }
        else {
            $header[6] =~ s|%s|$daily/company.$date.idx|;
        }
    }
    elsif ($hdr_type eq 'FORM') {
        $header[0] =~ s|%s|by Form Type|;
        if ($do_master) {
            $header[6] =~ s|%s|$full/form.idx|;
        }
        else {
            $header[6] =~ s|%s|$daily/form.$date.idx|;
        }
    }
}