#!/usr/local/bin/perl # # file: correct-edgar # auth: Brad Burdick # desc: apply corrections to SEC EDGAR data file(s) # # usage: correct-edgar [-a] [-d datadir] [-v] [-w workdir] [input_file ...] # ########################################################################## # Copyright (c) 1994, 1995 Internet Multicasting Service # # The SEC EDGAR Level 1 Dissemination processing software ("software") # was developed by the Internet Multicasting Service and may # be used for academic, research, government, and internal business # purposes without charge. You may not resell this code or include it # in a product that you are selling without prior permission of the # Internet Multicasting Service. # # This software is provided ``as is'', without express or implied # warranty, and with no support nor obligation to assist in its # use, correction, modification or enhancement. We assume no liability # with respect to the infringement of copyrights, trade secrets, or any # patents, and are not responsible for consequential damages. Proper # use of the software is entirely the responsibility of the user. ########################################################################## eval 'exec /usr/bin/perl -s $0 ${1+"$@"}' if 0; # who am i? ($prog = $0) =~ s#.*/##; # where we find our local libraries push(@INC, '/usr/local/ims/lib'); # for processing command line options require 'getopts.pl'; # Edgar general utility routines require 'edgar-util.pl'; # Edgar SGML description info require 'edgar-desc.pl'; # date stamp for IMS header - century is hard-coded @date = localtime; $datestamp = sprintf("%04d%02d%02d", $date[5]+1900, $date[4]+1, $date[3]); # process command line options, if any &Getopts('ad:vw:'); # what type of processing? $do_ascii = defined($opt_a); # type of processing specified? if (! $do_ascii) { die "$prog: no processing type specified: Exiting ...\n"; } # verbose output? $verbose = defined($opt_v); # where to place submissions $datadir = defined($opt_d) ? "$opt_d" : "/ftp/edgar"; &makepath($datadir, 0775); # current working area $workdir = defined($opt_w) ? "$opt_w" : "/in/edgar/work"; &makepath($workdir, 0775); # base file name (accession # for now) $accno = ''; # subdirectory name (cik for now) $cik = ''; # keep track of the number of corrections made $corrections = 0; # output file name $hdrfile = ''; # document text @document = (); # header text @header = (); # are we processing a header? $in_hdr = 0; FILE: foreach $file (@ARGV) { # ignore filings for years we don't carry # file name format is 0000000000-99-000000, where -99- values are the # filing year. next unless ($file =~ /^\d+-9[45]-\d+/); open(IN, "$file") || (warn "$prog: error getting input: $!", next FILE); LINE: while ($line = ) { chop($line); # # assumes SUBMISSIONs are not nested # ignores junk outside of ... nest # if (! $in_hdr) { if ($line =~ '') { # start of header $in_hdr = 1; chop($line = ); # # is this a correction? # if ($line =~ '') { chop($line = ); if ($line =~ /\d+:\d+/) { # time stamp (optional) chop($line = ); } } # # we'll use the accession number as a file name for now. # if ($line =~ '') { ($accno = $line) =~ s/(\S+)/\1/; } else { # error - accession # MUST be next warn "$prog: $file: invalid format\n"; next FILE; } $ims_hdr = "$accno.hdr.sgml : $datestamp"; push(@header, $ims_hdr); push(@header, $line); } } elsif ($line =~ '') { # end of header local($dir) = 'data'; $hdrfile = "$datadir/$dir/$cik/$accno.hdr.sgml"; $in_hdr = 0; # make sure header exists if (! -e "$hdrfile") { # check for corrections in current feed $hdrfile = "$workdir/$accno.hdr.sgml"; if (! -e "$hdrfile") { warn "$prog: $accno.hdr.sgml not found\n"; @header = (); next FILE; } } push(@header, ""); open(OUT, ">$hdrfile") || (warn "$prog: error opening $hdrfile: $!\n", next FILE); # save the header to $hdrfile print OUT join("\n", @header), "\n"; # now process the document text if ($do_ascii) { local($txtfile); local(@newheader) = (); local(@newdoc) = (); local($/) = undef; ($txtfile = $hdrfile) =~ s/hdr.sgml/txt/o; open(DOC, "$txtfile") || (warn "$prog: unable to open $txtfile: $!\n", next FILE); &process_ascii(*edgar_desc, *header, *newheader); print "Modifying $txtfile ...\n" if $verbose; # slurp in the whole document file @document = split("\n", ); # remove existing header &remove_header(*document, *newdoc); # open data file open(TEXT, ">$txtfile") || (warn "$prog: unable to open $txtfile: $!\n", next FILE); print TEXT "", "$accno.txt : $datestamp\n"; print TEXT join("\n", @newheader), "\n"; print TEXT join("\n", @newdoc), "\n"; print TEXT "\n"; # MUST do this before signing document to insure that all data # is flushed to the file close(TEXT); # resign the modified document system("/usr/local/ims/bin/sign-doc $txtfile"); } $corrections++; $in_hdr = 0; # reset the array(s) @header = (); @document = (); } elsif ($in_hdr) { # TODO: fully automate this stuff... # # is this a deletion? # if ($line =~ //) { &delete_submission($accno); $in_hdr = 0; # reset the array @header = (); @document = (); next FILE; } elsif ($line =~ /|/) { # next LINE; # eat the tag # } elsif ($line =~ //) { # local($tag) = pop(@header); # local($endtag); # # ($endtag = $tag) =~ s#<(.*)>##; # # while ($line = ) { # chop($line); # next unless ($line eq $endtag); # next LINE; # } } # # grab the CIK # if ($line =~ /^/) { ($cik = $line) =~ s/0*(.*)/\1/; } push(@header, $line); # save the header line } } } # pet peeve... :) print "$corrections correction", ($corrections == 1) ? " was " : "s were ", "applied\n"; exit 0; # # create a more human-readable header file # # format of description info is: # tag text|replacement text|end nest text # sub process_ascii { local(*desc) = shift; local(*header) = shift; local(*newheader) = shift; local($found) = 0; local($indent) = 0; local($line); local($endnest, $rep, $tag); # TODO: find a way to speed this up foreach $line (@header) { DESC: for (@desc) { ($tag, $rep, $endnest) = split(/\|/); if ($line eq "<$endnest>") { $indent-- if ($indent > 0); $found = 1; last; } elsif ($line =~ /^<$tag>/) { if ($rep) { $tag = $rep; } else { $tag =~ s/-/ /og; } if ($endnest) { # true if this $tag starts a nest $tag = join("", "\n", "\t" x $indent, "$tag"); $indent++; } else { $tag = join("", "\t" x $indent, "$tag\t"); } if ($line =~ /^/) { $line =~ s/(.*)/$item_desc{\1}/e; } elsif ($line =~ /^/) { $line =~ s/(.*)/$sec_codes{\1}/e; } else { # strip out the tag info $line =~ s/<.*>(.*)/\1/; } push(@newheader, join("", $tag, $line)); $found = 1; last DESC; } } if (! $found) { push(@newheader, $line); } $found = 0; } } # # remove existing ASCII header # sub remove_header { local(*doc) = shift; local(*newdoc) = shift; local($in_doc) = 0; # special case for corrections in current feed - no header info if ($doc[0] =~ /^/) { @newdoc = @doc; return; } foreach $line (@doc) { next unless ($in_doc || ($line =~ /<\/IMS-HEADER>/o)); if ($in_doc == 0) { $in_doc = 1; next; } last if ($line =~ /<\/IMS-DOCUMENT>/o); push(@newdoc, $line); } } # # delete a submission # sub delete_submission { local($accno) = shift; local($savdir) = '/ftp/edgar/data/.deleted'; # TODO: must be a better way... local(@index_list) = ('/ftp/edgar/full-index', '/ftp/edgar/full-index/1995/QTR2', '/ftp/edgar/full-index/1995/QTR1', '/ftp/edgar/full-index/1994/QTR4', '/ftp/edgar/full-index/1994/QTR3', '/ftp/edgar/full-index/1994/QTR2', '/ftp/edgar/full-index/1994/QTR1', ); # make path if it doesn't exist &makepath($savdir, 0750); foreach $i (@index_list) { open(IN, "$i/master.idx") || die "$0: error opening index file:$!\n"; while () { chop; local($cik, $comp, $form, $date, $path) = split(/\|/); next unless ($path =~ /$accno/); push(@deletions, $path); # Note that we cannot short-circuit this search and quit after the # first match since each submission may have multiple CIKs (files) # associated with it....bummer. } } foreach $path (@deletions) { local($path_sgml); ($path_sgml = $path) =~ s/txt/hdr.sgml/; # make sure the file wasn't already deleted if (-e "/ftp/$path") { local($txt) = (split(/\//, $path))[3]; # if save file doesn't already exist, create it if (! -e "$savdir/$txt") { local($sgml); ($sgml = $txt) =~ s/txt/hdr.sgml/; # save backup of file(s) to be deleted link("/ftp/$path", "$savdir/$txt"); link("/ftp/$path_sgml", "$savdir/$sgml"); print "$path DELETED\n" if $verbose; } # remove original file(s) unlink("/ftp/$path"); unlink("/ftp/$path_sgml"); } } }