#!/usr/local/bin/perl
#
# file: process-edgar
# auth: Brad Burdick
# desc: post-process SEC EDGAR SGML header file
#
# usage: process-edgar [-a] [-d datadir] [-e errdir] [-w workdir]
#                      [input_file ...]
#
##########################################################################
# Copyright (c) 1994, 1995 Internet Multicasting Service
#
# The SEC EDGAR Level 1 Dissemination processing software ("software")
# was developed by the Internet Multicasting Service and may
# be used for academic, research, government, and internal business
# purposes without charge. You may not resell this code or include it
# in a product that you are selling without prior permission of the
# Internet Multicasting Service.
#
# This software is provided ``as is'', without express or implied
# warranty, and with no support nor obligation to assist in its
# use, correction, modification or enhancement. We assume no liability
# with respect to the infringement of copyrights, trade secrets, or any
# patents, and are not responsible for consequential damages. Proper
# use of the software is entirely the responsibility of the user.
##########################################################################

# NOTE(review): in the copy of this file that was reviewed, every
# "<...>" token had been stripped from the code (SGML tag names and the
# <DOC>/<IN> readline operators).  The filehandle reads are restored from
# the adjacent open() calls; the SGML tag names are reconstructed from the
# EDGAR header vocabulary and are each marked below -- confirm them
# against edgar-desc.pl and a sample header before deploying.

eval 'exec /usr/bin/perl -s $0 ${1+"$@"}'
    if 0;

# who am i?
($prog = $0) =~ s#.*/##;

# allow local libraries
push(@INC, '/usr/local/ims/lib');

# for processing command line options
require 'getopts.pl';

# Edgar SGML description info
# (presumably supplies @edgar_desc, %item_desc, %sec_codes and %sic_codes)
require 'edgar-desc.pl';

# Edgar general utility routines
# (presumably supplies &makepath)
require 'edgar-util.pl';

# process command line options, if any
# ("e:" was missing from the spec, so -e errdir could never be set)
&Getopts('ad:e:w:');

# what type of processing?
$do_ascii = defined($opt_a);

# type of processing specified?
if (! $do_ascii) {
    die "$prog: no processing type specified: Exiting ...\n";
}

# base directory
$datadir = defined($opt_d) ? $opt_d : "/in/edgar";

# where to place normal submissions
$workdir = defined($opt_w) ? "$datadir/$opt_w" : "$datadir/work";
&makepath($workdir, 0775);

# where to place exception submissions (errors)
$errdir = defined($opt_e) ? "$datadir/$opt_e" : "$datadir/exceptions";
&makepath($errdir, 0775);

# date stamp for header
@date = localtime;
$datestamp = sprintf("%04d%02d%02d", $date[5]+1900, $date[4]+1, $date[3]);

# accession number (used as base file name for now)
$accno = '';

# header text
@header = ();

# are we processing a header?
$in_hdr = 0;

# take data from stdin if no file provided
if ($#ARGV < 0) {
    push(@ARGV, "<&STDIN");
}

#
# main processing loop
#
foreach $file (@ARGV) {
    # forget the previous file's accession number so a header without
    # one cannot silently reuse it
    $accno = '';

    &load_hdr($file, *header);

    # get accession number
    # NOTE(review): tag name reconstructed -- confirm
    for (@header) {
        if (/<ACCESSION-NUMBER>\s*(\S+)/) {
            $accno = $1;
            last;
        }
    }
    if ($accno eq '') {
        die "$prog: no accession number found in $file\n";
    }

    if ($do_ascii) {
        # the document is read from, and the result written back to,
        # the same path; the read completes before the file is reopened
        # for writing
        local($docfile) = "$workdir/$accno.txt";
        local(@document) = ();
        local(@newheader) = ();
        local($/) = undef;

        $outfile = "$workdir/$accno.txt";

        &process_ascii(*edgar_desc, *header, *newheader);

        # slurp in the whole document file
        open(DOC, "$docfile") ||
            die "$prog: unable to open $docfile: $!\n";
        @document = <DOC>;
        close(DOC);

        # open data file
        open(TEXT, ">$outfile") ||
            die "$prog: unable to open $outfile: $!\n";

        # NOTE(review): wrapper tag name reconstructed -- confirm
        print TEXT "<IMS-DOCUMENT>", "$accno.txt : $datestamp\n";
        print TEXT join("\n", @newheader), "\n";
        print TEXT @document;
        print TEXT "</IMS-DOCUMENT>\n";

        # buffered write errors only surface at close
        close(TEXT) ||
            die "$prog: error writing $outfile: $!\n";

        # resign the modified document; list form keeps the file name
        # away from the shell
        if (system('/usr/local/ims/bin/sign-doc', $outfile) != 0) {
            warn "$prog: sign-doc failed for $outfile (status $?)\n";
        }
    }

    $in_hdr = 0;
    @header = ();
}

exit 0;

#
# load SGML header
#
# args: $file   - name of the header file to read (handed to a two-arg
#                 open so that the "<&STDIN" default keeps working)
#       *header - typeglob whose array receives the file's lines,
#                 newlines removed
#
# dies if the file cannot be opened
#
sub load_hdr {
    local($file) = shift;
    local(*header) = shift;
    local($/) = undef;

    # slurp in the whole header file
    open(IN, "$file") ||
        die "$prog: unable to open $file: $!\n";
    @header = split("\n", <IN>);
    close(IN);

    return;
}

#
# create a more human-readable header file
#
# format of description info is:
#   tag text|replacement text|end nest text
#
# args: *desc      - typeglob of the description array (format above)
#       *header    - typeglob of the raw SGML header lines; lines are
#                    rewritten in place as they are processed
#       *newheader - typeglob of the array the readable lines are
#                    appended to
#
# For each header line the first matching description entry wins:
#   - a line equal to "<end nest text>" closes a nest: the indent level
#     drops by one and the line itself is not emitted
#   - a line starting with "<tag text>" is emitted as a label (the
#     replacement text if given, otherwise the tag with dashes turned
#     into spaces) followed by the line's value
# Lines matching no entry are copied through unchanged.
#
sub process_ascii {
    local(*desc) = shift;
    local(*header) = shift;
    local(*newheader) = shift;
    local($found) = 0;
    local($indent) = 0;
    local($line);
    local($endnest, $rep, $tag);

    foreach $line (@header) {
        $found = 0;

        for (@desc) {
            ($tag, $rep, $endnest) = split(/\|/);

            if ($line eq "<$endnest>") {
                $indent-- if ($indent > 0);
                $found = 1;
                last;
            }

            next unless ($line =~ /^<$tag>/);

            # choose the label text
            if ($rep) {
                $tag = $rep;
            } else {
                $tag =~ tr/-/ /;
            }

            if ($endnest) {
                # true if this $tag starts a nest
                $tag = join("", "\n", "\t" x $indent, "$tag");
                $indent++;
            } else {
                $tag = join("", "\t" x $indent, "$tag\t");
            }

            # translate coded values; $1 (not \1) is required on the
            # replacement side
            # NOTE(review): the three tag names below are reconstructed
            # -- confirm
            if ($line =~ /^<ITEMS>/) {
                $line =~ s/<ITEMS>(.*)/$item_desc{$1}/;
            } elsif ($line =~ /^<ACT>/) {
                $line =~ s/<ACT>(.*)/$sec_codes{$1}/;
            } elsif ($line =~ /^<ASSIGNED-SIC>/) {
                $line =~ s/<ASSIGNED-SIC>(.*)/$1/;
                $line = "$sic_codes{$line} [$line]";
            } else {
                # strip out the tag info
                $line =~ s/<.*>(.*)/$1/;
            }

            push(@newheader, join("", $tag, $line));
            $found = 1;
            last;
        }

        if (! $found) {
            push(@newheader, $line);
        }
    }
}