#!/usr/local/bin/perl
#
# file: extract-edgar
# auth: Brad Burdick
# desc: extract the EDGAR data files from the tape feed
#
# usage: extract-edgar [-C] [-c corrdir] [-d datadir] [-e errdir]
#                      [-v] [-w workdir] [input_file ...]
#
#   -C          stop at the first non-correction submission (see the
#               NOTE(review) at the "last LINE" below)
#   -c corrdir  sub-directory of datadir for corrections  (default "corrections")
#   -d datadir  top-level output directory                (default "/in/edgar")
#   -e errdir   sub-directory of datadir for exceptions   (default "exceptions")
#   -v          print a "Processing ..." line for correction/exception files
#   -w workdir  sub-directory of datadir for normal submissions (default "work")
#
# Input is read from the named files, or from stdin when none are given.
# Each <SUBMISSION> ... </SUBMISSION> group is written to its own file,
# named after the accession number found at the top of the submission.
#
##########################################################################
# Copyright (c) 1994, 1995 Internet Multicasting Service
#
# The SEC EDGAR Level 1 Dissemination processing software ("software")
# was developed by the Internet Multicasting Service and may
# be used for academic, research, government, and internal business
# purposes without charge.  You may not resell this code or include it
# in a product that you are selling without prior permission of the
# Internet Multicasting Service.
#
# This software is provided ``as is'', without express or implied
# warranty, and with no support nor obligation to assist in its
# use, correction, modification or enhancement.  We assume no liability
# with respect to the infringement of copyrights, trade secrets, or any
# patents, and are not responsible for consequential damages.  Proper
# use of the software is entirely the responsibility of the user.
##########################################################################

eval 'exec /usr/bin/perl -s $0 ${1+"$@"}'
    if 0;

# NOTE(review): every variable below is a package global and there is no
# "use strict".  The option variables ($opt_*) are set by Getopts, and the
# required libraries may read globals such as $prog -- confirm against
# getopts.pl / edgar-util.pl before converting anything to "my".

# who am i?
($prog = $0) =~ s#.*/##;

# where we find our local libraries
push(@INC, '/usr/local/ims/lib');

# for processing command line options
require 'getopts.pl';

# Edgar general utility routines (makepath and get_next_file are called
# below and are expected to come from here)
require 'edgar-util.pl';

# process command line options, if any
Getopts('Cc:d:e:vw:');

# only extract correction submissions?
$corrections_only = defined($opt_C);

# where to place submissions
$datadir = defined($opt_d) ? $opt_d : "/in/edgar";
makepath($datadir, 0775);

# verbose output?
$verbose = defined($opt_v);

# where to place normal submissions
$workdir = defined($opt_w) ? "$datadir/$opt_w" : "$datadir/work";
makepath($workdir, 0775);

# where to place correction submissions
$corrdir = defined($opt_c) ? "$datadir/$opt_c" : "$datadir/corrections";
makepath($corrdir, 0775);

# where to place exception submissions (errors)
$errdir = defined($opt_e) ? "$datadir/$opt_e" : "$datadir/exceptions";
makepath($errdir, 0775);

# submission text
@submission = ();

# is this submission a correction?
$correction = 0;

# are we processing a submission?
$in_sub = 0;

# take data from stdin if no file provided
if ($#ARGV < 0) {
    push(@ARGV, "<&STDIN");
}

# NOTE(review): the tag literals <SUBMISSION>, <CORRECTION> and
# <ACCESSION-NUMBER> used below were restored from the surrounding
# comments and the surviving </SUBMISSION> pattern -- confirm the exact
# spellings against the feed specification.

foreach $file (@ARGV) {
    # Two-argument open is kept on purpose: the "<&STDIN" default above
    # depends on it.  NOTE(review): it also means an argument containing
    # open-mode characters (e.g. a trailing "|") is interpreted by perl.
    open(IN, "$file") || die "$prog: error getting input: $!\n";

    # reset the submission and clear flags
    @submission = ();
    $in_sub = $correction = 0;

    # will block on input from STDIN if empty
  LINE:
    while (defined($line = <IN>)) {
        # chomp, not chop: chop would eat a data character from a final
        # line that has no trailing newline
        chomp($line);

        #
        # assumes SUBMISSIONs are not nested
        # ignores junk outside of <SUBMISSION> ... </SUBMISSION> nest
        #
        if (! $in_sub) {
            if ($line =~ /<SUBMISSION>/) {      # start of submission
                $in_sub = 1;

                # forget the previous submission's accession number so it
                # can never leak into this submission's file name
                $accno = undef;

                push(@submission, '<SUBMISSION>');      # save tag
                chomp($line = <IN>);

                #
                # is this a correction?
                #
                if ($line =~ /^<CORRECTION>/) {
                    $correction = 1;
                    push(@submission, $line);   # <CORRECTION> tag (optional)
                    chomp($line = <IN>);
                    if ($line =~ /\d+:\d+/) {   # time stamp (optional)
                        push(@submission, $line);
                        chomp($line = <IN>);
                    }
                }

                #
                # we'll use the accession number as a file name for now.
                #
                if ($line =~ /^<ACCESSION-NUMBER>/) {
                    push(@submission, $line);
                    # strip the tag, leaving the number ($1, not the
                    # deprecated \1, on the replacement side)
                    ($accno = $line) =~ s/<ACCESSION-NUMBER>(\S+)/$1/;

                    if ($correction) {
                        $outfile = get_next_file("$corrdir/$accno.corr01");
                        print "Processing $outfile ...\n" if $verbose;
                    }
                    else {
                        # do we only want correction submissions?
                        if ($corrections_only) {
                            # NOTE(review): this abandons the REST OF THE
                            # CURRENT INPUT, not just this submission; any
                            # corrections that follow in the same input are
                            # not extracted.  Behavior kept as-is -- confirm
                            # that this is intended.
                            $in_sub = 0;        # nothing is open for it
                            last LINE;
                        }
                        $outfile = "$workdir/$accno.nc";
                    }

                    if (-e $outfile) {
                        warn "$prog: $outfile already exists!!\n";
                        $outfile = get_next_file("$errdir/$accno.dup01");
                        print "Processing $outfile ...\n" if $verbose;
                    }
                }
                else {
                    # error - accession # MUST be next
                    #
                    # No accession number is available here, so use a fixed
                    # placeholder instead of an undefined or stale $accno.
                    $accno = 'unknown';

                    # keep the unexpected line so the exception file holds
                    # everything that was read ($line is undef at EOF)
                    push(@submission, $line) if defined($line);

                    $outfile = get_next_file("$errdir/$accno.err01");
                    print "Processing $outfile ...\n" if $verbose;
                }

                open(OUT, '>', $outfile) || die "$prog: $outfile: $!\n";
            }
        }
        elsif ($line =~ /^<\/SUBMISSION>/) {    # end of submission
            $in_sub = $correction = 0;
            push(@submission, '</SUBMISSION>');

            # save the submission to $outfile; buffered write errors only
            # surface at close, so check both
            print(OUT join("\n", @submission), "\n")
                || die "$prog: $outfile: $!\n";
            close(OUT) || die "$prog: $outfile: $!\n";

            # reset the submission and clear flags
            @submission = ();
        }
        else {
            push(@submission, $line);           # save the line
        }
    }

    # Input ended before </SUBMISSION>: nothing has been written to the
    # file opened for that submission, so say so rather than leaving an
    # unexplained empty file behind.
    if ($in_sub) {
        warn "$prog: $file: input ended inside a submission; "
           . "$outfile left empty\n";
        close(OUT);
    }

    close(IN);
}

exit 0;