| xxyygorich Sun Jul 23, 2006 4:51 pm |
|
|
Tech post: Perl sub to retrieve insider purchase from Form4 Tech post: Perl sub to retrieve insider purchase from SEC Form4 xml
This code parses the XML file of an SEC Form 4 to retrieve insider purchase information.
Later I will add a function that retrieves the XML file in real time, turning the sub into a real-time insider purchase report script. The current version requires manually downloading the Form 4 XML file, so it is not very useful yet, although it does parse the file.
Code:
#!/usr/bin/perl
# Author: xxyygorich.
# Copy right @2006
# This script can be freely distributed. No warranty of any kind will be provided.
# Usage: form4parse.pl <xml file>
# This script is to show a subroutine to parse a SEC form4 xml file to retrieve insider purchase information.
use strict;

# Path of a manually downloaded Form 4 XML file, taken from the command line.
my $xml_f = shift;
die "Usage: form4parse.pl <xml file>\n" unless defined $xml_f;

# Slurp the file with a three-argument open instead of shelling out to
# `more`: the file name comes straight from the command line, so passing it
# through the shell would allow command injection and would break on paths
# containing spaces or shell metacharacters.
my $page = do {
    open(my $xml_fh, '<', $xml_f) or die "Cannot open $xml_f: $!\n";
    local $/;    # undefined record separator => read the whole file at once
    <$xml_fh>;
};
$page = '' unless defined $page;    # an empty file yields undef in slurp mode

# parse_Form4xml fills %insider in place and returns 1 when the form reports
# an insider purchase (transaction code P), 0 otherwise.
my %insider;
my $purchase_found = parse_Form4xml ($page, \%insider);

if ($purchase_found && ref $insider{"trans"} && @{ $insider{"trans"} }) {
    # Report the first recorded purchase as: symbol, owner type, date, shares.
    my $first_purchase = $insider{"trans"}->[0];
    my @columns = (
        $insider{"symbol"},
        $insider{"owner_p"},
        $first_purchase->{"date"},
        $first_purchase->{"share"},
    );
    # Fields missing from the form are printed as empty columns.
    print join("\t", map { defined $_ ? $_ : '' } @columns), "\n";
}
else {
    print STDERR "No insider purchase found in $xml_f\n";
}
# parse_Form4xml($form4page, $insider_r)
#
# Scans the text of an SEC Form 4 XML document line by line and records
# insider purchase information in the hash referenced by $insider_r.
#
# Parameters:
#   $form4page - whole content of the Form 4 XML file as one string.
#   $insider_r - hash reference that is filled in place.
#
# Keys written to %$insider_r (only when the form contains a purchase):
#   symbol  - issuer trading symbol.
#   owner   - reporting owner name.
#   owner_p - owner relationship: 'T' (ten percent owner), 'D' (director),
#             'E' (officer), 'O' (other); the last flag seen in the file wins.
#   owner_t - officer title, recorded only while owner_p is 'E'.
#   trans   - array reference of hashes, one per non-derivative transaction
#             whose code is 'P', with keys security, date, code, share and
#             p_share (each present only if found in the form).
#
# Returns 1 if the document contains <transactionCode>P</transactionCode>
# anywhere, 0 otherwise (in which case %$insider_r is left untouched).
#
# This is a line-oriented scan, not a real XML parser: every tag it looks for
# must sit on a single line together with its content.
sub parse_Form4xml {
    my ($form4page, $insider_r) = @_;

    return 0 unless defined $form4page;
    return 0 unless $form4page =~ m{<transactionCode>P</transactionCode>};

    # Relationship flag element => one-letter owner code.
    my %owner_code_for = (
        isTenPercentOwner => 'T',
        isDirector        => 'D',
        isOfficer         => 'E',
        isOther           => 'O',
    );

    # Wrapper element => key under which its nested <value> is stored.
    my %field_for = (
        securityTitle            => 'security',
        transactionDate          => 'date',
        transactionShares        => 'share',
        transactionPricePerShare => 'p_share',
    );

    my @trans;
    my %trans_detail;
    my $in_transaction = 0;
    my $pending_field;    # key that the next <value> element belongs to

    LINE:
    for my $line (split /\n/, $form4page) {
        if ($line =~ m{<issuerTradingSymbol>(\w+)</issuerTradingSymbol>}) {
            $insider_r->{"symbol"} = $1;
            next LINE;
        }
        if ($line =~ m{<rptOwnerName>(.*)</rptOwnerName>}) {
            $insider_r->{"owner"} = $1;
            next LINE;
        }
        if ($line =~ m{<(isTenPercentOwner|isDirector|isOfficer|isOther)>1</\1>}) {
            $insider_r->{"owner_p"} = $owner_code_for{$1};
            next LINE;
        }
        if (   defined $insider_r->{"owner_p"}
            && $insider_r->{"owner_p"} eq 'E'
            && $line =~ m{<officerTitle>(.*)</officerTitle>})
        {
            $insider_r->{"owner_t"} = $1;
            next LINE;
        }

        if ($line =~ m{<nonDerivativeTransaction>}) {
            # Start every transaction from a clean slate so that nothing
            # leaks over from the previous one.
            $in_transaction = 1;
            %trans_detail   = ();
            $pending_field  = undef;
            next LINE;
        }

        # Only the non-derivative table is of interest; stop at its end.
        last LINE if $line =~ m{</nonDerivativeTable>};

        next LINE unless $in_transaction;

        # No "next" after a wrapper tag: its <value> may sit on the same line.
        if ($line =~ m{<(securityTitle|transactionDate|transactionShares|transactionPricePerShare)>}) {
            $pending_field = $field_for{$1};
        }
        if (defined $pending_field && $line =~ m{<value>([^<]*)</value>}) {
            $trans_detail{$pending_field} = $1;
            $pending_field = undef;
        }
        if ($line =~ m{</(?:securityTitle|transactionDate|transactionShares|transactionPricePerShare)>}) {
            # A wrapper that closed without a <value> (e.g. footnote only)
            # must not claim the <value> of a later element.
            $pending_field = undef;
        }

        if ($line =~ m{<transactionCode>(\w)</transactionCode>}) {
            $trans_detail{"code"} = $1;
        }

        if ($line =~ m{</nonDerivativeTransaction>}) {
            if (defined $trans_detail{"code"} && $trans_detail{"code"} eq 'P') {
                push @trans, { %trans_detail };    # store an independent copy
            }
            %trans_detail   = ();
            $in_transaction = 0;
            $pending_field  = undef;
        }
    }

    # Always publish the list, even if the closing table tag never showed up.
    $insider_r->{"trans"} = \@trans;

    return 1;
}
|
|
| |
Search Engine Indexer
BlastInvest @2005 p h p B B © 2001, 2002 p h p B B Group
|