Professional Documents
Culture Documents
aspx/perl/31396/perl-curl-get-data-from-website
use strict;
use warnings;
use HTML::TableExtract;
use HTTP::Cookies;
use HTTP::Request::Common qw(POST GET);
use LWP::UserAgent;
# Build the cookie jar; the constructor is called with no arguments.
my $jar = HTTP::Cookies->new;
# This excerpt stops here, before $jar is handed to anything.
exit 0;
====================
TABLE 0:
ROW 0:
 ||||
ROW 1:
 |Enter Master Air Waybill (MAWB)|
ROW 2:
Optional (For Import MAWB Only)|
ROW 3:
 ||||
ROW 4:
||* Master Air Waybill number example 123 - 12345678 ||
TABLE 1:
ROW 0:
ROW 1:
Item|AWB No|Flight No|Flight Date|Origin|Dest|Status|Pieces|Weight|Time|
ROW 2:
1|081-75133844|JQ 029|Oct 19 2010|MEL|BKK|Delivered|2|1,480.00|Oct 20 2010 - 1255|
TABLE 0:
ROW 0:
 ||||
ROW 1:
 |Enter Master Air Waybill (MAWB)|
ROW 2:
Optional (For Import MAWB Only)|
ROW 3:
 ||||
ROW 4:
||* Master Air Waybill number example 123 - 12345678 ||
TABLE 1:
ROW 0:
ROW 1:
Item|AWB No|Flight No|Flight Date|Origin|Dest|ULD No|Status|Pieces|Weight|Time|
ROW 2:
1|176-75064953|EK 419|Oct 15 2010|BKK|DXB|Flight Change |Export Transshipment|3|743.00|Oct 14 2010 5:37PM|
ROW 3:
2|176-75064953|EK 419|Oct 15 2010|BKK|DXB| |Accepted|3|743.00|Oct 14 2010 5:37PM|
ROW 4:
3|176-75064953|EK 373|Oct 15 2010|BKK|DXB|Flight Change |Export Transshipment|3|743.00|Oct 14 2010 6:12PM|
ROW 5:
4|176-75064953|EK 373|Oct 15 2010|BKK|DXB|SHC |Export Transshipment|3|743.00|Oct 14 2010 6:12PM|
ROW 6:
5|176-75064953|EK 373|Oct 14 2010|BKK|DXB|Flight Change |Export Transshipment|3|743.00|Oct 14 2010 6:42PM|
ROW 7:
6|176-75064953|EK 373|Oct 14 2010|BKK|DXB|PMC31131EK |Manifested|3|743.00|Oct 14 2010 6:57PM|
ROW 8:
7|176-75064953|EK 373|Oct 14 2010|BKK|DXB| |Departed|3|743.00|Oct 14 2010 9:54PM|
--------------------------------------------------------
-------------Get Data from Home Page---------------
--------------------------------------------------------
#!/usr/bin/Perl
use LWP::Simple;
use HTML::Parser;
use Data::Dumper;
# Driver: fetch the page named on the command line and stream it through
# HTML::Parser, printing every start tag, end tag and text run it reports.
my $url = shift @ARGV;
die "No URL specified on command line." unless defined $url;

# Put the site's HTML in $content; get() yields undef when the fetch fails.
my $content = get($url);
die "get failed" unless defined $content;

# Create the parser object (API version 3). Each handler receives only the
# fields named in its argspec string.
my $parser = HTML::Parser->new(
    api_version => 3,
    start_h     => [ \&startTag, 'tag, attr' ],
    end_h       => [ \&endTag,   'tag' ],
    text_h      => [ \&textElem, 'text' ],
);

# Feed the whole document, then signal end-of-input. Without eof() the parser
# may keep trailing text buffered and never hand it to the text handler.
$parser->parse($content);
$parser->eof;
# Start-tag handler for HTML::Parser.
#   $tag      - name of the tag that was opened
#   $attrHash - hash reference holding the tag's attributes
# Prints the tag name, a Data::Dumper dump of the attributes and a separator
# line to the currently selected output handle.
sub startTag
{
    my ($tag, $attrHash) = @_;
    print "TAG: $tag \n";
    # Parenthesise the Dumper call: as a bare list operator it would also
    # swallow the trailing "\n" and dump it as a bogus $VAR2.
    print "ATTR HASH: ", Dumper($attrHash), "\n";
    print "-----\n";
}
# End-tag handler for HTML::Parser.
#   first argument - name of the tag that was closed
# Prints the tag name followed by a separator line.
sub endTag
{
    my ($closed_tag) = @_;
    my $separator = ('-' x 5) . "\n";
    print 'END TAG: ' . $closed_tag . " \n";
    print $separator;
}
# Text handler for HTML::Parser.
#   first argument - the text run reported by the parser
# Prints the text followed by a separator line.
sub textElem
{
    my ($chunk) = @_;
    my $separator = ('-' x 5) . "\n";
    print 'TEXT: ' . $chunk . " \n";
    print $separator;
}