#!/usr/bin/perl
use IO::Socket;
use URI;
use strict;
use warnings;

# Simple link harvester.
#
# Reads seed URLs (one per line) from links.dat, connects to each URL's host
# on port 80, requests the front page ("/") and appends every href="..."
# value found in the response back onto links.dat, one link per line.
#
# Limitations kept from the original design:
#   * only "/" is requested, whatever path the seed URL carries;
#   * a leading "www." is stripped from the host before connecting;
#   * only double-quoted href attributes are recognised;
#   * response headers are scanned together with the body.

my $links_file = 'links.dat';

# Load the seed list completely before the file is reopened for appending,
# so that links harvested during this run are not crawled in the same run.
open(my $seed_fh, '<', $links_file)
    or die "Couldn't open $links_file for reading: $!";
my @bigarray;
while (my $entry = <$seed_fh>) {
    chomp $entry;
    next if $entry eq '';    # blank lines carry no URL
    push @bigarray, $entry;
}
close($seed_fh);

# One append handle for the whole run instead of reopening per host.
open(my $out_fh, '>>', $links_file)
    or die "Couldn't open $links_file for appending: $!";

foreach my $uri (@bigarray) {
    my $parsed = URI->new($uri);

    # Scheme classes such as mailto: have no authority() method, and
    # relative links have no authority component at all; neither can be
    # fetched, so skip them instead of dying.
    my $domain = $parsed->can('authority') ? $parsed->authority : undef;
    unless (defined $domain && length $domain) {
        warn "Skipping '$uri': no host component\n";
        next;
    }
    $domain =~ s/^www\.//i;

    my $socket = IO::Socket::INET->new(
        PeerAddr => $domain,
        PeerPort => 80,
        Proto    => 'tcp',
        Type     => SOCK_STREAM,
    );

    # A single unreachable host should not abort the whole crawl.
    unless ($socket) {
        warn "Couldn't connect to $domain: $@\n";
        next;
    }

    # HTTP requires CRLF line endings; the Host header lets name-based
    # virtual hosts pick the right site.
    print {$socket} "GET / HTTP/1.0\r\nHost: $domain\r\n\r\n";

    while (defined(my $line = <$socket>)) {
        # Iterate over every match on the line; the capture is only used
        # inside the loop body, so a stale $1 is never written.
        while ($line =~ m{href="(.*?)"}ig) {
            print {$out_fh} "$1\n";    # newline keeps one link per line
        }
    }
    close($socket);
}

# Buffered write errors surface at close time, so check it.
close($out_fh)
    or die "Couldn't finish writing $links_file: $!";
# Friday, 10 August 2007
# Perl URL Crawler
# Subscribe to:
# Post Comments (Atom)
# 0 Comments:
# Post a Comment