Waraxe IT Security Portal  
  Login or Register
::  Home  ::  Search  ::  Your Account  ::  Forums  ::   Waraxe Advisories  ::  Tools  ::
March 29, 2024
Menu
 Home
 Logout
 Discussions
 Forums
 Members List
 IRC chat
 Tools
 Base64 coder
 MD5 hash
 CRC32 checksum
 ROT13 coder
 SHA-1 hash
 URL-decoder
 Sql Char Encoder
 Affiliates
 y3dips ITsec
 Md5 Cracker
 User Manuals
 AlbumNow
 Content
 Content
 Sections
 FAQ
 Top
 Info
 Feedback
 Recommend Us
 Search
 Journal
 Your Account



User Info
Welcome, Anonymous
Nickname
Password
(Register)

Membership:
Latest: MichaelSnaRe
New Today: 0
New Yesterday: 0
Overall: 9145

People Online:
Visitors: 794
Members: 0
Total: 794
PacketStorm News
·301 Moved Permanently

read more...
Log in Register Forum FAQ Memberlist Search
IT Security and Insecurity Portal

www.waraxe.us Forum Index -> Perl -> Google wordlist creator
Post new topic  Reply to topic View previous topic :: View next topic 
Google wordlist creator
PostPosted: Thu Nov 06, 2008 10:07 pm Reply with quote
Sm0ke
Moderator
Moderator
 
Joined: Nov 25, 2006
Posts: 141
Location: Finland




Code:
#!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple qw($ua get);
use HTML::Parser;

# --- Command-line handling and shared state ---------------------------------
# Exactly four arguments are required:
#   <min word length> <max word length> <dork/word> <results list>
usage() unless @ARGV==4;

# One user agent is shared by every HTTP request the script makes; it
# identifies itself with a Firefox user-agent string.
my $ua = LWP::UserAgent->new;
   $ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.4');

# $min/$max : inclusive bounds on accepted word length
# $term     : the search term (dork) sent to Google
# $result   : path of the word list file that words are appended to
# @urls     : result links collected by google()   (starts empty)
# @words    : words accepted so far                (starts empty)
my($min,$max,$term,$result,@urls,@words) = @ARGV[0..3];

# Both lengths are compared numerically later on, so refuse anything that is
# not a plain non-negative integer, and refuse an empty range (min > max)
# which could never match a word.
usage() unless $min =~ /\A\d+\z/ && $max =~ /\A\d+\z/ && $min <= $max;

print "[+] Min word length: $min\n[+] Max word length: $max\n[+] Term: $term\n[+] Result file: $result\n";

# Linear search: in_array($needle, @list).
# Returns the 1-based position of the first element of @list that is string-
# equal to $needle, or 0 when there is no such element (so the result can be
# used directly as a boolean).
sub in_array {
   my $needle = $_[0];
   # Index 0 of @_ is the needle itself, so the list starts at index 1.
   foreach my $pos (1 .. $#_) {
      return $pos if $_[$pos] eq $needle;
   }
   return 0;
}

# Strip punctuation and layout characters from a single token.
#
# Takes one string and returns a cleaned copy:
#   1. the first line terminator found in the string is removed,
#   2. every run of three consecutive spaces is removed,
#   3. '-', ':' and every character in the range '!' .. ')'
#      (that is: ! " # $ % & ' ( ) ) is deleted.
sub trim($){
   my ($text) = @_;

   # Pattern taken from the module Text::Chomp; only the first line
   # terminator is dropped, later ones are left in place.
   $text =~ s/^(.*?)(?:\x0D\x0A|\x0A|\x0D|\x0C|\x{2028}|\x{2029})/$1/s;

   $text =~ s/   //g;

   # A single transliteration deletes every unwanted character in one pass.
   $text =~ tr/\-:!-)//d;

   return $text;
}

# Print the command-line synopsis followed by two example invocations, then
# terminate the script.  Never returns to the caller.
sub usage{
   print "Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>\n";
   # A multi-word dork has to be quoted so that it arrives as one argument
   # (the second example below shows this).
   print "\tEnclose dork/word in quotes for \"multiple words\"\n";
   print "Eg: WordlistCreator.pl 6 13 searchterm dump.txt\n";
   print "Eg: WordlistCreator.pl 5 10 \"site:example.com searchterm\" example.txt\n";
   exit;
}

# Perform an HTTP GET for the given URL using the file-level $ua user agent.
#
# Returns the response body when the request succeeds.  When it does not,
# prints a connection-error line containing the HTTP status line and
# returns 0.
sub getreq($){
   my ($target) = @_;
   my $response = $ua->request(HTTP::Request->new(GET => $target));

   # Report the failure and hand back 0 so callers can test the result.
   unless ($response->is_success) {
      print "\n[e] Connection error - " . $response->status_line . "\n";
      return 0;
   }

   return $response->content;
}

# Search Google for a term and collect the result links.
#
#   google($dork, $start)
#
#   $dork  - search term / dork, un-encoded
#   $start - offset of the first result to request (0 for the first page)
#
# Every result link that is not yet in the file-level @urls list is printed
# and appended to it.  While a result page contains a "Next" link, the next
# page (100 results further on) is requested as well.
#
# Returns 1 when all pages were processed, 0 when a page could not be fetched
# or Google answered with its "We're sorry..." block page.
sub google($$){
   my ($dork,$start) = @_;

   # Percent-encode the term once, up front: every character other than
   # letters, digits, '-', '_', '.', '~' and space is escaped, then spaces
   # become '+'.  This also covers characters such as '+', '#', '%' and '='
   # that would otherwise corrupt the query string.
   # NOTE(review): assumes $dork is a byte string (as @ARGV normally is);
   # characters above 0xFF would need UTF-8 encoding first.
   (my $query = $dork) =~ s/([^A-Za-z0-9_.~ -])/sprintf('%%%02X',ord($1))/ge;
   $query =~ tr/ /+/;

   while(1){
      my $url = "http://www.google.com.au/search?q=$query&start=$start&num=100";
      print "[+] $url\n";

      # getreq() has already printed the error when it returns 0.
      my $content = getreq($url);
      return 0 unless $content;

      if($content =~ m/<h1>We\'re sorry\.\.\.<\/h1>/){
         print "[!] Google killed you\n";
         return 0;
      }

      while($content =~ m/<h2 class=r><a href=\"http:\/\/([a-z0-9-\.]+)\/(.*?)\"/ig){
         # Copy the captures immediately; they are only valid until the
         # next successful match.
         my ($host,$path) = ($1,$2);

         # length() rather than truthiness, so a path of "0" is kept.
         my $link = length($path) ? "http://$host/$path" : "http://$host";

         # De-duplicate on the full link that is actually stored.
         next if in_array($link,@urls);
         print "[+] $link\n";
         push(@urls,$link);
      }

      # Stop when the page offers no "Next" link.
      last unless $content =~ m/<td nowrap class=b><a href=\"\/(.*?)\"><div id=nn><\/div>Next<\/a><\/table><\/div><\/div><center>/;
      print "[+] Another page\n";
      $start += 100;
   }

   return 1;
}

# HTML::Parser text handler: harvest words from a chunk of document text.
#
# Each argument is split on whitespace; every token is cleaned with trim()
# and lower-cased.  A token is accepted when it consists solely of the
# characters a-z and 0-9 and its length lies between the file-level $min and
# $max (inclusive).  An accepted word that is not yet in the file-level
# @words list is added to it and appended to the $result file.
#
# Dies when the result file cannot be opened or written.
sub textreeeh{
   foreach my $text (@_){
      next unless defined $text;

      # split ' ' breaks on any run of whitespace (spaces, tabs, newlines),
      # so words on adjacent lines are never glued together.
      foreach my $token (split(' ',$text)){
         my $word = lc(trim($token));

         # \A and \z anchor the whole string; '+' rejects the empty token.
         next unless $word =~ m/\A[a-z0-9]+\z/;
         next unless length($word) >= $min && length($word) <= $max;
         next if in_array($word,@words);

         # @words is only searched linearly and counted, so it is kept in
         # insertion order rather than re-sorted after every new word.
         push(@words,$word);

         # Append immediately so that an interrupted run keeps what it has
         # found so far.
         open(my $out,'>>',$result) or die "[-] Couldn't open file: $result\n";
         print {$out} "$word\n";
         close($out) or die "[-] Couldn't write file: $result\n";
      }
   }
   return;
}

# --- Main flow ---------------------------------------------------------------
# 1. Collect result links for the search term into @urls.
print "[+] Searching google for $term\n";
google($term,"0");
print "[+] Finished searching, building..\n";

# 2. Make sure the result file exists (append mode leaves existing content
#    untouched), then load the words it already holds so they are not
#    written a second time.
open(my $touch,'>>',$result) or die "[-] Couldn't open file: $result\n";
close($touch);

open(my $prev,'<',$result) or die "[-] Couldn't open file: $result\n";
@words = <$prev>;
close($prev);
# Strip the line endings; words are compared without a trailing newline.
chomp(@words);

# 3. Fetch every collected link and feed its text to textreeeh().
foreach my $url (@urls){
   print "[+] $url\n";
   my $content = getreq($url);
   # getreq() returns 0 (after printing the error) when the fetch failed.
   next unless $content;
   my $p = HTML::Parser->new(text_h => [\&textreeeh, 'text']);
   $p->parse($content);
   # Signal end of document so that any text still buffered is delivered.
   $p->eof;
}

# 4. Summary.
print "[+] Got this many words: " . scalar(@words) . "\n";
# -s yields a false value for an empty file, hence the "|| 0".
my $resultsize = ((-s $result) || 0)/1024;
print "[+] Size of wordlist: " . sprintf('%.2f',$resultsize) . " Kb\n";


Quote:
Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>
Enclose dork/word in quotes for "multiple words"
Eg: WordlistCreator.pl 6 13 niggers racism.txt
Eg: WordlistCreator.pl 5 10 "site:myspace.com wigger" myspace.txt

It works by taking your search string, Googling it and collecting every result it can; it then visits each result URL, parses the HTML and extracts the words.

Quote:
Eg: WordlistCreator.pl 6 13 searchterm dump.txt


This will Google for "searchterm" and find words that have lengths between 6 and 13, and dump to dump.txt
View user's profile Send private message
Google wordlist creator
  www.waraxe.us Forum Index -> Perl
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
All times are GMT  
Page 1 of 1  

  
  
 Post new topic  Reply to topic  




Powered by phpBB © 2001-2008 phpBB Group






Space Raider game for Android, free download - Space Raider gameplay video - Zone Raider mobile games
All logos and trademarks in this site are property of their respective owner. The comments and posts are property of their posters, all the rest (c) 2004-2020 Janek Vind "waraxe"
Page Generation: 0.117 Seconds