Waraxe IT Security Portal  
  Login or Register
::  Home  ::  Search  ::  Your Account  ::  Forums  ::   Waraxe Advisories  ::  Tools  ::
January 25, 2020
 Members List
 IRC chat
 Base64 coder
 MD5 hash
 CRC32 checksum
 ROT13 coder
 SHA-1 hash
 Sql Char Encoder
 y3dips ITsec
 Md5 Cracker
 User Manuals
 Recommend Us
 Your Account

User Info
Welcome, Anonymous

Latest: MichaelSnaRe
New Today: 0
New Yesterday: 0
Overall: 9145

People Online:
Visitors: 168
Members: 2
Total: 170

Online Now:
01: david - Homepage
02: kolaz - Homepage
PacketStorm News
Currently there is a problem with headlines from this site
Log in Register Forum FAQ Memberlist Search
IT Security and Insecurity Portal

www.waraxe.us Forum Index -> Perl -> Google wordlist creator
Post new topic  Reply to topic View previous topic :: View next topic 
Google wordlist creator
PostPosted: Thu Nov 06, 2008 10:07 pm Reply with quote
Joined: Nov 25, 2006
Posts: 141
Location: Finland

use strict;
use warnings;
use LWP::Simple qw($ua get);
use HTML::Parser;

# Exactly four command-line arguments are expected; anything else shows the help text.
usage() unless scalar(@ARGV) == 4;

# HTTP client used for every request, announcing itself with a browser-style agent string.
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv: Gecko/20070309 Firefox/');

# Settings taken positionally from the command line.
my ($min, $max, $term, $result) = @ARGV[0 .. 3];

# Lists that start out empty: result URLs and collected words.
my (@urls, @words);

print "[+] Min word length: $min\n[+] Max word length: $max\n[+] Term: $term\n[+] Result file: $result\n";

# Look up a value in a list using string equality (eq).
#
# Arguments: the value to find, followed by the list to search.
# Returns:   the 1-based position of the first matching list element,
#            or 0 when nothing matches (or the list is empty).
sub in_array {
    foreach my $position (1 .. $#_) {
        # $_[0] is the needle; $_[1] onwards is the haystack.
        return $position if $_[$position] eq $_[0];
    }
    return 0;
}

# Strip unwanted characters from a candidate word.
#
# Argument: the string to clean (the ($) prototype is kept so existing
#           call sites parse exactly as before).
# Returns:  the cleaned string; an undefined argument yields ''.
#
# Steps, in order:
#   1. remove the FIRST line terminator found (CRLF, LF, CR, form feed,
#      U+2028 or U+2029) - only one is removed, later ones are kept;
#   2. remove every run of three consecutive spaces;
#   3. remove every '-', ':' and every character in the ASCII range
#      '!' through ')' (that is: ! " # $ % & ' ( and ) ).
sub trim($) {
    my $str = shift;
    return '' unless defined $str;

    # Equivalent to the Text::Chomp-derived pattern: leftmost terminator only,
    # with CRLF tried before a lone CR so the pair is removed as one unit.
    $str =~ s/\x0D\x0A|[\x0A\x0D\x0C\x{2028}\x{2029}]//;

    # NOTE(review): the literal is exactly three spaces; it may originally have
    # been a tab or an HTML entity that was mangled in transit - confirm.
    $str =~ s/   //g;

    # One pass replaces the separate -, :, %, (, ) and [!-)] substitutions;
    # '%', '(' and ')' are already inside the '!'..')' range.
    $str =~ s/[!-):\-]//g;

    return $str;
}

# Print the command-line help to STDOUT and terminate the script.
#
# Takes no arguments and does not return: it exits with status 1 so that
# a run with the wrong number of arguments never continues with undefined
# settings.
sub usage {
    print "Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>\n";
    # A multi-word search term must be quoted so the shell passes it as one argument.
    print "\tEnclose dork/word in quotes for \"multiple words\"\n";
    print "Eg: WordlistCreator.pl 6 13 security words.txt\n";
    print "Eg: WordlistCreator.pl 5 10 \"site:example.com password\" example.txt\n";
    exit 1;
}

# Fetch a URL with the shared user agent ($ua, declared at file level).
#
# Argument: the URL to GET.
# Returns:  the response body on success; on failure prints the HTTP status
#           line and returns 0, so callers can test the result for truth.
sub getreq($) {
    my ($url) = @_;

    my $response = $ua->request(HTTP::Request->new(GET => $url));

    # Only hand back the body for a successful response; otherwise fall
    # through to the error report, which was unreachable before.
    if ($response->is_success) {
        return $response->content;
    }

    print "\n[e] Connection error - " . $response->status_line . "\n";
    return 0;
}

# Run a Google search and collect the distinct result hosts into @urls.
#
# Arguments: $dork  - the raw search term / dork,
#            $start - offset of the first result to request.
# Returns:   0 when Google answers with its "We're sorry..." block page,
#            1 once no further "Next" link is found.
#
# Relies on the file-level @urls list and on getreq() and in_array().
# Pages are requested 100 results at a time, following the "Next" link in a
# loop rather than by recursion, so the term is escaped exactly once.
sub google($$) {
    my ($dork, $start) = @_;

    # Same character set the script has always escaped. Characters outside
    # this set (for example '%', '#', '+') are sent as-is.
    my %escape_for = (
        '?'  => '%3f',
        ' '  => '+',
        ':'  => '%3A',
        '/'  => '%2F',
        '&'  => '%26',
        '"'  => '%22',
        "'"  => '%27',
        ','  => '%2C',
        '\\' => '%5C',
    );
    (my $query = $dork) =~ s{([?:/&"',\\ ])}{$escape_for{$1}}g;

    while (1) {
        my $url = "http://www.google.com.au/search?q=$query&start=$start&num=100";
        print "[+] $url\n";
        my $content = getreq($url);

        if ($content =~ m/<h1>We\'re sorry\.\.\.<\/h1>/) {
            print "[!] Google killed you\n";
            return 0;
        }

        # Each result heading links to a page; only the scheme and host are kept.
        while ($content =~ m/<h2 class=r><a href=\"http:\/\/([a-z0-9-\.]+)\/(.*?)\"/ig) {
            my $dad = "http://$1";
            next if in_array($dad, @urls);
            push @urls, $dad;
            print "[+] $dad\n";
        }

        # Stop when the result page has no "Next" link.
        last unless $content =~ m/<td nowrap class=b><a href=\"\/(.*?)\"><div id=nn><\/div>Next<\/a><\/table><\/div><\/div><center>/;

        print "[+] Another page\n";
        $start += 100;
    }

    return 1;
}

# HTML::Parser text handler: harvest words from one chunk of page text.
#
# Argument: the text of the chunk (the handler is registered with the
#           'text' argspec).
# Returns:  nothing useful.
#
# The text is split on single spaces. Each piece is cleaned with trim() and
# lower-cased, then kept only if it is purely alphanumeric, its length lies
# between $min and $max inclusive, and it is not already in @words (exact
# string comparison via in_array()). Kept words are added to @words and
# appended, one per line, to the file named by $result.
# Dies if the result file cannot be opened or written.
sub textreeeh {
    my ($text) = @_;
    return unless defined $text;

    # One lexical handle per call, closed before returning, instead of a
    # bareword handle reopened for every word and never closed.
    open(my $out, '>>', $result) or die "[-] Couldn't open file: $result\n";

    for my $piece (split / /, $text) {
        my $word = lc trim($piece);

        # \z (not $) so a word followed by a stray newline is rejected.
        next unless $word =~ m/\A[a-z0-9]+\z/;
        next if length($word) < $min || length($word) > $max;
        next if in_array($word, @words);

        push @words, $word;
        print {$out} "$word\n";
    }

    close($out) or die "[-] Couldn't write file: $result\n";
    return;
}

# Main driver: search, load any existing word list, then crawl every result
# host and harvest words from its HTML.
print "[+] Searching google for $term\n";
google($term, 0);
print "[+] Finished searching, building..\n";

# Open for append first so the result file exists even on a first run.
open(my $touch, '>>', $result) or die "[-] Couldn't open file: $result\n";
close($touch);

# Load words from earlier runs; chomp so they compare equal to new words.
open(my $previous, '<', $result) or die "[-] Couldn't open file: $result\n";
@words = <$previous>;
chomp @words;
close($previous);

for my $url (@urls) {
    print "[+] $url\n";
    my $content = getreq($url);
    next unless $content;    # getreq() returns 0 on a failed request

    # Every text chunk in the document is passed to textreeeh().
    my $p = HTML::Parser->new(text_h => [\&textreeeh, 'text']);
    $p->parse($content);
    $p->eof;
}

print "[+] Got this many words: " . scalar(@words) . "\n";
# -s yields a false value for an empty file; treat that as 0 bytes.
my $resultsize = ((-s $result) || 0) / 1024;
print "[+] Size of wordlist: " . substr($resultsize,0,4) . " Kb\n";

Usage: WordlistCreator.pl <min word length> <max word length> <dork/word> <results list>
Enclose dork/word in quotes for "multiple words"
Eg: WordlistCreator.pl 6 13 security words.txt
Eg: WordlistCreator.pl 5 10 "site:example.com password" example.txt

It works by taking your search string, Googling it, and collecting all the results it can; it then visits each URL, parses the HTML, and extracts the words.

Eg: WordlistCreator.pl 6 13 searchterm dump.txt

This will Google for "searchterm" and find words that have lengths between 6 and 13, and dump to dump.txt
View user's profile Send private message
Google wordlist creator
  www.waraxe.us Forum Index -> Perl
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
All times are GMT  
Page 1 of 1  

 Post new topic  Reply to topic  

Powered by phpBB 2001-2008 phpBB Group

Book Opinions
All logos and trademarks in this site are property of their respective owner. The comments and posts are property of their posters, all the rest (c) 2004-2013 Janek Vind "waraxe"
Page Generation: 0.064 Seconds