Craigslist search all cities script 2

Posted by Joel Jensen Wed, 16 Aug 2006 15:12:00 GMT

My scooter got stolen a while ago, and I am tired of combing through craigslist to find it, so I wrote this script to do it for me. I first wrote a bot that visited every craigslist city and found its city number, then pasted the results into this script as [cityname, citynumber] pairs. For each city, the script sends a search request, identifying itself as Internet Explorer. The matching results are written to an HTML file named after the search term. It takes a few minutes to run, but can be automated.

For example, to look for my scooter I run: search_all_craigslist.py aprilia

#!/usr/bin/env python
# encoding: utf-8
"""
search_all_craigslist.py

Copyright © 2006 Joel Jensen. All Rights Reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY JOEL JENSEN "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import sys
import getopt
import urllib2, urllib, re, smtplib, time

# Usage text reported through the Usage exception when -h/--help is given.
# Built line by line; the leading and trailing empty entries reproduce the
# blank line before the text and the final newline after it.
help_message = "\n".join([
    "",
    "Usage search_all_craigslist.py searchterm [searchterm]",
    "Example:",
    "search_all_craigslist.py red wagon",
    "",
])


class Usage(Exception):
    """Signal a command-line usage problem.

    The text to show the user is carried in the ``msg`` attribute.
    """

    def __init__(self, msg):
        # Keep the message on the instance so the handler can print it.
        self.msg = msg


def main(argv=None):
    """Parse the command line and run the craigslist search.

    Parameters:
        argv: full argument vector including the program name at index 0;
            defaults to ``sys.argv`` when None.

    Returns:
        2 when the command line is invalid, help was requested, or no
        search term was supplied (a message is written to stderr);
        None after a completed search.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(argv[1:], "ho:v", ["help", "output="])
        except getopt.error as msg:
            raise Usage(msg)

        # option processing
        # -v and -o/--output are still accepted so existing invocations keep
        # working, but nothing in this script consumes their values.
        for option, _value in opts:
            if option in ("-h", "--help"):
                raise Usage(help_message)

        # Without a term the search would hit every city with an empty
        # query and write "_search.html"; treat that as a usage error.
        if not args:
            raise Usage("no search term given")

        cl_search(args)

    except Usage as err:
        sys.stderr.write(sys.argv[0].split("/")[-1] + ": " + str(err.msg) + "\n")
        sys.stderr.write("\t for help use --help\n")
        return 2


# One "base_url,area_id" record per line, in the order they are searched.
_CITY_LIST = """http://www.craigslist.org,1
http://bham.craigslist.org,127
http://huntsville.craigslist.org,231
http://mobile.craigslist.org,200
http://montgomery.craigslist.org,207
http://geo.craigslist.org/iso/us/ak,51
http://flagstaff.craigslist.org,244
http://phoenix.craigslist.org,18
http://tucson.craigslist.org,57
http://fayar.craigslist.org,293
http://littlerock.craigslist.org,100
http://bakersfield.craigslist.org,63
http://chico.craigslist.org,187
http://fresno.craigslist.org,43
http://humboldt.craigslist.org,189
http://inlandempire.craigslist.org,104
http://losangeles.craigslist.org,7
http://merced.craigslist.org,285
http://modesto.craigslist.org,96
http://monterey.craigslist.org,102
http://orangecounty.craigslist.org,103
http://palmsprings.craigslist.org,209
http://redding.craigslist.org,188
http://sacramento.craigslist.org,12
http://sandiego.craigslist.org,8
http://sfbay.craigslist.org,1
http://slo.craigslist.org,191
http://santabarbara.craigslist.org,62
http://stockton.craigslist.org,97
http://ventura.craigslist.org,208
http://cosprings.craigslist.org,210
http://denver.craigslist.org,13
http://fortcollins.craigslist.org,287
http://pueblo.craigslist.org,315
http://rockies.craigslist.org,288
http://hartford.craigslist.org,44
http://newhaven.craigslist.org,168
http://newlondon.craigslist.org,281
http://geo.craigslist.org/iso/us/de,193
http://geo.craigslist.org/iso/us/dc,10
http://daytona.craigslist.org,238
http://fortlauderdale.craigslist.org,236
http://fortmyers.craigslist.org,125
http://gainesville.craigslist.org,219
http://jacksonville.craigslist.org,80
http://miami.craigslist.org,20
http://orlando.craigslist.org,39
http://pensacola.craigslist.org,203
http://sarasota.craigslist.org,237
http://tallahassee.craigslist.org,186
http://tampa.craigslist.org,37
http://westpalmbeach.craigslist.org,124
http://athensga.craigslist.org,258
http://atlanta.craigslist.org,14
http://augusta.craigslist.org,256
http://macon.craigslist.org,257
http://savannah.craigslist.org,205
http://geo.craigslist.org/iso/us/hi,28
http://geo.craigslist.org/iso/us/id,52
http://chambana.craigslist.org,190
http://chicago.craigslist.org,11
http://peoria.craigslist.org,224
http://rockford.craigslist.org,223
http://springfieldil.craigslist.org,225
http://bloomington.craigslist.org,229
http://evansville.craigslist.org,227
http://fortwayne.craigslist.org,226
http://indianapolis.craigslist.org,45
http://southbend.craigslist.org,228
http://desmoines.craigslist.org,98
http://quadcities.craigslist.org,307
http://topeka.craigslist.org,280
http://wichita.craigslist.org,99
http://lexington.craigslist.org,133
http://louisville.craigslist.org,58
http://batonrouge.craigslist.org,199
http://lafayette.craigslist.org,283
http://lakecharles.craigslist.org,284
http://neworleans.craigslist.org,31
http://shreveport.craigslist.org,206
http://geo.craigslist.org/iso/us/me,169
http://geo.craigslist.org/iso/us/md,34
http://boston.craigslist.org,4
http://capecod.craigslist.org,239
http://westernmass.craigslist.org,173
http://worcester.craigslist.org,240
http://annarbor.craigslist.org,172
http://detroit.craigslist.org,22
http://flint.craigslist.org,259
http://grandrapids.craigslist.org,129
http://kalamazoo.craigslist.org,261
http://lansing.craigslist.org,212
http://nmi.craigslist.org,309
http://saginaw.craigslist.org,260
http://up.craigslist.org,262
http://duluth.craigslist.org,255
http://minneapolis.craigslist.org,19
http://rmn.craigslist.org,316
http://gulfport.craigslist.org,230
http://jackson.craigslist.org,134
http://columbiamo.craigslist.org,222
http://kansascity.craigslist.org,30
http://springfield.craigslist.org,221
http://stlouis.craigslist.org,29
http://geo.craigslist.org/iso/us/mt,192
http://lincoln.craigslist.org,282
http://omaha.craigslist.org,55
http://lasvegas.craigslist.org,26
http://reno.craigslist.org,92
http://geo.craigslist.org/iso/us/nh,198
http://newjersey.craigslist.org,170
http://southjersey.craigslist.org,286
http://albuquerque.craigslist.org,50
http://santafe.craigslist.org,218
http://albany.craigslist.org,59
http://binghamton.craigslist.org,248
http://buffalo.craigslist.org,40
http://hudsonvalley.craigslist.org,249
http://ithaca.craigslist.org,201
http://longisland.craigslist.org,250
http://newyork.craigslist.org,3
http://rochester.craigslist.org,126
http://syracuse.craigslist.org,130
http://utica.craigslist.org,247
http://asheville.craigslist.org,171
http://charlotte.craigslist.org,41
http://fayetteville.craigslist.org,273
http://greensboro.craigslist.org,61
http://raleigh.craigslist.org,36
http://wilmington.craigslist.org,274
http://winstonsalem.craigslist.org,272
http://geo.craigslist.org/iso/us/nd,196
http://akroncanton.craigslist.org,251
http://cincinnati.craigslist.org,35
http://cleveland.craigslist.org,27
http://columbus.craigslist.org,42
http://dayton.craigslist.org,131
http://toledo.craigslist.org,204
http://youngstown.craigslist.org,252
http://oklahomacity.craigslist.org,54
http://tulsa.craigslist.org,70
http://bend.craigslist.org,233
http://eugene.craigslist.org,94
http://medford.craigslist.org,216
http://portland.craigslist.org,9
http://salem.craigslist.org,232
http://erie.craigslist.org,275
http://harrisburg.craigslist.org,166
http://lancaster.craigslist.org,279
http://allentown.craigslist.org,167
http://pennstate.craigslist.org,277
http://philadelphia.craigslist.org,17
http://pittsburgh.craigslist.org,33
http://reading.craigslist.org,278
http://scranton.craigslist.org,276
http://geo.craigslist.org/iso/us/pr,180
http://geo.craigslist.org/iso/us/ri,38
http://charleston.craigslist.org,128
http://columbia.craigslist.org,101
http://greenville.craigslist.org,253
http://myrtlebeach.craigslist.org,254
http://geo.craigslist.org/iso/us/sd,195
http://chattanooga.craigslist.org,220
http://knoxville.craigslist.org,202
http://memphis.craigslist.org,46
http://nashville.craigslist.org,32
http://amarillo.craigslist.org,269
http://austin.craigslist.org,15
http://beaumont.craigslist.org,264
http://brownsville.craigslist.org,266
http://corpuschristi.craigslist.org,265
http://dallas.craigslist.org,21
http://elpaso.craigslist.org,132
http://houston.craigslist.org,23
http://laredo.craigslist.org,271
http://lubbock.craigslist.org,267
http://mcallen.craigslist.org,263
http://odessa.craigslist.org,268
http://sanantonio.craigslist.org,53
http://easttexas.craigslist.org,308
http://waco.craigslist.org,270
http://provo.craigslist.org,292
http://saltlakecity.craigslist.org,56
http://geo.craigslist.org/iso/us/vt,93
http://blacksburg.craigslist.org,291
http://charlottesville.craigslist.org,290
http://norfolk.craigslist.org,48
http://richmond.craigslist.org,60
http://roanoke.craigslist.org,289
http://bellingham.craigslist.org,217
http://seattle.craigslist.org,2
http://spokane.craigslist.org,95
http://yakima.craigslist.org,246
http://geo.craigslist.org/iso/us/wv,194
http://appleton.craigslist.org,243
http://eauclaire.craigslist.org,242
http://greenbay.craigslist.org,241
http://madison.craigslist.org,165
http://milwaukee.craigslist.org,47
http://geo.craigslist.org/iso/us/wy,197"""

# Lines of a response that start with "<p>&nbsp;" followed by a word
# character are collected as search results.
_RESULT_LINE = re.compile(r'^<p>&nbsp;\w.*$', re.M)

# Request headers sent with every search; the User-Agent presents the
# script as Internet Explorer.
_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Accept-Language': 'en-us',
    'Keep-Alive': '300',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
}


def _search_url(base_url, area_id, quoted_term):
    """Return the search URL for one city and an already URL-quoted term."""
    query = ('/cgi-bin/search?areaID=' + area_id.strip() +
             '&subAreaID=&query=' + quoted_term + '&catAbbreviation=sss')
    return base_url.strip() + query


def cl_search(search):
    """Search every listed craigslist city and save the matching lines.

    Parameters:
        search: sequence of words; they are joined with spaces and
            URL-quoted to form the query.

    Side effects:
        Prints a start line, each successfully fetched URL, and an end
        line to stdout. Cities whose request fails are reported on stderr
        and skipped. All matching lines are written, newline-separated,
        to "<quoted term>_search.html" in the current directory.
    """
    print('started on ' + time.strftime('%X %x %Z'))

    searchterm = urllib.quote(" ".join(search))
    # quote() leaves "/" unescaped by default, which would turn the output
    # name into a path with directories; escape it for the file name only.
    outname = searchterm.replace('/', '%2F') + '_search.html'

    matches = []
    for record in _CITY_LIST.splitlines():
        record = record.strip()
        if not record:
            continue
        loc, area_id = record.split(',')
        url = _search_url(loc, area_id, searchterm)
        try:
            response = urllib2.urlopen(
                urllib2.Request(url, None, _REQUEST_HEADERS))
            try:
                data = response.read()
            finally:
                response.close()
        except IOError as err:
            # urllib2.URLError (and its HTTPError subclass) derive from
            # IOError. One unreachable city must not discard the results
            # already gathered from the others.
            sys.stderr.write('skipping %s: %s\n' % (url, err))
            continue
        matches.extend(_RESULT_LINE.findall(data))
        print(url)

    # The with-block guarantees the file is flushed and closed.
    with open(outname, 'w') as out:
        out.write('\n'.join(matches))
    print('ended on  ' + time.strftime('%X %x %Z'))



if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes
    # the process exit status.
    exit_status = main()
    sys.exit(exit_status)
Comments

Leave a comment

  1. John M about 22 hours later:

    Very cool script.

  2. Abdi over 2 years later:

    Nice script. For those looking for a quick way to search Craigslist, try my site: http://www.craigzoom.com

Comments