#!/usr/bin/python
# matt.joyce@gmail.com
# April 2007
#
# based on work by William K Turkel http://digitalhistoryhacks.blogspot.com
#
# 1 Load the pickled ASINs
# 2 Query Amazon
# 3 Compile a list of ASIN pairs
# 4 Pickle and save the list of pairs.


import xml.dom.minidom
import urllib
import pickle
import amazon

# File prefix shared by the input (<prefix>_asins.pik) and the output
# (<prefix>_pairs.pik) pickle files.
filename = 'dhh'

# Load the initial ASIN numbers.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load files produced by the earlier step of this pipeline.
# The text mode ('r') is kept from the original so it stays consistent with
# whatever script wrote the file (protocol-0 pickles are plain text under
# Python 2); the handle is now closed deterministically.
with open(filename + "_asins.pik", 'r') as asin_file:
    initial_asins = pickle.load(asin_file)
print(len(initial_asins))

# List of (original ASIN, similar ASIN) tuples.
pairs = []

# For each initial ASIN ...
for asin_I in initial_asins:
    # ... query Amazon for similar titles.
    # NOTE(review): the original called a bare `AmazonAPI`, which is not bound
    # by `import amazon` and would raise NameError. It is assumed to be
    # defined in the `amazon` module -- confirm.
    amazonresult = amazon.AmazonAPI(asin_I, "SIMILAR")
    # minidom.parse accepts a filename or a file-like object; presumably
    # AmazonAPI returns one of those -- verify against the amazon module.
    result = xml.dom.minidom.parse(amazonresult)

    # Search the resulting items ...
    for item in result.getElementsByTagName('Item'):
        # ... and find the ASIN numbers inside each one.
        for asin in item.getElementsByTagName("ASIN"):
            # An empty <ASIN/> element has no text node to read; skip it
            # instead of failing with AttributeError on None.
            if asin.firstChild is None:
                continue
            asin_S = str(asin.firstChild.nodeValue)
            # Create a tuple pair for each similar title
            # (original ASIN, similar ASIN) and add the pair to the list.
            pairs.append((asin_I, asin_S))
            # Same output as the Python 2 statement `print asin_I, asin_S`,
            # written so that it also parses under Python 3.
            print("%s %s" % (asin_I, asin_S))

# Store the pairs; the `with` block guarantees the data is flushed and the
# handle closed even if pickling raises.
with open(filename + '_pairs.pik', 'w') as pairs_file:
    pickle.dump(pairs, pairs_file)


