import os
import subprocess

from bs4 import BeautifulSoup
# listing all top websites in Indonesia | Part 1
def listing(url):
    """Return the link texts found in the description paragraphs of a page.

    The page at ``url`` is downloaded with ``curl``; every ``<a>`` element
    inside a ``<p class="desc-paragraph">`` element contributes its text.

    Args:
        url: Address of the page to fetch. Backslashes are removed before
            use, because callers written for the earlier shell-based
            implementation escape characters such as ``;`` with a backslash.

    Returns:
        A list of link texts in document order. The list is empty when
        nothing could be downloaded or no matching links were found.
    """
    # curl is started from an argument list (no shell), so the URL can no
    # longer be interpreted as shell syntax. "--" ends option parsing so a
    # URL beginning with "-" is not taken for a curl option.
    command = ['curl', '-s', '--', url.replace('\\', '')]
    try:
        raw = subprocess.check_output(command)
    except subprocess.CalledProcessError as exc:
        # Best effort, as before: parse whatever curl managed to output.
        raw = exc.output or ''
    except OSError:
        # curl could not be started at all, so there is nothing to parse.
        return []

    bsoup = BeautifulSoup(raw, 'html.parser')
    websites = []
    for paragraph in bsoup.find_all('p', class_='desc-paragraph'):
        # Search the already-parsed tag directly instead of re-parsing it.
        websites.extend(a.get_text() for a in paragraph.find_all('a'))
    return websites
# Each results page lists 25 websites, so four pages are fetched to collect
# the top 100 websites in Indonesia.
top_websites = []
for page in range(4):
    # The ';' is passed to listing() backslash-escaped; '\\;' spells that
    # backslash explicitly rather than relying on an invalid escape sequence.
    url = 'http://www.alexa.com/topsites/countries\\;{}/ID'.format(page)
    top_websites.extend(listing(url))

# Print each website next to its zero-based position, in left-aligned columns.
for rank, site in enumerate(top_websites):
    print('{:<4} {:<16}'.format(rank, site))