import os
import subprocess

from bs4 import BeautifulSoup

# List the top websites in Indonesia | Part 1
def listing(url):
    """Return the link texts scraped from one top-sites listing page.

    The page is downloaded with ``curl -s`` and parsed with BeautifulSoup.
    The text of every ``<a>`` element found inside a
    ``<p class="desc-paragraph">`` element is collected in document order.

    Args:
        url: Address of the page to fetch. It is interpolated verbatim into
            a shell command line, so shell-escaped characters in it are
            interpreted by the shell before curl sees them.

    Returns:
        A list with the text of each matching link. The list is empty when
        the download produces no matching markup; curl's exit status is not
        checked, so a failed download also yields an empty list.
    """
    # NOTE(security): url still goes through the shell unquoted, exactly as
    # before, because existing callers rely on shell escapes in it. Only pass
    # trusted URLs.
    #
    # curl's output is read through a pipe instead of the fixed
    # /tmp/listing.bounty scratch file, which also removes the need for the
    # `rm /tmp/*.bounty` cleanup that could delete unrelated files.
    proc = subprocess.Popen('curl -s {}'.format(url), shell=True,
                            stdout=subprocess.PIPE)
    raw, _ = proc.communicate()
    raw = raw.rstrip()

    bsoup = BeautifulSoup(raw, 'html.parser')

    websites = []
    for paragraph in bsoup.find_all('p', {'class': 'desc-paragraph'}):
        # Search the tag directly rather than serializing and re-parsing it.
        for anchor in paragraph.find_all('a'):
            websites.append(anchor.get_text())
    return websites

# Each listing page holds 25 websites, so 4 pages are fetched to collect the
# top 100 websites in Indonesia.
top_websites = []
for page in range(4):
    # '\\;' is a literal backslash followed by ';' (the same text the old
    # '\;' literal produced, minus the invalid-escape warning). The backslash
    # stops the shell used by listing() from treating ';' as a command
    # separator.
    url = 'http://www.alexa.com/topsites/countries\\;{}/ID'.format(page)
    top_websites += listing(url)

# Print a zero-based index next to each site name, left-aligned in columns.
for rank, site in enumerate(top_websites):
    print('{:<4} {:<16}'.format(rank, site))

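# NOTE: the text below is search-widget residue from the web page this script
# was copied from; it is not part of the program.
# results matching ""
#
#     No results matching ""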