Fixed similar apps parsing #54

Open · wants to merge 1 commit into base: master
4 changes: 2 additions & 2 deletions play_scraper/api.py
@@ -79,7 +79,7 @@ def search(query, page=None, detailed=False, hl='en', gl='us'):
     return s.search(query, page, detailed)


-def similar(app_id, detailed=False, hl='en', gl='us'):
+def similar(app_id, detailed=True, hl='en', gl='us'):
     """Sends a GET request, follows the redirect, and retrieves a list of
     applications similar to the specified app.

@@ -88,7 +88,7 @@ def similar(app_id, detailed=False, hl='en', gl='us'):
     :return: a list of similar apps
     """
     s = scraper.PlayScraper(hl, gl)
-    return s.similar(app_id)
+    return s.similar(app_id, detailed)


 def categories(hl='en', gl='us', ignore_promotions=True):
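
A quick usage sketch of the public API after this change. This is illustrative only: the app id is a placeholder, and the 'app_id'/'title' keys and the detailed=False behaviour are assumptions about the returned dictionaries, not something shown in this diff.

    import play_scraper

    # With this patch, detailed defaults to True, so each entry is assumed to
    # come back as a full app-detail dictionary.
    similar_apps = play_scraper.similar('com.example.app')  # placeholder app id

    # The lighter, listing-only behaviour can still be requested explicitly
    # (assumed to remain supported).
    similar_basic = play_scraper.similar('com.example.app', detailed=False)

    for app in similar_apps[:5]:
        # 'app_id' and 'title' are assumed dictionary keys.
        print(app.get('app_id'), app.get('title'))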
14 changes: 8 additions & 6 deletions play_scraper/scraper.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-

 import logging
+import re
 try:
     from urllib import quote_plus
     from urlparse import urljoin
@@ -54,15 +55,16 @@ def _parse_multiple_apps(self, list_response):
         :param list_response: the Response object from a list request
         :return: a list of app dictionaries
         """
-        list_strainer = SoupStrainer('span',
-                                     {'class': 'preview-overlay-container'})
+        list_strainer = SoupStrainer('a')
         soup = BeautifulSoup(list_response.content,
                              'lxml',
-                             from_encoding='utf8',
-                             parse_only=list_strainer)
+                             parse_only=list_strainer,
+                             from_encoding='utf8')

-        app_ids = [x.attrs['data-docid']
-                   for x in soup.select('span.preview-overlay-container')]
+        # get app ids from the href attributes of app links on the similar apps page
+        app_tags = soup.find_all(href=re.compile(r'/store/apps/details\?id='))
+        # dedupe via set(), then back to a list, since multi_futures_app_request requires a list
+        app_ids = list(set(link.attrs['href'][23:] for link in app_tags))
         return multi_futures_app_request(app_ids, params=self.params)

     def details(self, app_id):
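
For reference, a minimal, self-contained sketch of the link-based extraction the new _parse_multiple_apps code relies on. The HTML snippet and package names are made up, html.parser is used instead of lxml so the sketch has no extra dependency, and the 23-character slice corresponds to len('/store/apps/details?id=').

    import re
    from bs4 import BeautifulSoup, SoupStrainer

    # Made-up stand-in for the similar-apps page markup.
    html = """
    <a href="/store/apps/details?id=com.example.one">App One</a>
    <a href="/store/apps/details?id=com.example.two">App Two</a>
    <a href="/store/apps/details?id=com.example.one">App One (duplicate card)</a>
    """

    # Parse only anchor tags, mirroring SoupStrainer('a') in the patch.
    soup = BeautifulSoup(html, 'html.parser', parse_only=SoupStrainer('a'))

    # Match the details-page links and strip the '/store/apps/details?id='
    # prefix (23 characters) to leave just the package name; set() drops
    # duplicate cards for the same app.
    pattern = re.compile(r'/store/apps/details\?id=')
    app_ids = sorted(set(a['href'][23:] for a in soup.find_all(href=pattern)))

    print(app_ids)  # ['com.example.one', 'com.example.two']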