93 lines
3.5 KiB
Python
93 lines
3.5 KiB
Python
![]() |
import re
|
||
|
from common.models import SourceSiteEnum
|
||
|
from games.models import Game
|
||
|
from games.forms import GameForm
|
||
|
from common.scraper import *
|
||
|
from common.scrapers.igdb import IgdbGameScraper
|
||
|
|
||
|
|
||
|
class SteamGameScraper(AbstractScraper):
    """Scraper for game pages hosted on store.steampowered.com.

    Metadata is first requested from IGDB (via ``IgdbGameScraper``) and then
    combined with fields parsed from the Steam store page itself.
    """

    site_name = SourceSiteEnum.STEAM.value
    host = 'store.steampowered.com'
    data_class = Game
    form_class = GameForm

    # Matches the canonical prefix of a Steam app URL; anything after the
    # numeric app id (slug, query string) is dropped from the match.
    regex = re.compile(r"https://store\.steampowered\.com/app/\d+")

    def scrape(self, url):
        """Scrape a Steam store page and return ``(raw_data, raw_img)``.

        Args:
            url: URL of a Steam app page. It must start with
                ``https://store.steampowered.com/app/<digits>``.

        Returns:
            A tuple ``(self.raw_data, self.raw_img)``. ``raw_data`` is a dict
            with the keys ``title``, ``other_title``, ``developer``,
            ``publisher``, ``release_date``, ``genre``, ``platform``,
            ``brief``, ``other_info``, ``source_site`` and ``source_url``.

        Raises:
            ValueError: if ``url`` does not match ``regex``.
            IndexError: if the title, release date, description snippet or
                header image cannot be found on the Steam page.
        """
        effective_url = self.get_effective_url(url)
        if effective_url is None:
            raise ValueError("not valid url")

        try:
            igdb = IgdbGameScraper()
            igdb.scrape_steam(effective_url)
            self.raw_data = igdb.raw_data
            self.raw_img = igdb.raw_img
            self.img_ext = igdb.img_ext
            self.raw_data['source_site'] = self.site_name
            self.raw_data['source_url'] = effective_url
        except Exception:
            # The IGDB lookup is best-effort: on any ordinary failure fall
            # back to the Steam page alone. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            self.raw_img = None
            self.raw_data = {}

        headers = DEFAULT_REQUEST_HEADERS.copy()
        headers['Host'] = self.host
        # Cookies sent with the request; presumably these bypass Steam's
        # age / mature-content gate -- confirm against Steam's behaviour.
        headers['Cookie'] = "wants_mature_content=1; birthtime=754700401;"
        content = self.download_page(url, headers)

        title = content.xpath("//div[@class='apphub_AppName']/text()")[0]
        developer = content.xpath("//div[@id='developers_list']/a/text()")
        publisher = content.xpath(
            "//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()")
        release_date = parse_date(
            content.xpath(
                "//div[@class='release_date']/div[@class='date']/text()")[0]
        )
        genre = content.xpath(
            "//div[@class='details_block']/b[2]/following-sibling::a/text()")
        platform = ['PC']
        brief = content.xpath(
            "//div[@class='game_description_snippet']/text()")[0].strip()

        header_src = content.xpath(
            "//img[@class='game_header_image_full']/@src")[0]
        # Try the 600x900 library image first; fall back to the header image
        # when that download yields nothing.
        raw_img, img_ext = self.download_image(
            header_src.replace("header.jpg", "library_600x900.jpg"), url)
        if raw_img is None:
            raw_img, img_ext = self.download_image(header_src, url)

        # Only replace the image already stored on self when a Steam image
        # was actually downloaded.
        if raw_img is not None:
            self.raw_img = raw_img
            self.img_ext = img_ext

        igdb_data = self.raw_data
        self.raw_data = {
            # Steam's title/brief win when non-empty. ``.get`` avoids a
            # KeyError when the IGDB lookup failed and left the dict empty.
            'title': title or igdb_data.get('title'),
            'other_title': None,
            # For the remaining fields a value already present in the IGDB
            # data takes precedence over the one parsed from Steam.
            'developer': igdb_data.get('developer', developer),
            'publisher': igdb_data.get('publisher', publisher),
            'release_date': igdb_data.get('release_date', release_date),
            'genre': igdb_data.get('genre', genre),
            'platform': igdb_data.get('platform', platform),
            'brief': brief or igdb_data.get('brief'),
            'other_info': igdb_data.get('other_info'),
            'source_site': self.site_name,
            'source_url': effective_url,
        }
        return self.raw_data, self.raw_img

    @classmethod
    def get_effective_url(cls, raw_url):
        """Return the canonical Steam app URL prefix of ``raw_url``.

        Returns the text matched by ``regex`` at the start of ``raw_url``,
        or ``None`` when ``raw_url`` does not match.
        """
        m = cls.regex.match(raw_url)
        return m[0] if m else None