summaryrefslogtreecommitdiff
path: root/scraper/spotify.php
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/spotify.php')
-rw-r--r--scraper/spotify.php726
1 files changed, 726 insertions, 0 deletions
diff --git a/scraper/spotify.php b/scraper/spotify.php
new file mode 100644
index 0000000..79f61a6
--- /dev/null
+++ b/scraper/spotify.php
@@ -0,0 +1,726 @@
+<?php
+
+class spotify{
+
+ private const req_web = 0;
+ private const req_api = 1;
+ private const req_clientid = 2;
+
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("spotify");
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ public function getfilters($page){
+
+ return [
+ "category" => [
+ "display" => "Category",
+ "option" => [
+ "any" => "All (no pagination)",
+ "audiobooks" => "Audiobooks",
+ "tracks" => "Songs",
+ "artists" => "Artists",
+ "playlists" => "Playlists",
+ "albums" => "Albums",
+ "podcastAndEpisodes" => "Podcasts & Shows (no pagination)",
+ "episodes" => "Episodes",
+ "users" => "Profiles"
+ ]
+ ]
+ ];
+ }
+
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web, $bearer = null, $token = null){
+
+ $curlproc = curl_init();
+
+ switch($reqtype){
+
+ case self::req_api:
+ $headers = [
+ "User-Agent: " . config::USER_AGENT,
+ "Accept: application/json",
+ "Accept-Language: en",
+ "app-platform: WebPlayer",
+ "authorization: Bearer {$bearer}",
+ "client-token: {$token}",
+ "content-type: application/json;charset=UTF-8",
+ "Origin: https://open.spotify.com",
+ "Referer: https://open.spotify.com/",
+ "DNT: 1",
+ "Connection: keep-alive",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-site",
+ "spotify-app-version: 1.2.27.93.g7aee53d4",
+ "TE: trailers"
+ ];
+ break;
+
+ case self::req_web:
+ $headers = [
+ "User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1",
+ "Sec-GPC: 1",
+ "Connection: keep-alive",
+ "Upgrade-Insecure-Requests: 1",
+ "Sec-Fetch-Dest: document",
+ "Sec-Fetch-Mode: navigate",
+ "Sec-Fetch-Site: cross-site"
+ ];
+ break;
+
+ case self::req_clientid:
+ $get = json_encode($get);
+
+ curl_setopt($curlproc, CURLOPT_POST, true);
+ curl_setopt($curlproc, CURLOPT_POSTFIELDS, $get);
+
+ $headers = [
+ "User-Agent:" . config::USER_AGENT,
+ "Accept: application/json",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip, deflate, br",
+ "Referer: https://open.spotify.com/",
+ "content-type: application/json",
+ "Content-Length: " . strlen($get),
+ "Origin: https://open.spotify.com",
+ "DNT: 1",
+ "Sec-GPC: 1",
+ "Connection: keep-alive",
+ "Sec-Fetch-Dest: empty",
+ "Sec-Fetch-Mode: cors",
+ "Sec-Fetch-Site: same-site",
+ "TE: trailers"
+ ];
+ break;
+ }
+
+ if($reqtype !== self::req_clientid){
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+ throw new Exception(curl_error($curlproc));
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ public function music($get){
+
+ $search = $get["s"];
+ $ip = $this->backend->get_ip();
+ $category = $get["category"];
+
+ /*
+ audiobooks first and second page decoded
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAudiobooks&variables={"searchTerm":"freddie+dredd","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"8758e540afdba5afa3c5246817f6bd31d86a15b3f5666c363dd017030f35d785"}}
+ */
+
+ /*
+ songs
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":0,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchTracks&variables={"searchTerm":"asmr","offset":100,"limit":100,"numberOfTopResults":20,"includeAudiobooks":false}&extensions={"persistedQuery":{"version":1,"sha256Hash":"16c02d6304f5f721fc2eb39dacf2361a4543815112506a9c05c9e0bc9733a679"}}
+ */
+
+ /*
+ artists
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":30,"limit":23,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchArtists&variables={"searchTerm":"asmr","offset":53,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"b8840daafdda9a9ceadb7c5774731f63f9eca100445d2d94665f2dc58b45e2b9"}}
+ */
+
+ /*
+ playlists
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchPlaylists&variables={"searchTerm":"asmr","offset":30,"limit":3,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"19b4143a0500ccec189ca0f4a0316bc2c615ecb51ce993ba4d7d08afd1d87aa4"}}
+ */
+
+ /*
+ albums
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchAlbums&variables={"searchTerm":"asmr","offset":33,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e93b13cda461482da2940467eb2beed9368e9bb2fff37df3fb6633fc61271a27"}}
+ */
+
+ /*
+ podcasts & shows (contains authors, no pagination)
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":0,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
+ */
+
+ /*
+ episodes
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchDesktop&variables={"searchTerm":"asmr","offset":0,"limit":10,"numberOfTopResults":5,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"da03293d92a2cfc5e24597dcdc652c0ad135e1c64a78fddbf1478a7e096bea44"}}
+ ??? https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchFullEpisodes&variables={"searchTerm":"asmr","offset":60,"limit":30}&extensions={"persistedQuery":{"version":1,"sha256Hash":"9f996251c9781fabce63f1a9980b5287ea33bc5e8c8953d0c4689b09936067a1"}}
+ */
+
+ /*
+ profiles
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":0,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
+ https://api-partner.spotify.com/pathfinder/v1/query?operationName=searchUsers&variables={"searchTerm":"asmr","offset":30,"limit":30,"numberOfTopResults":20,"includeAudiobooks":true}&extensions={"persistedQuery":{"version":1,"sha256Hash":"02026f48ab5001894e598904079b620ebc64f2d53b55ca20c3858abd3a46c5fb"}}
+ */
+
+ // get HTML
+ try{
+
+ $html =
+ $this->get(
+ $ip,
+ "https://open.spotify.com/search/" .
+ rawurlencode($search) .
+ ($category != "any" ? "/" . $category : ""),
+ []
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to get initial search page");
+ }
+
+ // grep bearer and client ID
+ $this->fuckhtml->load($html);
+
+ $script =
+ $this->fuckhtml
+ ->getElementById(
+ "session",
+ "script"
+ );
+
+ if($script === null){
+
+ throw new Exception("Failed to grep bearer token");
+ }
+
+ $script =
+ json_decode(
+ $script["innerHTML"],
+ true
+ );
+
+ $bearer = $script["accessToken"];
+ $client_id = $script["clientId"];
+
+ // hit client ID endpoint
+ try{
+
+ $token =
+ json_decode(
+ $this->get(
+ $ip,
+ "https://clienttoken.spotify.com/v1/clienttoken",
+ [ // !! that shit must be sent as json data
+ "client_data" => [
+ "client_id" => $client_id,
+ "client_version" => "1.2.27.93.g7aee53d4",
+ "js_sdk_data" => [
+ "device_brand" => "unknown",
+ "device_id" => "4c7ca20117ca12288ea8fc7118a9118c",
+ "device_model" => "unknown",
+ "device_name" => "computer",
+ "os" => "windows",
+ "os_version" => "NT 10.0"
+ ]
+ ]
+ ],
+ self::req_clientid
+ ),
+ true
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch token");
+ }
+
+ if($token === null){
+
+ throw new Exception("Failed to decode token");
+ }
+
+ $token = $token["granted_token"]["token"];
+
+ try{
+
+ switch($get["option"]){
+
+ case "any":
+ $variables = [
+ "searchTerm" => $search,
+ "offset" => 0,
+ "limit" => 10,
+ "numberOfTopResults" => 5,
+ "includeAudiobooks" => true
+ ];
+ break;
+
+ case "audiobooks":
+
+ break;
+ }
+
+ $payload =
+ $this->get(
+ $ip,
+ "https://api-partner.spotify.com/pathfinder/v1/query",
+ [
+ "operationName" => "searchDesktop",
+ "variables" =>
+ json_encode(
+ [
+ "searchTerm" => $search,
+ "offset" => 0,
+ "limit" => 10,
+ "numberOfTopResults" => 5,
+ "includeAudiobooks" => true
+ ]
+ ),
+ "extensions" =>
+ json_encode(
+ [
+ "persistedQuery" => [
+ "version" => 1,
+ "sha256Hash" => "21969b655b795601fb2d2204a4243188e75fdc6d3520e7b9cd3f4db2aff9591e" // ?
+ ]
+ ]
+ )
+ ],
+ self::req_api,
+ $bearer,
+ $token
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON results");
+ }
+
+ if($payload == "Token expired"){
+
+ throw new Exception("Grepped spotify token has expired");
+ }
+
+ $payload = json_decode($payload, true);
+
+ if($payload === null){
+
+ throw new Exception("Failed to decode JSON results");
+ }
+
+ //$payload = json_decode(file_get_contents("scraper/spotify.json"), true);
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "song" => [],
+ "playlist" => [],
+ "album" => [],
+ "podcast" => [],
+ "author" => [],
+ "user" => []
+ ];
+
+ // get songs
+ foreach($payload["data"]["searchV2"]["tracksV2"]["items"] as $result){
+
+ if(isset($result["item"])){
+
+ $result = $result["item"];
+ }
+
+ if(isset($result["data"])){
+
+ $result = $result["data"];
+ }
+
+ [$artist, $artist_link] = $this->get_artists($result["artists"]);
+
+ $out["song"][] = [
+ "title" => $result["name"],
+ "description" => null,
+ "url" => "https://open.spotify.com/track/" . $result["id"],
+ "views" => null,
+ "author" => [
+ "name" => $artist,
+ "url" => $artist_link,
+ "avatar" => null
+ ],
+ "thumb" => $this->get_thumb($result["albumOfTrack"]["coverArt"]),
+ "date" => null,
+ "duration" => $result["duration"]["totalMilliseconds"] / 1000,
+ "stream" => [
+ "endpoint" => "spotify",
+ "url" => "track." . $result["id"]
+ ]
+ ];
+ }
+
+ // get playlists
+ foreach($payload["data"]["searchV2"]["playlists"]["items"] as $playlist){
+
+ if(isset($playlist["data"])){
+
+ $playlist = $playlist["data"];
+ }
+
+ $avatar = $this->get_thumb($playlist["ownerV2"]["data"]["avatar"]);
+
+ $out["playlist"][] = [
+ "title" => $playlist["name"],
+ "description" => null,
+ "author" => [
+ "name" => $playlist["ownerV2"]["data"]["name"],
+ "url" =>
+ "https://open.spotify.com/user/" .
+ explode(
+ ":",
+ $playlist["ownerV2"]["data"]["uri"],
+ 3
+ )[2],
+ "avatar" => $avatar["url"]
+ ],
+ "thumb" => $this->get_thumb($playlist["images"]["items"][0]),
+ "date" => null,
+ "duration" => null,
+ "url" =>
+ "https://open.spotify.com/playlist/" .
+ explode(
+ ":",
+ $playlist["uri"],
+ 3
+ )[2]
+ ];
+ }
+
+ // get albums
+ foreach($payload["data"]["searchV2"]["albums"]["items"] as $album){
+
+ if(isset($album["data"])){
+
+ $album = $album["data"];
+ }
+
+ [$artist, $artist_link] = $this->get_artists($album["artists"]);
+
+ $out["album"][] = [
+ "title" => $album["name"],
+ "description" => null,
+ "author" => [
+ "name" => $artist,
+ "url" => $artist_link,
+ "avatar" => null
+ ],
+ "thumb" => $this->get_thumb($album["coverArt"]),
+ "date" => mktime(0, 0, 0, 0, 32, $album["date"]["year"]),
+ "duration" => null,
+ "url" =>
+ "https://open.spotify.com/album/" .
+ explode(
+ ":",
+ $album["uri"],
+ 3
+ )[2]
+ ];
+ }
+
+ // get podcasts
+ foreach($payload["data"]["searchV2"]["podcasts"]["items"] as $podcast){
+
+ if(isset($podcast["data"])){
+
+ $podcast = $podcast["data"];
+ }
+
+ $description = [];
+ foreach($podcast["topics"]["items"] as $subject){
+
+ $description[] = $subject["title"];
+ }
+
+ $description = implode(", ", $description);
+
+ if($description == ""){
+
+ $description = null;
+ }
+
+ $out["podcast"][] = [
+ "title" => $podcast["name"],
+ "description" => $description,
+ "author" => [
+ "name" => $podcast["publisher"]["name"],
+ "url" => null,
+ "avatar" => null
+ ],
+ "thumb" => $this->get_thumb($podcast["coverArt"]),
+ "date" => null,
+ "duration" => null,
+ "url" =>
+ "https://open.spotify.com/show/" .
+ explode(
+ ":",
+ $podcast["uri"],
+ 3
+ )[2],
+ "stream" => [
+ "endpoint" => null,
+ "url" => null
+ ]
+ ];
+ }
+
+ // get audio books (put in podcasts)
+ foreach($payload["data"]["searchV2"]["audiobooks"]["items"] as $podcast){
+
+ if(isset($podcast["data"])){
+
+ $podcast = $podcast["data"];
+ }
+
+ $description = [];
+ foreach($podcast["topics"]["items"] as $subject){
+
+ $description[] = $subject["title"];
+ }
+
+ $description = implode(", ", $description);
+
+ if($description == ""){
+
+ $description = null;
+ }
+
+ $authors = [];
+ foreach($podcast["authors"] as $author){
+
+ $authors[] = $author["name"];
+ }
+
+ $authors = implode(", ", $authors);
+
+ if($authors == ""){
+
+ $authors = null;
+ }
+
+ $uri =
+ explode(
+ ":",
+ $podcast["uri"],
+ 3
+ )[2];
+
+ $out["podcast"][] = [
+ "title" => $podcast["name"],
+ "description" => $description,
+ "author" => [
+ "name" => $authors,
+ "url" => null,
+ "avatar" => null
+ ],
+ "thumb" => $this->get_thumb($podcast["coverArt"]),
+ "date" => strtotime($podcast["publishDate"]["isoString"]),
+ "duration" => null,
+ "url" => "https://open.spotify.com/show/" . $uri,
+ "stream" => [
+ "endpoint" => "spotify",
+ "url" => "episode." . $uri
+ ]
+ ];
+ }
+
+ // get episodes (and place them in podcasts)
+ foreach($payload["data"]["searchV2"]["episodes"]["items"] as $podcast){
+
+ if(isset($podcast["data"])){
+
+ $podcast = $podcast["data"];
+ }
+
+ $out["podcast"][] = [
+ "title" => $podcast["name"],
+ "description" => $this->limitstrlen($podcast["description"]),
+ "author" => [
+ "name" =>
+ isset(
+ $podcast["podcastV2"]["data"]["publisher"]["name"]
+ ) ?
+ $podcast["podcastV2"]["data"]["publisher"]["name"]
+ : null,
+ "url" => null,
+ "avatar" => null
+ ],
+ "thumb" => $this->get_thumb($podcast["coverArt"]),
+ "date" => strtotime($podcast["releaseDate"]["isoString"]),
+ "duration" => $podcast["duration"]["totalMilliseconds"] / 1000,
+ "url" =>
+ "https://open.spotify.com/show/" .
+ explode(
+ ":",
+ $podcast["uri"],
+ 3
+ )[2],
+ "stream" => [
+ "endpoint" => null,
+ "url" => null
+ ]
+ ];
+ }
+
+ // get authors
+ foreach($payload["data"]["searchV2"]["artists"]["items"] as $user){
+
+ if(isset($user["data"])){
+
+ $user = $user["data"];
+ }
+
+ $avatar = $this->get_thumb($user["visuals"]["avatarImage"]);
+
+ $out["author"][] = [
+ "title" =>
+ (
+ $user["profile"]["verified"] === true ?
+ "✓ " : ""
+ ) .
+ $user["profile"]["name"],
+ "followers" => null,
+ "description" => null,
+ "thumb" => $avatar,
+ "url" =>
+ "https://open.spotify.com/artist/" .
+ explode(
+ ":",
+ $user["uri"],
+ 3
+ )[2]
+ ];
+ }
+
+ // get users
+ foreach($payload["data"]["searchV2"]["users"]["items"] as $user){
+
+ if(isset($user["data"])){
+
+ $user = $user["data"];
+ }
+
+ $avatar = $this->get_thumb($user["avatar"]);
+
+ $out["user"][] = [
+ "title" => $user["displayName"] . " (@{$user["id"]})",
+ "followers" => null,
+ "description" => null,
+ "thumb" => $avatar,
+ "url" => "https://open.spotify.com/user/" . $user["id"]
+ ];
+ }
+
+ return $out;
+ }
+
+ private function get_artists($artists){
+
+ $artist_out = [];
+
+ foreach($artists["items"] as $artist){
+
+ $artist_out[] = $artist["profile"]["name"];
+ }
+
+ $artist_out =
+ implode(", ", $artist_out);
+
+ if($artist_out == ""){
+
+ return [null, null];
+ }
+
+ $artist_link =
+ $artist === null ?
+ null :
+ "https://open.spotify.com/artist/" .
+ explode(
+ ":",
+ $artists["items"][0]["uri"]
+ )[2];
+
+ return [$artist_out, $artist_link];
+ }
+
+ private function get_thumb($cover){
+
+ $thumb_out = null;
+
+ if($cover !== null){
+ foreach($cover["sources"] as $thumb){
+
+ if(
+ $thumb_out === null ||
+ (int)$thumb["width"] > $thumb_out["width"]
+ ){
+
+ $thumb_out = $thumb;
+ }
+ }
+ }
+
+ if($thumb_out === null){
+
+ return [
+ "url" => null,
+ "ratio" => null
+ ];
+ }else{
+
+ return [
+ "url" => $thumb_out["url"],
+ "ratio" => "1:1"
+ ];
+ }
+ }
+
+ private function limitstrlen($text){
+
+ return
+ explode(
+ "\n",
+ wordwrap(
+ str_replace(
+ ["\n\r", "\r\n", "\n", "\r"],
+ " ",
+ $text
+ ),
+ 300,
+ "\n"
+ ),
+ 2
+ )[0];
+ }
+}