From e252bf4fce70c56b33294fdd095b0dc08ce8b95f Mon Sep 17 00:00:00 2001 From: lolcat Date: Sat, 29 Jun 2024 12:55:28 -0400 Subject: soundcloud fix, for good this time --- scraper/sc.php | 103 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 22 deletions(-) (limited to 'scraper/sc.php') diff --git a/scraper/sc.php b/scraper/sc.php index 86ea979..c435c78 100644 --- a/scraper/sc.php +++ b/scraper/sc.php @@ -6,6 +6,9 @@ class sc{ include "lib/backend.php"; $this->backend = new backend("sc"); + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); } public function getfilters($page){ @@ -25,7 +28,7 @@ class sc{ ]; } - private function get($proxy, $url, $get = []){ + private function get($proxy, $url, $get = [], $web_req = false){ $curlproc = curl_init(); @@ -37,19 +40,42 @@ class sc{ curl_setopt($curlproc, CURLOPT_URL, $url); curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding - curl_setopt($curlproc, CURLOPT_HTTPHEADER, - ["User-Agent: " . config::USER_AGENT, - "Accept: application/json, text/javascript, */*; q=0.01", - "Accept-Language: en-US,en;q=0.5", - "Accept-Encoding: gzip", - "Referer: https://soundcloud.com/", - "Origin: https://soundcloud.com", - "DNT: 1", - "Connection: keep-alive", - "Sec-Fetch-Dest: empty", - "Sec-Fetch-Mode: cors", - "Sec-Fetch-Site: same-site"] - ); + + // use http2 + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + if($web_req === false){ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Referer: https://soundcloud.com/", + "Origin: https://soundcloud.com", + "DNT: 1", + "Connection: keep-alive", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-site"] + ); + }else{ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Connection: keep-alive", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: cross-site", + "Priority: u=1", + "TE: trailers"] + ); + } curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); @@ -396,13 +422,47 @@ class sc{ $token = apcu_fetch("sc_token"); - if($token === false){ + if($token !== false){ + + return $token; + } + + // search through all javascript components on the main page + try{ + $html = + $this->get( + $proxy, + "https://soundcloud.com", + [] + ); + }catch(Exception $error){ + + throw new Exception("Failed to fetch front page"); + } + + $this->fuckhtml->load($html); + + $scripts = + $this->fuckhtml + ->getElementsByTagName( + "script" + ); + + foreach($scripts as $script){ + + if( + !isset($script["attributes"]["src"]) || + strpos($script["attributes"]["src"], "sndcdn.com") === false + ){ + + continue; + } try{ $js = $this->get( $proxy, - "https://a-v2.sndcdn.com/assets/0-a901c1e0.js", + $script["attributes"]["src"], [] ); }catch(Exception $error){ @@ -416,16 +476,15 @@ class sc{ $token ); - if(!isset($token[1])){ + if(isset($token[1])){ - throw new Exception("Failed to get search token"); + apcu_store("sc_token", $token[1]); + return $token[1]; + break; } - - apcu_store("sc_token", $token[1]); - return $token[1]; } - return $token; + throw new Exception("Did not find a Soundcloud token in the Javascript blobs"); } private function limitstrlen($text){ -- cgit v1.2.3 From 8161f8e7b8b236892a2254b8876fe4363b33050b Mon Sep 17 00:00:00 2001 From: lolcat Date: Sat, 29 Jun 2024 17:58:05 -0400 Subject: forgor to set headers --- scraper/sc.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'scraper/sc.php') diff --git a/scraper/sc.php b/scraper/sc.php index c435c78..011293a 100644 --- a/scraper/sc.php +++ b/scraper/sc.php @@ -57,7 +57,8 @@ class sc{ "Connection: keep-alive", "Sec-Fetch-Dest: empty", "Sec-Fetch-Mode: cors", - "Sec-Fetch-Site: same-site"] + "Sec-Fetch-Site: same-site", + "Priority: u=1"] ); }else{ @@ -433,7 +434,8 @@ class sc{ $this->get( $proxy, "https://soundcloud.com", - [] + [], + true ); }catch(Exception $error){ -- cgit v1.2.3 From 6df9d17ada64663c8bfb80baafc42dc951951218 Mon Sep 17 00:00:00 2001 From: lolcat Date: Sat, 29 Jun 2024 18:03:10 -0400 Subject: fixed soundcloud crash --- scraper/sc.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'scraper/sc.php') diff --git a/scraper/sc.php b/scraper/sc.php index 011293a..7083c42 100644 --- a/scraper/sc.php +++ b/scraper/sc.php @@ -327,9 +327,12 @@ class sc{ $description[] = $song["title"]; } - if(count($description) != 0){ + if(count($description) !== 0){ $description = trim($count . " songs. " . implode(", ", $description)); + }else{ + + $description = ""; } if( -- cgit v1.2.3