From c8ab934b108c83a391583ea8ae7f717c9753926e Mon Sep 17 00:00:00 2001 From: lolcat Date: Sun, 3 Sep 2023 22:41:44 -0400 Subject: soundcloud lolllllll --- .gitignore | 23 +++ api.txt | 29 ++++ api/v1/music.php | 26 ++++ audio_sc.php | 223 +++++++++++++++++++++++++++++ lib/curlproxy.php | 8 +- lib/frontend.php | 28 +++- music.php | 224 +++++++++++++++++++++++++++++ scraper/sc.php | 397 +++++++++++++++++++++++++++++++++++++++++++++++++++ scraper/yandex.php | 19 ++- settings.php | 12 +- static/style.css | 7 + template/header.html | 2 +- template/home.html | 2 +- 13 files changed, 982 insertions(+), 18 deletions(-) create mode 100644 .gitignore create mode 100644 api/v1/music.php create mode 100644 audio_sc.php create mode 100644 music.php create mode 100644 scraper/sc.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2930bc8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,23 @@ +lib/test.html +lib/postdata.json +lib/nextpage.json +scraper/brave.html +scraper/yandex.json +scraper/marginalia.json +banner_og/ +scraper/mojeek.html +scraper/google.html +scraper/google-img.html +scraper/google-video.html +scraper/google-news.html +scraper/google-img-nextpage.html +scraper/brave-image.html +scraper/brave-video.html +scraper/facebook.html +scraper/facebook-nextpage.json +scraper/yandex-video.json +scraper/yandex.html +scraper/soundcloud.json +scraper/mp3-pm.html +banner/* +!banner/*default* diff --git a/api.txt b/api.txt index 3b45e91..40b0ed3 100644 --- a/api.txt +++ b/api.txt @@ -242,6 +242,21 @@ the endpoint above. ++ /api/v1/music + Each entry under "song" contains a array index called "stream" that + looks like this :: + + endpoint: audio_sc + url: https://api-v2.soundcloud <...> + + + When the endpoint is "audio_sc", you MUST use 4get's audio_sc + endpoint, for example, if you want an audio stream back. Otherwise, + you are free to handle the json+m3u8 crap yourself. 
If the endpoint + is equal to "audio", that URL SHOULD return a valid HTTP audio + stream, and using the "audio" endpoint becomes optional again. + + + /favicon Get the favicon for a website. The only parameter is "s", and must include the protocol. @@ -284,6 +299,20 @@ The parameter is "s" for the audio link. ++ /audio_sc + Get a proxied audio file for SoundCloud. Does not support downloads + through WGET or CURL, since it returns 30kb~160kb "206 Partial + Content" parts, due to technical limitations that come with + converting m3u8 playlists to seekable audio files. If you use this + endpoint, you must support these 206 codes and also handle the + initial 302 HTTP redirect. I used this method as I didn't want to + store information about your request needlessly. This method also + allows noJS users to access the files. + + The parameter is "s" for the SoundCloud JSON m3u8 abomination. It + does not support "normal" SoundCloud URLs at this time. + + + Appendix If you have any questions or need clarifications, please send an email my way to will at lolcat.ca diff --git a/api/v1/music.php b/api/v1/music.php new file mode 100644 index 0000000..faf2d96 --- /dev/null +++ b/api/v1/music.php @@ -0,0 +1,26 @@ +getscraperfilters( + "music", + isset($_GET["scraper"]) ? 
$_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->music($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/audio_sc.php b/audio_sc.php new file mode 100644 index 0000000..9a227e3 --- /dev/null +++ b/audio_sc.php @@ -0,0 +1,223 @@ +proxy = new proxy(); + + if(isset($_GET["u"])){ + + /* + we're now proxying audio + */ + $viewkey = $_GET["u"]; + + if(!isset($_GET["r"])){ + + $this->do404("Ranges(r) are missing"); + } + + $ranges = explode(",", $_GET["r"]); + + // sanitize ranges + foreach($ranges as &$range){ + + if(!is_numeric($range)){ + + $this->do404("Invalid range specified"); + } + + $range = (int)$range; + } + + // sort ranges (just to make sure) + sort($ranges); + + // convert ranges to pairs + $last = -1; + foreach($ranges as &$r){ + + $tmp = $r; + $r = [$last + 1, $r]; + + $last = $tmp; + } + + $browser_headers = getallheaders(); + + // get the requested range from client + $client_range = 0; + foreach($browser_headers as $key => $value){ + + if(strtolower($key) == "range"){ + + preg_match( + '/bytes=([0-9]+)/', + $value, + $client_regex + ); + + if(isset($client_regex[1])){ + + $client_range = (int)$client_regex[1]; + }else{ + + $client_range = 0; + } + break; + } + } + + if( + $client_range < 0 || + $client_range > $ranges[count($ranges) - 1][1] + ){ + + // range is not satisfiable + http_response_code(416); + header("Content-Type: text/plain"); + die(); + } + + $rng = null; + for($i=0; $iproxy->stream_linear_audio( + $viewkey + ); + }catch(Exception $error){ + + $this->do404("Could not read stream"); + } + + die(); + } + + /* + redirect user to correct resource + we need to scrape and store the byte positions in the result URL + */ + if(!isset($_GET["s"])){ + + $this->do404("The URL(s) parameter is missing"); + } + + $viewkey = $_GET["s"]; + + if( + preg_match( + 
'/soundcloud\.com$/', + parse_url($viewkey, PHP_URL_HOST) + ) === false + ){ + + $this->do404("This endpoint can only be used for soundcloud streams"); + } + + try{ + + $json = $this->proxy->get($viewkey)["body"]; + }catch(Exception $error){ + + $this->do404("Curl error: " . $error->getMessage()); + } + + $json = json_decode($json, true); + + if(!isset($json["url"])){ + + $this->do404("Could not get URL from JSON"); + } + + $viewkey = $json["url"]; + + $m3u8 = $this->proxy->get($viewkey)["body"]; + + $m3u8 = explode("\n", $m3u8); + + $lineout = null; + $streampos_arr = []; + foreach($m3u8 as $line){ + + $line = trim($line); + if($line[0] == "#"){ + + continue; + } + + if($lineout === null){ + $lineout = $line; + } + + preg_match( + '/\/media\/[0-9]+\/([0-9]+)\/([0-9]+)/', + $line, + $matches + ); + + if(isset($matches[0])){ + + $streampos_arr[] = [ + (int)$matches[1], + (int)$matches[2] + ]; + } + } + + if($lineout === null){ + + $this->do404("Could not get stream URL"); + } + + $lineout = + preg_replace( + '/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/', + '/media/$1/0/0', + $lineout + ); + + $streampos = []; + + foreach($streampos_arr as $pos){ + + $streampos[] = $pos[1]; + } + + $streampos = implode(",", $streampos); + + header("Location: audio_sc?u=" . urlencode($lineout) . 
"&r=$streampos"); + header("Accept-Ranges: bytes"); + } + + private function do404($error){ + + http_response_code(404); + header("Content-Type: text/plain"); + header("X-Error: $error"); + die(); + } +} diff --git a/lib/curlproxy.php b/lib/curlproxy.php index 846fbb7..7d4ca5d 100644 --- a/lib/curlproxy.php +++ b/lib/curlproxy.php @@ -150,7 +150,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -178,7 +178,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -380,7 +380,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", @@ -396,7 +396,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", diff --git a/lib/frontend.php b/lib/frontend.php index 9350230..665f600 
100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -98,7 +98,7 @@ class frontend{ ]); } - public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true){ + public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){ $payload = '
'; @@ -187,7 +187,9 @@ class frontend{ $this->highlighttext($keywords, $site["description"]) . '
'; } - + + $payload .= $customhtml; + $payload .= ''; /* @@ -764,6 +766,7 @@ class frontend{ 'goGoogle cache' . 'arArchive.org' . 'arArchive.is' . + 'ghGhostarchive' . 'biBing cache' . 'meMegalodon' . ''; @@ -835,6 +838,10 @@ class frontend{ case "news": $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; break; + + case "music": + $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; + break; } if( @@ -923,6 +930,14 @@ class frontend{ "mojeek" => "Mojeek" ] ]; + + case "music": + $filters["scraper"] = [ + "display" => "Scraper", + "option" => [ + "sc" => "SoundCloud" + ] + ]; break; } @@ -994,6 +1009,11 @@ class frontend{ include "scraper/wiby.php"; $lib = new wiby(); break; + + case "sc": + include "scraper/sc.php"; + $lib = new sc(); + break; } // set scraper on $_GET @@ -1169,7 +1189,7 @@ class frontend{ $html = null; - foreach(["web", "images", "videos", "news"] as $type){ + foreach(["web", "images", "videos", "news", "music"] as $type){ $html .= 'getscraperfilters("music"); + +$get = $frontend->parsegetfilters($_GET, $filters); + +$frontend->loadheader( + $get, + $filters, + "music" +); + +$payload = [ + "class" => "", + "right-left" => "", + "right-right" => "", + "left" => "" +]; + +try{ + $results = $scraper->music($get); + +}catch(Exception $error){ + + echo + $frontend->drawerror( + "Shit", + 'This scraper returned an error:' . + '
' . htmlspecialchars($error->getMessage()) . '
' . + 'Things you can try:' . + '
    ' . + '
  • Use a different scraper
  • ' . + '
  • Remove keywords that could cause errors
  • ' . + '
  • Use another 4get instance
  • ' . + '

' . + 'If the error persists, please
contact the administrator.' + ); + die(); +} + +$categories = [ + "song" => "", + "author" => "", + "playlist" => "" +]; + +/* + Set the main container +*/ +$main = null; + +if(count($results["song"]) !== 0){ + + $main = "song"; + +}elseif(count($results["author"]) !== 0){ + + $main = "author"; + +}elseif(count($results["playlist"]) !== 0){ + + $main = "playlist"; + +}else{ + + // No results found! + echo + $frontend->drawerror( + "Nobody here but us chickens!", + 'Have you tried:' . + '' . + '' + ); + die(); +} + +/* + Generate list of songs +*/ +foreach($categories as $name => $data){ + + foreach($results[$name] as $item){ + + $greentext = []; + + if( + isset($item["date"]) && + $item["date"] !== null + ){ + + $greentext[] = date("jS M y @ g:ia", $item["date"]); + } + + if( + isset($item["views"]) && + $item["views"] !== null + ){ + + $views = number_format($item["views"]) . " views"; + $greentext[] = $views; + } + + if( + isset($item["followers"]) && + $item["followers"] !== null + ){ + + $greentext[] = number_format($item["followers"]) . " followers"; + } + + if( + isset($item["author"]["name"]) && + $item["author"]["name"] !== null + ){ + + $greentext[] = $item["author"]["name"]; + } + + $greentext = implode(" • ", $greentext); + + if( + isset($item["duration"]) && + $item["duration"] !== null + ){ + + $duration = $frontend->s_to_timestamp($item["duration"]); + }else{ + + $duration = null; + } + + $tabindex = $name == $main ? true : false; + + $customhtml = null; + + if( + $name == "song" && + $item["stream"]["endpoint"] !== null + ){ + + $customhtml = + '