diff options
-rw-r--r-- | .gitignore | 28 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | api.txt | 29 | ||||
-rw-r--r-- | api/v1/music.php | 26 | ||||
-rw-r--r-- | audio_sc.php | 223 | ||||
-rw-r--r-- | lib/curlproxy.php | 8 | ||||
-rw-r--r-- | lib/frontend.php | 28 | ||||
-rw-r--r-- | music.php | 224 | ||||
-rw-r--r-- | scraper/sc.php | 397 | ||||
-rw-r--r-- | scraper/yandex.php | 19 | ||||
-rw-r--r-- | settings.php | 12 | ||||
-rw-r--r-- | static/style.css | 7 | ||||
-rw-r--r-- | template/header.html | 2 | ||||
-rw-r--r-- | template/home.html | 2 |
14 files changed, 989 insertions, 19 deletions
@@ -1,2 +1,28 @@ +<<<<<<< HEAD +lib/test.html +lib/postdata.json +lib/nextpage.json +scraper/brave.html +scraper/yandex.json +scraper/marginalia.json +banner_og/ +scraper/mojeek.html +scraper/google.html +scraper/google-img.html +scraper/google-video.html +scraper/google-news.html +scraper/google-img-nextpage.html +scraper/brave-image.html +scraper/brave-video.html +scraper/facebook.html +scraper/facebook-nextpage.json +scraper/yandex-video.json +scraper/yandex.html +scraper/soundcloud.json +scraper/mp3-pm.html banner/* -!banner/*default*
\ No newline at end of file +!banner/*default* +======= +banner/* +!banner/*default* +>>>>>>> 77293818cd213ec0ad07c573d298fff9cd5b357d @@ -34,6 +34,9 @@ https://4get.ca - Google - Mojeek +5. Music + - SoundCloud + More scrapers are coming soon. I currently want to add Hackernews, Qwant and find a way to scrape Yandex web without those fucking captchas. A shopping, music and files tab is also in my todo list. # Setup @@ -242,6 +242,21 @@ the endpoint above. ++ /api/v1/music + Each entry under "song" contains a array index called "stream" that + looks like this :: + + endpoint: audio_sc + url: https://api-v2.soundcloud <...> + + + When the endpoint is "audio_sc", you MUST use 4get's audio_sc + endpoint, for example, if you want an audio stream back. Otherwise, + you are free to handle the json+m3u8 crap yourself. If the endpoint + is equal to "audio", that URL SHOULD return a valid HTTP audio + stream, and using the "audio" endpoint becomes optional again. + + + /favicon Get the favicon for a website. The only parameter is "s", and must include the protocol. @@ -284,6 +299,20 @@ The parameter is "s" for the audio link. ++ /audio_sc + Get a proxied audio file for SoundCloud. Does not support downloads + trough WGET or CURL, since it returns 30kb~160kb "206 Partial + Content" parts, due to technical limitations that comes with + converting m3u8 playlists to seekable audio files. If you use this + endpoint, you must support these 206 codes and also handle the + initial 302 HTTP redirect. I used this method as I didn't want to + store information about your request needlessly. This method also + allows noJS users to access the files. + + The parameter is "s" for the SoundCloud JSON m3u8 abomination. It + does not support "normal" SoundCloud URLs at this time. + + + Appendix If you have any questions or need clarifications, please send an email my way to will at lolcat.ca diff --git a/api/v1/music.php b/api/v1/music.php new file mode 100644 index 0000000..faf2d96 --- /dev/null +++ b/api/v1/music.php @@ -0,0 +1,26 @@ +<?php + +header("Content-Type: application/json"); + +chdir("../../"); + +include "lib/frontend.php"; +$frontend = new frontend(); + +[$scraper, $filters] = $frontend->getscraperfilters( + "music", + isset($_GET["scraper"]) ? $_GET["scraper"] : null +); + +$get = $frontend->parsegetfilters($_GET, $filters); + +try{ + echo json_encode( + $scraper->music($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES + ); + +}catch(Exception $e){ + + echo json_encode(["status" => $e->getMessage()]); +} diff --git a/audio_sc.php b/audio_sc.php new file mode 100644 index 0000000..9a227e3 --- /dev/null +++ b/audio_sc.php @@ -0,0 +1,223 @@ +<?php + +new sc_audio(); + +class sc_audio{ + + public function __construct(){ + + include "lib/curlproxy.php"; + $this->proxy = new proxy(); + + if(isset($_GET["u"])){ + + /* + we're now proxying audio + */ + $viewkey = $_GET["u"]; + + if(!isset($_GET["r"])){ + + $this->do404("Ranges(r) are missing"); + } + + $ranges = explode(",", $_GET["r"]); + + // sanitize ranges + foreach($ranges as &$range){ + + if(!is_numeric($range)){ + + $this->do404("Invalid range specified"); + } + + $range = (int)$range; + } + + // sort ranges (just to make sure) + sort($ranges); + + // convert ranges to pairs + $last = -1; + foreach($ranges as &$r){ + + $tmp = $r; + $r = [$last + 1, $r]; + + $last = $tmp; + } + + $browser_headers = getallheaders(); + + // get the requested range from client + $client_range = 0; + foreach($browser_headers as $key => $value){ + + if(strtolower($key) == "range"){ + + preg_match( + '/bytes=([0-9]+)/', + $value, + $client_regex + ); + + if(isset($client_regex[1])){ + + $client_range = (int)$client_regex[1]; + }else{ + + $client_range = 0; + } + break; + } + } + + if( + $client_range < 0 || + $client_range > $ranges[count($ranges) - 1][1] + ){ + + // range is not satisfiable + http_response_code(416); + header("Content-Type: text/plain"); + die(); + } + + $rng = null; + for($i=0; $i<count($ranges); $i++){ + + if($ranges[$i][0] <= $client_range){ + + $rng = $ranges[$i]; + } + } + + // proxy data! + http_response_code(206); // partial content + header("Accept-Ranges: bytes"); + header("Content-Range: bytes {$rng[0]}-{$rng[1]}/" . ($ranges[count($ranges) - 1][1] + 1)); + + $viewkey = + preg_replace( + '/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/', + '/media/$1/' . $rng[0] . '/' . $rng[1], + $viewkey + ); + + try{ + + $this->proxy->stream_linear_audio( + $viewkey + ); + }catch(Exception $error){ + + $this->do404("Could not read stream"); + } + + die(); + } + + /* + redirect user to correct resource + we need to scrape and store the byte positions in the result URL + */ + if(!isset($_GET["s"])){ + + $this->do404("The URL(s) parameter is missing"); + } + + $viewkey = $_GET["s"]; + + if( + preg_match( + '/soundcloud\.com$/', + parse_url($viewkey, PHP_URL_HOST) + ) === false + ){ + + $this->do404("This endpoint can only be used for soundcloud streams"); + } + + try{ + + $json = $this->proxy->get($viewkey)["body"]; + }catch(Exception $error){ + + $this->do404("Curl error: " . $error->getMessage()); + } + + $json = json_decode($json, true); + + if(!isset($json["url"])){ + + $this->do404("Could not get URL from JSON"); + } + + $viewkey = $json["url"]; + + $m3u8 = $this->proxy->get($viewkey)["body"]; + + $m3u8 = explode("\n", $m3u8); + + $lineout = null; + $streampos_arr = []; + foreach($m3u8 as $line){ + + $line = trim($line); + if($line[0] == "#"){ + + continue; + } + + if($lineout === null){ + $lineout = $line; + } + + preg_match( + '/\/media\/[0-9]+\/([0-9]+)\/([0-9]+)/', + $line, + $matches + ); + + if(isset($matches[0])){ + + $streampos_arr[] = [ + (int)$matches[1], + (int)$matches[2] + ]; + } + } + + if($lineout === null){ + + $this->do404("Could not get stream URL"); + } + + $lineout = + preg_replace( + '/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/', + '/media/$1/0/0', + $lineout + ); + + $streampos = []; + + foreach($streampos_arr as $pos){ + + $streampos[] = $pos[1]; + } + + $streampos = implode(",", $streampos); + + header("Location: audio_sc?u=" . urlencode($lineout) . "&r=$streampos"); + header("Accept-Ranges: bytes"); + } + + private function do404($error){ + + http_response_code(404); + header("Content-Type: text/plain"); + header("X-Error: $error"); + die(); + } +} diff --git a/lib/curlproxy.php b/lib/curlproxy.php index 846fbb7..7d4ca5d 100644 --- a/lib/curlproxy.php +++ b/lib/curlproxy.php @@ -150,7 +150,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -178,7 +178,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -380,7 +380,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", @@ -396,7 +396,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0", + "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", diff --git a/lib/frontend.php b/lib/frontend.php index 9350230..665f600 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -98,7 +98,7 @@ class frontend{ ]); } - public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true){ + public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){ $payload = '<div class="text-result">'; @@ -187,7 +187,9 @@ class frontend{ $this->highlighttext($keywords, $site["description"]) . '</div>'; } - + + $payload .= $customhtml; + $payload .= '</a>'; /* @@ -764,6 +766,7 @@ class frontend{ '<a href="https://webcache.googleusercontent.com/search?q=cache:' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://google.com" alt="go">Google cache</a>' . '<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' . '<a href="https://archive.is/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' . + '<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' . '<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' . '<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' . '</div>'; @@ -835,6 +838,10 @@ class frontend{ case "news": $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; break; + + case "music": + $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; + break; } if( @@ -923,6 +930,14 @@ class frontend{ "mojeek" => "Mojeek" ] ]; + + case "music": + $filters["scraper"] = [ + "display" => "Scraper", + "option" => [ + "sc" => "SoundCloud" + ] + ]; break; } @@ -994,6 +1009,11 @@ class frontend{ include "scraper/wiby.php"; $lib = new wiby(); break; + + case "sc": + include "scraper/sc.php"; + $lib = new sc(); + break; } // set scraper on $_GET @@ -1169,7 +1189,7 @@ class frontend{ $html = null; - foreach(["web", "images", "videos", "news"] as $type){ + foreach(["web", "images", "videos", "news", "music"] as $type){ $html .= '<a href="/' . $type . '?s=' . urlencode($query); @@ -1303,7 +1323,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy?i=" . urlencode($image) . "&s=" . $format; + return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/music.php b/music.php new file mode 100644 index 0000000..61078b9 --- /dev/null +++ b/music.php @@ -0,0 +1,224 @@ +<?php + +/* + Initialize random shit +*/ +include "lib/frontend.php"; +$frontend = new frontend(); + +[$scraper, $filters] = $frontend->getscraperfilters("music"); + +$get = $frontend->parsegetfilters($_GET, $filters); + +$frontend->loadheader( + $get, + $filters, + "music" +); + +$payload = [ + "class" => "", + "right-left" => "", + "right-right" => "", + "left" => "" +]; + +try{ + $results = $scraper->music($get); + +}catch(Exception $error){ + + echo + $frontend->drawerror( + "Shit", + 'This scraper returned an error:' . + '<div class="code">' . htmlspecialchars($error->getMessage()) . '</div>' . + 'Things you can try:' . + '<ul>' . + '<li>Use a different scraper</li>' . + '<li>Remove keywords that could cause errors</li>' . + '<li>Use another 4get instance</li>' . + '</ul><br>' . + 'If the error persists, please <a href="/about">contact the administrator</a>.' + ); + die(); +} + +$categories = [ + "song" => "", + "author" => "", + "playlist" => "" +]; + +/* + Set the main container +*/ +$main = null; + +if(count($results["song"]) !== 0){ + + $main = "song"; + +}elseif(count($results["author"]) !== 0){ + + $main = "author"; + +}elseif(count($results["playlist"]) !== 0){ + + $main = "playlist"; + +}else{ + + // No results found! + echo + $frontend->drawerror( + "Nobody here but us chickens!", + 'Have you tried:' . + '<ul>' . + '<li>Using a different scraper</li>' . + '<li>Using fewer keywords</li>' . + '<li>Defining broader filters (Is NSFW turned off?)</li>' . + '</ul>' . + '</div>' + ); + die(); +} + +/* + Generate list of songs +*/ +foreach($categories as $name => $data){ + + foreach($results[$name] as $item){ + + $greentext = []; + + if( + isset($item["date"]) && + $item["date"] !== null + ){ + + $greentext[] = date("jS M y @ g:ia", $item["date"]); + } + + if( + isset($item["views"]) && + $item["views"] !== null + ){ + + $views = number_format($item["views"]) . " views"; + $greentext[] = $views; + } + + if( + isset($item["followers"]) && + $item["followers"] !== null + ){ + + $greentext[] = number_format($item["followers"]) . " followers"; + } + + if( + isset($item["author"]["name"]) && + $item["author"]["name"] !== null + ){ + + $greentext[] = $item["author"]["name"]; + } + + $greentext = implode(" • ", $greentext); + + if( + isset($item["duration"]) && + $item["duration"] !== null + ){ + + $duration = $frontend->s_to_timestamp($item["duration"]); + }else{ + + $duration = null; + } + + $tabindex = $name == $main ? true : false; + + $customhtml = null; + + if( + $name == "song" && + $item["stream"]["endpoint"] !== null + ){ + + $customhtml = + '<audio src="' . $item["stream"]["endpoint"] . '?s=' . urlencode($item["stream"]["url"]) . '" controls autostart="false" preload="none">'; + } + + $categories[$name] .= $frontend->drawtextresult($item, $greentext, $duration, $get["s"], $tabindex, $customhtml); + } +} + +$payload["left"] = $categories[$main]; + +// dont re-draw the category +unset($categories[$main]); + +/* + Populate right handside +*/ + +$i = 1; +foreach($categories as $name => $value){ + + if($value == ""){ + + continue; + } + + if($i % 2 === 1){ + + $write = "right-left"; + }else{ + + $write = "right-right"; + } + + $payload[$write] .= + '<div class="answer-wrapper">' . + '<input id="answer' . $i . '" class="spoiler" type="checkbox">' . + '<div class="answer">' . + '<div class="answer-title">' . + '<a class="answer-title" href="?s=' . urlencode($get["s"]); + + switch($name){ + + case "playlist": + $payload[$write] .= + '&type=playlist"><h2>Playlists</h2></a>'; + break; + + case "author": + $payload[$write] .= + '&type=people"><h2>Authors</h2></a>'; + break; + } + + $payload[$write] .= + '</div>' . + $categories[$name] . + '</div>' . + '<label class="spoiler-button" for="answer' . $i . '"></label></div>'; + + $i++; +} + +if($i !== 1){ + + $payload["class"] = " has-answer"; +} + +if($results["npt"] !== null){ + + $payload["left"] .= + '<a href="' . $frontend->htmlnextpage($get, $results["npt"], "music") . '" class="nextpage">Next page ></a>'; +} + +echo $frontend->load("search.html", $payload); diff --git a/scraper/sc.php b/scraper/sc.php new file mode 100644 index 0000000..88d4dab --- /dev/null +++ b/scraper/sc.php @@ -0,0 +1,397 @@ +<?php + +class sc{ + + public function __construct(){ + + include "lib/nextpage.php"; + $this->nextpage = new nextpage("sc"); + } + + public function getfilters($page){ + + return [ + "type" => [ + "display" => "Type", + "option" => [ + "any" => "Any type", + "track" => "Tracks", + "people" => "People", + "album" => "Albums", + "playlist" => "Playlists", + "goplus" => "Go+ Tracks" + ] + ] + ]; + } + + private function get($url, $get = []){ + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", + "Accept: application/json, text/javascript, */*; q=0.01", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Referer: https://soundcloud.com/", + "Origin: https://soundcloud.com", + "DNT: 1", + "Connection: keep-alive", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-site"] + ); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function music($get){ + + if($get["npt"]){ + + $params = $this->nextpage->get($get["npt"], "music"); + $params = json_decode($params, true); + + $url = $params["url"]; + unset($params["url"]); + + }else{ + + // normal search: + // https://api-v2.soundcloud.com/search?q=freddie%20dredd&variant_ids=&facet=model&user_id=351062-302234-707916-795081&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + // soundcloud go+ search: + // https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&filter.content_tier=SUB_HIGH_TIER&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + // tracks search: + // https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + // users search: + // https://api-v2.soundcloud.com/search/users?q=freddie%20dredd&variant_ids=&facet=place&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + // albums search: + // https://api-v2.soundcloud.com/search/albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + // playlists search: + // https://api-v2.soundcloud.com/search/playlists_without_albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en + + $search = $get["s"]; + $type = $get["type"]; + + switch($type){ + + case "any": + $url = "https://api-v2.soundcloud.com/search"; + $params = [ + "q" => $search, + "variant_ids" => "", + "facet" => "model", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + + case "track": + $url = "https://api-v2.soundcloud.com/search/tracks"; + $params = [ + "q" => $search, + "variant_ids" => "", + "facet_genre" => "", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + + case "people": + $url = "https://api-v2.soundcloud.com/search/users"; + $params = [ + "q" => $search, + "variant_ids" => "", + "facet" => "place", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + + case "album": + $url = "https://api-v2.soundcloud.com/search/albums"; + $params = [ + "q" => $search, + "variant_ids" => "", + "facet" => "genre", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + + case "playlist": + $url = "https://api-v2.soundcloud.com/search/playlists_without_albums"; + $params = [ + "q" => $search, + "variant_ids" => "", + "facet" => "genre", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + + case "goplus": + $url = "https://api-v2.soundcloud.com/search/tracks"; + $params = [ + "q" => $search, + "variant_ids" => "", + "filter.content_tier" => "SUB_HIGH_TIER", + "facet" => "genre", + "user_id" => "351062-302234-707916-795081", + "client_id" => "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E", + "limit" => 20, + "offset" => 0, + "linked_partitioning" => 1, + "app_version" => 1693487844, + "app_locale" => "en" + ]; + break; + } + } + + try{ + + $json = $this->get($url, $params); + + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + } + + /* + $handle = fopen("scraper/soundcloud.json", "r"); + $json = fread($handle, filesize("scraper/soundcloud.json")); + fclose($handle); + */ + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Failed to decode JSON"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "song" => [], + "playlist" => [], + "author" => [] + ]; + + /* + Get next page + */ + if(isset($json["next_href"])){ + + $params["query_urn"] = $json["query_urn"]; + $params["offset"] = $params["offset"] + 20; + $params["url"] = $url; // we will remove this later + + $out["npt"] = + $this->nextpage->store( + json_encode($params), + "music" + ); + } + + /* + Scrape items + */ + foreach($json["collection"] as $item){ + + switch($item["kind"]){ + + case "user": + // parse author + $out["author"][] = [ + "title" => $item["username"], + "followers" => $item["followers_count"], + "description" => $item["track_count"] . " songs. " . $this->limitstrlen($item["description"]), + "thumb" => [ + "url" => $item["avatar_url"], + "ratio" => "1:1" + ], + "url" => $item["permalink_url"] + ]; + break; + + case "playlist": + // parse playlist + $description = []; + $count = 0; + + foreach($item["tracks"] as $song){ + + $count++; + + if(!isset($song["title"])){ + + continue; + } + + $description[] = $song["title"]; + } + + if(count($description) != 0){ + + $description = $count . " songs. " . implode(", ", $description); + } + + if( + isset($item["artwork_url"]) && + !empty($item["artwork_url"]) + ){ + + $thumb = [ + "ratio" => "1:1", + "url" => $item["artwork_url"] + ]; + + }elseif( + isset($item["tracks"][0]["artwork_url"]) && + !empty($item["tracks"][0]["artwork_url"]) + ){ + + $thumb = [ + "ratio" => "1:1", + "url" => $item["tracks"][0]["artwork_url"] + ]; + }else{ + + $thumb = [ + "ratio" => null, + "url" => null + ]; + } + + $out["playlist"][] = [ + "title" => $item["title"], + "description" => $description, + "author" => [ + "name" => $item["user"]["username"], + "url" => $item["user"]["permalink_url"], + "avatar" => $item["user"]["avatar_url"] + ], + "thumb" => $thumb, + "date" => strtotime($item["created_at"]), + "duration" => $item["duration"] / 1000, + "url" => $item["permalink_url"] + ]; + break; + + case "track": + if(stripos($item["monetization_model"], "TIER") === false){ + + $stream = [ + "endpoint" => "audio_sc", + "url" => + $item["media"]["transcodings"][0]["url"] . + "?client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E" . + "&track_authorization=" . + $item["track_authorization"] + ]; + }else{ + + $stream = [ + "endpoint" => null, + "url" => null + ]; + } + + // parse track + $out["song"][] = [ + "title" => $item["title"], + "description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]), + "url" => $item["permalink_url"], + "views" => $item["playback_count"], + "author" => [ + "name" => $item["user"]["username"], + "url" => $item["user"]["permalink_url"], + "avatar" => $item["user"]["avatar_url"] + ], + "thumb" => [ + "ratio" => "1:1", + "url" => $item["artwork_url"] + ], + "date" => strtotime($item["created_at"]), + "duration" => (int)$item["full_duration"] / 1000, + "stream" => $stream + ]; + break; + } + } + + return $out; + } + + private function limitstrlen($text){ + + return + explode( + "\n", + wordwrap( + str_replace( + "\n", + " ", + $text + ), + 300, + "\n" + ) + )[0]; + } +} diff --git a/scraper/yandex.php b/scraper/yandex.php index 8cb733e..65abe73 100644 --- a/scraper/yandex.php +++ b/scraper/yandex.php @@ -959,6 +959,7 @@ class yandex{ "img" ); + $c = 1; if(count($thumb) === 0){ $thumb = [ @@ -967,7 +968,6 @@ class yandex{ ]; }else{ - $c = 1; $thumb = [ "url" => str_replace( @@ -1065,12 +1065,17 @@ class yandex{ "views" => $views, "thumb" => $thumb, "url" => - $this->fuckhtml - ->getTextContent( - $data["counters"] - ["toHostingLoaded"] - ["postfix"] - ["href"] + str_replace( + "http://", + "https://", + $this->fuckhtml + ->getTextContent( + $data["counters"] + ["toHostingLoaded"] + ["postfix"] + ["href"] + ), + $c ) ]; } diff --git a/settings.php b/settings.php index c968e57..f6abb12 100644 --- a/settings.php +++ b/settings.php @@ -161,6 +161,16 @@ $settings = [ "text" => "Mojeek" ] ] + ], + [ + "description" => "Music", + "parameter" => "scraper_music", + "options" => [ + [ + "value" => "sc", + "text" => "SoundCloud" + ] + ] ] ] ] @@ -235,7 +245,7 @@ echo '<head>' . '<meta http-equiv="Content-Type" content="text/html;charset=utf-8">' . '<title>Settings</title>' . - '<link rel="stylesheet" href="/static/style.css">' . + '<link rel="stylesheet" href="/static/style.css?v3">' . '<meta name="viewport" content="width=device-width,initial-scale=1">' . '<meta name="robots" content="index,follow">' . '<link rel="icon" type="image/x-icon" href="/favicon.ico">' . diff --git a/static/style.css b/static/style.css index e5044b1..ee320a7 100644 --- a/static/style.css +++ b/static/style.css @@ -51,6 +51,13 @@ audio{ max-width:100%; display:block; +} + +.left audio{ + margin-top:7px; +} + +.right-wrapper audio{ margin-bottom:17px; } diff --git a/template/header.html b/template/header.html index 2633521..b687d27 100644 --- a/template/header.html +++ b/template/header.html @@ -3,7 +3,7 @@ <head> <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> <title>{%title%}</title> - <link rel="stylesheet" href="/static/style.css?v2"> + <link rel="stylesheet" href="/static/style.css?v3"> <meta name="viewport" content="width=device-width,initial-scale=1"> <meta name="robots" content="{%index%}index,{%index%}follow"> <link rel="icon" type="image/x-icon" href="/favicon.ico"> diff --git a/template/home.html b/template/home.html index 4e32ebc..7982734 100644 --- a/template/home.html +++ b/template/home.html @@ -4,7 +4,7 @@ <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> <title>4get</title> <meta name="viewport" content="width=device-width,initial-scale=1"> - <link rel="stylesheet" href="/static/style.css?v2"> + <link rel="stylesheet" href="/static/style.css?v3"> <meta name="robots" content="index,follow"> <link rel="icon" type="image/x-icon" href="/favicon.ico"> <meta name="description" content="4get.ca: They live in our walls!"> |