summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore28
-rw-r--r--README.md3
-rw-r--r--api.txt29
-rw-r--r--api/v1/music.php26
-rw-r--r--audio_sc.php223
-rw-r--r--lib/curlproxy.php8
-rw-r--r--lib/frontend.php28
-rw-r--r--music.php224
-rw-r--r--scraper/sc.php397
-rw-r--r--scraper/yandex.php19
-rw-r--r--settings.php12
-rw-r--r--static/style.css7
-rw-r--r--template/header.html2
-rw-r--r--template/home.html2
14 files changed, 989 insertions, 19 deletions
diff --git a/.gitignore b/.gitignore
index 126df62..7498cf4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,28 @@
+<<<<<<< HEAD
+lib/test.html
+lib/postdata.json
+lib/nextpage.json
+scraper/brave.html
+scraper/yandex.json
+scraper/marginalia.json
+banner_og/
+scraper/mojeek.html
+scraper/google.html
+scraper/google-img.html
+scraper/google-video.html
+scraper/google-news.html
+scraper/google-img-nextpage.html
+scraper/brave-image.html
+scraper/brave-video.html
+scraper/facebook.html
+scraper/facebook-nextpage.json
+scraper/yandex-video.json
+scraper/yandex.html
+scraper/soundcloud.json
+scraper/mp3-pm.html
banner/*
-!banner/*default* \ No newline at end of file
+!banner/*default*
+=======
+banner/*
+!banner/*default*
+>>>>>>> 77293818cd213ec0ad07c573d298fff9cd5b357d
diff --git a/README.md b/README.md
index 88024cf..f4d9b3d 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,9 @@ https://4get.ca
- Google
- Mojeek
+5. Music
+ - SoundCloud
+
More scrapers are coming soon. I currently want to add Hackernews, Qwant and find a way to scrape Yandex web without those fucking captchas. A shopping, music and files tab is also in my todo list.
# Setup
diff --git a/api.txt b/api.txt
index 3b45e91..40b0ed3 100644
--- a/api.txt
+++ b/api.txt
@@ -242,6 +242,21 @@
the endpoint above.
++ /api/v1/music
+ Each entry under "song" contains an array index called "stream" that
+ looks like this ::
+
+ endpoint: audio_sc
+ url: https://api-v2.soundcloud <...>
+
+
+ When the endpoint is "audio_sc", you MUST use 4get's audio_sc
+ endpoint, for example, if you want an audio stream back. Otherwise,
+ you are free to handle the json+m3u8 crap yourself. If the endpoint
+ is equal to "audio", that URL SHOULD return a valid HTTP audio
+ stream, and using the "audio" endpoint becomes optional again.
+
+
+ /favicon
Get the favicon for a website. The only parameter is "s", and must
include the protocol.
@@ -284,6 +299,20 @@
The parameter is "s" for the audio link.
++ /audio_sc
+ Get a proxied audio file for SoundCloud. Does not support downloads
+ through WGET or CURL, since it returns 30kb~160kb "206 Partial
+ Content" parts, due to technical limitations that come with
+ converting m3u8 playlists to seekable audio files. If you use this
+ endpoint, you must support these 206 codes and also handle the
+ initial 302 HTTP redirect. I used this method as I didn't want to
+ store information about your request needlessly. This method also
+ allows noJS users to access the files.
+
+ The parameter is "s" for the SoundCloud JSON m3u8 abomination. It
+ does not support "normal" SoundCloud URLs at this time.
+
+
+ Appendix
If you have any questions or need clarifications, please send an
email my way to will at lolcat.ca
diff --git a/api/v1/music.php b/api/v1/music.php
new file mode 100644
index 0000000..faf2d96
--- /dev/null
+++ b/api/v1/music.php
@@ -0,0 +1,26 @@
+<?php
+
+/*
+	JSON API endpoint for music searches.
+	
+	Resolves the scraper (optionally forced through ?scraper=), runs the
+	search and prints the result as JSON. When the scraper throws, the
+	exception message is returned in the "status" field instead.
+*/
+header("Content-Type: application/json");
+
+// includes below are written relative to the project root
+chdir("../../");
+
+include "lib/frontend.php";
+$frontend = new frontend();
+
+// scraper explicitly requested by the caller, if any
+$requested_scraper = null;
+
+if(isset($_GET["scraper"])){
+	
+	$requested_scraper = $_GET["scraper"];
+}
+
+[$scraper, $filters] =
+	$frontend->getscraperfilters("music", $requested_scraper);
+
+$get = $frontend->parsegetfilters($_GET, $filters);
+
+try{
+	
+	$response = $scraper->music($get);
+	
+	echo json_encode(
+		$response,
+		JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES
+	);
+	
+}catch(Exception $e){
+	
+	$failure = ["status" => $e->getMessage()];
+	echo json_encode($failure);
+}
diff --git a/audio_sc.php b/audio_sc.php
new file mode 100644
index 0000000..9a227e3
--- /dev/null
+++ b/audio_sc.php
@@ -0,0 +1,223 @@
+<?php
+
+/*
+	SoundCloud audio endpoint.
+	
+	?s=<url>
+		<url> is the SoundCloud JSON document that points at an m3u8
+		playlist. The playlist is scraped for the end byte offset of
+		every segment and the client is redirected to the mode below.
+	
+	?u=<segment url>&r=<comma separated end offsets>
+		Serves the single segment that contains the first byte of the
+		client's Range header as a "206 Partial Content" response.
+*/
+new sc_audio();
+
+class sc_audio{
+	
+	// proxy object created from lib/curlproxy.php
+	private $proxy;
+	
+	/*
+		Dispatches the request to one of the two modes described at the
+		top of the file. Every failure path ends the script through
+		do404().
+	*/
+	public function __construct(){
+		
+		include "lib/curlproxy.php";
+		$this->proxy = new proxy();
+		
+		if(isset($_GET["u"])){
+			
+			/*
+				we're now proxying audio
+			*/
+			$this->stream_segment($_GET["u"]);
+			die();
+		}
+		
+		/*
+			redirect user to correct resource
+			we need to scrape and store the byte positions in the result URL
+		*/
+		$this->redirect_to_segments();
+	}
+	
+	/*
+		Answers a Range request with exactly one playlist segment.
+		
+		$viewkey is a segment URL containing /media/<id>/<start>/<end>;
+		the start/end pair is rewritten to the segment that holds the
+		byte the client asked for.
+	*/
+	private function stream_segment($viewkey){
+		
+		// NOTE(review): the host of $viewkey is not validated here;
+		// confirm that stream_linear_audio() restricts it.
+		if(!is_string($viewkey)){
+			
+			$this->do404("Invalid URL(u) specified");
+		}
+		
+		if(
+			!isset($_GET["r"]) ||
+			!is_string($_GET["r"])
+		){
+			
+			$this->do404("Ranges(r) are missing");
+		}
+		
+		// sanitize ranges: each entry is the last byte offset of a segment.
+		// Digits only, so negatives, floats and "1e3" are rejected.
+		$ends = [];
+		foreach(explode(",", $_GET["r"]) as $end){
+			
+			if(preg_match('/^[0-9]+$/D', $end) !== 1){
+				
+				$this->do404("Invalid range specified");
+			}
+			
+			$ends[] = (int)$end;
+		}
+		
+		// sort ranges (just to make sure)
+		sort($ends);
+		
+		// convert end offsets to [start, end] pairs
+		$ranges = [];
+		$start = 0;
+		foreach($ends as $end){
+			
+			$ranges[] = [$start, $end];
+			$start = $end + 1;
+		}
+		
+		// one past the final byte == total length of the stream
+		$total = $start;
+		
+		// get the requested range from client (defaults to byte 0)
+		$client_range = 0;
+		if(
+			isset($_SERVER["HTTP_RANGE"]) &&
+			preg_match(
+				'/bytes=([0-9]+)/',
+				$_SERVER["HTTP_RANGE"],
+				$client_regex
+			) === 1
+		){
+			
+			$client_range = (int)$client_regex[1];
+		}
+		
+		if($client_range >= $total){
+			
+			// range is not satisfiable
+			http_response_code(416);
+			header("Content-Type: text/plain");
+			header("Content-Range: bytes */" . $total);
+			die();
+		}
+		
+		// pick the last segment that starts at or before the requested byte
+		$rng = $ranges[0];
+		foreach($ranges as $candidate){
+			
+			if($candidate[0] > $client_range){
+				
+				break;
+			}
+			
+			$rng = $candidate;
+		}
+		
+		// proxy data!
+		http_response_code(206); // partial content
+		header("Accept-Ranges: bytes");
+		header("Content-Range: bytes {$rng[0]}-{$rng[1]}/" . $total);
+		
+		$viewkey =
+			preg_replace(
+				'/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/',
+				'/media/$1/' . $rng[0] . '/' . $rng[1],
+				$viewkey
+			);
+		
+		try{
+			
+			$this->proxy->stream_linear_audio(
+				$viewkey
+			);
+		}catch(Exception $error){
+			
+			$this->do404("Could not read stream");
+		}
+	}
+	
+	/*
+		Resolves ?s= to its m3u8 playlist and redirects the client to
+		?u=<first segment>&r=<end offset of every segment>.
+	*/
+	private function redirect_to_segments(){
+		
+		if(
+			!isset($_GET["s"]) ||
+			!is_string($_GET["s"])
+		){
+			
+			$this->do404("The URL(s) parameter is missing");
+		}
+		
+		$viewkey = $_GET["s"];
+		
+		$scheme = parse_url($viewkey, PHP_URL_SCHEME);
+		$host = parse_url($viewkey, PHP_URL_HOST);
+		
+		// preg_match() returns 0 (not false) when nothing matches, so the
+		// result must be compared against 1. The pattern only accepts
+		// soundcloud.com itself or one of its subdomains.
+		if(
+			!is_string($scheme) ||
+			!in_array(strtolower($scheme), ["http", "https"], true) ||
+			!is_string($host) ||
+			preg_match('/(^|\.)soundcloud\.com$/i', $host) !== 1
+		){
+			
+			$this->do404("This endpoint can only be used for soundcloud streams");
+		}
+		
+		try{
+			
+			$json = $this->proxy->get($viewkey)["body"];
+		}catch(Exception $error){
+			
+			$this->do404("Curl error: " . $error->getMessage());
+		}
+		
+		$json = json_decode($json, true);
+		
+		if(
+			!is_array($json) ||
+			!isset($json["url"]) ||
+			!is_string($json["url"])
+		){
+			
+			$this->do404("Could not get URL from JSON");
+		}
+		
+		try{
+			
+			$m3u8 = $this->proxy->get($json["url"])["body"];
+		}catch(Exception $error){
+			
+			$this->do404("Curl error: " . $error->getMessage());
+		}
+		
+		$lineout = null; // first segment URL found in the playlist
+		$streampos = []; // end byte offset of every segment
+		
+		foreach(explode("\n", $m3u8) as $line){
+			
+			$line = trim($line);
+			
+			// skip blank lines and playlist tags
+			if(
+				$line === "" ||
+				$line[0] === "#"
+			){
+				
+				continue;
+			}
+			
+			if($lineout === null){
+				
+				$lineout = $line;
+			}
+			
+			if(
+				preg_match(
+					'/\/media\/[0-9]+\/([0-9]+)\/([0-9]+)/',
+					$line,
+					$matches
+				) === 1
+			){
+				
+				$streampos[] = (int)$matches[2];
+			}
+		}
+		
+		if($lineout === null){
+			
+			$this->do404("Could not get stream URL");
+		}
+		
+		if($streampos === []){
+			
+			// without offsets the redirect target could never be served
+			$this->do404("Could not get stream positions");
+		}
+		
+		// the real byte positions are filled in when the segment is requested
+		$lineout =
+			preg_replace(
+				'/\/media\/([0-9]+)\/[0-9]+\/[0-9]+/',
+				'/media/$1/0/0',
+				$lineout
+			);
+		
+		header("Location: audio_sc?u=" . urlencode($lineout) . "&r=" . implode(",", $streampos));
+		header("Accept-Ranges: bytes");
+	}
+	
+	/*
+		Ends the request with a 404; the reason is sent in the X-Error header.
+	*/
+	private function do404($error){
+		
+		http_response_code(404);
+		header("Content-Type: text/plain");
+		header("X-Error: $error");
+		die();
+	}
+}
diff --git a/lib/curlproxy.php b/lib/curlproxy.php
index 846fbb7..7d4ca5d 100644
--- a/lib/curlproxy.php
+++ b/lib/curlproxy.php
@@ -150,7 +150,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -178,7 +178,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
+ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -380,7 +380,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
@@ -396,7 +396,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
"Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
diff --git a/lib/frontend.php b/lib/frontend.php
index 9350230..665f600 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -98,7 +98,7 @@ class frontend{
]);
}
- public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true){
+ public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){
$payload =
'<div class="text-result">';
@@ -187,7 +187,9 @@ class frontend{
$this->highlighttext($keywords, $site["description"]) .
'</div>';
}
-
+
+ $payload .= $customhtml;
+
$payload .= '</a>';
/*
@@ -764,6 +766,7 @@ class frontend{
'<a href="https://webcache.googleusercontent.com/search?q=cache:' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://google.com" alt="go">Google cache</a>' .
'<a href="https://web.archive.org/web/' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.org" alt="ar">Archive.org</a>' .
'<a href="https://archive.is/newest/' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=https://archive.is" alt="ar">Archive.is</a>' .
+ '<a href="https://ghostarchive.org/search?term=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://ghostarchive.org" alt="gh">Ghostarchive</a>' .
'<a href="https://www.bing.com/search?q=url%3A' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://bing.com" alt="bi">Bing cache</a>' .
'<a href="https://megalodon.jp/?url=' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=https://megalodon.jp" alt="me">Megalodon</a>' .
'</div>';
@@ -835,6 +838,10 @@ class frontend{
case "news":
$get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null;
break;
+
+ case "music":
+ // the settings page stores the music scraper under "scraper_music"
+ $get_scraper = isset($_COOKIE["scraper_music"]) ? $_COOKIE["scraper_music"] : null;
+ break;
}
if(
@@ -923,6 +930,14 @@ class frontend{
"mojeek" => "Mojeek"
]
];
+ break;
+
+ case "music":
+ // SoundCloud is the only scraper offered for music; the break above
+ // keeps the previous case from falling through into this one
+ $filters["scraper"] = [
+ "display" => "Scraper",
+ "option" => [
+ "sc" => "SoundCloud"
+ ]
+ ];
break;
}
@@ -994,6 +1009,11 @@ class frontend{
include "scraper/wiby.php";
$lib = new wiby();
break;
+
+ case "sc":
+ include "scraper/sc.php";
+ $lib = new sc();
+ break;
}
// set scraper on $_GET
@@ -1169,7 +1189,7 @@ class frontend{
$html = null;
- foreach(["web", "images", "videos", "news"] as $type){
+ foreach(["web", "images", "videos", "news", "music"] as $type){
$html .= '<a href="/' . $type . '?s=' . urlencode($query);
@@ -1303,7 +1323,7 @@ class frontend{
return htmlspecialchars($image);
}
- return "/proxy?i=" . urlencode($image) . "&s=" . $format;
+ return "/proxy.php?i=" . urlencode($image) . "&s=" . $format;
}
public function htmlnextpage($gets, $npt, $page){
diff --git a/music.php b/music.php
new file mode 100644
index 0000000..61078b9
--- /dev/null
+++ b/music.php
@@ -0,0 +1,224 @@
+<?php
+
+/*
+	Music search page: set up the frontend, resolve the scraper and
+	its filters, then run the search
+*/
+include "lib/frontend.php";
+$frontend = new frontend();
+
+[$scraper, $filters] = $frontend->getscraperfilters("music");
+
+$get = $frontend->parsegetfilters($_GET, $filters);
+
+$frontend->loadheader($get, $filters, "music");
+
+// template slots, filled in further down
+$payload = [
+	"class" => "",
+	"right-left" => "",
+	"right-right" => "",
+	"left" => ""
+];
+
+try{
+	
+	$results = $scraper->music($get);
+	
+}catch(Exception $error){
+	
+	// show the scraper's message, then stop rendering
+	$help =
+		'This scraper returned an error:' .
+		'<div class="code">' . htmlspecialchars($error->getMessage()) . '</div>' .
+		'Things you can try:' .
+		'<ul>' .
+			'<li>Use a different scraper</li>' .
+			'<li>Remove keywords that could cause errors</li>' .
+			'<li>Use another 4get instance</li>' .
+		'</ul><br>' .
+		'If the error persists, please <a href="/about">contact the administrator</a>.';
+	
+	echo $frontend->drawerror("Shit", $help);
+	die();
+}
+
+// rendered HTML per category; key order is also the priority order
+// used to pick the main (left-hand) column
+$categories = [
+	"song" => "",
+	"author" => "",
+	"playlist" => ""
+];
+
+/*
+	Set the main container: the first category that has results
+*/
+$main = null;
+
+foreach(array_keys($categories) as $candidate){
+	
+	if(count($results[$candidate]) !== 0){
+		
+		$main = $candidate;
+		break;
+	}
+}
+
+if($main === null){
+	
+	// No results found!
+	echo
+		$frontend->drawerror(
+			"Nobody here but us chickens!",
+			'Have you tried:' .
+			'<ul>' .
+				'<li>Using a different scraper</li>' .
+				'<li>Using fewer keywords</li>' .
+				'<li>Defining broader filters (Is NSFW turned off?)</li>' .
+			'</ul>' .
+			'</div>'
+		);
+	die();
+}
+
+/*
+	Generate the HTML of every category (songs, authors, playlists).
+	Each result gets a "greentext" line built from whichever of date,
+	views, followers and author name are present. Songs that expose a
+	stream also get an inline audio player.
+*/
+foreach($categories as $name => $data){
+	
+	foreach($results[$name] as $item){
+		
+		$greentext = [];
+		
+		// isset() is already false for null values
+		if(isset($item["date"])){
+			
+			$greentext[] = date("jS M y @ g:ia", $item["date"]);
+		}
+		
+		if(isset($item["views"])){
+			
+			$greentext[] = number_format($item["views"]) . " views";
+		}
+		
+		if(isset($item["followers"])){
+			
+			$greentext[] = number_format($item["followers"]) . " followers";
+		}
+		
+		if(isset($item["author"]["name"])){
+			
+			$greentext[] = $item["author"]["name"];
+		}
+		
+		$greentext = implode(" • ", $greentext);
+		
+		if(isset($item["duration"])){
+			
+			$duration = $frontend->s_to_timestamp($item["duration"]);
+		}else{
+			
+			$duration = null;
+		}
+		
+		// only results of the main column take part in tab navigation
+		$tabindex = $name === $main;
+		
+		$customhtml = null;
+		
+		if(
+			$name === "song" &&
+			isset($item["stream"]["endpoint"], $item["stream"]["url"])
+		){
+			
+			// <audio> is not a void element: it needs its end tag
+			$customhtml =
+				'<audio src="' .
+				htmlspecialchars(
+					$item["stream"]["endpoint"] . '?s=' . urlencode($item["stream"]["url"])
+				) .
+				'" controls autostart="false" preload="none"></audio>';
+		}
+		
+		$categories[$name] .= $frontend->drawtextresult($item, $greentext, $duration, $get["s"], $tabindex, $customhtml);
+	}
+}
+
+// the main category fills the left column...
+$payload["left"] = $categories[$main];
+
+// ...and must not be drawn a second time on the right
+unset($categories[$main]);
+
+/*
+	Populate right handside: the remaining non-empty categories are
+	dealt alternately to the "right-left" and "right-right" columns
+*/
+
+// tail of the title link for each category that can appear on the right
+$title_links = [
+	"playlist" => '&type=playlist"><h2>Playlists</h2></a>',
+	"author" => '&type=people"><h2>Authors</h2></a>'
+];
+
+$answer_id = 1;
+foreach($categories as $name => $html){
+	
+	if($html === ""){
+		
+		continue;
+	}
+	
+	// odd boxes go left, even boxes go right
+	$column = $answer_id % 2 === 1 ? "right-left" : "right-right";
+	
+	$box =
+		'<div class="answer-wrapper">' .
+		'<input id="answer' . $answer_id . '" class="spoiler" type="checkbox">' .
+		'<div class="answer">' .
+			'<div class="answer-title">' .
+				'<a class="answer-title" href="?s=' . urlencode($get["s"]);
+	
+	if(isset($title_links[$name])){
+		
+		$box .= $title_links[$name];
+	}
+	
+	$box .=
+			'</div>' .
+			$html .
+		'</div>' .
+		'<label class="spoiler-button" for="answer' . $answer_id . '"></label></div>';
+	
+	$payload[$column] .= $box;
+	
+	$answer_id++;
+}
+
+// at least one box was written to the right hand side
+if($answer_id !== 1){
+	
+	$payload["class"] = " has-answer";
+}
+
+if($results["npt"] !== null){
+	
+	$nextpage_url = $frontend->htmlnextpage($get, $results["npt"], "music");
+	
+	$payload["left"] .=
+		'<a href="' . $nextpage_url . '" class="nextpage">Next page &gt;</a>';
+}
+
+echo $frontend->load("search.html", $payload);
diff --git a/scraper/sc.php b/scraper/sc.php
new file mode 100644
index 0000000..88d4dab
--- /dev/null
+++ b/scraper/sc.php
@@ -0,0 +1,397 @@
+<?php
+
+/*
+	SoundCloud scraper.
+	
+	music($get) queries the api-v2.soundcloud.com search endpoints and
+	returns the results grouped into "song", "playlist" and "author",
+	plus a next page token ("npt") when more results are available.
+*/
+class sc{
+	
+	private const API = "https://api-v2.soundcloud.com";
+	
+	// values captured from the SoundCloud web client
+	private const CLIENT_ID = "iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E";
+	private const USER_ID = "351062-302234-707916-795081";
+	private const APP_VERSION = 1693487844;
+	
+	// results requested per page; also the offset step between pages
+	private const PAGE_SIZE = 20;
+	
+	// next page token storage, created from lib/nextpage.php
+	public $nextpage;
+	
+	public function __construct(){
+		
+		include "lib/nextpage.php";
+		$this->nextpage = new nextpage("sc");
+	}
+	
+	/*
+		Filters offered to the user. Every "type" option maps to one
+		search endpoint, see searchrequest().
+	*/
+	public function getfilters($page){
+		
+		return [
+			"type" => [
+				"display" => "Type",
+				"option" => [
+					"any" => "Any type",
+					"track" => "Tracks",
+					"people" => "People",
+					"album" => "Albums",
+					"playlist" => "Playlists",
+					"goplus" => "Go+ Tracks"
+				]
+			]
+		];
+	}
+	
+	/*
+		GET $url with $get appended as the query string and return the
+		raw response body.
+		
+		Throws Exception with the curl error message when the transfer fails.
+	*/
+	private function get($url, $get = []){
+		
+		$curlproc = curl_init();
+		
+		if($get !== []){
+			$get = http_build_query($get);
+			$url .= "?" . $get;
+		}
+		
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+		
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+			["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+			"Accept: application/json, text/javascript, */*; q=0.01",
+			"Accept-Language: en-US,en;q=0.5",
+			"Accept-Encoding: gzip",
+			"Referer: https://soundcloud.com/",
+			"Origin: https://soundcloud.com",
+			"DNT: 1",
+			"Connection: keep-alive",
+			"Sec-Fetch-Dest: empty",
+			"Sec-Fetch-Mode: cors",
+			"Sec-Fetch-Site: same-site"]
+		);
+		
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+		
+		$data = curl_exec($curlproc);
+		
+		if(curl_errno($curlproc)){
+			
+			// read the message first, then release the handle on this path too
+			$message = curl_error($curlproc);
+			curl_close($curlproc);
+			
+			throw new Exception($message);
+		}
+		
+		curl_close($curlproc);
+		return $data;
+	}
+	
+	/*
+		Build the [url, query parameters] pair for a fresh search.
+		
+		The endpoints differ only in their path and in the parameters
+		placed between "variant_ids" and "user_id"; parameter order
+		mirrors the requests of the web client.
+		
+		Throws Exception when $type is not one of the getfilters() options.
+	*/
+	private function searchrequest($search, $type){
+		
+		switch($type){
+			
+			case "any":
+				$path = "/search";
+				$extra = ["facet" => "model"];
+				break;
+			
+			case "track":
+				$path = "/search/tracks";
+				// NOTE(review): the captured tracks request used
+				// facet=genre, like the other endpoints. Confirm whether
+				// "facet_genre" is intentional.
+				$extra = ["facet_genre" => ""];
+				break;
+			
+			case "people":
+				$path = "/search/users";
+				$extra = ["facet" => "place"];
+				break;
+			
+			case "album":
+				$path = "/search/albums";
+				$extra = ["facet" => "genre"];
+				break;
+			
+			case "playlist":
+				$path = "/search/playlists_without_albums";
+				$extra = ["facet" => "genre"];
+				break;
+			
+			case "goplus":
+				$path = "/search/tracks";
+				$extra = [
+					"filter.content_tier" => "SUB_HIGH_TIER",
+					"facet" => "genre"
+				];
+				break;
+			
+			default:
+				throw new Exception("Unsupported search type");
+		}
+		
+		// array union keeps the keys in left-to-right order
+		$params =
+			[
+				"q" => $search,
+				"variant_ids" => ""
+			] +
+			$extra +
+			[
+				"user_id" => self::USER_ID,
+				"client_id" => self::CLIENT_ID,
+				"limit" => self::PAGE_SIZE,
+				"offset" => 0,
+				"linked_partitioning" => 1,
+				"app_version" => self::APP_VERSION,
+				"app_locale" => "en"
+			];
+		
+		return [self::API . $path, $params];
+	}
+	
+	/*
+		Run a music search.
+		
+		$get["npt"] resumes a stored search when set; otherwise $get["s"]
+		(query) and $get["type"] (see getfilters()) start a new one.
+		
+		Returns ["status", "npt", "song", "playlist", "author"].
+		Throws Exception when the request or its decoding fails.
+	*/
+	public function music($get){
+		
+		if($get["npt"]){
+			
+			$params =
+				json_decode(
+					$this->nextpage->get($get["npt"], "music"),
+					true
+				);
+			
+			if(
+				!is_array($params) ||
+				!isset($params["url"])
+			){
+				
+				throw new Exception("Failed to decode next page token");
+			}
+			
+			// the URL travels inside the stored parameters
+			$url = $params["url"];
+			unset($params["url"]);
+			
+		}else{
+			
+			[$url, $params] = $this->searchrequest($get["s"], $get["type"]);
+		}
+		
+		try{
+			
+			$json = $this->get($url, $params);
+			
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to fetch JSON");
+		}
+		
+		$json = json_decode($json, true);
+		
+		if(!is_array($json)){
+			
+			throw new Exception("Failed to decode JSON");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"song" => [],
+			"playlist" => [],
+			"author" => []
+		];
+		
+		/*
+			Get next page
+		*/
+		if(isset($json["next_href"])){
+			
+			$params["query_urn"] = $json["query_urn"] ?? null;
+			$params["offset"] = ($params["offset"] ?? 0) + self::PAGE_SIZE;
+			$params["url"] = $url; // removed again when the token is read back
+			
+			$out["npt"] =
+				$this->nextpage->store(
+					json_encode($params),
+					"music"
+				);
+		}
+		
+		/*
+			Scrape items
+		*/
+		foreach($json["collection"] ?? [] as $item){
+			
+			switch($item["kind"]){
+				
+				case "user":
+					$out["author"][] = $this->parseuser($item);
+					break;
+				
+				case "playlist":
+					$out["playlist"][] = $this->parseplaylist($item);
+					break;
+				
+				case "track":
+					$out["song"][] = $this->parsetrack($item);
+					break;
+			}
+		}
+		
+		return $out;
+	}
+	
+	/*
+		Convert a "user" collection item to an author result.
+	*/
+	private function parseuser($item){
+		
+		return [
+			"title" => $item["username"],
+			"followers" => $item["followers_count"],
+			"description" => $item["track_count"] . " songs. " . $this->limitstrlen($item["description"]),
+			"thumb" => [
+				"url" => $item["avatar_url"],
+				"ratio" => "1:1"
+			],
+			"url" => $item["permalink_url"]
+		];
+	}
+	
+	/*
+		Convert a "playlist" collection item to a playlist result. The
+		description lists the titles of the tracks that carry one and is
+		null when none do.
+	*/
+	private function parseplaylist($item){
+		
+		$tracks = $item["tracks"] ?? [];
+		
+		$titles = [];
+		foreach($tracks as $song){
+			
+			if(isset($song["title"])){
+				
+				$titles[] = $song["title"];
+			}
+		}
+		
+		// every track counts, titled or not
+		$description =
+			$titles === [] ?
+			null :
+			count($tracks) . " songs. " . implode(", ", $titles);
+		
+		// prefer the playlist artwork, fall back to the first track's
+		if(!empty($item["artwork_url"])){
+			
+			$thumb = [
+				"ratio" => "1:1",
+				"url" => $item["artwork_url"]
+			];
+			
+		}elseif(!empty($tracks[0]["artwork_url"])){
+			
+			$thumb = [
+				"ratio" => "1:1",
+				"url" => $tracks[0]["artwork_url"]
+			];
+		}else{
+			
+			$thumb = [
+				"ratio" => null,
+				"url" => null
+			];
+		}
+		
+		return [
+			"title" => $item["title"],
+			"description" => $description,
+			"author" => [
+				"name" => $item["user"]["username"],
+				"url" => $item["user"]["permalink_url"],
+				"avatar" => $item["user"]["avatar_url"]
+			],
+			"thumb" => $thumb,
+			"date" => strtotime($item["created_at"]),
+			"duration" => $item["duration"] / 1000,
+			"url" => $item["permalink_url"]
+		];
+	}
+	
+	/*
+		Convert a "track" collection item to a song result. Tracks whose
+		monetization model contains "TIER", or that expose no transcoding,
+		get a null stream.
+	*/
+	private function parsetrack($item){
+		
+		$monetization = (string)($item["monetization_model"] ?? "");
+		
+		if(
+			stripos($monetization, "TIER") === false &&
+			isset($item["media"]["transcodings"][0]["url"])
+		){
+			
+			$stream = [
+				"endpoint" => "audio_sc",
+				"url" =>
+					$item["media"]["transcodings"][0]["url"] .
+					"?client_id=" . self::CLIENT_ID .
+					"&track_authorization=" .
+					($item["track_authorization"] ?? "")
+			];
+		}else{
+			
+			$stream = [
+				"endpoint" => null,
+				"url" => null
+			];
+		}
+		
+		return [
+			"title" => $item["title"],
+			"description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]),
+			"url" => $item["permalink_url"],
+			"views" => $item["playback_count"],
+			"author" => [
+				"name" => $item["user"]["username"],
+				"url" => $item["user"]["permalink_url"],
+				"avatar" => $item["user"]["avatar_url"]
+			],
+			"thumb" => [
+				"ratio" => "1:1",
+				"url" => $item["artwork_url"]
+			],
+			"date" => strtotime($item["created_at"]),
+			"duration" => (int)$item["full_duration"] / 1000,
+			"stream" => $stream
+		];
+	}
+	
+	/*
+		Flatten $text to a single line and cut it at the last word
+		boundary within 300 bytes. null is treated as an empty string.
+	*/
+	private function limitstrlen($text){
+		
+		$flat = str_replace("\n", " ", (string)$text);
+		
+		// wordwrap() inserts the only newlines left, so the first line
+		// is the shortened text
+		return explode("\n", wordwrap($flat, 300, "\n"))[0];
+	}
+}
diff --git a/scraper/yandex.php b/scraper/yandex.php
index 8cb733e..65abe73 100644
--- a/scraper/yandex.php
+++ b/scraper/yandex.php
@@ -959,6 +959,7 @@ class yandex{
"img"
);
+ $c = 1;
if(count($thumb) === 0){
$thumb = [
@@ -967,7 +968,6 @@ class yandex{
];
}else{
- $c = 1;
$thumb = [
"url" =>
str_replace(
@@ -1065,12 +1065,17 @@ class yandex{
"views" => $views,
"thumb" => $thumb,
"url" =>
- $this->fuckhtml
- ->getTextContent(
- $data["counters"]
- ["toHostingLoaded"]
- ["postfix"]
- ["href"]
+ // str_replace()'s 4th argument is an output counter, not a limit,
+ // so cap the rewrite at the first "http://" with preg_replace()
+ preg_replace(
+ '/http:\/\//',
+ "https://",
+ $this->fuckhtml
+ ->getTextContent(
+ $data["counters"]
+ ["toHostingLoaded"]
+ ["postfix"]
+ ["href"]
+ ),
+ 1
)
];
}
diff --git a/settings.php b/settings.php
index c968e57..f6abb12 100644
--- a/settings.php
+++ b/settings.php
@@ -161,6 +161,16 @@ $settings = [
"text" => "Mojeek"
]
]
+ ],
+ [
+ "description" => "Music",
+ "parameter" => "scraper_music",
+ "options" => [
+ [
+ "value" => "sc",
+ "text" => "SoundCloud"
+ ]
+ ]
]
]
]
@@ -235,7 +245,7 @@ echo
'<head>' .
'<meta http-equiv="Content-Type" content="text/html;charset=utf-8">' .
'<title>Settings</title>' .
- '<link rel="stylesheet" href="/static/style.css">' .
+ '<link rel="stylesheet" href="/static/style.css?v3">' .
'<meta name="viewport" content="width=device-width,initial-scale=1">' .
'<meta name="robots" content="index,follow">' .
'<link rel="icon" type="image/x-icon" href="/favicon.ico">' .
diff --git a/static/style.css b/static/style.css
index e5044b1..ee320a7 100644
--- a/static/style.css
+++ b/static/style.css
@@ -51,6 +51,13 @@
audio{
max-width:100%;
display:block;
+}
+
+.left audio{
+ margin-top:7px;
+}
+
+.right-wrapper audio{
margin-bottom:17px;
}
diff --git a/template/header.html b/template/header.html
index 2633521..b687d27 100644
--- a/template/header.html
+++ b/template/header.html
@@ -3,7 +3,7 @@
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>{%title%}</title>
- <link rel="stylesheet" href="/static/style.css?v2">
+ <link rel="stylesheet" href="/static/style.css?v3">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="robots" content="{%index%}index,{%index%}follow">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
diff --git a/template/home.html b/template/home.html
index 4e32ebc..7982734 100644
--- a/template/home.html
+++ b/template/home.html
@@ -4,7 +4,7 @@
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<title>4get</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
- <link rel="stylesheet" href="/static/style.css?v2">
+ <link rel="stylesheet" href="/static/style.css?v3">
<meta name="robots" content="index,follow">
<link rel="icon" type="image/x-icon" href="/favicon.ico">
<meta name="description" content="4get.ca: They live in our walls!">