diff options
author | lolcat <will@lolcat.ca> | 2024-04-21 19:31:56 -0400 |
---|---|---|
committer | lolcat <will@lolcat.ca> | 2024-04-21 19:31:56 -0400 |
commit | 130358a9e0504a55cf3f86b2d7035feb7f4e84de (patch) | |
tree | 81f59790f7ead0b393a0e0b25caa082216245fcd | |
parent | 9e18327df69542e07fad2ef471a3ebdbe9b08ae8 (diff) |
v8
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | README.md | 85 | ||||
-rw-r--r-- | api/v1/ac.php | 22 | ||||
-rw-r--r-- | data/config.php | 24 | ||||
-rw-r--r-- | lib/backend.php | 84 | ||||
-rw-r--r-- | lib/curlproxy.php | 63 | ||||
-rw-r--r-- | lib/frontend.php | 100 | ||||
-rw-r--r-- | proxy.php | 29 | ||||
-rw-r--r-- | scraper/marginalia.php | 344 | ||||
-rw-r--r-- | scraper/pinterest.php | 7 | ||||
-rw-r--r-- | scraper/qwant.php | 893 | ||||
-rw-r--r-- | scraper/sc.php | 75 | ||||
-rw-r--r-- | scraper/wiby.php | 2 | ||||
-rw-r--r-- | scraper/yandex.php | 10 | ||||
-rw-r--r-- | static/serverping.js | 99 | ||||
-rw-r--r-- | template/about.html | 4 |
16 files changed, 1385 insertions, 457 deletions
@@ -29,3 +29,4 @@ data/captcha/minecraft/ banner/* !banner/*default* >>>>>>> 77293818cd213ec0ad07c573d298fff9cd5b357d +scraper/curlie.html @@ -11,63 +11,42 @@ https://4get.ca ## Totally unbiased comparison between alternatives -| | 4get | searx(ng) | librex | araa | -|----------------------------|-------------------------|-----------|-------------|----------| -| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | -| Does it suck | no (debunked by snopes) | yes | yes | a little | -| Does it work | ye | no | no | ye | -| Did the dev commit suicide | not until my 30s | idk | yes | no | +| | 4get | searx(ng) | librex | araa | +|----------------------------|-------------------------|-----------|-------------|-----------| +| RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | +| Does it suck | no (debunked by snopes) | yes | yes | a little | +| Does it work | ye | sometimes | no | sometimes | +| Did the dev commit suicide | not until my 30s | no | allegedly | no | + +## Features +1. Rotating proxies on a per-scraper basis +2. Search filters, which SearxNG lacks for the most part +3. Bot protection that *actually* filters out the bots (when configured) +4. Interface doesn't require javascript +5. Favicon fetcher with caching support & image proxy +6. Bunch of other shit + +tl;dr the best way to actually browse for shit. # Supported websites -1. Web - - DuckDuckGo - - Brave - - Yandex - - Google - - Mwmbl - - Mojeek - - Marginalia - - wiby - - Curlie -2. Images - - DuckDuckGo - - Yandex - - Google - - Brave - - Yep - - Imgur - - FindThatMeme - -3. Videos - - YouTube - - DuckDuckgo - - Brave - - Yandex - - Google - -4. News - - DuckDuckGo - - Brave - - Google - - Mojeek - -5. Music - - SoundCloud - -6. Autocompleter - - Brave - - DuckDuckGo - - Yandex - - Google - - Qwant - - Yep - - Marginalia - - YouTube - - SoundCloud +| Web | Images | Videos | News | Music | Autocompleter | +|------------|--------------|------------|------------|------------|---------------| +| DuckDuckGo | DuckDuckGo | YouTube | DuckDuckGo | Soundcloud | Brave | +| Brave | Brave | DuckDuckGo | Brave | | DuckDuckGo | +| Yandex | Yandex | Brave | Google | | Yandex | +| Google | Google | Yandex | Qwant | | Google | +| Qwant | Qwant | Google | Mojeek | | Yep | +| Yep | Pinterest | Qwant | | | Marginalia | +| Crowdview | Yep | | | | YouTube | +| Mwmbl | Imgur | | | | Soundcloud | +| Mojeek | FindThatMeme | | | | | +| Marginalia | | | | | | +| wiby | | | | | | +| Curlie | | | | | | # Installation -Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>! +Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>. I recommend following the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a>. ## Contact -Shit breaks all the time but I repair it all the time too! Email me here: will (at) lolcat.ca +Shit breaks all the time but I repair it all the time too... Email me here: <b>will (at) lolcat.ca</b> or create an issue. diff --git a/api/v1/ac.php b/api/v1/ac.php index b1ec7dd..9d9f534 100644 --- a/api/v1/ac.php +++ b/api/v1/ac.php @@ -18,7 +18,7 @@ class autocomplete{ "yep" => "https://api.yep.com/ac/?query={searchTerms}", "marginalia" => "https://search.marginalia.nu/suggest/?partial={searchTerms}", "yt" => "https://suggestqueries-clients6.youtube.com/complete/search?client=youtube&q={searchTerms}", - "sc" => "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . config::SC_CLIENT_TOKEN . "&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en" + "sc" => "" ]; /* @@ -39,14 +39,6 @@ class autocomplete{ $this->do404("Search(s) exceeds the 500 char length"); } - if( - isset($_GET["scraper"]) && - is_string($_GET["scraper"]) === false - ){ - - $_GET["scraper"] = "brave"; // default option - } - /* Get $scraper */ @@ -77,7 +69,6 @@ class autocomplete{ } // return results - switch($scraper){ case "google": @@ -115,7 +106,16 @@ class autocomplete{ case "sc": // soundcloud - $js = $this->get($this->scrapers[$scraper], $_GET["s"]); + chdir("../../"); + include "scraper/sc.php"; + $sc = new sc(); + + $token = $sc->get_token("raw_ip::::"); + + $js = $this->get( + "https://api-v2.soundcloud.com/search/queries?q={searchTerms}&client_id=" . $token . "&limit=10&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en", + $_GET["s"] + ); $js = json_decode($js, true); diff --git a/data/config.php b/data/config.php index 26a19ea..42a968a 100644 --- a/data/config.php +++ b/data/config.php @@ -5,7 +5,7 @@ class config{ // any parameters. // 4get version. Please keep this updated - const VERSION = 7; + const VERSION = 8; // Will be shown pretty much everywhere. const SERVER_NAME = "4get"; @@ -63,13 +63,6 @@ class config{ "via" ]; - // @TODO: Portscan the user for open proxies before allowing a connection, block user if any are found - // Requires the nmap package - const NMAP_PROXY_CHECK = false; - - // @TODO: Make IP blacklist public under /api/v1/blacklist endpoint ? - const PUBLIC_IP_BLACKLIST = true; - // Maximal number of searches per captcha key/pass issued. Counter gets // reset on every APCU cache clear (should happen once a day). // Only useful when BOT_PROTECTION is NOT set to 0 @@ -113,7 +106,7 @@ class config{ // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Changing this might break things. - const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0"; + const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"; // Proxy pool assignments for each scraper // false = Use server's raw IP @@ -123,6 +116,7 @@ class config{ const PROXY_BRAVE = false; const PROXY_FB = false; // facebook const PROXY_GOOGLE = false; + const PROXY_QWANT = false; const PROXY_MARGINALIA = false; const PROXY_MOJEEK = false; const PROXY_SC = false; // soundcloud @@ -146,14 +140,8 @@ class config{ // Scraper-specific parameters // - // SOUNDCLOUD - // Get these parameters by making a search on soundcloud with network - // tab open, then filter URLs using "search?q=". (No need to login) - const SC_USER_ID = "447501-577662-794348-352629"; - const SC_CLIENT_TOKEN = "VNc62l3wxDWS0Ol62j5UYNc1gsZ3UXPv"; - // MARGINALIA - // Get an API key by contacting the Marginalia.nu maintainer. The "public" key - // works but is almost always rate-limited. - const MARGINALIA_API_KEY = "public"; + // Use "null" to default out to HTML scraping OR specify a string to + // use the API (Eg: "public"). API has less filters. + const MARGINALIA_API_KEY = null; } diff --git a/lib/backend.php b/lib/backend.php index c76a0be..7631ff3 100644 --- a/lib/backend.php +++ b/lib/backend.php @@ -93,31 +93,31 @@ class backend{ */ public function store($payload, $page, $proxy){ - $page = $page[0]; - $password = random_bytes(256); // 2048 bit - $salt = random_bytes(16); - $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true); - $iv = - random_bytes( - openssl_cipher_iv_length("aes-256-gcm") - ); - - $tag = ""; - $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16); + $key = sodium_crypto_secretbox_keygen(); + $nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES); $requestid = apcu_inc("requestid"); apcu_store( - $page . "." . - $this->scraper . + $page[0] . "." . // first letter of page name + $this->scraper . // scraper name $requestid, - gzdeflate($proxy . "," . $salt.$iv.$out.$tag), - 900 // cache information for 15 minutes blaze it + [ + $nonce, + $proxy, + // compress and encrypt + sodium_crypto_secretbox( + gzdeflate($payload), + $nonce, + $key + ) + ], + 900 // cache information for 15 minutes ); return $this->scraper . $requestid . "." . - rtrim(strtr(base64_encode($password), '+/', '-_'), '='); + rtrim(strtr(base64_encode($key), '+/', '-_'), '='); } public function get($npt, $page){ @@ -137,7 +137,7 @@ class backend{ if($payload === false){ - throw new Exception("The nextPageToken is invalid or has expired!"); + throw new Exception("The next page token is invalid or has expired!"); } $key = @@ -150,47 +150,27 @@ class backend{ ) ); - $payload = gzinflate($payload); - - // get proxy - [ - $proxy, - $payload - ] = explode(",", $payload, 2); - - $key = - hash_pbkdf2( - "sha512", - $key, - substr($payload, 0, 16), // salt - 20000, - 32, - true - ); - $ivlen = openssl_cipher_iv_length("aes-256-gcm"); - - $payload = - openssl_decrypt( - substr( - $payload, - 16 + $ivlen, - -16 - ), - "aes-256-gcm", - $key, - OPENSSL_RAW_DATA, - substr($payload, 16, $ivlen), - substr($payload, -16) + // decrypt and decompress data + $payload[2] = + gzinflate( + sodium_crypto_secretbox_open( + $payload[2], // data + $payload[0], // nonce + $key + ) ); - if($payload === false){ + if($payload[2] === false){ - throw new Exception("The nextPageToken is invalid or has expired!"); + throw new Exception("The next page token is invalid or has expired!"); } - // remove the key after using + // remove the key after using successfully apcu_delete($apcu); - return [$payload, $proxy]; + return [ + $payload[2], // data + $payload[1] // proxy + ]; } } diff --git a/lib/curlproxy.php b/lib/curlproxy.php index f1ce2a7..313ab01 100644 --- a/lib/curlproxy.php +++ b/lib/curlproxy.php @@ -290,30 +290,24 @@ class proxy{ if(isset($headers["content-type"])){ - if($headers["content-type"] == "text/html"){ + if(stripos($headers["content-type"], "text/html") !== false){ - throw new Exception("Server returned an html document instead of image"); + throw new Exception("Server returned html"); } - $tmp = explode(";", $headers["content-type"]); - - for($i=0; $i<count($tmp); $i++){ + if( + preg_match( + '/image\/([^ ]+)/i', + $headers["content-type"], + $match + ) + ){ - if( - preg_match( - '/^image\/([^ ]+)/i', - $tmp[$i], - $match - ) - ){ - - $format = strtolower($match[1]); + $format = strtolower($match[1]); + + if(substr(strtolower($format), 0, 2) == "x-"){ - if(substr($format, 0, 2) == "x-"){ - - $format = substr($format, 2); - } - break; + $format = substr($format, 2); } } } @@ -351,6 +345,8 @@ class proxy{ private function stream($url, $referer, $format){ + $this->clientcache(); + $this->url = $url; $this->format = $format; @@ -360,8 +356,6 @@ class proxy{ throw new Exception("Invalid URL"); } - $this->clientcache(); - $curl = curl_init(); // set headers @@ -490,11 +484,14 @@ class proxy{ // get content type if(isset($this->headers["content-type"])){ - $filetype = explode("/", $this->headers["content-type"]); + $octet_check = stripos($this->headers["content-type"], "octet-stream"); - if(strtolower($filetype[0]) != $this->format){ + if( + stripos($this->headers["content-type"], $this->format) === false && + $octet_check === false + ){ - throw new Exception("Resource is not an {$this->format} (Found {$filetype[0]} instead)"); + throw new Exception("Resource reported invalid Content-Type"); } }else{ @@ -502,6 +499,18 @@ class proxy{ throw new Exception("Resource is not an {$this->format} (no Content-Type)"); } + $filetype = explode("/", $this->headers["content-type"]); + + if(!isset($filetype[1])){ + + throw new Exception("Malformed Content-Type header"); + } + + if($octet_check !== false){ + + $filetype[1] = "jpeg"; + } + header("Content-Type: {$this->format}/{$filetype[1]}"); // give payload size @@ -541,7 +550,7 @@ class proxy{ if(isset($filename[1])){ - header("Content-Disposition: filename=" . $filename[1] . "." . $filetype); + header("Content-Disposition: filename=\"" . trim($filename[1], "\"'") . "." . $filetype . "\""); return; } } @@ -552,7 +561,7 @@ class proxy{ if($filename === null){ // everything failed! rename file to domain name - header("Content-Disposition: filename=" . parse_url($url, PHP_URL_HOST) . "." . $filetype); + header("Content-Disposition: filename=\"" . parse_url($url, PHP_URL_HOST) . "." . $filetype . "\""); return; } @@ -569,7 +578,7 @@ class proxy{ $filename = implode(".", $filename); - header("Content-Disposition: inline; filename=" . $filename . "." . $filetype); + header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\""); return; } diff --git a/lib/frontend.php b/lib/frontend.php index f3810df..a48b722 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -923,6 +923,7 @@ class frontend{ "brave" => "Brave", "yandex" => "Yandex", "google" => "Google", + "qwant" => "Qwant", "yep" => "Yep", "crowdview" => "Crowdview", "mwmbl" => "Mwmbl", @@ -942,6 +943,7 @@ class frontend{ "yandex" => "Yandex", "brave" => "Brave", "google" => "Google", + "qwant" => "Qwant", "yep" => "Yep", //"pinterest" => "Pinterest", "imgur" => "Imgur", @@ -959,7 +961,8 @@ class frontend{ "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", - "google" => "Google" + "google" => "Google", + "qwant" => "Qwant" ] ]; break; @@ -971,6 +974,7 @@ class frontend{ "ddg" => "DuckDuckGo", "brave" => "Brave", "google" => "Google", + "qwant" => "Qwant", "yep" => "Yep", "mojeek" => "Mojeek" ] @@ -1010,98 +1014,8 @@ class frontend{ $scraper_out = $first; } - switch($scraper_out){ - - case "ddg": - include "scraper/ddg.php"; - $lib = new ddg(); - break; - - case "brave": - include "scraper/brave.php"; - $lib = new brave(); - break; - - case "yt"; - include "scraper/youtube.php"; - $lib = new youtube(); - break; - - case "yandex": - include "scraper/yandex.php"; - $lib = new yandex(); - break; - - case "google": - include "scraper/google.php"; - $lib = new google(); - break; - /* - case "fb": - include "scraper/facebook.php"; - $lib = new facebook(); - break;*/ - - case "crowdview": - include "scraper/crowdview.php"; - $lib = new crowdview(); - break; - - case "mwmbl": - include "scraper/mwmbl.php"; - $lib = new mwmbl(); - break; - - case "mojeek": - include "scraper/mojeek.php"; - $lib = new mojeek(); - break; - - case "marginalia": - include "scraper/marginalia.php"; - $lib = new marginalia(); - break; - - case "wiby": - include "scraper/wiby.php"; - $lib = new wiby(); - break; - - case "curlie": - include "scraper/curlie.php"; - $lib = new curlie(); - break; - - case "yep": - include "scraper/yep.php"; - $lib = new yep(); - break; - - case "sc": - include "scraper/sc.php"; - $lib = new sc(); - break; - - case "spotify": - include "scraper/spotify.php"; - $lib = new spotify(); - break; - - case "pinterest": - include "scraper/pinterest.php"; - $lib = new pinterest(); - break; - - case "imgur": - include "scraper/imgur.php"; - $lib = new imgur(); - break; - - case "ftm": - include "scraper/ftm.php"; - $lib = new ftm(); - break; - } + include "scraper/$scraper_out.php"; + $lib = new $scraper_out(); // set scraper on $_GET $_GET["scraper"] = $scraper_out; @@ -24,13 +24,36 @@ try{ } // bing request, ask bing to resize and stream to browser + $image = parse_url($_GET["i"]); + if( + isset($image["host"]) && preg_match( - '/bing.net$/', - parse_url($_GET["i"], PHP_URL_HOST) + '/^[A-z0-9.]*bing\.(net|com)$/i', + $image["host"] ) ){ + if( + !isset($image["query"]) || + !isset($image["path"]) || + $image["path"] != "/th" + ){ + + header("X-Error: Invalid bing image path"); + $proxy->do404(); + die(); + } + + parse_str($image["query"], $str); + + if(!isset($str["id"])){ + + header("X-Error: Missing bing ID"); + $proxy->do404(); + die(); + } + switch($_GET["s"]){ case "portrait": $req = "&w=50&h=90&p=0&qlt=90"; break; @@ -40,7 +63,7 @@ try{ case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break; } - $proxy->stream_linear_image($_GET["i"] . $req, "https://bing.net"); + $proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . urlencode($str["id"]) . $req, "https://www.bing.com"); die(); } diff --git a/scraper/marginalia.php b/scraper/marginalia.php index b790a97..2a2c1e6 100644 --- a/scraper/marginalia.php +++ b/scraper/marginalia.php @@ -3,78 +3,103 @@ class marginalia{ public function __construct(){ + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + include "lib/backend.php"; $this->backend = new backend("marginalia"); } public function getfilters($page){ - switch($page){ + if(config::MARGINALIA_API_KEY === null){ - case "web": - return [ - "profile" => [ - "display" => "Profile", - "option" => [ - "any" => "Default", - "modern" => "Modern" - ] - ], - "format" => [ - "display" => "Format", - "option" => [ - "any" => "Any", - "html5" => "html5", - "xhtml" => "xhtml", - "html123" => "html123" - ] - ], - "file" => [ - "display" => "File", - "option" => [ - "any" => "Any", - "nomedia" => "Deny media", - "media" => "Contains media", - "audio" => "Contains audio", - "video" => "Contains video", - "archive" => "Contains archive", - "document" => "Contains document" - ] - ], - "javascript" => [ - "display" => "Javascript", - "option" => [ - "any" => "Allow JS", - "deny" => "Deny JS", - "require" => "Require JS" - ] - ], - "trackers" => [ - "display" => "Trackers", - "option" => [ - "any" => "Allow trackers", - "deny" => "Deny trackers", - "require" => "Require trackers" - ] - ], - "cookies" => [ - "display" => "Cookies", - "option" => [ - "any" => "Allow cookies", - "deny" => "Deny cookies", - "require" => "Require cookies" - ] - ], - "affiliate" => [ - "display" => "Affiliate links in body", - "option" => [ - "any" => "Allow affiliate links", - "deny" => "Deny affiliate links", - "require" => "Require affiliate links" - ] + $base = [ + "adtech" => [ + "display" => "Reduce adtech", + "option" => [ + "no" => "No", + "yes" => "Yes" ] - ]; + ], + "recent" => [ + "display" => "Recent results", + "option" => [ + "no" => "No", + "yes" => "Yes" + ] + ], + "intitle" => [ + "display" => "Search in title", + "option" => [ + "no" => "No", + "yes" => "Yes" + ] + ] + ]; + }else{ + + $base = []; } + + return array_merge( + $base, + [ + "format" => [ + "display" => "Format", + "option" => [ + "any" => "Any format", + "html5" => "html5", + "xhtml" => "xhtml", + "html123" => "html123" + ] + ], + "file" => [ + "display" => "Filetype", + "option" => [ + "any" => "Any filetype", + "nomedia" => "Deny media", + "media" => "Contains media", + "audio" => "Contains audio", + "video" => "Contains video", + "archive" => "Contains archive", + "document" => "Contains document" + ] + ], + "javascript" => [ + "display" => "Javascript", + "option" => [ + "any" => "Allow JS", + "deny" => "Deny JS", + "require" => "Require JS" + ] + ], + "trackers" => [ + "display" => "Trackers", + "option" => [ + "any" => "Allow trackers", + "deny" => "Deny trackers", + "require" => "Require trackers" + ] + ], + "cookies" => [ + "display" => "Cookies", + "option" => [ + "any" => "Allow cookies", + "deny" => "Deny cookies", + "require" => "Require cookies" + ] + ], + "affiliate" => [ + "display" => "Affiliate links in body", + "option" => [ + "any" => "Allow affiliate links", + "deny" => "Deny affiliate links", + "require" => "Require affiliate links" + ] + ] + ] + ); } private function get($proxy, $url, $get = []){ @@ -132,7 +157,6 @@ class marginalia{ throw new Exception("Search term is empty!"); } - $profile = $get["profile"]; $format = $get["format"]; $file = $get["file"]; @@ -180,38 +204,6 @@ class marginalia{ $search = implode(" ", $search); - $params = [ - "count" => 20 - ]; - - if($profile == "modern"){ - - $params["index"] = 1; - } - - try{ - $json = - $this->get( - $this->backend->get_ip(), // no nextpage - "https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search), - $params - ); - }catch(Exception $error){ - - throw new Exception("Failed to get JSON"); - } - - if($json == "Slow down"){ - - throw new Exception("The API key used is rate limited. Please try again in a few minutes."); - } - - $json = json_decode($json, true); - /* - $handle = fopen("scraper/marginalia.json", "r"); - $json = json_decode(fread($handle, filesize("scraper/marginalia.json")), true); - fclose($handle);*/ - $out = [ "status" => "ok", "spelling" => [ @@ -228,19 +220,169 @@ class marginalia{ "related" => [] ]; - foreach($json["results"] as $result){ + if(config::MARGINALIA_API_KEY !== null){ + + try{ + $json = + $this->get( + $this->backend->get_ip(), // no nextpage + "https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search), + [ + "count" => 20 + ] + ); + }catch(Exception $error){ + + throw new Exception("Failed to get JSON"); + } + + if($json == "Slow down"){ + + throw new Exception("The API key used is rate limited. Please try again in a few minutes."); + } + + $json = json_decode($json, true); + + foreach($json["results"] as $result){ + + $out["web"][] = [ + "title" => $result["title"], + "description" => str_replace("\n", " ", $result["description"]), + "url" => $result["url"], + "date" => null, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + } + + return $out; + } + + // no more cloudflare!! Parse html by default + $params = [ + "query" => $search + ]; + + foreach(["adtech", "recent", "intitle"] as $v){ + + if($get[$v] == "yes"){ + + switch($v){ + + case "adtech": $params["adtech"] = "reduce"; break; + case "recent": $params["recent"] = "recent"; break; + case "adtech": $params["searchTitle"] = "title"; break; + } + } + } + + try{ + $html = + $this->get( + $this->backend->get_ip(), + "https://search.marginalia.nu/search", + $params + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } + + $this->fuckhtml->load($html); + + $sections = + $this->fuckhtml + ->getElementsByClassName( + "card search-result", + "section" + ); + + foreach($sections as $section){ + + $this->fuckhtml->load($section); + + $title = + $this->fuckhtml + ->getElementsByClassName( + "title", + "a" + )[0]; + + $description = + $this->fuckhtml + ->getElementsByClassName( + "description", + "p" + ); + + if(count($description) !== 0){ + + $description = + $this->fuckhtml + ->getTextContent( + $description[0] + ); + }else{ + + $description = null; + } + + $sublinks = []; + $sublink_html = + $this->fuckhtml + ->getElementsByClassName("additional-results"); + + if(count($sublink_html) !== 0){ + + $this->fuckhtml->load($sublink_html[0]); + + $links = + $this->fuckhtml + ->getElementsByTagName("a"); + + foreach($links as $link){ + + $sublinks[] = [ + "title" => + $this->fuckhtml + ->getTextContent( + $link + ), + "date" => null, + "description" => null, + "url" => + $this->fuckhtml + ->getTextContent( + $link["attributes"]["href"] + ) + ]; + } + } $out["web"][] = [ - "title" => $result["title"], - "description" => str_replace("\n", " ", $result["description"]), - "url" => $result["url"], + "title" => + $this->fuckhtml + ->getTextContent( + $title + ), + "description" => $description, + "url" => + $this->fuckhtml + ->getTextContent( + $title["attributes"]["href"] + ), "date" => null, "type" => "web", "thumb" => [ "url" => null, "ratio" => null ], - "sublink" => [], + "sublink" => $sublinks, "table" => [] ]; } diff --git a/scraper/pinterest.php b/scraper/pinterest.php index 37473a1..f3c4439 100644 --- a/scraper/pinterest.php +++ b/scraper/pinterest.php @@ -4,11 +4,8 @@ class pinterest{ public function __construct(){ - include "lib/nextpage.php"; - $this->nextpage = new nextpage("pinterest"); - - include "lib/proxy_pool.php"; - $this->proxy = new proxy_pool("pinterest"); + include "lib/backend.php"; + $this->backend = new backend("pinterest"); } public function getfilters($page){ diff --git a/scraper/qwant.php b/scraper/qwant.php new file mode 100644 index 0000000..9cc9b9e --- /dev/null +++ b/scraper/qwant.php @@ -0,0 +1,893 @@ +<?php + +class qwant{ + + public function __construct(){ + + include "lib/backend.php"; + $this->backend = new backend("qwant"); + } + + public function getfilters($page){ + + $base = [ + "nsfw" => [ + "display" => "NSFW", + "option" => [ + "yes" => "Yes", + "maybe" => "Maybe", + "no" => "No" + ] + ], + "country" => [ + "display" => "Country", + "option" => [ + "en_US" => "United States", + "fr_FR" => "France", + "en_GB" => "Great Britain", + "de_DE" => "Germany", + "it_IT" => "Italy", + "es_AR" => "Argentina", + "en_AU" => "Australia", + "es_ES" => "Spain (es)", + "ca_ES" => "Spain (ca)", + "cs_CZ" => "Czech Republic", + "ro_RO" => "Romania", + "el_GR" => "Greece", + "zh_CN" => "China", + "zh_HK" => "Hong Kong", + "en_NZ" => "New Zealand", + "fr_FR" => "France", + "th_TH" => "Thailand", + "ko_KR" => "South Korea", + "sv_SE" => "Sweden", + "nb_NO" => "Norway", + "da_DK" => "Denmark", + "hu_HU" => "Hungary", + "et_EE" => "Estonia", + "es_MX" => "Mexico", + "es_CL" => "Chile", + "en_CA" => "Canada (en)", + "fr_CA" => "Canada (fr)", + "en_MY" => "Malaysia", + "bg_BG" => "Bulgaria", + "fi_FI" => "Finland", + "pl_PL" => "Poland", + "nl_NL" => "Netherlands", + "pt_PT" => "Portugal", + "de_CH" => "Switzerland (de)", + "fr_CH" => "Switzerland (fr)", + "it_CH" => "Switzerland (it)", + "de_AT" => "Austria", + "fr_BE" => "Belgium (fr)", + "nl_BE" => "Belgium (nl)", + "en_IE" => "Ireland", + "he_IL" => "Israel" + ] + ] + ]; + + switch($page){ + + case "web": + $base = array_merge( + $base, + [ + "time" => [ + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "day" => "Past 24 hours", + "week" => "Past week", + "month" => "Past month" + ] + ], + "extendedsearch" => [ + // no display, wont show in interface + "option" => [ + "yes" => "Yes", + "no" => "No" + ] + ] + ] + ); + break; + + case "images": + $base = array_merge( + $base, + [ + "time" => [ + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "day" => "Past 24 hours", + "week" => "Past week", + "month" => "Past month" + ] + ], + "size" => [ + "display" => "Size", + "option" => [ + "any" => "Any size", + "large" => "Large", + "medium" => "Medium", + "small" => "Small" + ] + ], + "color" => [ + "display" => "Color", + "option" => [ + "any" => "Any color", + "coloronly" => "Color only", + "monochrome" => "Monochrome", + "black" => "Black", + "brown" => "Brown", + "gray" => "Gray", + "white" => "White", + "yellow" => "Yellow", + "orange" => "Orange", + "red" => "Red", + "pink" => "Pink", + "purple" => "Purple", + "blue" => "Blue", + "teal" => "Teal", + "green" => "Green" + ] + ], + "imagetype" => [ + "display" => "Type", + "option" => [ + "any" => "Any type", + "animatedgif" => "Animated GIF", + "photo" => "Photograph", + "transparent" => "Transparent" + ] + ], + "license" => [ + "display" => "License", + "option" => [ + "any" => "Any license", + "share" => "Non-commercial reproduction and sharing", + "sharecommercially" => "Reproduction and sharing", + "modify" => "Non-commercial reproduction, sharing and modification", + "modifycommercially" => "Reproduction, sharing and modification", + "public" => "Public domain" + ] + ] + ] + ); + break; + + case "videos": + $base = array_merge( + $base, + [ + "order" => [ + "display" => "Order by", + "option" => [ + "relevance" => "Relevance", + "views" => "Views", + "date" => "Most recent", + ] + ], + "source" => [ + "display" => "Source", + "option" => [ + "any" => "Any source", + "youtube" => "YouTube", + "dailymotion" => "Dailymotion", + ] + ] + ] + ); + break; + + case "news": + $base = array_merge( + $base, + [ + "time" => [ + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "hour" => "Less than 1 hour ago", + "day" => "Past 24 hours", + "week" => "Past week", + "month" => "Past month" + ] + ], + "order" => [ + "display" => "Order by", + "option" => [ + "relevance" => "Relevance", + "date" => "Most recent" + ] + ] + ] + ); + break; + } + + return $base; + } + + private function get($proxy, $url, $get = []){ + + $headers = [ + "User-Agent: " . config::USER_AGENT, + "Accept: application/json, text/plain, */*", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Connection: keep-alive", + "Origin: https://www.qwant.com", + "Referer: https://www.qwant.com/", + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-site", + "TE: trailers" + ]; + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers); + + // Bypass HTTP/2 check + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function web($get){ + + if($get["npt"]){ + + // get next page data + [$params, $proxy] = $this->backend->get($get["npt"], "web"); + + $params = json_decode($params, true); + + }else{ + + // get _GET data instead + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + if(strlen($search) > 2048){ + + throw new Exception("Search term is too long!"); + } + + $proxy = $this->backend->get_ip(); + + $params = [ + "q" => $search, + "freshness" => $get["time"], + "count" => 10, + "locale" => $get["country"], + "offset" => 0, + "device" => "desktop", + "tgp" => 3, + "safesearch" => 0, + "displayed" => "true" + ]; + + switch($get["nsfw"]){ + + case "yes": $params["safesearch"] = 0; break; + case "maybe": $params["safesearch"] = 1; break; + case "no": $params["safesearch"] = 2; break; + } + } + /* + $handle = fopen("scraper/qwant_web.json", "r"); + $json = fread($handle, filesize("scraper/qwant_web.json")); + fclose($handle);*/ + + try{ + $json = + $this->get( + $proxy, + "https://fdn.qwant.com/v3/search/web", + $params + ); + + }catch(Exception $error){ + + throw new Exception("Could not fetch JSON"); + } + + $json = json_decode($json, true); + + if($json === NULL){ + + throw new Exception("Failed to decode JSON"); + } + + if(isset($json["data"]["message"][0])){ + + throw new Exception("Server returned an error:\n" . $json["data"]["message"][0]); + } + + if($json["status"] != "success"){ + + if($json["data"]["error_code"] === 5){ + + return $out; + } + + throw new Exception("Server returned an error code: " . $json["data"]["error_code"]); + } + + if(!isset($json["data"]["result"]["items"]["mainline"])){ + + throw new Exception("Server did not return a result object"); + } + + // data is OK, parse + $out = [ + "status" => "ok", + "spelling" => [ + "type" => "no_correction", + "using" => null, + "correction" => null + ], + "npt" => null, + "answer" => [], + "web" => [], + "image" => [], + "video" => [], + "news" => [], + "related" => [] + ]; + + // get instant answer + if( + $get["extendedsearch"] == "yes" && + isset($json["data"]["result"]["items"]["sidebar"][0]["endpoint"]) + ){ + + try{ + $answer = + $this->get( + $proxy, + "https://api.qwant.com/v3" . + $json["data"]["result"]["items"]["sidebar"][0]["endpoint"], + [] + ); + + $answer = json_decode($answer, true); + + if( + $answer === null || + $answer["status"] != "success" || + $answer["data"]["result"] === null + ){ + + throw new Exception(); + } + + // parse answer + $out["answer"][] = [ + "title" => $answer["data"]["result"]["title"], + "description" => [ + [ + "type" => "text", + "value" => $this->trimdots($answer["data"]["result"]["description"]) + ] + ], + "url" => $answer["data"]["result"]["url"], + "thumb" => + $answer["data"]["result"]["thumbnail"]["landscape"] == null ? + null : + $this->unshitimage( + $answer["data"]["result"]["thumbnail"]["landscape"], + false + ), + "table" => [], + "sublink" => [] + ]; + + }catch(Exception $error){ + + // do nothing in case of failure + } + + } + + // get word correction + if(isset($json["data"]["query"]["queryContext"]["alteredQuery"])){ + + $out["spelling"] = [ + "type" => "including", + "using" => $json["data"]["query"]["queryContext"]["alteredQuery"], + "correction" => $json["data"]["query"]["queryContext"]["alterationOverrideQuery"] + ]; + } + + // check for next page + if($json["data"]["result"]["lastPage"] === false){ + + $params["offset"] = $params["offset"] + 10; + + $out["npt"] = + $this->backend->store( + json_encode($params), + "web", + $proxy + ); + } + + // parse results + foreach($json["data"]["result"]["items"]["mainline"] as $item){ + + switch($item["type"]){ // ignores ads + + case "web": + foreach($item["items"] as $result){ + + if(isset($result["thumbnailUrl"])){ + + $thumb = [ + "url" => $this->unshitimage($result["thumbnailUrl"]), + "ratio" => "16:9" + ]; + }else{ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + } + + $sublinks = []; + if(isset($result["links"])){ + + foreach($result["links"] as $link){ + + $sublinks[] = [ + "title" => $this->trimdots($link["title"]), + "date" => null, + "description" => isset($link["desc"]) ? $this->trimdots($link["desc"]) : null, + "url" => $link["url"] + ]; + } + } + + $out["web"][] = [ + "title" => $this->trimdots($result["title"]), + "description" => $this->trimdots($result["desc"]), + "url" => $result["url"], + "date" => null, + "type" => "web", + "thumb" => $thumb, + "sublink" => $sublinks, + "table" => [] + ]; + } + break; + + case "images": + foreach($item["items"] as $image){ + + $out["image"][] = [ + "title" => $image["title"], + "source" => [ + [ + "url" => $image["media"], + "width" => (int)$image["width"], + "height" => (int)$image["height"] + ], + [ + "url" => $this->unshitimage($image["thumbnail"]), + "width" => $image["thumb_width"], + "height" => $image["thumb_height"] + ] + ], + "url" => $image["url"] + ]; + } + break; + + case "videos": + foreach($item["items"] as $video){ + + $out["video"][] = [ + "title" => $video["title"], + "description" => null, + "date" => (int)$video["date"], + "duration" => $video["duration"] === null ? null : $video["duration"] / 1000, + "views" => null, + "thumb" => + $video["thumbnail"] === null ? + [ + "url" => null, + "ratio" => null, + ] : + [ + "url" => $this->unshitimage($video["thumbnail"]), + "ratio" => "16:9", + ], + "url" => $video["url"] + ]; + } + break; + + case "related_searches": + foreach($item["items"] as $related){ + + $out["related"][] = $related["text"]; + } + break; + } + } + + return $out; + } + + + public function image($get){ + + if($get["npt"]){ + + [$params, $proxy] = + $this->backend->get( + $get["npt"], + "images" + ); + + $params = json_decode($params, true); + }else{ + + $search = $get["s"]; + + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $proxy = $this->backend->get_ip(); + + $params = [ + "t" => "images", + "q" => $search, + "count" => 125, + "locale" => $get["country"], + "offset" => 0, // increment by 125 + "device" => "desktop", + "tgp" => 3 + ]; + + if($get["time"] != "any"){ + + $params["freshness"] = $get["time"]; + } + + foreach(["size", "color", "imagetype", "license"] as $p){ + + if($get[$p] != "any"){ + + $params[$p] = $get[$p]; + } + } + + switch($get["nsfw"]){ + + case "yes": $params["safesearch"] = 0; break; + case "maybe": $params["safesearch"] = 1; break; + case "no": $params["safesearch"] = 2; break; + } + } + + try{ + $json = $this->get( + $proxy, + "https://api.qwant.com/v3/search/images", + $params, + ); + }catch(Exception $err){ + + throw new Exception("Failed to get JSON"); + } + + /* + $handle = fopen("scraper/yandex.json", "r"); + $json = fread($handle, filesize("scraper/yandex.json")); + fclose($handle);*/ + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Failed to decode JSON"); + } + + if($json["status"] != "success"){ + + throw new Exception("Qwant returned an API error"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + if($json["data"]["result"]["lastPage"] === false){ + + $params["offset"] = $params["offset"] + 125; + + $out["npt"] = $this->backend->store( + json_encode($params), + "images", + $proxy + ); + } + + foreach($json["data"]["result"]["items"] as $image){ + + $out["image"][] = [ + "title" => $this->trimdots($image["title"]), + "source" => [ + [ + "url" => $image["media"], + "width" => $image["width"], + "height" => $image["height"] + ], + [ + "url" => $this->unshitimage($image["thumbnail"]), + "width" => $image["thumb_width"], + "height" => $image["thumb_height"] + ] + ], + "url" => $image["url"] + ]; + } + + return $out; + } + + public function video($get){ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $params = [ + "t" => "videos", + "q" => $search, + "count" => 50, + "locale" => $get["country"], + "offset" => 0, // dont implement pagination + "device" => "desktop", + "tgp" => 3 + ]; + + switch($get["nsfw"]){ + + case "yes": $params["safesearch"] = 0; break; + case "maybe": $params["safesearch"] = 1; break; + case "no": $params["safesearch"] = 2; break; + } + + try{ + $json = + $this->get( + $this->backend->get_ip(), + "https://api.qwant.com/v3/search/videos", + $params + ); + }catch(Exception $error){ + + throw new Exception("Could not fetch JSON"); + } + + /* + $handle = fopen("scraper/yandex-video.json", "r"); + $json = fread($handle, filesize("scraper/yandex-video.json")); + fclose($handle); + */ + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Could not parse JSON"); + } + + if($json["status"] != "success"){ + + throw new Exception("Qwant returned an API error"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "video" => [], + "author" => [], + "livestream" => [], + "playlist" => [], + "reel" => [] + ]; + + foreach($json["data"]["result"]["items"] as $video){ + + if(empty($video["thumbnail"])){ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + }else{ + + $thumb = [ + "url" => $this->unshitimage($video["thumbnail"], false), + "ratio" => "16:9" + ]; + } + + $duration = (int)$video["duration"]; + + $out["video"][] = [ + "title" => $video["title"], + "description" => $this->limitstrlen($video["desc"]), + "author" => [ + "name" => $video["channel"], + "url" => null, + "avatar" => null + ], + "date" => (int)$video["date"], + "duration" => $duration === 0 ? null : $duration, + "views" => null, + "thumb" => $thumb, + "url" => preg_replace("/\?syndication=.+/", "", $video["url"]) + ]; + } + + return $out; + } + + public function news($get){ + + $search = $get["s"]; + if(strlen($search) === 0){ + + throw new Exception("Search term is empty!"); + } + + $params = [ + "t" => "news", + "q" => $search, + "count" => 50, + "locale" => $get["country"], + "offset" => 0, // dont implement pagination + "device" => "desktop", + "tgp" => 3 + ]; + + switch($get["nsfw"]){ + + case "yes": $params["safesearch"] = 0; break; + case "maybe": $params["safesearch"] = 1; break; + case "no": $params["safesearch"] = 2; break; + } + + try{ + $json = + $this->get( + $this->backend->get_ip(), + "https://api.qwant.com/v3/search/news", + $params + ); + }catch(Exception $error){ + + throw new Exception("Could not fetch JSON"); + } + + /* + $handle = fopen("scraper/yandex-video.json", "r"); + $json = fread($handle, filesize("scraper/yandex-video.json")); + fclose($handle); + */ + + $json = json_decode($json, true); + + if($json === null){ + + throw new Exception("Could not parse JSON"); + } + + if($json["status"] != "success"){ + + throw new Exception("Qwant returned an API error"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "news" => [] + ]; + + foreach($json["data"]["result"]["items"] as $news){ + + if(empty($news["media"][0]["pict_big"]["url"])){ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + }else{ + + $thumb = [ + "url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false), + "ratio" => "16:9" + ]; + } + + $out["news"][] = [ + "title" => $news["title"], + "author" => $news["press_name"], + "description" => $this->trimdots($news["desc"]), + "date" => (int)$news["date"], + "thumb" => $thumb, + "url" => $news["url"] + ]; + } + + return $out; + } + + private function limitstrlen($text){ + + return explode("\n", wordwrap($text, 300, "\n"))[0]; + } + + private function trimdots($text){ + + return trim($text, ". "); + } + + private function unshitimage($url, $is_bing = true){ + + // https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0 + parse_str(parse_url($url)["query"], $parts); + + if($is_bing){ + $parse = parse_url($parts["u"]); + parse_str($parse["query"], $parts); + + return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]); + } + + return $parts["u"]; + } +} diff --git a/scraper/sc.php b/scraper/sc.php index 23742f1..e2e7385 100644 --- a/scraper/sc.php +++ b/scraper/sc.php @@ -70,7 +70,7 @@ class sc{ return $data; } - public function music($get){ + public function music($get, $last_attempt = false){ if($get["npt"]){ @@ -108,6 +108,7 @@ class sc{ $type = $get["type"]; $proxy = $this->backend->get_ip(); + $token = $this->get_token($proxy); switch($type){ @@ -117,12 +118,11 @@ class sc{ "q" => $search, "variant_ids" => "", "facet" => "model", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -133,12 +133,11 @@ class sc{ "q" => $search, "variant_ids" => "", "facet_genre" => "", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -149,12 +148,11 @@ class sc{ "q" => $search, "variant_ids" => "", "facet" => "place", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -165,12 +163,11 @@ class sc{ "q" => $search, "variant_ids" => "", "facet" => "genre", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -181,12 +178,11 @@ class sc{ "q" => $search, "variant_ids" => "", "facet" => "genre", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -198,12 +194,11 @@ class sc{ "variant_ids" => "", "filter.content_tier" => "SUB_HIGH_TIER", "facet" => "genre", - "user_id" => config::SC_USER_ID, - "client_id" => config::SC_CLIENT_TOKEN, + "client_id" => $token, "limit" => 20, "offset" => 0, "linked_partitioning" => 1, - "app_version" => 1696577813, + "app_version" => 1713542117, "app_locale" => "en" ]; break; @@ -229,7 +224,14 @@ class sc{ if($json === null){ - throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?"); + if($last_attempt === true){ + + throw new Exception("Fetched an invalid token (please report!!)"); + } + + // token might've expired, get a new one and re-try search + get_token($proxy); + return $this->music($get, true); } $out = [ @@ -352,7 +354,7 @@ class sc{ "endpoint" => "sc", "url" => $item["media"]["transcodings"][0]["url"] . - "?client_id=" . config::SC_CLIENT_TOKEN . + "?client_id=" . $token . "&track_authorization=" . $item["track_authorization"] ]; @@ -390,6 +392,37 @@ class sc{ return $out; } + public function get_token($proxy){ + + $token = apcu_fetch("sc_token"); + + if($token === false){ + + $js = + $this->get( + $proxy, + "https://a-v2.sndcdn.com/assets/1-c3e4038d.js", + [] + ); + + preg_match( + '/client_id=([^"]+)/', + $js, + $token + ); + + if(!isset($token[1])){ + + throw new Exception("Failed to get search token"); + } + + apcu_store("sc_token", $token[1]); + return $token[1]; + } + + return $token; + } + private function limitstrlen($text){ return diff --git a/scraper/wiby.php b/scraper/wiby.php index 2d79c56..59f723c 100644 --- a/scraper/wiby.php +++ b/scraper/wiby.php @@ -209,7 +209,7 @@ class wiby{ $out["web"][] = [ "title" => $this->unescapehtml(trim($links[2][$i])), - "description" => $this->unescapehtml(trim(strip_tags($links[3][$i]))), + "description" => $this->unescapehtml(trim(strip_tags($links[3][$i]), ".\n\r ")), "url" => trim($links[1][$i]), "date" => null, "type" => "web", diff --git a/scraper/yandex.php b/scraper/yandex.php index 9b73428..2e81cee 100644 --- a/scraper/yandex.php +++ b/scraper/yandex.php @@ -644,6 +644,11 @@ class yandex{ $json = json_decode($json, true); + if($json === null){ + + throw new Exception("Failed to decode JSON"); + } + if( isset($json["type"]) && $json["type"] == "captcha" @@ -652,11 +657,6 @@ class yandex{ throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes."); } - if($json === null){ - - throw new Exception("Failed to decode JSON"); - } - $out = [ "status" => "ok", "npt" => null, diff --git a/static/serverping.js b/static/serverping.js index a94fe50..6b680d5 100644 --- a/static/serverping.js +++ b/static/serverping.js @@ -22,10 +22,10 @@ var list = []; var pinged_list = []; var reqs = 0; var errors = 0; -var sort = 0; // lower ping first +var sort = 6; // highest version first // check for instance redirect stuff -var redir = ""; +var redir = []; var target = "/web?"; new URL(window.location.href) .searchParams @@ -39,12 +39,16 @@ new URL(window.location.href) } if(key == "npt"){ return; } - redir += encodeURIComponent(key) + "=" + encodeURIComponent(value) + redir.push(encodeURIComponent(key) + "=" + encodeURIComponent(value)) } ); -if(redir != ""){ - redir = target + redir; +if(redir.length !== 0){ + + redir = target + redir.join("&"); +}else{ + + redir = ""; } var quote = document.createElement("div"); @@ -61,14 +65,13 @@ var table = document.createElement("table"); table.innerHTML = '<thead>' + '<tr>' + - '<th><div class="arrow up"></div>Ping</th>' + '<th class="extend">Server</th>' + '<th>Address</th>' + '<th>Bot protection</th>' + '<th title="Amount of legit requests processed since the last APCU cache clear (usually happens at midnight)">Real reqs (?)</th>' + '<th title="Amount of filtered requests processed since the last APCU cache clear (usually happens at midnight)">Bot reqs (?)</th>' + '<th>API</th>' + - '<th>Version</th>' + + '<th><div class="arrow up"></div>Version</th>' + '</tr>' + '</thead>' + '<tbody></tbody>'; @@ -118,14 +121,13 @@ for(var i=0; i<th.length; i++){ switch(div.textContent.toLowerCase()){ - case "ping": sort = orientation; break; - case "server": sort = 2 + orientation; break; - case "address": sort = 4 + orientation; break; - case "bot protection": sort = 6 + orientation; break; - case "real reqs (?)": sort = 8 + orientation; break; - case "bot reqs (?)": sort = 10 + orientation; break; - case "api": sort = 12 + orientation; break; - case "version": sort = 14 + orientation; break; + case "server": sort = 0 + orientation; break; + case "address": sort = 2 + orientation; break; + case "bot protection": sort = 4 + orientation; break; + case "real reqs (?)": sort = 6 + orientation; break; + case "bot reqs (?)": sort = 8 + orientation; break; + case "api": sort = 10 + orientation; break; + case "version": sort = 12 + orientation; break; } render_list(); @@ -160,16 +162,6 @@ function number_format(int){ return new Intl.NumberFormat().format(int); } -window.fetch = (function(fetch) { - return function(fn, t){ - const begin = Date.now(); - return fetch.apply(this, arguments).then(function(response) { - response.ping = Date.now() - begin; - return response; - }); - }; -})(window.fetch); - // parse initial server list fetch_server(window.location.origin); @@ -188,7 +180,6 @@ async function fetch_server(server){ if(list[i] == server){ // serber was already fetched - console.info("Already checked server: " + server); return; } } @@ -200,9 +191,7 @@ async function fetch_server(server){ try{ - var payload = await fetch( - server + "/ami4get" - ); + var payload = await fetch(server + "/ami4get"); if(payload.status !== 200){ @@ -214,7 +203,6 @@ async function fetch_server(server){ } data = await payload.json(); - data.server.ping = payload.ping; }catch(error){ @@ -316,41 +304,36 @@ function render_list(){ case 0: case 1: - sorted_list = sorta(pinged_list, "ping", filter === true ? false : true); + sorted_list = textsort(pinged_list, "name", filter === true ? false : true); break; case 2: case 3: - sorted_list = textsort(pinged_list, "name", filter === true ? false : true); + sorted_list = textsort(pinged_list, "ip", filter === true ? false : true); break; case 4: case 5: - sorted_list = textsort(pinged_list, "ip", filter === true ? false : true); + sorted_list = sorta(pinged_list, "bot_protection", filter === true ? false : true); break; case 6: case 7: - sorted_list = sorta(pinged_list, "bot_protection", filter === true ? false : true); + sorted_list = sorta(pinged_list, "real_requests", filter); break; case 8: case 9: - sorted_list = sorta(pinged_list, "real_requests", filter); + sorted_list = sorta(pinged_list, "bot_requests", filter); break; case 10: case 11: - sorted_list = sorta(pinged_list, "bot_requests", filter); + sorted_list = sorta(pinged_list, "api_enabled", filter); break; case 12: case 13: - sorted_list = sorta(pinged_list, "api_enabled", filter); - break; - - case 14: - case 15: sorted_list = sorta(pinged_list, "version", filter); break; } @@ -362,32 +345,16 @@ function render_list(){ html += '<tr onclick="show_server(' + sorted_list[k].index + ');">'; - for(var i=0; i<8; i++){ + for(var i=0; i<7; i++){ html += '<td'; switch(i){ - case 0: // server ping - if(sorted_list[k].server.ping <= 100){ - - html += '><span style="color:var(--green);">' + sorted_list[k].server.ping + '</span>'; - break; - } - - if(sorted_list[k].server.ping <= 200){ - - html += '><span style="color:var(--yellow);">' + sorted_list[k].server.ping + '</span>'; - break; - } - - html += '><span style="color:var(--red);">' + number_format(sorted_list[k].server.ping) + '</span>'; - break; - // server name - case 1: html += ' class="extend">' + htmlspecialchars(sorted_list[k].server.name); break; - case 2: html += '>' + htmlspecialchars(new URL(sorted_list[k].server.ip).host); break; - case 3: // bot protection + case 0: html += ' class="extend">' + htmlspecialchars(sorted_list[k].server.name); break; + case 1: html += '>' + htmlspecialchars(new URL(sorted_list[k].server.ip).host); break; + case 2: // bot protection switch(sorted_list[k].server.bot_protection){ case 0: @@ -407,15 +374,15 @@ function render_list(){ } break; - case 4: // real reqs + case 3: // real reqs html += '>' + number_format(sorted_list[k].server.real_requests); break; - case 5: // bot reqs + case 4: // bot reqs html += '>' + number_format(sorted_list[k].server.bot_requests); break; - case 6: // api enabled + case 5: // api enabled if(sorted_list[k].server.api_enabled){ @@ -427,7 +394,7 @@ function render_list(){ break; // version - case 7: html += ">v" + sorted_list[k].server.version; break; + case 6: html += ">v" + sorted_list[k].server.version; break; } html += '</td>'; @@ -436,6 +403,8 @@ function render_list(){ html += '</tr>'; } + console.log(html); + tbody.innerHTML = html; } diff --git a/template/about.html b/template/about.html index 12dd957..6398884 100644 --- a/template/about.html +++ b/template/about.html @@ -38,7 +38,7 @@ This is a metasearch engine that gets results from other engines, and strips awa Provide users with a privacy oriented, extremely lightweight, ad free, free as in freedom (and free beer!) way to search for documents around the internet, with minimal, optional javascript code. My long term goal would be to build my own index (that doesn't suck) and provide users with an unbiased search engine, with no political inclinations. <a href="#logs"><h2 id="logs">Do you keep logs?</h2></a> -I store data temporarly to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using <div class="code-inline">aes-256-gcm</div> on the serber, for which I give you a key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it's used, whichever comes first.<br><br> +I store data temporarly to get the next page of results. This might include search queries, filters and tokens. These parameters are encrypted using <div class="code-inline">libsodium</div> on the serber, for which I give you a decryption key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it's used, whichever comes first.<br><br> I <b>don't</b> log IP addresses, user agents, or anything else. The <div class="code-inline">npt</div> tokens are the only thing that are stored (in RAM, mind you), temporarly, encrypted. @@ -48,7 +48,7 @@ Your search queries and supplied filters are shared with the scraper you chose ( TL;DR assume those websites can see what you search for, but can't see who you are (unless you're really dumb). <a href="#hosting"><h2 id="hosting">Where is this website hosted?</h2></a> -This website is hosted on a Contabo shitbox in the United States. +Please head over to the <a href="/instances">4get instances</a> page, select an instance and click on "IP lookup". <a href="#keyboard-shortcuts"><h2 id="keyboard-shortcuts">Keyboard shortcuts?</h2></a> Use <div class="code-inline">/</div> to focus the search box.<br><br> |