diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/backend.php | 197 | ||||
-rw-r--r-- | lib/captcha_gen.php | 32 | ||||
-rw-r--r-- | lib/curlproxy.php | 8 | ||||
-rw-r--r-- | lib/frontend.php | 143 | ||||
-rw-r--r-- | lib/fuckhtml.php | 2 | ||||
-rw-r--r-- | lib/nextpage.php | 106 |
6 files changed, 281 insertions, 207 deletions
diff --git a/lib/backend.php b/lib/backend.php new file mode 100644 index 0000000..209cfec --- /dev/null +++ b/lib/backend.php @@ -0,0 +1,197 @@ +<?php +class backend{ + + public function __construct($scraper){ + + $this->scraper = $scraper; + $this->requestid = apcu_inc("real_requests"); + } + + /* + Proxy stuff + */ + public function get_ip(){ + + $pool = constant("config::PROXY_" . strtoupper($this->scraper)); + if($pool === false){ + + // we don't want a proxy, fuck off! + return 'raw_ip::::'; + } + + // indent + $proxy_index_raw = apcu_inc("p." . $this->scraper); + + $proxylist = file_get_contents("data/proxies/" . $pool . ".txt"); + $proxylist = explode("\n", $proxylist); + + // ignore empty or commented lines + $proxylist = array_filter($proxylist, function($entry){ + $entry = ltrim($entry); + return strlen($entry) > 0 && substr($entry, 0, 1) != "#"; + }); + + $proxylist = array_values($proxylist); + + return $proxylist[$proxy_index_raw % count($proxylist)]; + } + + // this function is also called directly on nextpage + public function assign_proxy(&$curlproc, $ip){ + + // parse proxy line + [ + $type, + $address, + $port, + $username, + $password + ] = explode(":", $ip, 5); + + switch($type){ + + case "raw_ip": + return; + break; + + case "http": + case "https": + curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP); + curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port); + break; + + case "socks4": + curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4); + curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); + break; + + case "socks5": + curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5); + curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); + break; + + case "socks4a": + curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A); + curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); + break; + + case "socks5_hostname": + curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME); + curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port); + break; + } + + if($username != ""){ + + curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password); + } + } + + + + /* + Next page stuff + */ + public function store($payload, $page, $proxy){ + + $page = $page[0]; + $password = random_bytes(256); // 2048 bit + $salt = random_bytes(16); + $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true); + $iv = + random_bytes( + openssl_cipher_iv_length("aes-256-gcm") + ); + + $tag = ""; + $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16); + + $key = apcu_inc("key", 1); + + apcu_store( + $page . "." . + $this->scraper . + $this->requestid, + gzdeflate($proxy . "," . $salt.$iv.$out.$tag), + 900 // cache information for 15 minutes blaze it + ); + + return + $this->scraper . $this->requestid . "." . + rtrim(strtr(base64_encode($password), '+/', '-_'), '='); + } + + public function get($npt, $page){ + + $page = $page[0]; + $explode = explode(".", $npt, 2); + + if(count($explode) !== 2){ + + throw new Exception("Malformed nextPageToken!"); + } + + $apcu = $page . "." . $explode[0]; + $key = $explode[1]; + + $payload = apcu_fetch($apcu); + + if($payload === false){ + + throw new Exception("The nextPageToken is invalid or has expired!"); + } + + $key = + base64_decode( + str_pad( + strtr($key, '-_', '+/'), + strlen($key) % 4, + '=', + STR_PAD_RIGHT + ) + ); + + $payload = gzinflate($payload); + + // get proxy + [ + $proxy, + $payload + ] = explode(",", $payload, 2); + + $key = + hash_pbkdf2( + "sha512", + $key, + substr($payload, 0, 16), // salt + 20000, + 32, + true + ); + $ivlen = openssl_cipher_iv_length("aes-256-gcm"); + + $payload = + openssl_decrypt( + substr( + $payload, + 16 + $ivlen, + -16 + ), + "aes-256-gcm", + $key, + OPENSSL_RAW_DATA, + substr($payload, 16, $ivlen), + substr($payload, -16) + ); + + if($payload === false){ + + throw new Exception("The nextPageToken is invalid or has expired!"); + } + + // remove the key after using + apcu_delete($apcu); + + return [$payload, $proxy]; + } +} diff --git a/lib/captcha_gen.php b/lib/captcha_gen.php index 80bc665..6728747 100644 --- a/lib/captcha_gen.php +++ b/lib/captcha_gen.php @@ -4,6 +4,19 @@ class captcha{ public function __construct($frontend, $get, $filters, $page, $output){ + // check if we want captcha + if(config::BOT_PROTECTION !== 1){ + + if($output === true){ + $frontend->loadheader( + $get, + $filters, + $page + ); + } + return; + } + /* Validate cookie, if it exists */ @@ -46,6 +59,7 @@ class captcha{ if($output === false){ + http_response_code(429); // too many reqs echo json_encode([ "status" => "The \"pass\" token in your cookies is missing or has expired!!" ]); @@ -184,15 +198,6 @@ class captcha{ } } - /* - Generate random grid data to pass to captcha.php - */ - $dataset = [ - ["birds", 2263], - ["fumo_plushies", 1006], - ["minecraft", 848] - ]; - // get the positions for the answers // will return between 3 and 6 answer positions $range = range(0, 15); @@ -216,17 +221,18 @@ class captcha{ } // choose a dataset - $choosen = &$dataset[random_int(0, count($dataset) - 1)]; + $c = count(config::CAPTCHA_DATASET); + $choosen = config::CAPTCHA_DATASET[random_int(0, $c - 1)]; $choices = []; - for($i=0; $i<count($dataset); $i++){ + for($i=0; $i<$c; $i++){ - if($dataset[$i][0] == $choosen[0]){ + if(config::CAPTCHA_DATASET[$i][0] == $choosen[0]){ continue; } - $choices[] = $dataset[$i]; + $choices[] = config::CAPTCHA_DATASET[$i]; } // generate grid data diff --git a/lib/curlproxy.php b/lib/curlproxy.php index ef9085b..f1ce2a7 100644 --- a/lib/curlproxy.php +++ b/lib/curlproxy.php @@ -152,7 +152,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", + "User-Agent: " . config::USER_AGENT, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -180,7 +180,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", + "User-Agent: " . config::USER_AGENT, "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate", @@ -379,7 +379,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", + "User-Agent: " . config::USER_AGENT, "Accept: image/avif,image/webp,*/*", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", @@ -395,7 +395,7 @@ class proxy{ $curl, CURLOPT_HTTPHEADER, [ - "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0", + "User-Agent: " . config::USER_AGENT, "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5", "Accept-Language: en-US,en;q=0.5", "Accept-Encoding: gzip, deflate, br", diff --git a/lib/frontend.php b/lib/frontend.php index 97c8c5b..0f9f95d 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -4,6 +4,41 @@ class frontend{ public function load($template, $replacements = []){ + $replacements["server_name"] = htmlspecialchars(config::SERVER_NAME); + $replacements["version"] = config::VERSION; + + if(isset($_COOKIE["theme"])){ + + $theme = str_replace(["/". "."], "", $_COOKIE["theme"]); + + if( + $theme != "Dark" && + !is_file("static/themes/" . $theme . ".css") + ){ + + $theme = config::DEFAULT_THEME; + } + }else{ + + $theme = config::DEFAULT_THEME; + } + + if($theme != "Dark"){ + + $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">'; + }else{ + + $replacements["style"] = ""; + } + + if(isset($_COOKIE["scraper_ac"])){ + + $replacements["ac"] = '?ac=' . htmlspecialchars($_COOKIE["scraper_ac"]); + }else{ + + $replacements["ac"] = ''; + } + $handle = fopen("template/{$template}", "r"); $data = fread($handle, filesize("template/{$template}")); fclose($handle); @@ -29,30 +64,6 @@ class frontend{ return trim($html); } - public function getthemeclass($raw = true){ - - if( - isset($_COOKIE["theme"]) && - $_COOKIE["theme"] == "cream" - ){ - - $body_class = "theme-white "; - }else{ - - $body_class = ""; - } - - if( - $raw && - $body_class != "" - ){ - - return ' class="' . rtrim($body_class) . '"'; - } - - return $body_class; - } - public function loadheader(array $get, array $filters, string $page){ echo @@ -62,8 +73,7 @@ class frontend{ "index" => "no", "search" => htmlspecialchars($get["s"]), "tabs" => $this->generatehtmltabs($page, $get["s"]), - "filters" => $this->generatehtmlfilters($filters, $get), - "body_class" => $this->getthemeclass() + "filters" => $this->generatehtmlfilters($filters, $get) ]); if( @@ -74,18 +84,17 @@ class frontend{ ){ // bot detected !! - echo - $this->drawerror( - "Tshh, blocked!", - 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.', - ); + $this->drawerror( + "Tshh, blocked!", + 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.', + ); die(); } } public function drawerror($title, $error){ - return + echo $this->load("search.html", [ "class" => "", "right-left" => "", @@ -96,6 +105,23 @@ class frontend{ $error . '</div>' ]); + die(); + } + + public function drawscrapererror($error, $get, $target){ + + $this->drawerror( + "Shit", + 'This scraper returned an error:' . + '<div class="code">' . htmlspecialchars($error) . '</div>' . + 'Things you can try:' . + '<ul>' . + '<li>Use a different scraper</li>' . + '<li>Remove keywords that could cause errors</li>' . + '<li><a href="/instances?target=' . $target . "&" . $this->buildquery($get, false) . '">Try your search on another 4get instance</a></li>' . + '</ul><br>' . + 'If the error persists, please <a href="/about">contact the administrator</a>.' + ); } public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){ @@ -819,30 +845,7 @@ class frontend{ public function getscraperfilters($page){ - $get_scraper = null; - - switch($page){ - - case "web": - $get_scraper = isset($_COOKIE["scraper_web"]) ? $_COOKIE["scraper_web"] : null; - break; - - case "images": - $get_scraper = isset($_COOKIE["scraper_images"]) ? $_COOKIE["scraper_images"] : null; - break; - - case "videos": - $get_scraper = isset($_COOKIE["scraper_videos"]) ? $_COOKIE["scraper_videos"] : null; - break; - - case "news": - $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; - break; - - case "music": - $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null; - break; - } + $get_scraper = isset($_COOKIE["scraper_$page"]) ? $_COOKIE["scraper_$page"] : null; if( isset($_GET["scraper"]) && @@ -1148,32 +1151,8 @@ class frontend{ break; case "_SEARCH": - - // get search string & bang - $sanitized[$parameter] = trim($sanitized[$parameter]); - $sanitized["bang"] = ""; - - if( - strlen($sanitized[$parameter]) !== 0 && - $sanitized[$parameter][0] == "!" - ){ - - $sanitized[$parameter] = explode(" ", $sanitized[$parameter], 2); - - $sanitized["bang"] = trim($sanitized[$parameter][0]); - - if(count($sanitized[$parameter]) === 2){ - - $sanitized[$parameter] = trim($sanitized[$parameter][1]); - }else{ - - $sanitized[$parameter] = ""; - } - - $sanitized["bang"] = ltrim($sanitized["bang"], "!"); - } - - $sanitized[$parameter] = ltrim($sanitized[$parameter], "! \n\r\t\v\x00"); + // get search string + $sanitized["s"] = trim($sanitized[$parameter]); } } } diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php index 5c65417..cb5d38d 100644 --- a/lib/fuckhtml.php +++ b/lib/fuckhtml.php @@ -442,5 +442,3 @@ class fuckhtml{ return json_decode($json_out, true); } } - -?> diff --git a/lib/nextpage.php b/lib/nextpage.php deleted file mode 100644 index 7516667..0000000 --- a/lib/nextpage.php +++ /dev/null @@ -1,106 +0,0 @@ -<?php - -class nextpage{ - - public function __construct($scraper){ - - $this->scraper = $scraper; - } - - public function store($payload, $page){ - - $page = $page[0]; - $password = random_bytes(256); // 2048 bit - $salt = random_bytes(16); - $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true); - $iv = - random_bytes( - openssl_cipher_iv_length("aes-256-gcm") - ); - - $tag = ""; - $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16); - - $key = apcu_inc("key", 1); - - apcu_store( - $page . "." . - $this->scraper . - (string)$key, - gzdeflate($salt.$iv.$out.$tag), - 900 // cache information for 15 minutes blaze it - ); - - return - $this->scraper . $key . "." . - rtrim(strtr(base64_encode($password), '+/', '-_'), '='); - } - - public function get($npt, $page){ - - $page = $page[0]; - $explode = explode(".", $npt, 2); - - if(count($explode) !== 2){ - - throw new Exception("Malformed nextPageToken!"); - } - - $apcu = $page . "." . $explode[0]; - $key = $explode[1]; - - $payload = apcu_fetch($apcu); - - if($payload === false){ - - throw new Exception("The nextPageToken is invalid or has expired!"); - } - - $key = - base64_decode( - str_pad( - strtr($key, '-_', '+/'), - strlen($key) % 4, - '=', - STR_PAD_RIGHT - ) - ); - - $payload = gzinflate($payload); - - $key = - hash_pbkdf2( - "sha512", - $key, - substr($payload, 0, 16), // salt - 20000, - 32, - true - ); - $ivlen = openssl_cipher_iv_length("aes-256-gcm"); - - $payload = - openssl_decrypt( - substr( - $payload, - 16 + $ivlen, - -16 - ), - "aes-256-gcm", - $key, - OPENSSL_RAW_DATA, - substr($payload, 16, $ivlen), - substr($payload, -16) - ); - - if($payload === false){ - - throw new Exception("The nextPageToken is invalid or has expired!"); - } - - // remove the key after using - apcu_delete($apcu); - - return $payload; - } -} |