summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/backend.php 197
-rw-r--r-- lib/captcha_gen.php 32
-rw-r--r-- lib/curlproxy.php 8
-rw-r--r-- lib/frontend.php 143
-rw-r--r-- lib/fuckhtml.php 2
-rw-r--r-- lib/nextpage.php 106
6 files changed, 281 insertions, 207 deletions
diff --git a/lib/backend.php b/lib/backend.php
new file mode 100644
index 0000000..209cfec
--- /dev/null
+++ b/lib/backend.php
@@ -0,0 +1,197 @@
+<?php
+/*
+	Per-scraper backend helper. Hands out proxies from the configured pool and
+	keeps encrypted "next page" payloads in APCu between requests.
+*/
+class backend{
+	
+	// scraper name given to the constructor; used in the config constant name,
+	// the APCu counter name and the cache keys built below
+	public $scraper;
+	
+	// value of the APCu counter "real_requests" after it was incremented in the
+	// constructor; appended to the cache key in store()
+	public $requestid;
+	
+	public function __construct($scraper){
+		
+		$this->scraper = $scraper;
+		$this->requestid = apcu_inc("real_requests");
+	}
+	
+	/*
+		Proxy stuff
+	*/
+	
+	/*
+		Returns the proxy line the next request should use.
+		
+		The pool name is read from config::PROXY_<SCRAPER>. When that constant is
+		false the placeholder 'raw_ip::::' is returned, which assign_proxy()
+		treats as "no proxy". Otherwise the usable lines of
+		data/proxies/<pool>.txt are handed out in rotation, driven by the APCu
+		counter "p.<scraper>".
+		
+		Throws Exception when the list cannot be read or has no usable line.
+	*/
+	public function get_ip(){
+		
+		$pool = constant("config::PROXY_" . strtoupper($this->scraper));
+		if($pool === false){
+			
+			// we don't want a proxy, fuck off!
+			return 'raw_ip::::';
+		}
+		
+		// increment the rotation counter
+		$proxy_index_raw = apcu_inc("p." . $this->scraper);
+		
+		$proxylist = file_get_contents("data/proxies/" . $pool . ".txt");
+		
+		if($proxylist === false){
+			
+			throw new Exception("Could not read the proxy list of the " . $this->scraper . " scraper!");
+		}
+		
+		$usable = [];
+		
+		foreach(explode("\n", $proxylist) as $entry){
+			
+			// trim so that indentation and CRLF line endings don't end up in
+			// the proxy type or password parsed by assign_proxy()
+			$entry = trim($entry);
+			
+			// ignore empty or commented lines
+			if(
+				$entry === "" ||
+				$entry[0] === "#"
+			){
+				
+				continue;
+			}
+			
+			$usable[] = $entry;
+		}
+		
+		$total = count($usable);
+		
+		if($total === 0){
+			
+			// without this guard the modulo below divides by zero
+			throw new Exception("The proxy list of the " . $this->scraper . " scraper is empty!");
+		}
+		
+		return $usable[(int)$proxy_index_raw % $total];
+	}
+	
+	/*
+		Applies a proxy line to a curl handle.
+		
+		$ip has the form type:address:port:username:password. Trailing fields
+		may be missing and are then treated as empty. The type "raw_ip" leaves
+		the handle untouched.
+		
+		NOTE(review): an unrecognised type sets no proxy at all, so the request
+		goes out directly -- confirm that this is intended.
+		
+		this function is also called directly on nextpage
+	*/
+	public function assign_proxy(&$curlproc, $ip){
+		
+		// parse proxy line; pad to 5 fields so lines without credentials
+		// don't raise "undefined array key" warnings
+		[
+			$type,
+			$address,
+			$port,
+			$username,
+			$password
+		] = array_pad(explode(":", $ip, 5), 5, "");
+		
+		switch($type){
+			
+			case "raw_ip":
+				return;
+			
+			case "http":
+			case "https":
+				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
+				curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
+				break;
+			
+			case "socks4":
+				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
+				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+				break;
+			
+			case "socks5":
+				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
+				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+				break;
+			
+			case "socks4a":
+				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
+				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+				break;
+			
+			case "socks5_hostname":
+				curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
+				curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
+				break;
+		}
+		
+		if($username !== ""){
+			
+			curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
+		}
+	}
+	
+	
+	
+	/*
+		Next page stuff
+	*/
+	
+	/*
+		Encrypts $payload and caches it together with $proxy for 15 minutes.
+		
+		A random 256 byte password is stretched with PBKDF2 into an AES-256-GCM
+		key. Only the ciphertext (with salt, IV and tag) is kept in APCu, under
+		"<first element of $page>.<scraper><requestid>". The password is only
+		returned to the caller, inside the token.
+		
+		Returns the token "<scraper><requestid>.<url-safe base64 password>"
+		that get() accepts.
+		
+		NOTE(review): get() splits the proxy off at the first comma, so a proxy
+		line containing a comma would not round-trip -- confirm the proxy lists
+		never contain one.
+	*/
+	public function store($payload, $page, $proxy){
+		
+		$page = $page[0];
+		$password = random_bytes(256); // 2048 bit
+		$salt = random_bytes(16);
+		$key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
+		$iv =
+			random_bytes(
+				openssl_cipher_iv_length("aes-256-gcm")
+			);
+		
+		$tag = "";
+		$out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);
+		
+		if($out === false){
+			
+			throw new Exception("Could not encrypt the nextPageToken payload!");
+		}
+		
+		apcu_store(
+			$page . "." .
+			$this->scraper .
+			$this->requestid,
+			gzdeflate($proxy . "," . $salt.$iv.$out.$tag),
+			900 // cache information for 15 minutes blaze it
+		);
+		
+		return
+			$this->scraper . $this->requestid . "." .
+			rtrim(strtr(base64_encode($password), '+/', '-_'), '=');
+	}
+	
+	/*
+		Resolves a token created by store().
+		
+		$npt is the token, $page must match the $page that was given to store().
+		Returns [$payload, $proxy]. The cache entry is deleted once it has been
+		decrypted successfully, so a token works a single time.
+		
+		Throws Exception when the token is malformed, unknown, expired or does
+		not decrypt.
+	*/
+	public function get($npt, $page){
+		
+		$page = $page[0];
+		$explode = explode(".", $npt, 2);
+		
+		if(count($explode) !== 2){
+			
+			throw new Exception("Malformed nextPageToken!");
+		}
+		
+		$apcu = $page . "." . $explode[0];
+		$key = $explode[1];
+		
+		$payload = apcu_fetch($apcu);
+		
+		if($payload === false){
+			
+			throw new Exception("The nextPageToken is invalid or has expired!");
+		}
+		
+		// undo the url-safe alphabet and put back the "=" padding that store()
+		// stripped. str_pad() expects the total length, not the number of pad
+		// characters, so the padding is appended explicitly
+		$key =
+			base64_decode(
+				strtr($key, '-_', '+/') .
+				str_repeat("=", (4 - (strlen($key) % 4)) % 4)
+			);
+		
+		$payload = gzinflate($payload);
+		
+		if($payload === false){
+			
+			throw new Exception("The nextPageToken is invalid or has expired!");
+		}
+		
+		// get proxy
+		$parts = explode(",", $payload, 2);
+		
+		if(count($parts) !== 2){
+			
+			throw new Exception("The nextPageToken is invalid or has expired!");
+		}
+		
+		[
+			$proxy,
+			$payload
+		] = $parts;
+		
+		// layout written by store(): 16 byte salt, IV, ciphertext, 16 byte tag
+		$key =
+			hash_pbkdf2(
+				"sha512",
+				$key,
+				substr($payload, 0, 16), // salt
+				20000,
+				32,
+				true
+			);
+		$ivlen = openssl_cipher_iv_length("aes-256-gcm");
+		
+		$payload =
+			openssl_decrypt(
+				substr(
+					$payload,
+					16 + $ivlen,
+					-16
+				),
+				"aes-256-gcm",
+				$key,
+				OPENSSL_RAW_DATA,
+				substr($payload, 16, $ivlen),
+				substr($payload, -16)
+			);
+		
+		if($payload === false){
+			
+			throw new Exception("The nextPageToken is invalid or has expired!");
+		}
+		
+		// remove the key after using
+		apcu_delete($apcu);
+		
+		return [$payload, $proxy];
+	}
+}
diff --git a/lib/captcha_gen.php b/lib/captcha_gen.php
index 80bc665..6728747 100644
--- a/lib/captcha_gen.php
+++ b/lib/captcha_gen.php
@@ -4,6 +4,19 @@ class captcha{
public function __construct($frontend, $get, $filters, $page, $output){
+ // check if we want captcha
+ if(config::BOT_PROTECTION !== 1){
+
+ if($output === true){
+ $frontend->loadheader(
+ $get,
+ $filters,
+ $page
+ );
+ }
+ return;
+ }
+
/*
Validate cookie, if it exists
*/
@@ -46,6 +59,7 @@ class captcha{
if($output === false){
+ http_response_code(429); // too many reqs
echo json_encode([
"status" => "The \"pass\" token in your cookies is missing or has expired!!"
]);
@@ -184,15 +198,6 @@ class captcha{
}
}
- /*
- Generate random grid data to pass to captcha.php
- */
- $dataset = [
- ["birds", 2263],
- ["fumo_plushies", 1006],
- ["minecraft", 848]
- ];
-
// get the positions for the answers
// will return between 3 and 6 answer positions
$range = range(0, 15);
@@ -216,17 +221,18 @@ class captcha{
}
// choose a dataset
- $choosen = &$dataset[random_int(0, count($dataset) - 1)];
+ $c = count(config::CAPTCHA_DATASET);
+ $choosen = config::CAPTCHA_DATASET[random_int(0, $c - 1)];
$choices = [];
- for($i=0; $i<count($dataset); $i++){
+ for($i=0; $i<$c; $i++){
- if($dataset[$i][0] == $choosen[0]){
+ if(config::CAPTCHA_DATASET[$i][0] == $choosen[0]){
continue;
}
- $choices[] = $dataset[$i];
+ $choices[] = config::CAPTCHA_DATASET[$i];
}
// generate grid data
diff --git a/lib/curlproxy.php b/lib/curlproxy.php
index ef9085b..f1ce2a7 100644
--- a/lib/curlproxy.php
+++ b/lib/curlproxy.php
@@ -152,7 +152,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -180,7 +180,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate",
@@ -379,7 +379,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: image/avif,image/webp,*/*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
@@ -395,7 +395,7 @@ class proxy{
$curl,
CURLOPT_HTTPHEADER,
[
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip, deflate, br",
diff --git a/lib/frontend.php b/lib/frontend.php
index 97c8c5b..0f9f95d 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -4,6 +4,41 @@ class frontend{
public function load($template, $replacements = []){
+ $replacements["server_name"] = htmlspecialchars(config::SERVER_NAME);
+ $replacements["version"] = config::VERSION;
+
+ // pick the theme: cookie value if it names an existing stylesheet, else the default
+ if(
+ isset($_COOKIE["theme"]) &&
+ is_string($_COOKIE["theme"])
+ ){
+
+ // strip "/" and "." so the cookie cannot point outside static/themes/.
+ // the search argument has to list both characters separately: "/" . "."
+ // is the single string "/." and would leave "../" sequences usable
+ $theme = str_replace(["/", "."], "", $_COOKIE["theme"]);
+
+ if(
+ $theme != "Dark" &&
+ !is_file("static/themes/" . $theme . ".css")
+ ){
+
+ $theme = config::DEFAULT_THEME;
+ }
+ }else{
+
+ $theme = config::DEFAULT_THEME;
+ }
+
+ if($theme != "Dark"){
+
+ $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">';
+ }else{
+
+ $replacements["style"] = "";
+ }
+
+ if(isset($_COOKIE["scraper_ac"])){
+
+ $replacements["ac"] = '?ac=' . htmlspecialchars($_COOKIE["scraper_ac"]);
+ }else{
+
+ $replacements["ac"] = '';
+ }
+
$handle = fopen("template/{$template}", "r");
$data = fread($handle, filesize("template/{$template}"));
fclose($handle);
@@ -29,30 +64,6 @@ class frontend{
return trim($html);
}
- public function getthemeclass($raw = true){
-
- if(
- isset($_COOKIE["theme"]) &&
- $_COOKIE["theme"] == "cream"
- ){
-
- $body_class = "theme-white ";
- }else{
-
- $body_class = "";
- }
-
- if(
- $raw &&
- $body_class != ""
- ){
-
- return ' class="' . rtrim($body_class) . '"';
- }
-
- return $body_class;
- }
-
public function loadheader(array $get, array $filters, string $page){
echo
@@ -62,8 +73,7 @@ class frontend{
"index" => "no",
"search" => htmlspecialchars($get["s"]),
"tabs" => $this->generatehtmltabs($page, $get["s"]),
- "filters" => $this->generatehtmlfilters($filters, $get),
- "body_class" => $this->getthemeclass()
+ "filters" => $this->generatehtmlfilters($filters, $get)
]);
if(
@@ -74,18 +84,17 @@ class frontend{
){
// bot detected !!
- echo
- $this->drawerror(
- "Tshh, blocked!",
- 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
- );
+ $this->drawerror(
+ "Tshh, blocked!",
+ 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.',
+ );
die();
}
}
public function drawerror($title, $error){
- return
+ echo
$this->load("search.html", [
"class" => "",
"right-left" => "",
@@ -96,6 +105,23 @@ class frontend{
$error .
'</div>'
]);
+ die();
+ }
+
+ /*
+ Draws the error page for a failed scraper through drawerror(), which
+ also ends the request with die().
+
+ $error is the message to display; it is HTML-escaped here.
+ $get is passed to buildquery() to build the rest of the /instances link.
+ $target goes into the ?target= parameter of that link. It is written into
+ an href attribute, so it is HTML-escaped as well.
+ */
+ public function drawscrapererror($error, $get, $target){
+
+ $this->drawerror(
+ "Shit",
+ 'This scraper returned an error:' .
+ '<div class="code">' . htmlspecialchars($error) . '</div>' .
+ 'Things you can try:' .
+ '<ul>' .
+ '<li>Use a different scraper</li>' .
+ '<li>Remove keywords that could cause errors</li>' .
+ '<li><a href="/instances?target=' . htmlspecialchars($target) . "&" . $this->buildquery($get, false) . '">Try your search on another 4get instance</a></li>' .
+ '</ul><br>' .
+ 'If the error persists, please <a href="/about">contact the administrator</a>.'
+ );
 }
public function drawtextresult($site, $greentext = null, $duration = null, $keywords, $tabindex = true, $customhtml = null){
@@ -819,30 +845,7 @@ class frontend{
public function getscraperfilters($page){
- $get_scraper = null;
-
- switch($page){
-
- case "web":
- $get_scraper = isset($_COOKIE["scraper_web"]) ? $_COOKIE["scraper_web"] : null;
- break;
-
- case "images":
- $get_scraper = isset($_COOKIE["scraper_images"]) ? $_COOKIE["scraper_images"] : null;
- break;
-
- case "videos":
- $get_scraper = isset($_COOKIE["scraper_videos"]) ? $_COOKIE["scraper_videos"] : null;
- break;
-
- case "news":
- $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null;
- break;
-
- case "music":
- $get_scraper = isset($_COOKIE["scraper_news"]) ? $_COOKIE["scraper_news"] : null;
- break;
- }
+ $get_scraper = isset($_COOKIE["scraper_$page"]) ? $_COOKIE["scraper_$page"] : null;
if(
isset($_GET["scraper"]) &&
@@ -1148,32 +1151,8 @@ class frontend{
break;
case "_SEARCH":
-
- // get search string & bang
- $sanitized[$parameter] = trim($sanitized[$parameter]);
- $sanitized["bang"] = "";
-
- if(
- strlen($sanitized[$parameter]) !== 0 &&
- $sanitized[$parameter][0] == "!"
- ){
-
- $sanitized[$parameter] = explode(" ", $sanitized[$parameter], 2);
-
- $sanitized["bang"] = trim($sanitized[$parameter][0]);
-
- if(count($sanitized[$parameter]) === 2){
-
- $sanitized[$parameter] = trim($sanitized[$parameter][1]);
- }else{
-
- $sanitized[$parameter] = "";
- }
-
- $sanitized["bang"] = ltrim($sanitized["bang"], "!");
- }
-
- $sanitized[$parameter] = ltrim($sanitized[$parameter], "! \n\r\t\v\x00");
+ // get search string
+ $sanitized["s"] = trim($sanitized[$parameter]);
}
}
}
diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php
index 5c65417..cb5d38d 100644
--- a/lib/fuckhtml.php
+++ b/lib/fuckhtml.php
@@ -442,5 +442,3 @@ class fuckhtml{
return json_decode($json_out, true);
}
}
-
-?>
diff --git a/lib/nextpage.php b/lib/nextpage.php
deleted file mode 100644
index 7516667..0000000
--- a/lib/nextpage.php
+++ /dev/null
@@ -1,106 +0,0 @@
-<?php
-
-class nextpage{
-
- public function __construct($scraper){
-
- $this->scraper = $scraper;
- }
-
- public function store($payload, $page){
-
- $page = $page[0];
- $password = random_bytes(256); // 2048 bit
- $salt = random_bytes(16);
- $key = hash_pbkdf2("sha512", $password, $salt, 20000, 32, true);
- $iv =
- random_bytes(
- openssl_cipher_iv_length("aes-256-gcm")
- );
-
- $tag = "";
- $out = openssl_encrypt($payload, "aes-256-gcm", $key, OPENSSL_RAW_DATA, $iv, $tag, "", 16);
-
- $key = apcu_inc("key", 1);
-
- apcu_store(
- $page . "." .
- $this->scraper .
- (string)$key,
- gzdeflate($salt.$iv.$out.$tag),
- 900 // cache information for 15 minutes blaze it
- );
-
- return
- $this->scraper . $key . "." .
- rtrim(strtr(base64_encode($password), '+/', '-_'), '=');
- }
-
- public function get($npt, $page){
-
- $page = $page[0];
- $explode = explode(".", $npt, 2);
-
- if(count($explode) !== 2){
-
- throw new Exception("Malformed nextPageToken!");
- }
-
- $apcu = $page . "." . $explode[0];
- $key = $explode[1];
-
- $payload = apcu_fetch($apcu);
-
- if($payload === false){
-
- throw new Exception("The nextPageToken is invalid or has expired!");
- }
-
- $key =
- base64_decode(
- str_pad(
- strtr($key, '-_', '+/'),
- strlen($key) % 4,
- '=',
- STR_PAD_RIGHT
- )
- );
-
- $payload = gzinflate($payload);
-
- $key =
- hash_pbkdf2(
- "sha512",
- $key,
- substr($payload, 0, 16), // salt
- 20000,
- 32,
- true
- );
- $ivlen = openssl_cipher_iv_length("aes-256-gcm");
-
- $payload =
- openssl_decrypt(
- substr(
- $payload,
- 16 + $ivlen,
- -16
- ),
- "aes-256-gcm",
- $key,
- OPENSSL_RAW_DATA,
- substr($payload, 16, $ivlen),
- substr($payload, -16)
- );
-
- if($payload === false){
-
- throw new Exception("The nextPageToken is invalid or has expired!");
- }
-
- // remove the key after using
- apcu_delete($apcu);
-
- return $payload;
- }
-}