path: root/lib
diff options
authorlolcat <>2023-07-22 14:41:14 -0400
committerlolcat <>2023-07-22 14:41:14 -0400
commitbca265aea67ec62499aaa113a6490ce9ec7fe730 (patch)
tree3f05ec5ea542e41b474947e180034f42e99648e9 /lib
still missing things on google scraper
Diffstat (limited to 'lib')
-rw-r--r--lib/classic.pngbin0 -> 7623 bytes
-rw-r--r--lib/favicon404.pngbin0 -> 807 bytes
-rw-r--r--lib/img404.pngbin0 -> 4549 bytes
9 files changed, 2677 insertions, 0 deletions
diff --git a/lib/bingcache-todo-fix.php b/lib/bingcache-todo-fix.php
new file mode 100644
index 0000000..a4acb5b
--- /dev/null
+++ b/lib/bingcache-todo-fix.php
@@ -0,0 +1,144 @@
+// Sample of the Bing result markup scraped below; the pipe-separated "u" attribute carries the cache keys: <div class="b_attribution" u="0N|5119|4769685974291356|tEsWuE7HW3Z5AIPQMVkDH4WaotS4LrK-" tabindex="0">
+new bingcache(); // script entry point: the constructor handles the whole request
+class bingcache{
+ /**
+  * Handles the whole request: validates the "s" query parameter, asks Bing
+  * about that URL, extracts the cache keys from the "b_attribution" element
+  * and prints the location of the cached copy.
+  *
+  * Every failure path ends in do404(), which terminates the script.
+  */
+ public function __construct(){
+     // reject missing, non-string (?s[]=...) and unsafe URLs
+     if(
+         !isset($_GET["s"]) ||
+         !is_string($_GET["s"]) ||
+         $this->validate_url($_GET["s"]) === false
+     ){
+         $this->do404("Please provide a valid URL.");
+     }
+     $url = $_GET["s"];
+     $curlproc = curl_init();
+     // NOTE(review): the search endpoint prefix is an empty string in this
+     // revision, so the request cannot succeed until it is filled in
+     curl_setopt(
+         $curlproc,
+         CURLOPT_URL,
+         "" .
+         urlencode($url)
+     );
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt(
+         $curlproc,
+         CURLOPT_HTTPHEADER,
+         ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"]
+     );
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 5);
+     $data = curl_exec($curlproc);
+     if(curl_errno($curlproc)){
+         $this->do404("Failed to connect to bing servers. Please try again later.");
+     }
+     curl_close($curlproc);
+     // [^"]* instead of .* so the capture stops at the end of the u attribute
+     // even when several attribution blocks share one line
+     $found =
+         preg_match(
+             '/<div class="b_attribution" u="([^"]*)" tabindex="0">/',
+             (string)$data,
+             $match
+         );
+     if($found !== 1){
+         $this->do404("Bing has not archived this URL.");
+     }
+     $keys = explode("|", $match[1]);
+     $count = count($keys);
+     // the last two fields are needed below
+     if($count < 2){
+         $this->do404("Bing has not archived this URL.");
+     }
+     // still a work in progress: the redirect is printed instead of sent
+     //header("Location:" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
+     echo("Location:" . $keys[$count - 2] . "&w=" . $keys[$count - 1]);
+ }
+ /**
+  * Renders the error template with the given message and stops the script.
+  *
+  * @param string $text Message inserted into the template as-is.
+  */
+ public function do404($text){
+     // include_once: a plain include would redeclare the class if this
+     // file was already loaded
+     include_once "lib/frontend.php";
+     // send the status the method name promises, unless output has already
+     // started and the headers can no longer be changed
+     if(!headers_sent()){
+         http_response_code(404);
+     }
+     $frontend = new frontend();
+     echo
+         $frontend->load(
+             "error.html",
+             [
+                 "title" => "Shit",
+                 "text" => $text
+             ]
+         );
+     die();
+ }
+ /**
+  * Checks that $url is an absolute http(s) URL whose host is, or resolves
+  * to, a public IP address.
+  *
+  * @param string $url URL supplied by the client.
+  * @return string|false The literal or resolved IP address when the URL is
+  *                      acceptable, false otherwise.
+  */
+ public function validate_url($url){
+     $url_parts = parse_url($url);
+     // check if required parts are there
+     if(
+         !isset($url_parts["scheme"]) ||
+         !(
+             $url_parts["scheme"] == "http" ||
+             $url_parts["scheme"] == "https"
+         ) ||
+         !isset($url_parts["host"])
+     ){
+         return false;
+     }
+     // if its not an RFC-valid URL
+     if(!filter_var($url, FILTER_VALIDATE_URL)){
+         return false;
+     }
+     $ip =
+         str_replace(
+             ["[", "]"], // handle ipv6
+             "",
+             $url_parts["host"]
+         );
+     // if its not an IP
+     if(!filter_var($ip, FILTER_VALIDATE_IP)){
+         // resolve domain's IP (trailing dot forces an absolute lookup);
+         // on failure the name comes back unchanged and is rejected below
+         $ip = gethostbyname($url_parts["host"] . ".");
+     }
+     // refuse loopback, private and reserved ranges so the server cannot
+     // be pointed at itself or at its internal network
+     return filter_var(
+         $ip,
+         FILTER_VALIDATE_IP,
+         FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
+     );
+ }
diff --git a/lib/classic.png b/lib/classic.png
new file mode 100644
index 0000000..3d2b8fc
--- /dev/null
+++ b/lib/classic.png
Binary files differ
diff --git a/lib/curlproxy.php b/lib/curlproxy.php
new file mode 100644
index 0000000..846fbb7
--- /dev/null
+++ b/lib/curlproxy.php
@@ -0,0 +1,652 @@
+class proxy{ /*
+    Outbound HTTP helper built on cURL: buffered fetches through get() and
+    pass-through streaming through stream_linear_image() / stream_linear_audio().
+ */
+ public const req_web = 0; // get(): send browser-like document request headers
+ public const req_image = 1; // get(): send image request headers with a Referer and report the image format
+ public function __construct($cache = true){ // $cache: when false, clientcache() returns without touching the response
+ $this->cache = $cache;
+ }
+ /**
+  * Answers the request with HTTP 404 and the placeholder PNG, then stops
+  * the script.
+  */
+ public function do404(){
+     http_response_code(404);
+     header("Content-Type: image/png");
+     // readfile() copies the file straight to the output; checking first
+     // avoids handing a failed fopen() result to fread()
+     if(is_readable("lib/img404.png")){
+         readfile("lib/img404.png");
+     }
+     die();
+ }
+ /**
+  * Resolves $path (for example a Location header) against the URL it was
+  * found on.
+  *
+  * @param string $path     Absolute URL, protocol-relative URL ("//host/x"),
+  *                         root-relative path ("/x") or relative path ("x").
+  * @param string $relative Absolute URL of the document $path came from.
+  * @return string The resolved URL. Dot segments are not normalized.
+  */
+ public function getabsoluteurl($path, $relative){
+     // already absolute and acceptable
+     if($this->validateurl($path)){
+         return $path;
+     }
+     // protocol-relative
+     if(substr($path, 0, 2) == "//"){
+         return "https:" . $path;
+     }
+     $base = parse_url($relative);
+     $url = $base["scheme"] . "://";
+     if(
+         isset($base["user"]) &&
+         isset($base["pass"])
+     ){
+         $url .= $base["user"] . ":" . $base["pass"] . "@";
+     }
+     $url .= $base["host"];
+     // keep a non-default port, otherwise the result points at another service
+     if(isset($base["port"])){
+         $url .= ":" . $base["port"];
+     }
+     $rooted =
+         strlen($path) !== 0 &&
+         $path[0] === "/";
+     // a path starting with "/" replaces the whole base path; only
+     // relative paths are resolved against the base directory
+     if(!$rooted){
+         if(isset($base["path"])){
+             $segments = explode("/", $base["path"]);
+             array_pop($segments); // drop the document name
+             $url .= implode("/", $segments);
+         }
+         if(strlen($path) !== 0){
+             $url .= "/";
+         }
+     }
+     return $url . $path;
+ }
+ /**
+  * Tells whether $url is an absolute http(s) URL whose host is, or resolves
+  * to, a public IP address.
+  *
+  * @param string $url URL to check.
+  * @return bool True when the URL may be fetched, false otherwise.
+  */
+ public function validateurl($url){
+     $url_parts = parse_url($url);
+     // check if required parts are there
+     if(
+         !isset($url_parts["scheme"]) ||
+         !(
+             $url_parts["scheme"] == "http" ||
+             $url_parts["scheme"] == "https"
+         ) ||
+         !isset($url_parts["host"])
+     ){
+         return false;
+     }
+     $ip =
+         str_replace(
+             ["[", "]"], // handle ipv6
+             "",
+             $url_parts["host"]
+         );
+     // if its not an IP
+     if(!filter_var($ip, FILTER_VALIDATE_IP)){
+         // resolve domain's IP; on failure the name comes back unchanged
+         // and is rejected by the check below
+         $ip = gethostbyname($url_parts["host"] . ".");
+     }
+     // refuse loopback, private and reserved ranges
+     return
+         filter_var(
+             $ip,
+             FILTER_VALIDATE_IP,
+             FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
+         ) !== false;
+ }
+ /**
+  * Downloads $url into memory and returns the parsed response.
+  *
+  * Redirects are followed by calling this method again, so every hop goes
+  * through validateurl(); at most 5 hops are allowed. Downloads are aborted
+  * once more than 100MB have been received.
+  *
+  * @param string      $url            Absolute http(s) URL.
+  * @param int         $reqtype        self::req_web or self::req_image; selects the request headers.
+  * @param bool        $acceptallcodes When false, a status code above 300 raises an exception.
+  * @param string|null $referer        Referer for image requests; defaults to the scheme and host of $url.
+  * @param int         $redirectcount  Number of redirects followed so far (used by the recursion).
+  * @return array "http" (version, code), "headers" (lowercase names) and "body";
+  *               image requests also carry "format" (subtype from Content-Type, or false).
+  * @throws Exception On invalid URL, transport error, broken or excessive redirects,
+  *                   rejected status code, or an HTML answer to an image request.
+  */
+ public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){
+     if($redirectcount === 5){
+         throw new Exception("Too many redirects");
+     }
+     // sanitize URL: validateurl() reports failure through its return
+     // value and never throws, so the result has to be checked
+     if($this->validateurl($url) === false){
+         throw new Exception("Invalid URL");
+     }
+     $this->clientcache();
+     $curl = curl_init();
+     curl_setopt($curl, CURLOPT_URL, $url);
+     curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curl, CURLOPT_HEADER, 1); // headers are parsed out of the body below
+     switch($reqtype){
+         case self::req_web:
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+                     "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Upgrade-Insecure-Requests: 1",
+                     "Sec-Fetch-Dest: document",
+                     "Sec-Fetch-Mode: navigate",
+                     "Sec-Fetch-Site: none",
+                     "Sec-Fetch-User: ?1"
+                 ]
+             );
+             break;
+         case self::req_image:
+             if($referer === null){
+                 // scheme://host of the requested URL
+                 $referer = explode("/", $url, 4);
+                 array_pop($referer);
+                 $referer = implode("/", $referer);
+             }
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0",
+                     "Accept: image/avif,image/webp,*/*",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+     }
+     curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curl, CURLOPT_TIMEOUT, 30);
+     // limit size of payloads
+     curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
+     curl_setopt($curl, CURLOPT_NOPROGRESS, false);
+     curl_setopt(
+         $curl,
+         CURLOPT_PROGRESSFUNCTION,
+         // cURL passes the handle first; without that parameter the
+         // byte counters shift by one position
+         function($handle, $downloadsize, $downloaded, $uploadsize, $uploaded){
+             // a non-zero return aborts the transfer (cap: 100MB)
+             return ($downloaded > 100000000) ? 1 : 0;
+         }
+     );
+     $body = curl_exec($curl);
+     if(curl_errno($curl)){
+         $error = curl_error($curl);
+         curl_close($curl);
+         throw new Exception($error);
+     }
+     curl_close($curl);
+     $body = (string)$body;
+     $headers = [];
+     $http = null;
+     // peel the header lines off the front of the response one at a time
+     while(true){
+         $header = explode("\n", $body, 2);
+         $body = isset($header[1]) ? $header[1] : "";
+         if($http === null){
+             // http/1.1 200 ok
+             $status = explode("/", $header[0], 2);
+             $status = explode(" ", isset($status[1]) ? $status[1] : "", 3);
+             $http = [
+                 "version" => (float)$status[0],
+                 "code" => isset($status[1]) ? (int)$status[1] : 0
+             ];
+             continue;
+         }
+         if(trim($header[0]) == ""){
+             // reached end of headers
+             break;
+         }
+         $header = explode(":", $header[0], 2);
+         // malformed headers
+         if(count($header) !== 2){ continue; }
+         $headers[strtolower(trim($header[0]))] = trim($header[1]);
+     }
+     // check http code
+     if(
+         $http["code"] >= 300 &&
+         $http["code"] <= 309
+     ){
+         // redirect
+         if(!isset($headers["location"])){
+             throw new Exception("Broken redirect");
+         }
+         $redirectcount++;
+         return $this->get($this->getabsoluteurl($headers["location"], $url), $reqtype, $acceptallcodes, $referer, $redirectcount);
+     }
+     if(
+         $acceptallcodes === false &&
+         $http["code"] > 300
+     ){
+         throw new Exception("Remote server returned an error code! ({$http["code"]})");
+     }
+     // check if data is okay
+     switch($reqtype){
+         case self::req_image:
+             $format = false;
+             if(isset($headers["content-type"])){
+                 if($headers["content-type"] == "text/html"){
+                     throw new Exception("Server returned an html document instead of image");
+                 }
+                 // look for an image/<subtype> token among the ";" separated parts
+                 foreach(explode(";", $headers["content-type"]) as $part){
+                     if(
+                         preg_match(
+                             '/^image\/([^ ]+)/i',
+                             $part,
+                             $match
+                         )
+                     ){
+                         $format = strtolower($match[1]);
+                         if(substr($format, 0, 2) == "x-"){
+                             $format = substr($format, 2);
+                         }
+                         break;
+                     }
+                 }
+             }
+             return [
+                 "http" => $http,
+                 "format" => $format,
+                 "headers" => $headers,
+                 "body" => $body
+             ];
+         default:
+             return [
+                 "http" => $http,
+                 "headers" => $headers,
+                 "body" => $body
+             ];
+     }
+ }
+ public function stream_linear_image($url, $referer = null){ // forwards to stream() with format "image"; stream() throws when the remote Content-Type has another main type
+ $this->stream($url, $referer, "image");
+ }
+ public function stream_linear_audio($url, $referer = null){ // forwards to stream() with format "audio"; stream() throws when the remote Content-Type has another main type
+ $this->stream($url, $referer, "audio");
+ }
+ /**
+  * Fetches $url and relays the response body to the client while it is
+  * being downloaded, after checking that the final response is a 200 whose
+  * Content-Type main type equals $format.
+  *
+  * NOTE(review): redirects are followed by cURL itself here, so only the
+  * first URL goes through validateurl(); confirm that is acceptable.
+  *
+  * @param string      $url     Absolute http(s) URL.
+  * @param string|null $referer Referer to send; defaults to the scheme and host of $url.
+  * @param string      $format  "image" or "audio".
+  * @throws Exception On invalid URL, transport error, non-200 answer or wrong content type.
+  */
+ private function stream($url, $referer, $format){
+     $this->url = $url;
+     $this->format = $format;
+     // sanitize URL: validateurl() reports failure through its return
+     // value and never throws, so the result has to be checked
+     if($this->validateurl($url) === false){
+         throw new Exception("Invalid URL");
+     }
+     $this->clientcache();
+     $curl = curl_init();
+     // set headers
+     if($referer === null){
+         // scheme://host of the requested URL
+         $referer = explode("/", $url, 4);
+         array_pop($referer);
+         $referer = implode("/", $referer);
+     }
+     switch($format){
+         case "image":
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+                     "Accept: image/avif,image/webp,*/*",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate, br",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+         case "audio":
+             curl_setopt(
+                 $curl,
+                 CURLOPT_HTTPHEADER,
+                 [
+                     "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+                     "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
+                     "Accept-Language: en-US,en;q=0.5",
+                     "Accept-Encoding: gzip, deflate, br",
+                     "DNT: 1",
+                     "Connection: keep-alive",
+                     "Referer: {$referer}"
+                 ]
+             );
+             break;
+     }
+     // follow redirects
+     curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+     curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
+     curl_setopt($curl, CURLOPT_AUTOREFERER, true);
+     // set url
+     curl_setopt($curl, CURLOPT_URL, $url);
+     curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
+     // timeouts; certificate and host verification stay enabled
+     curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
+     curl_setopt($curl, CURLOPT_TIMEOUT, 30);
+     // body chunks are echoed as they arrive
+     curl_setopt(
+         $curl,
+         CURLOPT_WRITEFUNCTION,
+         function($c, $data){
+             if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
+                 throw new Exception("Serber returned a non-200 code");
+             }
+             echo $data;
+             return strlen($data);
+         }
+     );
+     // state shared with the header callback below
+     $this->empty_header = false;
+     $this->cont = false;
+     $this->headers_tmp = [];
+     $this->headers = [];
+     // called once per header line; the blank line ends a header block
+     curl_setopt(
+         $curl,
+         CURLOPT_HEADERFUNCTION,
+         function($c, $header){
+             $head = trim($header);
+             $len = strlen($head);
+             if($len === 0){
+                 $this->empty_header = true;
+                 $this->headers_tmp = [];
+             }else{
+                 $this->empty_header = false;
+                 $this->headers_tmp[] = $head;
+             }
+             foreach($this->headers_tmp as $h){
+                 // parse headers
+                 $h = explode(":", $h, 2);
+                 if(count($h) !== 2){
+                     // a line without ":" is the status line of the block
+                     if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
+                         // not HTTP 200, probably a redirect
+                         $this->cont = false;
+                     }else{
+                         $this->cont = true;
+                     }
+                     // is HTTP 200, just ignore that line
+                     continue;
+                 }
+                 $this->headers[strtolower(trim($h[0]))] = trim($h[1]);
+             }
+             if(
+                 $this->cont &&
+                 $this->empty_header
+             ){
+                 // get content type
+                 if(isset($this->headers["content-type"])){
+                     $filetype = explode("/", $this->headers["content-type"]);
+                     if(strtolower($filetype[0]) != $this->format){
+                         throw new Exception("Resource is not an {$this->format} (Found {$filetype[0]} instead)");
+                     }
+                 }else{
+                     throw new Exception("Resource is not an {$this->format} (no Content-Type)");
+                 }
+                 header("Content-Type: {$this->format}/{$filetype[1]}");
+                 // give payload size
+                 if(isset($this->headers["content-length"])){
+                     header("Content-Length: {$this->headers["content-length"]}");
+                 }
+                 // give filename
+                 $this->getfilenameheader($this->headers, $this->url, $filetype[1]);
+             }
+             return strlen($header);
+         }
+     );
+     curl_exec($curl);
+     if(curl_errno($curl)){
+         $error = curl_error($curl);
+         curl_close($curl);
+         throw new Exception($error);
+     }
+     curl_close($curl);
+ }
+ /**
+  * Sends a Content-Disposition header suggesting a filename for the
+  * proxied resource.
+  *
+  * The name is taken, in order of preference, from the upstream
+  * Content-Disposition header, from the last path segment of $url (without
+  * its extension), or from the host name. $filetype is appended as the
+  * extension in every case.
+  *
+  * @param array  $headers  Upstream response headers, lowercase names.
+  * @param string $url      URL the resource was fetched from.
+  * @param string $filetype Extension to append.
+  */
+ public function getfilenameheader($headers, $url, $filetype = "jpg"){
+     $name = null;
+     // get filename from content-disposition header
+     if(
+         isset($headers["content-disposition"]) &&
+         preg_match(
+             '/filename=([^;]+)/',
+             $headers["content-disposition"],
+             $match
+         )
+     ){
+         $name = $match[1];
+     }
+     if($name === null){
+         // get filename from URL
+         $path = parse_url($url, PHP_URL_PATH);
+         if(
+             is_string($path) &&
+             basename($path) !== ""
+         ){
+             // remove extension from filename
+             $pieces = explode(".", basename($path));
+             if(count($pieces) > 1){
+                 array_pop($pieces);
+             }
+             $name = implode(".", $pieces);
+         }else{
+             // everything failed! rename file to domain name
+             $name = (string)parse_url($url, PHP_URL_HOST);
+         }
+     }
+     // the value is sent as a quoted string, so quotes and backslashes
+     // coming from upstream must not end up inside it
+     $name =
+         trim(
+             str_replace(
+                 ['"', "\\", "\r", "\n"],
+                 "",
+                 $name . "." . $filetype
+             )
+         );
+     // always include the disposition type, which the header requires
+     header('Content-Disposition: inline; filename="' . $name . '"');
+ }
+ /**
+  * Works out the image format of a payload returned by get().
+  *
+  * The body is sniffed with finfo first; when that fails the format taken
+  * from the response headers is used instead. The result is mapped through
+  * a few aliases and finally checked against the formats Imagick reports.
+  *
+  * @param array   $payload Result of get() for an image request ("body", "format").
+  * @param Imagick $imagick Receives a new Imagick instance (by reference).
+  * @return string|false Format name, or false when Imagick does not list it.
+  */
+ public function getimageformat($payload, &$imagick){
+     $sniffer = new finfo(FILEINFO_MIME_TYPE);
+     $format = $sniffer->buffer($payload["body"]);
+     if($format !== false){
+         $mime = explode("/", $format, 2);
+         if($mime[0] == "image"){
+             // keep only the subtype, without a vendor "x-" prefix
+             $subtype = strtolower($mime[1]);
+             if(substr($subtype, 0, 2) == "x-"){
+                 $subtype = substr($subtype, 2);
+             }
+             $format = $subtype;
+         }
+     }else{
+         // sniffing failed, fall back on what the server announced
+         if($payload["format"] === false){
+             header("X-Error: Could not parse format");
+             $this->favicon404();
+         }
+         $format = $payload["format"];
+     }
+     // aliases, checked in this order
+     if($format == "tiff"){
+         $format = "gif";
+     }elseif($format == ""){
+         $format = "ico";
+     }elseif($format == "icon"){
+         $format = "ico";
+     }elseif($format == "svg+xml"){
+         $format = "svg";
+     }
+     $imagick = new Imagick();
+     $supported = array_map("strtolower", $imagick->queryFormats());
+     if(in_array($format, $supported) === false){
+         // format could not be found, but imagemagick can
+         // sometimes detect it? shit's fucked
+         $format = false;
+     }
+     return $format;
+ }
+ /**
+  * Lets the client keep proxied resources in its cache: sends a fixed
+  * Last-Modified date and answers conditional requests with 304 before
+  * anything is fetched. Does nothing when caching was disabled in the
+  * constructor.
+  */
+ public function clientcache(){
+     if($this->cache === false){
+         return;
+     }
+     header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");
+     // header names are case-insensitive and may arrive in lowercase,
+     // so normalize them before looking anything up
+     $headers = array_change_key_case(getallheaders(), CASE_LOWER);
+     if(
+         isset($headers["if-modified-since"]) ||
+         isset($headers["if-unmodified-since"])
+     ){
+         http_response_code(304); // 304: Not Modified
+         die();
+     }
+ }
diff --git a/lib/favicon404.png b/lib/favicon404.png
new file mode 100644
index 0000000..7540694
--- /dev/null
+++ b/lib/favicon404.png
Binary files differ
diff --git a/lib/frontend.php b/lib/frontend.php
new file mode 100644
index 0000000..3be912b
--- /dev/null
+++ b/lib/frontend.php
@@ -0,0 +1,1282 @@
+class frontend{
+ /**
+  * Loads a file from template/, removes line breaks and per-line
+  * indentation, and substitutes {%name%} placeholders.
+  *
+  * All placeholders are substituted in one pass, so a replacement value
+  * that itself contains placeholder syntax (a search query, for instance)
+  * is emitted literally instead of being expanded.
+  *
+  * @param string $template     File name inside template/.
+  * @param array  $replacements Placeholder name => replacement text.
+  * @return string The rendered markup.
+  * @throws Exception When the template cannot be read.
+  */
+ public function load($template, $replacements = []){
+     $path = "template/{$template}";
+     $data = is_file($path) ? file_get_contents($path) : false;
+     if($data === false){
+         throw new Exception("Could not read template {$template}");
+     }
+     $html = "";
+     foreach(explode("\n", $data) as $line){
+         $html .= trim($line);
+     }
+     $pairs = [];
+     foreach($replacements as $key => $value){
+         $pairs["{%{$key}%}"] = (string)$value;
+     }
+     if(count($pairs) !== 0){
+         $html = strtr($html, $pairs);
+     }
+     return trim($html);
+ }
+ /**
+  * Returns the CSS class selected by the "theme" cookie.
+  *
+  * @param bool $raw When true the result is a ready-made ' class="..."'
+  *                  attribute (or an empty string); when false it is the
+  *                  bare class list, trailing space included.
+  * @return string
+  */
+ public function getthemeclass($raw = true){
+     $wants_cream =
+         isset($_COOKIE["theme"]) &&
+         $_COOKIE["theme"] == "cream";
+     $classes = $wants_cream ? "theme-white " : "";
+     // nothing to wrap, or the caller asked for the bare list
+     if(
+         $classes == "" ||
+         !$raw
+     ){
+         return $classes;
+     }
+     return ' class="' . rtrim($classes) . '"';
+ }
+ /**
+  * Prints the page header for a results page, then stops the script with a
+  * "blocked" notice when the User-Agent looks like an automated client.
+  *
+  * @param array  $get     Sanitized request parameters; "s" is the search string.
+  * @param array  $filters Filter definitions handed to generatehtmlfilters().
+  * @param string $page    Page name shown in the title and passed to the tab generator.
+  */
+ public function loadheader(array $get, array $filters, string $page){
+     echo
+         $this->load("header.html", [
+             // escaped like the other fields built from the search string
+             "title" => trim(htmlspecialchars($get["s"]) . " ({$page})"),
+             "description" => ucfirst($page) . ' search results for &quot;' . htmlspecialchars($get["s"]) . '&quot;',
+             "index" => "no",
+             "search" => htmlspecialchars($get["s"]),
+             "tabs" => $this->generatehtmltabs($page, $get["s"]),
+             "filters" => $this->generatehtmlfilters($filters, $get),
+             "body_class" => $this->getthemeclass()
+         ]);
+     // clients that send no User-Agent at all are matched against ""
+     $agent = isset($_SERVER["HTTP_USER_AGENT"]) ? $_SERVER["HTTP_USER_AGENT"] : "";
+     if(
+         preg_match(
+             '/bot|wget|curl|python-requests|scrapy|feedfetcher|go-http-client|ruby|universalfeedparser|yahoo\! slurp|spider|rss/i',
+             $agent
+         )
+     ){
+         // bot detected !!
+         echo
+             $this->drawerror(
+                 "Tshh, blocked!",
+                 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.'
+             );
+         die();
+     }
+ }
+ /**
+  * Renders the search template with a single info box in the left column.
+  *
+  * @param string $title Heading of the box; escaped here.
+  * @param string $error Body of the box; inserted as HTML, not escaped.
+  * @return string The rendered page fragment.
+  */
+ public function drawerror($title, $error){
+     $infobox =
+         '<div class="infobox">' .
+         '<h1>' . htmlspecialchars($title) . '</h1>' .
+         $error .
+         '</div>';
+     // the other template slots stay empty
+     $fields = [
+         "class" => "",
+         "right-left" => "",
+         "right-right" => "",
+         "left" => $infobox
+     ];
+     return $this->load("search.html", $fields);
+ }
+ /**
+  * Builds the HTML for one text result: URL header, optional thumbnail,
+  * title, optional green line, description, sublinks and info table.
+  *
+  * $greentext and $duration no longer declare a default: they sit before
+  * the required $keywords, so the default could never be used and only
+  * produced a deprecation notice. Pass null to omit either of them.
+  *
+  * @param array       $site      Result data: "url", "title", "description" and
+  *                               "thumb" ("url", "ratio") are read; "type",
+  *                               "sublink" and "table" are optional.
+  * @param string|null $greentext Line shown under the title, or null.
+  * @param string|null $duration  Label drawn over the thumbnail, or null.
+  * @param string      $keywords  Space separated words to highlight in descriptions.
+  * @param bool        $tabindex  False takes the main link out of the tab order.
+  * @return string
+  */
+ public function drawtextresult($site, $greentext, $duration, $keywords, $tabindex = true){
+     $payload =
+         '<div class="text-result">';
+     // add favicon, link and archive links
+     $payload .= $this->drawlink($site["url"]);
+     /*
+         Draw title + description + filetype
+     */
+     $payload .=
+         '<a href="' . htmlspecialchars($site["url"]) . '" class="hover" rel="noreferrer nofollow"';
+     if($tabindex === false){
+         $payload .= ' tabindex="-1"';
+     }
+     $payload .= '>';
+     if($site["thumb"]["url"] !== null){
+         $payload .=
+             '<div class="thumb-wrap';
+         // 16:9, and any ratio not listed below, uses the wrapper without a
+         // modifier class and a landscape crop
+         $size = "landscape";
+         switch($site["thumb"]["ratio"]){
+             case "9:16":
+                 $payload .= " portrait";
+                 $size = "portrait";
+                 break;
+             case "1:1":
+                 $payload .= " square";
+                 $size = "square";
+                 break;
+         }
+         $payload .=
+             '">' .
+             '<img class="thumb" src="/proxy?i=' . urlencode($site["thumb"]["url"]) . '&s=' . $size . '" alt="thumb">';
+         if($duration !== null){
+             $payload .=
+                 '<div class="duration">' .
+                 htmlspecialchars($duration) .
+                 '</div>';
+         }
+         $payload .=
+             '</div>';
+     }
+     $payload .=
+         '<div class="title">';
+     if(
+         isset($site["type"]) &&
+         $site["type"] != "web"
+     ){
+         $payload .= '<div class="type">' . strtoupper($site["type"]) . '</div>';
+     }
+     $payload .=
+         htmlspecialchars($site["title"]) .
+         '</div>';
+     if($greentext !== null){
+         $payload .=
+             '<div class="greentext">' .
+             htmlspecialchars($greentext) .
+             '</div>';
+     }
+     if($site["description"] !== null){
+         $payload .=
+             '<div class="description">' .
+             $this->highlighttext($keywords, $site["description"]) .
+             '</div>';
+     }
+     $payload .= '</a>';
+     /*
+         Sublinks
+     */
+     if(
+         isset($site["sublink"]) &&
+         !empty($site["sublink"])
+     ){
+         // shortest descriptions first; the comparator has to return an
+         // integer, and a missing description counts as empty
+         usort($site["sublink"], function($a, $b){
+             $len_a = strlen(isset($a["description"]) ? (string)$a["description"] : "");
+             $len_b = strlen(isset($b["description"]) ? (string)$b["description"] : "");
+             return $len_a <=> $len_b;
+         });
+         $payload .=
+             '<div class="sublinks">' .
+             '<table>';
+         // two cells per row; $opentr tells whether a row still waits for
+         // its second cell
+         $opentr = false;
+         for($i=0; $i<count($site["sublink"]); $i++){
+             if(($i % 2) === 0){
+                 $opentr = true;
+                 $payload .= '<tr>';
+             }else{
+                 $opentr = false;
+             }
+             $payload .=
+                 '<td>' .
+                 '<a href="' . htmlspecialchars($site["sublink"][$i]["url"]) . '" rel="noreferrer nofollow">' .
+                 '<div class="title">' .
+                 htmlspecialchars($site["sublink"][$i]["title"]) .
+                 '</div>';
+             if(!empty($site["sublink"][$i]["date"])){
+                 $payload .=
+                     '<div class="greentext">' .
+                     date("jS M y @ g:ia", $site["sublink"][$i]["date"]) .
+                     '</div>';
+             }
+             if(!empty($site["sublink"][$i]["description"])){
+                 $payload .=
+                     '<div class="description">' .
+                     $this->highlighttext($keywords, $site["sublink"][$i]["description"]) .
+                     '</div>';
+             }
+             $payload .= '</a></td>';
+             if($opentr === false){
+                 $payload .= '</tr>';
+             }
+         }
+         if($opentr === true){
+             // odd number of sublinks: pad the last row
+             $payload .= '<td></td></tr>';
+         }
+         $payload .= '</table></div>';
+     }
+     if(
+         isset($site["table"]) &&
+         !empty($site["table"])
+     ){
+         $payload .= '<table class="info-table">';
+         foreach($site["table"] as $title => $value){
+             $payload .=
+                 '<tr>' .
+                 '<td>' . htmlspecialchars($title) . '</td>' .
+                 '<td>' . htmlspecialchars($value) . '</td>' .
+                 '</tr>';
+         }
+         $payload .= '</table>';
+     }
+     return $payload . '</div>';
+ }
+ /**
+  * Escapes $text for HTML and wraps every whole-word, case-insensitive
+  * occurrence of a keyword in <b> tags.
+  *
+  * Matching is done on the raw text and each piece is escaped afterwards,
+  * so a keyword such as "amp" cannot match inside an entity like &amp;.
+  * Empty keywords (empty query, doubled spaces) are ignored.
+  *
+  * @param string $keywords Space separated words.
+  * @param string $text     Plain text to display.
+  * @return string HTML-safe text with highlights.
+  */
+ public function highlighttext($keywords, $text){
+     $patterns = [];
+     foreach(explode(" ", (string)$keywords) as $word){
+         // an empty word would turn into \b\b, which matches at every
+         // word boundary of the text
+         if($word === ""){
+             continue;
+         }
+         $patterns[] = "\b" . preg_quote($word, "/") . "\b";
+     }
+     if(count($patterns) === 0){
+         return htmlspecialchars($text);
+     }
+     // with one capture group the result alternates between plain text
+     // (even indexes) and matched keywords (odd indexes)
+     $chunks =
+         preg_split(
+             "/(" . implode("|", $patterns) . ")/i",
+             $text,
+             -1,
+             PREG_SPLIT_DELIM_CAPTURE
+         );
+     if($chunks === false){
+         return htmlspecialchars($text);
+     }
+     $html = "";
+     foreach($chunks as $index => $chunk){
+         $escaped = htmlspecialchars($chunk);
+         $html .= ($index % 2 === 1) ? '<b>' . $escaped . '</b>' : $escaped;
+     }
+     return $html;
+ }
+ function highlightcode($text){ /*
+    Runs $text through highlight_string(), prefixed with an opening PHP tag,
+    and post-processes the generated HTML: the tag prefix and the code wrapper
+    are removed, line-break tags and non-breaking-space entities become plain
+    newlines and spaces, and the inline color styles are turned into
+    c-comment / c-default / c-keyword / c-string class attributes.
+    NOTE(review): the string replacements assume the exact markup
+    highlight_string() produced when this was written; confirm against the
+    PHP version in use.
+ */
+ // the highlighter "colors" are set to class names so the style attributes can be swapped for classes below
+ ini_set("highlight.comment", "c-comment");
+ ini_set("highlight.default", "c-default");
+ ini_set("highlight.html", "c-default");
+ ini_set("highlight.keyword", "c-keyword");
+ ini_set("highlight.string", "c-string");
+ $text =
+ trim(
+ preg_replace(
+ '/<\/span>$/',
+ "", // remove stray ending span because of the <?php stuff
+ str_replace(
+ [
+ '<br />',
+ '&nbsp;'
+ ],
+ [
+ "\n", // replace <br> with newlines
+ " " // replace html entity to space
+ ],
+ str_replace(
+ [
+ // leading <?php garbage
+ "<span style=\"color: c-default\">\n&lt;?php&nbsp;",
+ "<code>",
+ "</code>"
+ ],
+ "",
+ highlight_string("<?php " . $text, true)
+ )
+ )
+ )
+ );
+ // replace colors
+ $classes = ["c-comment", "c-default", "c-keyword", "c-string"];
+ foreach($classes as $class){
+ $text = str_replace('<span style="color: ' . $class . '">', '<span class="' . $class . '">', $text);
+ }
+ return $text;
+ }
+ public function drawlink($link){ /*
+    Builds the URL header of a result: a favicon button, a dropdown with
+    archive and cache links, and the URL itself split at each "/" into
+    separately clickable parts.
+    NOTE(review): in this revision the host names compared below, every
+    archive entry and the cache link prefixes are empty strings; they look
+    like they were blanked out, confirm against the real file before relying
+    on any of them.
+ */
+ /*
+ Add favicon
+ */
+ $host = parse_url($link);
+ $esc =
+ explode(
+ ".",
+ $host["host"],
+ 2
+ );
+ if(
+ count($esc) === 2 &&
+ $esc[0] == "www"
+ ){
+ $esc = $esc[1];
+ }else{
+ $esc = $esc[0];
+ }
+ $esc = substr($esc, 0, 2); // first two characters of the host, used as the favicon alt text
+ $urlencode = urlencode($link);
+ $payload =
+ '<div class="url">' .
+ '<button class="favicon" tabindex="-1">' .
+ '<img src="/favicon?s=' . htmlspecialchars($host["scheme"] . "://" . $host["host"]) . '" alt="' . htmlspecialchars($esc) . '">' .
+ //'<img src="/404.php" alt="' . htmlspecialchars($esc) . '">' .
+ '</button>' .
+ '<div class="favicon-dropdown">';
+ /*
+ Add archive links
+ */
+ if(
+ $host["host"] == "" ||
+ $host["host"] == ""
+ ){
+ $archives = [];
+ $path = explode("/", $host["path"]);
+ $count = count($path);
+ // /pol/thread/417568063/post-shitty-memes-if-you-want-to
+ if($count !== 0){
+ $isboard = true;
+ switch($path[1]){ // first path segment is the board name; each known board appends its archive hosts
+ case "con":
+ break;
+ case "q":
+ $archives[] = "";
+ break;
+ case "qa":
+ $archives[] = "";
+ break;
+ case "qb":
+ $archives[] = "";
+ break;
+ case "trash":
+ $archives[] = "";
+ break;
+ case "a":
+ $archives[] = "";
+ break;
+ case "c":
+ $archives[] = "";
+ break;
+ case "w":
+ break;
+ case "m":
+ $archives[] = "";
+ break;
+ case "cgl":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "cm":
+ $archives[] = "";
+ break;
+ case "f":
+ $archives[] = "";
+ break;
+ case "n":
+ break;
+ case "jp":
+ $archives[] = "";
+ break;
+ case "vt":
+ $archives[] = "";
+ break;
+ case "v":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "vg":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "vm":
+ $archives[] = "";
+ break;
+ case "vmg":
+ $archives[] = "";
+ break;
+ case "vp":
+ $archives[] = "";
+ break;
+ case "vr":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "vrpg":
+ $archives[] = "";
+ break;
+ case "vst":
+ $archives[] = "";
+ break;
+ case "co":
+ $archives[] = "";
+ break;
+ case "g":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "tv":
+ $archives[] = "";
+ break;
+ case "k":
+ $archives[] = "";
+ break;
+ case "o":
+ $archives[] = "";
+ break;
+ case "an":
+ $archives[] = "";
+ break;
+ case "tg":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "sp":
+ $archives[] = "";
+ break;
+ case "xs":
+ $archives[] = "";
+ break;
+ case "pw":
+ break;
+ case "sci":
+ $archives[] = "";
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "his":
+ $archives[] = "";
+ break;
+ case "int":
+ $archives[] = "";
+ break;
+ case "out":
+ break;
+ case "toy":
+ break;
+ case "i":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "po":
+ break;
+ case "p":
+ break;
+ case "ck":
+ $archives[] = "";
+ break;
+ case "ic":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "wg":
+ break;
+ case "lit":
+ $archives[] = "";
+ break;
+ case "mu":
+ $archives[] = "";
+ break;
+ case "fa":
+ $archives[] = "";
+ break;
+ case "3":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "gd":
+ break;
+ case "diy":
+ $archives[] = "";
+ break;
+ case "wsg":
+ $archives[] = "";
+ break;
+ case "qst":
+ break;
+ case "biz":
+ $archives[] = "";
+ break;
+ case "trv":
+ $archives[] = "";
+ break;
+ case "fit":
+ $archives[] = "";
+ break;
+ case "x":
+ $archives[] = "";
+ break;
+ case "adv":
+ $archives[] = "";
+ break;
+ case "lgbt":
+ $archives[] = "";
+ break;
+ case "mlp":
+ $archives[] = "";
+ $archives[] = "";
+ break;
+ case "news":
+ break;
+ case "wsr":
+ break;
+ case "vip":
+ break;
+ case "b":
+ $archives[] = "";
+ break;
+ case "r9k":
+ $archives[] = "";
+ break;
+ case "pol":
+ $archives[] = "";
+ break;
+ case "bant":
+ $archives[] = "";
+ break;
+ case "soc":
+ $archives[] = "";
+ break;
+ case "s4s":
+ $archives[] = "";
+ break;
+ case "s":
+ $archives[] = "";
+ break;
+ case "hc":
+ $archives[] = "";
+ break;
+ case "hm":
+ $archives[] = "";
+ break;
+ case "h":
+ $archives[] = "";
+ break;
+ case "e":
+ break;
+ case "u":
+ $archives[] = "";
+ break;
+ case "d":
+ $archives[] = "";
+ break;
+ case "y":
+ $archives[] = "";
+ break;
+ case "t":
+ $archives[] = "";
+ break;
+ case "hr":
+ $archives[] = "";
+ break;
+ case "gif":
+ break;
+ case "aco":
+ $archives[] = "";
+ break;
+ case "r":
+ $archives[] = "";
+ break;
+ default:
+ $isboard = false;
+ break;
+ }
+ if($isboard === true){ // one more archive is appended for every recognised board
+ $archives[] = "";
+ }
+ $trail = "";
+ if(
+ isset($path[2]) &&
+ isset($path[3]) &&
+ $path[2] == "thread"
+ ){
+ $trail .= "/" . $path[1] . "/thread/" . $path[3];
+ }elseif($isboard){
+ $trail = "/" . $path[1] . "/";
+ }
+ for($i=0; $i<count($archives); $i++){
+ $payload .=
+ '<a href="https://' . $archives[$i] . $trail . '" class="list" target="_BLANK">' .
+ '<img src="/favicon?s=https://' . $archives[$i] . '" alt="' . $archives[$i][0] . $archives[$i][1] . '">' .
+ $archives[$i] .
+ '</a>';
+ }
+ }
+ }
+ $payload .=
+ '<a href="' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=" alt="go">Google cache</a>' .
+ '<a href="' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=" alt="ar"></a>' .
+ '<a href="' . htmlspecialchars($link) . '" class="list" target="_BLANK"><img src="/favicon?s=" alt="ar"></a>' .
+ '<a href="' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=" alt="bi">Bing cache</a>' .
+ '<a href="' . $urlencode . '" class="list" target="_BLANK"><img src="/favicon?s=" alt="me">Megalodon</a>' .
+ '</div>';
+ /*
+ Draw link
+ */
+ $parts = explode("/", $link);
+ $clickurl = "";
+ // remove trailing /
+ $c = count($parts) - 1;
+ if($parts[$c] == ""){
+ $parts[$c - 1] = $parts[$c - 1] . "/";
+ unset($parts[$c]);
+ }
+ // merge https://site together
+ $parts = [
+ $parts[0] . $parts[1] . '//' . $parts[2],
+ ...array_slice($parts, 3, count($parts) - 1)
+ ];
+ $c = count($parts);
+ for($i=0; $i<$c; $i++){ // $clickurl grows by one part per iteration, so each part links to its own prefix of the URL
+ if($i !== 0){ $clickurl .= "/"; }
+ $clickurl .= $parts[$i];
+ if($i === $c - 1){
+ $parts[$i] = rtrim($parts[$i], "/");
+ }
+ $payload .=
+ '<a class="part" href="' . htmlspecialchars($clickurl) . '" rel="noreferrer nofollow" tabindex="-1">' .
+ htmlspecialchars(urldecode($parts[$i])) .
+ '</a>';
+ if($i !== $c - 1){
+ $payload .= '<span class="separator"></span>';
+ }
+ }
+ return $payload . '</div>';
+ }
+ /**
+  * Picks the scraper for a page, loads it, and returns it together with
+  * the filter definitions for that page.
+  *
+  * The scraper name is taken from ?scraper=, else from the leading part of
+  * the ?npt= token, else from the scraper_<page> cookie; a name that is not
+  * offered for the page falls back to the first scraper of the list.
+  * As side effects $_GET["scraper"] is set to the chosen name and
+  * $_GET["nsfw"] is filled from the cookie when the request did not set it.
+  *
+  * @param string $page "web", "images", "videos" or "news".
+  * @return array [scraper instance, filter definitions]
+  * @throws Exception When $page is not one of the supported pages.
+  */
+ public function getscraperfilters($page){
+     // scrapers offered per page; the first entry is the default
+     $scrapers = [
+         "web" => [
+             "ddg" => "DuckDuckGo",
+             "brave" => "Brave",
+             "google" => "Google",
+             "mojeek" => "Mojeek",
+             "marginalia" => "Marginalia",
+             "wiby" => "wiby"
+         ],
+         "images" => [
+             "ddg" => "DuckDuckGo",
+             "yandex" => "Yandex",
+             "google" => "Google"
+         ],
+         "videos" => [
+             "yt" => "YouTube",
+             "ddg" => "DuckDuckGo",
+             "google" => "Google"
+         ],
+         "news" => [
+             "ddg" => "DuckDuckGo",
+             "brave" => "Brave",
+             "google" => "Google",
+             "mojeek" => "Mojeek"
+         ]
+     ];
+     // scraper name => [file to load, class to instantiate]
+     $libraries = [
+         "ddg" => ["scraper/ddg.php", "ddg"],
+         "brave" => ["scraper/brave.php", "brave"],
+         "yt" => ["scraper/youtube.php", "youtube"],
+         "yandex" => ["scraper/yandex.php", "yandex"],
+         "google" => ["scraper/google.php", "google"],
+         "mojeek" => ["scraper/mojeek.php", "mojeek"],
+         "marginalia" => ["scraper/marginalia.php", "marginalia"],
+         "wiby" => ["scraper/wiby.php", "wiby"]
+     ];
+     // an unknown page used to leave the scraper list undefined and end
+     // up on the first scraper by accident; fail loudly instead
+     if(
+         !is_string($page) ||
+         !isset($scrapers[$page])
+     ){
+         throw new Exception("Unsupported page");
+     }
+     $options = $scrapers[$page];
+     // lowest priority: cookie preference for this page
+     $get_scraper = isset($_COOKIE["scraper_" . $page]) ? $_COOKIE["scraper_" . $page] : null;
+     if(
+         isset($_GET["scraper"]) &&
+         is_string($_GET["scraper"])
+     ){
+         $get_scraper = $_GET["scraper"];
+     }elseif(
+         isset($_GET["npt"]) &&
+         is_string($_GET["npt"])
+     ){
+         // text before the first "." of the token, minus trailing digits
+         $get_scraper =
+             preg_replace(
+                 '/[0-9]+$/',
+                 "",
+                 explode(".", $_GET["npt"], 2)[0]
+             );
+     }
+     // get scraper name from user input, or default out to preferred scraper
+     if(
+         is_string($get_scraper) &&
+         isset($options[$get_scraper])
+     ){
+         $scraper_out = $get_scraper;
+     }else{
+         $scraper_out = array_key_first($options);
+     }
+     // include_once: a second call must not redeclare the scraper class
+     include_once $libraries[$scraper_out][0];
+     $lib = new $libraries[$scraper_out][1]();
+     // add search field + scraper selector
+     $filters =
+         [
+             "s" => [
+                 "option" => "_SEARCH"
+             ],
+             "scraper" => [
+                 "display" => "Scraper",
+                 "option" => $options
+             ]
+         ];
+     // set scraper on $_GET
+     $_GET["scraper"] = $scraper_out;
+     // set nsfw on $_GET
+     if(
+         isset($_COOKIE["nsfw"]) &&
+         !isset($_GET["nsfw"])
+     ){
+         $_GET["nsfw"] = $_COOKIE["nsfw"];
+     }
+     return
+         [
+             $lib,
+             array_merge_recursive(
+                 $filters,
+                 $lib->getfilters($page)
+             )
+         ];
+ }
+ /**
+  * Reduces raw request parameters to the ones named in $whitelist, giving
+  * each a valid value.
+  *
+  * - option list (array): the value must be one of the keys, otherwise the
+  *   first key is used.
+  * - "_DATE": parsed with strtotime(); false when missing or not a
+  *   positive timestamp.
+  * - "_SEARCH": trimmed; a leading "!word" is moved to the "bang" entry.
+  * Values that are not strings (for example ?s[]=x) are treated as missing.
+  * "npt" is always present, false when not given. When both "older" and
+  * "newer" are set and reversed, they are swapped.
+  *
+  * @param array $parameters Raw parameters, typically $_GET.
+  * @param array $whitelist  Parameter name => definition with an "option" entry.
+  * @return array Sanitized parameters.
+  */
+ public function parsegetfilters($parameters, $whitelist){
+     $sanitized = [];
+     // add npt token
+     if(
+         isset($parameters["npt"]) &&
+         is_string($parameters["npt"])
+     ){
+         $sanitized["npt"] = $parameters["npt"];
+     }else{
+         $sanitized["npt"] = false;
+     }
+     // we're iterating over $whitelist, so
+     // you can't pollute $sanitized with useless
+     // parameters
+     foreach($whitelist as $parameter => $value){
+         $option = $value["option"];
+         $given = null;
+         if(
+             isset($parameters[$parameter]) &&
+             is_string($parameters[$parameter])
+         ){
+             $given = $parameters[$parameter];
+         }
+         if(is_array($option)){
+             $keys = array_keys($option);
+             // numeric keys come back as integers, so compare as strings
+             if(
+                 $given !== null &&
+                 in_array($given, array_map("strval", $keys), true)
+             ){
+                 $sanitized[$parameter] = $given;
+             }else{
+                 // set a default value
+                 $sanitized[$parameter] = $keys[0];
+             }
+             continue;
+         }
+         // special fields
+         switch($option){
+             case "_DATE":
+                 $sanitized[$parameter] = false;
+                 if($given !== null){
+                     $time = strtotime($given);
+                     if(
+                         $time !== false &&
+                         $time > 0
+                     ){
+                         $sanitized[$parameter] = $time;
+                     }
+                 }
+                 break;
+             case "_SEARCH":
+                 // get search string & bang
+                 $sanitized[$parameter] = trim($given === null ? "" : $given);
+                 $sanitized["bang"] = "";
+                 if(
+                     strlen($sanitized[$parameter]) !== 0 &&
+                     $sanitized[$parameter][0] == "!"
+                 ){
+                     $pieces = explode(" ", $sanitized[$parameter], 2);
+                     $sanitized["bang"] = ltrim(trim($pieces[0]), "!");
+                     $sanitized[$parameter] = (count($pieces) === 2) ? trim($pieces[1]) : "";
+                 }
+                 $sanitized[$parameter] = ltrim($sanitized[$parameter], "! \n\r\t\v\x00");
+                 break;
+             default:
+                 // unknown special field: pass a string through untouched
+                 $sanitized[$parameter] = $given;
+                 break;
+         }
+     }
+     // invert dates if needed
+     if(
+         isset($sanitized["older"]) &&
+         isset($sanitized["newer"]) &&
+         $sanitized["newer"] !== false &&
+         $sanitized["older"] !== false &&
+         $sanitized["newer"] > $sanitized["older"]
+     ){
+         // invert
+         [
+             $sanitized["older"],
+             $sanitized["newer"]
+         ] = [
+             $sanitized["newer"],
+             $sanitized["older"]
+         ];
+     }
+     return $sanitized;
+ }
+ /**
+  * Format a duration given in seconds as "M:SS" or "H:MM:SS".
+  *
+  * Any string input yields "LIVE" (presumably how callers flag a live
+  * stream; verify against the scrapers). Hours are computed
+  * arithmetically so durations of 24 hours and more are not wrapped.
+  *
+  * @param int|float|string $seconds Duration in seconds, or a string.
+  * @return string Formatted duration, or "LIVE".
+  */
+ public function s_to_timestamp($seconds){
+     if(is_string($seconds)){
+         return "LIVE";
+     }
+     // negative durations make no sense: clamp to zero
+     $total = max(0, (int)$seconds);
+     $hours = intdiv($total, 3600);
+     $minutes = intdiv($total % 3600, 60);
+     $secs = $total % 60;
+     if($hours > 0){
+         return sprintf("%d:%02d:%02d", $hours, $minutes, $secs);
+     }
+     return sprintf("%d:%02d", $minutes, $secs);
+ }
+ /**
+  * Build the tab links (web, images, videos, news) for a query.
+  *
+  * @param string $page  Name of the active tab; it gets the "selected" class.
+  * @param string $query Search query, URL-encoded into each link.
+  * @return string Concatenated <a> elements.
+  */
+ public function generatehtmltabs($page, $query){
+     $html = "";
+     // encode once, the query is the same for every tab
+     $encoded = urlencode($query);
+     foreach(["web", "images", "videos", "news"] as $type){
+         $html .=
+             '<a href="/' . $type . '?s=' . $encoded .
+             '" class="tab' .
+             ($type == $page ? ' selected' : '') .
+             '">' . ucfirst($type) . '</a>';
+     }
+     return $html;
+ }
+ /**
+  * Render the filter controls for every filter that has a display label.
+  *
+  * List filters become a <select> with the current value preselected,
+  * "_DATE" filters become a date input, and every other filter type is
+  * left out.
+  *
+  * @param array $filters Filter definitions ("display" and "option" entries).
+  * @param array $params  Current value for each filter, keyed by filter name.
+  * @return string|null Markup of all rendered filters, null if none rendered.
+  */
+ public function generatehtmlfilters($filters, $params){
+     $markup = null;
+     foreach($filters as $name => $definition){
+         // no label, no control
+         if(!isset($definition["display"])){
+             continue;
+         }
+         $header =
+             '<div class="filter">' .
+             '<div class="title">' . htmlspecialchars($definition["display"]) . '</div>';
+         $options = $definition["option"];
+         if(is_array($options)){
+             // dropdown of accepted values
+             $control = '<select name="' . $name . '">';
+             foreach($options as $option_value => $option_label){
+                 $selected = ($params[$name] == $option_value) ? ' selected' : '';
+                 $control .=
+                     '<option value="' . $option_value . '"' . $selected . '>' .
+                     htmlspecialchars($option_label) .
+                     '</option>';
+             }
+             $control .= '</select>';
+         }elseif($options == "_DATE"){
+             // date picker, prefilled when a timestamp is set
+             $control = '<input type="date" name="' . $name . '"';
+             if($params[$name] !== false){
+                 $control .= ' value="' . date("Y-m-d", $params[$name]) . '"';
+             }
+             $control .= '>';
+         }else{
+             // other special fields have no control here
+             continue;
+         }
+         $markup .= $header . $control . '</div>';
+     }
+     return $markup;
+ }
+ /**
+  * Build a URL query string from the parameters worth keeping.
+  *
+  * Dropped: falsy values, the values "any" and "all", the keys "npt" and
+  * "extendedsearch", and the key "s" when $ommit is true. Comparisons are
+  * loose on purpose.
+  *
+  * @param array $gets  Parameters to serialize.
+  * @param bool  $ommit When exactly true, leave the "s" parameter out.
+  * @return string Query string as produced by http_build_query().
+  */
+ public function buildquery($gets, $ommit = false){
+     $kept = [];
+     foreach($gets as $name => $val){
+         // unset / empty values
+         if($val == null || $val == false){
+             continue;
+         }
+         // keys that are never serialized
+         if($name == "npt" || $name == "extendedsearch"){
+             continue;
+         }
+         // these two values are skipped outright
+         if($val == "any" || $val == "all"){
+             continue;
+         }
+         // caller asked to leave the search string out
+         if($ommit === true && $name == "s"){
+             continue;
+         }
+         $kept[$name] = $val;
+     }
+     return http_build_query($kept);
+ }
+ /**
+  * Build the relative URL of the next results page.
+  *
+  * @param array  $gets Current parameters, filtered through buildquery().
+  * @param string $npt  Next page token; URL-encoded before being appended.
+  * @param string $page Page path the link points to.
+  * @return string "<page>?<query>&npt=<token>"
+  */
+ public function htmlnextpage($gets, $npt, $page){
+     $query = $this->buildquery($gets);
+     // encode the token so it can never break out of the npt parameter
+     return $page . "?" . $query . "&npt=" . urlencode($npt);
+ }
diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php
new file mode 100644
index 0000000..8802511
--- /dev/null
+++ b/lib/fuckhtml.php
@@ -0,0 +1,361 @@
+class fuckhtml{
+ /** @var string|null Markup currently loaded, set by load(). */
+ public $html = null;
+ /** @var int Byte length of $html, set by load(). */
+ public $strlen = 0;
+ /**
+  * Optionally load markup right away.
+  *
+  * @param string|array|null $html   Markup, element array or file path,
+  *                                  passed to load() unless null.
+  * @param bool              $isfile Passed through to load().
+  */
+ public function __construct($html = null, $isfile = false){
+     // declared properties above avoid creating them dynamically,
+     // which is deprecated as of PHP 8.2
+     if($html !== null){
+         $this->load($html, $isfile);
+     }
+ }
+ /**
+  * Load the markup that the getElement* methods will search.
+  *
+  * @param string|array $html   Markup, an array with an "innerHTML" entry,
+  *                             or a file path when $isfile is truthy.
+  * @param bool         $isfile Treat $html as a path and read that file.
+  * @throws Exception If the array has no "innerHTML" entry or the file
+  *                   cannot be read.
+  */
+ public function load($html, $isfile = false){
+     if(is_array($html)){
+         if(!isset($html["innerHTML"])){
+             throw new Exception("(load) Supplied array doesn't contain a innerHTML index");
+         }
+         $html = $html["innerHTML"];
+     }
+     if($isfile){
+         // file_get_contents() copes with empty files (fread() refuses a
+         // zero length) and lets us detect an unreadable path
+         $contents = file_get_contents($html);
+         if($contents === false){
+             throw new Exception("(load) Failed to read the supplied file");
+         }
+         $html = $contents;
+     }
+     $this->html = $html;
+     $this->strlen = strlen($this->html);
+ }
+ /**
+  * Find every element with the given tag name in the loaded markup.
+  *
+  * Opening and closing tags are located with regular expressions and then
+  * paired by nesting depth, computed separately for each tag name.
+  *
+  * @param string $tagname Tag name (case-insensitive) or "*" for any tag.
+  * @return array List of elements in document order. Each element has
+  *               "tagName", "startPos", "endPos", "attributes", "innerHTML"
+  *               and "level"; "outerHTML" is present only when a closing
+  *               tag was found (otherwise innerHTML is null and endPos 0).
+  */
+ public function getElementsByTagName(string $tagname){
+     $out = [];
+     /*
+         Scrape start of the tag. Example
+         <div class="mydiv"> ...
+     */
+     if($tagname == "*"){
+         $tagname = '[^\/<>\s]+';
+     }else{
+         // quote for use inside a "/"-delimited pattern
+         $tagname = preg_quote(strtolower($tagname), "/");
+     }
+     // PREG_OFFSET_CAPTURE: every capture becomes [text, byte offset],
+     // which is what the position bookkeeping below relies on
+     preg_match_all(
+         '/<\s*(' . $tagname . ')(\s(?:[^>\'"]*|"[^"]*"|\'[^\']*\')+)?\s*>/i',
+         $this->html,
+         $starting_tags,
+         PREG_OFFSET_CAPTURE
+     );
+     for($i=0; $i<count($starting_tags[0]); $i++){
+         /*
+             Parse attributes
+         */
+         $attributes = [];
+         preg_match_all(
+             '/([^\/\s\\=]+)(?:\s*=\s*("[^"]*"|\'[^\']*\'|[^\s]*))?/',
+             $starting_tags[2][$i][0] ?? "",
+             $regex_attributes
+         );
+         for($k=0; $k<count($regex_attributes[0]); $k++){
+             if(trim($regex_attributes[2][$k]) == ""){
+                 // attribute without a value, e.g. <input disabled>
+                 $attributes[$regex_attributes[1][$k]] =
+                     "true";
+                 continue;
+             }
+             $attributes[$regex_attributes[1][$k]] =
+                 trim($regex_attributes[2][$k], "'\" \n\r\t\v\x00");
+         }
+         $out[] = [
+             "tagName" => strtolower($starting_tags[1][$i][0]),
+             "startPos" => $starting_tags[0][$i][1],
+             "endPos" => 0,
+             "startTag" => $starting_tags[0][$i][0],
+             "attributes" => $attributes,
+             "innerHTML" => null
+         ];
+     }
+     /*
+         Get innerHTML
+     */
+     // get closing tag positions
+     preg_match_all(
+         '/<\s*\/\s*(' . $tagname . ')\s*>/i',
+         $this->html,
+         $regex_closing_tags,
+         PREG_OFFSET_CAPTURE
+     );
+     // merge opening and closing tags together
+     for($i=0; $i<count($regex_closing_tags[1]); $i++){
+         $out[] = [
+             "tagName" => strtolower($regex_closing_tags[1][$i][0]),
+             "endTag" => $regex_closing_tags[0][$i][0],
+             "startPos" => $regex_closing_tags[0][$i][1]
+         ];
+     }
+     // document order; the comparator must return an integer
+     usort(
+         $out,
+         function($a, $b){
+             return $a["startPos"] <=> $b["startPos"];
+         }
+     );
+     // compute the indent level for each element
+     $level = [];
+     $count = count($out);
+     for($i=0; $i<$count; $i++){
+         if(!isset($level[$out[$i]["tagName"]])){
+             $level[$out[$i]["tagName"]] = 0;
+         }
+         if(isset($out[$i]["startTag"])){
+             // encountered starting tag
+             $level[$out[$i]["tagName"]]++;
+             $out[$i]["level"] = $level[$out[$i]["tagName"]];
+         }else{
+             // encountered closing tag
+             $out[$i]["level"] = $level[$out[$i]["tagName"]];
+             $level[$out[$i]["tagName"]]--;
+         }
+     }
+     // if the indent level is the same for a div,
+     // we encountered _THE_ closing tag
+     for($i=0; $i<$count; $i++){
+         if(!isset($out[$i]["startTag"])){
+             continue;
+         }
+         for($k=$i; $k<$count; $k++){
+             if(
+                 isset($out[$k]["endTag"]) &&
+                 $out[$i]["tagName"] == $out[$k]["tagName"] &&
+                 $out[$i]["level"]
+                 === $out[$k]["level"]
+             ){
+                 $startlen = strlen($out[$i]["startTag"]);
+                 $endlen = strlen($out[$k]["endTag"]);
+                 $out[$i]["endPos"] = $out[$k]["startPos"] + $endlen;
+                 // between the end of the opening tag and the closing tag
+                 $out[$i]["innerHTML"] =
+                     substr(
+                         $this->html,
+                         $out[$i]["startPos"] + $startlen,
+                         $out[$k]["startPos"] - ($out[$i]["startPos"] + $startlen)
+                     );
+                 // opening tag through closing tag, both included
+                 $out[$i]["outerHTML"] =
+                     substr(
+                         $this->html,
+                         $out[$i]["startPos"],
+                         $out[$k]["startPos"] - $out[$i]["startPos"] + $endlen
+                     );
+                 break;
+             }
+         }
+     }
+     // filter out ending divs
+     for($i=0; $i<$count; $i++){
+         if(isset($out[$i]["endTag"])){
+             unset($out[$i]);
+             continue;
+         }
+         unset($out[$i]["startTag"]);
+     }
+     return array_values($out);
+ }
+ /**
+  * Keep the elements that carry an attribute with the given name.
+  *
+  * @param string            $name       Attribute name to look for.
+  * @param array|string|null $collection Elements to search; a tag name to
+  *                                      search within, or null for every
+  *                                      element of the loaded markup.
+  * @return array Matching elements, in their original order.
+  */
+ public function getElementsByAttributeName(string $name, $collection = null){
+     // resolve the collection to an actual element list
+     if($collection === null){
+         $collection = $this->getElementsByTagName("*");
+     }elseif(is_string($collection)){
+         $collection = $this->getElementsByTagName($collection);
+     }
+     $matches = [];
+     foreach($collection as $element){
+         $found = false;
+         foreach(array_keys($element["attributes"]) as $key){
+             if($key == $name){
+                 $found = true;
+                 break;
+             }
+         }
+         if($found){
+             $matches[] = $element;
+         }
+     }
+     return $matches;
+ }
+ /**
+  * Find elements whose attribute $name contains every whitespace-separated
+  * token of $value, in any order (how a class list is matched).
+  *
+  * Only the attribute called $name is inspected, and each requested token
+  * must be present on its own: a repeated token in the attribute does not
+  * stand in for a missing one.
+  *
+  * @param string            $name       Attribute name.
+  * @param string            $value      Space-separated tokens that must all
+  *                                      be present. No tokens, no matches.
+  * @param array|string|null $collection See getElementsByAttributeName().
+  * @return array Matching elements, in their original order.
+  */
+ public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){
+     $elems = $this->getElementsByAttributeName($name, $collection);
+     $wanted = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
+     $return = [];
+     if(count($wanted) === 0){
+         return $return;
+     }
+     foreach($elems as $elem){
+         foreach($elem["attributes"] as $attrib_name => $attrib_value){
+             // other attributes must not influence the result
+             if($attrib_name != $name){
+                 continue;
+             }
+             $present = preg_split('/\s+/', (string)$attrib_value, -1, PREG_SPLIT_NO_EMPTY);
+             if(count(array_diff($wanted, $present)) === 0){
+                 $return[] = $elem;
+             }
+             continue 2;
+         }
+     }
+     return $return;
+ }
+ /**
+  * Find elements whose attribute $name is exactly $value.
+  *
+  * Only the attribute called $name is compared, and the comparison is a
+  * strict string comparison ("1" does not match "01").
+  *
+  * @param string            $name       Attribute name.
+  * @param string            $value      Exact attribute value.
+  * @param array|string|null $collection See getElementsByAttributeName().
+  * @return array Matching elements, in their original order.
+  */
+ public function getElementsByAttributeValue(string $name, string $value, $collection = null){
+     $elems = $this->getElementsByAttributeName($name, $collection);
+     $return = [];
+     foreach($elems as $elem){
+         foreach($elem["attributes"] as $attrib_name => $attrib_value){
+             if(
+                 $attrib_name == $name &&
+                 (string)$attrib_value === $value
+             ){
+                 $return[] = $elem;
+                 continue 2;
+             }
+         }
+     }
+     return $return;
+ }
+ /**
+  * Return the first element whose id attribute equals $idname.
+  *
+  * @param string            $idname     Id to look for.
+  * @param array|string|null $collection See getElementsByAttributeName().
+  * @return array|false The first matching element, or false when none match.
+  */
+ public function getElementById(string $idname, $collection = null){
+     $matches = $this->getElementsByAttributeValue("id", $idname, $collection);
+     return count($matches) === 0 ? false : $matches[0];
+ }
+ /**
+  * Find elements whose class attribute contains the given class name(s).
+  *
+  * @param string            $classname  One or more space-separated classes.
+  * @param array|string|null $collection See getElementsByAttributeName().
+  * @return array Matching elements.
+  */
+ public function getElementsByClassName(string $classname, $collection = null){
+     // a class lookup is a fuzzy match on the "class" attribute
+     $matches = $this->getElementsByFuzzyAttributeValue("class", $classname, $collection);
+     return $matches;
+ }
+ /**
+  * Extract the readable text from a piece of markup.
+  *
+  * The markup is split on newlines and <br> tags (including <br/>, <br />
+  * and <br> with attributes), tags are stripped and HTML entities decoded.
+  *
+  * @param string|array $html       Markup, or an array with an "innerHTML"
+  *                                 entry.
+  * @param bool         $whitespace When exactly true, join the pieces with
+  *                                 newlines instead of single spaces.
+  * @param bool         $trim       Trim every piece and the final result.
+  * @return string Text content.
+  * @throws Exception If an array without an "innerHTML" entry is supplied.
+  */
+ public function getTextContent($html, $whitespace = false, $trim = true){
+     if(is_array($html)){
+         if(!isset($html["innerHTML"])){
+             throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index");
+         }
+         $html = $html["innerHTML"];
+     }
+     // every flavour of <br> counts as a line break, otherwise strip_tags()
+     // would glue the neighbouring words together
+     $lines = preg_split('/\n|<\s*\/?\s*br\b[^>]*>/i', $html);
+     $separator = ($whitespace === true) ? "\n" : " ";
+     $out = "";
+     foreach($lines as $line){
+         // explicit flags: quote entities decode the same on every PHP version
+         $tmp =
+             html_entity_decode(
+                 strip_tags($line),
+                 ENT_QUOTES | ENT_HTML5,
+                 "UTF-8"
+             );
+         if($trim){
+             $tmp = trim($tmp);
+         }
+         $out .= $tmp . $separator;
+     }
+     if($trim){
+         return trim($out);
+     }
+     return $out;
+ }
diff --git a/lib/img404.png b/lib/img404.png
new file mode 100644
index 0000000..4549dee
--- /dev/null
+++ b/lib/img404.png
Binary files differ
diff --git a/lib/nextpage.php b/lib/nextpage.php
new file mode 100644
index 0000000..a883e49
--- /dev/null
+++ b/lib/nextpage.php
@@ -0,0 +1,106 @@
+class nextpage{
+ /** @var string Scraper name, used in cache keys and tokens. */
+ public $scraper;
+ /**
+  * @param string $scraper Name of the scraper the tokens belong to.
+  */
+ public function __construct($scraper){
+     // declared above: creating properties dynamically is deprecated
+     // as of PHP 8.2
+     $this->scraper = $scraper;
+ }
+ /**
+  * Encrypt a payload, cache it in APCu and return the token that unlocks it.
+  *
+  * The payload is encrypted with AES-256-GCM under a key derived (PBKDF2)
+  * from a random secret. Only salt, IV, ciphertext and tag are cached; the
+  * secret itself is handed back inside the token.
+  *
+  * @param string $payload Data to keep for the next page request.
+  * @param string $page    Page name; only its first character is used.
+  * @return string Token "<scraper><counter>.<base64url secret>".
+  */
+ public function store($payload, $page){
+     $prefix = $page[0];
+     $secret = random_bytes(256); // 2048 bit
+     $salt = random_bytes(16);
+     $aeskey = hash_pbkdf2("sha512", $secret, $salt, 20000, 32, true);
+     $iv = random_bytes(openssl_cipher_iv_length("aes-256-gcm"));
+     $tag = "";
+     $ciphertext = openssl_encrypt($payload, "aes-256-gcm", $aeskey, OPENSSL_RAW_DATA, $iv, $tag, "", 16);
+     // shared counter: part of the cache key and of the token
+     $counter = apcu_inc("key", 1);
+     $slot = $this->scraper . (string)($counter);
+     apcu_store(
+         $prefix . "." . $slot,
+         gzdeflate($salt.$iv.$ciphertext.$tag),
+         420 // entries live for 7 minutes
+     );
+     // url-safe base64 without padding
+     $encoded = rtrim(strtr(base64_encode($secret), '+/', '-_'), '=');
+     return $slot . "." . $encoded;
+ }
+ /**
+  * Fetch and decrypt the payload a next page token points to.
+  *
+  * The cached blob is laid out as salt (16 bytes), IV, ciphertext and GCM
+  * tag (16 bytes), matching what store() writes. The cache entry is deleted
+  * once it has been decrypted successfully.
+  *
+  * @param string $npt  Token "<scraper><counter>.<base64url secret>".
+  * @param string $page Page name; only its first character is used.
+  * @return string Decrypted payload.
+  * @throws Exception If the token is malformed, unknown, expired or does
+  *                   not decrypt the cached data.
+  */
+ public function get($npt, $page){
+     $page = $page[0];
+     $explode = explode(".", $npt, 2);
+     if(count($explode) !== 2){
+         throw new Exception("Malformed nextPageToken!");
+     }
+     $apcu = $page . "." . $explode[0];
+     $payload = apcu_fetch($apcu);
+     if(
+         $payload === false ||
+         !is_string($payload)
+     ){
+         throw new Exception("The nextPageToken is invalid or has expired!");
+     }
+     // back to the standard base64 alphabet, with the padding restored
+     $encoded = strtr($explode[1], '-_', '+/');
+     $encoded .= str_repeat("=", (4 - strlen($encoded) % 4) % 4);
+     $password = base64_decode($encoded);
+     $payload = gzinflate($payload);
+     $ivlen = openssl_cipher_iv_length("aes-256-gcm");
+     if(
+         $password === false ||
+         $payload === false ||
+         strlen($payload) < 16 + $ivlen + 16 // salt + iv + tag
+     ){
+         throw new Exception("The nextPageToken is invalid or has expired!");
+     }
+     $key =
+         hash_pbkdf2(
+             "sha512",
+             $password,
+             substr($payload, 0, 16), // salt
+             20000,
+             32,
+             true
+         );
+     $payload =
+         openssl_decrypt(
+             substr(
+                 $payload,
+                 16 + $ivlen,
+                 -16
+             ), // ciphertext
+             "aes-256-gcm",
+             $key,
+             OPENSSL_RAW_DATA, // the blob is raw binary, not base64
+             substr($payload, 16, $ivlen), // iv
+             substr($payload, -16) // tag
+         );
+     if($payload === false){
+         throw new Exception("The nextPageToken is invalid or has expired!");
+     }
+     // remove the key after using
+     apcu_delete($apcu);
+     return $payload;
+ }
diff --git a/lib/type-todo.php b/lib/type-todo.php
new file mode 100644
index 0000000..f813543
--- /dev/null
+++ b/lib/type-todo.php
@@ -0,0 +1,132 @@
+ /**
+  * Produce up to 8 autocomplete suggestions for the search box.
+  *
+  * - no query, no bang: a fixed list of bangs
+  * - no query, partial bang: bangs from the database starting with it
+  * - query: phrases from the remote autocomplete endpoint, prefixed with
+  *   the bang when that bang exists in the database
+  *
+  * NOTE(review): the database credentials are hard-coded here and the
+  * request URL passed to $this->get() is empty; both look unfinished and
+  * should be confirmed before this is used.
+  *
+  * @param array $get Sanitized parameters; reads "s" and "bang".
+  * @return array List of suggestions with "s" and optionally "n" / "u".
+  * @throws Exception If the database or the autocomplete request fails.
+  */
+ public function type($get){
+     // compare against "" instead of empty(): "0" is a valid query
+     $search = (string)($get["s"] ?? "");
+     $bang = (string)($get["bang"] ?? "");
+     if($search === ""){
+         if($bang !== ""){
+             // !youtube
+             $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");
+             if($conn === false){
+                 throw new Exception("Failed to connect to the bang database");
+             }
+             // one-shot parameterized query: a named prepared statement
+             // cannot be prepared twice on the same (reused) connection.
+             // % and _ typed by the user are escaped so they match literally
+             $q = pg_query_params(
+                 $conn,
+                 'SELECT bang,name FROM bangs WHERE bang LIKE $1 ORDER BY bang ASC LIMIT 8',
+                 [addcslashes($bang, "\\%_") . "%"]
+             );
+             if($q === false){
+                 throw new Exception("Failed to query the bang database");
+             }
+             $results = [];
+             while($row = pg_fetch_array($q, null, PGSQL_ASSOC)){
+                 $results[] = [
+                     "s" => "!" . $row["bang"],
+                     "n" => $row["name"]
+                 ];
+             }
+             return $results;
+         }else{
+             // everything is empty
+             // lets just return a bang list
+             return [
+                 [
+                     "s" => "!w",
+                     "n" => "Wikipedia",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!4ch",
+                     "n" => "4chan Board",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!a",
+                     "n" => "Amazon",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!e",
+                     "n" => "eBay",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!so",
+                     "n" => "Stack Overflow",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!gh",
+                     "n" => "GitHub",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!tw",
+                     "n" => "Twitter",
+                     "u" => "{%q%}"
+                 ],
+                 [
+                     "s" => "!r",
+                     "n" => "Reddit",
+                     "u" => "{%q%}"
+                 ],
+             ];
+         }
+     }
+     // now we know search isnt empty
+     $row = false;
+     if($bang !== ""){
+         // check if the bang exists
+         $conn = pg_connect("host=localhost dbname=4get user=postgres password=postgres");
+         if($conn === false){
+             throw new Exception("Failed to connect to the bang database");
+         }
+         $q = pg_query_params(
+             $conn,
+             'SELECT bang,name FROM bangs WHERE bang = $1 LIMIT 1',
+             [$bang]
+         );
+         if($q === false){
+             throw new Exception("Failed to query the bang database");
+         }
+         $row = pg_fetch_array($q, null, PGSQL_ASSOC);
+         if(isset($row["bang"])){
+             $bang = "!$bang ";
+         }else{
+             // unknown bang: suggest plain phrases
+             $bang = "";
+         }
+     }
+     try{
+         $res = $this->get(
+             "",
+             [
+                 "q" => strtolower($search)
+             ],
+             ddg::req_xhr
+         );
+     }catch(Exception $e){
+         throw new Exception("Failed to get /ac/", 0, $e);
+     }
+     $res = json_decode($res, true);
+     if(!is_array($res)){
+         // not JSON, or not a list of suggestions
+         throw new Exception("Failed to decode /ac/ response");
+     }
+     $arr = [];
+     foreach(array_slice(array_values($res), 0, 8) as $item){
+         if(!isset($item["phrase"])){
+             // entry without a phrase: nothing to suggest
+             continue;
+         }
+         if($bang === ""){
+             $arr[] = [
+                 "s" => $item["phrase"]
+             ];
+         }else{
+             $arr[] = [
+                 "s" => $bang . $item["phrase"],
+                 "n" => $row["name"]
+             ];
+         }
+     }
+     return $arr;
+ }