diff options
-rw-r--r-- | about.php | 2 | ||||
-rw-r--r-- | api.txt | 4 | ||||
-rw-r--r-- | lib/frontend.php | 10 | ||||
-rw-r--r-- | lib/nextpage.php | 2 | ||||
-rw-r--r-- | proxy.php | 1 | ||||
-rw-r--r-- | scraper/google.php | 346 | ||||
-rw-r--r-- | settings.php | 38 | ||||
-rw-r--r-- | template/header.html | 2 | ||||
-rw-r--r-- | template/home.html | 4 | ||||
-rw-r--r-- | template/images.html | 2 | ||||
-rw-r--r-- | template/search.html | 2 |
11 files changed, 209 insertions, 204 deletions
@@ -60,7 +60,7 @@ $left = Provide users with a privacy oriented, extremely lightweight, ad free, free as in freedom (and free beer!) way to search for documents around the internet, with minimal, optional javascript code. My long term goal would be to build my own index (that doesn\'t suck) and provide users with an unbiased search engine, with no political inclinations. <a href="#logs"><h2 id="logs">Do you keep logs?</h2></a> - I store data temporarly to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using <div class="code-inline">aes-256-gcm</div> on the serber, for which I give you a key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 7 minutes, or after it\'s used, whichever comes first.<br><br> + I store data temporarly to get the next page of results. This might include search queries, tokens and other parameters. These parameters are encrypted using <div class="code-inline">aes-256-gcm</div> on the serber, for which I give you a key (also known internally as <div class="code-inline">npt</div> token). When you make a request to get the next page, you supply the token, the data is decrypted and the request is fulfilled. This encrypted data is deleted after 15 minutes, or after it\'s used, whichever comes first.<br><br> I <b>don\'t</b> log IP addresses, user agents, or anything else. The <div class="code-inline">npt</div> tokens are the only thing that are stored (in RAM, mind you), temporarly, encrypted. @@ -73,8 +73,8 @@ impossible for a 4get operator to peek at the private data of the user after a request has been made. - The tokens will expire as soon as they are used or after a 7 minutes - inactivity period, whichever comes first. + The tokens will expire as soon as they are used or after a 15 + minutes inactivity period, whichever comes first. + Beware of null values! diff --git a/lib/frontend.php b/lib/frontend.php index e03eb1d..16e5693 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -878,7 +878,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", - "google" => "Google", + //"google" => "Google", "mojeek" => "Mojeek", "marginalia" => "Marginalia", "wiby" => "wiby" @@ -892,7 +892,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "yandex" => "Yandex", - "google" => "Google" + //"google" => "Google" ] ]; break; @@ -903,7 +903,7 @@ class frontend{ "option" => [ "yt" => "YouTube", "ddg" => "DuckDuckGo", - "google" => "Google" + //"google" => "Google" ] ]; break; @@ -914,7 +914,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", - "google" => "Google", + //"google" => "Google", "mojeek" => "Mojeek" ] ]; @@ -1285,7 +1285,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; + return "/proxy?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/lib/nextpage.php b/lib/nextpage.php index a883e49..3fab855 100644 --- a/lib/nextpage.php +++ b/lib/nextpage.php @@ -28,7 +28,7 @@ class nextpage{ $this->scraper . (string)($key), gzdeflate($salt.$iv.$out.$tag), - 420 // cache information for 7 minutes blaze it + 900 // cache information for 15 minutes blaze it ); return @@ -109,6 +109,7 @@ try{ $image->stripImage(); $image->setFormat("jpeg"); + $image->setImageCompressionQuality(90); $image->setImageCompression(Imagick::COMPRESSION_JPEG2000); $proxy->getfilenameheader($payload["headers"], $_GET["i"]); diff --git a/scraper/google.php b/scraper/google.php index df10754..28ede6d 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -1565,18 +1565,17 @@ class google{ } /* - Fallback to parsing it as an embed + Detect if its a wikipedia thing */ + $h3 = + $this->fuckhtml + ->getElementsByTagName("h3"); + - $table = [ - "title" => null, - "description" => [], - "url" => null, - "thumb" => null, - "table" => [], - "sublink" => [] - ]; + /* + Fallback to parsing the word definitions + */ $parts = $this->fuckhtml ->getElementsByClassName( @@ -1596,12 +1595,17 @@ class google{ $head = $parts[0]; - $h3 = - $this->fuckhtml - ->getElementsByTagName("h3"); - if(count($h3) !== 0){ + $table = [ + "title" => null, + "description" => [], + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] + ]; + $h3 = $h3[0]; $table["title"] = @@ -1626,201 +1630,201 @@ class google{ $head ) ]; - } - - $audio = - $this->fuckhtml - ->getElementsByTagName("audio"); - - if(count($audio) !== 0){ - $table["description"][] = [ - "type" => "audio", - "url" => - str_replace( - "http://", - "https://", - $this->fuckhtml - ->getTextContent( - $audio[0]["attributes"]["src"] - ) - ) - ]; - } - - if(count($parts) >= 2){ - - $this->fuckhtml->load($parts[1]); - - $parts = + $audio = $this->fuckhtml - ->getElementsByClassName( - $this->findstyles( - [ - "padding-bottom" => "12px" - ], - self::is_class - ), - "div" - ); + ->getElementsByTagName("audio"); - foreach($parts as $part){ + if(count($audio) !== 0){ - $this->fuckhtml->load($part); + $table["description"][] = [ + "type" => "audio", + "url" => + str_replace( + "http://", + "https://", + $this->fuckhtml + ->getTextContent( + $audio[0]["attributes"]["src"] + ) + ) + ]; + } + + if(count($parts) >= 2){ + + $this->fuckhtml->load($parts[1]); - $lists = + $parts = $this->fuckhtml - ->getElementsByTagName("ol"); + ->getElementsByClassName( + $this->findstyles( + [ + "padding-bottom" => "12px" + ], + self::is_class + ), + "div" + ); - if(count($lists) !== 0){ + foreach($parts as $part){ - foreach($lists as $list){ - - $this->fuckhtml->load($list); - - $list_items = - $this->fuckhtml - ->getElementsByTagName("li"); - - $index = 0; + $this->fuckhtml->load($part); + + $lists = + $this->fuckhtml + ->getElementsByTagName("ol"); + + if(count($lists) !== 0){ - if(count($list_items) !== 0){ + foreach($lists as $list){ - foreach($list_items as $list_item){ - - $index++; - - $this->fuckhtml->load($list_item); - - $list_subitems = - $this->fuckhtml - ->getElementsByTagName("div"); + $this->fuckhtml->load($list); + + $list_items = + $this->fuckhtml + ->getElementsByTagName("li"); + + $index = 0; + + if(count($list_items) !== 0){ - foreach($list_subitems as $subitem){ + foreach($list_items as $list_item){ - if($subitem["level"] !== 1){ continue; } + $index++; - $this->fuckhtml->load($subitem); + $this->fuckhtml->load($list_item); - $spans = + $list_subitems = $this->fuckhtml - ->getElementsByTagName("span"); + ->getElementsByTagName("div"); - if(count($spans) !== 0){ + foreach($list_subitems as $subitem){ - $type = "quote"; - }else{ + if($subitem["level"] !== 1){ continue; } - $type = "text"; - } - - $value = - $this->fuckhtml - ->getTextContent( - $subitem - ); - - if($type == "text"){ + $this->fuckhtml->load($subitem); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + $type = "quote"; + }else{ + + $type = "text"; + } + + $value = + $this->fuckhtml + ->getTextContent( + $subitem + ); - $value = $index . ". " . $value; + if($type == "text"){ + + $value = $index . ". " . $value; + } + + $table["description"][] = [ + "type" => $type, + "value" => $value + ]; } - - $table["description"][] = [ - "type" => $type, - "value" => $value - ]; } } } - } - - continue; - } - - // get title - $spans = - $this->fuckhtml - ->getElementsByTagName("span"); - - if(count($spans) !== 0){ - - foreach($spans as $span){ - $part["innerHTML"] = - str_replace( - $span["outerHTML"], - "", - $part["innerHTML"] - ); + continue; } - if( + // get title + $spans = $this->fuckhtml - ->getTextContent( - $part - ) - == "" - ){ - - $table["description"][] = [ - "type" => "title", - "value" => - $this->fuckhtml - ->getTextContent( - $spans[0] - ) - ]; - - continue; - } - } - - // fallback to getting non-numbered list - $nlist = - $this->fuckhtml - ->getElementsByClassName( - $this->findstyles( - [ - "white-space" => "pre-line", - "word-wrap" => "break-word" - ], - self::is_class - ), - "div" - ); - - if(count($nlist) !== 0){ + ->getElementsByTagName("span"); - foreach($nlist as $nlist_item){ + if(count($spans) !== 0){ - $text = - $this->fuckhtml - ->getTextContent($nlist_item); - - if($text == ""){ + foreach($spans as $span){ - continue; + $part["innerHTML"] = + str_replace( + $span["outerHTML"], + "", + $part["innerHTML"] + ); } - $this->fuckhtml->load($nlist_item); - - $spans = + if( $this->fuckhtml - ->getElementsByTagName("span"); - - if(count($spans) !== 0){ + ->getTextContent( + $part + ) + == "" + ){ - // is a quote node - $type = "quote"; - }else{ + $table["description"][] = [ + "type" => "title", + "value" => + $this->fuckhtml + ->getTextContent( + $spans[0] + ) + ]; - $type = "text"; + continue; } + } + + // fallback to getting non-numbered list + $nlist = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + ); + + if(count($nlist) !== 0){ - $table["description"][] = [ - "type" => $type, - "value" => $text - ]; + foreach($nlist as $nlist_item){ + + $text = + $this->fuckhtml + ->getTextContent($nlist_item); + + if($text == ""){ + + continue; + } + + $this->fuckhtml->load($nlist_item); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + // is a quote node + $type = "quote"; + }else{ + + $type = "text"; + } + + $table["description"][] = [ + "type" => $type, + "value" => $text + ]; + } } } } diff --git a/settings.php b/settings.php index 29f051d..96c31c8 100644 --- a/settings.php +++ b/settings.php @@ -70,10 +70,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - [ - "value" => "google", - "text" => "Google" - ], + //[ + // "value" => "google", + // "text" => "Google" + //], [ "value" => "mojeek", "text" => "Mojeek" @@ -99,11 +99,11 @@ $settings = [ [ "value" => "yandex", "text" => "Yandex" - ], - [ - "value" => "google", - "text" => "Google" - ] + ]//, + //[ + // "value" => "google", + // "text" => "Google" + //] ] ], [ @@ -117,11 +117,11 @@ $settings = [ [ "value" => "ddg", "text" => "DuckDuckGo" - ], - [ - "value" => "google", - "text" => "Google" - ] + ]//, + //[ + // "value" => "google", + // "text" => "Google" + //] ] ], [ @@ -136,10 +136,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - [ - "value" => "google", - "text" => "Google" - ], + //[ + // "value" => "google", + // "text" => "Google" + //], [ "value" => "mojeek", "text" => "Mojeek" @@ -219,7 +219,7 @@ echo '<head>' . '<meta http-equiv="Content-Type" content="text/html;charset=utf-8">' . '<title>Settings</title>' . - '<link rel="stylesheet" href="/static/style.css">' . + '<link rel="stylesheet" href="/static/style.css?v2">' . '<meta name="viewport" content="width=device-width,initial-scale=1">' . '<meta name="robots" content="index,follow">' . '<link rel="icon" type="image/x-icon" href="/favicon.ico">' . diff --git a/template/header.html b/template/header.html index bd6fc8a..2633521 100644 --- a/template/header.html +++ b/template/header.html @@ -3,7 +3,7 @@ <head> <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> <title>{%title%}</title> - <link rel="stylesheet" href="/static/style.css"> + <link rel="stylesheet" href="/static/style.css?v2"> <meta name="viewport" content="width=device-width,initial-scale=1"> <meta name="robots" content="{%index%}index,{%index%}follow"> <link rel="icon" type="image/x-icon" href="/favicon.ico"> diff --git a/template/home.html b/template/home.html index 7f00dae..8ca6377 100644 --- a/template/home.html +++ b/template/home.html @@ -4,7 +4,7 @@ <meta http-equiv="Content-Type" content="text/html;charset=utf-8"> <title>4get</title> <meta name="viewport" content="width=device-width,initial-scale=1"> - <link rel="stylesheet" href="/static/style.css"> + <link rel="stylesheet" href="/static/style.css?v2"> <meta name="robots" content="index,follow"> <link rel="icon" type="image/x-icon" href="/favicon.ico"> <meta name="description" content="4get.ca: They live in our walls!"> @@ -31,6 +31,6 @@ Report a problem: <a href="https://lolcat.ca/contact">lolcat.ca/contact</a> </div> </div> - <script src="/static/client.js"></script> + <script src="/static/client.js?v2"></script> </body> </html> diff --git a/template/images.html b/template/images.html index a09c121..61e319c 100644 --- a/template/images.html +++ b/template/images.html @@ -2,6 +2,6 @@ {%images%} </div> {%nextpage%} - <script src="/static/client.js"></script> + <script src="/static/client.js?v2"></script> </body> </html> diff --git a/template/search.html b/template/search.html index bbfbb54..c187102 100644 --- a/template/search.html +++ b/template/search.html @@ -11,6 +11,6 @@ {%left%} </div> </div> - <script src="/static/client.js"></script> + <script src="/static/client.js?v2"></script> </body> </html> |