diff options
-rw-r--r-- | README.md | 40 | ||||
-rw-r--r-- | ami4get.php | 2 | ||||
-rw-r--r-- | api.txt | 4 | ||||
-rw-r--r-- | api/v1/web.php | 10 | ||||
-rw-r--r-- | data/config.php | 14 | ||||
-rw-r--r-- | lib/captcha_gen.php | 7 | ||||
-rw-r--r-- | lib/frontend.php | 12 | ||||
-rw-r--r-- | lib/fuckhtml.php | 36 | ||||
-rw-r--r-- | robots.txt | 4 | ||||
-rw-r--r-- | scraper/brave.php | 6 | ||||
-rw-r--r-- | scraper/ddg.php | 2 | ||||
-rw-r--r-- | scraper/google.php | 3677 | ||||
-rw-r--r-- | scraper/mojeek.php | 2 | ||||
-rw-r--r-- | scraper/sc.php | 2 | ||||
-rw-r--r-- | settings.php | 33 | ||||
-rw-r--r-- | static/icon/call.png | bin | 0 -> 2155 bytes | |||
-rw-r--r-- | static/icon/directions.png | bin | 0 -> 1673 bytes | |||
-rw-r--r-- | static/serverping.js | 21 | ||||
-rw-r--r-- | static/style.css | 6 | ||||
-rw-r--r-- | static/themes/Catppuccin Latte.css (renamed from static/themes/Catpuccin Latte.css) | 0 | ||||
-rw-r--r-- | template/instances.html | 1 | ||||
-rw-r--r-- | web.php | 14 |
22 files changed, 2844 insertions, 1049 deletions
@@ -23,6 +23,7 @@ https://4get.ca - DuckDuckGo - Brave - Yandex + - Google - Mojeek - Marginalia - wiby @@ -41,10 +42,12 @@ https://4get.ca - DuckDuckgo - Brave - Yandex + - Google 4. News - DuckDuckGo - Brave + - Google - Mojeek 5. Music @@ -61,7 +64,7 @@ https://4get.ca - YouTube - SoundCloud -More scrapers are coming soon. I currently want to add Google web/video/news search, HackerNews (durr orange site!!) and Qwant. A shopping and files tab is also in my todo list. +More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping, files, tab and more music scrapers are also on my todo list. # Installation This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK. @@ -190,6 +193,41 @@ services: Replace relevant values and start with `docker-compose up -d` +## Install on Caddy + +1. Install dependencies: + +`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git` + +2. Clone this repository where you want to host this from: + +`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get` + +3. Set permission on the `icons` directory inside `4get` + +`cd /var/www/4get/ && sudo chmod 777 -R icons/` + +4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile` + +```sh +4get.konakona.moe { + root * /var/www/4get + file_server + encode gzip + php_fastcgi unix//var/run/php/php8.2-fpm.sock { + index index.php + } + redir /{path}.php{query} 301 + try_files {path} {path}.php +} +``` + +Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile). + +5. Restart Caddy + +`sudo systemctl restart caddy` + # Encryption setup I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour. diff --git a/ami4get.php b/ami4get.php index f2d48bf..5bb9273 100644 --- a/ami4get.php +++ b/ami4get.php @@ -5,8 +5,8 @@ header("Access-Control-Allow-Origin: *"); include "data/config.php"; -$bot_requests = apcu_fetch("captcha"); $real_requests = apcu_fetch("real_requests"); +$bot_requests = apcu_fetch("captcha_gen"); echo json_encode( [ @@ -68,8 +68,8 @@ + Get the next page of results - All API responses come with an array index named "nextpage". To get - the next page of results, you must make another API call with &npt. + All API responses come with an array index named "npt". To get the + next page of results, you must make another API call with &npt. Example :: diff --git a/api/v1/web.php b/api/v1/web.php index dc1a7cc..156e53e 100644 --- a/api/v1/web.php +++ b/api/v1/web.php @@ -39,10 +39,12 @@ if( } try{ - echo json_encode( - $scraper->web($get), - JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES - ); + + echo + json_encode( + $scraper->web($get), + JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES + ); }catch(Exception $e){ diff --git a/data/config.php b/data/config.php index f2ca214..0f14a42 100644 --- a/data/config.php +++ b/data/config.php @@ -5,7 +5,7 @@ class config{ // any parameters. // 4get version. Please keep this updated - const VERSION = 5; + const VERSION = 6; // Will be shown pretty much everywhere. const SERVER_NAME = "4get"; @@ -56,14 +56,22 @@ class config{ const INSTANCES = [ "https://4get.ca", "https://4get.zzls.xyz", + "https://4getus.zzls.xyz", "https://4get.silly.computer", "https://4g.opnxng.com", - "https://4get.konakona.moe" + "https://4get.konakona.moe", + "https://4get.lvkaszus.pl", + "https://4g.ggtyler.dev", + "https://4get.perennialte.ch", + "https://4get.sihj.net", + "https://4get.hbubli.cc", + "https://4get.plunked.party", + "https://4get.seitan-ayoub.lol" ]; // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Changing this might break things. - const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/119.0"; + const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/120.0"; // Proxy pool assignments for each scraper // false = Use server's raw IP diff --git a/lib/captcha_gen.php b/lib/captcha_gen.php index 6728747..abcab7a 100644 --- a/lib/captcha_gen.php +++ b/lib/captcha_gen.php @@ -7,6 +7,7 @@ class captcha{ // check if we want captcha if(config::BOT_PROTECTION !== 1){ + apcu_inc("real_requests"); if($output === true){ $frontend->loadheader( $get, @@ -45,6 +46,8 @@ class captcha{ }else{ // the cookie is OK! dont die() and give results + apcu_inc("real_requests"); + if($output === true){ $frontend->loadheader( $get, @@ -175,6 +178,8 @@ class captcha{ apcu_inc($key, 1, $stupid, 86400); + apcu_inc("real_requests"); + setcookie( "pass", $key, @@ -197,7 +202,7 @@ class captcha{ $error = "<div class=\"quote\">You were <a href=\"https://www.youtube.com/watch?v=e1d7fkQx2rk\" target=\"_BLANK\" rel=\"noreferrer nofollow\">kicked out of Mensa.</a> Please try again.</div>"; } } - + // get the positions for the answers // will return between 3 and 6 answer positions $range = range(0, 15); diff --git a/lib/frontend.php b/lib/frontend.php index 0f9f95d..bef12aa 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -25,7 +25,7 @@ class frontend{ if($theme != "Dark"){ - $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . $theme . '.css?v' . config::VERSION . '">'; + $replacements["style"] = '<link rel="stylesheet" href="/static/themes/' . rawurlencode($theme) . '.css?v' . config::VERSION . '">'; }else{ $replacements["style"] = ""; @@ -84,6 +84,8 @@ class frontend{ ){ // bot detected !! + apcu_inc("captcha_gen"); + $this->drawerror( "Tshh, blocked!", 'You were blocked from viewing this page. If you wish to scrape data from 4get, please consider running <a href="https://git.lolcat.ca/lolcat/4get" rel="noreferrer nofollow">your own 4get instance</a> or using <a href="/api.txt">the API</a>.', @@ -889,7 +891,7 @@ class frontend{ "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", - //"google" => "Google", + "google" => "Google", "mojeek" => "Mojeek", "marginalia" => "Marginalia", "wiby" => "wiby" @@ -921,8 +923,8 @@ class frontend{ //"fb" => "Facebook videos", "ddg" => "DuckDuckGo", "brave" => "Brave", - "yandex" => "Yandex" - //"google" => "Google" + "yandex" => "Yandex", + "google" => "Google" ] ]; break; @@ -933,7 +935,7 @@ class frontend{ "option" => [ "ddg" => "DuckDuckGo", "brave" => "Brave", - //"google" => "Google", + "google" => "Google", "mojeek" => "Mojeek" ] ]; diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php index cb5d38d..bd161ce 100644 --- a/lib/fuckhtml.php +++ b/lib/fuckhtml.php @@ -15,7 +15,7 @@ class fuckhtml{ if(!isset($html["innerHTML"])){ - throw new Exception("(load) Supplied array doesn't contain a innerHTML index"); + throw new Exception("(load) Supplied array doesn't contain an innerHTML index"); } $html = $html["innerHTML"]; } @@ -35,6 +35,11 @@ class fuckhtml{ $this->strlen = strlen($this->html); } + public function getloadedhtml(){ + + return $this->html; + } + public function getElementsByTagName(string $tagname){ $out = []; @@ -46,7 +51,7 @@ class fuckhtml{ if($tagname == "*"){ - $tagname = '[^\/<>\s]+'; + $tagname = '[A-Za-z0-9._-]+'; }else{ $tagname = preg_quote(strtolower($tagname)); @@ -126,7 +131,7 @@ class fuckhtml{ } ); - // computer the indent level for each element + // compute the indent level for each element $level = []; $count = count($out); @@ -314,7 +319,7 @@ class fuckhtml{ if(!isset($html["innerHTML"])){ - throw new Exception("(getTextContent) Supplied array doesn't contain a innerHTML index"); + throw new Exception("(getTextContent) Supplied array doesn't contain an innerHTML index"); } $html = $html["innerHTML"]; } @@ -441,4 +446,27 @@ class fuckhtml{ return json_decode($json_out, true); } + + public function parseJsString($string){ + + return + preg_replace_callback( + '/\\\u[A-Fa-f0-9]{4}|\\\x[A-Fa-f0-9]{2}/', + function($match){ + + if($match[0][1] == "u"){ + + return json_decode('"' . $match[0] . '"'); + }else{ + + return mb_convert_encoding( + stripcslashes($match[0]), + "utf-8", + "windows-1252" + ); + } + }, + $string + ); + } } @@ -24,5 +24,5 @@ User-agent: * Disallow: -host: 4get.ca -sitemap: https://4get.ca/sitemap.xml +Host: 4get.ca +Sitemap: https://4get.ca/sitemap diff --git a/scraper/brave.php b/scraper/brave.php index bd1cd80..8be55ac 100644 --- a/scraper/brave.php +++ b/scraper/brave.php @@ -857,7 +857,9 @@ class brave{ // parse ratings if( isset($info["ratings"]) && - $info["ratings"] != "void 0" + $info["ratings"] != "void 0" && + is_array($info["ratings"]) && + count($info["ratings"]) !== 0 ){ $description[] = [ @@ -1183,7 +1185,7 @@ class brave{ "title" => $news["title"], "author" => null, "description" => $news["description"], - "date" => !isset($news["age"]) || $news["age"] == "void 0" ? null : strtotime($news["age"]), + "date" => !isset($news["age"]) || $news["age"] == "void 0" || $news["age"] == "null" ? null : strtotime($news["age"]), "thumb" => $thumb, "url" => $news["url"] ]; diff --git a/scraper/ddg.php b/scraper/ddg.php index 2d737ba..4a0d11f 100644 --- a/scraper/ddg.php +++ b/scraper/ddg.php @@ -545,8 +545,6 @@ class ddg{ public function web($get){ - $proxy = null; - if($get["npt"]){ [$jsgrep, $proxy] = $this->backend->get($get["npt"], "web"); diff --git a/scraper/google.php b/scraper/google.php index 055d12a..bf2b0e4 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -16,713 +16,496 @@ class google{ public function getfilters($page){ + $base = [ + "country" => [ // gl=<country> + "display" => "Country", + "option" => [ + "any" => "Instance's country", + "af" => "Afghanistan", + "al" => "Albania", + "dz" => "Algeria", + "as" => "American Samoa", + "ad" => "Andorra", + "ao" => "Angola", + "ai" => "Anguilla", + "aq" => "Antarctica", + "ag" => "Antigua and Barbuda", + "ar" => "Argentina", + "am" => "Armenia", + "aw" => "Aruba", + "au" => "Australia", + "at" => "Austria", + "az" => "Azerbaijan", + "bs" => "Bahamas", + "bh" => "Bahrain", + "bd" => "Bangladesh", + "bb" => "Barbados", + "by" => "Belarus", + "be" => "Belgium", + "bz" => "Belize", + "bj" => "Benin", + "bm" => "Bermuda", + "bt" => "Bhutan", + "bo" => "Bolivia", + "ba" => "Bosnia and Herzegovina", + "bw" => "Botswana", + "bv" => "Bouvet Island", + "br" => "Brazil", + "io" => "British Indian Ocean Territory", + "bn" => "Brunei Darussalam", + "bg" => "Bulgaria", + "bf" => "Burkina Faso", + "bi" => "Burundi", + "kh" => "Cambodia", + "cm" => "Cameroon", + "ca" => "Canada", + "cv" => "Cape Verde", + "ky" => "Cayman Islands", + "cf" => "Central African Republic", + "td" => "Chad", + "cl" => "Chile", + "cn" => "China", + "cx" => "Christmas Island", + "cc" => "Cocos (Keeling) Islands", + "co" => "Colombia", + "km" => "Comoros", + "cg" => "Congo", + "cd" => "Congo, the Democratic Republic", + "ck" => "Cook Islands", + "cr" => "Costa Rica", + "ci" => "Cote D'ivoire", + "hr" => "Croatia", + "cu" => "Cuba", + "cy" => "Cyprus", + "cz" => "Czech Republic", + "dk" => "Denmark", + "dj" => "Djibouti", + "dm" => "Dominica", + "do" => "Dominican Republic", + "ec" => "Ecuador", + "eg" => "Egypt", + "sv" => "El Salvador", + "gq" => "Equatorial Guinea", + "er" => "Eritrea", + "ee" => "Estonia", + "et" => "Ethiopia", + "fk" => "Falkland Islands (Malvinas)", + "fo" => "Faroe Islands", + "fj" => "Fiji", + "fi" => "Finland", + "fr" => "France", + "gf" => "French Guiana", + "pf" => "French Polynesia", + "tf" => "French Southern Territories", + "ga" => "Gabon", + "gm" => "Gambia", + "ge" => "Georgia", + "de" => "Germany", + "gh" => "Ghana", + "gi" => "Gibraltar", + "gr" => "Greece", + "gl" => "Greenland", + "gd" => "Grenada", + "gp" => "Guadeloupe", + "gu" => "Guam", + "gt" => "Guatemala", + "gn" => "Guinea", + "gw" => "Guinea-Bissau", + "gy" => "Guyana", + "ht" => "Haiti", + "hm" => "Heard Island and Mcdonald Islands", + "va" => "Holy See (Vatican City State)", + "hn" => "Honduras", + "hk" => "Hong Kong", + "hu" => "Hungary", + "is" => "Iceland", + "in" => "India", + "id" => "Indonesia", + "ir" => "Iran, Islamic Republic", + "iq" => "Iraq", + "ie" => "Ireland", + "il" => "Israel", + "it" => "Italy", + "jm" => "Jamaica", + "jp" => "Japan", + "jo" => "Jordan", + "kz" => "Kazakhstan", + "ke" => "Kenya", + "ki" => "Kiribati", + "kp" => "Korea, Democratic People's Republic", + "kr" => "Korea, Republic", + "kw" => "Kuwait", + "kg" => "Kyrgyzstan", + "la" => "Lao People's Democratic Republic", + "lv" => "Latvia", + "lb" => "Lebanon", + "ls" => "Lesotho", + "lr" => "Liberia", + "ly" => "Libyan Arab Jamahiriya", + "li" => "Liechtenstein", + "lt" => "Lithuania", + "lu" => "Luxembourg", + "mo" => "Macao", + "mk" => "Macedonia, the Former Yugosalv Republic", + "mg" => "Madagascar", + "mw" => "Malawi", + "my" => "Malaysia", + "mv" => "Maldives", + "ml" => "Mali", + "mt" => "Malta", + "mh" => "Marshall Islands", + "mq" => "Martinique", + "mr" => "Mauritania", + "mu" => "Mauritius", + "yt" => "Mayotte", + "mx" => "Mexico", + "fm" => "Micronesia, Federated States", + "md" => "Moldova, Republic", + "mc" => "Monaco", + "mn" => "Mongolia", + "ms" => "Montserrat", + "ma" => "Morocco", + "mz" => "Mozambique", + "mm" => "Myanmar", + "na" => "Namibia", + "nr" => "Nauru", + "np" => "Nepal", + "nl" => "Netherlands", + "an" => "Netherlands Antilles", + "nc" => "New Caledonia", + "nz" => "New Zealand", + "ni" => "Nicaragua", + "ne" => "Niger", + "ng" => "Nigeria", + "nu" => "Niue", + "nf" => "Norfolk Island", + "mp" => "Northern Mariana Islands", + "no" => "Norway", + "om" => "Oman", + "pk" => "Pakistan", + "pw" => "Palau", + "ps" => "Palestinian Territory, Occupied", + "pa" => "Panama", + "pg" => "Papua New Guinea", + "py" => "Paraguay", + "pe" => "Peru", + "ph" => "Philippines", + "pn" => "Pitcairn", + "pl" => "Poland", + "pt" => "Portugal", + "pr" => "Puerto Rico", + "qa" => "Qatar", + "re" => "Reunion", + "ro" => "Romania", + "ru" => "Russian Federation", + "rw" => "Rwanda", + "sh" => "Saint Helena", + "kn" => "Saint Kitts and Nevis", + "lc" => "Saint Lucia", + "pm" => "Saint Pierre and Miquelon", + "vc" => "Saint Vincent and the Grenadines", + "ws" => "Samoa", + "sm" => "San Marino", + "st" => "Sao Tome and Principe", + "sa" => "Saudi Arabia", + "sn" => "Senegal", + "cs" => "Serbia and Montenegro", + "sc" => "Seychelles", + "sl" => "Sierra Leone", + "sg" => "Singapore", + "sk" => "Slovakia", + "si" => "Slovenia", + "sb" => "Solomon Islands", + "so" => "Somalia", + "za" => "South Africa", + "gs" => "South Georgia and the South Sandwich Islands", + "es" => "Spain", + "lk" => "Sri Lanka", + "sd" => "Sudan", + "sr" => "Suriname", + "sj" => "Svalbard and Jan Mayen", + "sz" => "Swaziland", + "se" => "Sweden", + "ch" => "Switzerland", + "sy" => "Syrian Arab Republic", + "tw" => "Taiwan, Province of China", + "tj" => "Tajikistan", + "tz" => "Tanzania, United Republic", + "th" => "Thailand", + "tl" => "Timor-Leste", + "tg" => "Togo", + "tk" => "Tokelau", + "to" => "Tonga", + "tt" => "Trinidad and Tobago", + "tn" => "Tunisia", + "tr" => "Turkey", + "tm" => "Turkmenistan", + "tc" => "Turks and Caicos Islands", + "tv" => "Tuvalu", + "ug" => "Uganda", + "ua" => "Ukraine", + "ae" => "United Arab Emirates", + "uk" => "United Kingdom", + "us" => "United States", + "um" => "United States Minor Outlying Islands", + "uy" => "Uruguay", + "uz" => "Uzbekistan", + "vu" => "Vanuatu", + "ve" => "Venezuela", + "vn" => "Viet Nam", + "vg" => "Virgin Islands, British", + "vi" => "Virgin Islands, U.S.", + "wf" => "Wallis and Futuna", + "eh" => "Western Sahara", + "ye" => "Yemen", + "zm" => "Zambia", + "zw" => "Zimbabwe" + ] + ], + "nsfw" => [ + "display" => "NSFW", + "option" => [ + "yes" => "Yes", // safe=active + "no" => "No" // safe=off + ] + ], + "lang" => [ // lr=<lang> (prefix lang with "lang_") + "display" => "Language", + "option" => [ + "any" => "Any language", + "ar" => "Arabic", + "bg" => "Bulgarian", + "ca" => "Catalan", + "cs" => "Czech", + "da" => "Danish", + "de" => "German", + "el" => "Greek", + "en" => "English", + "es" => "Spanish", + "et" => "Estonian", + "fi" => "Finnish", + "fr" => "French", + "hr" => "Croatian", + "hu" => "Hungarian", + "id" => "Indonesian", + "is" => "Icelandic", + "it" => "Italian", + "iw" => "Hebrew", + "ja" => "Japanese", + "ko" => "Korean", + "lt" => "Lithuanian", + "lv" => "Latvian", + "nl" => "Dutch", + "no" => "Norwegian", + "pl" => "Polish", + "pt" => "Portuguese", + "ro" => "Romanian", + "ru" => "Russian", + "sk" => "Slovak", + "sl" => "Slovenian", + "sr" => "Serbian", + "sv" => "Swedish", + "tr" => "Turkish", + "zh-CN" => "Chinese (Simplified)", + "zh-TW" => "Chinese (Traditional)" + ] + ] + ]; + switch($page){ case "web": - case "videos": - case "news": - return [ - "country" => [ // gl=<country> - "display" => "Country", - "option" => [ - "any" => "Instance's country", - "af" => "Afghanistan", - "al" => "Albania", - "dz" => "Algeria", - "as" => "American Samoa", - "ad" => "Andorra", - "ao" => "Angola", - "ai" => "Anguilla", - "aq" => "Antarctica", - "ag" => "Antigua and Barbuda", - "ar" => "Argentina", - "am" => "Armenia", - "aw" => "Aruba", - "au" => "Australia", - "at" => "Austria", - "az" => "Azerbaijan", - "bs" => "Bahamas", - "bh" => "Bahrain", - "bd" => "Bangladesh", - "bb" => "Barbados", - "by" => "Belarus", - "be" => "Belgium", - "bz" => "Belize", - "bj" => "Benin", - "bm" => "Bermuda", - "bt" => "Bhutan", - "bo" => "Bolivia", - "ba" => "Bosnia and Herzegovina", - "bw" => "Botswana", - "bv" => "Bouvet Island", - "br" => "Brazil", - "io" => "British Indian Ocean Territory", - "bn" => "Brunei Darussalam", - "bg" => "Bulgaria", - "bf" => "Burkina Faso", - "bi" => "Burundi", - "kh" => "Cambodia", - "cm" => "Cameroon", - "ca" => "Canada", - "cv" => "Cape Verde", - "ky" => "Cayman Islands", - "cf" => "Central African Republic", - "td" => "Chad", - "cl" => "Chile", - "cn" => "China", - "cx" => "Christmas Island", - "cc" => "Cocos (Keeling) Islands", - "co" => "Colombia", - "km" => "Comoros", - "cg" => "Congo", - "cd" => "Congo, the Democratic Republic", - "ck" => "Cook Islands", - "cr" => "Costa Rica", - "ci" => "Cote D'ivoire", - "hr" => "Croatia", - "cu" => "Cuba", - "cy" => "Cyprus", - "cz" => "Czech Republic", - "dk" => "Denmark", - "dj" => "Djibouti", - "dm" => "Dominica", - "do" => "Dominican Republic", - "ec" => "Ecuador", - "eg" => "Egypt", - "sv" => "El Salvador", - "gq" => "Equatorial Guinea", - "er" => "Eritrea", - "ee" => "Estonia", - "et" => "Ethiopia", - "fk" => "Falkland Islands (Malvinas)", - "fo" => "Faroe Islands", - "fj" => "Fiji", - "fi" => "Finland", - "fr" => "France", - "gf" => "French Guiana", - "pf" => "French Polynesia", - "tf" => "French Southern Territories", - "ga" => "Gabon", - "gm" => "Gambia", - "ge" => "Georgia", - "de" => "Germany", - "gh" => "Ghana", - "gi" => "Gibraltar", - "gr" => "Greece", - "gl" => "Greenland", - "gd" => "Grenada", - "gp" => "Guadeloupe", - "gu" => "Guam", - "gt" => "Guatemala", - "gn" => "Guinea", - "gw" => "Guinea-Bissau", - "gy" => "Guyana", - "ht" => "Haiti", - "hm" => "Heard Island and Mcdonald Islands", - "va" => "Holy See (Vatican City State)", - "hn" => "Honduras", - "hk" => "Hong Kong", - "hu" => "Hungary", - "is" => "Iceland", - "in" => "India", - "id" => "Indonesia", - "ir" => "Iran, Islamic Republic", - "iq" => "Iraq", - "ie" => "Ireland", - "il" => "Israel", - "it" => "Italy", - "jm" => "Jamaica", - "jp" => "Japan", - "jo" => "Jordan", - "kz" => "Kazakhstan", - "ke" => "Kenya", - "ki" => "Kiribati", - "kp" => "Korea, Democratic People's Republic", - "kr" => "Korea, Republic", - "kw" => "Kuwait", - "kg" => "Kyrgyzstan", - "la" => "Lao People's Democratic Republic", - "lv" => "Latvia", - "lb" => "Lebanon", - "ls" => "Lesotho", - "lr" => "Liberia", - "ly" => "Libyan Arab Jamahiriya", - "li" => "Liechtenstein", - "lt" => "Lithuania", - "lu" => "Luxembourg", - "mo" => "Macao", - "mk" => "Macedonia, the Former Yugosalv Republic", - "mg" => "Madagascar", - "mw" => "Malawi", - "my" => "Malaysia", - "mv" => "Maldives", - "ml" => "Mali", - "mt" => "Malta", - "mh" => "Marshall Islands", - "mq" => "Martinique", - "mr" => "Mauritania", - "mu" => "Mauritius", - "yt" => "Mayotte", - "mx" => "Mexico", - "fm" => "Micronesia, Federated States", - "md" => "Moldova, Republic", - "mc" => "Monaco", - "mn" => "Mongolia", - "ms" => "Montserrat", - "ma" => "Morocco", - "mz" => "Mozambique", - "mm" => "Myanmar", - "na" => "Namibia", - "nr" => "Nauru", - "np" => "Nepal", - "nl" => "Netherlands", - "an" => "Netherlands Antilles", - "nc" => "New Caledonia", - "nz" => "New Zealand", - "ni" => "Nicaragua", - "ne" => "Niger", - "ng" => "Nigeria", - "nu" => "Niue", - "nf" => "Norfolk Island", - "mp" => "Northern Mariana Islands", - "no" => "Norway", - "om" => "Oman", - "pk" => "Pakistan", - "pw" => "Palau", - "ps" => "Palestinian Territory, Occupied", - "pa" => "Panama", - "pg" => "Papua New Guinea", - "py" => "Paraguay", - "pe" => "Peru", - "ph" => "Philippines", - "pn" => "Pitcairn", - "pl" => "Poland", - "pt" => "Portugal", - "pr" => "Puerto Rico", - "qa" => "Qatar", - "re" => "Reunion", - "ro" => "Romania", - "ru" => "Russian Federation", - "rw" => "Rwanda", - "sh" => "Saint Helena", - "kn" => "Saint Kitts and Nevis", - "lc" => "Saint Lucia", - "pm" => "Saint Pierre and Miquelon", - "vc" => "Saint Vincent and the Grenadines", - "ws" => "Samoa", - "sm" => "San Marino", - "st" => "Sao Tome and Principe", - "sa" => "Saudi Arabia", - "sn" => "Senegal", - "cs" => "Serbia and Montenegro", - "sc" => "Seychelles", - "sl" => "Sierra Leone", - "sg" => "Singapore", - "sk" => "Slovakia", - "si" => "Slovenia", - "sb" => "Solomon Islands", - "so" => "Somalia", - "za" => "South Africa", - "gs" => "South Georgia and the South Sandwich Islands", - "es" => "Spain", - "lk" => "Sri Lanka", - "sd" => "Sudan", - "sr" => "Suriname", - "sj" => "Svalbard and Jan Mayen", - "sz" => "Swaziland", - "se" => "Sweden", - "ch" => "Switzerland", - "sy" => "Syrian Arab Republic", - "tw" => "Taiwan, Province of China", - "tj" => "Tajikistan", - "tz" => "Tanzania, United Republic", - "th" => "Thailand", - "tl" => "Timor-Leste", - "tg" => "Togo", - "tk" => "Tokelau", - "to" => "Tonga", - "tt" => "Trinidad and Tobago", - "tn" => "Tunisia", - "tr" => "Turkey", - "tm" => "Turkmenistan", - "tc" => "Turks and Caicos Islands", - "tv" => "Tuvalu", - "ug" => "Uganda", - "ua" => "Ukraine", - "ae" => "United Arab Emirates", - "uk" => "United Kingdom", - "us" => "United States", - "um" => "United States Minor Outlying Islands", - "uy" => "Uruguay", - "uz" => "Uzbekistan", - "vu" => "Vanuatu", - "ve" => "Venezuela", - "vn" => "Viet Nam", - "vg" => "Virgin Islands, British", - "vi" => "Virgin Islands, U.S.", - "wf" => "Wallis and Futuna", - "eh" => "Western Sahara", - "ye" => "Yemen", - "zm" => "Zambia", - "zw" => "Zimbabwe" - ] - ], - "nsfw" => [ - "display" => "NSFW", - "option" => [ - "yes" => "Yes", // safe=active - "no" => "No" // safe=off - ] - ], - "lang" => [ // lr=<lang> (prefix lang with "lang_") - "display" => "Language", - "option" => [ - "any" => "Any language", - "ar" => "Arabic", - "bg" => "Bulgarian", - "ca" => "Catalan", - "cs" => "Czech", - "da" => "Danish", - "de" => "German", - "el" => "Greek", - "en" => "English", - "es" => "Spanish", - "et" => "Estonian", - "fi" => "Finnish", - "fr" => "French", - "hr" => "Croatian", - "hu" => "Hungarian", - "id" => "Indonesian", - "is" => "Icelandic", - "it" => "Italian", - "iw" => "Hebrew", - "ja" => "Japanese", - "ko" => "Korean", - "lt" => "Lithuanian", - "lv" => "Latvian", - "nl" => "Dutch", - "no" => "Norwegian", - "pl" => "Polish", - "pt" => "Portuguese", - "ro" => "Romanian", - "ru" => "Russian", - "sk" => "Slovak", - "sl" => "Slovenian", - "sr" => "Serbian", - "sv" => "Swedish", - "tr" => "Turkish", - "zh-CN" => "Chinese (Simplified)", - "zh-TW" => "Chinese (Traditional)" + return array_merge( + $base, + [ + "newer" => [ // &sort=review-date:r:20090301:20090430 + "display" => "Newer than", + "option" => "_DATE" + ], + "older" => [ + "display" => "Older than", + "option" => "_DATE" ] - ], - "newer" => [ // &sort=review-date:r:20090301:20090430 - "display" => "Newer than", - "option" => "_DATE" - ], - "older" => [ - "display" => "Older than", - "option" => "_DATE" ] - ]; + ); break; case "images": - return [ - "country" => [ // gl=<country> - "display" => "Country", - "option" => [ - "any" => "Instance's country", - "af" => "Afghanistan", - "al" => "Albania", - "dz" => "Algeria", - "as" => "American Samoa", - "ad" => "Andorra", - "ao" => "Angola", - "ai" => "Anguilla", - "aq" => "Antarctica", - "ag" => "Antigua and Barbuda", - "ar" => "Argentina", - "am" => "Armenia", - "aw" => "Aruba", - "au" => "Australia", - "at" => "Austria", - "az" => "Azerbaijan", - "bs" => "Bahamas", - "bh" => "Bahrain", - "bd" => "Bangladesh", - "bb" => "Barbados", - "by" => "Belarus", - "be" => "Belgium", - "bz" => "Belize", - "bj" => "Benin", - "bm" => "Bermuda", - "bt" => "Bhutan", - "bo" => "Bolivia", - "ba" => "Bosnia and Herzegovina", - "bw" => "Botswana", - "bv" => "Bouvet Island", - "br" => "Brazil", - "io" => "British Indian Ocean Territory", - "bn" => "Brunei Darussalam", - "bg" => "Bulgaria", - "bf" => "Burkina Faso", - "bi" => "Burundi", - "kh" => "Cambodia", - "cm" => "Cameroon", - "ca" => "Canada", - "cv" => "Cape Verde", - "ky" => "Cayman Islands", - "cf" => "Central African Republic", - "td" => "Chad", - "cl" => "Chile", - "cn" => "China", - "cx" => "Christmas Island", - "cc" => "Cocos (Keeling) Islands", - "co" => "Colombia", - "km" => "Comoros", - "cg" => "Congo", - "cd" => "Congo, the Democratic Republic", - "ck" => "Cook Islands", - "cr" => "Costa Rica", - "ci" => "Cote D'ivoire", - "hr" => "Croatia", - "cu" => "Cuba", - "cy" => "Cyprus", - "cz" => "Czech Republic", - "dk" => "Denmark", - "dj" => "Djibouti", - "dm" => "Dominica", - "do" => "Dominican Republic", - "ec" => "Ecuador", - "eg" => "Egypt", - "sv" => "El Salvador", - "gq" => "Equatorial Guinea", - "er" => "Eritrea", - "ee" => "Estonia", - "et" => "Ethiopia", - "fk" => "Falkland Islands (Malvinas)", - "fo" => "Faroe Islands", - "fj" => "Fiji", - "fi" => "Finland", - "fr" => "France", - "gf" => "French Guiana", - "pf" => "French Polynesia", - "tf" => "French Southern Territories", - "ga" => "Gabon", - "gm" => "Gambia", - "ge" => "Georgia", - "de" => "Germany", - "gh" => "Ghana", - "gi" => "Gibraltar", - "gr" => "Greece", - "gl" => "Greenland", - "gd" => "Grenada", - "gp" => "Guadeloupe", - "gu" => "Guam", - "gt" => "Guatemala", - "gn" => "Guinea", - "gw" => "Guinea-Bissau", - "gy" => "Guyana", - "ht" => "Haiti", - "hm" => "Heard Island and Mcdonald Islands", - "va" => "Holy See (Vatican City State)", - "hn" => "Honduras", - "hk" => "Hong Kong", - "hu" => "Hungary", - "is" => "Iceland", - "in" => "India", - "id" => "Indonesia", - "ir" => "Iran, Islamic Republic", - "iq" => "Iraq", - "ie" => "Ireland", - "il" => "Israel", - "it" => "Italy", - "jm" => "Jamaica", - "jp" => "Japan", - "jo" => "Jordan", - "kz" => "Kazakhstan", - "ke" => "Kenya", - "ki" => "Kiribati", - "kp" => "Korea, Democratic People's Republic", - "kr" => "Korea, Republic", - "kw" => "Kuwait", - "kg" => "Kyrgyzstan", - "la" => "Lao People's Democratic Republic", - "lv" => "Latvia", - "lb" => "Lebanon", - "ls" => "Lesotho", - "lr" => "Liberia", - "ly" => "Libyan Arab Jamahiriya", - "li" => "Liechtenstein", - "lt" => "Lithuania", - "lu" => "Luxembourg", - "mo" => "Macao", - "mk" => "Macedonia, the Former Yugosalv Republic", - "mg" => "Madagascar", - "mw" => "Malawi", - "my" => "Malaysia", - "mv" => "Maldives", - "ml" => "Mali", - "mt" => "Malta", - "mh" => "Marshall Islands", - "mq" => "Martinique", - "mr" => "Mauritania", - "mu" => "Mauritius", - "yt" => "Mayotte", - "mx" => "Mexico", - "fm" => "Micronesia, Federated States", - "md" => "Moldova, Republic", - "mc" => "Monaco", - "mn" => "Mongolia", - "ms" => "Montserrat", - "ma" => "Morocco", - "mz" => "Mozambique", - "mm" => "Myanmar", - "na" => "Namibia", - "nr" => "Nauru", - "np" => "Nepal", - "nl" => "Netherlands", - "an" => "Netherlands Antilles", - "nc" => "New Caledonia", - "nz" => "New Zealand", - "ni" => "Nicaragua", - "ne" => "Niger", - "ng" => "Nigeria", - "nu" => "Niue", - "nf" => "Norfolk Island", - "mp" => "Northern Mariana Islands", - "no" => "Norway", - "om" => "Oman", - "pk" => "Pakistan", - "pw" => "Palau", - "ps" => "Palestinian Territory, Occupied", - "pa" => "Panama", - "pg" => "Papua New Guinea", - "py" => "Paraguay", - "pe" => "Peru", - "ph" => "Philippines", - "pn" => "Pitcairn", - "pl" => "Poland", - "pt" => "Portugal", - "pr" => "Puerto Rico", - "qa" => "Qatar", - "re" => "Reunion", - "ro" => "Romania", - "ru" => "Russian Federation", - "rw" => "Rwanda", - "sh" => "Saint Helena", - "kn" => "Saint Kitts and Nevis", - "lc" => "Saint Lucia", - "pm" => "Saint Pierre and Miquelon", - "vc" => "Saint Vincent and the Grenadines", - "ws" => "Samoa", - "sm" => "San Marino", - "st" => "Sao Tome and Principe", - "sa" => "Saudi Arabia", - "sn" => "Senegal", - "cs" => "Serbia and Montenegro", - "sc" => "Seychelles", - "sl" => "Sierra Leone", - "sg" => "Singapore", - "sk" => "Slovakia", - "si" => "Slovenia", - "sb" => "Solomon Islands", - "so" => "Somalia", - "za" => "South Africa", - "gs" => "South Georgia and the South Sandwich Islands", - "es" => "Spain", - "lk" => "Sri Lanka", - "sd" => "Sudan", - "sr" => "Suriname", - "sj" => "Svalbard and Jan Mayen", - "sz" => "Swaziland", - "se" => "Sweden", - "ch" => "Switzerland", - "sy" => "Syrian Arab Republic", - "tw" => "Taiwan, Province of China", - "tj" => "Tajikistan", - "tz" => "Tanzania, United Republic", - "th" => "Thailand", - "tl" => "Timor-Leste", - "tg" => "Togo", - "tk" => "Tokelau", - "to" => "Tonga", - "tt" => "Trinidad and Tobago", - "tn" => "Tunisia", - "tr" => "Turkey", - "tm" => "Turkmenistan", - "tc" => "Turks and Caicos Islands", - "tv" => "Tuvalu", - "ug" => "Uganda", - "ua" => "Ukraine", - "ae" => "United Arab Emirates", - "uk" => "United Kingdom", - "us" => "United States", - "um" => "United States Minor Outlying Islands", - "uy" => "Uruguay", - "uz" => "Uzbekistan", - "vu" => "Vanuatu", - "ve" => "Venezuela", - "vn" => "Viet Nam", - "vg" => "Virgin Islands, British", - "vi" => "Virgin Islands, U.S.", - "wf" => "Wallis and Futuna", - "eh" => "Western Sahara", - "ye" => "Yemen", - "zm" => "Zambia", - "zw" => "Zimbabwe" - ] - ], - "nsfw" => [ - "display" => "NSFW", - "option" => [ - "yes" => "Yes", // safe=active - "no" => "No" // safe=off - ] - ], - "lang" => [ // lr=<lang> (prefix lang with "lang_") - "display" => "Language", - "option" => [ - "any" => "Any language", - "ar" => "Arabic", - "bg" => "Bulgarian", - "ca" => "Catalan", - "cs" => "Czech", - "da" => "Danish", - "de" => "German", - "el" => "Greek", - "en" => "English", - "es" => "Spanish", - "et" => "Estonian", - "fi" => "Finnish", - "fr" => "French", - "hr" => "Croatian", - "hu" => "Hungarian", - "id" => "Indonesian", - "is" => "Icelandic", - "it" => "Italian", - "iw" => "Hebrew", - "ja" => "Japanese", - "ko" => "Korean", - "lt" => "Lithuanian", - "lv" => "Latvian", - "nl" => "Dutch", - "no" => "Norwegian", - "pl" => "Polish", - "pt" => "Portuguese", - "ro" => "Romanian", - "ru" => "Russian", - "sk" => "Slovak", - "sl" => "Slovenian", - "sr" => "Serbian", - "sv" => "Swedish", - "tr" => "Turkish", - "zh-CN" => "Chinese (Simplified)", - "zh-TW" => "Chinese (Traditional)" - ] - ], - "time" => [ // tbs=qrd:<size> - "display" => "Time posted", - "option" => [ - "any" => "Any time", - "d" => "Past 24 hours", - "w" => "Past week", - "m" => "Past month", - "y" => "Past year" - ] - ], - "size" => [ - "display" => "Size", - "option" => [ - // tbs=isz:<size> - "any" => "Any size", - "l" => "Large", - "m" => "Medium", - "i" => "Icon", - // from here - // tbz:lt,islt:<size> - "qsvga" => "Larger than 400x300", - "vga" => "Larger than 640x480", - "qsvga" => "Larger than 800x600", - "xga" => "Larger than 1024x768", - "2mp" => "Larger than 2MP", - "4mp" => "Larger than 4MP", - "6mp" => "Larger than 6MP", - "8mp" => "Larger than 8MP", - "10mp" => "Larger than 10MP", - "12mp" => "Larger than 12MP", - "15mp" => "Larger than 15MP", - "20mp" => "Larger than 20MP", - "40mp" => "Larger than 40MP", - "70mp" => "Larger than 70MP" - ] - ], - "ratio" => [ // tbs=iar:<size> - "display" => "Aspect ratio", - "option" => [ - "any" => "Any ratio", - "t" => "Tall", - "s" => "Square", - "w" => "Wide", - "xw" => "Panoramic" - ] - ], - "color" => [ // tbs=ic:<color> - "display" => "Color", - "option" => [ - "any" => "Any color", - "color" => "Full color", - "gray" => "Black & white", - "trans" => "Transparent", - // from there, its ic:specific,isc:<color> - "red" => "Red", - "orange" => "Orange", - "yellow" => "Yellow", - "green" => "Green", - "teal" => "Teal", - "blue" => "Blue", - "purple" => "Purple", - "pink" => "Pink", - "white" => "White", - "gray" => "Gray", - "black" => "Black", - "brown" => "Brown" - ] - ], - "type" => [ // tbs=itp:<type> - "display" => "Type", - "option" => [ - "any" => "Any type", - "face" => "Faces", - "clipart" => "Clip Art", - "lineart" => "Line Drawing", - "stock" => "Stock", - "animated" => "Animated" + return array_merge( + $base, + [ + "time" => [ // tbs=qrd:<size> + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "d" => "Past 24 hours", + "w" => "Past week", + "m" => "Past month", + "y" => "Past year" + ] + ], + "size" => [ + "display" => "Size", + "option" => [ + // tbs=isz:<size> + "any" => "Any size", + "l" => "Large", + "m" => "Medium", + "i" => "Icon", + // from here + // tbz:lt,islt:<size> + "qsvga" => "Larger than 400x300", + "vga" => "Larger than 640x480", + "qsvga" => "Larger than 800x600", + "xga" => "Larger than 1024x768", + "2mp" => "Larger than 2MP", + "4mp" => "Larger than 4MP", + "6mp" => "Larger than 6MP", + "8mp" => "Larger than 8MP", + "10mp" => "Larger than 10MP", + "12mp" => "Larger than 12MP", + "15mp" => "Larger than 15MP", + "20mp" => "Larger than 20MP", + "40mp" => "Larger than 40MP", + "70mp" => "Larger than 70MP" + ] + ], + "ratio" => [ // tbs=iar:<size> + "display" => "Aspect ratio", + "option" => [ + "any" => "Any ratio", + "t" => "Tall", + "s" => "Square", + "w" => "Wide", + "xw" => "Panoramic" + ] + ], + "color" => [ // tbs=ic:<color> + "display" => "Color", + "option" => [ + "any" => "Any color", + "color" => "Full color", + "gray" => "Black & white", + "trans" => "Transparent", + // from there, its ic:specific,isc:<color> + "red" => "Red", + "orange" => "Orange", + "yellow" => "Yellow", + "green" => "Green", + "teal" => "Teal", + "blue" => "Blue", + "purple" => "Purple", + "pink" => "Pink", + "white" => "White", + "gray" => "Gray", + "black" => "Black", + "brown" => "Brown" + ] + ], + "type" => [ // tbs=itp:<type> + "display" => "Type", + "option" => [ + "any" => "Any type", + "face" => "Faces", + "clipart" => "Clip Art", + "lineart" => "Line Drawing", + "stock" => "Stock", + "animated" => "Animated" + ] + ], + "format" => [ // tbs=ift:<format> + "display" => "Format", + "option" => [ + "any" => "Any format", + "jpg" => "JPG", + "gif" => "GIF", + "png" => "PNG", + "bmp" => "BMP", + "svg" => "SVG", + "webp" => "WEBP", + "ico" => "ICO", + "craw" => "RAW" + ] + ], + "rights" => [ // tbs=il:<rights> + "display" => "Usage rights", + "option" => [ + "any" => "Any license", + "cl" => "Creative Commons licenses", + "ol" => "Commercial & other licenses" + ] ] - ], - "format" => [ // tbs=ift:<format> - "display" => "Format", - "option" => [ - "any" => "Any format", - "jpg" => "JPG", - "gif" => "GIF", - "png" => "PNG", - "bmp" => "BMP", - "svg" => "SVG", - "webp" => "WEBP", - "ico" => "ICO", - "craw" => "RAW" + ] + ); + break; + + case "videos": + return array_merge( + $base, + [ + "time" => [ + "display" => "Time posted", + "option" => [ // tbs=qdr + "any" => "Any time", + "h" => "Past hour", + "d" => "Past 24 hours", + "w" => "Past week", + "m" => "Past month", + "y" => "Past year" + ] + ], + "duration" => [ + "display" => "Duration", + "option" => [ + "any" => "Any duration", + "s" => "Short (0-4min)", // tbs=dur:s + "m" => "Medium (4-20min)", // tbs=dur:m + "l" => "Long (20+ min)" // tbs=dur:l + ] + ], + "quality" => [ + "display" => "Quality", + "option" => [ + "any" => "Any quality", + "h" => "High quality" // tbs=hq:h + ] + ], + "captions" => [ + "display" => "Captions", + "option" => [ + "any" => "No preference", + "yes" => "Closed captioned" // tbs=cc:1 + ] ] - ], - "rights" => [ // tbs=il:<rights> - "display" => "Usage rights", - "option" => [ - "any" => "Any license", - "cl" => "Creative Commons licenses", - "ol" => "Commercial & other licenses" + ] + ); + break; + + case "news": + return array_merge( + $base, + [ + "time" => [ + "display" => "Time posted", + "option" => [ // tbs=qdr + "any" => "Any time", + "h" => "Past hour", + "d" => "Past 24 hours", + "w" => "Past week", + "m" => "Past month", + "y" => "Past year", + "a" => "Archives" // tbs=ar:1 + ] + ], + "sort" => [ + "display" => "Sort", + "option" => [ + "relevance" => "Relevance", + "date" => "Date" // sbd:1 + ] ] ] - ]; + ); break; } } @@ -773,58 +556,402 @@ class google{ curl_close($curlproc); return $data; } - /* + + + public function web($get){ - $search = $get["s"]; - $country = $get["country"]; - $nsfw = $get["nsfw"]; - $lang = $get["lang"]; - $older = $get["older"]; - $newer = $get["newer"]; + if($get["npt"]){ + + [$req, $ip] = $this->backend->get($get["npt"], "web"); + parse_str( + parse_url($req, PHP_URL_QUERY), + $search + ); + + if(isset($search["q"])){ + + $search = $search["q"]; + }else{ + + $search = "a"; // lol + } + + try{ + $html = + $this->get( + $ip, + "https://www.google.com" . $req, + [] + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } + }else{ + $search = $get["s"]; + $country = $get["country"]; + $nsfw = $get["nsfw"]; + $lang = $get["lang"]; + $older = $get["older"]; + $newer = $get["newer"]; + $ip = $this->backend->get_ip(); + + $params = [ + "q" => $search, + "num" => 20 // get 20 results + ]; + + // country + if($country != "any"){ + + $params["gl"] = $country; + } + + // nsfw + $params["safe"] = $nsfw == "yes" ? "off" : "active"; + + // language + if($lang != "any"){ + + $params["lr"] = "lang_" . $lang; + } + + // &sort=review-date:r:20090301:20090430 + $older = $older === false ? false : date("Ymd", $older); + $newer = $newer === false ? false : date("Ymd", $newer); + + if( + $older !== false && + $newer === false + ){ + + $newer = date("Ymd", time()); + } + + if( + $older !== false || + $newer !== false + ){ + + $params["sort"] = "review-date:r:" . $older . ":" . $newer; + } + + try{ + $html = + $this->get( + $ip, + "https://www.google.com/search", + $params + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } + } - $params = [ - "num" => 20 // get 20 results - ]; + return $this->parsepage($html, "web", $search, $ip); + } + + + + public function video($get){ - // country - if($country != "any"){ + if($get["npt"]){ + + [$req, $ip] = $this->backend->get($get["npt"], "videos"); + parse_str( + parse_url($req, PHP_URL_QUERY), + $search + ); + + if(isset($search["q"])){ + + $search = $search["q"]; + }else{ + + $search = "a"; // lol + } + + try{ + + $html = + $this->get( + $ip, + "https://www.google.com" . $req, + [] + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } + + }else{ + $search = $get["s"]; + $country = $get["country"]; + $nsfw = $get["nsfw"]; + $lang = $get["lang"]; + $time = $get["time"]; + $duration = $get["duration"]; + $quality = $get["quality"]; + $captions = $get["captions"]; + $ip = $this->backend->get_ip(); + + $params = [ + "q" => $search, + "tbm" => "vid", + "num" => "20" + ]; + + // country + if($country != "any"){ + + $params["gl"] = $country; + } + + // nsfw + $params["safe"] = $nsfw == "yes" ? "off" : "active"; + + // language + if($lang != "any"){ + + $params["lr"] = "lang_" . $lang; + } + + $tbs = []; + + // time + if($time != "any"){ + + $tbs[] = "qdr:" . $time; + } + + // duration + if($duration != "any"){ + + $tbs[] = "dur:" . $duration; + } - $params["gl"] = $country; + // quality + if($quality != "any"){ + + $tbs[] = "hq:" . $quality; + } + + // captions + if($captions != "any"){ + + $tbs[] = "cc:" . $captions; + } + + // append tbs + if(count($tbs) !== 0){ + + $params["tbs"] = + implode(",", $tbs); + } + + try{ + $html = + $this->get( + $ip, + "https://www.google.com/search", + $params + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } } - // nsfw - $params["safe"] = $nsfw == "yes" ? "off" : "active"; + $json = $this->parsepage($html, "videos", $search, $ip); + $out = [ + "status" => "ok", + "npt" => $json["npt"], + "video" => [], + "author" => [], + "livestream" => [], + "playlist" => [], + "reel" => [] + ]; - // language - if($lang != "any"){ + foreach($json["web"] as $item){ - $params["lr"] = "lang_" . $lang; + $out["video"][] = [ + "title" => $item["title"], + "description" => $item["description"], + "author" => [ + "name" => null, + "url" => null, + "avatar" => null + ], + "date" => isset($item["table"]["Posted"]) ? strtotime($item["table"]["Posted"]) : null, + "duration" => isset($item["table"]["Duration"]) ? $this->hms2int($item["table"]["Duration"]) : null, + "views" => null, + "thumb" => + $item["thumb"]["url"] === null ? + [ + "url" => null, + "ratio" => null + ] : + [ + "url" => $item["thumb"]["url"], + "ratio" => "16:9" + ], + "url" => $item["url"] + ]; } - // &sort=review-date:r:20090301:20090430 - $older = $older === false ? false : date("Ymd", $older); - $newer = $newer === false ? false : date("Ymd", $newer); + return $out; + } + + + + public function news($get){ - if( - $older !== false && - $newer === false - ){ + if($get["npt"]){ + + [$req, $ip] = $this->backend->get($get["npt"], "news"); + parse_str( + parse_url($req, PHP_URL_QUERY), + $search + ); + + if(isset($search["q"])){ + + $search = $search["q"]; + }else{ + + $search = "a"; // lol + } + + try{ + + $html = + $this->get( + $ip, + "https://www.google.com" . $req, + [] + ); + }catch(Exception $error){ + + throw new Exception("Failed to get HTML"); + } + + }else{ + $search = $get["s"]; + $country = $get["country"]; + $nsfw = $get["nsfw"]; + $lang = $get["lang"]; + $time = $get["time"]; + $sort = $get["sort"]; + $ip = $this->backend->get_ip(); - $newer = date("Ymd", time()); + $params = [ + "q" => $search, + "tbm" => "nws", + "num" => "20" + ]; + + // country + if($country != "any"){ + + $params["gl"] = $country; + } + + // nsfw + $params["safe"] = $nsfw == "yes" ? "off" : "active"; + + // language + if($lang != "any"){ + + $params["lr"] = "lang_" . $lang; + } + + $tbs = []; + + // time + if($time != "any"){ + + if($time == "a"){ + + $tbs[] = "ar:1"; + }else{ + + $tbs[] = "qdr:" . $time; + } + } + + // relevance + if($sort == "date"){ + + $tbs[] = "sbd:1"; + } + + // append tbs + if(count($tbs) !== 0){ + + $params["tbs"] = + implode(",", $tbs); + } + + $html = + $this->get( + $ip, + "https://www.google.com/search", + $params + ); } - if( - $older !== false || - $newer !== false - ){ + $json = $this->parsepage($html, "news", $search, $ip); + $out = [ + "status" => "ok", + "npt" => $json["npt"], + "news" => [] + ]; + + foreach($json["web"] as $item){ + + $description = array_key_first($item["table"]); + + if($description !== null){ + + $date = $item["table"][$description]; + }else{ + + $date = null; + } - $params["sort"] = "review-date:r:" . $older . ":" . $newer; + $out["news"][] = [ + "title" => $item["title"], + "author" => $item["author"], + "description" => $description, + "date" => strtotime($date), + "thumb" => + $item["thumb"]["url"] === null ? + [ + "url" => null, + "ratio" => null + ] : + [ + "url" => $item["thumb"]["url"], + "ratio" => "16:9" + ], + "url" => $item["url"] + ]; } + return $out; + } + + + + private function parsepage($html, $pagetype, $search, $ip){ + /* $handle = fopen("scraper/google.html", "r"); $html = fread($handle, filesize("scraper/google.html")); fclose($handle); + */ $out = [ "status" => "ok", @@ -844,6 +971,156 @@ class google{ $this->parsejavascript($html); + // + // parse accdef's + // + $has_appended_accdef = false; + + preg_match_all( + '/window\.jsl\.dh\(\'(accdef_[0-9]+)\',\'(.*)\'\);/', + $html, + $accdefs_regex + ); + + $accdefs = []; + for($i=0; $i<count($accdefs_regex[0]); $i++){ + + // decode UTF-16 string + $answer = + $this->fuckhtml + ->parseJsString( + $accdefs_regex[2][$i] + ); + + $this->fuckhtml->load($answer); + + // get description + $description = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding" => "12px 16px 12px", + ], + self::is_class + ), + "div" + )[1]; + + // get date (rare) + $date = + $this->fuckhtml + ->getElementsByTagName("sub"); + + if(count($date) !== 0){ + + $description = + str_replace( + $date[0]["outerHTML"], + "", + $description["innerHTML"] + ); + + $date = + strtotime( + $this->fuckhtml + ->getTextContent( + $date[0] + ) + ); + }else{ + + $date = null; + } + + // get information table + $table = []; + + $tbody = + $this->fuckhtml + ->getElementsByTagName("tbody"); + + if(count($tbody) !== 0){ + + $this->fuckhtml->load($tbody[0]); + + $trs = + $this->fuckhtml + ->getElementsByTagName("tr"); + + foreach($trs as $tr){ + + $this->fuckhtml->load($tr); + + $tds = + $this->fuckhtml + ->getElementsByTagName("td"); + + if(count($tds) === 2){ + + $table[ + $this->fuckhtml + ->getTextContent( + $tds[0] + ) + ] = + $this->fuckhtml + ->getTextContent( + $tds[1] + ); + } + } + + // load back what we had + $this->fuckhtml->load($answer); + } + + // get title & link + $a = + $this->fuckhtml + ->getElementsByTagName("a")[0]; + + $this->fuckhtml->load($a); + + $title = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($title) === 0){ + + continue; + } + + $accdefs[] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $title[0] + ) + ), + "description" => + $this->fuckhtml + ->getTextContent( + $description + ), + "url" => + $this->unshiturl( + $a["attributes"]["href"] + ), + "date" => $date, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => $table + ]; + } + + $this->fuckhtml->load($html); + $containers = $this->fuckhtml ->getElementsByClassName( @@ -863,6 +1140,94 @@ class google{ $this->fuckhtml->load($container); + // detect spelling + $spelling = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "font-size" => "20px", + "line-height" => "26px", + "padding-top" => "2px", + "margin-bottom" => "1px" + ], + self::is_class + ), + "div" + ); + + if(count($spelling) !== 0){ + + $a = + $this->fuckhtml + ->getElementsByTagName("a"); + + if(count($a) !== 0){ + + $scripts = + $this->fuckhtml + ->getElementsByTagName("script"); + + foreach($scripts as $script){ + + $container["innerHTML"] = + str_replace( + $script["outerHTML"], + "", + $container["innerHTML"] + ); + } + + $container["innerHTML"] = + $this->fuckhtml + ->getTextContent( + str_replace( + $a[0]["outerHTML"], + "", + $container["innerHTML"] + ) + ); + + if( + preg_match( + '/^did you mean/i', + $container["innerHTML"] + ) + ){ + + $out["spelling"] = [ + "type" => "not_many", + "using" => $search, + "correction" => + $this->fuckhtml + ->getTextContent( + $a[0] + ) + ]; + } + + elseif( + preg_match( + '/^showing results for/i', + $container["innerHTML"] + ) + ){ + + $out["spelling"] = [ + "type" => "including", + "using" => + $this->fuckhtml + ->getTextContent( + $a[0] + ), + "correction" => $search + ]; + } + } + + continue; + } + $title = $this->fuckhtml ->getElementsByClassName( @@ -891,14 +1256,7 @@ class google{ ) ), "description" => null, - "url" => - $this->decodeurl( - $this->fuckhtml - ->getElementsByTagName("a") - [0] - ["attributes"] - ["href"] - ), + "url" => null, "date" => null, "type" => "web", "thumb" => [ @@ -909,33 +1267,235 @@ class google{ "table" => [] ]; + // get link + $web["url"] = + $this->unshiturl( + $this->fuckhtml + ->getElementsByTagName("a") + [0] + ["attributes"] + ["href"] + ); + + // + // check if link contains a carousel + // + $carousels = $this->parsecarousels(); + if(count($carousels) !== 0){ + + $first = true; + foreach($carousels as $carousel_cat){ + + foreach($carousel_cat as $carousel){ + + if($first === true){ + + $first = false; + }elseif($carousel["image"] !== null){ + + $out["image"][] = [ + "title" => $carousel["title"], + "source" => [ + [ + "url" => $carousel["image"], + "width" => null, + "height" => null + ] + ], + "url" => $carousel["url"] + ]; + } + + $web["sublink"][] = [ + "title" => $carousel["title"], + "date" => $carousel["date"], + "description" => $carousel["description"], + "url" => $carousel["url"] + ]; + } + } + + if($carousels[0][0]["image"] !== null){ + $web["thumb"] = [ + "url" => $carousels[0][0]["image"], + "ratio" => "16:9" + ]; + } + + $out["web"][] = $web; + continue; + } + + // + // no carousel entries, parse as normal link + // + $this->fuckhtml->load($container); + + // parse URL + $web["url"] = + $this->unshiturl( + $this->fuckhtml + ->getElementsByTagName("a") + [0] + ["attributes"] + ["href"] + ); + $container = $container["innerHTML"]; - $description_container = + $line_detect = $this->fuckhtml ->getElementsByClassName( $this->findstyles( [ - "padding" => "12px 16px 12px" + "height" => "1px", + "background-color" => "#dadce0", + "margin" => "0 16px" ], self::is_class ), "div" - )[1]; + ); + + if(count($line_detect) !== 0){ + + // we found a line, this means we're dealing with a + // "featured snippet" + $featured = true; + + $description_container = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + )[1]; + + // get date node for it + $date = + $this->fuckhtml + ->getElementsByTagName("sub"); + + if(count($date) !== 0){ + $web["date"] = + strtotime( + $this->fuckhtml + ->getTextContent( + $date[0] + ) + ); + } + }else{ + + // we're dealing with a normal link + $featured = false; + + $description_container = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding" => "12px 16px 12px" + ], + self::is_class + ), + "div" + )[1]; + } + + // + // Get author if we're parsing news + // + if($pagetype == "news"){ + + $author = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "position" => "absolute", + "width" => "100%", + "top" => "0", + "left" => "0", + "padding-top" => "1px", + "margin-bottom" => "-1px" + ], + self::is_class + ), + "div" + ); + + if(count($author) !== 0){ + + $web["author"] = + $this->fuckhtml + ->getTextContent( + $author[0] + ); + }else{ + + $web["author"] = null; + } + } $description = $description_container["innerHTML"]; - // get sublinks $this->fuckhtml->load($description); + // + // get thumbnail before we call loadhtml again + // + $img = + $this->fuckhtml + ->getElementsByTagName("img"); + + if(count($img) !== 0){ + + $skip = true; + + if( + isset($img[0]["attributes"]["alt"]) && + stripos($img[0]["attributes"]["alt"], "Video for") !== false + ){ + + // is a video thumbnail + $web["thumb"]["ratio"] = "16:9"; + }else{ + + // is a google thumbnail + $web["thumb"]["ratio"] = "1:1"; + } + + $web["thumb"]["url"] = + $this->getimage( + $img[0]["attributes"]["id"] + ); + }else{ + + $skip = false; + } + + // + // get sublinks + // $links = $this->fuckhtml ->getElementsByTagName("a"); - $skip = true; foreach($links as $link){ + if($skip === true){ + + $skip = false; + continue; + } + $description = str_replace( $link["outerHTML"], @@ -943,12 +1503,6 @@ class google{ $description ); - if($skip){ - - $skip = false; - continue; - } - $sublink = [ "title" => null, "description" => null, @@ -957,214 +1511,799 @@ class google{ ]; $sublink["title"] = - $this->fuckhtml - ->getTextContent( - $link + $this->titledots( + $this->fuckhtml + ->getTextContent( + $link + ) ); $sublink["url"] = - $this->decodeurl( + $this->unshiturl( $link ["attributes"] ["href"] ); - $web["sublink"][] = $sublink; + if(parse_url($sublink["url"], PHP_URL_HOST) !== null){ + + $web["sublink"][] = $sublink; + } } - // get thumbnail before we call loadhtml again - $img = + // + // Parse spans in description + // + $this->fuckhtml->load($description); + + if($featured === false){ + + $levels = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding-bottom" => "8px" + ], + self::is_class + ), + "div" + ); + + // oh my god yes, fucking great, sometimes there are NO levels + // hahahahahhahahahahahahahahahhahaa + if(count($levels) === 0){ + + $levels = [$description]; + } + + foreach($levels as $level){ + + $this->fuckhtml->load($level); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + $is_rating = -1; + + foreach($spans as $span){ + + $innertext = + trim( + $this->fuckhtml + ->getTextContent( + $span + ), + " ·." + ); + + if($innertext == ""){ continue; } + + if( + strtolower($innertext) + == "rating" + ){ + + $is_rating = 0; + + // clean up before we go + $description = + str_replace( + $span["outerHTML"], + "", + $description + ); + continue; + } + + // + // Parse rating object + // + if($is_rating >= 0){ + + // clean up description + $description = + str_replace( + $span["outerHTML"], + "", + $description + ); + + if($span["level"] !== 1){ continue; } + $is_rating++; + + // 10/10 (123) + if($is_rating === 1){ + + $innertext = explode(" ", $innertext, 2); + + $web["table"]["Rating"] = $innertext[0]; + + if(count($innertext) === 2){ + $web["table"]["Hits"] = + trim( + str_replace( + [ + "(", + ")" + ], + "", + $innertext[1] + ) + ); + + if($web["table"]["Hits"] == ""){ + + unset($web["table"]["Hits"]); + } + } + continue; + } + + // US$4.99 + // MYR 50.00 + // $38.34 + // JP¥6,480 + // Reviewed by your mom + if($is_rating === 2){ + + if( + preg_match( + '/^Review by (.+)/', + $innertext, + $match + ) + ){ + + $web["table"]["Author"] = $match[1]; + continue; + } + + $web["table"]["Price"] = $innertext; + continue; + } + + // Android / In stock + if($is_rating === 3){ + + $web["table"]["Support"] = $innertext; + continue; + } + + // ignore the rest + continue; + } + + // + // Parse standalone text + // + + // If we reach this point: + // 1. Ratings have been parsed + // 2. We're parsing a WEB link, not some shitty piece of shit + + // check for date + // if span has no text before it, assume it's a date + $desc_split = + explode( + $span["outerHTML"], + $description, + 2 + ); + + if( + $this->fuckhtml + ->getTextContent( + $desc_split[0] + ) == "" + ){ + + // has no text before + $date = strtotime($innertext); + if($date){ + + $web["date"] = $date; + } + + // cleanup + $description = + str_replace( + $span["outerHTML"], + "", + $description + ); + + continue; + } + + // Ready to parse table + if(count($desc_split) === 2){ + $this->fuckhtml->load($desc_split[1]); + + $web["table"][ + $this->fuckhtml + ->getTextContent( + trim($desc_split[0], ": ") + ) + ] = $innertext; + + // cleanup + $description = + str_replace( + $desc_split[0] . $span["outerHTML"], + "", + $description + ); + } + } + } + } + + $web["description"] = + trim( + $this->fuckhtml + ->getTextContent( + $description + ), + " ·." + ); + + if($web["description"] == ""){ + + $web["description"] = null; + } + + $out["web"][] = $web; + + continue; + } + + // + // Detect wikipedia shit + // + $wiki_title = + $this->fuckhtml + ->getElementsByTagName("h3"); + + if(count($wiki_title) !== 0){ + + $description_after = []; + $description = []; + $table = []; + $sublink = []; + + $as = $this->fuckhtml - ->getElementsByTagName("img"); + ->getElementsByTagName("a"); - if(count($img) !== 0){ + foreach($as as $a){ if( - isset($img[0]["attributes"]["alt"]) && - stripos($img[0]["attributes"]["alt"], "Video for") !== false + isset($a["attributes"]["href"]) && + parse_url($a["attributes"]["href"], PHP_URL_HOST) == "maps.google.com" ){ - // is a video thumbnail - $web["thumb"]["ratio"] = "16:9"; - }else{ - - // is a google thumbnail - $web["thumb"]["ratio"] = "1:1"; + // detected maps embed, ignore + continue 2; } + } + + // get carousels and remove them from container for image grepper + $carousels = $this->parsecarousels($container["innerHTML"]); + $this->fuckhtml->load($container); + + // add images to image tab, if applicable + for($i=0; $i<count($carousels); $i++){ - $web["thumb"]["url"] = - $this->getimage( - $img[0]["attributes"]["id"] - ); + foreach($carousels[$i] as $item){ + + if( + $item["url"] !== null && + $item["ref"] !== null && + $item["image"] !== null && + $item["title"] !== null + ){ + + $out["image"][] = [ + "title" => $item["title"], + "source" => [ + [ + "url" => $item["url"], + "width" => $item["image_width"], + "height" => $item["image_height"] + ], + [ + "url" => $item["image"], + "width" => $item["thumb_width"], + "height" => $item["thumb_height"] + ] + ], + "url" => $item["ref"] + ]; + + unset($carousels[$i]); + } + } } - // get table elements - $this->fuckhtml->load($description); + $carousels = array_values($carousels); - $levels = + // interpret remaining carousels as title + carousel + $titles = $this->fuckhtml ->getElementsByClassName( $this->findstyles( [ - "padding-bottom" => "8px" + "font-weight" => "700", + "letter-spacing" => "0.75px", + "text-transform" => "uppercase" ], self::is_class - ), - "div" + ) ); - $additional_info = []; - foreach($levels as $level){ + for($i=0; $i<count($titles); $i++){ - $this->fuckhtml->load($level); + if(!isset($carousels[$i])){ + + break; + } - $spans = - $this->fuckhtml - ->getElementsByTagName( - "span" - ); + $description_after[] = [ + "type" => "title", + "value" => + $this->fuckhtml + ->getTextContent( + $titles[$i] + ) + ]; + + foreach($carousels[$i] as $carousel){ + + $description_after[] = [ + "type" => "link", + "url" => "web?s=" . urlencode($carousel["description"]) . "&scraper=google", + "value" => $carousel["description"] + ]; + + if($carousel["subtext"] !== null){ + + $description_after[] = [ + "type" => "quote", + "value" => $carousel["subtext"] + ]; + } + + $description_after[] = [ + "type" => "image", + "url" => $carousel["image"] + ]; + } + } + + $categories = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding" => "12px 16px 12px" + ], + self::is_class + ) + ); + + $image = + $this->fuckhtml + ->getElementsByTagName("img"); + + if(count($image) !== 0){ - $is_rating = -2; + $image = $this->getimage($image[0]["attributes"]["id"]); + }else{ - foreach($spans as $span){ + $image = null; + } + + $url = null; + + for($i=0; $i<count($categories); $i++){ + + $this->fuckhtml->load($categories[$i]); + + if($i === 0){ + // first node. this should be the header with the small + // information snippet + + $url = + $this->fuckhtml + ->getElementsByTagName("a"); - // clean up description - $description = + if(count($url) !== 0){ + + $url = + $this->unshiturl( + $url[0]["attributes"]["href"] + ); + + if(parse_url($url, PHP_URL_HOST) == "encrypted-tbn0.gstatic.com"){ + + $image = $url; + $url = null; + } + }else{ + + $url = null; + } + + $categories[$i]["innerHTML"] = str_replace( - $span["outerHTML"], + $wiki_title[0]["outerHTML"], "", - $description + $categories[$i]["innerHTML"] ); - $innertext = + $subtext = $this->fuckhtml ->getTextContent( - $span + $categories[$i]["innerHTML"] ); - if($innertext == ""){ continue; } + if(strlen($subtext) !== 0){ + + $description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $categories[$i]["innerHTML"] + ) + ]; + } + + // detect audio file + $audio = + $this->fuckhtml + ->getElementsByTagName("audio"); - if( - strtolower($innertext) - == "rating" - ){ + if(count($audio) !== 0){ - $is_rating = -1; - continue; + $description[] = [ + "type" => "audio", + "url" => + $this->fuckhtml + ->getTextContent( + $audio[0]["attributes"]["src"] + ) + ]; } + }else{ - // - // Parse rating object - // + // check for separator elements IN THERE + $separators = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + ); - if($is_rating >= -1){ + // detect container type + foreach($separators as $separator){ - if($span["level"] !== 1){ continue; } + $this->fuckhtml->load($separator); - $is_rating++; + // ignore wrong levels + if($separator["level"] !== 2){ + + continue; + } + + // + // Detect word definition + // + $wordwraps = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding-bottom" => "12px" + ], + self::is_class + ), + "div" + ); - // 10/10 (123) - if($is_rating === 0){ + if(count($wordwraps) !== 0){ + + foreach($wordwraps as $word){ + + $this->fuckhtml->load($word); + + // detect title + $span = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + if( + count($span) === 1 && + $this->fuckhtml + ->getTextContent( + str_replace( + $span[0]["outerHTML"], + "", + $word["innerHTML"] + ) + ) == "" + ){ + + $description[] = [ + "type" => "title", + "value" => + $this->fuckhtml + ->getTextContent( + $span[0] + ) + ]; + continue; + } + + // detect list element + $lists = + $this->fuckhtml + ->getElementsByTagName("ol"); + + if(count($lists) !== 0){ + foreach($lists as $list){ + + $this->fuckhtml->load($list); + + $items = + $this->fuckhtml + ->getElementsByTagName("li"); + + $w = 0; + foreach($items as $item){ + + $w++; + $this->fuckhtml->load($item); + + // get subnodes + $subnodes = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + ); + + foreach($subnodes as $subnode){ + + $this->fuckhtml->load($subnode); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + // append quote + $description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $subnode + ) + ]; + }else{ + + // append text + $description[] = [ + "type" => "text", + "value" => + $w . ". " . + $this->fuckhtml + ->getTextContent( + $subnode + ) + ]; + } + } + } + } + }else{ + + // parse without list + // get subnodes + $subnodes = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ), + "div" + ); + + foreach($subnodes as $subnode){ + + $this->fuckhtml->load($subnode); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) !== 0){ + + // append quote + $description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $subnode + ) + ]; + }else{ + + // append text + $description[] = [ + "type" => "text", + "value" => + $this->fuckhtml + ->getTextContent( + $subnode + ) + ]; + } + } + } + } + }else{ + + // + // Parse table + // + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + foreach($spans as $span){ + + if(!isset($span["attributes"]["class"])){ + + // found table + $row = + explode( + ":", + $this->fuckhtml + ->getTextContent( + $separator + ), + 2 + ); + + if(count($row) === 2){ + + $table[rtrim($row[0])] = + ltrim($row[1]); + + } + continue 2; + } + } - $innertext = explode(" ", $innertext, 2); + // + // Parse normal description + // + $links_rem = + $this->fuckhtml + ->getElementsByTagName("a"); - $web["table"]["Rating"] = $innertext[0]; - $web["table"]["Hits"] = - trim( + foreach($links_rem as $rem){ + + $separator["innerHTML"] = str_replace( - [ - "(", - ")" - ], + $rem["outerHTML"], "", - $innertext[1] + $separator["innerHTML"] + ); + } + + $description[] = [ + "type" => "text", + "value" => + rtrim( + $this->fuckhtml + ->getTextContent( + $separator + ), + " .," ) - ); - continue; + ]; } + } + } - // US$4.99 - // MYR 50.00 - // $38.34 - // JP¥6,480 - if($is_rating === 2){ + // detect huge buttons + $buttons = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "display" => "table-cell", + "vertical-align" => "middle", + "height" => "52px", + "text-align" => "center" + ], + self::is_class + ), + "a" + ); + + if(count($buttons) !== 0){ - $web["table"]["Price"] = $innertext; - continue; - } + foreach($buttons as $button){ - // Android / In stock - if($is_rating === 4){ + if(isset($button["attributes"]["href"])){ - $web["table"]["Support"] = $innertext; - continue; + $sublink[ + $this->fuckhtml + ->getTextContent( + $button + ) + ] = + $this->unshiturl( + $button["attributes"]["href"] + ); } - - // ignore the rest - continue; } - - // - // Parse standalone text - // - $additional_info[] = $innertext; } } - for($i=0; $i<count($additional_info); $i++){ - - // @TODO - // generate better node names - $web["table"]["Info node #$i"] = $additional_info[$i]; - } - - $this->fuckhtml->load($description); - - // get date node - $span = - $this->fuckhtml - ->getElementsByTagName( - "span" - ); - - if(count($span) !== 0){ - - $description = - str_replace( - $span[0]["outerHTML"], - "", - $description - ); - - $span = - strtotime( - $this->fuckhtml - ->getTextContent( - $span[0] - ) - ); - - if($span){ - - $web["date"] = $span; - } - } + // append description_after (contains carousel info) + $description = array_merge( + $description, + $description_after + ); - $web["description"] = - trim( + $out["answer"][] = [ + "title" => $this->fuckhtml ->getTextContent( - $description + $wiki_title[0] ), - " ·." - ); - - $out["web"][] = $web; + "description" => $description, + "url" => $url, + "thumb" => $image, + "table" => $table, + "sublink" => $sublink + ]; continue; } - // check for container title header + // + // Detect related searches containers + // $container_title = $this->fuckhtml ->getElementsByClassName( @@ -1183,6 +2322,21 @@ class google{ if(count($container_title) !== 0){ + // get carousel entries + $carousels = $this->parsecarousels($container["innerHTML"]); + $this->fuckhtml->load($container); + + foreach($carousels as $carousel){ + + foreach($carousel as $item){ + + if($item["url"] !== null){ + + $out["related"][] = $item["url"]; + } + } + } + $container_title = strtolower( $this->fuckhtml @@ -1191,158 +2345,300 @@ class google{ ) ); - if( - $container_title == "related searches" || - $container_title == "people also search for" - ){ + switch($container_title){ - // - // Parse related searches - // - $as = - $this->fuckhtml - ->getElementsByTagName("a"); - - foreach($as as $a){ + case "related searches": + case "people also search for": + // + // Parse related searches + // + $as = + $this->fuckhtml + ->getElementsByTagName("a"); - $out["related"][] = + foreach($as as $a){ + + $out["related"][] = + $this->fuckhtml + ->getTextContent($a); + } + break; + + case "people also ask": + // get related queries + $divs = $this->fuckhtml - ->getTextContent($a); - } + ->getElementsByTagName("div"); + + foreach($divs as $div){ + + // add accdef's here + if($has_appended_accdef === false){ + + $out["web"] = array_merge($out["web"], $accdefs); + $has_appended_accdef = true; + } + + // add accdef's questions + if(isset($div["attributes"]["role"])){ + + $out["related"][] = + $this->fuckhtml + ->getTextContent($div); + + continue; + } + } + break; } continue; } // - // Parse image carousel + // Parse news // - $title_container = + $title = $this->fuckhtml ->getElementsByClassName( $this->findstyles( [ - "padding" => "12px 16px 12px" + "font-size" => "16px", + "line-height" => "20px", + "font-weight" => "400" ], self::is_class ), "div" ); - if(count($title_container) !== 0){ + if(count($title) !== 0){ - $title_container = + $carousels = $this->parsecarousels(); + $this->fuckhtml->load($container); + + if(count($carousels) === 0){ + + // no carousels found + continue; + } + + $title = strtolower( $this->fuckhtml ->getTextContent( - $title_container[0] + $title[0] ) ); - if($title_container == "imagesview all"){ - - // - // Image carousel - // - $pcitem = - $this->fuckhtml - ->getElementsByClassName( - "pcitem", - "div" - ); + if( + preg_match( + '/^latest from|^top stories/', + $title + ) + ){ - foreach($pcitem as $item){ - - $this->fuckhtml->load($item); - - $link = - $this->fuckhtml - ->getElementsByTagName( - "a" - )[0]; - - parse_str( - parse_url( - $this->fuckhtml - ->getTextContent( - $link - ["attributes"] - ["href"] - ), - PHP_URL_QUERY - ), - $link - ); + // Found news article + foreach($carousels[0] as $carousel){ - if(isset($link["tbm"])){ + if($carousel["image"] !== null){ - continue; + $thumb = [ + "url" => $carousel["image"], + "ratio" => "16:9" + ]; + }else{ + + $thumb = [ + "url" => null, + "ratio" => null + ]; } - $image = - $this->fuckhtml - ->getElementsByTagName("img")[0]; - - $title = - $this->fuckhtml - ->getTextContent( - $image - ["attributes"] - ["alt"] - ); - - $image = - $this->getimage( - $image - ["attributes"] - ["id"] - ); + $out["news"][] = [ + "title" => $carousel["title"], + "description" => $carousel["description"], + "date" => $carousel["date"], + "thumb" => $thumb, + "url" => $carousel["url"] + ]; + } + } + + elseif( + $title == "images" + ){ + + foreach($carousels as $carousel){ - $out["image"][] = [ - "title" => $title, - "source" => [ - [ - "url" => $link["imgurl"], - "width" => (int)$link["w"], - "height" => (int)$link["h"] + foreach($carousel as $item){ + + $out["image"][] = [ + "title" => $item["title"], + "source" => [ + [ + "url" => $item["url"], + "width" => $item["image_width"], + "height" => $item["image_height"] + ], + [ + "url" => $item["image"], + "width" => $item["thumb_width"], + "height" => $item["thumb_height"] + ] ], - [ - "url" => $image, - "width" => (int)$link["tbnw"], - "height" => (int)$link["tbnh"] - ] - ], - "url" => $link["imgrefurl"] - ]; + "url" => $item["ref"] + ]; + } } } + + continue; } // - // Get next page + // Detect nodes with only text + links // + + // ignore elements with <style> tags + $style = + $this->fuckhtml + ->getElementsByTagName("style"); + + if(count($style) !== 0){ + + continue; + } + $as = $this->fuckhtml ->getElementsByTagName("a"); + $description = []; + foreach($as as $a){ + // + // Detect next page + // if( isset($a["attributes"]["aria-label"]) && strtolower($a["attributes"]["aria-label"]) == "next page" ){ $out["npt"] = + $this->backend->store( + $this->fuckhtml + ->getTextContent( + $a["attributes"]["href"] + ), + $pagetype, + $ip + ); + continue 2; + } + + // + // Parse as text node + // + $container["innerHTML"] = + explode( + $a["outerHTML"], + $container["innerHTML"], + 2 + ); + + $before = + $this->fuckhtml + ->getTextContent( + $container["innerHTML"][0], + false, + false + ); + + // set after + if(count($container["innerHTML"]) === 2){ + + $container["innerHTML"] = + $container["innerHTML"][1]; + }else{ + + $container["innerHTML"] = ""; + } + + if($before != ""){ + + $description[] = [ + "type" => "text", + "value" => $before + ]; + } + + // add link + $description[] = [ + "type" => "link", + "url" => + $this->unshiturl( + $a["attributes"] + ["href"] + ), + "value" => $this->fuckhtml ->getTextContent( - $a["attributes"]["href"] - ); + $a + ) + ]; + } + + if($container["innerHTML"] != ""){ + + $description[] = [ + "type" => "text", + "value" => + $this->fuckhtml + ->getTextContent( + $container["innerHTML"] + ) + ]; + } + + $out["answer"][] = [ + "title" => "Notice", + "description" => $description, + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] + ]; + } + + // + // remove duplicate web links cause instant answers + // sometimes contains duplicates + // + $c = count($out["web"]); + $links = []; + + for($i=0; $i<$c; $i++){ + + foreach($links as $link){ + + if($out["web"][$i]["url"] == $link){ + + unset($out["web"][$i]); + continue 2; } } + + $links[] = $out["web"][$i]["url"]; } + $out["web"] = array_values($out["web"]); + return $out; - }*/ + } + + public function image($get){ @@ -1691,7 +2987,7 @@ class google{ array_merge( $this->computedstyle, $this->parsestyles($style["innerHTML"]) - ); + ); } // get images in javascript var @@ -1723,18 +3019,24 @@ class google{ } preg_match_all( - '/var s=\'(data:image[^\']+)\';var i=\[\'([^\']+)\'];/', + '/var s=\'(data:image[^\']+)\';var i=\[(\'[^\;]*\')];/', $script["innerHTML"], $image_grep ); if(count($image_grep[0]) !== 0){ - $this->js_image[trim($image_grep[2][0])] = + $items = explode(",", $image_grep[2][0]); + $value = $this->fuckhtml ->getTextContent( $image_grep[1][0] ); + + foreach($items as $item){ + + $this->js_image[trim($item, "' ")] = $value; + } } // even more javascript crap @@ -1748,7 +3050,7 @@ class google{ for($i=0; $i<count($ask_grep[0]); $i++){ $this->ask[trim($ask_grep[1][$i])] = - stripcslashes( + $this->fuckhtml->parseJsString( $ask_grep[2][$i] ); } @@ -1831,43 +3133,321 @@ class google{ private function getimage($id){ - if( - isset($this->js_image[$id]) && - $this->js_image[$id] != "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAABAUlEQVR4AWMYesChoYElLjkzPj4lY3d8csZjIL4MxPNjUzPcSTYsISFLAqj5NBD/h+LPQPwbiT87NCuLh2gDgRr2QzXuT0jNMoBYksARn5zuHJ+UcR0kB6RXE2VYXHJGOlTDZmzyIJcB5e+D1CSkZDgQNBAaZv+jU1JkcKpJygiGeZ0I76a/Byq8jU9NZFqaCNTA48SE33/iDcw8TIyBt0GKQTFN0Msp6f2EIyUpo57YSIlLSrMhIg0WCIBcCfXSdlzJBsheTHQ6jEnOUgEFOLaEDbMIlhZBOYrorAdJk+nroVnvPsSgdGdoOF7HZyhZ2XPoGQoqjbCpIbt0AiejIQMArVLI7k/DXFkAAAAASUVORK5CYII=" - ){ + if(isset($this->js_image[$id])){ + + $return = $this->fuckhtml->parseJsString($this->js_image[$id]); - if(stripos($this->js_image[$id], "data:image") !== false){ + if( + $return != "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAABAUlEQVR4AWMYesChoYElLjkzPj4lY3d8csZjIL4MxPNjUzPcSTYsISFLAqj5NBD/h+LPQPwbiT87NCuLh2gDgRr2QzXuT0jNMoBYksARn5zuHJ+UcR0kB6RXE2VYXHJGOlTDZmzyIJcB5e+D1CSkZDgQNBAaZv+jU1JkcKpJygiGeZ0I76a/Byq8jU9NZFqaCNTA48SE33/iDcw8TIyBt0GKQTFN0Msp6f2EIyUpo57YSIlLSrMhIg0WCIBcCfXSdlzJBsheTHQ6jEnOUgEFOLaEDbMIlhZBOYrorAdJk+nroVnvPsSgdGdoOF7HZyhZ2XPoGQoqjbCpIbt0AiejIQMArVLI7k/DXFkAAAAASUVORK5CYII=" && + $return != "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAA6ElEQVR4Ae2UvQ7BYBSGW4uFxlVICLsYDA0D14NEunRxHSJ+BkYG9ibiHtgkuvpZWOod3uFESqpOF/ElT/q958h5OtQx/iexY/evY9ACJjBewUtkXHEPyBYUNQQuCETuggNrF2DHF3A4kfUMmLB+BoUYAg4nIX0TTNnbfCjg8HDBGuR4z4Ij+813giAC8rcrkXusjdQEpMpcYt5rCzrMaea7tqD9JLhpCyrMZeadpmApssPaUOszXQALGHz67De0/2gpMGPP014VFpizfgJ5zWXnAF8MryW1rj3x5l8LJANQF1lZQH5f8AAWpNcUs6HAEAAAAABJRU5ErkJggg==" + ){ - return - explode( - "\\x3d", - $this->js_image[$id], - 2 - )[0]; + if( + preg_match( + '/^\/\//', + $return + ) + ){ + + return 'https:' . $return; + } + + return $return; } - return $this->js_image[$id]; + return null; } + } + + private function parsecarousels(&$item_to_remove = false){ - return null; + $carousels = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "padding" => "16px", + "position" => "relative" + ], + self::is_class + ) + ); + + $return = []; + + for($i=0; $i<count($carousels); $i++){ + + if(!isset($carousels[$i]["outerHTML"])){ + + continue; + } + + $this->fuckhtml->load($carousels[$i]); + + if($item_to_remove !== false){ + + $item_to_remove = + str_replace( + $carousels[$i]["outerHTML"], + "", + $item_to_remove + ); + } + + $pcitems = + $this->fuckhtml + ->getElementsByClassName( + "pcitem", + "div" + ); + + foreach($pcitems as $pcitem){ + + $this->fuckhtml->load($pcitem); + + $out = [ + "url" => null, + "ref" => null, + "image" => null, + "thumb_width" => null, + "thumb_height" => null, + "image_width" => null, + "image_height" => null, + "title" => null, + "description" => null, + "subtext" => null, + "date" => null + ]; + + $url = + $this->unshiturl( + $this->fuckhtml + ->getElementsByTagName("a") + [0] + ["attributes"] + ["href"], + true + ); + + // set ref + $out["ref"] = $url["ref"]; + + // set url + $out["url"] = $url["url"]; + + // set sizes + $out["thumb_width"] = $url["thumb_width"]; + $out["thumb_height"] = $url["thumb_height"]; + $out["image_width"] = $url["image_width"]; + $out["image_height"] = $url["image_height"]; + + // get image + $out["image"] = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if(count($out["image"]) !== 0){ + + // get title from image + if(isset($out["image"][0]["attributes"]["alt"])){ + + $out["title"] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $out["image"][0]["attributes"]["alt"] + ) + ); + } + + // get image url + if(isset($out["image"][0]["attributes"]["id"])){ + + $out["image"] = $this->getimage($out["image"][0]["attributes"]["id"]); + } + + elseif(isset($out["image"][0]["attributes"]["data-ll"])){ + + $out["image"] = + $this->fuckhtml + ->getTextContent( + $out["image"][0]["attributes"]["data-ll"] + ); + }else{ + + // failed to get image information + $out["image"] = null; + } + + if($out["image"] == 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYAgMAAACdGdVrAAAADFBMVEVMaXFChfRChfRChfT0tCPZAAAAA3RSTlMAgFJEkGxNAAAAL0lEQVR4AWPADxgdwBT3BTDF9AUiuhdC6WNK/v///y+UggrClSA07EWVglmEFwAA5eYSExeCwigAAAAASUVORK5CYII='){ + + // found arrow image base64, skip entry + continue; + } + }else{ + + // Could not find any image in node + $out["image"] = null; + } + + // get title from spans + $title = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "color" => "#1967d2" + ], + self::is_class + ), + "span" + ); + + if(count($title) !== 0){ + + $out["title"] = + $this->fuckhtml + ->getTextContent( + $title[0] + ); + } + + // get textnodes + $textnodes = + $this->fuckhtml + ->getElementsByClassName( + $this->findstyles( + [ + "white-space" => "pre-line", + "word-wrap" => "break-word" + ], + self::is_class + ) + ); + + $subtext = null; + + if(count($textnodes) !== 0){ + + // get date + $date = + $this->fuckhtml + ->getTextContent( + $textnodes[count($textnodes) - 1], + true + ); + + if(str_replace("\n", " ", $date) == $title){ + + $date = null; + }else{ + + if(strpos($date, "\n") !== false){ + + $date = explode("\n", $date); + $date = $date[count($date) - 1]; + } + elseif(strpos($date, "•") !== false){ + + $date = explode("•", $date); + $date = ltrim($date[count($date) - 1]); + }else{ + + $date = null; + } + } + + if($date !== null){ + + $date = strtotime($date); + } + + // get description + $description = + $this->fuckhtml + ->getTextContent( + $textnodes[0] + ); + + if($out["title"] === null){ + + if($date === null){ + + $out["title"] = $description; + $description = null; + }else{ + + $out["title"] = parse_url($out["url"], PHP_URL_HOST); + } + } + + if(isset($textnodes[1])){ + + $out["subtext"] = + $this->fuckhtml + ->getTextContent( + $textnodes[1] + ); + } + + }else{ + + $date = null; + $description = null; + } + + $out["date"] = $date; + $out["description"] = $this->titledots($description); + + if($out["url"] === null){ + + $out["url"] = $out["title"]; + } + + if($out["title"] == $out["description"]){ + + $out["description"] = null; + } + + $return[$i][] = $out; + } + } + + return $return; } - private function decodeurl($url){ + private function unshiturl($url, $return_size = false){ - preg_match( - '/^\/url\?q=([^&]+)|^\/interstitial\?url=([^&]+)/', + // get parameters from URL + $url = $this->fuckhtml - ->getTextContent($url), - $match - ); + ->getTextContent($url); - if(count($match) === 0){ + $newurl = parse_url($url, PHP_URL_QUERY); + + if($newurl == ""){ - return null; + // probably telephone number + return $url; } - $url = empty($match[1]) ? urldecode($match[2]) : urldecode($match[1]); + $url = $newurl; + unset($newurl); + parse_str($url, $query); + + if(isset($query["imgurl"])){ + + $url = $query["imgurl"]; + } + elseif(isset($query["q"])){ + + $url = $query["q"]; + } + + // rewrite URLs to remove extra tracking parameters $domain = parse_url($url, PHP_URL_HOST); if( @@ -1889,7 +3469,7 @@ class google{ ); } - if( + elseif( preg_match( '/imdb\.com$|youtube\.[^.]+$/', $domain @@ -1909,6 +3489,101 @@ class google{ } + elseif( + preg_match( + '/play\.google\.[^.]+$/', + $domain + ) + ){ + + // remove referrers from play.google.com + $oldquery = parse_url($url, PHP_URL_QUERY); + if($oldquery !== null){ + + parse_str($oldquery, $query); + if(isset($query["referrer"])){ unset($query["referrer"]); } + if(isset($query["hl"])){ unset($query["hl"]); } + if(isset($query["gl"])){ unset($query["gl"]); } + + $query = http_build_query($query); + + $url = + str_replace( + $oldquery, + $query, + $url + ); + } + } + + elseif( + preg_match( + '/twitter\.com$/', + $domain + ) + ){ + + // remove more referrers from twitter.com + $oldquery = parse_url($url, PHP_URL_QUERY); + if($oldquery !== null){ + + parse_str($oldquery, $query); + if(isset($query["ref_src"])){ unset($query["ref_src"]); } + + $query = http_build_query($query); + + if($query != ""){ + + $query .= "?" . $query; + } + + $url = + str_replace( + '?' . $oldquery, + $query, + $url + ); + } + } + + elseif( + preg_match( + '/maps\.google\.[^.]+/', + $domain + ) + ){ + + if(stripos($url, "maps?") !== false){ + + //https://maps.google.com/maps?daddr=Johnny,+603+Rue+St+Georges,+Saint-J%C3%A9r%C3%B4me,+Quebec+J7Z+5B7 + $query = parse_url($url, PHP_URL_QUERY); + + if($query !== null){ + + parse_str($query, $query); + + if(isset($query["daddr"])){ + + $url = + "https://maps.google.com/maps?daddr=" . + urlencode($query["daddr"]); + } + } + } + } + + if($return_size){ + + return [ + "url" => $url, + "ref" => isset($query["imgrefurl"]) ? $query["imgrefurl"] : null, + "thumb_width" => isset($query["tbnw"]) ? (int)$query["tbnw"] : null, + "thumb_height" => isset($query["tbnh"]) ? (int)$query["tbnh"] : null, + "image_width" => isset($query["w"]) ? (int)$query["w"] : null, + "image_height" => isset($query["h"]) ? (int)$query["h"] : null + ]; + } + return $url; } @@ -1924,6 +3599,6 @@ class google{ private function titledots($title){ - return rtrim($title, ".… \t\n\r\0\x0B"); + return rtrim($title, ". \t\n\r\0\x0B"); } } diff --git a/scraper/mojeek.php b/scraper/mojeek.php index c492a20..d17158b 100644 --- a/scraper/mojeek.php +++ b/scraper/mojeek.php @@ -608,7 +608,7 @@ class mojeek{ $this->fuckhtml ->getTextContent( $this->fuckhtml - ->getElementsByClassName("i", "p")[1] + ->getElementsByClassName("i", "p")[0] ) ); diff --git a/scraper/sc.php b/scraper/sc.php index 16d3931..02cf087 100644 --- a/scraper/sc.php +++ b/scraper/sc.php @@ -229,7 +229,7 @@ class sc{ if($json === null){ - throw new Exception("Failed to decode JSON"); + throw new Exception("Failed to decode JSON. Did the keys set in data/config.php expire?"); } $out = [ diff --git a/settings.php b/settings.php index bee31ea..20f5031 100644 --- a/settings.php +++ b/settings.php @@ -117,10 +117,10 @@ $settings = [ "value" => "yandex", "text" => "Yandex" ], - /*[ + [ "value" => "google", "text" => "Google" - ],*/ + ], [ "value" => "mojeek", "text" => "Mojeek" @@ -192,11 +192,11 @@ $settings = [ [ "value" => "yandex", "text" => "Yandex" - ]/*, + ], [ "value" => "google", "text" => "Google" - ]*/ + ] ] ], [ @@ -211,10 +211,10 @@ $settings = [ "value" => "brave", "text" => "Brave" ], - /*[ + [ "value" => "google", "text" => "Google" - ],*/ + ], [ "value" => "mojeek", "text" => "Mojeek" @@ -434,20 +434,33 @@ $left .= '</div>' . '<div class="settings-submit">' . '<input type="submit" value="Update settings!">' . - '<a href="../">< Return to front page</a>' . + '<a href="../">< Go back</a>' . '</div>' . '</form>'; if(count($_GET) === 0){ - + + $code = []; + foreach($_COOKIE as $key => $value){ + + $code[] = rawurlencode($key) . "=" . rawurlencode($value); + } + + $code = implode("&", $code); + + if($code != ""){ + + $code = "?" . $code; + } + echo $frontend->load( "search.html", [ "class" => "", "right-left" => - '<div class="infobox"><h2>Preference link</h2>Follow this link to auto-apply all cookies. Useful if your browser clears out cookies after a browsing session. Following this link will redirect you to the front page, unless no settings are set.<br><br>' . - '<a href="settings' . rtrim("?" . str_replace("; ", "&", $code), "?") . '">Bookmark me!</a>' . + '<div class="infobox"><h2>Preference link</h2>Following this link will re-apply all cookies configured here and will redirect you to the front page. Useful if your browser clears out cookies after a browsing session.<br><br>' . + '<a href="settings' . $code . '">Bookmark me!</a>' . '</div>', "right-right" => "", "left" => $left diff --git a/static/icon/call.png b/static/icon/call.png Binary files differnew file mode 100644 index 0000000..c91bece --- /dev/null +++ b/static/icon/call.png diff --git a/static/icon/directions.png b/static/icon/directions.png Binary files differnew file mode 100644 index 0000000..699da40 --- /dev/null +++ b/static/icon/directions.png diff --git a/static/serverping.js b/static/serverping.js index 5fe285d..a94fe50 100644 --- a/static/serverping.js +++ b/static/serverping.js @@ -160,6 +160,16 @@ function number_format(int){ return new Intl.NumberFormat().format(int); } +window.fetch = (function(fetch) { + return function(fn, t){ + const begin = Date.now(); + return fetch.apply(this, arguments).then(function(response) { + response.ping = Date.now() - begin; + return response; + }); + }; +})(window.fetch); + // parse initial server list fetch_server(window.location.origin); @@ -187,25 +197,24 @@ async function fetch_server(server){ list.push(server); var data = null; - var ping = new Date().getTime(); try{ - data = await fetch( + var payload = await fetch( server + "/ami4get" ); - if(data.status !== 200){ + if(payload.status !== 200){ // endpoint is not available errors++; div_failedreqs.textContent = number_format(errors); - console.warn(server + ": Invalid HTTP code " + data.status); + console.warn(server + ": Invalid HTTP code " + payload.status); return; } - data = await data.json(); - data.server.ping = new Date().getTime() - ping; + data = await payload.json(); + data.server.ping = payload.ping; }catch(error){ diff --git a/static/style.css b/static/style.css index bb76c2e..2ea2d73 100644 --- a/static/style.css +++ b/static/style.css @@ -499,6 +499,7 @@ h3,h4,h5,h6{ text-align:center; display:block; text-align:left; + white-space:nowrap; } .favicon-dropdown img{ @@ -1247,6 +1248,11 @@ table tr a:last-child{ padding-left:20px; } +.instances .go-back{ + margin-top:17px; + display:inline-block; +} + /* Responsive image diff --git a/static/themes/Catpuccin Latte.css b/static/themes/Catppuccin Latte.css index 7c70549..7c70549 100644 --- a/static/themes/Catpuccin Latte.css +++ b/static/themes/Catppuccin Latte.css diff --git a/template/instances.html b/template/instances.html index 829e638..06752c0 100644 --- a/template/instances.html +++ b/template/instances.html @@ -27,6 +27,7 @@ </tbody> </table> </noscript> + <a href="../" class="go-back">< Go back</a> <div id="popup-bg"></div> <div class="popup-wrapper"> <div class="popup"></div> @@ -146,9 +146,17 @@ if(count($results["image"]) !== 0){ $right["image"] .= '<a class="image" href="' . htmlspecialchars($image["url"]) . '" rel="noreferrer nofollow" title="' . htmlspecialchars($image["title"]) . '" data-json="' . htmlspecialchars(json_encode($image["source"])) . '" tabindex="-1">' . - '<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">' . - '<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>' . - '</a>'; + '<img src="' . $frontend->htmlimage($image["source"][count($image["source"]) - 1]["url"], "square") . '" alt="thumb">'; + + if( + $image["source"][0]["width"] !== null && + $image["source"][0]["height"] !== null + ){ + + $right["image"] .= '<div class="duration">' . $image["source"][0]["width"] . 'x' . $image["source"][0]["height"] . '</div>'; + } + + $right["image"] .= '</a>'; } $right["image"] .= |