diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | api.txt | 2 | ||||
-rw-r--r-- | banner/4get-default.png | bin | 21969 -> 0 bytes | |||
-rw-r--r-- | data/config.php | 4 | ||||
-rw-r--r-- | docker-compose.yaml | 5 | ||||
-rw-r--r-- | docs/apache2.md | 16 | ||||
-rw-r--r-- | docs/configure.md | 4 | ||||
-rw-r--r-- | docs/docker.md | 14 | ||||
-rwxr-xr-x[-rw-r--r--] | icons/lolcat.ca.png | bin | 753 -> 753 bytes | |||
-rw-r--r-- | lib/backend.php | 1 | ||||
-rw-r--r-- | lib/frontend.php | 16 | ||||
-rw-r--r-- | robots.txt | 4 | ||||
-rw-r--r-- | scraper/googlealt.php | 5009 | ||||
-rw-r--r-- | settings.php | 44 | ||||
-rw-r--r-- | static/style.css | 139 | ||||
-rw-r--r-- | template/about.html | 4 | ||||
-rw-r--r-- | template/home.html | 2 |
18 files changed, 5162 insertions, 109 deletions
@@ -24,6 +24,7 @@ banner/* data/captcha/birds/ data/captcha/fumo_plushies/ data/captcha/minecraft/ +icons/* !banner/*default* ======= banner/* @@ -4,14 +4,14 @@ **4get** is a proxy search engine that doesn't suck. ## About 4get -https://4get.ca/about +https://4get.itinerariummentis.org/about ## Try it out -https://4get.ca +https://4get.itinerariummentis.org ## Totally unbiased comparison between alternatives -| | 4get | searx(ng) | librex | araa | +| | 4get | 4get(ng) | librex | araa | |----------------------------|-------------------------|-----------|-------------|-----------| | RAM usage | 200-400mb~ | 2GB~ | 200-400mb~ | 2GB~ | | Does it suck | no (debunked by snopes) | yes | yes | a little | @@ -277,7 +277,7 @@ URL should return a valid HTTP audio stream. To access the endpoint, you must add the following prefix in your request, like so: - https://4get.ca/audio/<endpoint>?s=<url> + https://4get.itinerariummentis.org/audio/<endpoint>?s=<url> + /favicon diff --git a/banner/4get-default.png b/banner/4get-default.png Binary files differdeleted file mode 100644 index c075175..0000000 --- a/banner/4get-default.png +++ /dev/null diff --git a/data/config.php b/data/config.php index f8808a0..ae642bb 100644 --- a/data/config.php +++ b/data/config.php @@ -8,7 +8,7 @@ class config{ const VERSION = 8; // Will be shown pretty much everywhere. - const SERVER_NAME = "4get"; + const SERVER_NAME = "4get.itmens"; // Will be shown in <meta> tag on home page const SERVER_SHORT_DESCRIPTION = "They live in our walls!"; @@ -81,6 +81,7 @@ class config{ // To appear in the list of an instance, contact the host and if everyone added // eachother your serber should appear everywhere. 
const INSTANCES = [ + "https://4get.itinerariummentis.org", "https://4get.ca", "https://4get.zzls.xyz", "https://4getus.zzls.xyz", @@ -121,6 +122,7 @@ class config{ const PROXY_BRAVE = false; const PROXY_FB = false; // facebook const PROXY_GOOGLE = false; + const PROXY_GOOGLEALT = false;//ipv6 version of google const PROXY_QWANT = false; const PROXY_MARGINALIA = false; const PROXY_MOJEEK = false; diff --git a/docker-compose.yaml b/docker-compose.yaml index 2bba4ca..845c69b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -6,7 +6,12 @@ services: image: luuul/4get:latest restart: unless-stopped environment: +<<<<<<< HEAD + - FOURGET_VERSION=6 + - FOURGET_SERVER_NAME=4get.itinerariummentis.org +======= - FOURGET_SERVER_NAME=4get.ca +>>>>>>> 0b68d6b2a9928d1b9dae389fbf5b95056e678be0 ports: - "80:80" diff --git a/docs/apache2.md b/docs/apache2.md index e746a7e..d83eab3 100644 --- a/docs/apache2.md +++ b/docs/apache2.md @@ -35,8 +35,8 @@ Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, r 1. The `VirtualHost` here instructs apache2 to redirect all **HTTP** traffic that specify an unknown `Host` header be redirected to a specific domain of your choice. Configuring this is not required but highly recommended. ```xml <VirtualHost *:80> - # no domain = go to 4get.ca - RedirectMatch 301 ^(.*)$ https://4get.ca$1 + # no domain = go to 4get.itinerariummentis.org + RedirectMatch 301 ^(.*)$ https://4get.itinerariummentis.org$1 </VirtualHost> ``` @@ -95,7 +95,7 @@ Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove every This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. 
Don't worry about the invalid SSL paths, we will generate our certificates later; Just make sure you specify the right domains in there: ```xml <VirtualHost *:443> - RedirectMatch 301 ^(.*)$ https://4get.ca$1 + RedirectMatch 301 ^(.*)$ https://4get.itinerariummentis.org$1 ServerAdmin will@lolcat.ca #ErrorLog ${APACHE_LOG_DIR}/error.log @@ -116,15 +116,15 @@ This ruleset will redirect all clients that specify an unknown `Host` to the dom AddOutputFilterByType DEFLATE text/plain AddOutputFilterByType DEFLATE text/css - SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem - SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem + SSLCertificateFile /etc/letsencrypt/live/4get.itinerariummentis.org/fullchain.pem + SSLCertificateKeyFile /etc/letsencrypt/live/4get.itinerariummentis.org/privkey.pem </VirtualHost> ``` -This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. +This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.itinerariummentis.org/settings` resolves to `4get.itinerariummentis.org/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private. ```xml <VirtualHost *:443> - ServerName 4get.ca + ServerName 4get.itinerariummentis.org DocumentRoot /var/www/4get @@ -201,7 +201,7 @@ service apache2 stop Now, run `certbot`, and specify all of your domains by prepending `-d` every time. Make sure the first domain you specify is your main domain, and the same domain you specified in the configuration above! We use ECDSA encryption here as it's better than RSA. 
```sh -certbot certonly --standalone --key-type ecdsa -d 4get.ca -d www.4get.ca -d lolcat.ca -d www.lolcat.ca +certbot certonly --standalone --key-type ecdsa -d 4get.itinerariummentis.org -d www.4get.itinerariummentis.org -d lolcat.ca -d www.lolcat.ca ``` Certbot should ask you a few questions, just play along. At the end of the setup, certbot should tell you about the location of the certificates. Double check to make sure they correspond to the paths we specified in `default-ssl.conf`. Your certificates should now update every 2-3 months automatically. diff --git a/docs/configure.md b/docs/configure.md index 7cc4175..d1e3bfb 100644 --- a/docs/configure.md +++ b/docs/configure.md @@ -39,10 +39,10 @@ sudo cp /usr/local/lib/libcurl-impersonate-ff.so /usr/lib/x86_64-linux-gnu/libcu Make sure to restart your webserver and/or PHP daemon, otherwise it will keep using the old library. You should now be able to bypass Cloudflare's shitty checks!! # Robots.txt -Make sure you configure this right to optimize your search engine presence! Head over to `/robots.txt` and change the 4get.ca domain to your own domain. +Make sure you configure this right to optimize your search engine presence! Head over to `/robots.txt` and change the 4get.itinerariummentis.org domain to your own domain. # Server listing -To be listed on https://4get.ca/instances , you must contact *any* of the people in the server list and ask them to add you to their list of instances in their configuration. The instance list is distributed, and I don't have control over it. +To be listed on https://4get.itinerariummentis.org/instances , you must contact *any* of the people in the server list and ask them to add you to their list of instances in their configuration. The instance list is distributed, and I don't have control over it. If you see spammy entries in your instances list, simply remove the instance from your list that pushes the offending entries. 
diff --git a/docs/docker.md b/docs/docker.md index 6a94197..4e2c34a 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -5,7 +5,7 @@ When using docker container any environment variables prefixed with `FOURGET_` w When lists of data is expected in [data/config.php](../data/config.php), such as `INSTANCES`, you can pass in a comma separated string via environment variable. Example: -`FOURGET_INSTANCES="https://4get.ca,https://domain.tld"` +`FOURGET_INSTANCES="https://4get.itinerariummentis.org,https://domain.tld"` #### Special environment variables @@ -47,13 +47,13 @@ For more information on configuration view [data/config.php](../data/config.php) You can start 4get with ``` -docker run -d -p 80:80 -e FOURGET_SERVER_NAME="4get.ca" -e FOURGET_PROTO="http" luuul/4get:latest +docker run -d -p 80:80 -e FOURGET_SERVER_NAME="4get.itinerariummentis.org" -e FOURGET_PROTO="http" luuul/4get:latest ``` ...Or with SSL: ``` -docker run -d -p 443:443 -e FOURGET_SERVER_NAME="4get.ca" -e FOURGET_PROTO="https" -v /etc/letsencrypt/live/domain.tld:/etc/4get/certs luuul/4get:latest +docker run -d -p 443:443 -e FOURGET_SERVER_NAME="4get.itinerariummentis.org" -e FOURGET_PROTO="https" -v /etc/letsencrypt/live/domain.tld:/etc/4get/certs luuul/4get:latest ``` @@ -73,7 +73,7 @@ services: restart: unless-stopped environment: - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca + - FOURGET_SERVER_NAME=4get.itinerariummentis.org ports: - "80:80" @@ -91,7 +91,7 @@ services: restart: unless-stopped environment: - FOURGET_PROTO=https - - FOURGET_SERVER_NAME=4get.ca + - FOURGET_SERVER_NAME=4get.itinerariummentis.org ports: - "80:80" @@ -116,7 +116,7 @@ services: restart: unless-stopped environment: - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca + - FOURGET_SERVER_NAME=4get.itinerariummentis.org - FOURGET_BOT_PROTECTION=1 ports: @@ -138,7 +138,7 @@ services: restart: unless-stopped environment: - FOURGET_PROTO=http - - FOURGET_SERVER_NAME=4get.ca + - 
FOURGET_SERVER_NAME=4get.itinerariummentis.org ports: - "80:80" diff --git a/icons/lolcat.ca.png b/icons/lolcat.ca.png Binary files differindex bf47a44..bf47a44 100644..100755 --- a/icons/lolcat.ca.png +++ b/icons/lolcat.ca.png diff --git a/lib/backend.php b/lib/backend.php index 7c450da..00161a6 100644 --- a/lib/backend.php +++ b/lib/backend.php @@ -50,7 +50,6 @@ class backend{ switch($type){ case "raw_ip": - return; break; case "http": diff --git a/lib/frontend.php b/lib/frontend.php index 1c3eb09..de8f446 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -242,6 +242,7 @@ class frontend{ $payload .= $this->highlighttext($keywords, $site["title"]) . '</div>'; + $payload .= '</a>'; if($greentext !== null){ @@ -261,7 +262,6 @@ class frontend{ $payload .= $customhtml; - $payload .= '</a>'; /* Sublinks @@ -919,10 +919,11 @@ class frontend{ $filters["scraper"] = [ "display" => "Scraper", "option" => [ + "googlealt" => "GoogleV6", + "google" => "Google", "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", - "google" => "Google", "qwant" => "Qwant", "yep" => "Yep", "greppr" => "Greppr", @@ -940,10 +941,11 @@ class frontend{ $filters["scraper"] = [ "display" => "Scraper", "option" => [ + "googlealt" => "GoogleV6", + "google" => "Google", + "brave" => "Brave", "ddg" => "DuckDuckGo", "yandex" => "Yandex", - "brave" => "Brave", - "google" => "Google", "qwant" => "Qwant", "yep" => "Yep", //"pinterest" => "Pinterest", @@ -959,6 +961,7 @@ class frontend{ "option" => [ "yt" => "YouTube", //"fb" => "Facebook videos", + "googlealt" => "GoogleV6", "ddg" => "DuckDuckGo", "brave" => "Brave", "yandex" => "Yandex", @@ -972,10 +975,11 @@ class frontend{ $filters["scraper"] = [ "display" => "Scraper", "option" => [ - "ddg" => "DuckDuckGo", - "brave" => "Brave", + "googlealt" => "GoogleV6", "google" => "Google", + "brave" => "Brave", "qwant" => "Qwant", + "ddg" => "DuckDuckGo", "yep" => "Yep", "mojeek" => "Mojeek" ] @@ -23,6 +23,4 @@ # ⢀⠢⡑⡀⢂⢊⠠⠁⡂⡐⠀⠅⡈⠪⠪⠪⠣⠫⠑⡁⢔⠕⣜⣜⢦⡰⡎⡯⡾⡽ 
User-agent: * -Disallow: -Host: 4get.ca -Sitemap: https://4get.ca/sitemap +Disallow: / diff --git a/scraper/googlealt.php b/scraper/googlealt.php new file mode 100644 index 0000000..aa523db --- /dev/null +++ b/scraper/googlealt.php @@ -0,0 +1,5009 @@ +<?php + +// @TODO check for consent.google.com page, if need be + +class googlealt{ + + public function __construct(){ + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + + include "lib/backend.php"; + $this->backend = new backend("googlealt"); + } + + public function getfilters($page){ + + $base = [ + "country" => [ // gl=<country> (image: cr=countryAF) + "display" => "Country", + "option" => [ + "any" => "Instance's country", + "af" => "Afghanistan", + "al" => "Albania", + "dz" => "Algeria", + "as" => "American Samoa", + "ad" => "Andorra", + "ao" => "Angola", + "ai" => "Anguilla", + "aq" => "Antarctica", + "ag" => "Antigua and Barbuda", + "ar" => "Argentina", + "am" => "Armenia", + "aw" => "Aruba", + "au" => "Australia", + "at" => "Austria", + "az" => "Azerbaijan", + "bs" => "Bahamas", + "bh" => "Bahrain", + "bd" => "Bangladesh", + "bb" => "Barbados", + "by" => "Belarus", + "be" => "Belgium", + "bz" => "Belize", + "bj" => "Benin", + "bm" => "Bermuda", + "bt" => "Bhutan", + "bo" => "Bolivia", + "ba" => "Bosnia and Herzegovina", + "bw" => "Botswana", + "bv" => "Bouvet Island", + "br" => "Brazil", + "io" => "British Indian Ocean Territory", + "bn" => "Brunei Darussalam", + "bg" => "Bulgaria", + "bf" => "Burkina Faso", + "bi" => "Burundi", + "kh" => "Cambodia", + "cm" => "Cameroon", + "ca" => "Canada", + "cv" => "Cape Verde", + "ky" => "Cayman Islands", + "cf" => "Central African Republic", + "td" => "Chad", + "cl" => "Chile", + "cn" => "China", + "cx" => "Christmas Island", + "cc" => "Cocos (Keeling) Islands", + "co" => "Colombia", + "km" => "Comoros", + "cg" => "Congo", + "cd" => "Congo, the Democratic Republic", + "ck" => "Cook Islands", + "cr" => "Costa Rica", + "ci" => "Cote D'ivoire", + "hr" => 
"Croatia", + "cu" => "Cuba", + "cy" => "Cyprus", + "cz" => "Czech Republic", + "dk" => "Denmark", + "dj" => "Djibouti", + "dm" => "Dominica", + "do" => "Dominican Republic", + "ec" => "Ecuador", + "eg" => "Egypt", + "sv" => "El Salvador", + "gq" => "Equatorial Guinea", + "er" => "Eritrea", + "ee" => "Estonia", + "et" => "Ethiopia", + "fk" => "Falkland Islands (Malvinas)", + "fo" => "Faroe Islands", + "fj" => "Fiji", + "fi" => "Finland", + "fr" => "France", + "gf" => "French Guiana", + "pf" => "French Polynesia", + "tf" => "French Southern Territories", + "ga" => "Gabon", + "gm" => "Gambia", + "ge" => "Georgia", + "de" => "Germany", + "gh" => "Ghana", + "gi" => "Gibraltar", + "gr" => "Greece", + "gl" => "Greenland", + "gd" => "Grenada", + "gp" => "Guadeloupe", + "gu" => "Guam", + "gt" => "Guatemala", + "gn" => "Guinea", + "gw" => "Guinea-Bissau", + "gy" => "Guyana", + "ht" => "Haiti", + "hm" => "Heard Island and Mcdonald Islands", + "va" => "Holy See (Vatican City State)", + "hn" => "Honduras", + "hk" => "Hong Kong", + "hu" => "Hungary", + "is" => "Iceland", + "in" => "India", + "id" => "Indonesia", + "ir" => "Iran, Islamic Republic", + "iq" => "Iraq", + "ie" => "Ireland", + "il" => "Israel", + "it" => "Italy", + "jm" => "Jamaica", + "jp" => "Japan", + "jo" => "Jordan", + "kz" => "Kazakhstan", + "ke" => "Kenya", + "ki" => "Kiribati", + "kp" => "Korea, Democratic People's Republic", + "kr" => "Korea, Republic", + "kw" => "Kuwait", + "kg" => "Kyrgyzstan", + "la" => "Lao People's Democratic Republic", + "lv" => "Latvia", + "lb" => "Lebanon", + "ls" => "Lesotho", + "lr" => "Liberia", + "ly" => "Libyan Arab Jamahiriya", + "li" => "Liechtenstein", + "lt" => "Lithuania", + "lu" => "Luxembourg", + "mo" => "Macao", + "mk" => "Macedonia, the Former Yugosalv Republic", + "mg" => "Madagascar", + "mw" => "Malawi", + "my" => "Malaysia", + "mv" => "Maldives", + "ml" => "Mali", + "mt" => "Malta", + "mh" => "Marshall Islands", + "mq" => "Martinique", + "mr" => "Mauritania", + "mu" 
=> "Mauritius", + "yt" => "Mayotte", + "mx" => "Mexico", + "fm" => "Micronesia, Federated States", + "md" => "Moldova, Republic", + "mc" => "Monaco", + "mn" => "Mongolia", + "ms" => "Montserrat", + "ma" => "Morocco", + "mz" => "Mozambique", + "mm" => "Myanmar", + "na" => "Namibia", + "nr" => "Nauru", + "np" => "Nepal", + "nl" => "Netherlands", + "an" => "Netherlands Antilles", + "nc" => "New Caledonia", + "nz" => "New Zealand", + "ni" => "Nicaragua", + "ne" => "Niger", + "ng" => "Nigeria", + "nu" => "Niue", + "nf" => "Norfolk Island", + "mp" => "Northern Mariana Islands", + "no" => "Norway", + "om" => "Oman", + "pk" => "Pakistan", + "pw" => "Palau", + "ps" => "Palestinian Territory, Occupied", + "pa" => "Panama", + "pg" => "Papua New Guinea", + "py" => "Paraguay", + "pe" => "Peru", + "ph" => "Philippines", + "pn" => "Pitcairn", + "pl" => "Poland", + "pt" => "Portugal", + "pr" => "Puerto Rico", + "qa" => "Qatar", + "re" => "Reunion", + "ro" => "Romania", + "ru" => "Russian Federation", + "rw" => "Rwanda", + "sh" => "Saint Helena", + "kn" => "Saint Kitts and Nevis", + "lc" => "Saint Lucia", + "pm" => "Saint Pierre and Miquelon", + "vc" => "Saint Vincent and the Grenadines", + "ws" => "Samoa", + "sm" => "San Marino", + "st" => "Sao Tome and Principe", + "sa" => "Saudi Arabia", + "sn" => "Senegal", + "cs" => "Serbia and Montenegro", + "sc" => "Seychelles", + "sl" => "Sierra Leone", + "sg" => "Singapore", + "sk" => "Slovakia", + "si" => "Slovenia", + "sb" => "Solomon Islands", + "so" => "Somalia", + "za" => "South Africa", + "gs" => "South Georgia and the South Sandwich Islands", + "es" => "Spain", + "lk" => "Sri Lanka", + "sd" => "Sudan", + "sr" => "Suriname", + "sj" => "Svalbard and Jan Mayen", + "sz" => "Swaziland", + "se" => "Sweden", + "ch" => "Switzerland", + "sy" => "Syrian Arab Republic", + "tw" => "Taiwan, Province of China", + "tj" => "Tajikistan", + "tz" => "Tanzania, United Republic", + "th" => "Thailand", + "tl" => "Timor-Leste", + "tg" => "Togo", + "tk" => 
"Tokelau", + "to" => "Tonga", + "tt" => "Trinidad and Tobago", + "tn" => "Tunisia", + "tr" => "Turkey", + "tm" => "Turkmenistan", + "tc" => "Turks and Caicos Islands", + "tv" => "Tuvalu", + "ug" => "Uganda", + "ua" => "Ukraine", + "ae" => "United Arab Emirates", + "uk" => "United Kingdom", + "us" => "United States", + "um" => "United States Minor Outlying Islands", + "uy" => "Uruguay", + "uz" => "Uzbekistan", + "vu" => "Vanuatu", + "ve" => "Venezuela", + "vn" => "Viet Nam", + "vg" => "Virgin Islands, British", + "vi" => "Virgin Islands, U.S.", + "wf" => "Wallis and Futuna", + "eh" => "Western Sahara", + "ye" => "Yemen", + "zm" => "Zambia", + "zw" => "Zimbabwe" + ] + ], + "nsfw" => [ + "display" => "NSFW", + "option" => [ + "yes" => "Yes", // safe=active + "no" => "No" // safe=off + ] + ] + ]; + + switch($page){ + + case "web": + return array_merge( + $base, + [ + "lang" => [ // lr=<lang> (prefix lang with "lang_") + "display" => "Language", + "option" => [ + "any" => "Any language", + "ar" => "Arabic", + "bg" => "Bulgarian", + "ca" => "Catalan", + "cs" => "Czech", + "da" => "Danish", + "de" => "German", + "el" => "Greek", + "en" => "English", + "es" => "Spanish", + "et" => "Estonian", + "fi" => "Finnish", + "fr" => "French", + "hr" => "Croatian", + "hu" => "Hungarian", + "id" => "Indonesian", + "is" => "Icelandic", + "it" => "Italian", + "iw" => "Hebrew", + "ja" => "Japanese", + "ko" => "Korean", + "lt" => "Lithuanian", + "lv" => "Latvian", + "nl" => "Dutch", + "no" => "Norwegian", + "pl" => "Polish", + "pt" => "Portuguese", + "ro" => "Romanian", + "ru" => "Russian", + "sk" => "Slovak", + "sl" => "Slovenian", + "sr" => "Serbian", + "sv" => "Swedish", + "tr" => "Turkish", + "zh-CN" => "Chinese (Simplified)", + "zh-TW" => "Chinese (Traditional)" + ] + ], + "newer" => [ // tbs + "display" => "Newer than", + "option" => "_DATE" + ], + "older" => [ + "display" => "Older than", + "option" => "_DATE" + ], + "spellcheck" => [ + "display" => "Spellcheck", + "option" => [ + 
"yes" => "Yes", + "no" => "No" + ] + ] + ] + ); + break; + + case "images": + return array_merge( + $base, + [ + "time" => [ // tbs=qdr:<time> + "display" => "Time posted", + "option" => [ + "any" => "Any time", + "d" => "Past 24 hours", + "w" => "Past week", + "m" => "Past month", + "y" => "Past year" + ] + ], + "size" => [ // imgsz + "display" => "Size", + "option" => [ + "any" => "Any size", + "l" => "Large", + "m" => "Medium", + "i" => "Icon", + "qsvga" => "Larger than 400x300", + "vga" => "Larger than 640x480", + "svga" => "Larger than 800x600", + "xga" => "Larger than 1024x768", + "2mp" => "Larger than 2MP", + "4mp" => "Larger than 4MP", + "6mp" => "Larger than 6MP", + "8mp" => "Larger than 8MP", + "10mp" => "Larger than 10MP", + "12mp" => "Larger than 12MP", + "15mp" => "Larger than 15MP", + "20mp" => "Larger than 20MP", + "40mp" => "Larger than 40MP", + "70mp" => "Larger than 70MP" + ] + ], + "ratio" => [ // imgar + "display" => "Aspect ratio", + "option" => [ + "any" => "Any ratio", + "t|xt" => "Tall", + "s" => "Square", + "w" => "Wide", + "xw" => "Panoramic" + ] + ], + "color" => [ // imgc + "display" => "Color", + "option" => [ + "any" => "Any color", + "color" => "Full color", + "bnw" => "Black & white", + "trans" => "Transparent", + // from here, imgcolor + "red" => "Red", + "orange" => "Orange", + "yellow" => "Yellow", + "green" => "Green", + "teal" => "Teal", + "blue" => "Blue", + "purple" => "Purple", + "pink" => "Pink", + "white" => "White", + "gray" => "Gray", + "black" => "Black", + "brown" => "Brown" + ] + ], + "type" => [ // tbs=itp:<type> + "display" => "Type", + "option" => [ + "any" => "Any type", + "clipart" => "Clip Art", + "lineart" => "Line Drawing", + "animated" => "Animated" + ] + ], + "format" => [ // as_filetype + "display" => "Format", + "option" => [ + "any" => "Any format", + "jpg" => "JPG", + "gif" => "GIF", + "png" => "PNG", + "bmp" => "BMP", + "svg" => "SVG", + "webp" => "WEBP", + "ico" => "ICO", + "craw" => "RAW" + ] + ], + 
"rights" => [ // tbs=sur:<rights> + "display" => "Usage rights", + "option" => [ + "any" => "Any license", + "cl" => "Creative Commons licenses", + "ol" => "Commercial & other licenses" + ] + ] + ] + ); + break; + + case "videos": + return array_merge( + $base, + [ + "newer" => [ // tbs + "display" => "Newer than", + "option" => "_DATE" + ], + "older" => [ + "display" => "Older than", + "option" => "_DATE" + ], + "duration" => [ + "display" => "Duration", + "option" => [ + "any" => "Any duration", + "s" => "Short (0-4min)", // tbs=dur:s + "m" => "Medium (4-20min)", // tbs=dur:m + "l" => "Long (20+ min)" // tbs=dur:l + ] + ], + "quality" => [ + "display" => "Quality", + "option" => [ + "any" => "Any quality", + "h" => "High quality" // tbs=hq:h + ] + ], + "captions" => [ + "display" => "Captions", + "option" => [ + "any" => "No preference", + "yes" => "Closed captioned" // tbs=cc:1 + ] + ] + ] + ); + break; + + case "news": + return array_merge( + $base, + [ + "newer" => [ // tbs + "display" => "Newer than", + "option" => "_DATE" + ], + "older" => [ + "display" => "Older than", + "option" => "_DATE" + ], + "sort" => [ + "display" => "Sort", + "option" => [ + "relevance" => "Relevance", + "date" => "Date" // sbd:1 + ] + ] + ] + ); + break; + } + } + + private function get($proxy, $url, $get = []){ + + $headers = [ + "User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + //"Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY", + "Connection: keep-alive", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: none", + "Sec-Fetch-User: ?1", + "Priority: u=1", + "TE: trailers" + ]; + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . 
$get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers); + curl_setopt($curlproc, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V6); + // use http2 + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + // follow redirects + curl_setopt($curlproc, CURLOPT_FOLLOWLOCATION, true); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + + + + private function parsepage($html, $pagetype, $search, $proxy, $params){ + + $out = [ + "status" => "ok", + "spelling" => [ + "type" => "no_correction", + "using" => null, + "correction" => null + ], + "npt" => null, + "answer" => [], + "web" => [], + "image" => [], + "video" => [], + "news" => [], + "related" => [] + ]; + + $this->fuckhtml->load($html); + + $this->detect_sorry(); + + // parse all <style> tags + $this->parsestyles(); + + // get javascript images + $this->scrape_dimg($html); + + // get html blobs + preg_match_all( + '/function\(\){window\.jsl\.dh\(\'([^\']+?)\',\'(.+?[^\'])\'\);/', + $html, + $blobs + ); + + $this->blobs = []; + if(isset($blobs[1])){ + + for($i=0; $i<count($blobs[1]); $i++){ + + $this->blobs[$blobs[1][$i]] = + $this->fuckhtml + ->parseJsString( + $blobs[2][$i] + ); + } + } + + $this->scrape_imagearr($html); + + // + // load result column + // + $result_div = + $this->fuckhtml + ->getElementById( + "center_col", + "div" + ); + + if($result_div === false){ + + throw new Exception("Failed to grep result div"); + } + + 
$this->fuckhtml->load($result_div); + + // + // Get word corrections + // + $correction = + $this->fuckhtml + ->getElementById( + "fprs", + "p" + ); + + if($correction){ + + $this->fuckhtml->load($correction); + + $a = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + $using = + $this->fuckhtml + ->getElementById( + "fprsl", + $a + ); + + if($using){ + + $using = + $this->fuckhtml + ->getTextContent( + $using + ); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + $type_span = + $this->fuckhtml + ->getTextContent( + $spans[0] + ); + + $type = "not_many"; + + if( + stripos( + $type_span, + "Showing results for" + ) !== false + ){ + + $type = "including"; + } + + $correction = + $this->fuckhtml + ->getTextContent( + $a[count($a) - 1] + ); + + $out["spelling"] = [ + "type" => $type, + "using" => $using, + "correction" => $correction + ]; + } + + // reset + $this->fuckhtml->load($result_div); + } + + // + // get notices + // + $botstuff = + $this->fuckhtml + ->getElementById( + "botstuff" + ); + + // important for later + $last_page = false; + + if($botstuff){ + + $this->fuckhtml->load($botstuff); + + $cards = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "line-height" => "normal" + ] + ), + "div" + ); + + foreach($cards as $card){ + + $this->fuckhtml->load($card); + + $h2 = + $this->fuckhtml + ->getElementsByTagName( + "h2" + ); + + if(count($h2) !== 0){ + + $title = + $this->fuckhtml + ->getTextContent( + $h2[0] + ); + + $card["innerHTML"] = + str_replace( + $h2[0]["outerHTML"], + "", + $card["innerHTML"] + ); + }else{ + + $title = "Notice"; + } + + $description = []; + + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($as) !== 0){ + + $first = true; + + foreach($as as $a){ + + $text_link = + $this->fuckhtml + ->getTextContent( + $a + ); + + if(stripos($text_link, "repeat the search") !== false){ + + $last_page = true; + break 2; + } + + $parts = + explode( + $a["outerHTML"], + 
$card["innerHTML"], + 2 + ); + + $card["innerHTML"] = $parts[1]; + + $value = + preg_replace( + '/ +/', + " ", + $this->fuckhtml + ->getTextContent( + $parts[0], + false, + false + ) + ); + + if(strlen(trim($value)) !== 0){ + + $description[] = [ + "type" => "text", + "value" => $value + ]; + + if($first){ + + $description[0]["value"] = + ltrim($description[0]["value"]); + } + } + + $first = false; + + $description[] = [ + "type" => "link", + "url" => + $this->fuckhtml + ->getTextContent( + $a["attributes"] + ["href"] + ), + "value" => $text_link + ]; + } + + $text = + $this->fuckhtml + ->getTextContent( + $card["innerHTML"], + false, + false + ); + + if(strlen(trim($text)) !== 0){ + + $description[] = [ + "type" => "text", + "value" => + rtrim( + $text + ) + ]; + } + + }else{ + + // @TODO: Check if this ever gets populated without giving me garbage + /* + $text = + $this->fuckhtml + ->getTextContent( + $card + ); + + if($text != ""){ + $description[] = [ + "type" => "text", + "value" => $text + ]; + }*/ + } + + if(count($description) !== 0){ + + $out["answer"][] = [ + "title" => $title, + "description" => $description, + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] + ]; + } + } + + // reset + $this->fuckhtml->load($html); + } + + // + // get "Related Searches" and "People also search for" + // + $relateds = + array_merge( + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "align-items" => "center", + "background-color" => "#28292a", + "border-radius" => "100px", + "box-sizing" => "border-box", + "display" => "flex", + "max-height" => "none", + "min-height" => "48px", + "padding-left" => "17px", + "padding-right" => "17px", + "position" => "relative" + ] + ) . " " . 
+ $this->getstyle( + [ + "margin-left" => "8px", + "margin-right" => "8px" + ] + ), + "a" + ), + $this->fuckhtml + ->getElementsByClassName( + "wyccme", + "div" + ) + ); + + foreach($relateds as $related){ + + $text = + $this->fuckhtml + ->getTextContent( + $related + ); + + if($text == "More results"){ continue; } + + $out["related"][] = $text; + } + + // + // Get text results + // + $results = + $this->fuckhtml + ->getElementsByClassName( + "g", + "div" + ); + + $this->skip_next = false; + + foreach($results as $result){ + + if($this->skip_next){ + + $this->skip_next = false; + continue; + } + + $this->fuckhtml->load($result); + + $web = [ + "title" => null, + "description" => null, + "url" => null, + "date" => null, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + + // Detect presence of sublinks + $g = + $this->fuckhtml + ->getElementsByClassName( + "g", + "div" + ); + + $sublinks = []; + if(count($g) > 0){ + + $table = + $this->fuckhtml + ->getElementsByTagName( + "table" + ); + + if(count($table) !== 0){ + + // found some sublinks! 
+ + $this->fuckhtml->load($table[0]); + + $tds = + $this->fuckhtml + ->getElementsByTagName( + "td" + ); + + foreach($tds as $td){ + + $this->fuckhtml->load($td); + + $a = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if( + count($a) === 0 || + ( + isset($a[0]["attributes"]["class"]) && + $a[0]["attributes"]["class"] == "fl" + ) + ){ + + continue; + } + + $td["innerHTML"] = + str_replace( + $a[0]["outerHTML"], + "", + $td["innerHTML"] + ); + + $web["sublink"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $a[0] + ) + ), + "description" => + html_entity_decode( + $this->titledots( + $this->fuckhtml + ->getTextContent( + $td + ) + ) + ), + "url" => + $this->unshiturl( + $a[0] + ["attributes"] + ["href"] + ), + "date" => null + ]; + } + + // reset + $this->fuckhtml->load($result); + } + + // skip on next iteration + $this->skip_next = true; + } + + // get title + $h3 = + $this->fuckhtml + ->getElementsByTagName( + "h3" + ); + + if(count($h3) === 0){ + + continue; + } + + $web["title"] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $h3[0] + ) + ); + + // get url + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + $web["url"] = + $this->unshiturl( + $as[0] + ["attributes"] + ["href"] + ); + + if( + !preg_match( + '/^http/', + $web["url"] + ) + ){ + + // skip if invalid url is found + continue; + } + + // + // probe for twitter carousel + // + $carousel = + $this->fuckhtml + ->getElementsByTagName( + "g-scrolling-carousel" + ); + + if(count($carousel) !== 0){ + + $this->fuckhtml->load($carousel[0]); + + $items = + $this->fuckhtml + ->getElementsByTagName( + "g-inner-card" + ); + + $has_thumbnail = false; + + foreach($items as $item){ + + $this->fuckhtml->load($item); + + if($has_thumbnail === false){ + + // get thumbnail + $thumb = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if( + count($thumb) !== 0 && + isset($thumb[0]["attributes"]["id"]) + ){ + + $web["thumb"] = [ + "url" => + 
$this->getdimg( + $thumb[0]["attributes"]["id"] + ), + "ratio" => "16:9" + ]; + + $has_thumbnail = true; + } + + // or else, try getting a thumbnail from next container + } + + // cache div + $div = + $this->fuckhtml + ->getElementsByTagName( + "div" + ); + + // get link + $links = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + // get description of carousel sublink + $description = + $this->fuckhtml + ->getElementsByAttributeValue( + "role", + "heading", + $div + ); + + if(count($description) !== 0){ + + $description = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $description[0] + ) + ); + }else{ + + $description = null; + } + + $bottom = + $this->fuckhtml + ->getElementsByAttributeValue( + "style", + "z-index:2", + $div + ); + + $title = null; + $date = null; + if(count($bottom) !== 0){ + + $this->fuckhtml->load($bottom[0]); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + $title = + $this->fuckhtml + ->getTextContent( + $spans[0] + ); + + $date = + strtotime( + $this->fuckhtml + ->getTextContent( + $spans[count($spans) - 1] + ) + ); + } + + $web["sublink"][] = [ + "title" => $title, + "description" => $description, + "url" => + $this->unshiturl( + $links[0] + ["attributes"] + ["href"] + ), + "date" => $date + ]; + } + + $out["web"][] = $web; + continue; + } + + // + // get viewcount, time posted and follower count from <cite> tag + // + $cite = + $this->fuckhtml + ->getElementsByTagName( + "cite" + ); + + if(count($cite) !== 0){ + + $this->fuckhtml->load($cite[0]); + + $spans = + $this->fuckhtml + ->getElementsByTagName("span"); + + if(count($spans) === 0){ + + $cites = + explode( + "·", + $this->fuckhtml + ->getTextContent( + $cite[0] + ) + ); + + foreach($cites as $cite){ + + $cite = trim($cite); + + if( + preg_match( + '/(.+) (views|followers|likes)$/', + $cite, + $match + ) + ){ + + $web["table"][ucfirst($match[2])] = + $match[1]; + }elseif( + preg_match( + '/ago$/', + $cite + ) + ){ + + $web["date"] = + 
strtotime($cite); + } + } + } + + // reset + $this->fuckhtml->load($result); + } + + // + // attempt to fetch description cleanly + // + $description = + $this->fuckhtml + ->getElementsByAttributeValue( + "style", + "-webkit-line-clamp:2" + ); + + if(count($description) !== 0){ + + $web["description"] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $description[0] + ) + ); + }else{ + + // use ANOTHER method where the description is a header of the result + $description = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "wa:/description" + ); + + if(count($description) !== 0){ + + // get date off that shit + $date = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "font-size" => "12px", + "line-height" => "1.34", + "display" => "inline-block", + "font-family" => "Google Sans,arial,sans-serif", + "padding-right" => "0", + "white-space" => "nowrap" + ] + ), + "span" + ); + + if(count($date) !== 0){ + + $description[0]["innerHTML"] = + str_replace( + $date[0]["outerHTML"], + "", + $description[0]["innerHTML"] + ); + + $web["date"] = + strtotime( + $this->fuckhtml + ->getTextContent( + $date[0] + ) + ); + } + + $web["description"] = + $this->fuckhtml + ->getTextContent( + $description[0] + ); + }else{ + + // Yes.. 
You guessed it, use ANOTHER method to get descriptions + // off youtube containers + $description = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "-webkit-box-orient" => "vertical", + "display" => "-webkit-box", + "font-size" => "14px", + "-webkit-line-clamp" => "2", + "line-height" => "22px", + "overflow" => "hidden", + "word-break" => "break-word", + "color" => "#bdc1c6" + ] + ), + "div" + ); + + if(count($description) !== 0){ + + // check for video duration + $duration = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "border-radius" => "10px", + "font-family" => "arial,sans-serif-medium,sans-serif", + "font-size" => "12px", + "line-height" => "16px", + "padding-block" => "2px", + "padding-inline" => "8px" + ] + ), + "div" + ); + + if(count($duration) !== 0){ + + $web["table"]["Duration"] = + $this->fuckhtml + ->getTextContent( + $duration[0] + ); + + // remove duration from description + $description[0]["innerHTML"] = + str_replace( + $duration[0]["outerHTML"], + "", + $description[0]["innerHTML"] + ); + } + + $web["description"] = + $this->titledots( + html_entity_decode( + $this->fuckhtml + ->getTextContent( + $description[0] + ) + ) + ); + + // get author + time posted + $info = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "color" => "var(" . $this->getcolorvar("#70757a") . 
")", + "font-size" => "14px", + "line-height" => "20px", + "margin-top" => "12px" + ] + ), + "div" + ); + + if(count($info) !== 0){ + + $info = + explode( + "·", + $this->fuckhtml + ->getTextContent( + $info[0] + ) + ); + + switch(count($info)){ + + case 3: + $web["table"]["Author"] = trim($info[1]); + $web["date"] = strtotime(trim($info[2])); + break; + + case 2: + $web["date"] = strtotime(trim($info[1])); + break; + } + } + } + } + } + + // + // get categories of content within the search result + // + $cats = + $this->fuckhtml + ->getElementsByAttributeName( + "data-sncf", + "div" + ); + + foreach($cats as $cat){ + + $this->fuckhtml->load($cat); + + // detect image category + $images = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if(count($images) !== 0){ + + foreach($images as $image){ + + if(isset($image["attributes"]["id"])){ + // we found an image + + if(isset($image["attributes"]["width"])){ + + $width = (int)$image["attributes"]["width"]; + + if($width == 110){ + + $ratio = "1:1"; + }elseif($width > 110){ + + $ratio = "16:9"; + }else{ + + $ratio = "9:16"; + } + }else{ + + $ratio = "1:1"; + } + + $web["thumb"] = [ + "url" => $this->getdimg($image["attributes"]["id"]), + "ratio" => $ratio + ]; + + continue 2; + } + } + } + + // Detect rating + $spans_unfiltered = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + $spans = + $this->fuckhtml + ->getElementsByAttributeName( + "aria-label", + $spans_unfiltered + ); + + foreach($spans as $span){ + + if( + preg_match( + '/^Rated/', + $span["attributes"]["aria-label"] + ) + ){ + + // found rating + // scrape rating + preg_match( + '/([0-9.]+).*([0-9.]+)/', + $span["attributes"]["aria-label"], + $rating + ); + + if(isset($rating[1])){ + + $web["table"]["Rating"] = + $rating[1] . "/" . 
$rating[2]; + } + + $has_seen_reviews = 0; + foreach($spans_unfiltered as $span_unfiltered){ + + if( + preg_match( + '/([0-9,.]+) +([A-z]+)$/', + $this->fuckhtml + ->getTextContent( + $span_unfiltered + ), + $votes + ) + ){ + + $has_seen_reviews++; + $web["table"][ucfirst($votes[2])] = $votes[1]; + continue; + } + + $text = + $this->fuckhtml + ->getTextContent( + $span_unfiltered + ); + + if( + $text == " " || + $text == "" + ){ + + break; + } + + switch($has_seen_reviews){ + + case 1: + // scrape price + $web["table"]["Price"] = $text; + $has_seen_reviews++; + break; + + case 2: + // scrape platform + $web["table"]["Platform"] = $text; + $has_seen_reviews++; + break; + + case 3: + // Scrape type + $web["table"]["Medium"] = $text; + break; + } + } + + continue 2; + } + } + + // check if its a table of small sublinks + $table = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "display" => "table", + "white-space" => "nowrap", + "margin" => "5px 0", + "line-height" => "1.58", + "color" => "var(" . $this->getcolorvar("#70757a") . 
")" + ] + ), + "div" + ); + + if(count($table) !== 0){ + + $this->fuckhtml->load($table[0]); + + $rows = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "display" => "flex", + "white-space" => "normal" + ] + ), + "div" + ); + + foreach($rows as $row){ + + $this->fuckhtml->load($row); + + $sublink = [ + "title" => null, + "description" => null, + "url" => null, + "date" => null + ]; + + $link = + $this->fuckhtml + ->getElementsByTagName( + "a" + )[0]; + + $sublink["title"] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $link + ) + ); + + $sublink["url"] = + $this->unshiturl( + $link + ["attributes"] + ["href"] + ); + + $row["innerHTML"] = + str_replace( + $link["outerHTML"], + "", + $row["innerHTML"] + ); + + $this->fuckhtml->load($row); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + foreach($spans as $span){ + + $text = + $this->fuckhtml + ->getTextContent( + $span + ); + + if( + preg_match( + '/answers?$/', + $text + ) + ){ + + $sublink["description"] = + $text; + + continue; + } + + $time = strtotime($text); + + if($time !== false){ + + $sublink["date"] = $time; + } + } + + $web["sublink"][] = $sublink; + } + + // reset + $this->fuckhtml->load($cat); + continue; + } + + // check if its an answer header + $answer_header = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "overflow" => "hidden", + "text-overflow" => "ellipsis" + ] + ), + "span" + ); + + if(count($answer_header) !== 0){ + + $link = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + $cat["innerHTML"] = + str_replace( + $link[0]["outerHTML"], + "", + $cat["innerHTML"] + ); + + $web["sublink"][] = [ + "title" => + $this->fuckhtml + ->getTextContent( + $link[0] + ), + "description" => + $this->titledots( + trim( + str_replace( + "\xc2\xa0", + " ", + html_entity_decode( + $this->fuckhtml + ->getTextContent( + $cat + ) + ) + ), + " ·" + ) + ), + "url" => + $this->fuckhtml + ->getTextContent( + $link[0] + 
["attributes"] + ["href"] + ), + "date" => null + ]; + + continue; + } + + // check if its list of small sublinks + $urls = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($urls) !== 0){ + + // found small links + foreach($urls as $url){ + + $target = + $this->fuckhtml + ->getTextContent( + $url + ["attributes"] + ["href"] + ); + + if( + !preg_match( + '/^http/', + $target + ) + ){ + + continue; + } + + $web["sublink"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $url + ) + ), + "description" => null, + "url" => $target, + "date" => null + ]; + } + + continue; + } + + // we probed everything, assume this is the description + // if we didn't find one cleanly previously + if($web["description"] === null){ + $web["description"] = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $cat + ) + ); + } + } + + // check if description contains date + $description = explode("—", $web["description"], 2); + + if( + count($description) === 2 && + strlen($description[0]) <= 20 + ){ + + $date = strtotime($description[0]); + + if($date !== false){ + + $web["date"] = $date; + $web["description"] = ltrim($description[1]); + } + } + + // fetch youtube thumbnail + $thumbnail = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "border-radius" => "8px", + "height" => "fit-content", + "justify-content" => "center", + "margin-right" => "20px", + "margin-top" => "4px", + "position" => "relative", + "width" => "fit-content" + ] + ), + "div" + ); + + if(count($thumbnail) !== 0){ + + // load thumbnail container + $this->fuckhtml->load($thumbnail[0]); + + $image = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if( + count($image) !== 0 && + isset($image[0]["attributes"]["id"]) + ){ + + $web["thumb"] = [ + "url" => + $this->unshit_thumb( + $this->getdimg( + $image[0]["attributes"]["id"] + ) + ), + "ratio" => "16:9" + ]; + } + + // reset + $this->fuckhtml->load($result); + } + + $out["web"][] = 
$web; + } + + // reset + $this->fuckhtml->load($result_div); + + // + // Get instant answers + // + $answer_containers = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "padding-left" => "0px", + "padding-right" => "0px" + ] + ), + "div" + ); + + $date_class = + $this->getstyle( + [ + "font-size" => "12px", + "line-height" => "1.34", + "display" => "inline-block", + "font-family" => "Google Sans,arial,sans-serif", + "padding-right" => "0", + "white-space" => "nowrap" + ] + ); + + foreach($answer_containers as $container){ + + $this->fuckhtml->load($container); + + $web = [ + "title" => null, + "description" => null, + "url" => null, + "date" => null, + "type" => "web", + "thumb" => [ + "url" => null, + "ratio" => null + ], + "sublink" => [], + "table" => [] + ]; + + $answers = + $this->fuckhtml + ->getElementsByAttributeName( + "aria-controls", + "div" + ); + + $item_insert_pos = 1; + foreach($answers as $answer){ + + $out["related"][] = + $this->fuckhtml + ->getTextContent( + $answer + ); + + if( + isset( + $this->blobs[ + $answer + ["attributes"] + ["aria-controls"] + ] + ) + ){ + + $this->fuckhtml->load( + $this->blobs[ + $answer + ["attributes"] + ["aria-controls"] + ] + ); + + $divs = + $this->fuckhtml + ->getElementsByAttributeName( + "id", + "div" + ); + + foreach($divs as $div){ + + if( + !isset( + $this->blobs[ + $div + ["attributes"] + ["id"] + ] + ) + ){ + + continue; + } + + $this->fuckhtml->load( + $this->blobs[ + $div + ["attributes"] + ["id"] + ] + ); + + // get url + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($as) !== 0){ + + $web["url"] = + $this->unshiturl( + $as[0]["attributes"]["href"] + ); + + // skip entries that redirect to a search + if( + !preg_match( + '/^http/', + $web["url"] + ) + ){ + + continue 3; + } + } + + // get title + $h3 = + $this->fuckhtml + ->getElementsByTagName( + "h3" + ); + + if(count($h3) !== 0){ + + $web["title"] = + $this->titledots( + $this->fuckhtml + 
->getTextContent( + $h3[0] + ) + ); + } + + $description = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "wa:/description", + "div" + ); + + if(count($description) !== 0){ + + // check for date + $this->fuckhtml->load($description[0]); + + $date = + $this->fuckhtml + ->getElementsByClassName( + $date_class, + "span" + ); + + if(count($date) !== 0){ + + $description[0]["innerHTML"] = + str_replace( + $date[0]["outerHTML"], + "", + $description[0]["innerHTML"] + ); + + $web["date"] = + strtotime( + $this->fuckhtml + ->getTextContent( + $date[0] + ) + ); + } + + $web["description"] = + ltrim( + $this->fuckhtml + ->getTextContent( + $description[0] + ), + ": " + ); + } + } + + foreach($out["web"] as $item){ + + if($item["url"] == $web["url"]){ + + continue 2; + } + } + + array_splice($out["web"], $item_insert_pos, 0, [$web]); + $item_insert_pos++; + } + } + } + + // reset + $this->fuckhtml->load($result_div); + + // + // Scrape word definition + // + $definition_container = + $this->fuckhtml + ->getElementsByClassName( + "lr_container", + "div" + ); + + if(count($definition_container) !== 0){ + + $this->fuckhtml->load($definition_container[0]); + + // get header + $header = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "EntryHeader", + "div" + ); + + if(count($header) !== 0){ + + $description = []; + + $this->fuckhtml->load($header[0]); + + $title_div = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "font-family" => "Google Sans,arial,sans-serif", + "font-size" => "28px", + "line-height" => "36px" + ] + ) + ); + + if(count($title_div) !== 0){ + + $title = + $this->fuckhtml + ->getTextContent( + $title_div[0] + ); + }else{ + + $title = "Word definition"; + } + + $subtext_div = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "font-family" => "arial,sans-serif", + "font-size" => "14px", + "line-height" => "22px" + ] + ), + "span" + ); + + if(count($subtext_div) !== 0){ + + 
$description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $subtext_div[0] + ) + ]; + } + + // get audio + $audio = + $this->fuckhtml + ->getElementsByTagName( + "audio" + ); + + if(count($audio) !== 0){ + + $this->fuckhtml->load($audio[0]); + + $source = + $this->fuckhtml + ->getElementsByTagName( + "source" + ); + + if(count($source) !== 0){ + + $description[] = [ + "type" => "audio", + "url" => + preg_replace( + '/^\/\//', + "https://", + $this->fuckhtml + ->getTextContent( + $source[0] + ["attributes"] + ["src"] + ) + ) + ]; + } + + } + + // remove header to avoid confusion + $definition_container[0]["innerHTML"] = + str_replace( + $header[0]["outerHTML"], + "", + $definition_container[0]["innerHTML"] + ); + + // reset + $this->fuckhtml->load($definition_container[0]); + + $vmods = + $this->fuckhtml + ->getElementsByClassName( + "vmod", + "div" + ); + + foreach($vmods as $category){ + + if( + !isset( + $category + ["attributes"] + ["data-topic"] + ) || + $category + ["attributes"] + ["class"] != "vmod" + ){ + + continue; + } + + $this->fuckhtml->load($category); + + // get category type + $type = + $this->fuckhtml + ->getElementsByTagName( + "i" + ); + + if(count($type) !== 0){ + + $description[] = [ + "type" => "title", + "value" => + $this->fuckhtml + ->getTextContent( + $type[0] + ) + ]; + } + + // get heading text + $headings = + $this->fuckhtml + ->getElementsByClassName( + "xpdxpnd", + "div" + ); + + foreach($headings as $heading){ + + $description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $heading + ) + ]; + } + + $definitions = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "SenseDefinition", + "div" + ); + + $i = 1; + $text = []; + + foreach($definitions as $definition){ + + $text[] = + $i . ". " . 
+ $this->fuckhtml + ->getTextContent( + $definition + ); + + $i++; + } + + if(count($text) !== 0){ + + $description[] = [ + "type" => "text", + "value" => + implode("\n", $text) + ]; + } + } + + $out["answer"][] = [ + "title" => $title, + "description" => $description, + "url" => null, + "thumb" => null, + "table" => [], + "sublink" => [] + ]; + } + + // reset + $this->fuckhtml->load($result_div); + } + + // + // scrape elements with a g-section-with-header + // includes: images, news carousels + // + + $g_sections = + $this->fuckhtml + ->getElementsByTagName( + "g-section-with-header" + ); + + if(count($g_sections) !== 0){ + foreach($g_sections as $g_section){ + + // parse elements with a g-section-with-header + $this->fuckhtml->load($g_section); + + $div_title = + $this->fuckhtml + ->getElementsByClassName( + "a-no-hover-decoration", + "a" + ); + + if(count($div_title) !== 0){ + + // title detected, skip + continue; + } + + // no title detected: detect news container + $news = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "outline-offset" => "-1px", + "display" => "flex", + "flex-direction" => "column", + "flex-grow" => "1" + ] + ) + ); + + foreach($news as $new){ + + $this->fuckhtml->load($new); + + $image = + $this->fuckhtml + ->getElementsByAttributeName( + "id", + "img" + ); + + if( + count($image) !== 0 && + !( + isset($image[0]["attributes"]["style"]) && + strpos( + $image[0]["attributes"]["style"], + "height:18px" + ) !== false + ) + ){ + + $thumb = [ + "url" => + $this->getdimg( + $image[0] + ["attributes"] + ["id"] + ), + "ratio" => "1:1" + ]; + } + + $title = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByAttributeValue( + "role", + "heading", + "div" + )[0] + ) + ); + + $date_div = + $this->fuckhtml + ->getElementsByAttributeName( + "style", + "div" + ); + + if(count($date_div) !== 0){ + + foreach($date_div as $div){ + + if( + strpos( + $div["attributes"]["style"], + "bottom:" + ) 
!== false + ){ + $date = + strtotime( + $this->fuckhtml + ->getTextContent( + $div + ) + ); + + break; + } + } + }else{ + + $date = null; + } + + $out["news"][] = [ + "title" => $title, + "description" => null, + "date" => $date, + "thumb" => $thumb, + "url" => + $this->fuckhtml + ->getTextContent( + $new + ["attributes"] + ["href"] + ) + ]; + } + } + + // reset + $this->fuckhtml->load($result_div); + } + + // + // Parse images (carousel, left hand-side) + // + $image_carousels = + $this->fuckhtml + ->getElementsByAttributeValue( + "id", + "media_result_group", + "div" + ); + + if(count($image_carousels) !== 0){ + + foreach($image_carousels as $image_carousel){ + + $this->fuckhtml->load($image_carousel); + + // get related searches in image carousel + $relateds = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "display" => "inline-block", + "margin-right" => "6px", + "outline" => "none", + "padding" => "6px 0" + ], + "a" + ) + ); + + foreach($relateds as $related){ + + if(!isset($related["innerHTML"])){ + + // found an image + continue; + } + + $text = + $this->fuckhtml + ->getTextContent( + $related + ); + + if($text != ""){ + + $out["related"][] = $text; + } + } + + $div = + $this->fuckhtml + ->getElementsByTagName( + "div" + ); + + // get loaded images + $images = + $this->fuckhtml + ->getElementsByClassName( + "ivg-i", + $div + ); + + foreach($images as $image){ + + $this->fuckhtml->load($image); + + $img_tags = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if( + !isset($image["attributes"]["data-docid"]) || + !isset($this->image_arr[$image["attributes"]["data-docid"]]) + ){ + + continue; + } + + // search for the right image tag + $image_tag = false; + foreach($img_tags as $img){ + + if( + isset( + $img + ["attributes"] + ["alt"] + ) && + trim( + $img + ["attributes"] + ["alt"] + ) != "" + ){ + + $image_tag = $img; + break; + } + } + + if($image_tag === false){ + + continue; + } + + $out["image"][] = [ + "title" => + 
$this->titledots( + $this->fuckhtml + ->getTextContent( + $image_tag + ["attributes"] + ["alt"] + ) + ), + "source" => + $this->image_arr[ + $image + ["attributes"] + ["data-docid"] + ], + "url" => + $this->fuckhtml + ->getTextContent( + $image + ["attributes"] + ["data-lpage"] + ) + ]; + } + + // get unloaded javascript images + $images_js_sel = + $this->fuckhtml + ->getElementsByAttributeName( + "id", + $div + ); + + $loaded = []; + + foreach($images_js_sel as $sel){ + + if( + !isset($this->blobs[$sel["attributes"]["id"]]) || + in_array((string)$sel["attributes"]["id"], $loaded, true) + ){ + + // not an unloaded javascript image + continue; + } + + $loaded[] = $sel["attributes"]["id"]; + + // get yet another javascript component + $this->fuckhtml->load($this->blobs[$sel["attributes"]["id"]]); + + // get js node: contains title & url + $js_node = + $this->fuckhtml + ->getElementsByTagName( + "div" + )[0]; + + if(!isset($this->blobs[$js_node["attributes"]["id"]])){ + + // did not find refer id + continue; + } + + // load second javascript component + $this->fuckhtml->load($this->blobs[$js_node["attributes"]["id"]]); + + // get title from image alt text. + // data-src from this image is cropped, ignore it.. 
+ $img = + $this->fuckhtml + ->getElementsByTagName( + "img" + )[0]; + + $out["image"][] = [ + "title" => + $this->fuckhtml + ->getTextContent( + $img["attributes"]["alt"] + ), + "source" => + $this->image_arr[ + $js_node["attributes"]["data-docid"] + ], + "url" => + $this->fuckhtml + ->getTextContent( + $js_node["attributes"]["data-lpage"] + ) + ]; + } + } + + // reset + $this->fuckhtml->load($result_div); + } + + // + // Parse videos + // + $this->fuckhtml->load($result_div); + + $videos = + $this->fuckhtml + ->getElementsByAttributeName( + "data-vid", + "div" + ); + + foreach($videos as $video){ + + $this->fuckhtml->load($video); + + // get url + $url = + $this->fuckhtml + ->getTextContent( + $video + ["attributes"] + ["data-surl"] + ); + + foreach($out["web"] as $link){ + + if($link["url"] == $url){ + + // ignore if we already have the video in $out["web"] + continue 2; + } + } + + // get thumbnail + $image = + $this->fuckhtml + ->getElementsByAttributeName( + "id", + "img" + ); + + if(count($image) !== 0){ + + $thumb = [ + "url" => $this->getdimg($image[0]["attributes"]["id"]), + "ratio" => "16:9" + ]; + }else{ + + $thumb = [ + "url" => null, + "ratio" => null + ]; + } + + // get title + $title = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "font-family" => "arial,sans-serif", + "font-size" => "16px", + "font-weight" => "400", + "line-height" => "24px" + ] + ), + "div" + ); + + if(count($title) === 0){ + + // ?? 
no title + continue; + } + + $title = + $this->titledots( + $this->fuckhtml + ->getTextContent( + $title[0] + ) + ); + + // get duration + $duration_div = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "border-radius" => "10px", + "font-family" => "arial,sans-serif-medium,sans-serif", + "font-size" => "12px", + "line-height" => "16px", + "padding-block" => "2px", + "padding-inline" => "8px" + ] + ), + "div" + ); + + if(count($duration_div) !== 0){ + + $duration = + $this->hms2int( + $this->fuckhtml + ->getTextContent( + $duration_div[0] + ) + ); + }else{ + + // check if its a livestream + $duration = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "background-color" => "#d93025", + "border-radius" => "10px", + "color" => "#fff", + "font-family" => "arial,sans-serif-medium,sans-serif", + "font-size" => "12px", + "line-height" => "16px", + "padding-block" => "2px", + "padding-inline" => "8px" + ] + ), + "span" + ); + + if(count($duration) !== 0){ + + $duration = "_LIVE"; + }else{ + + $duration = null; + } + } + + // get date + $date_div = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "color" => "var(" . $this->getcolorvar("#70757a") . 
")", + "font-size" => "14px" + ] + ), + "div" + ); + + if(count($date_div) !== 0){ + + $date = strtotime( + $this->fuckhtml + ->getTextContent( + $date_div[0] + ) + ); + + if($date === false){ + + // failed to parse date + $date = null; + } + }else{ + + $date = null; + } + + $out["video"][] = [ + "title" => $title, + "description" => null, + "date" => $date, + "duration" => $duration, + "views" => null, + "thumb" => $thumb, + "url" => $url + ]; + } + + // + // Parse featured results (which contain images, fuck the rest desu) + // + $this->fuckhtml->load($html); + $top = + $this->fuckhtml + ->getElementsByAttributeValue( + "aria-label", + "Featured results", + "div" + ); + + if(count($top) !== 0){ + + $this->fuckhtml->load($top[0]); + + // get images + $grid = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "border-radius" => "20px", + "display" => "grid", + "grid-gap" => "2px", + "grid-template-rows" => "repeat(2,minmax(0,1fr))", + "overflow" => "hidden", + "bottom" => "0", + "left" => "0", + "right" => "0", + "top" => "0", + "position" => "absolute", + ] + ), + "div" + ); + + if(count($grid) !== 0){ + + // we found image grid + $this->fuckhtml->load($grid[0]); + + $images_div = + $this->fuckhtml + ->getElementsByAttributeName( + "data-attrid", + "div" + ); + + foreach($images_div as $image_div){ + + $this->fuckhtml->load($image_div); + + $image = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if( + count($image) === 0 || + !isset($image_div["attributes"]["data-docid"]) || + !isset($this->image_arr[$image_div["attributes"]["data-docid"]]) + ){ + + // ?? 
no image, continue + continue; + } + + $out["image"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $image[0]["attributes"]["alt"] + ) + ), + "source" => + $this->image_arr[ + $image_div["attributes"]["data-docid"] + ], + "url" => + $this->fuckhtml + ->getTextContent( + $image_div["attributes"]["data-lpage"] + ) + ]; + } + } + } + + + // + // craft $npt token + // + if( + $last_page === false && + count($out["web"]) !== 0 + ){ + if(!isset($params["start"])){ + + $params["start"] = 20; + }else{ + + $params["start"] += 20; + } + + $out["npt"] = + $this->backend + ->store( + json_encode($params), + $pagetype, + $proxy + ); + } + + + // + // Parse right handside + // + $this->fuckhtml->load($html); + + $rhs = + $this->fuckhtml + ->getElementById( + "rhs" + ); + + if($rhs === null){ + + return $out; + } + + $this->fuckhtml->load($rhs); + + // get images gallery + $image_gallery = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-rc", + "ivg-i", + "div" + ); + + if(count($image_gallery) !== 0){ + + $this->fuckhtml->load($image_gallery[0]); + + // get images + $images_div = + $this->fuckhtml + ->getElementsByClassName( + "ivg-i", + "div" + ); + + foreach($images_div as $image_div){ + + $this->fuckhtml->load($image_div); + + $image = + $this->fuckhtml + ->getElementsByTagName( + "img" + ); + + if( + count($image) === 0 || + !isset( + $this->image_arr[ + $image_div + ["attributes"] + ["data-docid"] + ] + ) + ){ + + continue; + } + + foreach($out["image"] as $existing_image){ + + // might already exist + if( + $existing_image["source"][1]["url"] == + $this->image_arr[ + $image_div + ["attributes"] + ["data-docid"] + ][1]["url"] + ){ + + continue 2; + } + } + + $out["image"][] = [ + "title" => + $this->titledots( + $this->fuckhtml + ->getTextContent( + $image[0] + ["attributes"] + ["alt"] + ) + ), + "source" => + $this->image_arr[ + $image_div + ["attributes"] + ["data-docid"] + ], + "url" => + $this->fuckhtml + ->getTextContent( + 
$image_div + ["attributes"] + ["data-lpage"] + ) + ]; + } + + // reset + $this->fuckhtml->load($rhs); + } + + // get header container + $header = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "padding" => "0 0 16px 20px", + "display" => "flex" + ] + ), + "div" + ); + + // stop parsing wikipedia heads if there isn't a header + $description = []; + $title = "About"; + + if(count($header) !== 0){ + + $this->fuckhtml->load($header[0]); + + // g-snackbar-action present: we found a button instead + if( + count( + $this->fuckhtml + ->getElementsByTagName( + "g-snackbar-action" + ) + ) !== 0 + ){ + + $title_tag = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "title", + "div" + ); + + if(count($title_tag) !== 0){ + $title = + $this->fuckhtml + ->getTextContent( + $title_tag[0] + ); + + $header[0]["innerHTML"] = + str_replace( + $title_tag[0]["outerHTML"], + "", + $header[0]["innerHTML"] + ); + + // if header still contains text, add it as a subtitle in description + $subtitle = + $this->fuckhtml + ->getTextContent( + $header[0] + ); + + if(strlen($subtitle) !== 0){ + + $description[] = [ + "type" => "quote", + "value" => $subtitle + ]; + } + } + } + + // reset + $this->fuckhtml->load($rhs); + } + + // get description elements + $url = null; + + $text = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "description", + "div" + ); + + if(count($text) !== 0){ + + $this->fuckhtml->load($text[0]); + + $a = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($a) !== 0){ + // get link and remove it from description + + $a = $a[count($a) - 1]; + + $text[0]["innerHTML"] = + str_replace( + $a["outerHTML"], + "", + $text[0]["innerHTML"] + ); + + $url = + $this->fuckhtml + ->getTextContent( + $a + ["attributes"] + ["href"] + ); + } + + $description[] = [ + "type" => "text", + "value" => + html_entity_decode( + preg_replace( + '/^Description/', + "", + $this->fuckhtml + ->getTextContent( + $text[0] + ) 
+ ) + ) + ]; + + // reset + $this->fuckhtml->load($rhs); + } + + // get reviews (google play, steam, etc) + $review_container = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "align-items" => "start", + "display" => "flex" + ] + ), + "div" + ); + + if(count($review_container) !== 0){ + + $this->fuckhtml->load($review_container[0]); + + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($as) !== 0){ + + $description[] = [ + "type" => "title", + "value" => "Ratings" + ]; + + foreach($as as $a){ + + $this->fuckhtml->load($a); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + if(count($spans) >= 2){ + + $value = + trim( + $this->fuckhtml + ->getTextContent( + $spans[1] + ), + "· " + ); + + if( + $value == "" && + isset($spans[2]) + ){ + + $value = + $this->fuckhtml + ->getTextContent( + $spans[2] + ); + } + + $description[] = [ + "type" => "link", + "url" => + $this->fuckhtml + ->getTextContent( + $a["attributes"] + ["href"] + ), + "value" => $value + ]; + + $description[] = [ + "type" => "text", + "value" => + ": " . + $this->fuckhtml + ->getTextContent( + $spans[0] + ) . 
"\n" + ]; + } + } + } + + // reset + $this->fuckhtml->load($rhs); + } + + // initialize sublinks + $sublinks = []; + + // get description from business + if(count($description) === 0){ + + $data_attrid = + $this->fuckhtml + ->getElementsByAttributeName( + "data-attrid" + ); + + $summary = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "kc:/local:one line summary", + $data_attrid + ); + + if(count($summary) !== 0){ + + $description[] = [ + "type" => "quote", + "value" => + $this->fuckhtml + ->getTextContent( + $summary[0] + ) + ]; + + // remove summary so it doesnt get parsed as a table + $rhs["innerHTML"] = + str_replace( + $summary[0]["outerHTML"], + "", + $rhs["innerHTML"] + ); + + $this->fuckhtml->load($rhs); + } + + $address = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "kc:/location/location:address", + $data_attrid + ); + + if(count($address) !== 0){ + + $description[] = [ + "type" => "text", + "value" => + $this->fuckhtml + ->getTextContent( + $address[0] + ) + ]; + } + + // get title + $title_div = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "title", + $data_attrid + ); + + if(count($title_div) !== 0){ + + $title = + $this->fuckhtml + ->getTextContent( + $title_div[0] + ); + } + + // get phone number + $phone = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "kc:/local:alt phone", + $data_attrid + ); + + if(count($phone) !== 0){ + + $this->fuckhtml->load($phone[0]); + + $sublinks["Call"] = + "tel:" . + $this->fuckhtml + ->getTextContent( + $this->fuckhtml + ->getElementsByAttributeName( + "aria-label", + "span" + )[0] + ); + + $this->fuckhtml->load($rhs); + } + } + + if(count($description) === 0){ + + // still no description? 
abort + return $out; + } + + // get table elements + $table = []; + $table_elems = + $this->fuckhtml + ->getElementsByClassName( + $this->getstyle( + [ + "margin-top" => "7px" + ] + ), + "div" + ); + + foreach($table_elems as $elem){ + + $this->fuckhtml->load($elem); + + $spans = + $this->fuckhtml + ->getElementsByTagName( + "span" + ); + + if(count($spans) === 0){ + + // ?? invalid + continue; + } + + $elem["innerHTML"] = + str_replace( + $spans[0]["outerHTML"], + "", + $elem["innerHTML"] + ); + + $key = + rtrim( + $this->fuckhtml + ->getTextContent( + $spans[0] + ), + ": " + ); + + if( + $key == "" || + $key == "Phone" + ){ + + continue; + } + + if($key == "Hours"){ + + $hours = []; + + $this->fuckhtml->load($elem); + + $trs = + $this->fuckhtml + ->getElementsByTagName( + "tr" + ); + + foreach($trs as $tr){ + + $this->fuckhtml->load($tr); + + $tds = + $this->fuckhtml + ->getElementsByTagName( + "td" + ); + + if(count($tds) === 2){ + + $hours[] = + $this->fuckhtml + ->getTextContent( + $tds[0] + ) . ": " . 
+ $this->fuckhtml + ->getTextContent( + $tds[1] + ); + } + } + + if(count($hours) !== 0){ + + $hours = implode("\n", $hours); + $table["Hours"] = $hours; + } + + continue; + } + + $table[$key] = + preg_replace( + '/ +/', + " ", + $this->fuckhtml + ->getTextContent( + $elem + ) + ); + } + + // reset + $this->fuckhtml->load($rhs); + + // get the website div + $as = + $this->fuckhtml + ->getElementsByAttributeValue( + "data-attrid", + "visit_official_site", + "a" + ); + + if(count($as) !== 0){ + + $sublinks["Website"] = + str_replace( + "http://", + "https://", + $this->fuckhtml + ->getTextContent( + $as[0] + ["attributes"] + ["href"] + ) + ); + }else{ + + // get website through button + $button = + $this->fuckhtml + ->getElementsByClassName( + "ab_button", + "a" + ); + + if(count($button) !== 0){ + + $sublinks["Website"] = + $this->unshiturl( + $this->fuckhtml + ->getTextContent( + $button[0] + ["attributes"] + ["href"] + ) + ); + } + } + + // get social media links + $as = + $this->fuckhtml + ->getElementsByTagName( + "g-link" + ); + + foreach($as as $a){ + + $this->fuckhtml->load($a); + + $link = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($link) === 0){ + + continue; + } + + $sublink_title = + $this->fuckhtml + ->getTextContent( + $a + ); + + if($sublink_title == "X (Twitter)"){ + + $sublink_title = "Twitter"; + } + + $sublinks[$sublink_title] = + $this->fuckhtml + ->getTextContent( + $link[0] + ["attributes"] + ["href"] + ); + } + + // reset + $this->fuckhtml->load($rhs); + + // get those round containers + $containers = + $this->fuckhtml + ->getElementsByClassName( + "tpa-ci" + ); + + foreach($containers as $container){ + + $this->fuckhtml->load($container); + + $as = + $this->fuckhtml + ->getElementsByTagName( + "a" + ); + + if(count($as) === 0){ + + continue; + } + + $sublinks[ + $this->fuckhtml + ->getTextContent( + $as[0] + ) + ] = + $this->fuckhtml + ->getTextContent( + $as[0] + ["attributes"] + ["href"] + ); + } + + $out["answer"][] 
= [ + "title" => $title, + "description" => $description, + "url" => $url, + "thumb" => null, + "table" => $table, + "sublink" => $sublinks + ]; + + return $out; + } + +
+	// Index the images Google delivers through inline javascript.
+	//
+	// Fills $this->dimg (image id => URL or data: URI) from two sources found
+	// in $html: the "google.ldi={...}" JSON map and the
+	// "var s='data:image/...';var ii=['id',...];" snippets.
+	private function scrape_dimg($html){
+		
+		$this->dimg = [];
+		
+		// get images loaded through javascript
+		preg_match_all(
+			'/function\(\){google\.ldi=({.*?});/',
+			$html,
+			$ldi
+		);
+		
+		foreach($ldi[1] ?? [] as $json){
+			
+			$map = json_decode($json, true);
+			
+			if(!is_array($map)){
+				
+				// json_decode() returns null on malformed input: skip the
+				// blob instead of iterating over null
+				continue;
+			}
+			
+			foreach($map as $id => $src){
+				
+				if(!is_string($src)){
+					
+					continue;
+				}
+				
+				$this->dimg[$id] = $this->unshit_thumb($src);
+			}
+		}
+		
+		// get additional javascript base64 images
+		preg_match_all(
+			'/var s=\'(data:image\/[^\']+)\';var ii=\[((?:\'[^\']+\',?)+)\];/',
+			$html,
+			$inline
+		);
+		
+		foreach($inline[1] ?? [] as $i => $data_uri){
+			
+			$data_uri =
+				$this->fuckhtml
+				->parseJsString(
+					$data_uri
+				);
+			
+			// the same image can be assigned to several ids
+			foreach(explode(",", $inline[2][$i]) as $id){
+				
+				$this->dimg[trim($id, "'")] = $data_uri;
+			}
+		}
+	}
+	
+	
+	// Index the image arrays embedded in the image search page.
+	//
+	// Fills $this->image_arr, keyed by the first captured string of each
+	// array (image() looks entries up by the tile's data-docid). Every entry
+	// holds two url/width/height sets: the second URL/size triple of the
+	// match first, then the first triple passed through unshit_thumb().
+	// In each triple the first number is stored as height and the second
+	// as width.
+	private function scrape_imagearr($html){
+		
+		// get image links arrays
+		preg_match_all(
+			'/\[0,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
+			$html,
+			$found
+		);
+		
+		$this->image_arr = [];
+		
+		foreach($found[1] ?? [] as $i => $docid){
+			
+			$this->image_arr[$docid] =
+				[
+					[
+						"url" =>
+							$this->fuckhtml
+							->parseJsString(
+								$found[5][$i]
+							),
+						"width" => (int)$found[7][$i],
+						"height" => (int)$found[6][$i]
+					],
+					[
+						"url" =>
+							$this->unshit_thumb(
+								$this->fuckhtml
+								->parseJsString(
+									$found[2][$i]
+								)
+							),
+						"width" => (int)$found[4][$i],
+						"height" => (int)$found[3][$i]
+					]
+				];
+		}
+	}
+	
+	
+	// Return the image collected by scrape_dimg() for this id, or null.
+	private function getdimg($dimg){
+		
+		return $this->dimg[$dimg] ?? null;
+	}
+	
+	
+	// Strip everything but the "q" parameter from gstatic thumbnail URLs.
+	// Any other URL (or one that cannot be parsed) is returned untouched.
+	private function unshit_thumb($url){
+		// https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
+		// https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA
+		
+		$parts = parse_url($url);
+		
+		if(
+			!isset($parts["host"]) ||
+			!isset($parts["query"]) || // gstatic URL without a query string
+			!preg_match(
+				'/tbn.*\.gstatic\.com/',
+				$parts["host"]
+			)
+		){
+			
+			return $url;
+		}
+		
+		parse_str($parts["query"], $params);
+		
+		if(isset($params["q"])){
+			
+			return "https://" . $parts["host"] . "/images?q=" . $params["q"];
+		}
+		
+		return $url;
+	}
+	
+	
+	// Parse every <style> tag of the loaded document.
+	//
+	// Fills $this->styles (selector => [property => value, "_c" => number
+	// of properties]) and $this->css_colors (value => custom property name,
+	// taken from the ":root" rule).
+	private function parsestyles(){
+		
+		$raw_styles = "";
+		
+		foreach(
+			$this->fuckhtml
+			->getElementsByTagName(
+				"style"
+			) as $style
+		){
+			
+			$raw_styles .= $style["innerHTML"];
+		}
+		
+		// filter out media/keyframe queries
+		$raw_styles =
+			preg_replace(
+				'/@\s*(?!font-face)[^{]+\s*{[\S\s]+?}\s*}/',
+				"",
+				$raw_styles
+			);
+		
+		// get styles
+		preg_match_all(
+			'/(.+?){([\S\s]*?)}/',
+			$raw_styles,
+			$rules
+		);
+		
+		$styles = [];
+		
+		foreach($rules[1] as $i => $selectors){
+			
+			// get style values
+			preg_match_all(
+				'/([^:;]+):([^;]*?(?:\([^)]+\)[^;]*?)?)(?:;|$)/',
+				$rules[2][$i],
+				$pairs
+			);
+			
+			$values = [];
+			
+			foreach($pairs[1] as $k => $property){
+				
+				$values[trim($property)] = trim($pairs[2][$k]);
+			}
+			
+			// h1,h2,h3 will each get their own array index
+			foreach(explode(",", $selectors) as $name){
+				
+				$name = trim($name, "}\t\n\r\0\x0B");
+				
+				foreach($values as $key => $value){
+					
+					$styles[$name][$key] = $value;
+				}
+			}
+		}
+		
+		// remember how many properties each selector has: getstyle() uses
+		// the counter to require an exact match
+		foreach($styles as $key => $values){
+			
+			$styles[$key]["_c"] = count($values);
+		}
+		
+		$this->styles = $styles;
+		
+		// get CSS colors
+		$this->css_colors = [];
+		
+		foreach($this->styles[":root"] ?? [] as $key => $value){
+			
+			if($key === "_c"){
+				
+				// internal counter, not a custom property
+				continue;
+			}
+			
+			$this->css_colors[$value] = $key;
+		}
+	}
+	
+	
+	
+	// Find the class/id name of the CSS rule whose declarations are exactly
+	// $styles (property => value). Returns the last part of the selector with
+	// "." and "#" replaced by spaces, or false when no rule matches.
+	private function getstyle($styles){
+
+
$styles["_c"] = count($styles); // number of requested declarations
+		
+		foreach($this->styles as $selector => $declarations){
+			
+			// every requested pair plus the "_c" counter has to match, so the
+			// rule declares the requested properties and nothing else
+			if(count(array_intersect_assoc($declarations, $styles)) !== $styles["_c"] + 1){
+				
+				continue;
+			}
+			
+			// only keep the last part of a descendant selector
+			$parts = explode(" ", $selector);
+			
+			return
+				ltrim(
+					str_replace(
+						[".", "#"],
+						" ",
+						$parts[count($parts) - 1]
+					)
+				);
+		}
+		
+		return false;
+	}
+	
+	
+	
+	// Return the name of the ":root" custom property holding $color, or null.
+	private function getcolorvar($color){
+		
+		return $this->css_colors[$color] ?? null;
+	}
+	
+	
+	
+	// Web search.
+	//
+	// $get carries either "npt" (next page token) or the first-page filters
+	// "s", "country", "nsfw", "lang", "older", "newer" and "spellcheck".
+	// Returns whatever parsepage() builds for the "web" page type.
+	// Throws Exception("Failed to get HTML") when the request fails.
+	public function web($get){
+		
+		if($get["npt"]){
+			
+			// next page: reuse the stored parameters and proxy
+			[$params, $proxy] = $this->backend->get($get["npt"], "web");
+			$params = json_decode($params, true);
+			
+			$search = $params["q"];
+			
+		}else{
+			
+			$search = $get["s"];
+			$proxy = $this->backend->get_ip();
+			
+			$params = [
+				"q" => $search,
+				"hl" => "en",
+				"num" => 20 // get 20 results
+			];
+			
+			// country
+			if($get["country"] != "any"){
+				
+				$params["gl"] = $get["country"];
+			}
+			
+			// nsfw
+			$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
+			
+			// language
+			if($get["lang"] != "any"){
+				
+				$params["lr"] = "lang_" . $get["lang"];
+			}
+			
+			// generate tbs
+			$tbs = [];
+			
+			// get date
+			$older = $get["older"] === false ? null : date("m/d/Y", $get["older"]);
+			$newer = $get["newer"] === false ? null : date("m/d/Y", $get["newer"]);
+			
+			if(
+				$older !== null ||
+				$newer !== null
+			){
+				
+				$tbs["cdr"] = "1";
+				$tbs["cd_min"] = $newer;
+				$tbs["cd_max"] = $older;
+			}
+			
+			// spellcheck filter
+			if($get["spellcheck"] == "no"){
+				
+				$params["nfpr"] = "1";
+			}
+			
+			// append tbs as "key:value,key:value"
+			if(count($tbs) !== 0){
+				
+				$pairs = [];
+				
+				foreach($tbs as $key => $value){
+					
+					$pairs[] = $key . ":" . $value;
+				}
+				
+				$params["tbs"] = implode(",", $pairs);
+			}
+		}
+		
+		try{
+			$html =
+				$this->get(
+					$proxy,
+					"https://www.google.com/search",
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to get HTML");
+		}
+		
+		//$html = file_get_contents("scraper/google.html");
+		
+		return $this->parsepage($html, "web", $search, $proxy, $params);
+	}
+	
+	
+	
+	// Video search.
+	//
+	// $get carries either "npt" or the first-page filters "s", "country",
+	// "nsfw", "older", "newer", "duration", "quality" and "captions".
+	// Returns ["status", "npt", "video", "author", "livestream", "playlist",
+	// "reel"]; only "video" is filled, from the "web" results of parsepage().
+	// Throws Exception("Failed to get HTML") when the request fails.
+	public function video($get){
+		
+		if($get["npt"]){
+			
+			// NOTE(review): the token is read back with the "web" page type;
+			// presumably that is what parsepage() stores it under - confirm
+			[$params, $proxy] = $this->backend->get($get["npt"], "web");
+			$params = json_decode($params, true);
+			
+			$search = $params["q"];
+			
+		}else{
+			
+			$search = $get["s"];
+			$proxy = $this->backend->get_ip();
+			
+			$params = [
+				"q" => $search,
+				"tbm" => "vid",
+				"hl" => "en",
+				"num" => "20"
+			];
+			
+			// country
+			if($get["country"] != "any"){
+				
+				$params["gl"] = $get["country"];
+			}
+			
+			// nsfw
+			$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
+			
+			$tbs = [];
+			
+			// get date
+			$older = $get["older"] === false ? null : date("m/d/Y", $get["older"]);
+			$newer = $get["newer"] === false ? null : date("m/d/Y", $get["newer"]);
+			
+			if(
+				$older !== null ||
+				$newer !== null
+			){
+				
+				$tbs["cdr"] = "1";
+				$tbs["cd_min"] = $newer;
+				$tbs["cd_max"] = $older;
+			}
+			
+			// duration
+			if($get["duration"] != "any"){
+				
+				$tbs["dur"] = $get["duration"];
+			}
+			
+			// quality
+			if($get["quality"] != "any"){
+				
+				$tbs["hq"] = $get["quality"];
+			}
+			
+			// captions
+			if($get["captions"] != "any"){
+				
+				$tbs["cc"] = $get["captions"];
+			}
+			
+			// append tbs. Every filter is kept as key => value and joined
+			// here: a bare implode() on the mixed array used to drop the
+			// "cdr"/"cd_min"/"cd_max" keys and send "1,<date>,<date>"
+			if(count($tbs) !== 0){
+				
+				$pairs = [];
+				
+				foreach($tbs as $key => $value){
+					
+					$pairs[] = $key . ":" . $value;
+				}
+				
+				$params["tbs"] = implode(",", $pairs);
+			}
+		}
+		
+		try{
+			$html =
+				$this->get(
+					$proxy,
+					"https://www.google.com/search",
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to get HTML");
+		}
+		
+		//$html = file_get_contents("scraper/google-video.html");
+		
+		$response = $this->parsepage($html, "videos", $search, $proxy, $params);
+		$out = [
+			"status" => "ok",
+			"npt" => $response["npt"],
+			"video" => [],
+			"author" => [],
+			"livestream" => [],
+			"playlist" => [],
+			"reel" => []
+		];
+		
+		foreach($response["web"] as $result){
+			
+			$out["video"][] = [
+				"title" => $result["title"],
+				"description" => $result["description"],
+				"author" => [
+					"name" => $result["table"]["Author"] ?? null,
+					"url" => null,
+					"avatar" => null
+				],
+				"date" => $result["date"],
+				"duration" => isset($result["table"]["Duration"]) ? $this->hms2int($result["table"]["Duration"]) : null,
+				"views" => null,
+				"thumb" => $result["thumb"],
+				"url" => $result["url"]
+			];
+		}
+		
+		return $out;
+	}
+	
+	
+	
+	// News search.
+	//
+	// $get carries either "npt" (the stored token is the relative URL of the
+	// next page) or the first-page filters "s", "country", "nsfw", "older",
+	// "newer" and "sort".
+	// Returns ["status" => "ok", "npt" => token|null, "news" => [...]].
+	// Throws Exception when the request fails, when the result column is
+	// missing, or (through detect_sorry()) when a captcha is served.
+	public function news($get){
+		
+		if($get["npt"]){
+			
+			[$req, $proxy] = $this->backend->get($get["npt"], "news");
+			
+			$url = "https://www.google.com" . $req;
+			$params = [];
+			
+		}else{
+			
+			$search = $get["s"];
+			$proxy = $this->backend->get_ip();
+			
+			$url = "https://www.google.com/search";
+			$params = [
+				"q" => $search,
+				"tbm" => "nws",
+				"hl" => "en",
+				"num" => "20"
+			];
+			
+			// country
+			if($get["country"] != "any"){
+				
+				$params["gl"] = $get["country"];
+			}
+			
+			// nsfw
+			$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
+			
+			$tbs = [];
+			
+			// get date
+			$older = $get["older"] === false ? null : date("m/d/Y", $get["older"]);
+			$newer = $get["newer"] === false ? null : date("m/d/Y", $get["newer"]);
+			
+			if(
+				$older !== null ||
+				$newer !== null
+			){
+				
+				$tbs["cdr"] = "1";
+				$tbs["cd_min"] = $newer;
+				$tbs["cd_max"] = $older;
+			}
+			
+			// relevance
+			if($get["sort"] == "date"){
+				
+				$tbs["sbd"] = "1";
+			}
+			
+			// append tbs
+			if(count($tbs) !== 0){
+				
+				$pairs = [];
+				
+				foreach($tbs as $key => $value){
+					
+					$pairs[] = $key . ":" . $value;
+				}
+				
+				$params["tbs"] = implode(",", $pairs);
+			}
+		}
+		
+		//$html = file_get_contents("scraper/google-news.html");
+		
+		// both branches fail the same way (the first-page request used to
+		// leak the raw transport exception)
+		try{
+			$html =
+				$this->get(
+					$proxy,
+					$url,
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to get HTML");
+		}
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"news" => []
+		];
+		
+		$this->fuckhtml->load($html);
+		
+		$this->detect_sorry();
+		
+		// get images
+		$this->scrape_dimg($html);
+		
+		// parse styles
+		$this->parsestyles();
+		
+		$center_col =
+			$this->fuckhtml
+			->getElementById(
+				"center_col",
+				"div"
+			);
+		
+		// the other getElementById() callers in this class test against
+		// false, so a null-only check never fired
+		if(
+			$center_col === null ||
+			$center_col === false
+		){
+			
+			throw new Exception("Could not grep result div");
+		}
+		
+		$this->fuckhtml->load($center_col);
+		
+		// get next page
+		$npt =
+			$this->fuckhtml
+			->getElementById(
+				"pnnext",
+				"a"
+			);
+		
+		if($npt !== false){
+			
+			$out["npt"] =
+				$this->backend->store(
+					$this->fuckhtml
+					->getTextContent(
+						$npt["attributes"]
+						["href"]
+					),
+					"news",
+					$proxy
+				);
+		}
+		
+		// the generated class names do not change between results: resolve
+		// them once instead of once per anchor
+		$carousel_class =
+			$this->getstyle(
+				[
+					"padding" => "16px 16px 40px 16px"
+				]
+			);
+		
+		$author_class =
+			$this->getstyle(
+				[
+					"overflow" => "hidden",
+					"text-align" => "left",
+					"text-overflow" => "ellipsis",
+					"white-space" => "nowrap",
+					"margin-bottom" => "8px"
+				]
+			);
+		
+		$as =
+			$this->fuckhtml
+			->getElementsByAttributeName(
+				"jsname",
+				"a"
+			);
+		
+		foreach($as as $a){
+			
+			$this->fuckhtml->load($a);
+			
+			// get title
+			$title =
+				$this->fuckhtml
+				->getElementsByAttributeValue(
+					"role",
+					"heading",
+					"div"
+				);
+			
+			if(
+				count($title) === 0 ||
+				!isset($a["attributes"]["href"])
+			){
+				
+				// not a news result
+				continue;
+			}
+			
+			$title =
+				$this->titledots(
+					$this->fuckhtml
+					->getTextContent(
+						$title[0]
+					)
+				);
+			
+			// get thumbnail
+			$image =
+				$this->fuckhtml
+				->getElementsByAttributeName(
+					"id",
+					"img"
+				);
+			
+			// check for padded title node, if found, we're inside a carousel
+			$in_carousel =
+				count(
+					$this->fuckhtml
+					->getElementsByClassName(
+						$carousel_class,
+						"div"
+					)
+				) !== 0;
+			
+			if(
+				count($image) !== 0 &&
+				!isset($image[0]["attributes"]["width"])
+			){
+				
+				$thumb = [
+					"url" =>
+						$this->getdimg(
+							$image[0]["attributes"]["id"]
+						),
+					"ratio" => $in_carousel ? "16:9" : "1:1"
+				];
+			}else{
+				
+				$thumb = [
+					"url" => null,
+					"ratio" => null
+				];
+			}
+			
+			// carousel entries have no description
+			$description = null;
+			
+			if($in_carousel === false){
+				
+				$desc_divs =
+					$this->fuckhtml
+					->getElementsByAttributeName(
+						"style",
+						"div"
+					);
+				
+				foreach($desc_divs as $desc){
+					
+					if(
+						strpos(
+							$desc["attributes"]["style"],
+							"margin-top:"
+						) !== false
+					){
+						
+						$description =
+							$this->titledots(
+								$this->fuckhtml
+								->getTextContent(
+									$desc
+								)
+							);
+						break;
+					}
+				}
+			}
+			
+			// get author
+			$author =
+				$this->fuckhtml
+				->getElementsByClassName(
+					$author_class,
+					"div"
+				);
+			
+			$author =
+				count($author) !== 0 ?
+				$this->fuckhtml
+				->getTextContent(
+					$author[0]
+				) :
+				null;
+			
+			// get date: last <span> of the first div styled with "bottom:"
+			$date = null;
+			
+			$date_div =
+				$this->fuckhtml
+				->getElementsByAttributeName(
+					"style",
+					"div"
+				);
+			
+			foreach($date_div as $d){
+				
+				$this->fuckhtml->load($d);
+				
+				$span =
+					$this->fuckhtml
+					->getElementsByTagName(
+						"span"
+					);
+				
+				if(
+					strpos(
+						$d["attributes"]["style"],
+						"bottom:"
+					) !== false
+				){
+					
+					// leave the date null when the div holds no span
+					if(count($span) !== 0){
+						
+						$date =
+							strtotime(
+								$this->fuckhtml
+								->getTextContent(
+									$span[count($span) - 1]
+								)
+							);
+					}
+					break;
+				}
+			}
+			
+			$out["news"][] = [
+				"title" => $title,
+				"author" => $author,
+				"description" => $description,
+				"date" => $date,
+				"thumb" => $thumb,
+				"url" =>
+					$this->unshiturl(
+						$a["attributes"]
+						["href"]
+					)
+			];
+		}
+		
+		return $out;
+	}
+	
+	
+	
+	
+	// Image search.
+	//
+	// $get carries either "npt" or the first-page filters "s", "country",
+	// "nsfw", "time", "size", "ratio", "color", "type", "format", "rights".
+	// Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
+	public function image($get){
+		
+		// generate parameters
+		if($get["npt"]){
+			
+			[$params, $proxy] =
+
$this->backend->get($get["npt"], "images");
+			
+			$params = json_decode($params, true);
+		}else{
+			
+			$search = $get["s"];
+			if(strlen($search) === 0){
+				
+				throw new Exception("Search term is empty!");
+			}
+			
+			$proxy = $this->backend->get_ip();
+			
+			$params = [
+				"q" => $search,
+				"udm" => "2" // get images
+			];
+			
+			// country (image search uses cr instead of gl)
+			if($get["country"] != "any"){
+				
+				$params["cr"] = "country" . strtoupper($get["country"]);
+			}
+			
+			// nsfw
+			$params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
+			
+			// generate tbs
+			$tbs = [];
+			
+			// time
+			if($get["time"] != "any"){
+				
+				$tbs["qdr"] = $get["time"];
+			}
+			
+			// size
+			if($get["size"] != "any"){
+				
+				$params["imgsz"] = $get["size"];
+			}
+			
+			// ratio
+			if($get["ratio"] != "any"){
+				
+				$params["imgar"] = $get["ratio"];
+			}
+			
+			// color
+			$color = $get["color"];
+			
+			if($color != "any"){
+				
+				if(
+					$color == "color" ||
+					$color == "trans"
+				){
+					
+					$params["imgc"] = $color;
+				}elseif($color == "bnw"){
+					
+					$params["imgc"] = "gray";
+				}else{
+					
+					// anything else is a specific color name
+					$tbs["ic"] = "specific";
+					$tbs["isc"] = $color;
+				}
+			}
+			
+			// type
+			if($get["type"] != "any"){
+				
+				$tbs["itp"] = $get["type"];
+			}
+			
+			// format
+			if($get["format"] != "any"){
+				
+				$params["as_filetype"] = $get["format"];
+			}
+			
+			// rights (tbs)
+			if($get["rights"] != "any"){
+				
+				$tbs["sur"] = $get["rights"];
+			}
+			
+			// append tbs
+			if(count($tbs) !== 0){
+				
+				$pairs = [];
+				
+				foreach($tbs as $key => $value){
+					
+					$pairs[] = $key . ":" . $value;
+				}
+				
+				$params["tbs"] = implode(",", $pairs);
+			}
+		}
+		
+		//$html = file_get_contents("scraper/google-img.html");
+		
+		try{
+			$html =
+				$this->get(
+					$proxy,
+					"https://www.google.com/search",
+					$params
+				);
+		}catch(Exception $error){
+			
+			throw new Exception("Failed to get search page");
+		}
+		
+		$this->fuckhtml->load($html);
+		
+		$this->detect_sorry();
+		
+		// get javascript images
+		$this->scrape_imagearr($html);
+		
+		$out = [
+			"status" => "ok",
+			"npt" => null,
+			"image" => []
+		];
+		
+		$images =
+			$this->fuckhtml
+			->getElementsByClassName(
+				"ivg-i",
+				"div"
+			);
+		
+		foreach($images as $div){
+			
+			$this->fuckhtml->load($div);
+			
+			$imgs =
+				$this->fuckhtml
+				->getElementsByTagName("img");
+			
+			$docid = $div["attributes"]["data-docid"] ?? null;
+			
+			if(
+				count($imgs) === 0 ||
+				$docid === null ||
+				!isset($this->image_arr[$docid]) ||
+				!isset($div["attributes"]["data-lpage"])
+			){
+				
+				// incomplete tile: there is no source to offer, skip it
+				// instead of emitting a result with a null "source"
+				continue;
+			}
+			
+			$out["image"][] = [
+				"title" =>
+					$this->titledots(
+						$this->fuckhtml
+						->getTextContent(
+							$imgs[0]["attributes"]["alt"] ?? ""
+						)
+					),
+				"source" => $this->image_arr[$docid],
+				"url" =>
+					$this->fuckhtml
+					->getTextContent(
+						$div["attributes"]["data-lpage"]
+					)
+			];
+		}
+		
+		// as usual, no way to check if there is a next page reliably
+		if(count($out["image"]) > 50){
+			
+			$params["start"] = ($params["start"] ?? 0) + 10;
+			
+			// NOTE(review): stored under "image" but read back with "images"
+			// at the top of this method - confirm the backend treats both
+			// page names alike
+			$out["npt"] =
+				$this->backend
+				->store(
+					json_encode($params),
+					"image",
+					$proxy
+				);
+		}
+		
+		return $out;
+	}
+	
+	// Turn a google result link into a clean destination URL.
+	//
+	// Host-less tracking links are replaced by their "imgurl" or "q"
+	// parameter; mobile wikipedia/imdb/youtube hosts are rewritten, referrer
+	// parameters are dropped from play.google and twitter links, and
+	// maps.google "maps?" links are reduced to their "daddr" parameter.
+	//
+	// Returns the URL, or when $return_size is true an array with "url",
+	// "ref", "thumb_width", "thumb_height", "image_width", "image_height"
+	// read from the tracking link's parameters (null when absent).
+	private function unshiturl($url, $return_size = false){
+		
+		// decode
+		$url =
+			$this->fuckhtml
+			->getTextContent($url);
+		
+		// parameters of the tracking link, kept apart from the queries that
+		// are rewritten below so $return_size always reads the right array
+		$tracking = [];
+		
+		$url_parts = parse_url($url);
+		
+		if(!isset($url_parts["host"])){
+			
+			// no host, we have a tracking url
+			if(isset($url_parts["query"])){
+				
+				parse_str($url_parts["query"], $tracking);
+			}
+			
+			if(isset($tracking["imgurl"])){
+				
+				$url = $tracking["imgurl"];
+			}
+			elseif(isset($tracking["q"])){
+				
+				$url = $tracking["q"];
+			}
+		}
+		
+		// drops the listed parameters from the query string of $target
+		$strip_params = function($target, $names){
+			
+			$oldquery = parse_url($target, PHP_URL_QUERY);
+			
+			if(!is_string($oldquery)){
+				
+				return $target;
+			}
+			
+			parse_str($oldquery, $query);
+			
+			foreach($names as $name){
+				
+				unset($query[$name]);
+			}
+			
+			return
+				str_replace(
+					$oldquery,
+					http_build_query($query),
+					$target
+				);
+		};
+		
+		// rewrite URLs to remove extra tracking parameters
+		$domain = parse_url($url, PHP_URL_HOST);
+		
+		if(!is_string($domain)){
+			
+			// relative or malformed URL: none of the rules below apply
+			$domain = "";
+		}
+		
+		if(
+			preg_match(
+				'/wikipedia\.org$/',
+				$domain
+			)
+		){
+			
+			// rewrite wikipedia mobile URLs to desktop
+			$url =
+				$this->replacedomain(
+					$url,
+					preg_replace(
+						'/([a-z0-9]+)(\.m\.)/',
+						'$1.',
+						$domain
+					)
+				);
+		}
+		
+		elseif(
+			preg_match(
+				'/imdb\.com$|youtube\.[^.]+$/',
+				$domain
+			)
+		){
+			
+			// rewrite imdb and youtube mobile URLs too
+			$url =
+				$this->replacedomain(
+					$url,
+					preg_replace(
+						'/^m\./',
+						"",
+						$domain
+					)
+				);
+		}
+		
+		elseif(
+			preg_match(
+				'/play\.google\.[^.]+$/',
+				$domain
+			)
+		){
+			
+			// remove referrers from play.google.com
+			$url = $strip_params($url, ["referrer", "hl", "gl"]);
+		}
+		
+		elseif(
+			preg_match(
+				'/twitter\.com$/',
+				$domain
+			)
+		){
+			
+			// remove more referrers from twitter.com
+			$url = $strip_params($url, ["ref_src"]);
+		}
+		
+		elseif(
+			preg_match(
+				'/maps\.google\.[^.]+/',
+				$domain
+			) &&
+			stripos($url, "maps?") !== false
+		){
+			
+			//https://maps.google.com/maps?daddr=Johnny,+603+Rue+St+Georges,+Saint-J%C3%A9r%C3%B4me,+Quebec+J7Z+5B7
+			$maps_query = parse_url($url, PHP_URL_QUERY);
+			
+			if(is_string($maps_query)){
+				
+				parse_str($maps_query, $maps_params);
+				
+				if(isset($maps_params["daddr"])){
+					
+					$url =
+						"https://maps.google.com/maps?daddr=" .
+						urlencode($maps_params["daddr"]);
+				}
+			}
+		}
+		
+		if($return_size){
+			
+			return [
+				"url" => $url,
+				"ref" => $tracking["imgrefurl"] ?? null,
+				"thumb_width" => isset($tracking["tbnw"]) ? (int)$tracking["tbnw"] : null,
+				"thumb_height" => isset($tracking["tbnh"]) ? (int)$tracking["tbnh"] : null,
+				"image_width" => isset($tracking["w"]) ? (int)$tracking["w"] : null,
+				"image_height" => isset($tracking["h"]) ? (int)$tracking["h"] : null
+			];
+		}
+		
+		return $url;
+	}
+	
+	// Swap the host of $url for $domain, keeping scheme, path and query.
+	private function replacedomain($url, $domain){
+		
+		// "${1}" keeps the backreference apart from a domain that starts with
+		// a digit ("$1" . "9gag.com" would be read as group 19), and the
+		// limit leaves URLs embedded in the query string alone
+		return
+			preg_replace(
+				'/(https?:\/\/)([^\/]+)/',
+				'${1}' . $domain,
+				$url,
+				1
+			);
+	}
+	
+	// Strip spaces, dots, control characters and ellipses from both ends.
+	private function titledots($title){
+		
+		// trim() works on bytes: handing it the 3-byte "…" also stripped the
+		// first bytes of other characters (a leading “ for instance), so the
+		// ellipsis is matched as a whole character instead
+		$clean =
+			preg_replace(
+				'/^[ .\t\n\r\x00\x0B\x{2026}]+|[ .\t\n\r\x00\x0B\x{2026}]+\z/u',
+				"",
+				$title
+			);
+		
+		if($clean === null){
+			
+			// not valid UTF-8: fall back to the single-byte characters
+			return trim($title, " .\t\n\r\0\x0B");
+		}
+		
+		return $clean;
+	}
+	
+	// Convert "h:mm:ss", "m:ss" or "s" to a number of seconds.
+	private function hms2int($time){
+		
+		$parts = explode(":", $time, 3);
+		$seconds = 0;
+		
+		if(count($parts) === 3){
+			
+			// hours
+			$seconds += (int)array_shift($parts) * 3600;
+		}
+		
+		if(count($parts) === 2){
+			
+			// minutes
+			$seconds += (int)array_shift($parts) * 60;
+		}
+		
+		// seconds
+		return $seconds + (int)$parts[0];
+	}
+	
+	// Throw when the loaded document contains the "recaptcha" div.
+	private function detect_sorry(){
+		
+		$recaptcha =
+			$this->fuckhtml
+			->getElementById(
+				"recaptcha",
+				"div"
+			);
+		
+		if($recaptcha !== false){
+			
+			throw new Exception("Google returned a captcha");
+		}
+	}
+}
diff --git a/settings.php b/settings.php index 33185e9..1be2dad 100644 --- a/settings.php +++ b/settings.php @@ -76,6 +76,10 @@ $settings = [ "text" => "Yandex" ], [ + "value" => "googlealt", + "text" => "GoogleV6" + ], + [ "value" => "google", "text" => "Google" ], @@ -106,6 +110,14 @@ $settings = [ "parameter" => "scraper_web", "options" => [ [ + "value" => "googlealt", + "text" => "GoogleV6" + ], + [ + "value" => "google", + "text" => "Google" + ], + [ "value" => "ddg", "text" => "DuckDuckGo" ], @@ -118,10 +130,6 @@ $settings = [ "text" => "Yandex" ], [ - "value" => "google", - "text" => "Google" - ], - [ "value" => "qwant", "text" => "Qwant" ], @@ -164,6 +172,14 @@ $settings = [ "parameter" => "scraper_images", "options" => [ [ + "value" => "googlealt", + "text" => "GoogleV6" + ], + [ + "value" => "google", + "text" => "Google" + ], + [ "value" => "ddg", "text" => "DuckDuckGo" ], @@ -176,10 +192,6 @@ $settings = [ "text" => "Brave" ], [ - "value" => "google", - 
"text" => "Google" - ], - [ "value" => "qwant", "text" => "Qwant" ], @@ -210,6 +222,10 @@ $settings = [ "text" => "YouTube" ], [ + "value" => "googlealt", + "text" => "GoogleV6" + ], + [ "value" => "ddg", "text" => "DuckDuckGo" ], @@ -236,6 +252,14 @@ $settings = [ "parameter" => "scraper_news", "options" => [ [ + "value" => "googlealt", + "text" => "GoogleV6" + ], + [ + "value" => "google", + "text" => "Google" + ], + [ "value" => "ddg", "text" => "DuckDuckGo" ], @@ -244,10 +268,6 @@ $settings = [ "text" => "Brave" ], [ - "value" => "google", - "text" => "Google" - ], - [ "value" => "qwant", "text" => "Qwant" ], diff --git a/static/style.css b/static/style.css index 6276417..3777699 100644 --- a/static/style.css +++ b/static/style.css @@ -1,14 +1,9 @@ :root{ /* background */ - --1d2021: #1d2021; - --282828: #282828; --3c3836: #3c3836; --504945: #504945; /* font */ - --928374: #928374; - --a89984: #a89984; - --bdae93: #bdae93; --8ec07c: #8ec07c; --ebdbb2: #ebdbb2; @@ -24,6 +19,26 @@ --red: #fb4934; } +@media (prefers-color-scheme: light) { + :root{ + --background: white; + --body: black; + --url: grey; + --deeper: #DDD; + --heading-and-links: #316568; + } +} + +@media (prefers-color-scheme: dark) { + :root{ + --background: black; + --body: #AAA; + --url: #999; + --deeper: #282828; + --heading-and-links: #91a5a8; + } +} + audio{ max-width:100%; display:block; @@ -43,8 +58,8 @@ body,html{ } body{ - background:var(--1d2021); - color:var(--a89984); + background:var(--background); + color:var(--body); font-size:16px; box-sizing:border-box; font-family:sans-serif; @@ -64,7 +79,7 @@ body{ } .navigation a{ - color:var(--bdae93); + color:var(--heading-and-links); text-decoration:none; } @@ -83,7 +98,7 @@ h1,h2,h3,h4,h5,h6{ padding:0; margin:0 0 7px 0; line-height:initial; - color:var(--bdae93); + color:var(--heading-and-links); } h3,h4,h5,h6{ @@ -102,7 +117,7 @@ h3,h4,h5,h6{ width:40%; height:36px; border:1px solid var(--504945); - background:var(--282828); + 
background:var(--deeper); border-radius:2px; margin-bottom:10px; position:relative; @@ -118,7 +133,7 @@ h3,h4,h5,h6{ } .searchbox input[type="text"]::placeholder{ - color:var(--928374); + color:var(--url); } .searchbox input[type="submit"]{ @@ -136,11 +151,11 @@ h3,h4,h5,h6{ line-height:36px; box-sizing:border-box; height:36px; - color:var(--bdae93); + color:var(--heading-and-links); } .searchbox:focus-within{ - border:1px solid var(--928374); + border:1px solid var(--url); } .autocomplete{ @@ -149,8 +164,8 @@ h3,h4,h5,h6{ top:35px; left:-1px; right:-1px; - background:var(--282828); - border:1px solid var(--928374); + background:var(--deeper); + border:1px solid var(--url); border-top:none; border-radius:0 0 2px 2px; z-index:10; @@ -186,7 +201,7 @@ h3,h4,h5,h6{ .tabs .tab{ text-decoration:none; - color:var(--bdae93); + color:var(--heading-and-links); padding:4px 10px; display:inline-block; } @@ -196,7 +211,7 @@ h3,h4,h5,h6{ } .tabs .tab.selected{ - border-bottom:2px solid var(--bdae93); + border-bottom:2px solid var(--heading-and-links); } /* Filters */ @@ -258,7 +273,7 @@ h3,h4,h5,h6{ font-family:Times; width:100%; height:100%; - background:var(--282828); + background: transparent !important; display:block; object-fit:contain; } @@ -295,7 +310,7 @@ h3,h4,h5,h6{ position:relative; max-width:400px; margin:17px auto 0; - border:1px solid var(--928374); + border:1px solid var(--url); } .captcha{ @@ -386,7 +401,7 @@ h3,h4,h5,h6{ } .infobox a{ - color:var(--bdae93); + color:var(--heading-and-links); } .infobox a:hover{ @@ -403,8 +418,8 @@ h3,h4,h5,h6{ } .web .type{ - border:1px solid var(--928374); - background:var(--282828); + border:1px solid var(--url); + background:var(--deeper); padding:0 4px; border-radius:2px; font-size:14px; @@ -420,7 +435,7 @@ h3,h4,h5,h6{ .web .url .part{ font-size:15px; text-decoration:none; - color:var(--928374); + color:var(--url); } .web .separator::before{ @@ -437,7 +452,7 @@ h3,h4,h5,h6{ .web .hover{ display:block; text-decoration:none; 
- color:var(--a89984); + color:var(--body); overflow:hidden; clear:left; padding-top:7px; @@ -445,12 +460,12 @@ h3,h4,h5,h6{ .web .text-result .title{ font-size:18px; - color:var(--bdae93); + color:var(--heading-and-links); margin-bottom:7px; } .web .text-result a:visited .title{ - color:var(--928374) !important; + color:var(--heading-and-links) !important; } .theme-white .web .text-result a:visited .title{ @@ -486,7 +501,7 @@ h3,h4,h5,h6{ display:none; position:absolute; top:25px; - background:var(--282828); + background:var(--deeper); border:1px solid var(--504945); border-radius:2px; z-index:3; @@ -504,7 +519,7 @@ h3,h4,h5,h6{ .favicon-dropdown a{ text-decoration:none; - color:var(--bdae93); + color:var(--heading-and-links); display:block; padding:2px 7px 2px 5px; font-size:13px; @@ -544,7 +559,7 @@ h3,h4,h5,h6{ float:right; width:160px; height:90px; - background:var(--282828); + background:var(--deeper); border:1px solid var(--504945); margin-left:7px; } @@ -590,7 +605,7 @@ h3,h4,h5,h6{ border:1px solid var(--504945); border-radius:2px; text-decoration:none; - color:var(--bdae93); + color:var(--heading-and-links); } .nextpage:hover{ @@ -642,7 +657,7 @@ table tr td:first-child{ table a{ display:block; text-decoration:none; - color:var(--a89984); + color:var(--body); padding:0 10px 0 0; } @@ -657,7 +672,7 @@ table tr a:last-child{ .related a{ padding-bottom:10px; - color:var(--bdae93); + color:var(--heading-and-links); } .related a:hover{ @@ -680,13 +695,13 @@ table tr a:last-child{ bottom:0; width:100%; height:17px; - background:linear-gradient(transparent, var(--1d2021)); + background:linear-gradient(transparent, var(--background)); pointer-events:none; } .web .answer-title{ text-decoration:none; - color:var(--a89984); + color:var(--body); } .web .answer-title a:hover{ @@ -732,8 +747,8 @@ table tr a:last-child{ .web .info-table{ margin:10px 0; font-size:15px; - color:var(--928374); - background:var(--282828); + color:var(--url); + background:var(--deeper); 
border:1px dashed var(--504945); } @@ -745,17 +760,17 @@ table tr a:last-child{ width:1%; white-space:nowrap; padding-right:17px; - color:var(--a89984); + color:var(--body); } .web .info-table tr:nth-child(even){ - background:var(--1d2021); + background:var(--background); } .web .sublinks{ padding:17px 10px 0; font-size:15px; - color:var(--#928374); + color:var(--#url); } .web .sublinks table td{ @@ -791,7 +806,7 @@ table tr a:last-child{ .web .wiki-head table, .about table{ margin-top:17px; border:1px dashed var(--504945); - background:var(--1d2021); + background:var(--background); } .web .wiki-head table td{ @@ -809,7 +824,7 @@ table tr a:last-child{ } .web .wiki-head tr:nth-child(odd), .about table tr:nth-child(odd){ - background:var(--282828); + background:var(--deeper); } .web .wiki-head .socials{ @@ -822,7 +837,7 @@ table tr a:last-child{ height:80px; padding-right:4px; float:left; - color:var(--bdae93); + color:var(--heading-and-links); text-decoration:none; display:table; } @@ -868,7 +883,7 @@ table tr a:last-child{ white-space:pre; font-family:monospace; background:var(--3c3836); - color:var(--bdae93); + color:var(--heading-and-links); padding:7px; margin:4px 0 13px 0; overflow-x:auto; @@ -879,9 +894,9 @@ table tr a:last-child{ .code-inline{ display:inline; font-family:monospace; - background:var(--282828); - color:var(--bdae93); - border:1px solid var(--928374); + background:var(--deeper); + color:var(--heading-and-links); + border:1px solid var(--url); padding:0 4px; border-radius:2px; } @@ -897,7 +912,7 @@ table tr a:last-child{ } .web .wiki-head a{ - color:var(--bdae93); + color:var(--heading-and-links); } .quote{ @@ -1039,14 +1054,14 @@ table tr a:last-child{ } #popup-image{ - border:1px solid var(--928374); + border:1px solid var(--url); display:block; margin:0 auto; pointer-events:all; width:100%; height:100%; object-fit:contain; - background:var(--282828); + background:var(--deeper); } #popup-status{ @@ -1056,13 +1071,13 @@ table tr a:last-child{ 
left:0; width:100%; height:35px; - background:var(--1d2021); - border-bottom:1px solid var(--928374); + background:var(--background); + border-bottom:1px solid var(--url); z-index:4; } #popup-bg{ - background:var(--1d2021); + background:var(--background); opacity:.5; position:fixed; top:0; @@ -1137,14 +1152,14 @@ table tr a:last-child{ .web .settings-submit a{ margin-right:17px; - color:var(--bdae93); + color:var(--heading-and-links); } /* About page */ .about a{ - color:var(--bdae93); + color:var(--heading-and-links); } .about h1, .about h2{ @@ -1189,15 +1204,15 @@ table tr a:last-child{ } .instances a{ - color:var(--bdae93); + color:var(--heading-and-links); } .instances tbody tr:nth-child(even){ - background:var(--282828); + background:var(--deeper); } .instances thead{ - outline:1px solid var(--928374); + outline:1px solid var(--url); outline-offset:-1px; background:var(--3c3836); user-select:none; @@ -1234,13 +1249,13 @@ table tr a:last-child{ width:0; height:0; border:6px solid transparent; - border-top:10px solid var(--bdae93); + border-top:10px solid var(--heading-and-links); } .instances .arrow.up{ top:0; border:6px solid transparent; - border-bottom:10px solid var(--bdae93); + border-bottom:10px solid var(--heading-and-links); } .instances th, .instances td{ @@ -1271,8 +1286,8 @@ table tr a:last-child{ } .instances .popup{ - border:1px solid var(--928374); - background:var(--282828); + border:1px solid var(--url); + background:var(--deeper); padding:7px 10px; pointer-events:initial; } @@ -1366,7 +1381,7 @@ table tr a:last-child{ font-size:24px; font-weight:bold; margin-bottom:17px; - color:var(--bdae93); + color:var(--heading-and-links); } .web .answer{ diff --git a/template/about.html b/template/about.html index 6398884..73a43d5 100644 --- a/template/about.html +++ b/template/about.html @@ -24,7 +24,7 @@ Once you're there, click the pencil on the last entry under "Search engines" (it </tr> <tr> <td>URL with %s in place of query</td> - 
<td>https://4get.ca/web?s=%s</td> + <td>https://4get.itinerariummentis.org/web?s=%s</td> </tr> </table> @@ -72,6 +72,6 @@ I don't know about that second part but if you want to talk to me, just drop me <a href="https://lolcat.ca" rel="dofollow" class="link">Click here to contact me!</a><br><br> -<a href="https://validator.w3.org/nu/?doc=https%3A%2F%2F4get.ca" title="W3 Valid!"> +<a href="https://validator.w3.org/nu/?doc=https%3A%2F%2F4get.itinerariummentis.org" title="W3 Valid!"> <img src="/static/icon/w3html.png" alt="Valid W3C HTML 4.01" width="88" height="31"> </a> diff --git a/template/home.html b/template/home.html index 28799f2..38075a1 100644 --- a/template/home.html +++ b/template/home.html @@ -30,7 +30,7 @@ </form> <a href="settings">Settings</a> • <a href="instances">Instances</a> • <a href="https://git.lolcat.ca/lolcat/4get_news">News</a> • <a href="api.txt">API</a> • <a href="about">About</a> • <a href="https://git.lolcat.ca/lolcat/4get">Source</a> • <a href="https://ko-fi.com/lolcat" rel="noreferrer" target="BLANK">Donate</a> <div class="subtext"> - <a href="https://4get.ca">Clearnet</a> • <a href="http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion">Tor</a> • <a href="https://lolcat.ca">Report a problem</a><br> + <a href="/">Clearnet</a> • <a href="http://4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion">Tor</a> • <a href="https://lolcat.ca">Report a problem</a><br> Running on <b>v{%version%}</b>!! </div> </div> |