From 24e95482977c03309bcb1a0c53e343c016e7f2d0 Mon Sep 17 00:00:00 2001 From: lolcat Date: Wed, 12 Jun 2024 22:41:02 -0400 Subject: i fucking hate google --- data/config.php | 8 +- lib/frontend.php | 2 +- lib/fuckhtml.php | 12 +- scraper/google.php | 6248 +++++++++++++++++++++++++++++--------------------- scraper/spotify.json | 1 - scraper/yep.php | 2 +- 6 files changed, 3707 insertions(+), 2566 deletions(-) delete mode 100644 scraper/spotify.json diff --git a/data/config.php b/data/config.php index 13be0f4..6cde87d 100644 --- a/data/config.php +++ b/data/config.php @@ -101,12 +101,16 @@ class config{ "https://search.milivojevic.in.rs", "https://4get.snine.nl", "https://4get.datura.network", - "https://4get.neco.lol" + "https://4get.neco.lol", + "https://4get.lol", + "https://4get.ch", + "https://4get.edmateo.site", + "https://4get.sudovanilla.org" ]; // Default user agent to use for scraper requests. Sometimes ignored to get specific webpages // Changing this might break things. - const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0"; + const USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:127.0) Gecko/20100101 Firefox/127.0"; // Proxy pool assignments for each scraper // false = Use server's raw IP diff --git a/lib/frontend.php b/lib/frontend.php index 1c3eb09..5579635 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -1302,7 +1302,7 @@ class frontend{ return htmlspecialchars($image); } - return "/proxy?i=" . urlencode($image) . "&s=" . $format; + return "/proxy.php?i=" . urlencode($image) . "&s=" . $format; } public function htmlnextpage($gets, $npt, $page){ diff --git a/lib/fuckhtml.php b/lib/fuckhtml.php index f3a6efe..8b5eba5 100644 --- a/lib/fuckhtml.php +++ b/lib/fuckhtml.php @@ -240,7 +240,17 @@ class fuckhtml{ public function getElementsByFuzzyAttributeValue(string $name, string $value, $collection = null){ $elems = $this->getElementsByAttributeName($name, $collection); - $value = explode(" ", $value); + $value = + explode( + " ", + trim( + preg_replace( + '/ +/', + " ", + $value + ) + ) + ); $return = []; diff --git a/scraper/google.php b/scraper/google.php index 50bcc22..185ad0e 100644 --- a/scraper/google.php +++ b/scraper/google.php @@ -1,14 +1,9 @@ [ // gl= + "country" => [ // gl= (image: cr=countryAF) "display" => "Country", "option" => [ "any" => "Instance's country", @@ -272,47 +267,6 @@ class google{ "yes" => "Yes", // safe=active "no" => "No" // safe=off ] - ], - "lang" => [ // lr= (prefix lang with "lang_") - "display" => "Language", - "option" => [ - "any" => "Any language", - "ar" => "Arabic", - "bg" => "Bulgarian", - "ca" => "Catalan", - "cs" => "Czech", - "da" => "Danish", - "de" => "German", - "el" => "Greek", - "en" => "English", - "es" => "Spanish", - "et" => "Estonian", - "fi" => "Finnish", - "fr" => "French", - "hr" => "Croatian", - "hu" => "Hungarian", - "id" => "Indonesian", - "is" => "Icelandic", - "it" => "Italian", - "iw" => "Hebrew", - "ja" => "Japanese", - "ko" => "Korean", - "lt" => "Lithuanian", - "lv" => "Latvian", - "nl" => "Dutch", - "no" => "Norwegian", - "pl" => "Polish", - "pt" => "Portuguese", - "ro" => "Romanian", - "ru" => "Russian", - "sk" => "Slovak", - "sl" => "Slovenian", - "sr" => "Serbian", - "sv" => "Swedish", - "tr" => "Turkish", - "zh-CN" => "Chinese (Simplified)", - "zh-TW" => "Chinese (Traditional)" - ] ] ]; @@ -322,13 +276,61 @@ class google{ return array_merge( $base, [ - "newer" => [ // &sort=review-date:r:20090301:20090430 + "lang" => [ // lr= (prefix lang with "lang_") + "display" => "Language", + "option" => [ + "any" => "Any language", + "ar" => "Arabic", + "bg" => "Bulgarian", + "ca" => "Catalan", + "cs" => "Czech", + "da" => "Danish", + "de" => "German", + "el" => "Greek", + "en" => "English", + "es" => "Spanish", + "et" => "Estonian", + "fi" => "Finnish", + "fr" => "French", + "hr" => "Croatian", + "hu" => "Hungarian", + "id" => "Indonesian", + "is" => "Icelandic", + "it" => "Italian", + "iw" => "Hebrew", + "ja" => "Japanese", + "ko" => "Korean", + "lt" => "Lithuanian", + "lv" => "Latvian", + "nl" => "Dutch", + "no" => "Norwegian", + "pl" => "Polish", + "pt" => "Portuguese", + "ro" => "Romanian", + "ru" => "Russian", + "sk" => "Slovak", + "sl" => "Slovenian", + "sr" => "Serbian", + "sv" => "Swedish", + "tr" => "Turkish", + "zh-CN" => "Chinese (Simplified)", + "zh-TW" => "Chinese (Traditional)" + ] + ], + "newer" => [ // tbs "display" => "Newer than", "option" => "_DATE" ], "older" => [ "display" => "Older than", "option" => "_DATE" + ], + "spellcheck" => [ + "display" => "Spellcheck", + "option" => [ + "yes" => "Yes", + "no" => "No" + ] ] ] ); @@ -338,7 +340,7 @@ class google{ return array_merge( $base, [ - "time" => [ // tbs=qrd: + "time" => [ // tbs=qdr: