summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca> 2024-03-20 10:59:51 -0400
committer: lolcat <will@lolcat.ca> 2024-03-20 10:59:51 -0400
commit: 635b2386d4845d7360ea5d1692090dae1dc02e68 (patch)
tree: 860bdb1f8e2b82e91c64dc257dda862890217db1
parent: c4c008c1921f2efd8019d96af4d1897cc817c18c (diff)
fixed google not working in yurop
-rw-r--r-- data/config.php | 2
-rw-r--r-- scraper/google.php | 34
2 files changed, 30 insertions, 6 deletions
diff --git a/data/config.php b/data/config.php
index fd9071e..0a73864 100644
--- a/data/config.php
+++ b/data/config.php
@@ -92,7 +92,7 @@ class config{
const PROXY_DDG = false; // duckduckgo
const PROXY_BRAVE = false;
const PROXY_FB = false; // facebook
- const PROXY_GOOGLE = false;
+ const PROXY_GOOGLE = "webshare_google";
const PROXY_MARGINALIA = false;
const PROXY_MOJEEK = false;
const PROXY_SC = false; // soundcloud
diff --git a/scraper/google.php b/scraper/google.php
index d177424..1485436 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -522,6 +522,7 @@ class google{
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
"DNT: 1",
+ "Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
@@ -977,9 +978,9 @@ class google{
"related" => []
];
- if($this->detect_sorry($html)){
+ if($error = $this->detect_sorry($html)){
- throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+ throw new Exception($error);
}
$this->parsejavascript($html);
@@ -2813,9 +2814,9 @@ class google{
throw new Exception("Failed to get search page");
}
- if($this->detect_sorry($html)){
+ if($error = $this->detect_sorry($html)){
- throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+ throw new Exception($error);
}
$out = [
@@ -3649,7 +3650,30 @@ class google{
$detect_sorry[0]["innerHTML"] == "302 Moved"
){
- return true;
+ // may be consent.google.com in europe or /sorry captcha page
+ $url =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ if(
+ strpos(
+ parse_url(
+ $this->fuckhtml
+ ->getTextContent(
+ $url[0]["attributes"]["href"]
+ ),
+ PHP_URL_PATH
+ ),
+ "/sorry"
+ ) === 0
+ ){
+
+ // found /sorry
+ return "Google blocked this 4get instance. Please setup a proxy!";
+ }
+
+ // found consent.google, should not happen anymore
+ return "Google served a GPDR consent form. This should not happen, please report if you encounter this message";
}
return false;