summaryrefslogtreecommitdiff
path: root/scraper
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2024-02-25 09:51:18 -0500
committerlolcat <will@lolcat.ca>2024-02-25 09:51:18 -0500
commite82e908ece8d1b1a33f0b09f2ac54316d1d2a134 (patch)
tree22a19fc1a523761d6c282d1bf66b56b18f87355d /scraper
parentb7f58a142120964405d27806d1c3c9582f4f75e5 (diff)
google fixes
Diffstat (limited to 'scraper')
-rw-r--r--scraper/google.php29
1 files changed, 28 insertions, 1 deletions
diff --git a/scraper/google.php b/scraper/google.php
index 3cff687..0aba310 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -559,6 +559,7 @@ class google{
}
curl_close($curlproc);
+ echo $data;
return $data;
}
@@ -977,6 +978,11 @@ class google{
"related" => []
];
+ if($this->detect_sorry($html)){
+
+ throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+ }
+
$this->parsejavascript($html);
//
@@ -2795,7 +2801,10 @@ class google{
throw new Exception("Failed to get search page");
}
- $this->fuckhtml->load($html);
+ if($this->detect_sorry($html)){
+
+ throw new Exception("Google blocked this 4get instance. Please set up a proxy!");
+ }
$out = [
"status" => "ok",
@@ -3609,4 +3618,22 @@ class google{
return rtrim($title, ". \t\n\r\0\x0B");
}
+
+ /**
+  * Report whether the given HTML is the page whose <title> reads
+  * "302 Moved", which callers treat as Google having blocked this
+  * instance.
+  *
+  * Side effect: the document is loaded into $this->fuckhtml, so the
+  * parser holds $html once this method returns.
+  *
+  * @param string $html HTML document to inspect (presumably the raw
+  *                     response body; confirm against callers).
+  * @return bool true when the first <title> element's innerHTML is
+  *              exactly "302 Moved", false otherwise.
+  */
+ private function detect_sorry($html){
+
+     $this->fuckhtml->load($html);
+     $titles =
+         $this->fuckhtml
+         ->getElementsByTagName("title");
+
+     // isset() covers both a missing <title> and a missing innerHTML
+     // key; the strict comparison accepts only an exact string match.
+     return
+         isset($titles[0]["innerHTML"]) &&
+         $titles[0]["innerHTML"] === "302 Moved";
+ }
}