summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca> 2024-03-14 22:33:01 -0400
committer: lolcat <will@lolcat.ca> 2024-03-14 22:33:01 -0400
commit: c4c008c1921f2efd8019d96af4d1897cc817c18c (patch)
tree: 4a2585aea57a2b3cc8899baa89fa4c8d53e77215
parent: d03d25109199fe8f8c510851d249a3a43d5b296d (diff)
google hotfix
-rw-r--r-- scraper/google.php 61
1 files changed, 40 insertions, 21 deletions
diff --git a/scraper/google.php b/scraper/google.php
index b0e4ded..d177424 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -3,7 +3,6 @@
// todo:
// aliexpress tracking links
// enhanced msx notice
-// detect "sorry" page
class google{
@@ -654,6 +653,7 @@ class google{
throw new Exception("Failed to get HTML");
}
+ //$html = file_get_contents("scraper/google.html");
}
return $this->parsepage($html, "web", $search, $ip);
@@ -2322,11 +2322,11 @@ class google{
->getElementsByClassName(
$this->findstyles(
[
- "font-weight" => "bold",
- "font-size" => "16px",
"color" => "#000",
+ "font-size" => "16px",
+ "font-weight" => "bold",
"margin" => "0",
- "padding" => "12px 16px 0 16px"
+ "padding" => "12px 16px 0px 16px"
],
self::is_class
),
@@ -2529,6 +2529,19 @@ class google{
$description = [];
+ $pcitems =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "pcitem",
+ "div"
+ );
+
+ if(count($pcitems) !== 0){
+
+ // ignore elements with carousels in them
+ continue;
+ }
+
foreach($as as $a){
//
@@ -3075,27 +3088,38 @@ class google{
private function findstyles($rules, $is){
- ksort($rules);
+ $c = count($rules);
- foreach($this->computedstyle as $stylename => $styles){
+ foreach($this->computedstyle as $classname => $styles){
- if($styles == $rules){
+ if($classname[0] != $is){
- preg_match(
- '/\\' . $is . '([^ .]+)/',
- $stylename,
- $out
- );
+ // not a class, skip
+ continue;
+ }
+
+ $i = 0;
+ foreach($styles as $stylename => $stylevalue){
- if(count($out) === 2){
+ if(
+ isset($rules[$stylename]) &&
+ $rules[$stylename] == $stylevalue
+ ){
- return $out[1];
+ $i++;
+ }else{
+
+ continue 2;
}
+ }
+
+ if($c === $i){
- return false;
+ return ltrim($classname, $is);
}
}
+ // fail, did not find classname.
return false;
}
@@ -3103,7 +3127,7 @@ class google{
// get style tags
preg_match_all(
- '/([^{]+){([^}]+)}/',
+ '/([^{]+){([^}]*)}/',
$style,
$tags_regex
);
@@ -3139,11 +3163,6 @@ class google{
}
}
- foreach($tags as &$value){
-
- ksort($value);
- }
-
return $tags;
}