summary | refs | log | tree | commit | diff
path: root/scraper
diff options
context:
space:
mode:
author: lolcat <will@lolcat.ca> 2024-04-27 14:25:39 -0400
committer: lolcat <will@lolcat.ca> 2024-04-27 14:25:39 -0400
commit: f2eb164d40340cf221cb7ad457ab35492da4d308 (patch)
tree: d896523316590bd961522a12fe84aca1fc3f8db9 /scraper
parent: 81dc93802c32aa6f593a12b3f2efbe38dc9e31f7 (diff)
qwant gibberish check
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/qwant.php 23
1 files changed, 23 insertions, 0 deletions
diff --git a/scraper/qwant.php b/scraper/qwant.php
index a8b69fe..7f441e5 100644
--- a/scraper/qwant.php
+++ b/scraper/qwant.php
@@ -453,6 +453,8 @@ class qwant{
switch($item["type"]){ // ignores ads
case "web":
+
+ $first_iteration = true;
foreach($item["items"] as $result){
if(isset($result["thumbnailUrl"])){
@@ -483,6 +485,25 @@ class qwant{
}
}
+ // detect gibberish results
+ if(
+ $first_iteration &&
+ preg_match(
+ "/^" .
+ preg_quote(
+ $this->trimdots(
+ $result["source"]
+ ),
+ "/"
+ ) .
+ "/",
+ $result["url"]
+ ) !== 1
+ ){
+
+ throw new Exception("Qwant returned gibberish results");
+ }
+
$out["web"][] = [
"title" => $this->trimdots($result["title"]),
"description" => $this->trimdots($result["desc"]),
@@ -493,6 +514,8 @@ class qwant{
"sublink" => $sublinks,
"table" => []
];
+
+ $first_iteration = false;
}
break;