summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2024-05-23 08:58:46 -0400
committerlolcat <will@lolcat.ca>2024-05-23 08:58:46 -0400
commit92d0102738c5dcdab68ce648f7ade36d4ec90bdb (patch)
tree70d02b60d42dc709c82733a63876d783c2bbe1c0
parentbcb5c4d5195b645584a8ebf98b61b86533f7a5b7 (diff)
yep scraper cloudflare error handling
-rw-r--r--scraper/yep.php54
1 files changed, 38 insertions, 16 deletions
diff --git a/scraper/yep.php b/scraper/yep.php
index 5be3806..eba4214 100644
--- a/scraper/yep.php
+++ b/scraper/yep.php
@@ -6,6 +6,9 @@ class yep{
include "lib/backend.php";
$this->backend = new backend("yep");
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
}
public function getfilters($page){
@@ -254,8 +257,10 @@ class yep{
["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
- "Accept-Encoding: gzip",
+ "Accept-Encoding: gzip, deflate, br, zstd",
+ "Connection: keep-alive",
"DNT: 1",
+ "Priority: u=1",
"Origin: https://yep.com",
"Referer: https://yep.com/",
"Connection: keep-alive",
@@ -265,6 +270,9 @@ class yep{
"TE: trailers"]
);
+ // http3 bypass
+ curl_setopt($curlproc, CURLOPT_HTTP_VERSION, 30);
+
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
@@ -324,27 +332,41 @@ class yep{
// https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
$json =
- json_decode(
- $this->get(
- $this->backend->get_ip(),
- "https://api.yep.com/fs/2/search",
- [
- "client" => "web",
- "gl" => $country == "all" ? $country : strtoupper($country),
- "limit" => "99999",
- "no_correct" => "false",
- "q" => $search,
- "safeSearch" => $nsfw,
- "type" => "web"
- ]
- ),
- true
+ $this->get(
+ $this->backend->get_ip(),
+ "https://api.yep.com/fs/2/search",
+ [
+ "client" => "web",
+ "gl" => $country == "all" ? $country : strtoupper($country),
+ "limit" => "99999",
+ "no_correct" => "false",
+ "q" => $search,
+ "safeSearch" => $nsfw,
+ "type" => "web"
+ ]
);
}catch(Exception $error){
throw new Exception("Failed to fetch JSON");
}
+ // detect cloudflare page
+ $this->fuckhtml->load($json);
+
+ if(
+ count(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "cf-wrapper",
+ "div"
+ )
+ ) !== 0
+ ){
+
+ throw new Exception("Blocked by Cloudflare");
+ }
+
+ $json = json_decode($json, true);
//$json = json_decode(file_get_contents("scraper/yep.json"), true);
if($json === null){