summaryrefslogtreecommitdiff
path: root/scraper/yandex.php
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2023-11-07 08:04:56 -0500
committerlolcat <will@lolcat.ca>2023-11-07 08:04:56 -0500
commit785452873f0ee0a27fc157b482b7551560f0282d (patch)
tree4c70e240031ed3868425ca683c83ebfd378a9159 /scraper/yandex.php
parent64b090ee058953aed2246967332c7f0b6623cd8f (diff)
fix typo
Diffstat (limited to 'scraper/yandex.php')
-rw-r--r--scraper/yandex.php85
1 files changed, 59 insertions, 26 deletions
diff --git a/scraper/yandex.php b/scraper/yandex.php
index 65abe73..7335edc 100644
--- a/scraper/yandex.php
+++ b/scraper/yandex.php
@@ -10,11 +10,11 @@ class yandex{
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("yandex");
+ include "lib/backend.php";
+ // backend included in the scraper functions
}
- private function get($url, $get = [], $nsfw){
+ private function get($proxy, $url, $get = [], $nsfw){
$curlproc = curl_init();
@@ -32,7 +32,7 @@ class yandex{
}
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5",
@@ -54,6 +54,8 @@ class yandex{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -207,6 +209,8 @@ class yandex{
public function web($get){
+ $this->backend = new backend("yandex_w");
+
// has captcha
// https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
@@ -215,10 +219,11 @@ class yandex{
if($get["npt"]){
- $npt = $this->nextpage->get($get["npt"], "web");
+ [$npt, $proxy] = $this->backend->get($get["npt"], "web");
$html =
$this->get(
+ $proxy,
"https://yandex.com" . $npt,
[],
"yes"
@@ -226,6 +231,12 @@ class yandex{
}else{
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$lang = $get["lang"];
$older = $get["older"];
$newer = $get["newer"];
@@ -269,6 +280,7 @@ class yandex{
try{
$html =
$this->get(
+ $proxy,
"https://yandex.com/search/site/",
$params,
"yes"
@@ -313,7 +325,7 @@ class yandex{
if(count($npt) !== 0){
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
$this->fuckhtml
->getTextContent(
$npt
@@ -321,7 +333,8 @@ class yandex{
["attributes"]
["href"]
),
- "web"
+ "web",
+ $proxy
);
}
@@ -386,17 +399,18 @@ class yandex{
public function image($get){
+ $this->backend = new backend("yandex_i");
+
if($get["npt"]){
- $request =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "images"
- ),
- true
+ [$request, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
);
+ $request = json_decode($request, true);
+
$nsfw = $request["nsfw"];
unset($request["nsfw"]);
}else{
@@ -407,6 +421,7 @@ class yandex{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$nsfw = $get["nsfw"];
$time = $get["time"];
$size = $get["size"];
@@ -611,9 +626,11 @@ class yandex{
try{
$json = $this->get(
+ $proxy,
"https://yandex.com/images/search",
$request,
- $nsfw
+ $nsfw,
+ "yandex_i"
);
}catch(Exception $err){
@@ -676,7 +693,12 @@ class yandex{
$request["p"] = 1;
}
- $out["npt"] = $this->nextpage->store(json_encode($request), "images");
+ $out["npt"] =
+ $this->backend->store(
+ json_encode($request),
+ "images",
+ $proxy
+ );
}
// get search results
@@ -744,21 +766,29 @@ class yandex{
public function video($get){
+ $this->backend = new backend("yandex_v");
+
if($get["npt"]){
- $params =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "web"
- ),
- true
+ [$params, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "video"
);
+ $params = json_decode($params, true);
+
$nsfw = $params["nsfw"];
unset($params["nsfw"]);
}else{
+
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$nsfw = $get["nsfw"];
$time = $get["time"];
$duration = $get["duration"];
@@ -865,9 +895,11 @@ class yandex{
try{
$json =
$this->get(
+ $proxy,
"https://yandex.com/video/search",
$params,
- $nsfw
+ $nsfw,
+ "yandex_v"
);
}catch(Exception $error){
@@ -926,9 +958,10 @@ class yandex{
$params["p"] = "1";
$params["nsfw"] = $nsfw;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($params),
- "web"
+ "video",
+ $proxy
);
}