summaryrefslogtreecommitdiff
path: root/scraper
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2023-11-07 08:04:56 -0500
committerlolcat <will@lolcat.ca>2023-11-07 08:04:56 -0500
commit785452873f0ee0a27fc157b482b7551560f0282d (patch)
tree4c70e240031ed3868425ca683c83ebfd378a9159 /scraper
parent64b090ee058953aed2246967332c7f0b6623cd8f (diff)
fix typo
Diffstat (limited to 'scraper')
-rw-r--r--scraper/brave.php338
-rw-r--r--scraper/ddg.php368
-rw-r--r--scraper/facebook.php5
-rw-r--r--scraper/ftm.php43
-rw-r--r--scraper/google.php84
-rw-r--r--scraper/imgur.php37
-rw-r--r--scraper/marginalia.php17
-rw-r--r--scraper/mojeek.php427
-rw-r--r--scraper/pinterest.php5
-rw-r--r--scraper/sc.php53
-rw-r--r--scraper/wiby.php26
-rw-r--r--scraper/yandex.php85
-rw-r--r--scraper/yep.php16
-rw-r--r--scraper/youtube.php37
14 files changed, 938 insertions, 603 deletions
diff --git a/scraper/brave.php b/scraper/brave.php
index 93256a8..91e3f9e 100644
--- a/scraper/brave.php
+++ b/scraper/brave.php
@@ -7,8 +7,8 @@ class brave{
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("brave");
+ include "lib/backend.php";
+ $this->backend = new backend("brave");
}
public function getfilters($page){
@@ -138,13 +138,20 @@ class brave{
"maybe" => "Maybe",
"no" => "No"
]
+ ],
+ "spellcheck" => [
+ "display" => "Spellcheck",
+ "option" => [
+ "yes" => "Yes",
+ "no" => "No"
+ ]
]
];
break;
}
}
- private function get($url, $get = [], $nsfw, $country){
+ private function get($proxy, $url, $get = [], $nsfw, $country){
switch($nsfw){
@@ -159,7 +166,7 @@ class brave{
}
$headers = [
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -190,11 +197,12 @@ class brave{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
-
throw new Exception(curl_error($curlproc));
}
@@ -207,7 +215,9 @@ class brave{
if($get["npt"]){
// get next page data
- $q = json_decode($this->nextpage->get($get["npt"], "web"), true);
+ [$q, $proxy] = $this->backend->get($get["npt"], "web");
+
+ $q = json_decode($q, true);
$search = $q["q"];
$q["spellcheck"] = "0";
@@ -222,7 +232,6 @@ class brave{
// get _GET data instead
$search = $get["s"];
-
if(strlen($search) === 0){
throw new Exception("Search term is empty!");
@@ -230,9 +239,10 @@ class brave{
if(strlen($search) > 2048){
- throw new Exception("Search query is too long!");
+ throw new Exception("Search term is too long!");
}
+ $proxy = $this->backend->get_ip();
$nsfw = $get["nsfw"];
$country = $get["country"];
$older = $get["older"];
@@ -288,6 +298,7 @@ class brave{
try{
$html =
$this->get(
+ $proxy,
"https://search.brave.com/search",
$q,
$nsfw,
@@ -361,9 +372,10 @@ class brave{
$q["country"] = $country;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($q),
- "web"
+ "web",
+ $proxy
);
}
}
@@ -759,7 +771,9 @@ class brave{
"description" =>
isset($result["review"]["description"]) ?
$this->limitstrlen(
- $result["review"]["description"]
+ strip_tags(
+ $result["review"]["description"]
+ )
) :
$this->titledots(
$this->fuckhtml
@@ -839,6 +853,32 @@ class brave{
"value" => $this->titledots($info["long_desc"])
];
}
+
+ // parse ratings
+ if(
+ isset($info["ratings"]) &&
+ $info["ratings"] != "void 0"
+ ){
+
+ $description[] = [
+ "type" => "title",
+ "value" => "Ratings"
+ ];
+
+ foreach($info["ratings"] as $rating){
+
+ $description[] = [
+ "type" => "link",
+ "url" => $rating["profile"]["url"],
+ "value" => $rating["profile"]["name"]
+ ];
+
+ $description[] = [
+ "type" => "text",
+ "value" => ": " . $rating["ratingValue"] . "/" . $rating["bestRating"] . "\n"
+ ];
+ }
+ }
}
$table = [];
@@ -908,9 +948,9 @@ class brave{
$out["video"][] = [
"title" => $this->titledots($video["title"]),
"description" => $this->titledots($video["description"]),
- "date" => isset($video["age"]) ? strtotime($video["age"]) : null,
- "duration" => isset($video["video"]["duration"]) ? $this->hms2int($video["video"]["duration"]) : null,
- "views" => null,
+ "date" => isset($video["age"]) && $video["age"] != "void 0" ? strtotime($video["age"]) : null,
+ "duration" => isset($video["video"]["duration"]) && $video["video"]["duration"] != "void 0" ? $this->hms2int($video["video"]["duration"]) : null,
+ "views" => isset($video["video"]["views"]) && $video["video"]["views"] != "void 0" ? (int)$video["video"]["views"] : null,
"thumb" =>
isset($video["thumbnail"]["src"]) ?
[
@@ -1008,37 +1048,75 @@ class brave{
public function news($get){
- $search = $get["s"];
- if(strlen($search) === 0){
+ if($get["npt"]){
- throw new Exception("Search term is empty!");
- }
-
- $nsfw = $get["nsfw"];
- $country = $get["country"];
-
- if(strlen($search) > 2048){
+ [$req, $proxy] = $this->backend->get($get["npt"], "news");
- throw new Exception("Search query is too long!");
- }
- /*
- $handle = fopen("scraper/brave-news.html", "r");
- $html = fread($handle, filesize("scraper/brave-news.html"));
- fclose($handle);*/
- try{
- $html =
- $this->get(
- "https://search.brave.com/news",
- [
- "q" => $search
- ],
- $nsfw,
- $country
- );
+ $req = json_decode($req, true);
- }catch(Exception $error){
+ $search = $req["q"];
+ $country = $req["country"];
+ $nsfw = $req["nsfw"];
+ $offset = $req["offset"];
+ $spellcheck = $req["spellcheck"];
- throw new Exception("Could not fetch search page");
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://search.brave.com/news",
+ [
+ "q" => $search,
+ "offset" => $offset,
+ "spellcheck" => $spellcheck
+ ],
+ $nsfw,
+ $country
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch search page");
+ }
+
+ }else{
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ if(strlen($search) > 2048){
+
+ throw new Exception("Search term is too long!");
+ }
+
+ $proxy = $this->backend->get_ip();
+ $nsfw = $get["nsfw"];
+ $country = $get["country"];
+ $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
+
+ /*
+ $handle = fopen("scraper/brave-news.html", "r");
+ $html = fread($handle, filesize("scraper/brave-news.html"));
+ fclose($handle);*/
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://search.brave.com/news",
+ [
+ "q" => $search,
+ "spellcheck" => $spellcheck
+ ],
+ $nsfw,
+ $country
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch search page");
+ }
}
$out = [
@@ -1050,6 +1128,17 @@ class brave{
// load html
$this->fuckhtml->load($html);
+ // get npt
+ $out["npt"] =
+ $this->generatenextpagetoken(
+ $search,
+ $nsfw,
+ $country,
+ $spellcheck,
+ "news",
+ $proxy
+ );
+
$news =
$this->fuckhtml
->getElementsByClassName(
@@ -1183,8 +1272,19 @@ class brave{
public function image($get){
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ if(strlen($search) > 2048){
+
+ throw new Exception("Search term is too long!");
+ }
+
$country = $get["country"];
$nsfw = $get["nsfw"];
+ $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
$out = [
"status" => "ok",
@@ -1195,9 +1295,11 @@ class brave{
try{
$html =
$this->get(
+ $this->backend->get_ip(), // no nextpage right now, pass proxy directly
"https://search.brave.com/images",
[
- "q" => $search
+ "q" => $search,
+ "spellcheck" => $spellcheck
],
$nsfw,
$country
@@ -1261,9 +1363,75 @@ class brave{
public function video($get){
- $search = $get["s"];
- $country = $get["country"];
- $nsfw = $get["nsfw"];
+ if($get["npt"]){
+
+ [$npt, $proxy] = $this->backend->get($get["npt"], "videos");
+
+ $npt = json_decode($npt, true);
+ $search = $npt["q"];
+ $offset = $npt["offset"];
+ $spellcheck = $npt["spellcheck"];
+ $country = $npt["country"];
+ $nsfw = $npt["nsfw"];
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://search.brave.com/videos",
+ [
+ "q" => $search,
+ "offset" => $offset,
+ "spellcheck" => $spellcheck
+ ],
+ $nsfw,
+ $country
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch search page");
+ }
+
+ }else{
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ if(strlen($search) > 2048){
+
+ throw new Exception("Search term is too long!");
+ }
+
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $spellcheck = $get["spellcheck"] == "yes" ? "1" : "0";
+
+ $proxy = $this->backend->get_ip();
+
+ try{
+ $html =
+ $this->get(
+ $proxy,
+ "https://search.brave.com/videos",
+ [
+ "q" => $search,
+ "spellcheck" => $spellcheck
+ ],
+ $nsfw,
+ $country
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch search page");
+ }
+ }
+
+ $this->fuckhtml->load($html);
$out = [
"status" => "ok",
@@ -1275,21 +1443,17 @@ class brave{
"reel" => []
];
- try{
- $html =
- $this->get(
- "https://search.brave.com/videos",
- [
- "q" => $search
- ],
- $nsfw,
- $country
- );
-
- }catch(Exception $error){
-
- throw new Exception("Could not fetch search page");
- }
+ // get npt
+ $out["npt"] =
+ $this->generatenextpagetoken(
+ $search,
+ $nsfw,
+ $country,
+ $spellcheck,
+ "videos",
+ $proxy
+ );
+
/*
$handle = fopen("scraper/brave-video.html", "r");
$html = fread($handle, filesize("scraper/brave-video.html"));
@@ -1606,7 +1770,7 @@ class brave{
$data["table"][trim($html[0])] = trim($html[1]);
}
}
-
+ /*
private function getimagelinkfromstyle($thumb){
$thumb =
@@ -1646,13 +1810,13 @@ class brave{
"url" => $url,
"ratio" => "16:9"
];
- }
+ }*/
private function limitstrlen($text){
return explode("\n", wordwrap($text, 300, "\n"))[0];
}
-
+ /*
private function limitwhitespace($text){
return
@@ -1661,7 +1825,7 @@ class brave{
" ",
$text
);
- }
+ }*/
private function titledots($title){
@@ -1678,6 +1842,52 @@ class brave{
return trim($title);
}
+ private function generatenextpagetoken($q, $nsfw, $country, $spellcheck, $page, $proxy){
+
+ $nextpage =
+ $this->fuckhtml
+ ->getElementsByClassName("btn", "a");
+
+ if(count($nextpage) !== 0){
+
+ $nextpage =
+ $nextpage[count($nextpage) - 1];
+
+ if(
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $nextpage
+ )
+ ) == "next"
+ ){
+
+ preg_match(
+ '/offset=([0-9]+)/',
+ $this->fuckhtml->getTextContent($nextpage["attributes"]["href"]),
+ $nextpage
+ );
+
+ return
+ $this->backend->store(
+ json_encode(
+ [
+ "q" => $q,
+ "offset" => (int)$nextpage[1],
+ "nsfw" => $nsfw,
+ "country" => $country,
+ "spellcheck" => $spellcheck
+ ]
+ ),
+ $page,
+ $proxy
+ );
+ }
+ }
+
+ return null;
+ }
+
private function unshiturl($url){
// https://imgs.search.brave.com/XFnbR8Sl7ge82MBDEH7ju0UHImRovMVmQ2qnDvgNTuA/rs:fit:844:225:1/g:ce/aHR0cHM6Ly90c2U0/Lm1tLmJpbmcubmV0/L3RoP2lkPU9JUC54/UWotQXU5N2ozVndT/RDJnNG9BNVhnSGFF/SyZwaWQ9QXBp.jpeg
diff --git a/scraper/ddg.php b/scraper/ddg.php
index 1ce8e18..2d737ba 100644
--- a/scraper/ddg.php
+++ b/scraper/ddg.php
@@ -4,8 +4,11 @@ class ddg{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("ddg");
+ include "lib/backend.php";
+ $this->backend = new backend("ddg");
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
}
/*
@@ -14,7 +17,7 @@ class ddg{
private const req_web = 0;
private const req_xhr = 1;
- private function get($url, $get = [], $reqtype = self::req_web){
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web){
$curlproc = curl_init();
@@ -28,7 +31,7 @@ class ddg{
switch($reqtype){
case self::req_web:
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5",
@@ -43,7 +46,7 @@ class ddg{
case self::req_xhr:
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5",
@@ -57,6 +60,8 @@ class ddg{
break;
}
+ $this->backend->assign_proxy($curlproc, $proxy);
+
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
@@ -69,7 +74,6 @@ class ddg{
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
-
throw new Exception(curl_error($curlproc));
}
@@ -541,9 +545,11 @@ class ddg{
public function web($get){
+ $proxy = null;
+
if($get["npt"]){
- $jsgrep = $this->nextpage->get($get["npt"], "web");
+ [$jsgrep, $proxy] = $this->backend->get($get["npt"], "web");
$extendedsearch = false;
$inithtml = "";
@@ -555,6 +561,7 @@ class ddg{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$country = $get["country"];
$nsfw = $get["nsfw"];
$older = $get["older"];
@@ -614,9 +621,9 @@ class ddg{
/*
Get html
*/
- // https://duckduckgo.com/?q=minecraft&kz=1&k1=-1&kp=-2
try{
$inithtml = $this->get(
+ $proxy,
"https://duckduckgo.com/",
$get_filters
);
@@ -643,6 +650,7 @@ class ddg{
try{
$js = $this->get(
+ $proxy,
"https://links.duckduckgo.com" . $jsgrep,
[],
ddg::req_xhr
@@ -692,6 +700,7 @@ class ddg{
// get definition
$wordnikjs = $this->get(
+ $proxy,
"https://duckduckgo.com/js/spice/dictionary/definition/" . $wordnik,
[],
ddg::req_xhr
@@ -725,6 +734,7 @@ class ddg{
$wordnikaudio_json =
json_decode(
$this->get(
+ $proxy,
"https://duckduckgo.com/js/spice/dictionary/audio/" . $wordnik,
[],
ddg::req_xhr
@@ -922,6 +932,7 @@ class ddg{
try{
$stackjs = $this->get(
+ $proxy,
"https://duckduckgo.com" . $stack,
[],
ddg::req_xhr
@@ -944,7 +955,7 @@ class ddg{
$out["answer"][] = [
"title" => $stackjson["Heading"],
- "description" => $this->htmltoarray($stackjson["Abstract"]),
+ "description" => $this->stackoverflow_parse($stackjson["Abstract"]),
"url" => str_replace(["http://", "ddg"], ["https://", ""], $stackjson["AbstractURL"]),
"thumb" => null,
"table" => [],
@@ -973,6 +984,7 @@ class ddg{
try{
$lyricsjs = $this->get(
+ $proxy,
"https://duckduckgo.com" . $lyrics,
[],
ddg::req_xhr
@@ -1166,13 +1178,13 @@ class ddg{
if(isset($answers[$i]["data"]["AbstractText"]) && !empty($answers[$i]["data"]["AbstractText"])){
- $description = $this->htmltoarray($answers[$i]["data"]["AbstractText"]);
+ $description = $this->stackoverflow_parse($answers[$i]["data"]["AbstractText"]);
}elseif(isset($answers[$i]["data"]["Abstract"]) && !empty($answers[$i]["data"]["Abstract"])){
- $description = $this->htmltoarray($answers[$i]["data"]["Abstract"]);
+ $description = $this->stackoverflow_parse($answers[$i]["data"]["Abstract"]);
}elseif(isset($answers[$i]["data"]["Answer"]) && !empty($answers[$i]["data"]["Answer"])){
- $description = $this->htmltoarray($answers[$i]["data"]["Answer"]);
+ $description = $this->stackoverflow_parse($answers[$i]["data"]["Answer"]);
}else{
$description = [];
@@ -1310,6 +1322,7 @@ class ddg{
$description = [];
$shitcoinjs = $this->get(
+ $proxy,
"https://duckduckgo.com/js/spice/cryptocurrency/{$shitcoins[1]}/{$shitcoins[2]}/1",
[],
ddg::req_xhr
@@ -1408,6 +1421,7 @@ class ddg{
try{
$currencyjs = $this->get(
+ $proxy,
"https://duckduckgo.com/js/spice/currency/{$amount}/" . strtolower($currencies[1]) . "/" . strtolower($currencies[2]),
[],
ddg::req_xhr
@@ -1607,7 +1621,7 @@ class ddg{
// store next page token
if(isset($web[$i]["n"])){
- $out["npt"] = $this->nextpage->store($web[$i]["n"] . "&biaexp=b&eslexp=a&litexp=c&msvrtexp=b&wrap=1", "web");
+ $out["npt"] = $this->backend->store($web[$i]["n"] . "&biaexp=b&eslexp=a&litexp=c&msvrtexp=b&wrap=1", "web", $proxy);
continue;
}
@@ -1874,10 +1888,11 @@ class ddg{
if($get["npt"]){
- $npt = $this->nextpage->get($get["npt"], "images");
+ [$npt, $proxy] = $this->backend->get($get["npt"], "images");
try{
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/i.js?" . $npt,
[],
ddg::req_xhr
@@ -1895,6 +1910,7 @@ class ddg{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$country = $get["country"];
$nsfw = $get["nsfw"];
$date = $get["date"];
@@ -1934,6 +1950,7 @@ class ddg{
try{
$html = $this->get(
+ $proxy,
"https://duckduckgo.com",
$get_filters,
ddg::req_web
@@ -1980,6 +1997,7 @@ class ddg{
try{
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/i.js",
$js_params,
ddg::req_xhr
@@ -2005,10 +2023,11 @@ class ddg{
}
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
explode("?", $json["next"])[1] . "&vqd=" .
$vqd,
- "images"
+ "images",
+ $proxy
);
}
@@ -2046,10 +2065,11 @@ class ddg{
if($get["npt"]){
- $npt = $this->nextpage->get($get["npt"], "videos");
+ [$npt, $proxy] = $this->backend->get($get["npt"], "videos");
try{
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/v.js?" .
$npt,
[],
@@ -2068,6 +2088,7 @@ class ddg{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$country = $get["country"];
$nsfw = $get["nsfw"];
$date = $get["date"];
@@ -2099,6 +2120,7 @@ class ddg{
try{
$html = $this->get(
+ $proxy,
"https://duckduckgo.com",
$get_filters,
ddg::req_web
@@ -2123,6 +2145,7 @@ class ddg{
try{
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/v.js",
[
"l" => "us-en",
@@ -2155,9 +2178,10 @@ class ddg{
if(isset($json["next"])){
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
explode("?", $json["next"])[1],
- "videos"
+ "videos",
+ $proxy
);
}
@@ -2213,11 +2237,12 @@ class ddg{
if($get["npt"]){
- $req = $this->nextpage->get($get["npt"], "news");
+ [$req, $proxy] = $this->backend->get($get["npt"], "news");
try{
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/news.js?" .
$req,
[],
@@ -2236,6 +2261,7 @@ class ddg{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$country = $get["country"];
$nsfw = $get["nsfw"];
$date = $get["date"];
@@ -2261,6 +2287,7 @@ class ddg{
try{
$html = $this->get(
+ $proxy,
"https://duckduckgo.com",
$get_params,
ddg::req_web
@@ -2303,6 +2330,7 @@ class ddg{
}
$json = json_decode($this->get(
+ $proxy,
"https://duckduckgo.com/news.js",
$js_params,
ddg::req_xhr
@@ -2323,9 +2351,10 @@ class ddg{
if(isset($json["next"])){
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
explode("?", $json["next"])[1],
- "news"
+ "news",
+ $proxy
);
}
@@ -2415,192 +2444,193 @@ class ddg{
return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]);
}
- private function htmltoarray($html){
+ private function appendtext($payload, &$text, &$index){
- $html = strip_tags($html, ["img", "pre", "code", "br", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "a"]);
-
- libxml_use_internal_errors(true);
- $dom = new DOMDocument("1.0", "utf-8");
- $dom->loadHTML('<div>' . $html . '</div>');
- $xpath = new DOMXPath($dom);
- $descendants = $xpath->query('//div/node()');
-
- $images = $xpath->query('//div/node()/img');
- $imageiterator = 0;
+ if(trim($payload) == ""){
+
+ return;
+ }
- if(count($descendants) === 0){
+ if(
+ $index !== 0 &&
+ $text[$index - 1]["type"] == "text"
+ ){
- return [
+ $text[$index - 1]["value"] .= preg_replace('/ $/', " ", $payload);
+ }else{
+
+ $text[] = [
"type" => "text",
- "value" => $this->unescapehtml($html)
+ "value" => preg_replace('/ $/', " ", $payload)
];
+ $index++;
}
+ }
+
+ private function stackoverflow_parse($html){
- $array = [];
- $previoustype = null;
+ $i = 0;
+ $answer = [];
- foreach($descendants as $node){
-
- // $node->nodeValue = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $node->nodeValue);
+ $this->fuckhtml->load($html);
+
+ $tags = $this->fuckhtml->getElementsByTagName("*");
+
+ if(count($tags) === 0){
- // get node type
- switch($node->nodeName){
- case "#text":
- $type = "text";
- break;
-
- case "pre":
- $type = "code";
- break;
-
- case "code":
- $type = "inline_code";
- break;
-
- case "h1":
- case "h2":
- case "h3":
- case "h4":
- case "h5":
- case "h6":
- $type = "title";
- break;
-
- case "blockquote":
- $type = "quote";
- break;
-
- case "a":
- $type = "link";
- break;
-
- case "img":
- $type = "image";
- break;
- }
+ return [
+ [
+ "type" => "text",
+ "value" => htmlspecialchars_decode($html)
+ ]
+ ];
+ }
+
+ foreach($tags as $snippet){
- // add node to array
- switch($type){
+ switch($snippet["tagName"]){
- case "text":
- $value = preg_replace(
- '/ {2,}/',
- " ",
- $this->limitnewlines($this->unescapehtml($node->textContent))
- );
+ case "p":
+ $this->fuckhtml->load($snippet["innerHTML"]);
- if(
- $previoustype == "quote" ||
- $previoustype === null ||
- $previoustype == "image" ||
- $previoustype == "title" ||
- $previoustype == "code"
- ){
-
- $value = ltrim($value);
- }
+ $codetags =
+ $this->fuckhtml
+ ->getElementsByTagName("*");
- if($value == ""){
-
- $previoustype = $type;
- continue 2;
- }
+ $tmphtml = $snippet["innerHTML"];
- // merge with previous text node
- if($previoustype == "text"){
+ foreach($codetags as $tag){
- $array[count($array) - 1]["value"] = trim($array[count($array) - 1]["value"]) . "\n" . $this->bstoutf8($value);
- }else{
+ if(!isset($tag["outerHTML"])){
+
+ continue;
+ }
- $array[] = [
- "type" => "text",
- "value" => $this->bstoutf8($value)
- ];
+ $tmphtml =
+ explode(
+ $tag["outerHTML"],
+ $tmphtml,
+ 2
+ );
+
+ $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
+ $this->appendtext($value, $answer, $i);
+
+ $type = null;
+ switch($tag["tagName"]){
+
+ case "code": $type = "inline_code"; break;
+ case "em": $type = "italic"; break;
+ case "blockquote": $type = "quote"; break;
+ default: $type = "text";
+ }
+
+ if($type !== null){
+ $value = $this->fuckhtml->getTextContent($tag, false, false);
+
+ if(trim($value) != ""){
+
+ $answer[] = [
+ "type" => $type,
+ "value" => rtrim($value)
+ ];
+ $i++;
+ }
+ }
+
+ if(count($tmphtml) === 2){
+
+ $tmphtml = $tmphtml[1] . "\n";
+ }else{
+
+ break;
+ }
}
- break;
-
- case "inline_code":
- case "bold":
- $array[] = [
- "type" => "inline_code",
- "value" => $this->bstoutf8(trim($this->limitnewlines($this->unescapehtml($node->textContent))))
- ];
- break;
-
- case "link":
- // check for link nested inside of image
- if(strlen($node->childNodes->item(0)->textContent) !== 0){
+ if(is_array($tmphtml)){
- $array[] = [
- "type" => "link",
- "value" => $this->bstoutf8(trim($this->unescapehtml($node->textContent))),
- "url" => $this->bstoutf8(preg_replace('/\/ddg$/', "", preg_replace('/^http:\/\//', "https://", $this->sanitizeurl($node->getAttribute("href")))))
- ];
- break;
+ $tmphtml = $tmphtml[0];
}
- $type = "image";
-
- if($previoustype == "text"){
+ if(strlen($tmphtml) !== 0){
- $array[count($array) - 1]["value"] = rtrim($array[count($array) - 1]["value"]);
+ $value = $this->fuckhtml->getTextContent($tmphtml, true, false);
+ $this->appendtext($value, $answer, $i);
}
-
- $array[] = [
- "type" => "image",
- "url" => $this->bstoutf8(preg_replace('/^http:\/\//', "https://", preg_replace('/^\/\/images\.duckduckgo\.com\/iu\/\?u=/', "", $images->item($imageiterator)->getAttribute("src"))))
- ];
-
- $imageiterator++;
-
break;
- case "image":
-
- if($previoustype == "text"){
-
- $array[count($array) - 1]["value"] = rtrim($array[count($array) - 1]["value"]);
- }
-
- $array[] = [
+ case "img":
+ $answer[] = [
"type" => "image",
- "url" => $this->bstoutf8(preg_replace('/^http:\/\//', "https://", preg_replace('/^\/\/images\.duckduckgo\.com\/iu\/\?u=/', "", $node->getAttribute("src"))))
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $tag["attributes"]["src"]
+ )
];
+ $i++;
break;
- case "quote":
- case "title":
- case "code":
- if($previoustype == "text"){
+ case "pre":
+ switch($answer[$i - 1]["type"]){
- $array[count($array) - 1]["value"] = rtrim($array[count($array) - 1]["value"]);
+ case "text":
+ case "italic":
+ $answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
+ break;
}
- // no break
-
- default:
- $value = trim($this->limitnewlines($this->unescapehtml($node->textContent)));
- if($type != "code"){
-
- $value = preg_replace(
- '/ {2,}/',
- " ",
- $value
+ $answer[] =
+ [
+ "type" => "code",
+ "value" =>
+ rtrim(
+ $this->fuckhtml
+ ->getTextContent(
+ $snippet,
+ true,
+ false
+ )
+ )
+ ];
+ $i++;
+
+ break;
+
+ case "ol":
+ $o = 0;
+
+ $this->fuckhtml->load($snippet);
+ $li =
+ $this->fuckhtml
+ ->getElementsByTagName("li");
+
+ foreach($li as $elem){
+ $o++;
+
+ $this->appendtext(
+ $o . ". " .
+ $this->fuckhtml
+ ->getTextContent(
+ $elem
+ ),
+ $answer,
+ $i
);
}
-
- $array[] = [
- "type" => $type,
- "value" => $this->bstoutf8($value)
- ];
break;
}
+ }
+
+ if(
+ $i !== 0 &&
+ $answer[$i - 1]["type"] == "text"
+ ){
- $previoustype = $type;
+ $answer[$i - 1]["value"] = rtrim($answer[$i - 1]["value"]);
}
- return $array;
+ return $answer;
}
private function bstoutf8($bs){
diff --git a/scraper/facebook.php b/scraper/facebook.php
index 7bd576b..395a863 100644
--- a/scraper/facebook.php
+++ b/scraper/facebook.php
@@ -9,6 +9,9 @@ class facebook{
include "lib/nextpage.php";
$this->nextpage = new nextpage("fb");
+
+ include "lib/proxy_pool.php";
+ $this->proxy = new proxy_pool("facebook");
}
public function getfilters($page){
@@ -104,6 +107,8 @@ class facebook{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->proxy->assign_proxy($curlproc);
$data = curl_exec($curlproc);
diff --git a/scraper/ftm.php b/scraper/ftm.php
index af39c12..0cdfbb3 100644
--- a/scraper/ftm.php
+++ b/scraper/ftm.php
@@ -4,8 +4,8 @@ class ftm{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("ftm");
+ include "lib/backend.php";
+ $this->backend = new backend("ftm");
}
public function getfilters($page){
@@ -13,7 +13,7 @@ class ftm{
return [];
}
- private function get($url, $search, $offset){
+ private function get($proxy, $url, $search, $offset){
$curlproc = curl_init();
@@ -29,7 +29,7 @@ class ftm{
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -56,6 +56,8 @@ class ftm{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -70,8 +72,6 @@ class ftm{
public function image($get){
- $search = $get["s"];
-
$out = [
"status" => "ok",
"npt" => null,
@@ -80,16 +80,28 @@ class ftm{
if($get["npt"]){
- $count = (int)$this->nextpage->get($get["npt"], "images");
+ [$data, $proxy] = $this->backend->get($get["npt"], "images");
+ $data = json_decode($data, true);
+
+ $count = $data["count"];
+ $search = $data["search"];
}else{
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
$count = 0;
+ $proxy = $this->backend->get_ip();
}
try{
$json =
json_decode(
$this->get(
+ $proxy,
"https://findthatmeme.com/api/v1/search",
$search,
$count
@@ -134,14 +146,15 @@ class ftm{
];
}
- if($count === 50){
-
- $out["npt"] =
- $this->nextpage->store(
- $count,
- "images"
- );
- }
+ $out["npt"] =
+ $this->backend->store(
+ json_encode([
+ "count" => $count,
+ "search" => $search
+ ]),
+ "images",
+ $proxy
+ );
return $out;
}
diff --git a/scraper/google.php b/scraper/google.php
index ca77231..055d12a 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -10,8 +10,8 @@ class google{
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("google");
+ include "lib/backend.php";
+ $this->backend = new backend("google");
}
public function getfilters($page){
@@ -727,7 +727,7 @@ class google{
}
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$headers = [
"User-Agent: Mozilla/5.0 (Linux; U; Android 2.3.3; pt-pt; LG-P500h-parrot Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 MMS/LG-Android-MMS-V1.0/1.2",
@@ -760,6 +760,8 @@ class google{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -771,7 +773,7 @@ class google{
curl_close($curlproc);
return $data;
}
-
+ /*
public function web($get){
$search = $get["s"];
@@ -877,9 +879,9 @@ class google{
if(count($title) !== 0){
- /*
- Container is a web link
- */
+ //
+ // Container is a web link
+ //
$web = [
"title" =>
$this->titledots(
@@ -1051,9 +1053,9 @@ class google{
continue;
}
- /*
- Parse rating object
- */
+ //
+ // Parse rating object
+ //
if($is_rating >= -1){
@@ -1102,9 +1104,9 @@ class google{
continue;
}
- /*
- Parse standalone text
- */
+ //
+ // Parse standalone text
+ //
$additional_info[] = $innertext;
}
}
@@ -1194,9 +1196,9 @@ class google{
$container_title == "people also search for"
){
- /*
- Parse related searches
- */
+ //
+ // Parse related searches
+ //
$as =
$this->fuckhtml
->getElementsByTagName("a");
@@ -1212,9 +1214,9 @@ class google{
continue;
}
- /*
- Parse image carousel
- */
+ //
+ // Parse image carousel
+ //
$title_container =
$this->fuckhtml
->getElementsByClassName(
@@ -1239,9 +1241,9 @@ class google{
if($title_container == "imagesview all"){
- /*
- Image carousel
- */
+ //
+ // Image carousel
+ //
$pcitem =
$this->fuckhtml
->getElementsByClassName(
@@ -1316,9 +1318,9 @@ class google{
}
}
- /*
- Get next page
- */
+ //
+ // Get next page
+ //
$as =
$this->fuckhtml
->getElementsByTagName("a");
@@ -1340,7 +1342,7 @@ class google{
}
return $out;
- }
+ }*/
public function image($get){
@@ -1348,17 +1350,22 @@ class google{
// generate parameters
if($get["npt"]){
- $params =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "images"
- ),
- true
+ [$params, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
);
+
+ $params = json_decode($params, true);
}else{
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$country = $get["country"];
$nsfw = $get["nsfw"];
$lang = $get["lang"];
@@ -1475,6 +1482,7 @@ class google{
try{
$html =
$this->get(
+ $proxy,
"https://www.google.com/search",
$params
);
@@ -1578,9 +1586,10 @@ class google{
$params["ijn"] = (int)$params["ijn"] + 1;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($params),
- "images"
+ "images",
+ $proxy
);
}else{
@@ -1628,9 +1637,10 @@ class google{
$params["imgvl"] = $imgvl;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($params),
- "images"
+ "images",
+ $proxy
);
}
}
diff --git a/scraper/imgur.php b/scraper/imgur.php
index 4a16de7..23efe00 100644
--- a/scraper/imgur.php
+++ b/scraper/imgur.php
@@ -4,11 +4,11 @@ class imgur{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("imgur");
-
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
+
+ include "lib/backend.php";
+ $this->backend = new backend("imgur");
}
public function getfilters($page){
@@ -57,7 +57,7 @@ class imgur{
];
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@@ -70,7 +70,7 @@ class imgur{
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -89,6 +89,8 @@ class imgur{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -105,15 +107,14 @@ class imgur{
if($get["npt"]){
- $filter =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "images"
- ),
- true
+ [$filter, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
);
+ $filter = json_decode($filter, true);
+
$search = $filter["s"];
unset($filter["s"]);
@@ -134,6 +135,12 @@ class imgur{
}else{
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$sort = $get["sort"];
$time = $get["time"];
$format = $get["format"];
@@ -165,6 +172,7 @@ class imgur{
try{
$html =
$this->get(
+ $proxy,
"https://imgur.com/search/$sort/$time/page/$page",
$filter
);
@@ -238,9 +246,10 @@ class imgur{
$filter["page"] = $page + 1;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($filter),
- "images"
+ "images",
+ $proxy
);
}
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
index c8ab09f..b790a97 100644
--- a/scraper/marginalia.php
+++ b/scraper/marginalia.php
@@ -3,7 +3,8 @@
class marginalia{
public function __construct(){
- $this->key = "public";
+ include "lib/backend.php";
+ $this->backend = new backend("marginalia");
}
public function getfilters($page){
@@ -76,10 +77,10 @@ class marginalia{
}
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$headers = [
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -109,6 +110,8 @@ class marginalia{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -124,6 +127,11 @@ class marginalia{
public function web($get){
$search = [$get["s"]];
+ if(strlen($get["s"]) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
$profile = $get["profile"];
$format = $get["format"];
$file = $get["file"];
@@ -184,7 +192,8 @@ class marginalia{
try{
$json =
$this->get(
- "https://api.marginalia.nu/{$this->key}/search/" . urlencode($search),
+ $this->backend->get_ip(), // no nextpage
+ "https://api.marginalia.nu/" . config::MARGINALIA_API_KEY . "/search/" . urlencode($search),
$params
);
}catch(Exception $error){
diff --git a/scraper/mojeek.php b/scraper/mojeek.php
index e7e8abc..3d91c09 100644
--- a/scraper/mojeek.php
+++ b/scraper/mojeek.php
@@ -6,8 +6,8 @@ class mojeek{
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("mojeek");
+ include "lib/backend.php";
+ $this->backend = new backend("mojeek");
}
public function getfilters($page){
@@ -371,10 +371,10 @@ class mojeek{
}
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$headers = [
- "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ "User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -404,6 +404,8 @@ class mojeek{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -420,11 +422,12 @@ class mojeek{
if($get["npt"]){
- $token = $this->nextpage->get($get["npt"], "web");
+ [$token, $proxy] = $this->backend->get($get["npt"], "web");
try{
$html =
$this->get(
+ $proxy,
"https://www.mojeek.com" . $token,
[]
);
@@ -485,9 +488,12 @@ class mojeek{
$params["si"] = $domain;
}
+ $proxy = $this->backend->get_ip();
+
try{
$html =
$this->get(
+ $proxy,
"https://www.mojeek.com/search",
$params
);
@@ -529,88 +535,90 @@ class mojeek{
return $out;
}
- $this->fuckhtml->load($results[0]);
-
/*
- Get search results
+ Get all search result divs
*/
- $results =
- $this->fuckhtml
- ->getElementsByTagName("li");
-
- foreach($results as $result){
-
- $data = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null,
- "type" => "web",
- "thumb" => [
- "url" => null,
- "ratio" => null
- ],
- "sublink" => [],
- "table" => []
- ];
-
- $this->fuckhtml->load($result);
+ foreach($results as $container){
- $title =
+ $this->fuckhtml->load($container);
+ $results =
$this->fuckhtml
- ->getElementsByClassName("title", "a")[0];
+ ->getElementsByTagName("li");
- $data["title"] =
- html_entity_decode(
+ foreach($results as $result){
+
+ $data = [
+ "title" => null,
+ "description" => null,
+ "url" => null,
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+
+ $this->fuckhtml->load($result);
+
+ $title =
$this->fuckhtml
- ->getTextContent(
- $title["innerHTML"]
- )
- );
-
- $data["url"] =
- html_entity_decode(
+ ->getElementsByClassName("title", "a")[0];
+
+ $data["title"] =
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $title["innerHTML"]
+ )
+ );
+
+ $data["url"] =
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $title["attributes"]["href"]
+ )
+ );
+
+ $description =
$this->fuckhtml
- ->getTextContent(
- $title["attributes"]["href"]
- )
- );
-
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- "s", "p"
- );
-
- if(count($description) !== 0){
+ ->getElementsByClassName(
+ "s", "p"
+ );
- $data["description"] =
- $this->titledots(
- html_entity_decode(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
+ if(count($description) !== 0){
+
+ $data["description"] =
+ $this->titledots(
+ html_entity_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ )
)
+ );
+ }
+
+ $data["date"] =
+ explode(
+ " - ",
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByClassName("i", "p")[1]
)
);
+
+ $data["date"] =
+ strtotime(
+ $data["date"][count($data["date"]) - 1]
+ );
+
+ $out["web"][] = $data;
}
-
- $data["date"] =
- explode(
- " - ",
- $this->fuckhtml
- ->getTextContent(
- $this->fuckhtml
- ->getElementsByClassName("i", "p")[1]
- )
- );
-
- $data["date"] =
- strtotime(
- $data["date"][count($data["date"]) - 1]
- );
-
- $out["web"][] = $data;
}
/*
@@ -969,12 +977,13 @@ class mojeek{
if($a["innerHTML"] == "Next"){
- $out["npt"] = $this->nextpage->store(
+ $out["npt"] = $this->backend->store(
$this->fuckhtml
->getTextContent(
$a["attributes"]["href"]
),
- "web"
+ "web",
+ $proxy
);
}
}
@@ -1001,6 +1010,7 @@ class mojeek{
try{
$html =
$this->get(
+ $this->backend->get_ip(),
"https://www.mojeek.com/search",
[
"q" => $search,
@@ -1011,168 +1021,139 @@ class mojeek{
throw new Exception("Failed to get HTML");
}
-
/*
$handle = fopen("scraper/mojeek.html", "r");
$html = fread($handle, filesize("scraper/mojeek.html"));
- fclose($handle);*/
-
- /*
- Get big, standard and smaller nodes
+ fclose($handle);
*/
- foreach(
- [
- "results-extended",
- "results-standard"
- ]
- as $categoryname
- ){
+
+ $this->fuckhtml->load($html);
+
+ $articles =
+ $this->fuckhtml->getElementsByTagName("article");
+
+ foreach($articles as $article){
+
+ $this->fuckhtml->load($article);
+
+ $data = [
+ "title" => null,
+ "author" => null,
+ "description" => null,
+ "date" => null,
+ "thumb" =>
+ [
+ "url" => null,
+ "ratio" => null
+ ],
+ "url" => null
+ ];
+
+ $a = $this->fuckhtml->getElementsByTagName("a")[0];
+
+ $data["title"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["title"]
+ );
+
+ $data["url"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ );
+
+ $p = $this->fuckhtml->getElementsByTagName("p");
+
+ $data["description"] =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "s",
+ $p
+ )[0]
+ )
+ );
- $this->fuckhtml->load($html);
+ if($data["description"] == ""){
+
+ $data["description"] = null;
+ }
- $categories =
+ // get date from big node
+ $date =
$this->fuckhtml
->getElementsByClassName(
- $categoryname,
- "ul"
+ "date",
+ $p
);
-
- foreach($categories as $category){
+
+ if(count($date) !== 0){
+
+ $data["date"] =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $date[0]
+ )
+ );
+ }
+
+ // grep date + author
+ $s =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "i",
+ $p
+ )[0];
+
+ $this->fuckhtml->load($s);
+
+ $a =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ if(count($a) !== 0){
- $this->fuckhtml->load($category);
+ // parse big node information
+ $data["author"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a[0]["innerHTML"]
+ );
+ }else{
- $nodes =
+ // parse smaller nodes
+ $replace =
$this->fuckhtml
- ->getElementsByTagName("li");
+ ->getElementsByTagName("time")[0];
- foreach($nodes as $node){
-
- $data = [
- "title" => null,
- "author" => null,
- "description" => null,
- "date" => null,
- "thumb" =>
- [
- "url" => null,
- "ratio" => null
- ],
- "url" => null
- ];
-
- /*
- Parse the results
- */
- $this->fuckhtml->load($node);
-
- // get title + url
- $a =
- $this->fuckhtml
- ->getElementsByTagName("a")[0];
-
- $data["title"] =
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]["title"]
- );
-
- $data["url"] =
+ $data["date"] =
+ strtotime(
$this->fuckhtml
->getTextContent(
- $a["attributes"]["href"]
- );
-
- // get image
- $image =
- $this->fuckhtml
- ->getElementsByTagName("img");
-
- if(count($image) !== 0){
-
- $data["thumb"] = [
- "url" =>
- urldecode(
- str_replace(
- "/image?img=",
- "",
- $this->fuckhtml
- ->getTextContent(
- $image[0]["attributes"]["src"]
- )
- )
- ),
- "ratio" => "16:9"
- ];
- }
-
- // get description
- $description =
- $this->fuckhtml
- ->getElementsByClassName("s", "p");
-
- if(count($description) !== 0){
-
- $data["description"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }
-
- // get date + time
- $date =
- $this->fuckhtml
- ->getElementsByClassName(
- "date",
- "p"
- );
-
- $i =
- $this->fuckhtml
- ->getElementsByClassName("i", "p");
-
- if(count($date) !== 0){
-
- // we're inside a big node
- $data["date"] = strtotime($date[0]["innerHTML"]);
-
- if(count($i) !== 0){
-
- $this->fuckhtml->load($i[0]);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName("a");
-
- if(count($a) !== 0){
-
- $data["author"] =
- $this->fuckhtml
- ->getTextContent($a[0]);
- }
- }
- }else{
-
- // we're inside a small node
- if(count($i) !== 0){
-
- $i =
- explode(
- " - ",
- $this->fuckhtml
- ->getTextContent($i[0])
- );
-
- $data["date"] = strtotime(array_pop($i));
- $data["author"] = implode(" - ", $i);
- }
- }
-
- $out["news"][] = $data;
- }
+ $replace
+ )
+ );
+
+ $s["innerHTML"] =
+ str_replace(
+ $replace["outerHTML"],
+ "",
+ $s["innerHTML"]
+ );
+
+ $data["author"] =
+ preg_replace(
+ '/ &bull; $/',
+ "",
+ $s["innerHTML"]
+ );
}
+
+ $out["news"][] = $data;
}
return $out;
diff --git a/scraper/pinterest.php b/scraper/pinterest.php
index 2bb5b71..37473a1 100644
--- a/scraper/pinterest.php
+++ b/scraper/pinterest.php
@@ -6,6 +6,9 @@ class pinterest{
include "lib/nextpage.php";
$this->nextpage = new nextpage("pinterest");
+
+ include "lib/proxy_pool.php";
+ $this->proxy = new proxy_pool("pinterest");
}
public function getfilters($page){
@@ -44,6 +47,8 @@ class pinterest{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->proxy->assign_proxy($curlproc);
$data = curl_exec($curlproc);
diff --git a/scraper/sc.php b/scraper/sc.php
index 1f49f95..16d3931 100644
--- a/scraper/sc.php
+++ b/scraper/sc.php
@@ -4,10 +4,8 @@ class sc{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("sc");
- $this->client_id = "ArYppSEotE3YiXCO4Nsgid2LLqJutiww";
- $this->user_id = "766585-580597-163310-929698";
+ include "lib/backend.php";
+ $this->backend = new backend("sc");
}
public function getfilters($page){
@@ -27,7 +25,7 @@ class sc{
];
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@@ -40,7 +38,7 @@ class sc{
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: application/json, text/javascript, */*; q=0.01",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -58,6 +56,8 @@ class sc{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -74,7 +74,7 @@ class sc{
if($get["npt"]){
- $params = $this->nextpage->get($get["npt"], "music");
+ [$params, $proxy] = $this->backend->get($get["npt"], "music");
$params = json_decode($params, true);
$url = $params["url"];
@@ -101,7 +101,13 @@ class sc{
// https://api-v2.soundcloud.com/search/playlists_without_albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
$type = $get["type"];
+ $proxy = $this->backend->get_ip();
switch($type){
@@ -111,8 +117,8 @@ class sc{
"q" => $search,
"variant_ids" => "",
"facet" => "model",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -127,8 +133,8 @@ class sc{
"q" => $search,
"variant_ids" => "",
"facet_genre" => "",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -143,8 +149,8 @@ class sc{
"q" => $search,
"variant_ids" => "",
"facet" => "place",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -159,8 +165,8 @@ class sc{
"q" => $search,
"variant_ids" => "",
"facet" => "genre",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -175,8 +181,8 @@ class sc{
"q" => $search,
"variant_ids" => "",
"facet" => "genre",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -192,8 +198,8 @@ class sc{
"variant_ids" => "",
"filter.content_tier" => "SUB_HIGH_TIER",
"facet" => "genre",
- "user_id" => $this->user_id,
- "client_id" => $this->client_id,
+ "user_id" => config::SC_USER_ID,
+ "client_id" => config::SC_CLIENT_TOKEN,
"limit" => 20,
"offset" => 0,
"linked_partitioning" => 1,
@@ -206,7 +212,7 @@ class sc{
try{
- $json = $this->get($url, $params);
+ $json = $this->get($proxy, $url, $params);
}catch(Exception $error){
@@ -244,9 +250,10 @@ class sc{
$params["url"] = $url; // we will remove this later
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($params),
- "music"
+ "music",
+ $proxy
);
}
@@ -342,7 +349,7 @@ class sc{
"endpoint" => "audio_sc",
"url" =>
$item["media"]["transcodings"][0]["url"] .
- "?client_id=" . $this->client_id .
+ "?client_id=" . config::SC_CLIENT_TOKEN .
"&track_authorization=" .
$item["track_authorization"]
];
diff --git a/scraper/wiby.php b/scraper/wiby.php
index a1daf57..e8351bc 100644
--- a/scraper/wiby.php
+++ b/scraper/wiby.php
@@ -4,8 +4,8 @@ class wiby{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("wiby");
+ include "lib/backend.php";
+ $this->backend = new backend("wiby");
}
public function getfilters($page){
@@ -36,7 +36,7 @@ class wiby{
];
}
- private function get($url, $get = [], $nsfw){
+ private function get($proxy, $url, $get = [], $nsfw){
$curlproc = curl_init();
@@ -45,11 +45,13 @@ class wiby{
$url .= "?" . $get;
}
+ print_r([$proxy, $url]);
+
curl_setopt($curlproc, CURLOPT_URL, $url);
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -69,6 +71,8 @@ class wiby{
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+ $this->backend->assign_proxy($curlproc, $proxy);
+
$data = curl_exec($curlproc);
if(curl_errno($curlproc)){
@@ -84,11 +88,8 @@ class wiby{
if($get["npt"]){
- $q =
- json_decode(
- $this->nextpage->get($get["npt"], "web"),
- true
- );
+ [$q, $proxy] = $this->backend->get($get["npt"], "web");
+ $q = json_decode($q, true);
$nsfw = $q["nsfw"];
unset($q["nsfw"]);
@@ -100,6 +101,7 @@ class wiby{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$date = $get["date"];
$nsfw = $get["nsfw"] == "yes" ? "0" : "1";
@@ -150,6 +152,7 @@ class wiby{
try{
$html = $this->get(
+ $proxy,
"https://wiby.me/",
$q,
$nsfw
@@ -171,13 +174,14 @@ class wiby{
}else{
$nextpage =
- $this->nextpage->store(
+ $this->backend->store(
json_encode([
"q" => $q["q"],
"p" => (int)$nextpage[1],
"nsfw" => $nsfw
]),
- "web"
+ "web",
+ $proxy
);
}
diff --git a/scraper/yandex.php b/scraper/yandex.php
index 65abe73..7335edc 100644
--- a/scraper/yandex.php
+++ b/scraper/yandex.php
@@ -10,11 +10,11 @@ class yandex{
include "lib/fuckhtml.php";
$this->fuckhtml = new fuckhtml();
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("yandex");
+ include "lib/backend.php";
+ // backend included in the scraper functions
}
- private function get($url, $get = [], $nsfw){
+ private function get($proxy, $url, $get = [], $nsfw){
$curlproc = curl_init();
@@ -32,7 +32,7 @@ class yandex{
}
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding: gzip",
"Accept-Language: en-US,en;q=0.5",
@@ -54,6 +54,8 @@ class yandex{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -207,6 +209,8 @@ class yandex{
public function web($get){
+ $this->backend = new backend("yandex_w");
+
// has captcha
// https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
@@ -215,10 +219,11 @@ class yandex{
if($get["npt"]){
- $npt = $this->nextpage->get($get["npt"], "web");
+ [$npt, $proxy] = $this->backend->get($get["npt"], "web");
$html =
$this->get(
+ $proxy,
"https://yandex.com" . $npt,
[],
"yes"
@@ -226,6 +231,12 @@ class yandex{
}else{
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$lang = $get["lang"];
$older = $get["older"];
$newer = $get["newer"];
@@ -269,6 +280,7 @@ class yandex{
try{
$html =
$this->get(
+ $proxy,
"https://yandex.com/search/site/",
$params,
"yes"
@@ -313,7 +325,7 @@ class yandex{
if(count($npt) !== 0){
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
$this->fuckhtml
->getTextContent(
$npt
@@ -321,7 +333,8 @@ class yandex{
["attributes"]
["href"]
),
- "web"
+ "web",
+ $proxy
);
}
@@ -386,17 +399,18 @@ class yandex{
public function image($get){
+ $this->backend = new backend("yandex_i");
+
if($get["npt"]){
- $request =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "images"
- ),
- true
+ [$request, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "images"
);
+ $request = json_decode($request, true);
+
$nsfw = $request["nsfw"];
unset($request["nsfw"]);
}else{
@@ -407,6 +421,7 @@ class yandex{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$nsfw = $get["nsfw"];
$time = $get["time"];
$size = $get["size"];
@@ -611,9 +626,11 @@ class yandex{
try{
$json = $this->get(
+ $proxy,
"https://yandex.com/images/search",
$request,
- $nsfw
+ $nsfw,
+ "yandex_i"
);
}catch(Exception $err){
@@ -676,7 +693,12 @@ class yandex{
$request["p"] = 1;
}
- $out["npt"] = $this->nextpage->store(json_encode($request), "images");
+ $out["npt"] =
+ $this->backend->store(
+ json_encode($request),
+ "images",
+ $proxy
+ );
}
// get search results
@@ -744,21 +766,29 @@ class yandex{
public function video($get){
+ $this->backend = new backend("yandex_v");
+
if($get["npt"]){
- $params =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "web"
- ),
- true
+ [$params, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "video"
);
+ $params = json_decode($params, true);
+
$nsfw = $params["nsfw"];
unset($params["nsfw"]);
}else{
+
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
$nsfw = $get["nsfw"];
$time = $get["time"];
$duration = $get["duration"];
@@ -865,9 +895,11 @@ class yandex{
try{
$json =
$this->get(
+ $proxy,
"https://yandex.com/video/search",
$params,
- $nsfw
+ $nsfw,
+ "yandex_v"
);
}catch(Exception $error){
@@ -926,9 +958,10 @@ class yandex{
$params["p"] = "1";
$params["nsfw"] = $nsfw;
$out["npt"] =
- $this->nextpage->store(
+ $this->backend->store(
json_encode($params),
- "web"
+ "video",
+ $proxy
);
}
diff --git a/scraper/yep.php b/scraper/yep.php
index 8ff4a57..7a73635 100644
--- a/scraper/yep.php
+++ b/scraper/yep.php
@@ -4,8 +4,8 @@ class yep{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("yep");
+ include "lib/backend.php";
+ $this->backend = new backend("yep");
}
public function getfilters($page){
@@ -238,7 +238,7 @@ class yep{
];
}
- private function get($url, $get = []){
+ private function get($proxy, $url, $get = []){
$curlproc = curl_init();
@@ -251,7 +251,7 @@ class yep{
curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -269,6 +269,8 @@ class yep{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -284,6 +286,11 @@ class yep{
public function image($get){
$search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
$country = $get["country"];
$nsfw = $get["nsfw"];
@@ -305,6 +312,7 @@ class yep{
$json =
json_decode(
$this->get(
+ $this->backend->get_ip(), // no nextpage!
"https://api.yep.com/fs/2/search",
[
"client" => "web",
diff --git a/scraper/youtube.php b/scraper/youtube.php
index 83a68ba..526b026 100644
--- a/scraper/youtube.php
+++ b/scraper/youtube.php
@@ -8,8 +8,8 @@ class youtube{
public function __construct(){
- include "lib/nextpage.php";
- $this->nextpage = new nextpage("yt");
+ include "lib/backend.php";
+ $this->backend = new backend("yt");
}
public function getfilters($page){
@@ -340,7 +340,7 @@ class youtube{
const req_web = 0;
const req_xhr = 1;
- private function get($url, $get = [], $reqtype = self::req_web, $continuation = null){
+ private function get($proxy, $url, $get = [], $reqtype = self::req_web, $continuation = null){
$curlproc = curl_init();
@@ -354,7 +354,7 @@ class youtube{
switch($reqtype){
case self::req_web:
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -370,7 +370,7 @@ class youtube{
case self::req_xhr:
$headers =
- ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:110.0) Gecko/20100101 Firefox/110.0",
+ ["User-Agent: " . config::USER_AGENT,
"Accept: */*",
"Accept-Language: en-US,en;q=0.5",
"Accept-Encoding: gzip",
@@ -397,6 +397,8 @@ class youtube{
curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
$data = curl_exec($curlproc);
@@ -430,17 +432,17 @@ class youtube{
$json = fread($handle, filesize("nextpage.json"));
fclose($handle);*/
- $npt =
- json_decode(
- $this->nextpage->get(
- $get["npt"],
- "videos"
- ),
- true
+ [$npt, $proxy] =
+ $this->backend->get(
+ $get["npt"],
+ "videos"
);
+ $npt = json_decode($npt, true);
+
try{
$json = $this->get(
+ $proxy,
"https://www.youtube.com/youtubei/v1/search",
[
"key" => $npt["key"],
@@ -507,6 +509,7 @@ class youtube{
throw new Exception("Search term is empty!");
}
+ $proxy = $this->backend->get_ip();
$date = $get["date"];
$type = $get["type"];
$duration = $get["duration"];
@@ -537,6 +540,7 @@ class youtube{
try{
$json = $this->get(
+ $proxy,
"https://www.youtube.com/results",
$get
);
@@ -942,7 +946,14 @@ class youtube{
if($this->out["npt"] !== null){
- $this->out["npt"] = $this->nextpage->store(json_encode($this->out["npt"]), "videos");
+ $this->out["npt"] =
+ $this->backend->store(
+ json_encode(
+ $this->out["npt"]
+ ),
+ "videos",
+ $proxy
+ );
}
return $this->out;