summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--banner/cynic.pngbin0 -> 68909 bytes
-rw-r--r--lib/frontend.php10
-rw-r--r--scraper/brave.php121
-rw-r--r--scraper/facebook.php8
-rw-r--r--scraper/google.php505
-rw-r--r--scraper/yandex.php596
-rw-r--r--settings.php16
7 files changed, 1178 insertions, 78 deletions
diff --git a/banner/cynic.png b/banner/cynic.png
new file mode 100644
index 0000000..05c728b
--- /dev/null
+++ b/banner/cynic.png
Binary files differ
diff --git a/lib/frontend.php b/lib/frontend.php
index 0f2a1ff..9350230 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -878,6 +878,7 @@ class frontend{
"option" => [
"ddg" => "DuckDuckGo",
"brave" => "Brave",
+ "yandex" => "Yandex",
//"google" => "Google",
"mojeek" => "Mojeek",
"marginalia" => "Marginalia",
@@ -903,9 +904,10 @@ class frontend{
"display" => "Scraper",
"option" => [
"yt" => "YouTube",
- "fb" => "Facebook videos",
+ //"fb" => "Facebook videos",
"ddg" => "DuckDuckGo",
- "brave" => "Brave"//,
+ "brave" => "Brave",
+ "yandex" => "Yandex"
//"google" => "Google"
]
];
@@ -972,11 +974,11 @@ class frontend{
include "scraper/google.php";
$lib = new google();
break;
-
+ /*
case "fb":
include "scraper/facebook.php";
$lib = new facebook();
- break;
+ break;*/
case "mojeek":
include "scraper/mojeek.php";
diff --git a/scraper/brave.php b/scraper/brave.php
index 50e7b49..0a73158 100644
--- a/scraper/brave.php
+++ b/scraper/brave.php
@@ -1183,6 +1183,28 @@ class brave{
$div = $this->fuckhtml->getElementsByTagName("div");
/*
+ Get small description
+ */
+ $small_desc =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "infobox-description",
+ $div
+ );
+
+ if(count($small_desc) !== 0){
+
+ $answer["description"][] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $small_desc[0]
+ )
+ ];
+ }
+
+ /*
Get title + url
*/
$title =
@@ -1292,28 +1314,25 @@ class brave{
if(count($code) === 0){
- $answer["description"] =
- [
- [
- "type" => "text",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $desc_tmp
- )
- ],
- [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $author
- )
- ]
+ $answer["description"][] = [
+ "type" => "text",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $desc_tmp
+ )
+ ];
+
+ $answer["description"][] = [
+ "type" => "quote",
+ "value" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $author
+ )
];
}else{
- $text = [];
$i = 0;
foreach($code as $snippet){
@@ -1344,7 +1363,7 @@ class brave{
);
$value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
- $this->appendtext($value, $text, $i);
+ $this->appendtext($value, $answer["description"], $i);
$type = null;
switch($tag["tagName"]){
@@ -1365,10 +1384,10 @@ class brave{
$type == "title"
){
- $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]);
+ $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
}
- $text[] = [
+ $answer["description"][] = [
"type" => $type,
"value" => $value
];
@@ -1393,21 +1412,21 @@ class brave{
if(strlen($tmphtml) !== 0){
$value = $this->fuckhtml->getTextContent($tmphtml, false, false);
- $this->appendtext($value, $text, $i);
+ $this->appendtext($value, $answer["description"], $i);
}
break;
case "pre":
- switch($text[$i - 1]["type"]){
+ switch($answer["description"][$i - 1]["type"]){
case "text":
case "italic":
- $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]);
+ $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
break;
}
- $text[] =
+ $answer["description"][] =
[
"type" => "code",
"value" =>
@@ -1441,7 +1460,7 @@ class brave{
->getTextContent(
$elem
),
- $text,
+ $answer["description"],
$i
);
}
@@ -1451,21 +1470,19 @@ class brave{
if(
$i !== 0 &&
- $text[$i - 1]["type"] == "text"
+ $answer["description"][$i - 1]["type"] == "text"
){
- $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]);
+ $answer["description"][$i - 1]["value"] = rtrim($answer["description"][$i - 1]["value"]);
}
if($author){
- $text[] = [
+ $answer["description"][] = [
"type" => "quote",
"value" => $this->fuckhtml->getTextContent($author)
];
}
-
- $answer["description"] = $text;
}
}else{
@@ -1481,22 +1498,20 @@ class brave{
if(count($description) !== 0){
- $description =
+ $answer["description"][] =
[
- [
- "type" => "text",
- "value" =>
- $this->titledots(
- preg_replace(
- '/ Wikipedia$/',
- "",
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
+ "type" => "text",
+ "value" =>
+ $this->titledots(
+ preg_replace(
+ '/ Wikipedia$/',
+ "",
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
)
)
- ]
+ )
];
$ratings =
@@ -1514,7 +1529,7 @@ class brave{
"div"
);
- $description[] = [
+ $answer["description"][] = [
"type" => "title",
"value" => "Ratings"
];
@@ -1550,36 +1565,34 @@ class brave{
)[0]
);
- $c = count($description) - 1;
+ $c = count($answer["description"]) - 1;
if(
$c !== -1 &&
- $description[$c]["type"] == "text"
+ $answer["description"][$c]["type"] == "text"
){
- $description[$c]["value"] .= $num . " ";
+ $answer["description"][$c]["value"] .= $num . " ";
}else{
- $description[] = [
+ $answer["description"][] = [
"type" => "text",
"value" => $num . " "
];
}
- $description[] = [
+ $answer["description"][] = [
"type" => "link",
"value" => $this->fuckhtml->getTextContent($href),
"url" => $this->fuckhtml->getTextContent($href["attributes"]["href"])
];
- $description[] = [
+ $answer["description"][] = [
"type" => "text",
"value" => " (" . $votes . ")\n"
];
}
}
-
- $answer["description"] = $description;
}
}
diff --git a/scraper/facebook.php b/scraper/facebook.php
index 46d58d6..7bd576b 100644
--- a/scraper/facebook.php
+++ b/scraper/facebook.php
@@ -228,16 +228,16 @@ class facebook{
)
);
}
-
+ /*
$html =
$this->get(
"https://www.facebook.com/watch/search/",
$req
- );
- /*
+ );*/
+
$handle = fopen("scraper/facebook.html", "r");
$html = fread($handle, filesize("scraper/facebook.html"));
- fclose($handle);*/
+ fclose($handle);
preg_match_all(
'/({"__bbox":.*,"sequence_number":0}})\]\]/',
diff --git a/scraper/google.php b/scraper/google.php
index 7ed3577..d0e90ca 100644
--- a/scraper/google.php
+++ b/scraper/google.php
@@ -824,8 +824,6 @@ class google{
$html = fread($handle, filesize("scraper/google.html"));
fclose($handle);
- $this->fuckhtml->load($html);
-
$out = [
"status" => "ok",
"spelling" => [
@@ -841,6 +839,507 @@ class google{
"news" => [],
"related" => []
];
+
+ $this->parsejavascript($html);
+
+ $containers =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "background-color" => "#fff",
+ "margin-bottom" => "10px",
+ "-webkit-box-shadow" => "0 1px 6px rgba(32,33,36,0.28)",
+ "border-radius" => "8px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ foreach($containers as $container){
+
+ $this->fuckhtml->load($container);
+
+ $title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "color" => "#1967d2",
+ "font-size" => "20px",
+ "line-height" => "26px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($title) !== 0){
+
+ /*
+ Container is a web link
+ */
+ $web = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $title[0]
+ )
+ ),
+ "description" => null,
+ "url" =>
+ $this->decodeurl(
+ $this->fuckhtml
+ ->getElementsByTagName("a")
+ [0]
+ ["attributes"]
+ ["href"]
+ ),
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+
+ $container = $container["innerHTML"];
+
+ $description_container =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding" => "12px 16px 12px"
+ ],
+ self::is_class
+ ),
+ "div"
+ )[1];
+
+ $description =
+ $description_container["innerHTML"];
+
+ // get sublinks
+ $this->fuckhtml->load($description);
+
+ $links =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ $skip = true;
+ foreach($links as $link){
+
+ $description =
+ str_replace(
+ $link["outerHTML"],
+ "",
+ $description
+ );
+
+ if($skip){
+
+ $skip = false;
+ continue;
+ }
+
+ $sublink = [
+ "title" => null,
+ "description" => null,
+ "url" => null,
+ "date" => null
+ ];
+
+ $sublink["title"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ );
+
+ $sublink["url"] =
+ $this->decodeurl(
+ $link
+ ["attributes"]
+ ["href"]
+ );
+
+ $web["sublink"][] = $sublink;
+ }
+
+ // get thumbnail before we call loadhtml again
+ $img =
+ $this->fuckhtml
+ ->getElementsByTagName("img");
+
+ if(count($img) !== 0){
+
+ if(
+ isset($img[0]["attributes"]["alt"]) &&
+ stripos($img[0]["attributes"]["alt"], "Video for") !== false
+ ){
+
+ // is a video thumbnail
+ $web["thumb"]["ratio"] = "16:9";
+ }else{
+
+ // is a google thumbnail
+ $web["thumb"]["ratio"] = "1:1";
+ }
+
+ $web["thumb"]["url"] =
+ $this->getimage(
+ $img[0]["attributes"]["id"]
+ );
+ }
+
+ // get table elements
+ $this->fuckhtml->load($description);
+
+ $levels =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding-bottom" => "8px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ $additional_info = [];
+ foreach($levels as $level){
+
+ $this->fuckhtml->load($level);
+
+ $spans =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ $is_rating = -2;
+
+ foreach($spans as $span){
+
+ // clean up description
+ $description =
+ str_replace(
+ $span["outerHTML"],
+ "",
+ $description
+ );
+
+ $innertext =
+ $this->fuckhtml
+ ->getTextContent(
+ $span
+ );
+
+ if($innertext == ""){ continue; }
+
+ if(
+ strtolower($innertext)
+ == "rating"
+ ){
+
+ $is_rating = -1;
+ continue;
+ }
+
+ /*
+ Parse rating object
+ */
+
+ if($is_rating >= -1){
+
+ if($span["level"] !== 1){ continue; }
+
+ $is_rating++;
+
+ // 10/10 (123)
+ if($is_rating === 0){
+
+ $innertext = explode(" ", $innertext, 2);
+
+ $web["table"]["Rating"] = $innertext[0];
+ $web["table"]["Hits"] =
+ trim(
+ str_replace(
+ [
+ "(",
+ ")"
+ ],
+ "",
+ $innertext[1]
+ )
+ );
+ continue;
+ }
+
+ // US$4.99
+ // MYR 50.00
+ // $38.34
+ // JP¥6,480
+ if($is_rating === 2){
+
+ $web["table"]["Price"] = $innertext;
+ continue;
+ }
+
+ // Android / In stock
+ if($is_rating === 4){
+
+ $web["table"]["Support"] = $innertext;
+ continue;
+ }
+
+ // ignore the rest
+ continue;
+ }
+
+ /*
+ Parse standalone text
+ */
+ $additional_info[] = $innertext;
+ }
+ }
+
+ for($i=0; $i<count($additional_info); $i++){
+
+ // @TODO
+ // generate better node names
+ $web["table"]["Info node #$i"] = $additional_info[$i];
+ }
+
+ $this->fuckhtml->load($description);
+
+ // get date node
+ $span =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "span"
+ );
+
+ if(count($span) !== 0){
+
+ $description =
+ str_replace(
+ $span[0]["outerHTML"],
+ "",
+ $description
+ );
+
+ $span =
+ strtotime(
+ $this->fuckhtml
+ ->getTextContent(
+ $span[0]
+ )
+ );
+
+ if($span){
+
+ $web["date"] = $span;
+ }
+ }
+
+ $web["description"] =
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ $description
+ ),
+ " ·."
+ );
+
+ $out["web"][] = $web;
+
+ continue;
+ }
+
+ // check for container title header
+ $container_title =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "font-weight" => "bold",
+ "font-size" => "16px",
+ "color" => "#000",
+ "margin" => "0",
+ "padding" => "12px 16px 0 16px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($container_title) !== 0){
+
+ $container_title =
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $container_title[0]
+ )
+ );
+
+ if(
+ $container_title == "related searches" ||
+ $container_title == "people also search for"
+ ){
+
+ /*
+ Parse related searches
+ */
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($as as $a){
+
+ $out["related"][] =
+ $this->fuckhtml
+ ->getTextContent($a);
+ }
+ }
+
+ continue;
+ }
+
+ /*
+ Parse image carousel
+ */
+ $title_container =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ $this->findstyles(
+ [
+ "padding" => "12px 16px 12px"
+ ],
+ self::is_class
+ ),
+ "div"
+ );
+
+ if(count($title_container) !== 0){
+
+ $title_container =
+ strtolower(
+ $this->fuckhtml
+ ->getTextContent(
+ $title_container[0]
+ )
+ );
+
+ if($title_container == "imagesview all"){
+
+ /*
+ Image carousel
+ */
+ $pcitem =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "pcitem",
+ "div"
+ );
+
+ foreach($pcitem as $item){
+
+ $this->fuckhtml->load($item);
+
+ $link =
+ $this->fuckhtml
+ ->getElementsByTagName(
+ "a"
+ )[0];
+
+ parse_str(
+ parse_url(
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ ["attributes"]
+ ["href"]
+ ),
+ PHP_URL_QUERY
+ ),
+ $link
+ );
+
+ if(isset($link["tbm"])){
+
+ continue;
+ }
+
+ $image =
+ $this->fuckhtml
+ ->getElementsByTagName("img")[0];
+
+ $title =
+ $this->fuckhtml
+ ->getTextContent(
+ $image
+ ["attributes"]
+ ["alt"]
+ );
+
+ $image =
+ $this->getimage(
+ $image
+ ["attributes"]
+ ["id"]
+ );
+
+ $out["image"][] = [
+ "title" => $title,
+ "source" => [
+ [
+ "url" => $link["imgurl"],
+ "width" => (int)$link["w"],
+ "height" => (int)$link["h"]
+ ],
+ [
+ "url" => $image,
+ "width" => (int)$link["tbnw"],
+ "height" => (int)$link["tbnh"]
+ ]
+ ],
+ "url" => $link["imgrefurl"]
+ ];
+ }
+ }
+ }
+
+ /*
+ Get next page
+ */
+ $as =
+ $this->fuckhtml
+ ->getElementsByTagName("a");
+
+ foreach($as as $a){
+
+ if(
+ isset($a["attributes"]["aria-label"]) &&
+ strtolower($a["attributes"]["aria-label"]) == "next page"
+ ){
+
+ $out["npt"] =
+ $this->fuckhtml
+ ->getTextContent(
+ $a["attributes"]["href"]
+ );
+ }
+ }
+ }
+
+ return $out;
}
@@ -1163,7 +1662,7 @@ class google{
return $time;
}
- private function loadjavascriptcrap($html){
+ private function parsejavascript($html){
$this->fuckhtml->load($html);
diff --git a/scraper/yandex.php b/scraper/yandex.php
index 437c8aa..8cb733e 100644
--- a/scraper/yandex.php
+++ b/scraper/yandex.php
@@ -18,8 +18,6 @@ class yandex{
$curlproc = curl_init();
- $search = $get["text"];
-
if($get !== []){
$get = http_build_query($get);
$url .= "?" . $get;
@@ -40,7 +38,7 @@ class yandex{
"Accept-Language: en-US,en;q=0.5",
"DNT: 1",
"Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999",
- "Referer: https://yandex.com/images/search?text={$search}",
+ "Referer: https://yandex.com/images/search",
"Connection: keep-alive",
"Upgrade-Insecure-Requests: 1",
"Sec-Fetch-Dest: document",
@@ -72,6 +70,35 @@ class yandex{
switch($pagetype){
+ case "web":
+ return [
+ "lang" => [
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "en" => "English",
+ "ru" => "Russian",
+ "be" => "Belorussian",
+ "fr" => "French",
+ "de" => "German",
+ "id" => "Indonesian",
+ "kk" => "Kazakh",
+ "tt" => "Tatar",
+ "tr" => "Turkish",
+ "uk" => "Ukrainian"
+ ]
+ ],
+ "newer" => [
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ]
+ ];
+ break;
+
case "images":
return
[
@@ -149,12 +176,214 @@ class yandex{
];
break;
- default:
- return [];
+ case "videos":
+ return [
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes",
+ "maybe" => "Maybe",
+ "no" => "No"
+ ]
+ ],
+ "time" => [
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "9" => "Recently"
+ ]
+ ],
+ "duration" => [
+ "display" => "Duration",
+ "option" => [
+ "any" => "Any duration",
+ "short" => "Short"
+ ]
+ ]
+ ];
break;
}
}
-
+
+ public function web($get){
+
+ // has captcha
+ // https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
+
+ // https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
+ // &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023
+
+ if($get["npt"]){
+
+ $npt = $this->nextpage->get($get["npt"], "web");
+
+ $html =
+ $this->get(
+ "https://yandex.com" . $npt,
+ [],
+ "yes"
+ );
+ }else{
+
+ $search = $get["s"];
+ $lang = $get["lang"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+
+ $params = [
+ "text" => $search,
+ "web" => "1",
+ "frame" => "1",
+ "searchid" => "3131712"
+ ];
+
+ if($lang != "any"){
+
+ $params["lang"] = $lang;
+ }
+
+ if(
+ $newer === false &&
+ $older !== false
+ ){
+
+ $newer = 0;
+ }
+
+ if($newer !== false){
+
+ $params["from_day"] = date("j", $newer);
+ $params["from_month"] = date("n", $newer);
+ $params["from_year"] = date("Y", $newer);
+
+ if($older === false){
+
+ $older = time();
+ }
+
+ $params["to_day"] = date("j", $older);
+ $params["to_month"] = date("n", $older);
+ $params["to_year"] = date("Y", $older);
+ }
+
+ try{
+ $html =
+ $this->get(
+ "https://yandex.com/search/site/",
+ $params,
+ "yes"
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Could not get search page");
+ }
+
+ /*
+ $handle = fopen("scraper/yandex.html", "r");
+ $html = fread($handle, filesize("scraper/yandex.html"));
+ fclose($handle);*/
+ }
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ $this->fuckhtml->load($html);
+
+ // get nextpage
+ $npt =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "b-pager__next",
+ "a"
+ );
+
+ if(count($npt) !== 0){
+
+ $out["npt"] =
+ $this->nextpage->store(
+ $this->fuckhtml
+ ->getTextContent(
+ $npt
+ [0]
+ ["attributes"]
+ ["href"]
+ ),
+ "web"
+ );
+ }
+
+ // get items
+ $items =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "b-serp-item",
+ "li"
+ );
+
+ foreach($items as $item){
+
+ $this->fuckhtml->load($item);
+
+ $link =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "b-serp-item__title-link",
+ "a"
+ )[0];
+
+ $out["web"][] = [
+ "title" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ )
+ ),
+ "description" =>
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "b-serp-item__text",
+ "div"
+ )[0]
+ )
+ ),
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $link
+ ["attributes"]
+ ["href"]
+ ),
+ "date" => null,
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+ }
+
+ return $out;
+ }
+
public function image($get){
if($get["npt"]){
@@ -402,7 +631,7 @@ class yandex{
$json["type"] == "captcha"
){
- throw new Exception("Yandex blocked this 4get instance. Yandex blocks don't last very long, but the block timer gets reset everytime you make another unsuccessful request. Please try again in ~7 minutes.");
+ throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
}
if($json === null){
@@ -513,6 +742,359 @@ class yandex{
return $out;
}
+ public function video($get){
+
+ if($get["npt"]){
+
+ $params =
+ json_decode(
+ $this->nextpage->get(
+ $get["npt"],
+ "web"
+ ),
+ true
+ );
+
+ $nsfw = $params["nsfw"];
+ unset($params["nsfw"]);
+ }else{
+ $search = $get["s"];
+ $nsfw = $get["nsfw"];
+ $time = $get["time"];
+ $duration = $get["duration"];
+
+ // https://yandex.com/video/search
+ // ?tmpl_version=releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63
+ // &format=json
+ // &request=
+ // {
+ // "blocks":[
+ // {"block":"extra-content","params":{},"version":2},
+ // {"block":"i-global__params:ajax","params":{},"version":2},
+ // {"block":"search2:ajax","params":{},"version":2},
+ // {"block":"vital-incut","params":{},"version":2},
+ // {"block":"content_type_search","params":{},"version":2},
+ // {"block":"serp-controller","params":{},"version":2},
+ // {"block":"cookies_ajax","params":{},"version":2}
+ // ],
+ // "metadata":{
+ // "bundles":{"lb":"^G]!q<X120"},
+ // "assets":{"las":"react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"},
+ // "extraContent":{"names":["i-react-ajax-adapter"]}
+ // }
+ // }
+ // &yu=4861394161661655015
+ // &from=tabbar
+ // &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
+ // &suggest_reqid=486139416166165501562797413447032
+ // &text=minecraft
+
+ $params = [
+ "tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
+ "format" => "json",
+ "request" => json_encode([
+ "blocks" => [
+ (object)[
+ "block" => "extra-content",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "i-global__params:ajax",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "search2:ajax",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "vital-incut",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "content_type_search",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "serp-controller",
+ "params" => (object)[],
+ "version" => 2
+ ],
+ (object)[
+ "block" => "cookies_ajax",
+ "params" => (object)[],
+ "version" => 2
+ ]
+ ],
+ "metadata" => (object)[
+ "bundles" => (object)[
+ "lb" => "^G]!q<X120"
+ ],
+ "assets" => (object)[
+ "las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
+ ],
+ "extraContent" => (object)[
+ "names" => [
+ "i-react-ajax-adapter"
+ ]
+ ]
+ ]
+ ]),
+ "text" => $search
+ ];
+
+ if($duration != "any"){
+
+ $params["duration"] = $duration;
+ }
+
+ if($time != "any"){
+
+ $params["within"] = $time;
+ }
+ }
+ /*
+ $handle = fopen("scraper/yandex-video.json", "r");
+ $json = fread($handle, filesize("scraper/yandex-video.json"));
+ fclose($handle);
+ */
+ try{
+ $json =
+ $this->get(
+ "https://yandex.com/video/search",
+ $params,
+ $nsfw
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch JSON");
+ }
+
+ $json = json_decode($json, true);
+
+ if($json === null){
+
+ throw new Exception("Could not parse JSON");
+ }
+
+ if(!isset($json["blocks"])){
+
+ throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
+ }
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "video" => [],
+ "author" => [],
+ "livestream" => [],
+ "playlist" => [],
+ "reel" => []
+ ];
+
+ $html = null;
+ foreach($json["blocks"] as $block){
+
+ if(isset($block["html"])){
+
+ $html .= $block["html"];
+ }
+ }
+
+ $this->fuckhtml->load($html);
+
+ $div =
+ $this->fuckhtml
+ ->getElementsByTagName("div");
+
+ /*
+ Get nextpage
+ */
+ $npt =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "more more_direction_next i-bem",
+ $div
+ );
+
+ if(count($npt) !== 0){
+
+ $params["p"] = "1";
+ $params["nsfw"] = $nsfw;
+ $out["npt"] =
+ $this->nextpage->store(
+ json_encode($params),
+ "web"
+ );
+ }
+
+ $items =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "serp-item",
+ $div
+ );
+
+ foreach($items as $item){
+
+ $data =
+ json_decode(
+ $this->fuckhtml
+ ->getTextContent(
+ $item["attributes"]["data-video"]
+ ),
+ true
+ );
+
+ $this->fuckhtml->load($item);
+
+ $thumb =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "thumb-image__image",
+ "img"
+ );
+
+ if(count($thumb) === 0){
+
+ $thumb = [
+ "url" => null,
+ "ratio" => null
+ ];
+ }else{
+
+ $c = 1;
+ $thumb = [
+ "url" =>
+ str_replace(
+ "//",
+ "https://",
+ $this->fuckhtml
+ ->getTextContent(
+ $thumb
+ [0]
+ ["attributes"]
+ ["src"]
+ ),
+ $c
+ ),
+ "ratio" => "16:9"
+ ];
+ }
+
+ $smallinfos =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "serp-item__sitelinks-item",
+ "div"
+ );
+
+ $date = null;
+ $views = null;
+ $first = true;
+
+ foreach($smallinfos as $info){
+
+ if($first){
+
+ $first = false;
+ continue;
+ }
+
+ $info =
+ $this->fuckhtml
+ ->getTextContent(
+ $info
+ );
+
+ if($temp_date = strtotime($info)){
+
+ $date = $temp_date;
+ }else{
+
+ $views = $this->parseviews($info);
+ }
+ }
+
+ $description =
+ $this->fuckhtml
+ ->getElementsByClassName(
+ "serp-item__text serp-item__text_visibleText_always",
+ "div"
+ );
+
+ if(count($description) === 0){
+
+ $description = null;
+ }else{
+
+ $description =
+ $this->titledots(
+ $this->fuckhtml
+ ->getTextContent(
+ $description[0]
+ )
+ );
+ }
+
+ $out["video"][] = [
+ "title" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $this->titledots(
+ $data["title"]
+ )
+ ),
+ "description" => $description,
+ "author" => [
+ "name" => null,
+ "url" => null,
+ "avatar" => null
+ ],
+ "date" => $date,
+ "duration" =>
+ (int)$data
+ ["counters"]
+ ["toHostingLoaded"]
+ ["stredParams"]
+ ["duration"],
+ "views" => $views,
+ "thumb" => $thumb,
+ "url" =>
+ $this->fuckhtml
+ ->getTextContent(
+ $data["counters"]
+ ["toHostingLoaded"]
+ ["postfix"]
+ ["href"]
+ )
+ ];
+ }
+
+ return $out;
+ }
+
+ private function parseviews($text){
+
+ $text = explode(" ", $text);
+
+ $num = (float)$text[0];
+ $mod = $text[1];
+
+ switch($mod){
+
+ case "bln.": $num = $num * 1000000000; break;
+ case "mln.": $num = $num * 1000000; break;
+ case "thsd.": $num = $num * 1000; break;
+ }
+
+ return $num;
+ }
+
private function titledots($title){
$substr = substr($title, -3);
diff --git a/settings.php b/settings.php
index c53599f..c968e57 100644
--- a/settings.php
+++ b/settings.php
@@ -70,6 +70,10 @@ $settings = [
"value" => "brave",
"text" => "Brave"
],
+ [
+ "value" => "yandex",
+ "text" => "Yandex"
+ ],
/*[
"value" => "google",
"text" => "Google"
@@ -119,16 +123,16 @@ $settings = [
"text" => "YouTube"
],
[
- "value" => "fb",
- "text" => "Facebook videos"
- ],
- [
"value" => "ddg",
"text" => "DuckDuckGo"
],
[
"value" => "brave",
"text" => "Brave"
+ ],
+ [
+ "value" => "yandex",
+ "text" => "Yandex"
]/*,
[
"value" => "google",
@@ -147,8 +151,8 @@ $settings = [
[
"value" => "brave",
"text" => "Brave"
- ],/*
- [
+ ],
+ /*[
"value" => "google",
"text" => "Google"
],*/