summaryrefslogtreecommitdiff
path: root/scraper
diff options
context:
space:
mode:
author lolcat <will@lolcat.ca> 2023-07-22 14:41:14 -0400
committer lolcat <will@lolcat.ca> 2023-07-22 14:41:14 -0400
commit bca265aea67ec62499aaa113a6490ce9ec7fe730 (patch)
tree 3f05ec5ea542e41b474947e180034f42e99648e9 /scraper
still missing things on google scraper
Diffstat (limited to 'scraper')
-rw-r--r-- scraper/brave.php 2287
-rw-r--r-- scraper/ddg.php 2722
-rw-r--r-- scraper/google.php 1562
-rw-r--r-- scraper/marginalia.php 242
-rw-r--r-- scraper/mojeek.php 1182
-rw-r--r-- scraper/wiby.php 244
-rw-r--r-- scraper/yandex.php 530
-rw-r--r-- scraper/youtube.php 1723
8 files changed, 10492 insertions, 0 deletions
diff --git a/scraper/brave.php b/scraper/brave.php
new file mode 100644
index 0000000..4d48c33
--- /dev/null
+++ b/scraper/brave.php
@@ -0,0 +1,2287 @@
+<?php
+/*
+$brave = new brave();
+
+$handle = fopen("captcha.html", "r");
+$html = fread($handle, filesize("captcha.html"));
+fclose($handle);
+
+$brave->bypasscaptcha($html, "yes", "ca");*/
+
+class brave{
+
+ /*
+     HTML parser helper (lib/fuckhtml.php) and next-page token store
+     (lib/nextpage.php). Declared explicitly: creating them as dynamic
+     properties is deprecated as of PHP 8.2. Kept public so the
+     visibility matches what the dynamic properties had.
+ */
+ public $fuckhtml;
+ public $nextpage;
+
+ /**
+  * Load the helper libraries and create the parser and the next-page
+  * store used by this scraper.
+  */
+ public function __construct(){
+
+     // include_once: if the library was already loaded during this
+     // request, a plain include would redeclare the class and abort
+     // with a fatal error
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     include_once "lib/nextpage.php";
+     $this->nextpage = new nextpage("brave");
+ }
+
+ /**
+  * Describe the search filters available for a result page.
+  *
+  * @param string $page "web" or "news".
+  * @return array|null Filter definitions keyed by filter name, or null
+  *                    for any other page.
+  */
+ public function getfilters($page){
+
+     // country codes offered on both pages; the "all" entry is
+     // prepended per page because its label is capitalized differently
+     $regions = [
+         "ar" => "Argentina",
+         "au" => "Australia",
+         "at" => "Austria",
+         "be" => "Belgium",
+         "br" => "Brazil",
+         "ca" => "Canada",
+         "cl" => "Chile",
+         "cn" => "China",
+         "dk" => "Denmark",
+         "fi" => "Finland",
+         "fr" => "France",
+         "de" => "Germany",
+         "hk" => "Hong Kong",
+         "in" => "India",
+         "id" => "Indonesia",
+         "it" => "Italy",
+         "jp" => "Japan",
+         "kr" => "Korea",
+         "my" => "Malaysia",
+         "mx" => "Mexico",
+         "nl" => "Netherlands",
+         "nz" => "New Zealand",
+         "no" => "Norway",
+         "pl" => "Poland",
+         "pt" => "Portugal",
+         "ph" => "Philippines",
+         "ru" => "Russia",
+         "sa" => "Saudi Arabia",
+         "za" => "South Africa",
+         "es" => "Spain",
+         "se" => "Sweden",
+         "ch" => "Switzerland",
+         "tw" => "Taiwan",
+         "tr" => "Turkey",
+         "gb" => "United Kingdom",
+         "us" => "United States"
+     ];
+
+     $nsfw_filter = [
+         "display" => "NSFW",
+         "option" => [
+             "yes" => "Yes",
+             "maybe" => "Maybe",
+             "no" => "No"
+         ]
+     ];
+
+     if($page == "web"){
+
+         return [
+             "country" => [
+                 "display" => "Country",
+                 "option" => ["all" => "All Regions"] + $regions
+             ],
+             "nsfw" => $nsfw_filter,
+             "newer" => [
+                 "display" => "Newer than",
+                 "option" => "_DATE"
+             ],
+             "older" => [
+                 "display" => "Older than",
+                 "option" => "_DATE"
+             ]
+         ];
+     }
+
+     if($page == "news"){
+
+         return [
+             "country" => [
+                 "display" => "Country",
+                 "option" => ["all" => "All regions"] + $regions
+             ],
+             "nsfw" => $nsfw_filter
+         ];
+     }
+
+     // any other page has no filters
+     return null;
+ }
+
+ /**
+  * Perform a GET request and return the response body.
+  *
+  * @param string $url     Base URL to request.
+  * @param array  $get     Query parameters; appended to $url with
+  *                        http_build_query() unless empty. This used to
+  *                        carry a "= []" default, but an optional
+  *                        parameter placed before required ones could
+  *                        never be omitted and is deprecated in PHP 8.0.
+  * @param string $nsfw    "yes", "maybe" or "no"; mapped to the safesearch
+  *                        cookie values "off", "moderate" and "strict".
+  *                        Any other value is sent unchanged.
+  * @param string $country Country code sent in the "country" cookie;
+  *                        "any" is sent as "all".
+  * @return string|bool Whatever curl_exec() returned.
+  * @throws Exception With the cURL error message when the transfer fails.
+  */
+ private function get($url, $get, $nsfw, $country){
+
+     switch($nsfw){
+
+         case "yes": $nsfw = "off"; break;
+         case "maybe": $nsfw = "moderate"; break;
+         case "no": $nsfw = "strict"; break;
+     }
+
+     // must happen before the Cookie header is assembled: this rewrite
+     // used to run after the header was built, so it never took effect
+     if($country == "any"){
+
+         $country = "all";
+     }
+
+     $headers = [
+         "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Cookie: safesearch={$nsfw}; country={$country}; useLocation=0; summarizer=0",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     try{
+
+         curl_setopt($curlproc, CURLOPT_URL, $url);
+
+         curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+         curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+         curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+         curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+         curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+         curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+         curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+         $data = curl_exec($curlproc);
+
+         if(curl_errno($curlproc)){
+
+             throw new Exception(curl_error($curlproc));
+         }
+
+         return $data;
+
+     }finally{
+
+         // release the handle on the error path too
+         curl_close($curlproc);
+     }
+ }
+
+ /**
+  * Fetch a Brave web search page and convert it into the result array.
+  *
+  * When $get["npt"] is set, the query (including nsfw and country) is
+  * restored from the stored next-page token. Otherwise the query is built
+  * from $get["s"], $get["nsfw"], $get["country"], $get["older"] and
+  * $get["newer"].
+  *
+  * @param array $get Request parameters as described above. "older" and
+  *                   "newer" are either false or a timestamp for date().
+  * @return array Keys: status, spelling, npt, answer, web, image, video,
+  *               news, related.
+  * @throws Exception When the search term is empty or longer than 2048
+  *                   bytes, or when the page could not be fetched.
+  */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // get next page data
+         $q = json_decode($this->nextpage->get($get["npt"], "web"), true);
+
+         $q["spellcheck"] = 0;
+
+         $nsfw = $q["nsfw"];
+         unset($q["nsfw"]);
+
+         $country = $q["country"];
+         unset($q["country"]);
+
+     }else{
+
+         // get _GET data instead
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         if(strlen($search) > 2048){
+
+             throw new Exception("Search query is too long!");
+         }
+
+         $nsfw = $get["nsfw"];
+         $country = $get["country"];
+         $older = $get["older"];
+         $newer = $get["newer"];
+
+         $q = [
+             "q" => $search
+         ];
+
+         /*
+             Pass older/newer filters to brave
+         */
+         if($newer !== false){
+
+             $newer = date("Y-m-d", $newer);
+
+             if($older === false){
+
+                 // open-ended range: end it today
+                 $older = date("Y-m-d", time());
+             }
+         }
+
+         // $older is already a string when it was defaulted above
+         if(
+             is_string($older) === false &&
+             $older !== false
+         ){
+
+             $older = date("Y-m-d", $older);
+
+             if($newer === false){
+
+                 $newer = "1970-01-02";
+             }
+         }
+
+         if($older !== false){
+
+             $q["tf"] = "{$newer}to{$older}";
+         }
+     }
+
+     try{
+         $html =
+             $this->get(
+                 "https://search.brave.com/search",
+                 $q,
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         // keep the original failure attached for debugging
+         throw new Exception("Could not fetch search page", 0, $error);
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // load html
+     $this->fuckhtml->load($html);
+
+     /*
+         Get next page "token"
+     */
+     $nextpage = $this->fuckhtml->getElementsByClassName("btn ml-15", "a");
+
+     if(
+         count($nextpage) !== 0 &&
+         isset($nextpage[0]["attributes"]["href"]) &&
+         preg_match(
+             '/offset=([0-9]+)/',
+             $this->fuckhtml->getTextContent($nextpage[0]["attributes"]["href"]),
+             $offset
+         ) === 1
+     ){
+
+         // only store a token when an offset was actually found;
+         // otherwise the token would point back at offset 0
+         $q["offset"] = (int)$offset[1];
+         $q["nsfw"] = $nsfw;
+         $q["country"] = $country;
+
+         $out["npt"] =
+             $this->nextpage->store(
+                 json_encode($q),
+                 "web"
+             );
+     }
+
+     /*
+         Get discussions (and append them to web results)
+     */
+
+     // they're loaded using javascript!!
+     $discussion = $this->fuckhtml->getElementById("js-discussions", "script");
+
+     if(
+         $discussion &&
+         isset($discussion["attributes"]["data"])
+     ){
+
+         $discussion =
+             json_decode(
+                 $this->fuckhtml->getTextContent($discussion["attributes"]["data"]),
+                 true
+             );
+
+         // json_decode() yields null on malformed input
+         $discussion_results =
+             (is_array($discussion) && isset($discussion["results"]) && is_array($discussion["results"])) ?
+             $discussion["results"] : [];
+
+         foreach($discussion_results as $result){
+
+             $data = [
+                 "title" => $this->titledots($result["title"]),
+                 "description" => null,
+                 "url" => $result["url"],
+                 "date" => null,
+                 "type" => "web",
+                 "thumb" => [
+                     "url" => null,
+                     "ratio" => null
+                 ],
+                 "sublink" => [],
+                 "table" => []
+             ];
+
+             // description
+             $data["description"] =
+                 $this->limitstrlen(
+                     $this->limitwhitespace(
+                         $this->titledots(
+                             $this->fuckhtml->getTextContent($result["description"])
+                         )
+                     )
+                 );
+
+             if(($result["age"] ?? "") != ""){
+
+                 $data["date"] = strtotime($result["age"]);
+             }
+
+             // populate table
+             if(($result["data"]["num_answers"] ?? "") != ""){
+
+                 $data["table"]["Replies"] = (int)$result["data"]["num_answers"];
+             }
+
+             if(($result["data"]["score"] ?? "") != ""){
+
+                 $score = explode("|", $result["data"]["score"]);
+
+                 if(count($score) === 2){
+
+                     $score = ((int)$score[1]) . " (" . trim($score[0]) . ")";
+                 }else{
+
+                     $score = (int)$score[0];
+                 }
+
+                 $data["table"]["Votes"] = $score;
+             }
+
+             if(($result["thumbnail"] ?? "") != ""){
+
+                 $data["thumb"]["url"] = $result["thumbnail"];
+                 $data["thumb"]["ratio"] = "16:9";
+             }
+
+             $out["web"][] = $data;
+         }
+     }
+
+     /*
+         Get related searches
+     */
+     $faq = $this->fuckhtml->getElementById("js-faq", "script");
+
+     if(
+         $faq &&
+         isset($faq["attributes"]["data"])
+     ){
+
+         $faq =
+             json_decode(
+                 $this->fuckhtml->getTextContent($faq["attributes"]["data"]),
+                 true
+             );
+
+         if(
+             is_array($faq) &&
+             isset($faq["items"]) &&
+             is_array($faq["items"])
+         ){
+
+             foreach($faq["items"] as $related){
+
+                 $out["related"][] = $related["question"];
+             }
+         }
+     }
+
+     /*
+         Get spelling autocorrect
+     */
+     $altered = $this->fuckhtml->getElementById("altered-query", "div");
+
+     if($altered){
+
+         $this->fuckhtml->load($altered);
+
+         $altered = $this->fuckhtml->getElementsByTagName("a");
+
+         if(count($altered) === 2){
+
+             $out["spelling"] = [
+                 "type" => "including",
+                 "using" => $this->fuckhtml->getTextContent($altered[0]),
+                 "correction" => $this->fuckhtml->getTextContent($altered[1])
+             ];
+         }
+
+         $this->fuckhtml->load($html);
+     }
+
+     /*
+         Get web results
+     */
+     $resulthtml = $this->fuckhtml->getElementById("results", "div");
+
+     $this->fuckhtml->load($resulthtml);
+     $items = 0;
+     foreach(
+         $this->fuckhtml->getElementsByClassName("snippet fdb")
+         as $result
+     ){
+
+         $data = [
+             "title" => null,
+             "description" => null,
+             "url" => null,
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+
+         if(
+             isset($result["attributes"]["data-type"]) &&
+             $result["attributes"]["data-type"] == "ad"
+         ){
+
+             // is an ad, skip
+             continue;
+         }
+
+         $this->fuckhtml->load($result);
+
+         /*
+             Get title
+         */
+         $title = $this->fuckhtml->getElementsByClassName("snippet-title", "span");
+
+         if(count($title) === 0){
+
+             // encountered AI summarizer
+             // or misspelling indicator @TODO
+             continue;
+         }
+
+         // prefer the title attribute when the element carries one
+         if(isset($title[0]["attributes"]["title"])){
+
+             $data["title"] =
+                 $this->titledots(
+                     $this->fuckhtml->getTextContent($title[0]["attributes"]["title"])
+                 );
+         }else{
+
+             $data["title"] =
+                 $this->titledots(
+                     $this->fuckhtml->getTextContent($title[0])
+                 );
+         }
+
+         /*
+             Get description
+         */
+         $description = $this->fuckhtml->getElementsByClassName("snippet-description", "p");
+
+         if(count($description) !== 0){
+
+             $data["description"] =
+                 $this->titledots(
+                     $this->fuckhtml->getTextContent($description[0])
+                 );
+
+             // also check for thumbnail in here
+             $img = $this->fuckhtml->getElementsByClassName("thumb", "img");
+
+             if(count($img) !== 0){
+
+                 $data["thumb"] = [
+                     "url" => $this->unshiturl($img[0]["attributes"]["src"]),
+                     "ratio" => "16:9"
+                 ];
+             }else{
+
+                 // might be a video thumbnail wrapper?
+                 $wrapper = $this->fuckhtml->getElementsByClassName("video-thumb", "a");
+
+                 if(count($wrapper) !== 0){
+
+                     // we found a video
+                     $this->fuckhtml->load($wrapper[0]);
+
+                     $img = $this->fuckhtml->getElementsByTagName("img");
+
+                     if(count($img) !== 0){
+
+                         $data["thumb"] = [
+                             "url" => $this->unshiturl($img[0]["attributes"]["src"]),
+                             "ratio" => "16:9"
+                         ];
+                     }
+
+                     // get the video length, if its there
+                     $duration = $this->fuckhtml->getElementsByClassName("duration", "div");
+
+                     if(count($duration) !== 0){
+
+                         $data["table"]["Duration"] = $duration[0]["innerHTML"];
+                     }
+
+                     // reset html load
+                     $this->fuckhtml->load($result);
+                 }
+             }
+
+         }else{
+
+             // is a steam/shop listing
+             $description_alt = $this->fuckhtml->getElementsByClassName("text-sm", "div");
+
+             if(count($description_alt) !== 0){
+
+                 switch($description_alt[0]["attributes"]["class"]){
+
+                     case "text-sm text-gray":
+                     case "description text-sm":
+
+                         $data["description"] =
+                             $this->titledots(
+                                 $this->fuckhtml->getTextContent($description_alt[0])
+                             );
+                         break;
+                 }
+
+                 // get table sublink
+                 $sublink = $this->fuckhtml->getElementsByClassName("r-attr text-sm", "div");
+
+                 if(count($sublink) !== 0){
+
+                     $this->tablesublink($sublink, $data);
+                 }
+
+                 // check for thumb element
+                 $data["thumb"] = $this->getimagelinkfromstyle("thumb");
+             }else{
+
+                 // ok... finally...
+                 // maybe its the instant answer thingy
+                 $answer = $this->fuckhtml->getElementsByClassName("answer");
+
+                 if(count($answer) !== 0){
+
+                     $data["description"] =
+                         $this->titledots(
+                             $this->fuckhtml->getTextContent($answer[0])
+                         );
+                 }
+             }
+         }
+
+         // brave prefixes some descriptions with "<date> - "; split that
+         // prefix off into $data["date"]. explode() always returns at
+         // least one piece, so only a two-piece split can carry a date
+         // prefix. The cast keeps the description a string, as before.
+         $pieces = explode(" - ", (string)$data["description"], 2);
+
+         if(count($pieces) === 2){
+
+             // attempt to parse
+             $time = strtotime($pieces[0]);
+
+             if($time !== false){
+
+                 // got response
+                 $data["date"] = $time;
+
+                 array_shift($pieces);
+             }
+         }
+
+         // merge back
+         $data["description"] = implode(" - ", $pieces);
+
+         /*
+             Check content type
+         */
+         $content_type = $this->fuckhtml->getElementsByClassName("content-type", "span");
+
+         if(count($content_type) !== 0){
+
+             $data["type"] =
+                 strtolower($this->fuckhtml->getTextContent($content_type[0]));
+         }
+
+         /*
+             Check subtext table thingy
+         */
+         $table_items =
+             array_merge(
+                 $this->fuckhtml->getElementsByClassName("item-attributes", "div"),
+                 $this->fuckhtml->getElementsByClassName("r", "div")
+             );
+
+         if(count($table_items) !== 0){
+
+             foreach($table_items as $table){
+
+                 $this->fuckhtml->load($table);
+
+                 $span = $this->fuckhtml->getElementsByClassName("text-sm", "*");
+
+                 foreach($span as $item){
+
+                     // "Key: value" pairs, split on the first colon only
+                     $item =
+                         explode(
+                             ":",
+                             $this->fuckhtml->getTextContent(preg_replace('/\n/', " ", $item["innerHTML"])),
+                             2
+                         );
+
+                     if(count($item) === 2){
+
+                         $data["table"][trim($item[0])] = trim($this->limitwhitespace($item[1]));
+                     }
+                 }
+             }
+
+             $this->fuckhtml->load($result);
+         }
+
+         // get video sublinks
+         $table_items =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "snippet-description published-time",
+                 "p"
+             );
+
+         if(count($table_items) !== 0){
+
+             $table_items =
+                 explode(
+                     '<span class="mr-15"></span>',
+                     $table_items[0]["innerHTML"],
+                     2
+                 );
+
+             if(count($table_items) === 2){
+
+                 $item2 = [];
+
+                 // split on the first colon only so values that contain
+                 // a colon themselves (e.g. "1:23") are kept whole
+                 $item2[] = explode(":", $this->fuckhtml->getTextContent($table_items[0]), 2);
+
+                 if(trim($table_items[1]) != ""){
+
+                     $item2[] = explode(":", $this->fuckhtml->getTextContent($table_items[1]), 2);
+                 }
+
+                 foreach($item2 as $it){
+
+                     if(count($it) !== 2){
+
+                         // no "key: value" shape, nothing to store
+                         continue;
+                     }
+
+                     $data["table"][trim($it[0])] = trim($it[1]);
+                 }
+             }
+         }
+
+         /*
+             Get URL
+         */
+         $data["url"] =
+             $this->fuckhtml->getTextContent(
+                 $this->fuckhtml
+                 ->getElementsByTagName("a")
+                 [0]
+                 ["attributes"]
+                 ["href"]
+             );
+
+         /*
+             Get sublinks
+         */
+         $sublinks_elems = $this->fuckhtml->getElementsByClassName("snippet", "div");
+
+         $sublinks = [];
+
+         foreach($sublinks_elems as $sublink){
+
+             $this->fuckhtml->load($sublink);
+
+             $a = $this->fuckhtml->getElementsByTagName("a")[0];
+
+             $sublinks[] = [
+                 "title" => $this->fuckhtml->getTextContent($a),
+                 "date" => null,
+                 "description" =>
+                     $this->titledots(
+                         $this->fuckhtml->getTextContent(
+                             $this->fuckhtml->getElementsByTagName("p")[0]
+                         )
+                     ),
+                 "url" => $a["attributes"]["href"]
+             ];
+         }
+
+         if(count($sublinks_elems) !== 0){
+
+             // the loop above left the parser on the last sublink;
+             // go back to the result so deep links are searched in it
+             $this->fuckhtml->load($result);
+         }
+
+         /*
+             Get smaller sublinks
+         */
+         $sublinks_elems = $this->fuckhtml->getElementsByClassName("deep-link", "a");
+
+         foreach($sublinks_elems as $sublink){
+
+             $sublinks[] = [
+                 "title" => $this->fuckhtml->getTextContent($sublink),
+                 "date" => null,
+                 "description" => null,
+                 "url" => $sublink["attributes"]["href"]
+             ];
+         }
+
+         // append sublinks to $data !!
+         $data["sublink"] = $sublinks;
+
+         // append first result to start of $out["web"]
+         // other results are after
+         if($items === 0){
+
+             $out["web"] = [$data, ...$out["web"]];
+         }else{
+
+             $out["web"][] = $data;
+         }
+         $items++;
+     }
+
+     /*
+         Get news
+     */
+     $this->fuckhtml->load($resulthtml);
+     $news_carousel = $this->fuckhtml->getElementById("news-carousel");
+
+     if($news_carousel){
+
+         // only load the carousel once we know it exists
+         $this->fuckhtml->load($news_carousel);
+
+         $a = $this->fuckhtml->getElementsByClassName("card fdb", "a");
+
+         foreach($a as $news){
+
+             $this->fuckhtml->load($news);
+
+             $out["news"][] = [
+                 "title" =>
+                     $this->titledots(
+                         $this->fuckhtml->getTextContent(
+                             $this->fuckhtml->getElementsByClassName("title", "div")[0]
+                         )
+                     ),
+                 "description" => null,
+                 "date" =>
+                     strtotime(
+                         $this->fuckhtml->getTextContent(
+                             $this->fuckhtml->getElementsByClassName("card-footer__timestamp", "span")[0]
+                         )
+                     ),
+                 "thumb" => $this->getimagelinkfromstyle("img-bg"),
+                 "url" => $this->fuckhtml->getTextContent($news["attributes"]["href"])
+             ];
+         }
+     }
+
+     /*
+         Get videos
+     */
+     $this->fuckhtml->load($resulthtml);
+     $video_carousel = $this->fuckhtml->getElementById("video-carousel");
+
+     if($video_carousel){
+
+         $this->fuckhtml->load($video_carousel);
+
+         $a = $this->fuckhtml->getElementsByClassName("card fdb", "a");
+
+         foreach($a as $video){
+
+             $this->fuckhtml->load($video);
+
+             $date = null;
+
+             $date_o = $this->fuckhtml->getElementsByClassName("text-gray text-xs", "span");
+
+             if(count($date_o) !== 0){
+
+                 $date = strtotime($this->fuckhtml->getTextContent($date_o[0]));
+             }
+
+             $out["video"][] = [
+                 "title" =>
+                     $this->titledots(
+                         $this->fuckhtml->getTextContent(
+                             $this->fuckhtml->getElementsByClassName("title", "div")[0]
+                         )
+                     ),
+                 "description" => null,
+                 "date" => $date,
+                 "duration" => null,
+                 "views" => null,
+                 "thumb" => $this->getimagelinkfromstyle("img-bg"),
+                 "url" => $this->fuckhtml->getTextContent($video["attributes"]["href"])
+             ];
+         }
+     }
+
+     /*
+         Get DEFINITION snippet
+     */
+     $this->fuckhtml->load($html);
+     $infobox = $this->fuckhtml->getElementById("rh-definitions", "div");
+
+     if($infobox !== false){
+
+         $answer = [
+             "title" => null,
+             "description" => [],
+             "url" => null,
+             "thumb" => null,
+             "table" => [],
+             "sublink" => []
+         ];
+
+         $this->fuckhtml->load($infobox);
+
+         $answer["title"] =
+             $this->fuckhtml->getTextContent(
+                 $this->fuckhtml->getElementsByClassName("header", "h5")[0]
+             );
+
+         $sections = $this->fuckhtml->getElementsByTagName("section");
+
+         // $i is the index of the last entry in $answer["description"]
+         $i = -1;
+         foreach($sections as $section){
+
+             $this->fuckhtml->load($section);
+             $nodes = $this->fuckhtml->getElementsByTagName("*");
+
+             $li = 1;
+             $pronounce = false;
+             foreach($nodes as $item){
+
+                 switch($item["tagName"]){
+
+                     case "h6":
+
+                         if(
+                             isset($item["attributes"]["class"]) &&
+                             $item["attributes"]["class"] == "h6 pronunciation"
+                         ){
+
+                             if($pronounce){
+
+                                 // only the first pronunciation per section
+                                 break;
+                             }
+
+                             $answer["description"][] = [
+                                 "type" => "quote",
+                                 "value" => $this->fuckhtml->getTextContent($item)
+                             ];
+
+                             // the word goes into a query string, so it
+                             // has to be URL-encoded
+                             $answer["description"][] = [
+                                 "type" => "audio",
+                                 "url" =>
+                                     "https://search.brave.com/api/rhfetch?rhtype=definitions&word=" .
+                                     urlencode($answer["title"]) .
+                                     "&source=ahd-5"
+                             ];
+
+                             $pronounce = true;
+                             $i = $i + 2;
+                             break;
+                         }
+
+                         $answer["description"][] = [
+                             "type" => "title",
+                             "value" => $this->fuckhtml->getTextContent($item)
+                         ];
+                         $i++;
+                         break;
+
+                     case "li":
+
+                         if(
+                             $i !== -1 &&
+                             $answer["description"][$i]["type"] == "text"
+                         ){
+
+                             // continue the numbered list in the same text node
+                             $answer["description"][$i]["value"] .=
+                                 "\n" . $li . ". " .
+                                 $this->fuckhtml->getTextContent($item);
+
+                         }else{
+
+                             $answer["description"][] = [
+                                 "type" => "text",
+                                 "value" =>
+                                     $li . ". " .
+                                     $this->fuckhtml->getTextContent($item)
+                             ];
+                             $i++;
+                         }
+                         $li++;
+                         break;
+
+                     case "a":
+                         $answer["url"] =
+                             $this->fuckhtml->getTextContent($item["attributes"]["href"]);
+                         break;
+                 }
+             }
+         }
+
+         $out["answer"][] = $answer;
+     }
+
+     /*
+         Get instant answer
+     */
+     $this->fuckhtml->load($html);
+     $infobox = $this->fuckhtml->getElementById("infobox", "div");
+
+     if($infobox !== false){
+
+         $answer = [
+             "title" => null,
+             "description" => [],
+             "url" => null,
+             "thumb" => null,
+             "table" => [],
+             "sublink" => []
+         ];
+
+         $this->fuckhtml->load($infobox);
+         $div = $this->fuckhtml->getElementsByTagName("div");
+
+         /*
+             Get title + url
+         */
+         $title = $this->fuckhtml->getElementsByClassName("infobox-title", "a");
+
+         if(count($title) !== 0){
+
+             $answer["title"] = $this->fuckhtml->getTextContent($title[0]);
+
+             $answer["url"] =
+                 $this->fuckhtml->getTextContent($title[0]["attributes"]["href"]);
+         }
+
+         /*
+             Get thumbnail
+         */
+         $thumb = $this->getimagelinkfromstyle("thumb");
+
+         if($thumb["url"] !== null){
+
+             $answer["thumb"] = $thumb["url"];
+         }
+
+         /*
+             Get table
+         */
+         $title = $this->fuckhtml->getElementsByClassName("infobox-attr-header", "div");
+
+         $rowhtml = $infobox;
+
+         if(count($title) >= 2){
+
+             // keep only the markup before the second attribute header
+             $rowhtml =
+                 explode(
+                     $title[1]["outerHTML"],
+                     $infobox["innerHTML"],
+                     2
+                 )[0];
+         }
+
+         $this->fuckhtml->load($rowhtml);
+
+         $rows = $this->fuckhtml->getElementsByClassName("infobox-attr", "div");
+
+         foreach($rows as $row){
+
+             if(!isset($row["innerHTML"])){
+
+                 continue;
+             }
+
+             $this->fuckhtml->load($row);
+             $span = $this->fuckhtml->getElementsByTagName("span");
+
+             if(count($span) === 2){
+
+                 $answer["table"][
+                     $this->fuckhtml->getTextContent($span[0])
+                 ] = str_replace("\n", ", ", $this->fuckhtml->getTextContent($span[1], true));
+             }
+         }
+
+         $this->fuckhtml->load($infobox);
+
+         /*
+             Parse stackoverflow answers
+         */
+         $code = $this->fuckhtml->getElementById("codebox-answer", $div);
+
+         if($code){
+
+             // this might be standalone text with no paragraphs, check for that
+             $author = $this->fuckhtml->getElementById("author");
+
+             // strip the author element from the answer markup, if there is one
+             $desc_tmp =
+                 $author ?
+                 str_replace($author["outerHTML"], "", $code["innerHTML"]) :
+                 $code["innerHTML"];
+
+             $this->fuckhtml->load($desc_tmp);
+             $code = $this->fuckhtml->getElementsByTagName("*");
+
+             if(count($code) === 0){
+
+                 $answer["description"] = [
+                     [
+                         "type" => "text",
+                         "value" => $this->fuckhtml->getTextContent($desc_tmp)
+                     ]
+                 ];
+
+                 if($author){
+
+                     $answer["description"][] = [
+                         "type" => "quote",
+                         "value" => $this->fuckhtml->getTextContent($author)
+                     ];
+                 }
+             }else{
+
+                 // $i counts the entries in $text
+                 $text = [];
+                 $i = 0;
+
+                 foreach($code as $snippet){
+
+                     switch($snippet["tagName"]){
+
+                         case "p":
+                             $this->fuckhtml->load($snippet["innerHTML"]);
+
+                             $codetags = $this->fuckhtml->getElementsByTagName("*");
+
+                             $tmphtml = $snippet["innerHTML"];
+
+                             foreach($codetags as $tag){
+
+                                 if(!isset($tag["outerHTML"])){
+
+                                     continue;
+                                 }
+
+                                 // text before the tag, then the tag itself
+                                 $tmphtml =
+                                     explode(
+                                         $tag["outerHTML"],
+                                         $tmphtml,
+                                         2
+                                     );
+
+                                 $value = $this->fuckhtml->getTextContent($tmphtml[0], false, false);
+                                 $this->appendtext($value, $text, $i);
+
+                                 switch($tag["tagName"]){
+
+                                     case "code": $type = "inline_code"; break;
+                                     case "em": $type = "italic"; break;
+                                     case "blockquote": $type = "quote"; break;
+                                     default: $type = "text";
+                                 }
+
+                                 $value = $this->fuckhtml->getTextContent($tag, false, true);
+
+                                 if(trim($value) != ""){
+
+                                     $text[] = [
+                                         "type" => $type,
+                                         "value" => $value
+                                     ];
+                                     $i++;
+                                 }
+
+                                 if(count($tmphtml) === 2){
+
+                                     // continue with what follows the tag
+                                     $tmphtml = $tmphtml[1];
+                                 }else{
+
+                                     break;
+                                 }
+                             }
+
+                             if(is_array($tmphtml)){
+
+                                 $tmphtml = $tmphtml[0];
+                             }
+
+                             if(strlen($tmphtml) !== 0){
+
+                                 $value = $this->fuckhtml->getTextContent($tmphtml, false, false);
+                                 $this->appendtext($value, $text, $i);
+                             }
+                             break;
+
+                         case "pre":
+
+                             // trim the text right before a code block;
+                             // there is no previous entry when $i is 0
+                             if($i !== 0){
+
+                                 switch($text[$i - 1]["type"]){
+
+                                     case "text":
+                                     case "italic":
+                                         $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]);
+                                         break;
+                                 }
+                             }
+
+                             $text[] = [
+                                 "type" => "code",
+                                 "value" =>
+                                     rtrim(
+                                         $this->fuckhtml->getTextContent(
+                                             $snippet,
+                                             true,
+                                             false
+                                         )
+                                     )
+                             ];
+                             $i++;
+
+                             break;
+
+                         case "ol":
+                             $o = 0;
+
+                             $this->fuckhtml->load($snippet);
+                             $li = $this->fuckhtml->getElementsByTagName("li");
+
+                             foreach($li as $elem){
+                                 $o++;
+
+                                 $this->appendtext(
+                                     $o . ". " .
+                                     $this->fuckhtml->getTextContent($elem),
+                                     $text,
+                                     $i
+                                 );
+                             }
+                             break;
+                     }
+                 }
+
+                 if(
+                     $i !== 0 &&
+                     $text[$i - 1]["type"] == "text"
+                 ){
+
+                     $text[$i - 1]["value"] = rtrim($text[$i - 1]["value"]);
+                 }
+
+                 if($author){
+
+                     $text[] = [
+                         "type" => "quote",
+                         "value" => $this->fuckhtml->getTextContent($author)
+                     ];
+                 }
+
+                 $answer["description"] = $text;
+             }
+         }else{
+
+             /*
+                 Get normal description
+             */
+             $description = $this->fuckhtml->getElementsByClassName("mb-6", "div");
+
+             if(count($description) !== 0){
+
+                 $description = [
+                     [
+                         "type" => "text",
+                         "value" =>
+                             $this->titledots(
+                                 preg_replace(
+                                     '/ Wikipedia$/',
+                                     "",
+                                     $this->fuckhtml->getTextContent($description[0])
+                                 )
+                             )
+                     ]
+                 ];
+
+                 $ratings = $this->fuckhtml->getElementById("ratings");
+
+                 if($ratings){
+
+                     $this->fuckhtml->load($ratings);
+
+                     $ratings = $this->fuckhtml->getElementsByClassName("flex-hcenter mb-10", "div");
+
+                     $description[] = [
+                         "type" => "title",
+                         "value" => "Ratings"
+                     ];
+
+                     foreach($ratings as $rating){
+
+                         $this->fuckhtml->load($rating);
+
+                         $num =
+                             $this->fuckhtml->getTextContent(
+                                 $this->fuckhtml->getElementsByClassName("r-num", "div")[0]
+                             );
+
+                         $href = $this->fuckhtml->getElementsByClassName("mr-10", "a")[0];
+
+                         $votes =
+                             $this->fuckhtml->getTextContent(
+                                 $this->fuckhtml->getElementsByClassName("text-sm", "span")[0]
+                             );
+
+                         $c = count($description) - 1;
+
+                         // glue the score onto a preceding text node
+                         if(
+                             $c !== -1 &&
+                             $description[$c]["type"] == "text"
+                         ){
+
+                             $description[$c]["value"] .= $num . " ";
+                         }else{
+
+                             $description[] = [
+                                 "type" => "text",
+                                 "value" => $num . " "
+                             ];
+                         }
+
+                         $description[] = [
+                             "type" => "link",
+                             "value" => $this->fuckhtml->getTextContent($href),
+                             "url" => $this->fuckhtml->getTextContent($href["attributes"]["href"])
+                         ];
+
+                         $description[] = [
+                             "type" => "text",
+                             "value" => " (" . $votes . ")\n"
+                         ];
+                     }
+                 }
+
+                 $answer["description"] = $description;
+             }
+         }
+
+         /*
+             Get sublinks
+         */
+         $this->fuckhtml->load($infobox);
+
+         $profiles = $this->fuckhtml->getElementById("profiles");
+
+         if($profiles){
+
+             $profiles = $this->fuckhtml->getElementsByClassName("chip", "a");
+
+             foreach($profiles as $profile){
+
+                 $name = $this->fuckhtml->getTextContent($profile["attributes"]["title"]);
+
+                 if(strtolower($name) == "steampowered"){
+
+                     $name = "Steam";
+                 }
+
+                 $answer["sublink"][$name] =
+                     $this->fuckhtml->getTextContent($profile["attributes"]["href"]);
+             }
+         }
+
+         $actors = $this->fuckhtml->getElementById("panel-movie-cast");
+
+         if($actors){
+
+             $this->fuckhtml->load($actors);
+
+             $actors = $this->fuckhtml->getElementsByClassName("card");
+
+             $answer["description"][] = [
+                 "type" => "title",
+                 "value" => "Cast"
+             ];
+
+             foreach($actors as $actor){
+
+                 $this->fuckhtml->load($actor);
+
+                 $answer["description"][] = [
+                     "type" => "text",
+                     "value" =>
+                         $this->fuckhtml->getTextContent(
+                             $this->fuckhtml->getElementsByClassName("card-body")[0]
+                         )
+                 ];
+
+                 $answer["description"][] = [
+                     "type" => "image",
+                     "url" => $this->getimagelinkfromstyle("person-thumb")["url"]
+                 ];
+             }
+         }
+
+         $out["answer"][] = $answer;
+     }
+
+     /*
+         Get actor standalone thingy
+     */
+     $this->fuckhtml->load($resulthtml);
+     $actors = $this->fuckhtml->getElementById("predicate-entity");
+
+     if($actors){
+
+         $this->fuckhtml->load($actors);
+
+         $cards = $this->fuckhtml->getElementsByClassName("card");
+
+         $url = $this->fuckhtml->getElementsByClassName("disclaimer", "div")[0];
+
+         $this->fuckhtml->load($url);
+
+         $url =
+             $this->fuckhtml->getTextContent(
+                 $this->fuckhtml
+                 ->getElementsByTagName("a")
+                 [0]
+                 ["attributes"]
+                 ["href"]
+             );
+
+         $this->fuckhtml->load($actors);
+
+         $answer = [
+             "title" =>
+                 $this->fuckhtml->getTextContent(
+                     $this->fuckhtml->getElementsByClassName("entity", "span")[0]
+                 ) . " (Cast)",
+             "description" => [],
+             "url" => $url,
+             "sublink" => [],
+             "thumb" => null,
+             "table" => []
+         ];
+
+         foreach($cards as $card){
+
+             $this->fuckhtml->load($card);
+
+             $answer["description"][] = [
+                 "type" => "title",
+                 "value" =>
+                     $this->fuckhtml->getTextContent(
+                         $this->fuckhtml->getElementsByClassName("title")[0]
+                     )
+             ];
+
+             $answer["description"][] = [
+                 "type" => "text",
+                 "value" =>
+                     $this->fuckhtml->getTextContent(
+                         $this->fuckhtml->getElementsByClassName("text-xs desc")[0]
+                     )
+             ];
+
+             $answer["description"][] = [
+                 "type" => "image",
+                 "url" => $this->getimagelinkfromstyle("img-bg")["url"]
+             ];
+         }
+
+         $out["answer"][] = $answer;
+     }
+
+     return $out;
+ }
+
+ /**
+  * Fetch a Brave news search page and convert it into the result array.
+  *
+  * @param array $get Request parameters: "s" (search term), "nsfw" and
+  *                   "country".
+  * @return array Keys: status, npt (always null here) and news, a list of
+  *               articles with title, author, description, date, thumb
+  *               and url.
+  * @throws Exception When the search term is empty or longer than 2048
+  *                   bytes, or when the page could not be fetched.
+  */
+ public function news($get){
+
+     $search = $get["s"];
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     if(strlen($search) > 2048){
+
+         throw new Exception("Search query is too long!");
+     }
+
+     $nsfw = $get["nsfw"];
+     $country = $get["country"];
+
+     try{
+         $html =
+             $this->get(
+                 "https://search.brave.com/news",
+                 [
+                     "q" => $search
+                 ],
+                 $nsfw,
+                 $country
+             );
+
+     }catch(Exception $error){
+
+         // keep the original failure attached for debugging
+         throw new Exception("Could not fetch search page", 0, $error);
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     // load html
+     $this->fuckhtml->load($html);
+
+     $news =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "snippet inline gap-standard",
+             "div"
+         );
+
+     foreach($news as $article){
+
+         $data = [
+             "title" => null,
+             "author" => null,
+             "description" => null,
+             "date" => null,
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "url" => null
+         ];
+
+         $this->fuckhtml->load($article);
+         $elems = $this->fuckhtml->getElementsByTagName("*");
+
+         $title = $this->fuckhtml->getElementsByClassName("snippet-title", $elems);
+         $link = $this->fuckhtml->getElementsByClassName("result-header", $elems);
+
+         if(
+             count($title) === 0 ||
+             count($link) === 0 ||
+             !isset($link[0]["attributes"]["href"])
+         ){
+
+             // an article without a title or a link is unusable, skip
+             // it rather than indexing into an empty lookup
+             continue;
+         }
+
+         // get title
+         $data["title"] = $this->fuckhtml->getTextContent($title[0]["innerHTML"]);
+
+         // get description
+         $description = $this->fuckhtml->getElementsByClassName("snippet-description", $elems);
+
+         if(count($description) !== 0){
+
+             $data["description"] =
+                 $this->titledots(
+                     $this->fuckhtml->getTextContent($description[0]["innerHTML"])
+                 );
+         }
+
+         // get date: taken from the text after the bullet in the
+         // snippet-url element
+         $source = $this->fuckhtml->getElementsByClassName("snippet-url", $elems);
+
+         if(count($source) !== 0){
+
+             $date = explode("•", $this->fuckhtml->getTextContent($source[0]));
+
+             if(
+                 count($date) !== 1 &&
+                 trim($date[1]) != ""
+             ){
+
+                 $data["date"] = strtotime($date[1]);
+             }
+         }
+
+         // get URL
+         $data["url"] =
+             $this->fuckhtml->getTextContent(
+                 $this->unshiturl($link[0]["attributes"]["href"])
+             );
+
+         // get thumbnail: only taken when the article has exactly two
+         // images, in which case the second one is used
+         $thumb = $this->fuckhtml->getElementsByTagName("img");
+
+         if(
+             count($thumb) === 2 &&
+             isset($thumb[1]["attributes"]["src"]) &&
+             trim($thumb[1]["attributes"]["src"]) != ""
+         ){
+
+             $data["thumb"] = [
+                 "url" =>
+                     $this->fuckhtml->getTextContent(
+                         $this->unshiturl($thumb[1]["attributes"]["src"])
+                     ),
+                 "ratio" => "16:9"
+             ];
+         }
+
+         $out["news"][] = $data;
+     }
+
+     return $out;
+ }
+
+ /*
+ public function bypasscaptcha($html, $nsfw, $country){
+
+ // @TODO figure out why I still cant go trough
+ // the captcha wall even after breaking it
+
+ try{
+ $html =
+ $this->get(
+ "https://search.brave.com/goggles",
+ [
+ "q" => "site:dailymotion.com my bloody valentine"
+ ],
+ $nsfw,
+ $country
+ );
+
+ }catch(Exception $error){
+
+ throw new Exception("Could not fetch html");
+ }
+
+ // Bypass brave search captcha
+ // this captcha only appears on the goggles page
+ preg_match(
+ '/this\.img\.src = &#34;(.*)&#34;/',
+ $html,
+ $image
+ );
+
+ $image =
+ base64_decode(
+ explode(
+ "data:image/png;base64,",
+ $image[1]
+ )[1]
+ );
+
+ $im = new Imagick();
+ $im->readImageBlob($image);
+
+ $im->blurImage(20, 20);
+ $im->posterizeImage(2, imagick::IMGTYPE_COLORSEPARATION);
+
+ // if we encounter a white line thats longer than 45px
+ // we found the circle position
+ $iterator = $im->getPixelRegionIterator(0, 77, 310, 1);
+
+ $found = null;
+ foreach(
+ $iterator as $row
+ ){
+
+ $whitecount = 0;
+ $count = 0;
+
+ foreach($row as $pixel){
+
+ if($pixel->getColor()["r"] === 255){
+
+ $whitecount++;
+ $pixel->setColor("rgba(255,0,0,0)");
+
+ if($whitecount === 45){
+
+ $found = $count - 45;
+ break 2;
+ }
+ }else{
+
+ $whitecount = 0;
+ }
+
+ $count++;
+ $iterator->syncIterator();
+ }
+ }
+
+ $found = $found + 10;
+
+ //header("Content-Type: image/png");
+ //echo $im;
+ //die();
+
+ if($found === null){
+
+ throw new Exception("Could not bypass captcha");
+ }
+
+ preg_match(
+ '/data="{&#34;captcha_id&#34;:&#34;([0-9A-z-]+)&#34;}"/',
+ $html,
+ $key
+ );
+
+ $key = $key[1];
+ // we bypassed captcha, send POST data
+ $order =
+ $this->get(
+ "https://search.brave.com/api/captcha?brave=0&captcha_id={$key}",
+ [
+ "solution" => (string)$found
+ ],
+ $nsfw,
+ $country,
+ true
+ );
+
+ $order = json_decode($order, true)["orderId"];
+
+ $orderpayload =
+ $this->get(
+ "https://search.brave.com/api/rewards/v1/orders/{$order}",
+ [],
+ $nsfw,
+ $country
+ );
+
+ $orderpayload = json_decode($orderpayload, true);
+
+ $creds =
+ $this->get(
+ "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials",
+ [
+ "itemId" => $orderpayload["items"][0]["id"],
+ "blindedCreds" => [
+ "fuYAVcB/m7BU66vf3wkNGxJCSaRhshB9o+8km3F1h2c=",
+ "uswvcWJuPK/1qFlVdzBP3eQd0+V1EQgfAtnEoMIK+Uk=",
+ "fJWKGLBxl3Gyn4n9FjTLq1PjupfABT7Ni8MeB+iGzUs=",
+ "Aq9enJ/VZP9GxQIza3n65ZK7xQhY4VwDxv53BCb/Txg=",
+ "FMJA9eSLHq71K+Pcwgm4gIQOmdR/6KMy5cMgXhpd5Ro=",
+ "2NVhIAbvI317SP9/xXbVe/U57eWgvHyqVbHL/5+Gdmw=",
+ "6mpjsjSCmYEzK2xlbL8DI2P4LuhWUOxjTLvsTAL9l24=",
+ "kAn4wuHvIlKWhfuFfPTSfD4tZ5le9t7/61YbdEc/L3k=",
+ "BjjUyG16aTfd1c0h4oBzgQQOekrH1f+a5CmcXqMPTR4=",
+ "SBNgpCt4/V44yaQTfh+D027Yv1GJFHkjUEpPw6rAwRI=",
+ "XDENAtdQ7PyYx+Qx1wQGQtDWgg8WpIMgWGmd4RDOVWE=",
+ "tF7rB4sqamsiUk3K7fojdQSI0Q6iip72yKyhnvg/bC0=",
+ "VsAqflirAd/u4VsLdfRS2UvnH24ZNkFh6YN3DctLjzQ=",
+ "MntLbXkoI0LdcisCbNazmooiHXJyX91L1KERDAu1JRU=",
+ "TH6Zs8JBvFDbTDWgKbfGE4M5/cSwCtHD8ms5Y/U8zHQ=",
+ "jsZg0Z+qDPHymrbhdnesodhLNJ26QdunyMko1aVe4So=",
+ "rpKsyj6/vdnuMgLI2BApeijtGq9g5USRDL0w6X2bnlQ=",
+ "vCzliGT8A9vcLXj2sFf2kavOuYw69d70NpfgA22B4lI=",
+ "7OWoxSCtYXWcaBSifF7AXNBif/sjcuO0IelzXG/3PFk=",
+ "iiXtByNlT6nDMN9De5B58Jl8J0p6LCjnZ9aS3w2FEQU=",
+ "zDhd7gsJ4h4JkDeGK0Y0mfFd8IBdkLhMOANzwO+4Dig=",
+ "qANZ+AikwFReEA61JF009d/c3IHM/aSfIYwljckhJWE=",
+ "nNC30pDLxtXvUr+WDwfDSrAInNBpfSZkPsV2JlpheWI=",
+ "kGXE1pkt25P71kdJzmKIg4+yMR1VA5wNmbpBb/FhJQ8=",
+ "aLqPsY1Qiz2UCa2Jx3YNNt8r4JINMphks/43EiyZfXU=",
+ "bHGYZoQARZEM5LdFF6B74PkRqNd9EKxzuTvGYxjq+hk=",
+ "JOsYQjfE/9Y1u29hR+GvEkNyxUI8blgLhX1iJI/aGRQ=",
+ "yKjHjH5j600TJD/3WPsA1N3OmItDLifdjlysq4H6NV0=",
+ "9lTnUbsPp7BJ7XVN5/T4yGfzD9DJdqWB7xk72s19MAA=",
+ "5KHG8iY45em7zDhO/HlI0ydcZ0Ubn+XSyjifMmy7qXM="
+ ]
+ ],
+ $nsfw,
+ $country,
+ true
+ );
+
+ var_dump($creds);
+
+ sleep(2);
+ $test =
+ $this->get(
+ "https://search.brave.com/api/rewards/v1/orders/{$order}/credentials",
+ [],
+ $nsfw,
+ $country
+ );
+
+ var_dump($test);
+
+ $html =
+ $this->get(
+ "https://search.brave.com/goggles",
+ [
+ "q" => "site:dailymotion.com my bloody valentine"
+ ],
+ $nsfw,
+ $country,
+ false,
+ "__Secure-sku#brave-search-captcha=eyJ0eXBlIjoic2luZ2xlLXVzZSIsInZlcnNpb24iOjEsInNrdSI6ImJyYXZlLXNlYXJjaC1jYXB0Y2hhIiwicHJlc2VudGF0aW9uIjoiZXlKcGMzTjFaWElpT2lKaWNtRjJaUzVqYjIwL2MydDFQV0p5WVhabExYTmxZWEpqYUMxallYQjBZMmhoSWl3aWMybG5ibUYwZFhKbElqb2lNRzl0VDBneWQxZ3dTazkzU0VFMVJ6QTJaR1V5WjFOQ1dDdGhSM3B2Y2xsTVQwVTJZVVJtTUc5a1IweG1Wa3RhZEd0cU4xbHdia3BPT0VOVGNGbE5lVWR2YmpGRlNTOUhhMlZYU1RWNGQxTjJPWGxJTTNjOVBTSXNJblFpT2lKWlJWWldaVzR5TTJwQ01tSnZkakJ2U1hGNGJtSndUMGxEUW5Kd1drRjBRbWQxVnpoRlNURTNVREY2UVRaQlpUTXJSVGRFYm5NeVFqUmhka0pGYTFWM2FGY3JWRVZJVjNWcE9TdFllRU1yYlVSTVkyMTBRVDA5SW4wPSJ9"
+ );
+
+ var_dump($html);
+ }*/
+
+ /*
+     Appends a text fragment to a list of description nodes.
+
+     Whitespace-only payloads are ignored. If the node right before
+     $index is of type "text", the fragment is concatenated onto it
+     after a blank line. Otherwise a new "text" node is pushed onto
+     $text and $index is incremented.
+ */
+ private function appendtext($payload, &$text, &$index){
+
+     // nothing worth appending
+     if(trim($payload) == ""){
+
+         return;
+     }
+
+     $fragment = preg_replace('/ $/', " ", $payload);
+     $last = $index - 1;
+
+     if(
+         $index === 0 ||
+         $text[$last]["type"] != "text"
+     ){
+
+         // no text node to extend, start a new one
+         $text[] = [
+             "type" => "text",
+             "value" => $fragment
+         ];
+         $index++;
+         return;
+     }
+
+     $text[$last]["value"] .= "\n\n" . $fragment;
+ }
+
+ /*
+     Fills $data["table"] from a collection of HTML elements.
+
+     Each element has its <style> markup removed, is reduced to text
+     through fuckhtml->getTextContent() and is then split on the first
+     ":" into a label and a value. Text without a colon is stored under
+     the "Rating" label.
+ */
+ private function tablesublink($html_collection, &$data){
+
+     foreach($html_collection as $element){
+
+         // drop inline stylesheets before extracting the text
+         $element["innerHTML"] =
+             preg_replace(
+                 '/<style>[\S\s]*<\/style>/i',
+                 "",
+                 $element["innerHTML"]
+             );
+
+         $parts =
+             explode(
+                 ":",
+                 $this->fuckhtml->getTextContent($element),
+                 2
+             );
+
+         // no "label: value" separator, treat the whole text as a rating
+         if(count($parts) === 1){
+
+             array_unshift($parts, "Rating");
+         }
+
+         list($label, $value) = $parts;
+
+         $data["table"][trim($label)] = trim($value);
+     }
+ }
+
+ /*
+     Extracts a thumbnail URL from the inline "background-image" style
+     of the first element returned by
+     fuckhtml->getElementsByClassName($thumb, "div").
+
+     $thumb: class name to look up.
+
+     Returns ["url" => string, "ratio" => "16:9"] on success. Both keys
+     are null when no element is found, when the element has no style
+     attribute or no background-image url(...), or when the resolved
+     image is hosted on cdn.search.brave.com.
+ */
+ private function getimagelinkfromstyle($thumb){
+
+     $nothing = [
+         "url" => null,
+         "ratio" => null
+     ];
+
+     $nodes =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             $thumb,
+             "div"
+         );
+
+     if(count($nodes) === 0){
+
+         return $nothing;
+     }
+
+     // the style attribute may be missing altogether
+     $style = $nodes[0]["attributes"]["style"] ?? "";
+
+     // without a match there is no capture group to read, so bail out
+     // instead of building a thumbnail from an undefined index
+     if(
+         preg_match(
+             '/background-image: ?url\((\'[^\']+\'|"[^"]+"|[^\)]+)\)/',
+             $style,
+             $match
+         ) !== 1
+     ){
+
+         return $nothing;
+     }
+
+     // strip the optional quotes around the url(...) argument, then
+     // unwrap the proxied address
+     $url =
+         $this->fuckhtml->getTextContent(
+             $this->unshiturl(
+                 trim($match[1], '"\' ')
+             )
+         );
+
+     // NOTE(review): images served from cdn.search.brave.com are
+     // discarded, presumably because they are static placeholders; confirm
+     if(parse_url($url, PHP_URL_HOST) == "cdn.search.brave.com"){
+
+         return $nothing;
+     }
+
+     return [
+         "url" => $url,
+         "ratio" => "16:9"
+     ];
+ }
+
+ /*
+     Returns the first line of $text after word-wrapping it at 300
+     characters, i.e. everything before the first newline of the
+     wrapped text.
+ */
+ private function limitstrlen($text){
+
+     $wrapped = wordwrap($text, 300, "\n");
+     $cut = strpos($wrapped, "\n");
+
+     if($cut === false){
+
+         // single line, nothing to cut
+         return $wrapped;
+     }
+
+     return substr($wrapped, 0, $cut);
+ }
+
+ /*
+     Collapses every run of whitespace characters in $text into a
+     single space.
+ */
+ private function limitwhitespace($text){
+
+     $collapsed = preg_replace('/[\s]+/', " ", $text);
+
+     return $collapsed;
+ }
+
+ /*
+     Removes a trailing ellipsis from $title and trims the result.
+
+     Both the three-dot form and the single ellipsis character are
+     recognised. The latter is three bytes long in UTF-8, which is why
+     one three-byte tail comparison covers both.
+ */
+ private function titledots($title){
+
+     $tail = substr($title, -3);
+
+     $has_ellipsis =
+         $tail == "..." ||
+         $tail == "…";
+
+     $kept = $has_ellipsis ? substr($title, 0, -3) : $title;
+
+     return trim($kept);
+ }
+
+ /*
+     Recovers the original URL embedded in a Brave image proxy link.
+
+     The proxy path carries the source URL as URL-safe base64, split
+     into chunks by "/" and followed by a file extension. The encoded
+     URL is located by the "aHR0" marker, which is the base64 form of
+     "htt".
+
+     $url: proxied or plain URL.
+
+     Returns the decoded URL, or $url unchanged when the marker does not
+     occur exactly once.
+ */
+ private function unshiturl($url){
+
+     // https://imgs.search.brave.com/XFnbR8Sl7ge82MBDEH7ju0UHImRovMVmQ2qnDvgNTuA/rs:fit:844:225:1/g:ce/aHR0cHM6Ly90c2U0/Lm1tLmJpbmcubmV0/L3RoP2lkPU9JUC54/UWotQXU5N2ozVndT/RDJnNG9BNVhnSGFF/SyZwaWQ9QXBp.jpeg
+
+     $tmp = explode("aHR0", $url);
+
+     if(count($tmp) !== 2){
+
+         // nothing to do
+         return $url;
+     }
+
+     // drop the file extension appended after the payload
+     $payload = explode(".", $tmp[1])[0];
+
+     // "/" only separates chunks. "_" and "-" are the URL-safe stand-ins
+     // for "/" and "+". base64_decode() silently skips characters outside
+     // its alphabet, so an untranslated "-" would corrupt the result
+     return
+         base64_decode(
+             "aHR0" .
+             str_replace(
+                 ["/", "_", "-"],
+                 ["", "/", "+"],
+                 $payload
+             )
+         );
+ }
+}
diff --git a/scraper/ddg.php b/scraper/ddg.php
new file mode 100644
index 0000000..c9c28af
--- /dev/null
+++ b/scraper/ddg.php
@@ -0,0 +1,2722 @@
+<?php
+
+class ddg{
+
+ /*
+     Loads lib/nextpage.php and stores a nextpage instance created with
+     the identifier "ddg" in $this->nextpage.
+ */
+ public function __construct(){
+
+     // include_once: the library may already have been loaded by another
+     // scraper in the same request, and a second plain include would
+     // redeclare its class and abort with a fatal error
+     include_once "lib/nextpage.php";
+     $this->nextpage = new nextpage("ddg");
+ }
+
+ /*
+ curl functions
+
+ Request types accepted by get() through its $reqtype parameter:
+ req_web selects the browser page-navigation header set,
+ req_xhr selects the XMLHttpRequest header set.
+ */
+ private const req_web = 0;
+ private const req_xhr = 1;
+
+ /*
+     Performs an HTTP GET request with curl and returns the response
+     body.
+
+     $url:     address to fetch.
+     $get:     query parameters. When not empty they are encoded with
+               http_build_query() and appended to $url after a "?".
+     $reqtype: self::req_web for page-navigation headers or
+               self::req_xhr for XMLHttpRequest headers. Any other
+               value falls back to the req_web headers.
+
+     Returns the response body as given by curl_exec().
+     Throws Exception carrying the curl error message when the
+     transfer fails.
+ */
+ private function get($url, $get = [], $reqtype = self::req_web){
+
+     $curlproc = curl_init();
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     switch($reqtype){
+
+         case self::req_xhr:
+             $headers =
+                 ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+                 "Accept: */*",
+                 "Accept-Encoding: gzip",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "Connection: keep-alive",
+                 "Referer: https://duckduckgo.com/",
+                 "X-Requested-With: XMLHttpRequest",
+                 "DNT: 1",
+                 "Sec-Fetch-Dest: script",
+                 "Sec-Fetch-Mode: no-cors",
+                 "Sec-Fetch-Site: same-site"];
+             break;
+
+         case self::req_web:
+         default:
+             // unknown request types use the web headers so that
+             // $headers is always defined
+             $headers =
+                 ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+                 "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                 "Accept-Encoding: gzip",
+                 "Accept-Language: en-US,en;q=0.5",
+                 "DNT: 1",
+                 "Connection: keep-alive",
+                 "Upgrade-Insecure-Requests: 1",
+                 "Sec-Fetch-Dest: document",
+                 "Sec-Fetch-Mode: navigate",
+                 "Sec-Fetch-Site: cross-site"];
+             break;
+     }
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message before releasing the handle
+         $error = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($error);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /*
+     Returns the filter definitions for a search page type.
+
+     $pagetype: "web", "images", "videos" or "news".
+
+     Returns an array keyed by filter name. Each filter holds an
+     "option" entry, either a value => label map or the string "_DATE",
+     and usually a "display" label. Unknown page types yield [].
+
+     The country list is built once and shared by all page types. Web
+     search starts with an "any" entry and keeps US (English) in its
+     alphabetical slot, the other page types start with US (English).
+ */
+ public function getfilters($pagetype){
+
+     // regions that come before the US entries
+     $regions = [
+         "ar-es" => "Argentina",
+         "au-en" => "Australia",
+         "at-de" => "Austria",
+         "be-fr" => "Belgium (fr)",
+         "be-nl" => "Belgium (nl)",
+         "br-pt" => "Brazil",
+         "bg-bg" => "Bulgaria",
+         "ca-en" => "Canada (en)",
+         "ca-fr" => "Canada (fr)",
+         "ct-ca" => "Catalonia",
+         "cl-es" => "Chile",
+         "cn-zh" => "China",
+         "co-es" => "Colombia",
+         "hr-hr" => "Croatia",
+         "cz-cs" => "Czech Republic",
+         "dk-da" => "Denmark",
+         "ee-et" => "Estonia",
+         "fi-fi" => "Finland",
+         "fr-fr" => "France",
+         "de-de" => "Germany",
+         "gr-el" => "Greece",
+         "hk-tzh" => "Hong Kong",
+         "hu-hu" => "Hungary",
+         "in-en" => "India (en)",
+         "id-en" => "Indonesia (en)",
+         "ie-en" => "Ireland",
+         "il-en" => "Israel (en)",
+         "it-it" => "Italy",
+         "jp-jp" => "Japan",
+         "kr-kr" => "Korea",
+         "lv-lv" => "Latvia",
+         "lt-lt" => "Lithuania",
+         "my-en" => "Malaysia (en)",
+         "mx-es" => "Mexico",
+         "nl-nl" => "Netherlands",
+         "nz-en" => "New Zealand",
+         "no-no" => "Norway",
+         "pk-en" => "Pakistan (en)",
+         "pe-es" => "Peru",
+         "ph-en" => "Philippines (en)",
+         "pl-pl" => "Poland",
+         "pt-pt" => "Portugal",
+         "ro-ro" => "Romania",
+         "ru-ru" => "Russia",
+         "xa-ar" => "Saudi Arabia",
+         "sg-en" => "Singapore",
+         "sk-sk" => "Slovakia",
+         "sl-sl" => "Slovenia",
+         "za-en" => "South Africa",
+         "es-ca" => "Spain (ca)",
+         "es-es" => "Spain (es)",
+         "se-sv" => "Sweden",
+         "ch-de" => "Switzerland (de)",
+         "ch-fr" => "Switzerland (fr)",
+         "tw-tzh" => "Taiwan",
+         "th-en" => "Thailand (en)",
+         "tr-tr" => "Turkey"
+     ];
+
+     $us_english = [
+         "us-en" => "US (English)"
+     ];
+
+     // regions that come after the US (English) slot of the web list
+     $regions_tail = [
+         "us-es" => "US (Spanish)",
+         "ua-uk" => "Ukraine",
+         "uk-en" => "United Kingdom",
+         "vn-en" => "Vietnam (en)"
+     ];
+
+     // images, videos and news put US (English) first. The array
+     // union keeps insertion order
+     $country = [
+         "display" => "Country",
+         "option" => $us_english + $regions + $regions_tail
+     ];
+
+     switch($pagetype){
+
+         case "web":
+             return
+             [
+                 "country" => [
+                     "display" => "Country",
+                     "option" =>
+                         ["any" => "All Regions"] +
+                         $regions +
+                         $us_english +
+                         $regions_tail
+                 ],
+                 "nsfw" => [
+                     "display" => "NSFW",
+                     "option" => [
+                         "yes" => "Yes",
+                         "maybe" => "Maybe",
+                         "no" => "No"
+                     ]
+                 ],
+                 "newer" => [
+                     "display" => "Newer than",
+                     "option" => "_DATE"
+                 ],
+                 "older" => [
+                     "display" => "Older than",
+                     "option" => "_DATE"
+                 ],
+                 "extendedsearch" => [
+                     // undefined display, so it wont show in frontend
+                     "option" => [
+                         "yes" => "Yes",
+                         "no" => "No"
+                     ]
+                 ]
+             ];
+
+         case "images":
+             return
+             [
+                 "country" => $country,
+                 "nsfw" => [
+                     "display" => "NSFW",
+                     "option" => [
+                         "yes" => "Yes",
+                         "no" => "No"
+                     ]
+                 ],
+                 "date" => [
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "Day" => "Past day",
+                         "Week" => "Past week",
+                         "Month" => "Past month"
+                     ]
+                 ],
+                 "size" => [
+                     "display" => "Size",
+                     "option" => [
+                         "any" => "Any size",
+                         "Small" => "Small",
+                         "Medium" => "Medium",
+                         "Large" => "Large",
+                         "Wallpaper" => "Wallpaper"
+                     ]
+                 ],
+                 "color" => [
+                     "display" => "Colors",
+                     "option" => [
+                         "any" => "All colors",
+                         "Monochrome" => "Black and white",
+                         "Red" => "Red",
+                         "Orange" => "Orange",
+                         "Yellow" => "Yellow",
+                         "Green" => "Green",
+                         "Blue" => "Blue",
+                         "Purple" => "Purple",
+                         "Pink" => "Pink",
+                         "Brown" => "Brown",
+                         "Black" => "Black",
+                         "Gray" => "Gray",
+                         "Teal" => "Teal",
+                         "White" => "White"
+                     ]
+                 ],
+                 "type" => [
+                     "display" => "Type",
+                     "option" => [
+                         "any" => "All types",
+                         "photo" => "Photograph",
+                         "clipart" => "Clipart",
+                         "gif" => "Animated GIF",
+                         "transparent" => "Transparent"
+                     ]
+                 ],
+                 "layout" => [
+                     "display" => "Layout",
+                     "option" => [
+                         "any" => "All layouts",
+                         "Square" => "Square",
+                         "Tall" => "Tall",
+                         "Wide" => "Wide"
+                     ]
+                 ],
+                 "license" => [
+                     "display" => "License",
+                     "option" => [
+                         "any" => "All licenses", // blame ddg for this
+                         "Any" => "All Creative Commons",
+                         "Public" => "Public domain",
+                         "Share" => "Free to Share and Use",
+                         "ShareCommercially" => "Free to Share and Use Commercially",
+                         "Modify" => "Free to Modify, Share, and Use",
+                         "ModifyCommercially" => "Free to Modify, Share, and Use Commercially"
+                     ]
+                 ]
+             ];
+
+         case "videos":
+             return
+             [
+                 "country" => $country,
+                 "nsfw" => [
+                     "display" => "NSFW",
+                     "option" => [
+                         "yes" => "Yes",
+                         "no" => "No"
+                     ]
+                 ],
+                 "date" => [
+                     "display" => "Time fetched",
+                     "option" => [
+                         "any" => "Any time",
+                         "d" => "Past day",
+                         "w" => "Past week",
+                         "m" => "Past month"
+                     ]
+                 ],
+                 "resolution" => [ //videoDefinition
+                     "display" => "Resolution",
+                     "option" => [
+                         "any" => "Any resolution",
+                         "high" => "High definition",
+                         "standard" => "Standard definition"
+                     ]
+                 ],
+                 "duration" => [ // videoDuration
+                     "display" => "Duration",
+                     "option" => [
+                         "any" => "Any duration",
+                         // short means under 5 minutes, long means over 20
+                         "short" => "Short (<5min)",
+                         "medium" => "Medium (5-20min)",
+                         "long" => "Long (>20min)"
+                     ]
+                 ],
+                 "license" => [
+                     "display" => "License",
+                     "option" => [
+                         "any" => "Any license",
+                         "creativeCommon" => "Creative Commons",
+                         "youtube" => "YouTube Standard"
+                     ]
+                 ]
+             ];
+
+         case "news":
+             return
+             [
+                 "country" => $country,
+                 "nsfw" => [
+                     "display" => "NSFW",
+                     "option" => [
+                         "yes" => "Yes",
+                         "maybe" => "Maybe",
+                         "no" => "No"
+                     ]
+                 ],
+                 "date" => [
+                     "display" => "Time posted",
+                     "option" => [
+                         "any" => "Any time",
+                         "d" => "Past day",
+                         "w" => "Past week",
+                         "m" => "Past month"
+                     ]
+                 ]
+             ];
+
+         default:
+             return [];
+     }
+ }
+
+ public function web($get){
+
+ if($get["npt"]){
+
+ $jsgrep = $this->nextpage->get($get["npt"], "web");
+
+ $extendedsearch = false;
+ $inithtml = "";
+
+ }else{
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+ $older = $get["older"];
+ $newer = $get["newer"];
+ $extendedsearch = $get["extendedsearch"] == "yes" ? true : false;
+
+ // generate filters
+ $get_filters = [
+ "q" => $search,
+ "kz" => "1" // force instant answers
+ ];
+
+ if($country == "any"){
+
+ $get_filters["kl"] = "wt-wt";
+ }else{
+
+ $get_filters["kl"] = $country;
+ }
+
+ switch($nsfw){
+
+ case "yes": $get_filters["kp"] = "-2"; break;
+ case "maybe": $get_filters["kp"] = "-1"; break;
+ case "no": $get_filters["kp"] = "1"; break;
+ }
+
+ $df = true;
+
+ if($newer === false){
+
+ if($older !== false){
+
+ $start = 36000;
+ $end = $older;
+ }else{
+
+ $df = false;
+ }
+ }else{
+
+ $start = $newer;
+
+ if($older !== false){
+
+ $end = $older;
+ }else{
+
+ $end = time();
+ }
+ }
+
+ if($df === true){
+ $get_filters["df"] = date("Y-m-d", $start) . ".." . date("Y-m-d", $end);
+ }
+
+ /*
+ Get html
+ */
+ // https://duckduckgo.com/?q=minecraft&kz=1&k1=-1&kp=-2
+ try{
+ $inithtml = $this->get(
+ "https://duckduckgo.com/",
+ $get_filters
+ );
+ }catch(Exception $e){
+
+ throw new Exception("Failed to get html");
+ }
+
+ preg_match(
+ '/DDG\.deep\.initialize\(\'(.*)\',/U',
+ $inithtml,
+ $jsgrep
+ );
+
+ if(!isset($jsgrep[1])){
+
+ throw new Exception("Failed to get d.js URL");
+ }
+
+ $jsgrep = $jsgrep[1];
+ }
+
+ // get javascript
+ try{
+
+ $js = $this->get(
+ "https://links.duckduckgo.com" . $jsgrep,
+ [],
+ ddg::req_xhr
+ );
+ }catch(Exception $e){
+
+ throw new Exception("Failed to fetch d.js");
+ }
+
+ // initialize api response array
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ /*
+ Additional requests
+ */
+
+ if($extendedsearch){
+
+ /*
+ Check for wordnik results
+ */
+ preg_match(
+ '/nrj\(\'\/js\/spice\/dictionary\/definition\/([^\']+)\'\)/',
+ $js,
+ $wordnik
+ );
+
+ if(isset($wordnik[1])){
+
+ try{
+
+ $wordnik = $wordnik[1];
+
+ // get definition
+ $wordnikjs = $this->get(
+ "https://duckduckgo.com/js/spice/dictionary/definition/" . $wordnik,
+ [],
+ ddg::req_xhr
+ );
+
+ preg_match(
+ '/ddg_spice_dictionary_definition\(\n?(\[{[\S\s]*}])/',
+ $wordnikjs,
+ $wordnikjson
+ );
+
+ if(isset($wordnikjson[1])){
+
+ $wordnikjson = json_decode($wordnikjson[1], true);
+
+ $out["answer"][0] = [
+ "title" => urldecode($wordnik),
+ "description" => [],
+ "url" => "https://www.wordnik.com/words/" . $wordnik,
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+
+ $partofspeech = false;
+ $wastext = false;
+ $textindent = 1;
+
+ // get audio
+
+ $wordnikaudio_json =
+ json_decode(
+ $this->get(
+ "https://duckduckgo.com/js/spice/dictionary/audio/" . $wordnik,
+ [],
+ ddg::req_xhr
+ ),
+ true
+ );
+
+ if(isset($wordnikaudio_json[0]["id"])){
+
+ usort($wordnikaudio_json, function($a, $b){
+
+ return $a["id"] < $b["id"];
+ });
+
+ $out["answer"][0]["description"][] = [
+ "type" => "audio",
+ "url" => $wordnikaudio_json[0]["fileUrl"]
+ ];
+ }
+
+ $collection = [];
+ $e[] = [];
+
+ foreach($wordnikjson as $data){
+
+ if(!isset($data["partOfSpeech"])){
+
+ continue;
+ }
+
+ if(isset($data["text"])){
+
+ if(!isset($collection[$data["partOfSpeech"]])){
+
+ $collection[$data["partOfSpeech"]] = [];
+ $c = 0;
+ }else{
+ $c = count($collection[$data["partOfSpeech"]]);
+ }
+
+ if(!isset($e[$data["partOfSpeech"]])){
+
+ $e[$data["partOfSpeech"]] = 0;
+ }
+
+ $e[$data["partOfSpeech"]]++;
+ $text = $e[$data["partOfSpeech"]] . ". " . $this->unescapehtml(strip_tags($data["text"]));
+
+ $syn = false;
+ if(
+ isset($data["relatedWords"]) &&
+ count($data["relatedWords"]) !== 0
+ ){
+
+ $syn = " (";
+
+ $u = 0;
+ foreach($data["relatedWords"] as $related){
+
+ $syn .= ucfirst($related["relationshipType"]) . ": ";
+
+ $c = count($related["words"]);
+ $b = 0;
+ foreach($related["words"] as $word){
+
+ $syn .= trim($this->unescapehtml(strip_tags($word)));
+
+ $b++;
+ if($b !== $c){
+
+ $syn .= ", ";
+ }
+ }
+
+ $u++;
+ if($u !== count($data["relatedWords"])){
+
+ $syn .= ". ";
+ }
+ }
+
+ $syn .= ")";
+ }
+
+ if(
+ $c !== 0 &&
+ $collection[$data["partOfSpeech"]][$c - 1]["type"] == "text"
+ ){
+ $collection[$data["partOfSpeech"]][$c - 1]["value"] .=
+ "\n" . $text;
+
+ }else{
+
+ if(
+ $c !== 0 &&
+ (
+ $collection[$data["partOfSpeech"]][$c - 1]["type"] == "text" ||
+ $collection[$data["partOfSpeech"]][$c - 1]["type"] == "italic"
+ )
+ ){
+
+ $text = "\n" . $text;
+ }
+
+ $collection[$data["partOfSpeech"]][] =
+ [
+ "type" => "text",
+ "value" => $text
+ ];
+ }
+
+ if($syn){
+
+ $collection[$data["partOfSpeech"]][] = [
+ "type" => "italic",
+ "value" => $syn
+ ];
+ }
+
+ if(isset($data["exampleUses"])){
+
+ foreach($data["exampleUses"] as $use){
+
+ $collection[$data["partOfSpeech"]][] = [
+ "type" => "quote",
+ "value" => $this->unescapehtml(strip_tags($use["text"]))
+ ];
+ }
+ }
+
+ if(isset($data["citations"])){
+
+ foreach($data["citations"] as $citation){
+
+ if(!isset($citation["cite"])){
+
+ continue;
+ }
+
+ $value = $this->unescapehtml(strip_tags($citation["cite"]));
+
+ if(
+ isset($citation["source"]) &&
+ trim($citation["source"]) != ""
+ ){
+ $value .= " - " . $this->unescapehtml(strip_tags($citation["source"]));
+ }
+
+ $collection[$data["partOfSpeech"]][] = [
+ "type" => "quote",
+ "value" => $value
+ ];
+ }
+ }
+ }
+ }
+
+ foreach($collection as $key => $items){
+
+ $out["answer"][0]["description"][] =
+ [
+ "type" => "title",
+ "value" => $key
+ ];
+
+ $out["answer"][0]["description"] =
+ array_merge($out["answer"][0]["description"], $items);
+ }
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ unset($wordnik);
+
+ /*
+ Check for stackoverflow answers
+ */
+
+ // /a.js?p=1&src_id=stack_overflow&from=nlp_qa&id=3390396,2559318&q=how%20can%20i%20check%20for%20undefined%20in%20javascript&s=stackoverflow.com&tl=How%20can%20I%20check%20for%20%22undefined%22%20in%20JavaScript%3F%20%2D%20Stack%20Overflow
+ // /a.js?p=1&src_id=arqade&from=nlp_qa&id=370293,375682&q=what%20is%20the%20difference%20between%20at%20and%20positioned%20in%20execute&s=gaming.stackexchange.com&tl=minecraft%20java%20edition%20minecraft%20commands%20%2D%20What%20is%20the%20difference
+ // /a.js?p=1&src_id=unix&from=nlp_qa&id=312754&q=how%20to%20strip%20metadata%20from%20image%20files&s=unix.stackexchange.com&tl=How%20to%20strip%20metadata%20from%20image%20files%20%2D%20Unix%20%26%20Linux%20Stack%20Exchange
+ preg_match(
+ '/nrj\(\'(\/a\.js\?.*from=nlp_qa.*)\'\)/U',
+ $js,
+ $stack
+ );
+
+ if(isset($stack[1])){
+
+ $stack = $stack[1];
+
+ try{
+ $stackjs = $this->get(
+ "https://duckduckgo.com" . $stack,
+ [],
+ ddg::req_xhr
+ );
+
+ if(
+ !preg_match(
+ '/^DDG\.duckbar\.failed/',
+ $stackjs
+ )
+ ){
+
+ preg_match(
+ '/DDG\.duckbar\.add_array\((\[\{[\S\s]*}])\)/U',
+ $stackjs,
+ $stackjson
+ );
+
+ $stackjson = json_decode($stackjson[1], true)[0]["data"][0];
+
+ $out["answer"][] = [
+ "title" => $stackjson["Heading"],
+ "description" => $this->htmltoarray($stackjson["Abstract"]),
+ "url" => str_replace(["http://", "ddg"], ["https://", ""], $stackjson["AbstractURL"]),
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ /*
+ Check for musicmatch (lyrics)
+ */
+ preg_match(
+ '/nrj\(\'(\/a\.js\?.*&s=lyrics.*)\'\)/U',
+ $js,
+ $lyrics
+ );
+
+ if(isset($lyrics[1])){
+
+ $lyrics = $lyrics[1];
+
+ try{
+ $lyricsjs = $this->get(
+ "https://duckduckgo.com" . $lyrics,
+ [],
+ ddg::req_xhr
+ );
+
+ if(
+ !preg_match(
+ '/^DDG\.duckbar\.failed/',
+ $lyricsjs
+ )
+ ){
+
+ preg_match(
+ '/DDG\.duckbar\.add_array\((\[\{[\S\s]*}])\)/U',
+ $lyricsjs,
+ $lyricsjson
+ );
+
+ $lyricsjson = json_decode($lyricsjson[1], true)[0]["data"][0];
+
+ $title = null;
+
+ if(isset($lyricsjson["Heading"])){
+
+ $title = $lyricsjson["Heading"];
+ }elseif(isset($lyricsjson["data"][1]["urlTitle"])){
+
+ $title = $lyricsjson["data"][1]["urlTitle"];
+ }else{
+
+ $title = $lyricsjson["data"][0]["song_title"];
+ }
+
+ $description = [
+ [
+ "type" => "text",
+ "value" => null
+ ]
+ ];
+ $parts =
+ explode(
+ "<br>",
+ str_ireplace(
+ ["<br>", "</br>", "<br/>"],
+ "<br>",
+ $lyricsjson["Abstract"]
+ ),
+ );
+
+ for($i=0; $i<count($parts); $i++){
+
+ $description[0]["value"] .= trim($parts[$i]) . "\n";
+ }
+
+ $description[0]["value"] = trim($description[0]["value"]);
+
+ $description[] =
+ [
+ "type" => "quote",
+ "value" =>
+ "Written by " . implode(", ", $lyricsjson["data"][0]["writers"]) .
+ "\nFrom the album " . $lyricsjson["data"][0]["albums"][0]["title"] .
+ "\nReleased on the " . date("jS \of F Y", strtotime($lyricsjson["data"][0]["albums"][0]["release_date"]))
+ ];
+
+ $out["answer"][] = [
+ "title" => $title,
+ "description" => $description,
+ "url" => $lyricsjson["AbstractURL"],
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+ }
+
+ /*
+ Get related searches
+ */
+ preg_match(
+ '/DDG\.duckbar\.loadModule\(\'related_searches\', ?{[\s\S]*"results":(\[{[\s\S]*}]),"vqd"/U',
+ $js,
+ $related
+ );
+
+ if(isset($related[1])){
+
+ try{
+ $related = json_decode($related[1], true);
+
+ for($i=0; $i<count($related); $i++){
+
+ if(isset($related[$i]["text"])){
+
+ array_push($out["related"], $related[$i]["text"]);
+ }
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ unset($related);
+
+ /*
+ Get answers
+ */
+ $answer_count = preg_match_all(
+ '/DDG\.duckbar\.add\(({.*[\S\s]*})(?:\);|,null,"index"\))/U',
+ $js . $inithtml,
+ $answers
+ );
+
+ try{
+
+ if(isset($answers[1])){
+
+ $answers = $answers[1];
+
+ for($i=0; $i<$answer_count; $i++){
+
+ $answers[$i] = json_decode($answers[$i], true);
+
+ // remove dupes
+ for($k=0; $k<count($out["answer"]); $k++){
+
+ if(
+ !isset($answers[$i]["data"]["AbstractURL"]) ||
+ str_replace("_", "%20", $out["answer"][$k]["url"]) == str_replace("_", "%20", $this->sanitizeurl($answers[$i]["data"]["AbstractURL"]))
+ ){
+
+ continue 2;
+ }
+ }
+
+ // get more related queries
+ if(
+ isset($answers[$i]["data"]["RelatedTopics"]) &&
+ $answers[$i]["data"]["RelatedTopics"] != 0
+ ){
+
+ for($k=0; $k<count($answers[$i]["data"]["RelatedTopics"]); $k++){
+
+ if(isset($answers[$i]["data"]["RelatedTopics"][$k]["Result"])){
+
+ preg_match(
+ '/">(.*)<\//',
+ $answers[$i]["data"]["RelatedTopics"][$k]["Result"],
+ $label
+ );
+
+ array_push($out["related"], htmlspecialchars_decode(strip_tags($label[1])));
+ }
+ }
+ }
+
+ $image = null;
+
+ // get image
+ if(
+ isset($answers[$i]["data"]["Image"]) &&
+ !empty($answers[$i]["data"]["Image"]) &&
+ $answers[$i]["data"]["Image"] != "https://duckduckgo.com/i/"
+ ){
+ if(strpos($answers[$i]["data"]["Image"], "https://duckduckgo.com/i/") === true){
+
+ $image = $answers[$i]["data"]["Image"];
+ }else{
+
+ if(
+ strlen($answers[$i]["data"]["Image"]) > 0 &&
+ $answers[$i]["data"]["Image"][0] == "/"
+ ){
+
+ $answers[$i]["data"]["Image"] = substr($answers[$i]["data"]["Image"], 1);
+ }
+
+ $image = "https://duckduckgo.com/" . $answers[$i]["data"]["Image"];
+ }
+ }
+
+ $count = count($out["answer"]);
+
+ if(isset($answers[$i]["data"]["AbstractText"]) && !empty($answers[$i]["data"]["AbstractText"])){
+
+ $description = $this->htmltoarray($answers[$i]["data"]["AbstractText"]);
+ }elseif(isset($answers[$i]["data"]["Abstract"]) && !empty($answers[$i]["data"]["Abstract"])){
+
+ $description = $this->htmltoarray($answers[$i]["data"]["Abstract"]);
+ }elseif(isset($answers[$i]["data"]["Answer"]) && !empty($answers[$i]["data"]["Answer"])){
+
+ $description = $this->htmltoarray($answers[$i]["data"]["Answer"]);
+ }else{
+
+ $description = [];
+ }
+
+ if(isset($answers[$i]["data"]["Heading"]) && !empty($answers[$i]["data"]["Heading"])){
+
+ $title = $this->unescapehtml($answers[$i]["data"]["Heading"]);
+ }else{
+
+ // no title, ignore bs
+ continue;
+ //$title = null;
+ }
+
+ if(isset($answers[$i]["data"]["AbstractURL"]) && !empty($answers[$i]["data"]["AbstractURL"])){
+
+ $url = $answers[$i]["data"]["AbstractURL"];
+ }else{
+
+ $url = null;
+ }
+
+ $out["answer"][$count] = [
+ "title" => $title,
+ "description" => $description,
+ "url" => $this->sanitizeurl($url),
+ "thumb" => $image,
+ "table" => [],
+ "sublink" => []
+ ];
+
+ if(isset($answers[$i]["data"]["Infobox"]["content"])){
+
+ for($k=0; $k<count($answers[$i]["data"]["Infobox"]["content"]); $k++){
+
+ // populate table
+ if($answers[$i]["data"]["Infobox"]["content"][$k]["data_type"] == "string"){
+
+ $out["answer"][$count]["table"][$answers[$i]["data"]["Infobox"]["content"][$k]["label"]] =
+ $answers[$i]["data"]["Infobox"]["content"][$k]["value"];
+ continue;
+ }
+
+ $url = "";
+ $type = "Website";
+
+ switch($answers[$i]["data"]["Infobox"]["content"][$k]["data_type"]){
+ case "official_site":
+ case "official_website":
+ $type = "Website";
+ break;
+
+ case "wikipedia": $type = "Wikipedia"; break;
+ case "itunes": $type = "iTunes"; break;
+ case "amazon": $type = "Amazon"; break;
+
+ case "imdb_title_id":
+ case "imdb_id":
+ case "imdb_name_id":
+ $type = "IMDb";
+ $delim = substr($answers[$i]["data"]["Infobox"]["content"][$k]["value"], 0, 2);
+
+ if($delim == "nm"){
+
+ $url = "https://www.imdb.com/name/";
+ }elseif($delim == "tt"){
+
+ $url = "https://www.imdb.com/title/";
+ }elseif($delim == "co"){
+
+ $url = "https://www.imdb.com/search/title/?companies=";
+ }else{
+
+ $url = "https://www.imdb.com/title/";
+ }
+ break;
+
+ case "imdb_name_id": $url = "https://www.imdb.com/name/"; $type = "IMDb"; break;
+ case "twitter_profile": $url = "https://twitter.com/"; $type = "Twitter"; break;
+ case "instagram_profile": $url = "https://instagram.com/"; $type = "Instagram"; break;
+ case "facebook_profile": $url = "https://facebook.com/"; $type = "Facebook"; break;
+ case "spotify_artist_id": $url = "https://open.spotify.com/artist/"; $type = "Spotify"; break;
+ case "rotten_tomatoes": $url = "https://rottentomatoes.com/"; $type = "Rotten Tomatoes"; break;
+ case "youtube_channel": $url = "https://youtube.com/channel/"; $type = "YouTube"; break;
+ case "soundcloud_id": $url = "https://soundcloud.com/"; $type = "SoundCloud"; break;
+
+ default:
+ continue 2;
+ }
+
+ // populate sublinks
+ $out["answer"][$count]["sublink"][$type] =
+ $url . $answers[$i]["data"]["Infobox"]["content"][$k]["value"];
+ }
+ }
+ }
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+
+ /*
+ Get shitcoin conversions
+ */
+ if($extendedsearch){
+ if(
+ preg_match(
+ '/"https?:\/\/(?:www\.coinbase\.com\/converter\/([a-z0-9]+)\/([a-z0-9]+)|changelly\.com\/exchange\/([a-z0-9]+)\/([a-z0-9]+)|coinmarketcap\.com\/currencies\/[a-z0-9]+\/([a-z0-9]+)\/([a-z0-9]+))\/?"/',
+ $js,
+ $shitcoins
+ )
+ ){
+
+ $shitcoins = array_values(array_filter($shitcoins));
+
+ preg_match(
+ '/(?:[\s,.]*[0-9]+)+/',
+ $search,
+ $amount
+ );
+
+ if(count($amount) === 1){
+
+ $amount = (float)str_replace([" ", ","], ["", "."], $amount[0]);
+ }else{
+
+ $amount = 1;
+ }
+
+ try{
+
+ $description = [];
+
+ $shitcoinjs = $this->get(
+ "https://duckduckgo.com/js/spice/cryptocurrency/{$shitcoins[1]}/{$shitcoins[2]}/1",
+ [],
+ ddg::req_xhr
+ );
+
+ preg_match(
+ '/ddg_spice_cryptocurrency\(\s*({[\S\s]*})\s*\);/',
+ $shitcoinjs,
+ $shitcoinjson
+ );
+
+ $shitcoinjson = json_decode($shitcoinjson[1], true);
+
+ if(
+ !isset($shitcoinjson["error"]) &&
+ $shitcoinjson["status"]["error_code"] == 0
+ ){
+
+ $shitcoinjson = $shitcoinjson["data"];
+ $array_values = array_values($shitcoinjson["quote"])[0];
+
+ if($amount != 1){
+
+ // show conversion
+ $description[] = [
+ "type" => "title",
+ "value" => "Conversion"
+ ];
+
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ "{$amount} {$shitcoinjson["name"]} ({$shitcoinjson["symbol"]}) = " . $this->number_format($array_values["price"] * $amount) . " " . strtoupper($shitcoins[2]) . "\n" .
+ "{$amount} " . strtoupper($shitcoins[2]) . " = " . $this->number_format((1 / $array_values["price"]) * $amount) . " {$shitcoinjson["symbol"]}"
+ ];
+ }
+
+ $description[] = [
+ "type" => "title",
+ "value" => "Current rates"
+ ];
+
+ // rates
+ $description[] = [
+ "type" => "text",
+ "value" =>
+ "1 {$shitcoinjson["name"]} ({$shitcoinjson["symbol"]}) = " . $this->number_format($array_values["price"]) . " " . strtoupper($shitcoins[2]) . "\n" .
+ "1 " . strtoupper($shitcoins[2]) . " = " . $this->number_format(1 / $array_values["price"]) . " {$shitcoinjson["symbol"]}"
+ ];
+
+ $description[] = [
+ "type" => "quote",
+ "value" => "Last fetched: " . date("jS \of F Y @ g:ia", strtotime($shitcoinjson["last_updated"]))
+ ];
+
+ $out["answer"][] = [
+ "title" => $shitcoinjson["name"] . " (" . strtoupper($shitcoins[1]) . ") & " . strtoupper($shitcoins[2]) . " market",
+ "description" => $description,
+ "url" => "https://coinmarketcap.com/converter/" . strtoupper($shitcoins[1]) . "/" . strtoupper($shitcoins[2]) . "/?amt={$amount}",
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }else{
+
+ /*
+ Get currency conversion
+ */
+ if(
+ preg_match(
+ '/"https:\/\/www\.xe\.com\/currencyconverter\/convert\/\?From=([A-Z0-9]+)&To=([A-Z0-9]+)"/',
+ $js,
+ $currencies
+ )
+ ){
+
+ preg_match(
+ '/(?:[\s,.]*[0-9]+)+/',
+ $search,
+ $amount
+ );
+
+ if(count($amount) === 1){
+
+ $amount = (float)str_replace([" ", ","], ["", "."], $amount[0]);
+ }else{
+
+ $amount = 1;
+ }
+
+ try{
+ $currencyjs = $this->get(
+ "https://duckduckgo.com/js/spice/currency/{$amount}/" . strtolower($currencies[1]) . "/" . strtolower($currencies[2]),
+ [],
+ ddg::req_xhr
+ );
+
+ preg_match(
+ '/ddg_spice_currency\(\s*({[\S\s]*})\s*\);/',
+ $currencyjs,
+ $currencyjson
+ );
+
+ $currencyjson = json_decode($currencyjson[1], true);
+
+ if(empty($currencyjson["headers"]["description"])){
+
+ $currencyjson = $currencyjson["conversion"];
+ $description = [];
+
+ if($amount != 1){
+
+ $description[] =
+ [
+ "type" => "title",
+ "value" => "Conversion"
+ ];
+
+ $description[] =
+ [
+ "type" => "text",
+ "value" =>
+ $this->number_format($currencyjson["from-amount"]) . " {$currencyjson["from-currency-symbol"]} = " .
+ $this->number_format($currencyjson["converted-amount"]) . " {$currencyjson["to-currency-symbol"]}"
+ ];
+ }
+
+ $description[] =
+ [
+ "type" => "title",
+ "value" => "Current rates"
+ ];
+
+ $description[] =
+ [
+ "type" => "text",
+ "value" =>
+ "{$currencyjson["conversion-rate"]}\n" .
+ "{$currencyjson["conversion-inverse"]}"
+ ];
+
+ $description[] =
+ [
+ "type" => "quote",
+ "value" => "Last fetched: " . date("jS \of F Y @ g:ia", strtotime($currencyjson["rate-utc-timestamp"]))
+ ];
+
+ $out["answer"][] = [
+ "title" =>
+ "{$currencyjson["from-currency-name"]} ({$currencyjson["from-currency-symbol"]}) to " .
+ "{$currencyjson["to-currency-name"]} ({$currencyjson["to-currency-symbol"]})",
+ "description" => $description,
+ "url" => "https://www.xe.com/currencyconverter/convert/?Amount={$amount}&From={$currencies[1]}&To={$currencies[2]}",
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ];
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+ }
+ }
+
+ /*
+ Get small answer
+ */
+ preg_match(
+ '/DDG\.ready\(function ?\(\) ?{DDH\.add\(({[\S\s]+}),"index"\)}\)/U',
+ $inithtml,
+ $smallanswer
+ );
+
+ if(isset($smallanswer[1])){
+
+ $smallanswer = json_decode($smallanswer[1], true);
+
+ if(
+ !isset($smallanswer["require"]) &&
+ isset($smallanswer["data"]["title"])
+ ){
+
+ if(isset($smallanswer["data"]["url"])){
+
+ $url = $this->unescapehtml($smallanswer["data"]["url"]);
+ }elseif(isset($smallanswer["meta"]["sourceUrl"])){
+
+ $url = $this->unescapehtml($smallanswer["meta"]["sourceUrl"]);
+ }else{
+
+ $url = null;
+ }
+
+ $out["answer"] = [
+ [
+ "title" => $this->unescapehtml($smallanswer["data"]["title"]),
+ "description" => [],
+ "url" => $this->sanitizeurl($url),
+ "thumb" => null,
+ "table" => [],
+ "sublink" => []
+ ],
+ ...$out["answer"]
+ ];
+
+ if(isset($smallanswer["data"]["subtitle"])){
+
+ $out["answer"][0]["description"][] =
+ [
+ "type" => "text",
+ "value" => isset($smallanswer["data"]["subtitle"]) ? $this->unescapehtml($smallanswer["data"]["subtitle"]) : null
+ ];
+ }
+ }
+ }
+
+ unset($inithtml);
+ unset($answers);
+ unset($answer_count);
+
+ /*
+ Get spelling autocorrect
+ */
+
+ preg_match(
+ '/DDG\.page\.showMessage\(\'spelling\',({[\S\s]+})\)/U',
+ $js,
+ $spelling
+ );
+
+ if(isset($spelling[1])){
+
+ $spelling = json_decode($spelling[1], true);
+
+ switch((int)$spelling["qc"]){
+
+ case 1:
+ case 3:
+ case 5:
+ $type = "including";
+ break;
+
+ default:
+ $type = "not_many";
+ break;
+ }
+
+ $out["spelling"] = [
+ "type" => $type,
+ "using" => $this->unescapehtml(strip_tags($spelling["suggestion"])),
+ "correction" => $this->unescapehtml(strip_tags($spelling["recourseText"]))
+ ];
+ }
+
+ unset($spelling);
+
+ /*
+ Get web results
+ */
+ preg_match(
+ '/DDG\.pageLayout\.load\(\'d\', ?(\[{"[\S\s]*"}])\)/U',
+ $js,
+ $web
+ );
+
+ if(isset($web[1])){
+
+ try{
+ $web = json_decode($web[1], true);
+
+ for($i=0; $i<count($web); $i++){
+
+ // ignore google placeholder + fake next page
+ if(
+ isset($web[$i]["t"]) &&
+ (
+ $web[$i]["t"] == "EOP" ||
+ $web[$i]["t"] == "EOF"
+ ) &&
+ strpos($web[$i]["c"], "://www.google.") !== false
+ ){
+
+ break;
+ }
+
+ // store next page token
+ if(isset($web[$i]["n"])){
+
+ $out["npt"] = $this->nextpage->store($web[$i]["n"] . "&biaexp=b&eslexp=a&litexp=c&msvrtexp=b&wrap=1", "web");
+ continue;
+ }
+
+ // ignore malformed data
+ if(!isset($web[$i]["t"])){
+
+ continue;
+ }
+
+ $sublinks = [];
+
+ if(isset($web[$i]["l"])){
+
+ for($k=0; $k<count($web[$i]["l"]); $k++){
+
+ if(
+ !isset($web[$i]["l"][$k]["targetUrl"]) ||
+ !isset($web[$i]["l"][$k]["text"])
+ ){
+
+ continue;
+ }
+
+ array_push(
+ $sublinks,
+ [
+ "title" => $this->titledots($this->unescapehtml($web[$i]["l"][$k]["text"])),
+ "date" => null,
+ "description" => isset($web[$i]["l"][$k]["snippet"]) ? $this->titledots($this->unescapehtml($web[$i]["l"][$k]["snippet"])) : null,
+ "url" => $this->sanitizeurl($web[$i]["l"][$k]["targetUrl"])
+ ]
+ );
+ }
+ }
+
+ if(
+ preg_match(
+ '/^<span class="result__type">PDF<\/span>/',
+ $web[$i]["t"]
+ )
+ ){
+
+ $type = "pdf";
+ $web[$i]["t"] =
+ str_replace(
+ '<span class="result__type">PDF</span>',
+ "",
+ $web[$i]["t"]
+ );
+ }else{
+
+ $type = "web";
+ }
+
+ if(isset($web[$i]["e"])){
+
+ $date = strtotime($web[$i]["e"]);
+ }else{
+
+ $date = null;
+ }
+
+ array_push(
+ $out["web"],
+ [
+ "title" => $this->titledots($this->unescapehtml(strip_tags($web[$i]["t"]))),
+ "description" => $this->titledots($this->unescapehtml(strip_tags($web[$i]["a"]))),
+ "url" => isset($web[$i]["u"]) ? $this->sanitizeurl($web[$i]["u"]) : $this->sanitizeurl($web[$i]["c"]),
+ "date" => $date,
+ "type" => $type,
+ "thumb" =>
+ [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => $sublinks,
+ "table" => []
+ ]
+ );
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ unset($web);
+
+ /*
+ Get images
+ */
+ preg_match(
+ '/DDG\.duckbar\.load\(\'images\', ?{[\s\S]*"results":(\[{"[\s\S]*}]),"vqd"/U',
+ $js,
+ $images
+ );
+
+ if(isset($images[1])){
+
+ try{
+ $images = json_decode($images[1], true);
+
+ for($i=0; $i<count($images); $i++){
+
+ if(
+ !isset($images[$i]["title"]) ||
+ !isset($images[$i]["image"]) ||
+ !isset($images[$i]["thumbnail"]) ||
+ !isset($images[$i]["width"]) ||
+ !isset($images[$i]["height"])
+ ){
+
+ continue;
+ }
+
+ $ratio =
+ $this->bingratio(
+ (int)$images[$i]["width"],
+ (int)$images[$i]["height"]
+ );
+
+ array_push(
+ $out["image"],
+ [
+ "title" => $this->titledots($this->unescapehtml($images[$i]["title"])),
+ "source" => [
+ [
+ "url" => $images[$i]["image"],
+ "width" => (int)$images[$i]["width"],
+ "height" => (int)$images[$i]["height"]
+ ],
+ [
+ "url" => $this->bingimg($images[$i]["thumbnail"]),
+ "width" => $ratio[0],
+ "height" => $ratio[1]
+ ]
+ ],
+ "url" => $this->sanitizeurl($images[$i]["url"])
+ ]
+ );
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ unset($images);
+
+ /*
+ Get videos
+ */
+ preg_match(
+ '/DDG\.duckbar\.load\(\'videos\', ?{[\s\S]*"results":(\[{"[\s\S]*}]),"vqd"/U',
+ $js,
+ $videos
+ );
+
+ if(isset($videos[1])){
+ try{
+ $videos = json_decode($videos[1], true);
+
+ for($i=0; $i<count($videos); $i++){
+
+ $cachekey = false;
+
+ foreach(["large", "medium", "small"] as &$key){
+
+ if(isset($videos[$i]["images"][$key])){
+
+ $cachekey = $key;
+ break;
+ }
+ }
+
+ if(
+ !isset($videos[$i]["title"]) ||
+ !isset($videos[$i]["description"]) ||
+ $cachekey === false ||
+ !isset($videos[$i]["content"])
+ ){
+
+ continue;
+ }
+
+ array_push(
+ $out["video"],
+ [
+ "title" => $this->titledots($this->unescapehtml($videos[$i]["title"])),
+ "description" => $videos[$i]["description"] == "" ? null : $this->titledots($this->unescapehtml($videos[$i]["description"])),
+ "date" => $videos[$i]["published"] == "" ? null : strtotime($videos[$i]["published"]),
+ "duration" => $videos[$i]["duration"] == 0 ? null : $this->hmstoseconds($videos[$i]["duration"]),
+ "views" => $videos[$i]["statistics"]["viewCount"] == 0 ? null : $videos[$i]["statistics"]["viewCount"],
+ "thumb" =>
+ [
+ "url" => $this->bingimg($videos[$i]["images"][$cachekey]),
+ "ratio" => "16:9"
+ ],
+ "url" => $this->sanitizeurl($videos[$i]["content"])
+ ]
+ );
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ unset($videos);
+
+ /*
+ Get news
+ */
+ preg_match(
+ '/DDG\.duckbar\.load\(\'news\', ?{[\s\S]*"results":(\[{"[\s\S]*}]),"vqd"/U',
+ $js,
+ $news
+ );
+
+ if(isset($news[1])){
+ try{
+ $news = json_decode($news[1], true);
+
+ for($i=0; $i<count($news); $i++){
+
+ if(
+ !isset($news[$i]["title"]) ||
+ !isset($news[$i]["excerpt"]) ||
+ !isset($news[$i]["url"])
+ ){
+
+ continue;
+ }
+
+ array_push(
+ $out["news"],
+ [
+ "title" => $this->titledots($this->unescapehtml($news[$i]["title"])),
+ "description" => $this->titledots($this->unescapehtml(strip_tags($news[$i]["excerpt"]))),
+ "date" => isset($news[$i]["date"]) ? (int)$news[$i]["date"] : null,
+ "thumb" =>
+ [
+ "url" => isset($news[$i]["image"]) ? $news[$i]["image"] : null,
+ "ratio" => "16:9"
+ ],
+ "url" => $this->sanitizeurl($news[$i]["url"])
+ ]
+ );
+ }
+
+ }catch(Exception $e){
+
+ // do nothing
+ }
+ }
+
+ return $out;
+ }
+
+ public function image($get){
+
+     /*
+         Image search.
+
+         When $get["npt"] is set, the stored i.js query string is replayed.
+         Otherwise the html page is requested to obtain a vqd token, which
+         is then sent to i.js together with the requested filters.
+
+         Returns ["status" => "ok", "npt" => token|null, "image" => [...]].
+         Throws Exception when a request fails or the payload is unusable.
+     */
+     if($get["npt"]){
+
+         $npt = $this->nextpage->get($get["npt"], "images");
+
+         try{
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/i.js?" . $npt,
+                 [],
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get i.js");
+         }
+
+     }else{
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $country = $get["country"];
+         $nsfw = $get["nsfw"];
+         $date = $get["date"];
+         $size = $get["size"];
+         $color = $get["color"];
+         $type = $get["type"];
+         $layout = $get["layout"];
+         $license = $get["license"];
+
+         $filter = [];
+         $get_filters = [
+             "q" => $search,
+             "iax" => "images",
+             "ia" => "images"
+         ];
+
+         if($date != "any"){ $filter[] = "time:$date"; }
+         if($size != "any"){ $filter[] = "size:$size"; }
+         if($color != "any"){ $filter[] = "color:$color"; }
+         if($type != "any"){ $filter[] = "type:$type"; }
+         if($layout != "any"){ $filter[] = "layout:$layout"; }
+         if($license != "any"){ $filter[] = "license:$license"; }
+
+         $filter = implode(",", $filter);
+
+         if($filter != ""){
+
+             $get_filters["iaf"] = $filter;
+         }
+
+         switch($nsfw){
+
+             case "yes": $get_filters["kp"] = "-2"; break;
+             case "no": $get_filters["kp"] = "-1"; break;
+         }
+
+         try{
+
+             $html = $this->get(
+                 "https://duckduckgo.com",
+                 $get_filters,
+                 ddg::req_web
+             );
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get html");
+         }
+
+         preg_match(
+             '/vqd=([0-9-]+)/',
+             $html,
+             $vqd
+         );
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to get vqd token");
+         }
+
+         $vqd = $vqd[1];
+
+         // @TODO: s param = image offset
+         $js_params = [
+             "l" => $country,
+             "o" => "json",
+             "q" => $search,
+             "vqd" => $vqd
+         ];
+
+         switch($nsfw){
+
+             case "yes": $js_params["p"] = "-1"; break;
+             case "no": $js_params["p"] = "1"; break;
+         }
+
+         if(empty($filter)){
+
+             $js_params["f"] = "1";
+         }else{
+
+             $js_params["f"] = $filter;
+         }
+
+         try{
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/i.js",
+                 $js_params,
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get i.js");
+         }
+     }
+
+     // json_decode() returns null on a malformed payload; fail loudly
+     // instead of crashing on count(null) further down
+     if(!is_array($json)){
+
+         throw new Exception("Failed to decode i.js");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     if(isset($json["next"])){
+
+         // on a next-page request no token was scraped from the html page,
+         // so reuse the one echoed back in the response
+         if(
+             !isset($vqd) &&
+             isset($json["vqd"]) &&
+             is_array($json["vqd"]) &&
+             count($json["vqd"]) !== 0
+         ){
+
+             $vqd = array_values($json["vqd"])[0];
+         }
+
+         $next = explode("?", $json["next"]);
+
+         // only hand out a next page token when it can actually be rebuilt
+         if(
+             isset($next[1]) &&
+             isset($vqd)
+         ){
+
+             $out["npt"] =
+                 $this->nextpage->store(
+                     $next[1] . "&vqd=" .
+                     $vqd,
+                     "images"
+                 );
+         }
+     }
+
+     if(
+         !isset($json["results"]) ||
+         !is_array($json["results"])
+     ){
+
+         return $out;
+     }
+
+     foreach($json["results"] as $result){
+
+         // ignore malformed entries, same policy as the image carousel
+         // parsed in web()
+         if(
+             !isset($result["title"]) ||
+             !isset($result["image"]) ||
+             !isset($result["thumbnail"]) ||
+             !isset($result["width"]) ||
+             !isset($result["height"]) ||
+             !isset($result["url"])
+         ){
+
+             continue;
+         }
+
+         $width = (int)$result["width"];
+         $height = (int)$result["height"];
+
+         // bingratio() divides by both dimensions
+         if(
+             $width <= 0 ||
+             $height <= 0
+         ){
+
+             continue;
+         }
+
+         $ratio = $this->bingratio($width, $height);
+
+         $out["image"][] = [
+             "title" => $this->titledots($this->unescapehtml($result["title"])),
+             "source" => [
+                 [
+                     "url" => $result["image"],
+                     "width" => $width,
+                     "height" => $height
+                 ],
+                 [
+                     "url" => $this->bingimg($result["thumbnail"]),
+                     "width" => $ratio[0],
+                     "height" => $ratio[1],
+                 ]
+             ],
+             "url" => $this->sanitizeurl($result["url"])
+         ];
+     }
+
+     return $out;
+ }
+
+ public function video($get){
+
+     /*
+         Video search.
+
+         When $get["npt"] is set, the stored v.js query string is replayed.
+         Otherwise the html page is requested to obtain a vqd token, which
+         is then sent to v.js together with the requested filters.
+
+         Returns the result skeleton with "video" populated; "author",
+         "livestream", "playlist" and "reel" are always empty here.
+         Throws Exception when a request fails or the payload is unusable.
+     */
+     if($get["npt"]){
+
+         $npt = $this->nextpage->get($get["npt"], "videos");
+
+         try{
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/v.js?" .
+                 $npt,
+                 [],
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get v.js");
+         }
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $country = $get["country"];
+         $nsfw = $get["nsfw"];
+         $date = $get["date"];
+         $resolution = $get["resolution"];
+         $duration = $get["duration"];
+         $license = $get["license"];
+
+         $filter = [];
+
+         $get_filters = [
+             "q" => $search,
+             "iax" => "videos",
+             "ia" => "videos"
+         ];
+
+         switch($nsfw){
+
+             case "yes": $get_filters["kp"] = "-2"; break;
+             case "no": $get_filters["kp"] = "-1"; break;
+         }
+
+         if($date != "any"){ $filter[] = "publishedAfter:{$date}"; }
+         if($resolution != "any"){ $filter[] = "videoDefinition:{$resolution}"; }
+         if($duration != "any"){ $filter[] = "videoDuration:{$duration}"; }
+         if($license != "any"){ $filter[] = "videoLicense:{$license}"; }
+
+         $filter = implode(",", $filter);
+
+         try{
+
+             $html = $this->get(
+                 "https://duckduckgo.com",
+                 $get_filters,
+                 ddg::req_web
+             );
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get html");
+         }
+
+         preg_match(
+             '/vqd=([0-9-]+)/',
+             $html,
+             $vqd
+         );
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to get vqd token");
+         }
+
+         $vqd = $vqd[1];
+
+         $js_params = [
+             // honour the requested region like image() and news() do,
+             // keeping the previous hard-coded value as the fallback
+             "l" => empty($country) ? "us-en" : $country,
+             "o" => "json",
+             "sr" => 1,
+             "q" => $search,
+             "vqd" => $vqd,
+             "f" => $filter
+         ];
+
+         // kp is only set for the "yes" and "no" nsfw modes
+         if(isset($get_filters["kp"])){
+
+             $js_params["p"] = $get_filters["kp"];
+         }
+
+         try{
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/v.js",
+                 $js_params,
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get v.js");
+         }
+     }
+
+     // json_decode() returns null on a malformed payload
+     if(!is_array($json)){
+
+         throw new Exception("Failed to decode v.js");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "video" => [],
+         "author" => [],
+         "livestream" => [],
+         "playlist" => [],
+         "reel" => []
+     ];
+
+     if(isset($json["next"])){
+
+         $next = explode("?", $json["next"]);
+
+         if(isset($next[1])){
+
+             $out["npt"] =
+                 $this->nextpage->store(
+                     $next[1],
+                     "videos"
+                 );
+         }
+     }
+
+     if(
+         !isset($json["results"]) ||
+         !is_array($json["results"])
+     ){
+
+         return $out;
+     }
+
+     foreach($json["results"] as $result){
+
+         // pick the biggest thumbnail available
+         $cachekey = false;
+
+         foreach(["large", "medium", "small"] as $key){
+
+             if(isset($result["images"][$key])){
+
+                 $cachekey = $key;
+                 break;
+             }
+         }
+
+         if(
+             !isset($result["title"]) ||
+             !isset($result["description"]) ||
+             $cachekey === false ||
+             !isset($result["content"])
+         ){
+
+             continue;
+         }
+
+         // optional fields: absent values are treated like empty ones
+         $published = isset($result["published"]) ? $result["published"] : "";
+         $length = isset($result["duration"]) ? $result["duration"] : 0;
+         $views = isset($result["statistics"]["viewCount"]) ? $result["statistics"]["viewCount"] : 0;
+
+         array_push(
+             $out["video"],
+             [
+                 "title" => $this->titledots($this->unescapehtml($result["title"])),
+                 "description" => $result["description"] == "" ? null : $this->titledots($this->unescapehtml($result["description"])),
+                 "author" => [
+                     "name" => empty($result["uploader"]) ? null : $this->unescapehtml($result["uploader"]),
+                     "url" => null,
+                     "avatar" => null
+                 ],
+                 "date" => $published == "" ? null : strtotime($published),
+                 "duration" => $length == 0 ? null : $this->hmstoseconds($length),
+                 "views" => $views == 0 ? null : $views,
+                 "thumb" => [
+                     "url" => $this->bingimg($result["images"][$cachekey]),
+                     "ratio" => "16:9"
+                 ],
+                 "url" => $this->sanitizeurl($result["content"])
+             ]
+         );
+     }
+
+     return $out;
+ }
+
+ public function news($get){
+
+     /*
+         News search.
+
+         When $get["npt"] is set, the stored news.js query string is
+         replayed. Otherwise the html page is requested to obtain a vqd
+         token, which is then sent to news.js.
+
+         Returns ["status" => "ok", "npt" => token|null, "news" => [...]].
+         Throws Exception when a request fails or the payload is unusable.
+     */
+     if($get["npt"]){
+
+         $req = $this->nextpage->get($get["npt"], "news");
+
+         try{
+
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/news.js?" .
+                 $req,
+                 [],
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get news.js");
+         }
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $country = $get["country"];
+         $nsfw = $get["nsfw"];
+         $date = $get["date"];
+
+         $get_params = [
+             "q" => $search,
+             "iar" => "news",
+             "ia" => "news"
+         ];
+
+         // the safe search flag belongs in the html request parameters;
+         // it used to be written to an unrelated, undefined array
+         switch($nsfw){
+
+             case "yes": $get_params["kp"] = "-2"; break;
+             case "maybe": $get_params["kp"] = "-1"; break;
+             case "no": $get_params["kp"] = "1"; break;
+         }
+
+         if($date != "any"){
+
+             $get_params["df"] = $date;
+         }
+
+         try{
+
+             $html = $this->get(
+                 "https://duckduckgo.com",
+                 $get_params,
+                 ddg::req_web
+             );
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get html");
+         }
+
+         preg_match(
+             '/vqd=([0-9-]+)/',
+             $html,
+             $vqd
+         );
+
+         if(!isset($vqd[1])){
+
+             throw new Exception("Failed to get vqd token");
+         }
+
+         $vqd = $vqd[1];
+
+         $js_params = [
+             "l" => $country,
+             "o" => "json",
+             "noamp" => "1",
+             "q" => $search,
+             "vqd" => $vqd
+         ];
+
+         // only forward the flag when one of the nsfw modes set it
+         if(isset($get_params["kp"])){
+
+             $js_params["p"] = $get_params["kp"];
+         }
+
+         if($date != "any"){
+
+             $js_params["df"] = $date;
+         }else{
+
+             $js_params["df"] = "";
+         }
+
+         try{
+
+             $json = json_decode($this->get(
+                 "https://duckduckgo.com/news.js",
+                 $js_params,
+                 ddg::req_xhr
+             ), true);
+
+         }catch(Exception $err){
+
+             throw new Exception("Failed to get news.js");
+         }
+     }
+
+     // json_decode() returns null on a malformed payload
+     if(!is_array($json)){
+
+         throw new Exception("Failed to decode news.js");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     if(isset($json["next"])){
+
+         $next = explode("?", $json["next"]);
+
+         if(isset($next[1])){
+
+             $out["npt"] =
+                 $this->nextpage->store(
+                     $next[1],
+                     "news"
+                 );
+         }
+     }
+
+     if(
+         !isset($json["results"]) ||
+         !is_array($json["results"])
+     ){
+
+         return $out;
+     }
+
+     foreach($json["results"] as $result){
+
+         // ignore malformed entries, same policy as the news carousel
+         // parsed in web()
+         if(
+             !isset($result["title"]) ||
+             !isset($result["excerpt"]) ||
+             !isset($result["url"])
+         ){
+
+             continue;
+         }
+
+         $out["news"][] = [
+             "title" => $this->titledots($this->unescapehtml($result["title"])),
+             "author" => isset($result["source"]) ? $this->unescapehtml($result["source"]) : null,
+             "description" => $this->titledots($this->unescapehtml(strip_tags($result["excerpt"]))),
+             "date" => isset($result["date"]) ? $result["date"] : null,
+             "thumb" =>
+                 [
+                     "url" => isset($result["image"]) ? $result["image"] : null,
+                     "ratio" => "16:9"
+                 ],
+             "url" => $this->sanitizeurl($result["url"])
+         ];
+     }
+
+     return $out;
+ }
+
+ private function hmstoseconds($time){
+
+     /*
+         Converts "h:m:s", "m:s" or "s" into a number of seconds.
+         At most three colon separated fields are considered; every
+         field is read with an integer cast.
+     */
+     $seconds = 0;
+
+     // every field to the left is worth 60 times the one on its right
+     foreach(explode(":", $time, 3) as $field){
+
+         $seconds = ($seconds * 60) + (int)$field;
+     }
+
+     return $seconds;
+ }
+
+ private function titledots($title){
+
+     /*
+         Trims the string and drops a trailing ellipsis, written either
+         as three dots or as the single "…" character (both occupy the
+         last 3 bytes of the string).
+     */
+     $tail = substr($title, -3);
+     $truncated = $tail === "..." || $tail === "…";
+
+     if($truncated){
+
+         $title = substr($title, 0, -3);
+     }
+
+     return trim($title);
+ }
+
+ private function unescapehtml($str){
+
+     /*
+         Turns the listed line break tags into newlines, then decodes
+         entities using the ENT_QUOTES | ENT_XML1 flags.
+     */
+     $breaks = [
+         "<br>",
+         "<br/>",
+         "</br>",
+         "<BR>",
+         "<BR/>",
+         "</BR>",
+     ];
+
+     $withnewlines = str_replace($breaks, "\n", $str);
+
+     return html_entity_decode($withnewlines, ENT_QUOTES | ENT_XML1, 'UTF-8');
+ }
+
+ private function bingimg($url){
+
+     /*
+         Rebuilds a thumbnail url as https://<host>/th?id=<id>, keeping
+         only the "id" query parameter.
+
+         Urls that cannot be rebuilt (not a string, no host, no query
+         string or no "id" parameter) are returned untouched instead of
+         producing a broken "https://host/th?id=" link.
+     */
+     if(!is_string($url)){
+
+         return $url;
+     }
+
+     $parse = parse_url($url);
+
+     if(
+         !is_array($parse) ||
+         !isset($parse["host"]) ||
+         !isset($parse["query"])
+     ){
+
+         return $url;
+     }
+
+     parse_str($parse["query"], $parts);
+
+     if(
+         !isset($parts["id"]) ||
+         !is_string($parts["id"])
+     ){
+
+         return $url;
+     }
+
+     return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]);
+ }
+
+ private function htmltoarray($html){
+
+     /*
+         Converts an html fragment into a flat list of description nodes.
+
+         Every node is an array with a "type" (text, code, inline_code,
+         title, quote, link or image). Text-like nodes carry a "value",
+         links carry "value" and "url", images only carry "url".
+
+         Always returns a list, possibly empty.
+     */
+     $html = strip_tags($html, ["img", "pre", "code", "br", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "a"]);
+
+     libxml_use_internal_errors(true);
+     $dom = new DOMDocument("1.0", "utf-8");
+     $dom->loadHTML('<div>' . $html . '</div>');
+     $xpath = new DOMXPath($dom);
+     $descendants = $xpath->query('//div/node()');
+
+     if(count($descendants) === 0){
+
+         // nothing was parsed: still hand back a list of nodes so that
+         // callers get the same shape as on the regular path
+         $value = $this->unescapehtml($html);
+
+         if(trim($value) === ""){
+
+             return [];
+         }
+
+         return [
+             [
+                 "type" => "text",
+                 "value" => $value
+             ]
+         ];
+     }
+
+     $array = [];
+     $previoustype = null;
+
+     foreach($descendants as $node){
+
+         // get node type
+         switch($node->nodeName){
+
+             case "pre":
+                 $type = "code";
+                 break;
+
+             case "code":
+                 $type = "inline_code";
+                 break;
+
+             case "h1":
+             case "h2":
+             case "h3":
+             case "h4":
+             case "h5":
+             case "h6":
+                 $type = "title";
+                 break;
+
+             case "blockquote":
+                 $type = "quote";
+                 break;
+
+             case "a":
+                 $type = "link";
+                 break;
+
+             case "img":
+                 $type = "image";
+                 break;
+
+             default:
+                 // "#text", <br> and anything unexpected. Without this
+                 // the type of the previous node would be reused
+                 $type = "text";
+                 break;
+         }
+
+         // previous text node that can be merged into or right-trimmed;
+         // the last element is checked too since empty nodes get skipped
+         $last = count($array) - 1;
+         $aftertext =
+             $previoustype == "text" &&
+             $last >= 0 &&
+             $array[$last]["type"] === "text";
+
+         // add node to array
+         switch($type){
+
+             case "text":
+                 $value = preg_replace(
+                     '/ {2,}/',
+                     " ",
+                     $this->limitnewlines($this->unescapehtml($node->textContent))
+                 );
+
+                 if(
+                     $previoustype == "quote" ||
+                     $previoustype === null ||
+                     $previoustype == "image" ||
+                     $previoustype == "title" ||
+                     $previoustype == "code"
+                 ){
+
+                     $value = ltrim($value);
+                 }
+
+                 if($value == ""){
+
+                     $previoustype = $type;
+                     continue 2;
+                 }
+
+                 // merge with previous text node
+                 if($aftertext){
+
+                     $array[$last]["value"] = trim($array[$last]["value"]) . "\n" . $this->bstoutf8($value);
+                 }else{
+
+                     $array[] = [
+                         "type" => "text",
+                         "value" => $this->bstoutf8($value)
+                     ];
+                 }
+                 break;
+
+             case "inline_code":
+                 $array[] = [
+                     "type" => "inline_code",
+                     "value" => $this->bstoutf8(trim($this->limitnewlines($this->unescapehtml($node->textContent))))
+                 ];
+                 break;
+
+             case "link":
+                 $first = $node->childNodes->item(0);
+
+                 if(
+                     $first !== null &&
+                     strlen($first->textContent) !== 0
+                 ){
+
+                     $array[] = [
+                         "type" => "link",
+                         "value" => $this->bstoutf8(trim($this->unescapehtml($node->textContent))),
+                         "url" => $this->bstoutf8(preg_replace('/\/ddg$/', "", preg_replace('/^http:\/\//', "https://", $this->sanitizeurl($node->getAttribute("href")))))
+                     ];
+                     break;
+                 }
+
+                 // no text: check for an image nested inside of the link.
+                 // It is looked up on the link itself so that unrelated
+                 // images elsewhere in the fragment cannot be picked up
+                 $img = $node->getElementsByTagName("img")->item(0);
+
+                 if($img === null){
+
+                     // empty link, nothing to show
+                     continue 2;
+                 }
+
+                 $type = "image";
+
+                 if($aftertext){
+
+                     $array[$last]["value"] = rtrim($array[$last]["value"]);
+                 }
+
+                 $array[] = [
+                     "type" => "image",
+                     "url" => $this->bstoutf8(preg_replace('/^http:\/\//', "https://", preg_replace('/^\/\/images\.duckduckgo\.com\/iu\/\?u=/', "", $img->getAttribute("src"))))
+                 ];
+                 break;
+
+             case "image":
+
+                 if($aftertext){
+
+                     $array[$last]["value"] = rtrim($array[$last]["value"]);
+                 }
+
+                 $array[] = [
+                     "type" => "image",
+                     "url" => $this->bstoutf8(preg_replace('/^http:\/\//', "https://", preg_replace('/^\/\/images\.duckduckgo\.com\/iu\/\?u=/', "", $node->getAttribute("src"))))
+                 ];
+                 break;
+
+             default:
+                 // quote, title and code
+                 if($aftertext){
+
+                     $array[$last]["value"] = rtrim($array[$last]["value"]);
+                 }
+
+                 $value = trim($this->limitnewlines($this->unescapehtml($node->textContent)));
+
+                 // whitespace runs are kept verbatim inside code blocks
+                 if($type != "code"){
+
+                     $value = preg_replace(
+                         '/ {2,}/',
+                         " ",
+                         $value
+                     );
+                 }
+
+                 $array[] = [
+                     "type" => $type,
+                     "value" => $this->bstoutf8($value)
+                 ];
+                 break;
+         }
+
+         $previoustype = $type;
+     }
+
+     return $array;
+ }
+
+ private function bstoutf8($bs){
+
+     /*
+         Re-encodes the string from UTF-8 to ISO-8859-1 with
+         transliteration.
+         NOTE(review): presumably this undoes a double encoding picked
+         up while the text went through DOMDocument -- confirm.
+
+         iconv() returns false when the conversion fails; the input is
+         handed back in that case so "value"/"url" fields never end up
+         holding a boolean.
+     */
+     $converted = iconv("UTF-8", "ISO-8859-1//TRANSLIT", $bs);
+
+     if($converted === false){
+
+         return $bs;
+     }
+
+     return $converted;
+ }
+
+ private function limitnewlines($text){
+
+     /*
+         Collapses every run of two or more line breaks (each optionally
+         followed by spaces) into exactly one blank line.
+
+         The result of preg_replace() used to be thrown away, which made
+         this function return its input unchanged.
+     */
+     $limited = preg_replace(
+         '/(?:[\n\r] *){2,}/m',
+         "\n\n",
+         $text
+     );
+
+     // preg_replace() returns null on a regex engine failure
+     if($limited === null){
+
+         return $text;
+     }
+
+     return $limited;
+ }
+
+ private function sanitizeurl($url){
+
+     /*
+         Strips tracking parameters from ebay links. Every other url,
+         including null and urls without a host, is returned untouched.
+     */
+     if(
+         !is_string($url) ||
+         $url === ""
+     ){
+
+         // callers pass null when a result has no url
+         return $url;
+     }
+
+     $domain = parse_url($url, PHP_URL_HOST);
+
+     if(!is_string($domain)){
+
+         return $url;
+     }
+
+     // check for domains w/out first short subdomain (ex: www.)
+     $subdomain = preg_replace(
+         '/^[A-Za-z0-9]{1,3}\./',
+         "",
+         $domain
+     );
+
+     switch($subdomain){
+         case "ebay.com.au":
+         case "ebay.at":
+         case "ebay.ca":
+         case "ebay.fr":
+         case "ebay.de":
+         case "ebay.com.hk":
+         case "ebay.ie":
+         case "ebay.it":
+         case "ebay.com.my":
+         case "ebay.nl":
+         case "ebay.ph":
+         case "ebay.pl":
+         case "ebay.com.sg":
+         case "ebay.es":
+         case "ebay.ch":
+         case "ebay.co.uk":
+         case "cafr.ebay.ca":
+         case "ebay.com":
+         case "community.ebay.com":
+         case "pages.ebay.com":
+
+             // remove ebay tracking elements
+             $old_params = (string)parse_url($url, PHP_URL_QUERY);
+             parse_str($old_params, $params);
+
+             // "oid" used to be guarded by a check on "epid", which had
+             // just been removed, so it was never stripped
+             $trackers = [
+                 "mkevt",
+                 "mkcid",
+                 "mkrid",
+                 "campid",
+                 "customid",
+                 "toolid",
+                 "_sop",
+                 "_dcat",
+                 "epid",
+                 "oid"
+             ];
+
+             foreach($trackers as $tracker){
+
+                 unset($params[$tracker]);
+             }
+
+             $params = http_build_query($params);
+
+             // nothing left: also swallow the "?" in front of the query
+             if(strlen($params) === 0){
+                 $replace = "\?";
+             }else{
+                 $replace = "";
+             }
+
+             $url = preg_replace(
+                 "/" . $replace . preg_quote($old_params, "/") . "$/",
+                 $params,
+                 $url
+             );
+             break;
+     }
+
+     return $url;
+ }
+
+ private function number_format($number){
+
+     /*
+         Formats a number through sprintf('%f') and splits the result on
+         the dot. The integer part is printed without decimals; the
+         fraction digits produced by sprintf are appended verbatim.
+     */
+     $pieces = explode(".", sprintf('%f', $number));
+     $whole = (float)$pieces[0];
+
+     // no dot in the formatted number: integer part only
+     if(count($pieces) === 1){
+
+         return number_format($whole, 0, ",", ".");
+     }
+
+     $fraction = (string)$pieces[1];
+
+     return number_format($whole, 0, ",", "") . "." . $fraction;
+ }
+
+ private function bingratio($width, $height){
+
+     /*
+         Scales $width x $height so that neither side exceeds 474 while
+         keeping the aspect ratio. Returns [width, height], floored.
+
+         Unknown dimensions (zero or negative) yield [0.0, 0.0] instead
+         of dividing by zero.
+     */
+     if(
+         $width <= 0 ||
+         $height <= 0
+     ){
+
+         return [0.0, 0.0];
+     }
+
+     // the smaller factor is the one that makes both sides fit
+     $ratio = min(474 / $width, 474 / $height);
+
+     return [
+         floor($width * $ratio),
+         floor($height * $ratio)
+     ];
+ }
+}
diff --git a/scraper/google.php b/scraper/google.php
new file mode 100644
index 0000000..6a746f7
--- /dev/null
+++ b/scraper/google.php
@@ -0,0 +1,1562 @@
+<?php
+
+class google{
+
+ private const is_class = ".";
+ private const is_id = "#";
+
+ public function __construct(){
+
+     // include_once instead of include: the other scrapers load the
+     // same helper files, and including them a second time in one
+     // request would redeclare the classes and abort with a fatal error
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     include_once "lib/nextpage.php";
+     $this->nextpage = new nextpage("google");
+ }
+
+ public function getfilters($page){
+
+ // Returns the filter definitions for $page as
+ // filter name => ["display" => label, "option" => choices], where
+ // "option" is either a value => label list or the string "_DATE".
+ // "web" currently returns no filters (its list is commented out
+ // below); any page other than "web"/"images" returns nothing.
+
+ switch($page){
+
+ case "web": return [];/*
+ return [
+ "country" => [
+ "display" => "Country",
+ "option" => [
+ "zz" => "Instance region",
+ "af" => "Afghanistan",
+ "al" => "Albania",
+ "dz" => "Algeria",
+ "as" => "American Samoa",
+ "ad" => "Andorra",
+ "ao" => "Angola",
+ "ag" => "Antigua & Barbuda",
+ "ar" => "Argentina",
+ "am" => "Armenia",
+ "au" => "Australia",
+ "at" => "Austria",
+ "az" => "Azerbaijan",
+ "bs" => "Bahamas",
+ "bh" => "Bahrain",
+ "bd" => "Bangladesh",
+ "by" => "Belarus",
+ "be" => "Belgium",
+ "bz" => "Belize",
+ "bj" => "Benin",
+ "bt" => "Bhutan",
+ "bo" => "Bolivia",
+ "ba" => "Bosnia & Herzegovina",
+ "bw" => "Botswana",
+ "br" => "Brazil",
+ "bn" => "Brunei",
+ "bg" => "Bulgaria",
+ "bf" => "Burkina Faso",
+ "bi" => "Burundi",
+ "kh" => "Cambodia",
+ "cm" => "Cameroon",
+ "ca" => "Canada",
+ "cv" => "Cape Verde",
+ "cf" => "Central African Republic",
+ "td" => "Chad",
+ "cl" => "Chile",
+ "co" => "Colombia",
+ "cg" => "Congo - Brazzaville",
+ "cd" => "Congo - Kinshasa",
+ "ck" => "Cook Islands",
+ "cr" => "Costa Rica",
+ "ci" => "Côte d’Ivoire",
+ "hr" => "Croatia",
+ "cu" => "Cuba",
+ "cy" => "Cyprus",
+ "cz" => "Czechia",
+ "dk" => "Denmark",
+ "dj" => "Djibouti",
+ "dm" => "Dominica",
+ "do" => "Dominican Republic",
+ "ec" => "Ecuador",
+ "eg" => "Egypt",
+ "sv" => "El Salvador",
+ "ee" => "Estonia",
+ "et" => "Ethiopia",
+ "fj" => "Fiji",
+ "fi" => "Finland",
+ "fr" => "France",
+ "ga" => "Gabon",
+ "gm" => "Gambia",
+ "ge" => "Georgia",
+ "de" => "Germany",
+ "gh" => "Ghana",
+ "gi" => "Gibraltar",
+ "gr" => "Greece",
+ "gl" => "Greenland",
+ "gt" => "Guatemala",
+ "gg" => "Guernsey",
+ "gy" => "Guyana",
+ "ht" => "Haiti",
+ "hn" => "Honduras",
+ "hk" => "Hong Kong",
+ "hu" => "Hungary",
+ "is" => "Iceland",
+ "in" => "India",
+ "id" => "Indonesia",
+ "iq" => "Iraq",
+ "ie" => "Ireland",
+ "im" => "Isle of Man",
+ "il" => "Israel",
+ "it" => "Italy",
+ "jm" => "Jamaica",
+ "jp" => "Japan",
+ "je" => "Jersey",
+ "jo" => "Jordan",
+ "kz" => "Kazakhstan",
+ "ke" => "Kenya",
+ "ki" => "Kiribati",
+ "kw" => "Kuwait",
+ "kg" => "Kyrgyzstan",
+ "la" => "Laos",
+ "lv" => "Latvia",
+ "lb" => "Lebanon",
+ "ls" => "Lesotho",
+ "ly" => "Libya",
+ "li" => "Liechtenstein",
+ "lt" => "Lithuania",
+ "lu" => "Luxembourg",
+ "mg" => "Madagascar",
+ "mw" => "Malawi",
+ "my" => "Malaysia",
+ "mv" => "Maldives",
+ "ml" => "Mali",
+ "mt" => "Malta",
+ "mu" => "Mauritius",
+ "mx" => "Mexico",
+ "fm" => "Micronesia",
+ "md" => "Moldova",
+ "mn" => "Mongolia",
+ "me" => "Montenegro",
+ "ma" => "Morocco",
+ "mz" => "Mozambique",
+ "mm" => "Myanmar (Burma)",
+ "na" => "Namibia",
+ "nr" => "Nauru",
+ "np" => "Nepal",
+ "nl" => "Netherlands",
+ "nz" => "New Zealand",
+ "ni" => "Nicaragua",
+ "ne" => "Niger",
+ "ng" => "Nigeria",
+ "nu" => "Niue",
+ "mk" => "North Macedonia",
+ "no" => "Norway",
+ "om" => "Oman",
+ "pk" => "Pakistan",
+ "ps" => "Palestine",
+ "pa" => "Panama",
+ "pg" => "Papua New Guinea",
+ "py" => "Paraguay",
+ "pe" => "Peru",
+ "ph" => "Philippines",
+ "pn" => "Pitcairn Islands",
+ "pl" => "Poland",
+ "pt" => "Portugal",
+ "pr" => "Puerto Rico",
+ "qa" => "Qatar",
+ "ro" => "Romania",
+ "ru" => "Russia",
+ "rw" => "Rwanda",
+ "ws" => "Samoa",
+ "sm" => "San Marino",
+ "st" => "São Tomé & Príncipe",
+ "sa" => "Saudi Arabia",
+ "sn" => "Senegal",
+ "rs" => "Serbia",
+ "sc" => "Seychelles",
+ "sl" => "Sierra Leone",
+ "sg" => "Singapore",
+ "sk" => "Slovakia",
+ "si" => "Slovenia",
+ "sb" => "Solomon Islands",
+ "so" => "Somalia",
+ "za" => "South Africa",
+ "kr" => "South Korea",
+ "es" => "Spain",
+ "lk" => "Sri Lanka",
+ "sh" => "St. Helena",
+ "vc" => "St. Vincent & Grenadines",
+ "sr" => "Suriname",
+ "se" => "Sweden",
+ "ch" => "Switzerland",
+ "tw" => "Taiwan",
+ "tj" => "Tajikistan",
+ "tz" => "Tanzania",
+ "th" => "Thailand",
+ "tl" => "Timor-Leste",
+ "tg" => "Togo",
+ "to" => "Tonga",
+ "tt" => "Trinidad & Tobago",
+ "tn" => "Tunisia",
+ "tr" => "Türkiye",
+ "tm" => "Turkmenistan",
+ "vi" => "U.S. Virgin Islands",
+ "ug" => "Uganda",
+ "ua" => "Ukraine",
+ "ae" => "United Arab Emirates",
+ "gb" => "United Kingdom",
+ "us" => "United States",
+ "uy" => "Uruguay",
+ "uz" => "Uzbekistan",
+ "vu" => "Vanuatu",
+ "ve" => "Venezuela",
+ "vn" => "Vietnam",
+ "zm" => "Zambia",
+ "zw" => "Zimbabwe"
+ ]
+ ],
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes",
+ "no" => "No"
+ ]
+ ],
+ "lang" => [ // prefix with lang_
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "af" => "Afrikaans",
+ "ca" => "català",
+ "cs" => "čeština",
+ "da" => "dansk",
+ "de" => "Deutsch",
+ "et" => "eesti",
+ "en" => "English",
+ "es" => "español",
+ "eo" => "esperanto",
+ "tl" => "Filipino",
+ "fr" => "français",
+ "hr" => "hrvatski",
+ "id" => "Indonesia",
+ "is" => "íslenska",
+ "it" => "italiano",
+ "sw" => "Kiswahili",
+ "lv" => "latviešu",
+ "lt" => "lietuvių",
+ "hu" => "magyar",
+ "nl" => "Nederlands",
+ "no" => "norsk",
+ "pl" => "polski",
+ "pt" => "português",
+ "ro" => "română",
+ "sk" => "slovenčina",
+ "sl" => "slovenščina",
+ "fi" => "suomi",
+ "sv" => "svenska",
+ "vi" => "Tiếng Việt",
+ "tr" => "Türkçe",
+ "el" => "Ελληνικά",
+ "be" => "беларуская",
+ "bg" => "български",
+ "ru" => "русский",
+ "sr" => "српски",
+ "uk" => "українська",
+ "hy" => "հայերեն",
+ "iw" => "עברית",
+ "ar" => "العربية",
+ "fa" => "فارسی",
+ "hi" => "हिन्दी",
+ "th" => "ไทย",
+ "ko" => "한국어",
+ "zh-CN" => "中文 (简体)",
+ "zh-TW" => "中文 (繁體)",
+ "ja" => "日本語"
+ ]
+ ],
+ "time" => [
+ "display" => "Time posted",
+ "option" => [
+ "any" => "Any time",
+ "h" => "Last hour",
+ "d" => "Last 24 hours",
+ "w" => "Last week",
+ "m" => "Last month",
+ "y" => "Last year"
+ ]
+ ],
+ "verbatim" => [
+ "display" => "Verbatim",
+ "option" => [
+ "no" => "No",
+ "yes" => "Yes"
+ ]
+ ]
+ ];*/
+ break;
+
+ case "images":
+ return [
+ "country" => [ // gl=<country>
+ "display" => "Country",
+ "option" => [
+ "any" => "Instance's country",
+ "af" => "Afghanistan",
+ "al" => "Albania",
+ "dz" => "Algeria",
+ "as" => "American Samoa",
+ "ad" => "Andorra",
+ "ao" => "Angola",
+ "ai" => "Anguilla",
+ "aq" => "Antarctica",
+ "ag" => "Antigua and Barbuda",
+ "ar" => "Argentina",
+ "am" => "Armenia",
+ "aw" => "Aruba",
+ "au" => "Australia",
+ "at" => "Austria",
+ "az" => "Azerbaijan",
+ "bs" => "Bahamas",
+ "bh" => "Bahrain",
+ "bd" => "Bangladesh",
+ "bb" => "Barbados",
+ "by" => "Belarus",
+ "be" => "Belgium",
+ "bz" => "Belize",
+ "bj" => "Benin",
+ "bm" => "Bermuda",
+ "bt" => "Bhutan",
+ "bo" => "Bolivia",
+ "ba" => "Bosnia and Herzegovina",
+ "bw" => "Botswana",
+ "bv" => "Bouvet Island",
+ "br" => "Brazil",
+ "io" => "British Indian Ocean Territory",
+ "bn" => "Brunei Darussalam",
+ "bg" => "Bulgaria",
+ "bf" => "Burkina Faso",
+ "bi" => "Burundi",
+ "kh" => "Cambodia",
+ "cm" => "Cameroon",
+ "ca" => "Canada",
+ "cv" => "Cape Verde",
+ "ky" => "Cayman Islands",
+ "cf" => "Central African Republic",
+ "td" => "Chad",
+ "cl" => "Chile",
+ "cn" => "China",
+ "cx" => "Christmas Island",
+ "cc" => "Cocos (Keeling) Islands",
+ "co" => "Colombia",
+ "km" => "Comoros",
+ "cg" => "Congo",
+ "cd" => "Congo, the Democratic Republic of the",
+ "ck" => "Cook Islands",
+ "cr" => "Costa Rica",
+ "ci" => "Cote D'ivoire",
+ "hr" => "Croatia",
+ "cu" => "Cuba",
+ "cy" => "Cyprus",
+ "cz" => "Czech Republic",
+ "dk" => "Denmark",
+ "dj" => "Djibouti",
+ "dm" => "Dominica",
+ "do" => "Dominican Republic",
+ "ec" => "Ecuador",
+ "eg" => "Egypt",
+ "sv" => "El Salvador",
+ "gq" => "Equatorial Guinea",
+ "er" => "Eritrea",
+ "ee" => "Estonia",
+ "et" => "Ethiopia",
+ "fk" => "Falkland Islands (Malvinas)",
+ "fo" => "Faroe Islands",
+ "fj" => "Fiji",
+ "fi" => "Finland",
+ "fr" => "France",
+ "gf" => "French Guiana",
+ "pf" => "French Polynesia",
+ "tf" => "French Southern Territories",
+ "ga" => "Gabon",
+ "gm" => "Gambia",
+ "ge" => "Georgia",
+ "de" => "Germany",
+ "gh" => "Ghana",
+ "gi" => "Gibraltar",
+ "gr" => "Greece",
+ "gl" => "Greenland",
+ "gd" => "Grenada",
+ "gp" => "Guadeloupe",
+ "gu" => "Guam",
+ "gt" => "Guatemala",
+ "gn" => "Guinea",
+ "gw" => "Guinea-Bissau",
+ "gy" => "Guyana",
+ "ht" => "Haiti",
+ "hm" => "Heard Island and Mcdonald Islands",
+ "va" => "Holy See (Vatican City State)",
+ "hn" => "Honduras",
+ "hk" => "Hong Kong",
+ "hu" => "Hungary",
+ "is" => "Iceland",
+ "in" => "India",
+ "id" => "Indonesia",
+ "ir" => "Iran, Islamic Republic of",
+ "iq" => "Iraq",
+ "ie" => "Ireland",
+ "il" => "Israel",
+ "it" => "Italy",
+ "jm" => "Jamaica",
+ "jp" => "Japan",
+ "jo" => "Jordan",
+ "kz" => "Kazakhstan",
+ "ke" => "Kenya",
+ "ki" => "Kiribati",
+ "kp" => "Korea, Democratic People's Republic of",
+ "kr" => "Korea, Republic of",
+ "kw" => "Kuwait",
+ "kg" => "Kyrgyzstan",
+ "la" => "Lao People's Democratic Republic",
+ "lv" => "Latvia",
+ "lb" => "Lebanon",
+ "ls" => "Lesotho",
+ "lr" => "Liberia",
+ "ly" => "Libyan Arab Jamahiriya",
+ "li" => "Liechtenstein",
+ "lt" => "Lithuania",
+ "lu" => "Luxembourg",
+ "mo" => "Macao",
+ "mk" => "Macedonia, the Former Yugoslav Republic of",
+ "mg" => "Madagascar",
+ "mw" => "Malawi",
+ "my" => "Malaysia",
+ "mv" => "Maldives",
+ "ml" => "Mali",
+ "mt" => "Malta",
+ "mh" => "Marshall Islands",
+ "mq" => "Martinique",
+ "mr" => "Mauritania",
+ "mu" => "Mauritius",
+ "yt" => "Mayotte",
+ "mx" => "Mexico",
+ "fm" => "Micronesia, Federated States of",
+ "md" => "Moldova, Republic of",
+ "mc" => "Monaco",
+ "mn" => "Mongolia",
+ "ms" => "Montserrat",
+ "ma" => "Morocco",
+ "mz" => "Mozambique",
+ "mm" => "Myanmar",
+ "na" => "Namibia",
+ "nr" => "Nauru",
+ "np" => "Nepal",
+ "nl" => "Netherlands",
+ "an" => "Netherlands Antilles",
+ "nc" => "New Caledonia",
+ "nz" => "New Zealand",
+ "ni" => "Nicaragua",
+ "ne" => "Niger",
+ "ng" => "Nigeria",
+ "nu" => "Niue",
+ "nf" => "Norfolk Island",
+ "mp" => "Northern Mariana Islands",
+ "no" => "Norway",
+ "om" => "Oman",
+ "pk" => "Pakistan",
+ "pw" => "Palau",
+ "ps" => "Palestinian Territory, Occupied",
+ "pa" => "Panama",
+ "pg" => "Papua New Guinea",
+ "py" => "Paraguay",
+ "pe" => "Peru",
+ "ph" => "Philippines",
+ "pn" => "Pitcairn",
+ "pl" => "Poland",
+ "pt" => "Portugal",
+ "pr" => "Puerto Rico",
+ "qa" => "Qatar",
+ "re" => "Reunion",
+ "ro" => "Romania",
+ "ru" => "Russian Federation",
+ "rw" => "Rwanda",
+ "sh" => "Saint Helena",
+ "kn" => "Saint Kitts and Nevis",
+ "lc" => "Saint Lucia",
+ "pm" => "Saint Pierre and Miquelon",
+ "vc" => "Saint Vincent and the Grenadines",
+ "ws" => "Samoa",
+ "sm" => "San Marino",
+ "st" => "Sao Tome and Principe",
+ "sa" => "Saudi Arabia",
+ "sn" => "Senegal",
+ "cs" => "Serbia and Montenegro",
+ "sc" => "Seychelles",
+ "sl" => "Sierra Leone",
+ "sg" => "Singapore",
+ "sk" => "Slovakia",
+ "si" => "Slovenia",
+ "sb" => "Solomon Islands",
+ "so" => "Somalia",
+ "za" => "South Africa",
+ "gs" => "South Georgia and the South Sandwich Islands",
+ "es" => "Spain",
+ "lk" => "Sri Lanka",
+ "sd" => "Sudan",
+ "sr" => "Suriname",
+ "sj" => "Svalbard and Jan Mayen",
+ "sz" => "Swaziland",
+ "se" => "Sweden",
+ "ch" => "Switzerland",
+ "sy" => "Syrian Arab Republic",
+ "tw" => "Taiwan, Province of China",
+ "tj" => "Tajikistan",
+ "tz" => "Tanzania, United Republic of",
+ "th" => "Thailand",
+ "tl" => "Timor-Leste",
+ "tg" => "Togo",
+ "tk" => "Tokelau",
+ "to" => "Tonga",
+ "tt" => "Trinidad and Tobago",
+ "tn" => "Tunisia",
+ "tr" => "Turkey",
+ "tm" => "Turkmenistan",
+ "tc" => "Turks and Caicos Islands",
+ "tv" => "Tuvalu",
+ "ug" => "Uganda",
+ "ua" => "Ukraine",
+ "ae" => "United Arab Emirates",
+ "uk" => "United Kingdom",
+ "us" => "United States",
+ "um" => "United States Minor Outlying Islands",
+ "uy" => "Uruguay",
+ "uz" => "Uzbekistan",
+ "vu" => "Vanuatu",
+ "ve" => "Venezuela",
+ "vn" => "Viet Nam",
+ "vg" => "Virgin Islands, British",
+ "vi" => "Virgin Islands, U.S.",
+ "wf" => "Wallis and Futuna",
+ "eh" => "Western Sahara",
+ "ye" => "Yemen",
+ "zm" => "Zambia",
+ "zw" => "Zimbabwe"
+ ]
+ ],
+ "nsfw" => [
+ "display" => "NSFW",
+ "option" => [
+ "yes" => "Yes", // safe=active
+ "no" => "No" // safe=off
+ ]
+ ],
+ "lang" => [ // lr=<lang> (prefix lang with "lang_")
+ "display" => "Language",
+ "option" => [
+ "any" => "Any language",
+ "ar" => "Arabic",
+ "bg" => "Bulgarian",
+ "ca" => "Catalan",
+ "cs" => "Czech",
+ "da" => "Danish",
+ "de" => "German",
+ "el" => "Greek",
+ "en" => "English",
+ "es" => "Spanish",
+ "et" => "Estonian",
+ "fi" => "Finnish",
+ "fr" => "French",
+ "hr" => "Croatian",
+ "hu" => "Hungarian",
+ "id" => "Indonesian",
+ "is" => "Icelandic",
+ "it" => "Italian",
+ "iw" => "Hebrew",
+ "ja" => "Japanese",
+ "ko" => "Korean",
+ "lt" => "Lithuanian",
+ "lv" => "Latvian",
+ "nl" => "Dutch",
+ "no" => "Norwegian",
+ "pl" => "Polish",
+ "pt" => "Portuguese",
+ "ro" => "Romanian",
+ "ru" => "Russian",
+ "sk" => "Slovak",
+ "sl" => "Slovenian",
+ "sr" => "Serbian",
+ "sv" => "Swedish",
+ "tr" => "Turkish",
+ "zh-CN" => "Chinese (Simplified)",
+ "zh-TW" => "Chinese (Traditional)"
+ ]
+ ],
+ "newer" => [ // &sort=review-date:r:20090301:20090430
+ "display" => "Newer than",
+ "option" => "_DATE"
+ ],
+ "older" => [
+ "display" => "Older than",
+ "option" => "_DATE"
+ ],
+ "size" => [ // tbs=isz:<size>
+ "display" => "Size",
+ "option" => [
+ "any" => "Any size",
+ "l" => "Large",
+ "m" => "Medium",
+ "i" => "Icon"
+ ]
+ ],
+ "color" => [ // tbs=ic:<color>
+ "display" => "Color",
+ "option" => [
+ "any" => "Any color",
+ // black and white is tbs=ic:gray on google's side, but it
+ // cannot be keyed "gray" here: the "gray" swatch below
+ // uses that key, and a repeated array key silently
+ // replaces the earlier entry
+ "bw" => "Black and white",
+ "trans" => "Transparent",
+ // from here, format is
+ // tbs=specific,isc:<color>
+ "red" => "Red",
+ "orange" => "Orange",
+ "yellow" => "Yellow",
+ "green" => "Green",
+ "teal" => "Teal",
+ "blue" => "Blue",
+ "purple" => "Purple",
+ "pink" => "Pink",
+ "white" => "White",
+ "gray" => "Gray",
+ "black" => "Black",
+ "brown" => "Brown"
+ ]
+ ],
+ "type" => [ // tbs=itp:<type>
+ "display" => "Type",
+ "option" => [
+ "any" => "Any type",
+ "clipart" => "Clip Art",
+ "lineart" => "Line Drawing",
+ "animated" => "GIF"
+ ]
+ ],
+ "rights" => [ // tbs=il:<rights>
+ "display" => "Usage rights",
+ "option" => [
+ "any" => "No license",
+ "cl" => "Creative Commons licenses",
+ "ol" => "Commercial & other licenses"
+ ]
+ ]
+ ];
+ break;
+ }
+ }
+
+ private function get($url, $get = []){
+
+     // Performs a GET request and returns the response body.
+     //   $url  address to fetch
+     //   $get  query parameters, appended to $url as a query string
+     // Throws Exception with the curl error message when the transfer fails.
+
+     $headers = [
+         "User-Agent: Mozilla/5.0 (Linux; U; Android 2.3.3; pt-pt; LG-P500h-parrot Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 MMS/LG-Android-MMS-V1.0/1.2",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     try{
+
+         $data = curl_exec($curlproc);
+
+         if(curl_errno($curlproc)){
+
+             throw new Exception(curl_error($curlproc));
+         }
+     }finally{
+
+         // release the handle on the error path as well
+         curl_close($curlproc);
+     }
+
+     return $data;
+ }
+
+ public function web($get){
+
+     // Parses a google results page into the result structure built in
+     // $out below: "web" entries (with sublinks and key/value tables),
+     // "news" entries from the "Top stories" carousel, "related"
+     // searches and the next page query string in "npt".
+     //
+     // NOTE(review): the page is still read from a saved dump on disk,
+     // so $get is not used and no request is made yet.
+
+     $handle = fopen("scraper/google.html", "r");
+     $html = fread($handle, filesize("scraper/google.html"));
+     fclose($handle);
+
+     $this->fuckhtml->load($html);
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // index every css rule of the page: elements are located through
+     // the declarations attached to their class (see findstyles)
+     $styles =
+         $this->fuckhtml
+         ->getElementsByTagName("style");
+
+     $this->computedstyle = [];
+
+     foreach($styles as $style){
+
+         $this->computedstyle =
+             array_merge(
+                 $this->computedstyle,
+                 $this->parsestyles($style["innerHTML"])
+             );
+     }
+
+     // get images in javascript var
+     preg_match(
+         '/google\.ldi=({[^}]+})/',
+         $html,
+         $js_image
+     );
+
+     if(count($js_image) !== 0){
+
+         $js_image = json_decode($js_image[1], true);
+     }else{
+
+         $js_image = [];
+     }
+
+     if(!is_array($js_image)){
+
+         // the captured blob did not decode to an array
+         $js_image = [];
+     }
+
+     // resolve every class name once: the style index does not change
+     // while the containers are walked
+     $class_container =
+         $this->findstyles(
+             [
+                 "background-color" => "#fff",
+                 "margin-bottom" => "10px",
+                 "-webkit-box-shadow" => "0 1px 6px rgba(32,33,36,0.28)",
+                 "border-radius" => "8px"
+             ],
+             self::is_class
+         );
+
+     $class_title =
+         $this->findstyles(
+             [
+                 "color" => "#1967d2",
+                 "font-size" => "20px",
+                 "line-height" => "26px"
+             ],
+             self::is_class
+         );
+
+     $class_carousel_title =
+         $this->findstyles(
+             [
+                 "font-size" => "16px",
+                 "line-height" => "20px",
+                 "font-weight" => "400"
+             ],
+             self::is_class
+         );
+
+     // text blocks: carousel captions, descriptions, tables
+     $class_text =
+         $this->findstyles(
+             [
+                 "white-space" => "pre-line",
+                 "word-wrap" => "break-word"
+             ],
+             self::is_class
+         );
+
+     $class_sublink =
+         $this->findstyles(
+             [
+                 "color" => "#1967d2",
+                 "font-size" => "14px",
+                 "line-height" => "20px"
+             ],
+             self::is_class
+         );
+
+     $class_related =
+         $this->findstyles(
+             [
+                 "display" => "block",
+                 "position" => "relative",
+                 "width" => "100%"
+             ],
+             self::is_class
+         );
+
+     $class_nextpage =
+         $this->findstyles(
+             [
+                 "-webkit-box-flex" => "1",
+                 "display" => "block"
+             ],
+             self::is_class
+         );
+
+     // get nodes
+     $containers =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             $class_container,
+             "div"
+         );
+
+     foreach($containers as $container){
+
+         $this->fuckhtml->load($container);
+
+         // get link at the top (null when the container has no anchor)
+         $link = null;
+
+         $anchors =
+             $this->fuckhtml
+             ->getElementsByTagName("a");
+
+         if(count($anchors) !== 0){
+
+             $link =
+                 $this->decodeurl(
+                     $anchors[0]["attributes"]["href"]
+                 );
+         }
+
+         /*
+             Check for carousel presence
+         */
+         $carousel =
+             $this->fuckhtml
+             ->getElementsByClassName("pcitem", "div");
+
+         $title =
+             $this->fuckhtml
+             ->getElementsByClassName($class_title, "div");
+
+         $carousel_title =
+             $this->fuckhtml
+             ->getElementsByClassName($class_carousel_title, "div");
+
+         if(count($carousel) !== 0){
+
+             $sublink = []; // twitter carousel sublinks
+
+             foreach($carousel as $item){
+
+                 $this->fuckhtml->load($item);
+
+                 $url =
+                     $this->decodeurl(
+                         $this->fuckhtml
+                         ->getElementsByTagName("a")[0]
+                         ["attributes"]
+                         ["href"]
+                     );
+
+                 // detect if its a twitter carousel or
+                 // a list of news articles
+                 $grey_node =
+                     $this->fuckhtml
+                     ->getElementsByClassName($class_text, "div");
+
+                 if(count($carousel_title) !== 0){
+
+                     if(
+                         $this->fuckhtml
+                         ->getTextContent($carousel_title[0])
+                         != "Top stories"
+                     ){
+
+                         // titled carousel of a kind not handled yet
+                         continue;
+                     }
+
+                     if(count($grey_node) === 0){
+
+                         // no caption to take the headline from
+                         continue;
+                     }
+
+                     $img =
+                         $this->fuckhtml
+                         ->getElementsByTagName("img");
+
+                     if(
+                         count($img) !== 0 &&
+                         isset($img[0]["attributes"]["id"]) &&
+                         isset($js_image[$img[0]["attributes"]["id"]])
+                     ){
+
+                         $img = [
+                             "url" => $js_image[$img[0]["attributes"]["id"]],
+                             "ratio" => "16:9"
+                         ];
+                     }else{
+
+                         $img = [
+                             "url" => null,
+                             "ratio" => null
+                         ];
+                     }
+
+                     // the date is read from the second line of the
+                     // second caption block, when there is one
+                     $news_date = null;
+
+                     if(isset($grey_node[1]["innerHTML"])){
+
+                         $date_lines =
+                             explode(
+                                 "\n",
+                                 $grey_node[1]["innerHTML"]
+                             );
+
+                         if(isset($date_lines[1])){
+
+                             $news_date = strtotime($date_lines[1]);
+                         }
+                     }
+
+                     /*
+                         Is a news node
+                     */
+                     $out["news"][] = [
+                         "title" =>
+                             $this->fuckhtml
+                             ->getTextContent($grey_node[0]),
+                         "description" => null,
+                         "date" => $news_date,
+                         "thumb" => $img,
+                         "url" => $url
+                     ];
+
+                     continue;
+                 }
+
+                 /*
+                     Is a web node (twitter-like)
+                     create a link -> sublink structure and
+                     ignore images
+                 */
+                 $node_count = count($grey_node);
+
+                 if($node_count === 1){
+
+                     $sublink_title = $grey_node[0];
+                     $sublink_description = null;
+
+                 }elseif($node_count === 2){
+
+                     $sublink_title = $grey_node[1];
+                     $sublink_description =
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent($grey_node[0])
+                         );
+                 }else{
+
+                     // no caption, or a layout this parser does not
+                     // know: skip rather than reuse values left over
+                     // from the previous item
+                     continue;
+                 }
+
+                 // same anchor as $url above; decoding it a second
+                 // time would html-decode the href twice
+                 $sublink_url = $url;
+
+                 if($link == $sublink_url){
+
+                     continue;
+                 }
+
+                 $sublink_title =
+                     explode(
+                         " • ",
+                         $this->fuckhtml
+                         ->getTextContent($sublink_title["innerHTML"])
+                     );
+
+                 if(count($sublink_title) !== 1){
+
+                     $date = strtotime($sublink_title[1]);
+                 }else{
+
+                     $date = null;
+                 }
+
+                 $sublink_title = $this->titledots($sublink_title[0]);
+
+                 $sublink[] = [
+                     "title" => $sublink_title,
+                     "date" => $date,
+                     "description" => $sublink_description,
+                     "url" => $sublink_url
+                 ];
+             }
+
+             // if it was a web node
+             if(count($sublink) !== 0){
+
+                 $out["web"][] = [
+                     "title" =>
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent($title[0])
+                         ),
+                     "description" => null,
+                     // the container's own link; $url only holds the
+                     // address of the last carousel item at this point
+                     "url" => $link,
+                     "date" => null,
+                     "type" => "web",
+                     "thumb" => [
+                         "url" => null,
+                         "ratio" => null
+                     ],
+                     "sublink" => $sublink,
+                     "table" => []
+                 ];
+             }
+
+             continue;
+         }
+
+         if(count($title) !== 0){
+
+             /*
+                 Get WEB search results
+             */
+             $thumb =
+                 $this->fuckhtml
+                 ->getElementsByTagName("img");
+
+             if(
+                 count($thumb) !== 0 &&
+                 isset($thumb[0]["attributes"]["id"]) &&
+                 isset($js_image[$thumb[0]["attributes"]["id"]])
+             ){
+
+                 $thumb = [
+                     "url" =>
+                         $js_image[$thumb[0]["attributes"]["id"]],
+                     "ratio" => "1:1"
+                 ];
+             }else{
+
+                 $thumb = [
+                     "url" => null,
+                     "ratio" => null
+                 ];
+             }
+
+             // this contains description, sublinks
+             $inner_category =
+                 $this->fuckhtml
+                 ->getElementsByClassName($class_text, "div");
+
+             // set empty values
+             $description = null;
+             $table = [];
+             $sublinks = [];
+             $date = null;
+
+             foreach($inner_category as $category){
+
+                 // only nodes reported at level 6 are read
+                 // NOTE(review): presumably the nesting depth of the
+                 // description/table blocks - confirm against fuckhtml
+                 if($category["level"] !== 6){
+
+                     continue;
+                 }
+
+                 $this->fuckhtml->load($category);
+
+                 // check if its a table
+                 // NOTE(review): the A-z range also covers [ \ ] ^ _ `
+                 preg_match(
+                     '/^[A-z0-9 ]+: <span/',
+                     $category["innerHTML"],
+                     $tablematch
+                 );
+
+                 if(count($tablematch) !== 0){
+
+                     $categories = explode("<br>", $category["innerHTML"]);
+
+                     foreach($categories as $cat){
+
+                         $cat = explode(":", $cat, 2);
+
+                         if(count($cat) !== 2){
+
+                             // row without a "name: value" pair
+                             continue;
+                         }
+
+                         $table[
+                             $this->fuckhtml
+                             ->getTextContent($cat[0])
+                         ] =
+                             $this->titledots(
+                                 $this->fuckhtml
+                                 ->getTextContent($cat[1])
+                             );
+                     }
+                     continue;
+                 }
+
+                 $spans =
+                     $this->fuckhtml
+                     ->getElementsByTagName("span");
+
+                 foreach($spans as $span){
+
+                     // replace element with nothing
+                     if(empty($description)){
+
+                         $category["innerHTML"] =
+                             str_replace(
+                                 $span["outerHTML"],
+                                 "",
+                                 $category["innerHTML"]
+                             );
+                     }
+
+                     // get rating
+                     if(isset($span["attributes"]["aria-hidden"])){
+
+                         $table["Rating"] = $span["innerHTML"];
+                     }
+                 }
+
+                 if(empty($description)){
+
+                     $description =
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent($category)
+                         );
+                 }
+             }
+
+             // TODO: $date is never filled in. An earlier draft ran
+             // strtotime() on the first <span> of each description
+             // block and reported the rating as "<value>/5".
+
+             // get sublinks
+             $this->fuckhtml->load($container["innerHTML"]);
+
+             $as =
+                 $this->fuckhtml
+                 ->getElementsByTagName("a");
+
+             foreach($as as $a){
+
+                 $this->fuckhtml->load($a);
+
+                 $detect =
+                     $this->fuckhtml
+                     ->getElementsByClassName($class_sublink, "span");
+
+                 if(count($detect) !== 0){
+
+                     $sublinks[] = [
+                         "title" =>
+                             $this->titledots(
+                                 $this->fuckhtml
+                                 ->getTextContent($a)
+                             ),
+                         "date" => null,
+                         "description" => null,
+                         "url" =>
+                             $this->decodeurl(
+                                 $a["attributes"]["href"]
+                             )
+                     ];
+                 }
+             }
+
+             $out["web"][] = [
+                 "title" =>
+                     $this->titledots(
+                         $this->fuckhtml
+                         ->getTextContent($title[0])
+                     ),
+                 "description" => $description,
+                 "url" => $link,
+                 "date" => $date,
+                 "type" => "web",
+                 "thumb" => $thumb,
+                 "sublink" => $sublinks,
+                 "table" => $table
+             ];
+
+             continue;
+         }
+
+         /*
+             Check related searches node
+         */
+         $relateds =
+             $this->fuckhtml
+             ->getElementsByClassName($class_related, "a");
+
+         foreach($relateds as $related){
+
+             $out["related"][] =
+                 $this->fuckhtml
+                 ->getTextContent($related);
+         }
+
+         /*
+             Get next page
+         */
+         $nextpage =
+             $this->fuckhtml
+             ->getElementsByClassName($class_nextpage, "a");
+
+         if(count($nextpage) !== 0){
+
+             // keep what follows the "?" of the link, if anything
+             $href_parts =
+                 explode(
+                     "?",
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $nextpage[0]
+                         ["attributes"]
+                         ["href"]
+                     )
+                 );
+
+             $out["npt"] = $href_parts[1] ?? null;
+         }
+     }
+
+     return $out;
+ }
+
+ public function image($get){
+
+     // Parses a google images page into
+     // ["status", "npt" (parameters for the next page), "image" (list)].
+     // Only $get["s"] is read, for the "q" parameter of the next page.
+     //
+     // NOTE(review): the page is still read from a saved dump on disk,
+     // no request is made yet.
+
+     $handle = fopen("scraper/google-img.html", "r");
+     $html = fread($handle, filesize("scraper/google-img.html"));
+     fclose($handle);
+
+     $this->fuckhtml->load($html);
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "image" => []
+     ];
+
+     $images =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "islrtb isv-r",
+             "div"
+         );
+
+     // get next page
+     // https://www.google.com/search
+     // ?q=higurashi
+     // &tbm=isch
+     // &async=_id%3Aislrg_c%2C_fmt%3Ahtml
+     // &asearch=ichunklite
+     // &ved=0ahUKEwidjYXJqJSAAxWrElkFHZ07CDwQtDIIQygA
+     $ved =
+         $this->fuckhtml
+         ->getElementById("islrg", "div");
+
+     if($ved){
+
+         $ved =
+             $this->fuckhtml
+             ->getTextContent(
+                 $ved["attributes"]["data-ved"]
+             );
+
+         // &vet=1{$ved}..i (10ahUKEwidjYXJqJSAAxWrElkFHZ07CDwQtDIIQygA..i)
+
+         /*
+             These 2 are handled by us
+             start = start + number of results
+             ijn = current page number
+         */
+         // &start=100
+         // &ijn=1
+
+         // &imgvl=CAEY7gQgBSj3Aji8VTjXVUC4AUC3AUgAYNdV
+         // NOTE(review): the A-z range also covers [ \ ] ^ _ `
+         preg_match(
+             '/var e=\'([A-z0-9]+)\';/',
+             $html,
+             $imgvl
+         );
+
+         // null when the variable is not present in the page
+         $imgvl = $imgvl[1] ?? null;
+
+         $out["npt"] = [
+             "q" => $get["s"],
+             "tbm" => "isch",
+             "async" => "_id:islrg_c,_fmt:html",
+             "asearch" => "ichunklite",
+             "ved" => $ved,
+             "vet" => "1" . $ved . "..i",
+             "start" => 100,
+             "ijn" => 1,
+             "imgvl" => $imgvl
+         ];
+     }
+
+     foreach($images as $image){
+
+         $og_width = (int)($image["attributes"]["data-ow"] ?? 0);
+         $og_height = (int)($image["attributes"]["data-oh"] ?? 0);
+         $thumb_width = (int)($image["attributes"]["data-tw"] ?? 0);
+
+         if($og_width <= 0 || $og_height <= 0){
+
+             // dimensions missing or zero: the aspect ratio below
+             // would divide by zero, so the tile is skipped
+             continue;
+         }
+
+         $this->fuckhtml->load($image);
+
+         $img =
+             $this->fuckhtml
+             ->getElementsByTagName("img")[0] ?? [];
+
+         // the thumbnail address is in data-src when present, src otherwise
+         $src =
+             $img["attributes"]["data-src"] ??
+             $img["attributes"]["src"] ??
+             null;
+
+         // thumbnail height follows from the original aspect ratio
+         $ratio = $og_width / $og_height;
+         $thumb_height = floor($thumb_width / $ratio);
+
+         $out["image"][] = [
+             "title" =>
+                 $this->titledots(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $image["attributes"]["data-pt"]
+                     )
+                 ),
+             "source" => [
+                 [
+                     "url" =>
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $image["attributes"]["data-ou"]
+                         ),
+                     "width" => $og_width,
+                     "height" => $og_height
+                 ],
+                 [
+                     "url" =>
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $src
+                         ),
+                     "width" => $thumb_width,
+                     "height" => $thumb_height
+                 ]
+             ],
+             "url" =>
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $image["attributes"]["data-ru"]
+                 )
+         ];
+     }
+
+     return $out;
+ }
+
+ private function findstyles($rules, $is){
+
+     // Looks up, in $this->computedstyle, a selector whose declarations
+     // equal $rules and returns the class/id name found in it.
+     //   $rules  property => value pairs to look for
+     //   $is     name prefix to extract: self::is_class or self::is_id
+     // Returns the name without its prefix, or false when nothing matches.
+
+     ksort($rules);
+
+     $pattern = '/' . preg_quote($is, '/') . '([^ .]+)/';
+
+     foreach($this->computedstyle as $stylename => $styles){
+
+         if($styles != $rules){
+
+             continue;
+         }
+
+         if(preg_match($pattern, $stylename, $out) === 1){
+
+             return $out[1];
+         }
+
+         // right declarations, but the selector holds no name of the
+         // requested kind (e.g. a bare tag): keep looking instead of
+         // giving up on the first hit
+     }
+
+     return false;
+ }
+
+ private function parsestyles($style){
+
+     // Turns stylesheet text into selector => [property => value],
+     // with the properties of every selector sorted by name.
+
+     // one entry per "selectors{declarations}" rule
+     preg_match_all(
+         '/([^{]+){([^}]+)}/',
+         $style,
+         $rules,
+         PREG_SET_ORDER
+     );
+
+     $tags = [];
+
+     foreach($rules as $rule){
+
+         // read the declarations of the rule once
+         $declarations = [];
+
+         foreach(explode(";", $rule[2]) as $declaration){
+
+             $pair = explode(":", $declaration, 2);
+
+             if(count($pair) === 2){
+
+                 $declarations[trim($pair[0])] = trim($pair[1]);
+             }
+         }
+
+         // then attach them to every selector of the comma separated list
+         foreach(explode(",", trim($rule[1])) as $selector){
+
+             $selector = trim($selector);
+
+             if(!isset($tags[$selector])){
+
+                 $tags[$selector] = [];
+             }
+
+             foreach($declarations as $property => $setting){
+
+                 $tags[$selector][$property] = $setting;
+             }
+         }
+     }
+
+     foreach(array_keys($tags) as $selector){
+
+         ksort($tags[$selector]);
+     }
+
+     return $tags;
+ }
+
+ private function decodeurl($url){
+
+     // Extracts the target address from a "/url?q=<target>" or
+     // "/interstitial?url=<target>" href and url-decodes it.
+     // Returns null for any other href.
+
+     $href =
+         $this->fuckhtml
+         ->getTextContent($url);
+
+     preg_match(
+         '/^\/url\?q=([^&]+)|^\/interstitial\?url=([^&]+)/',
+         $href,
+         $match
+     );
+
+     // group 1 is the /url form, group 2 the /interstitial form
+     foreach([1, 2] as $group){
+
+         if(!empty($match[$group])){
+
+             return urldecode($match[$group]);
+         }
+     }
+
+     return null;
+ }
+
+ private function titledots($title){
+
+     // Strips trailing dots, ellipsis characters and whitespace.
+     //
+     // rtrim() works on bytes: with the 3-byte "…" in its list it also
+     // removes the last byte of any other character that ends in
+     // \xE2, \x80 or \xA6 (for example "æ" or "一") and leaves broken
+     // UTF-8 behind, so the trimming is done per code point instead.
+     $trimmed =
+         preg_replace(
+             '/[.\x{2026} \t\n\r\0\x0B]+\z/u',
+             '',
+             $title
+         );
+
+     if($trimmed !== null){
+
+         return $trimmed;
+     }
+
+     // preg_replace() gave up (input is not valid UTF-8): trim the
+     // single-byte characters only
+     return rtrim($title, ". \t\n\r\0\x0B");
+ }
+}
+
diff --git a/scraper/marginalia.php b/scraper/marginalia.php
new file mode 100644
index 0000000..c8ab09f
--- /dev/null
+++ b/scraper/marginalia.php
@@ -0,0 +1,242 @@
+<?php
+
+class marginalia{
+ public function __construct(){
+
+ // api key: web() puts it in the path of the request url
+ $this->key = "public";
+ }
+
+ public function getfilters($page){
+
+     // Filter definitions for $page, as
+     // filter name => ["display" => label, "option" => value => label].
+     // Only "web" has filters; nothing is returned for other pages.
+
+     if($page != "web"){
+
+         return;
+     }
+
+     // allow/deny/require filters only differ by label and noun
+     $tristate = function($display, $noun){
+
+         return [
+             "display" => $display,
+             "option" => [
+                 "any" => "Allow " . $noun,
+                 "deny" => "Deny " . $noun,
+                 "require" => "Require " . $noun
+             ]
+         ];
+     };
+
+     return [
+         "profile" => [
+             "display" => "Profile",
+             "option" => [
+                 "any" => "Default",
+                 "modern" => "Modern"
+             ]
+         ],
+         "format" => [
+             "display" => "Format",
+             "option" => [
+                 "any" => "Any",
+                 "html5" => "html5",
+                 "xhtml" => "xhtml",
+                 "html123" => "html123"
+             ]
+         ],
+         "file" => [
+             "display" => "File",
+             "option" => [
+                 "any" => "Any",
+                 "nomedia" => "Deny media",
+                 "media" => "Contains media",
+                 "audio" => "Contains audio",
+                 "video" => "Contains video",
+                 "archive" => "Contains archive",
+                 "document" => "Contains document"
+             ]
+         ],
+         "javascript" => $tristate("Javascript", "JS"),
+         "trackers" => $tristate("Trackers", "trackers"),
+         "cookies" => $tristate("Cookies", "cookies"),
+         "affiliate" => $tristate("Affiliate links in body", "affiliate links")
+     ];
+ }
+
+ private function get($url, $get = []){
+
+     // Performs a GET request and returns the response body.
+     //   $url  address to fetch
+     //   $get  query parameters, appended to $url as a query string
+     // Throws Exception with the curl error message when the transfer fails.
+
+     $headers = [
+         "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     try{
+
+         $data = curl_exec($curlproc);
+
+         if(curl_errno($curlproc)){
+
+             throw new Exception(curl_error($curlproc));
+         }
+     }finally{
+
+         // release the handle on the error path as well
+         curl_close($curlproc);
+     }
+
+     return $data;
+ }
+
+ public function web($get){
+
+     // Queries the marginalia api and returns the results in the
+     // structure built in $out below.
+     //   $get  reads "s" (search terms) and the filter values
+     //         "profile", "format", "file", "javascript", "trackers",
+     //         "cookies" and "affiliate"
+     // Throws Exception when the request fails, when the api answers
+     // "Slow down", or when the response cannot be decoded.
+
+     $search = [$get["s"]];
+     $profile = $get["profile"];
+     $format = $get["format"];
+     $file = $get["file"];
+
+     // allow/deny/require filters: "any" adds nothing, "deny" adds the
+     // negated keyword, every other value adds the keyword itself
+     $keywords = [
+         "javascript" => "js:true",
+         "trackers" => "special:tracking",
+         "cookies" => "special:cookies",
+         "affiliate" => "special:affiliate"
+     ];
+
+     foreach($keywords as $key => $str){
+
+         $value = $get[$key];
+
+         if($value == "any"){ continue; }
+
+         if($value == "deny"){
+             $str = "-" . $str;
+         }
+
+         $search[] = $str;
+     }
+
+     if($format != "any"){
+
+         $search[] = "format:$format";
+     }
+
+     switch($file){
+
+         case "any": break;
+         case "nomedia": $search[] = "-special:media"; break;
+         case "media": $search[] = "special:media"; break;
+
+         default:
+             $search[] = "file:$file";
+     }
+
+     $search = implode(" ", $search);
+
+     $params = [
+         "count" => 20
+     ];
+
+     if($profile == "modern"){
+
+         $params["index"] = 1;
+     }
+
+     try{
+         $json =
+             $this->get(
+                 "https://api.marginalia.nu/{$this->key}/search/" . urlencode($search),
+                 $params
+             );
+     }catch(Exception $error){
+
+         // keep the curl error reachable through getPrevious()
+         throw new Exception("Failed to get JSON", 0, $error);
+     }
+
+     if($json == "Slow down"){
+
+         throw new Exception("The API key used is rate limited. Please try again in a few minutes.");
+     }
+
+     $json = json_decode($json, true);
+     /*
+     $handle = fopen("scraper/marginalia.json", "r");
+     $json = json_decode(fread($handle, filesize("scraper/marginalia.json")), true);
+     fclose($handle);*/
+
+     if(
+         !is_array($json) ||
+         !isset($json["results"]) ||
+         !is_array($json["results"])
+     ){
+
+         // not json, or json without a result list
+         throw new Exception("Failed to decode JSON");
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     foreach($json["results"] as $result){
+
+         $out["web"][] = [
+             "title" => $result["title"],
+             "description" => str_replace("\n", " ", $result["description"]),
+             "url" => $result["url"],
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+}
+
diff --git a/scraper/mojeek.php b/scraper/mojeek.php
new file mode 100644
index 0000000..a0b5016
--- /dev/null
+++ b/scraper/mojeek.php
@@ -0,0 +1,1182 @@
+<?php
+
+class mojeek{
+ /*
+     Load the helper libraries and attach one fuckhtml instance and
+     one nextpage instance (constructed with "mojeek") to this object.
+ */
+ public function __construct(){
+
+     // include_once: other scrapers pull in the same library files,
+     // and including a class definition twice in one request is a
+     // fatal "cannot declare class" error
+     include_once "lib/fuckhtml.php";
+     $this->fuckhtml = new fuckhtml();
+
+     include_once "lib/nextpage.php";
+     $this->nextpage = new nextpage("mojeek");
+ }
+
+ /*
+     Describe the filters available for a page type.
+
+     $page: page type name. "web" yields the focus, lang, country,
+     region and domain filters; every other value yields no filters.
+
+     Returns an array keyed by filter name, each entry holding a
+     "display" label and an "option" map of value => label.
+ */
+ public function getfilters($page){
+
+     switch($page){
+
+         case "web":
+             return [
+                 "focus" => [
+                     "display" => "Focus",
+                     "option" => [
+                         "any" => "No focus",
+                         "blogs" => "Blogs",
+                         "Dictionary" => "Dictionary",
+                         "Recipes" => "Recipes",
+                         "Time" => "Time",
+                         "Weather" => "Weather"
+                     ]
+                 ],
+                 "lang" => [
+                     "display" => "Language",
+                     "option" => [
+                         "any" => "Any language",
+                         "af" => "Afrikaans",
+                         "sq" => "Albanian",
+                         "an" => "Aragonese",
+                         "ay" => "Aymara",
+                         "bi" => "Bislama",
+                         "br" => "Breton",
+                         "ca" => "Catalan",
+                         "kw" => "Cornish",
+                         "co" => "Corsican",
+                         "hr" => "Croatian",
+                         "da" => "Danish",
+                         "nl" => "Dutch",
+                         "dz" => "Dzongkha",
+                         "en" => "English",
+                         "fj" => "Fijian",
+                         "fi" => "Finnish",
+                         "fr" => "French",
+                         "gd" => "Gaelic",
+                         "gl" => "Galician",
+                         "de" => "German",
+                         "ht" => "Haitian",
+                         "io" => "Ido",
+                         "id" => "Indonesian",
+                         "ia" => "Interlingua",
+                         "ie" => "Interlingue",
+                         "ga" => "Irish",
+                         "it" => "Italian",
+                         "rw" => "Kinyarwanda",
+                         "la" => "Latin",
+                         "li" => "Limburgish",
+                         "lb" => "Luxembourgish",
+                         "no" => "Norwegian",
+                         "nb" => "Norwegian Bokmål",
+                         "nn" => "Norwegian Nynorsk",
+                         "oc" => "Occitan (post 1500)",
+                         "pl" => "Polish",
+                         "pt" => "Portuguese",
+                         "rm" => "Romansh",
+                         "rn" => "Rundi",
+                         "sg" => "Sango",
+                         "so" => "Somali",
+                         "es" => "Spanish",
+                         "sw" => "Swahili",
+                         "ss" => "Swati",
+                         "sv" => "Swedish",
+                         "ty" => "Tahitian",
+                         "to" => "Tonga (Tonga Islands)",
+                         "ts" => "Tsonga",
+                         "vo" => "Volapük",
+                         "wa" => "Walloon",
+                         "cy" => "Welsh",
+                         "xh" => "Xhosa",
+                         "zu" => "Zulu"
+                     ]
+                 ],
+                 "country" => [
+                     "display" => "Country",
+                     "option" => [
+                         "any" => "No location bias",
+                         "af" => "Afghanistan",
+                         "ax" => "Åland Islands",
+                         "al" => "Albania",
+                         "dz" => "Algeria",
+                         "as" => "American Samoa",
+                         "ad" => "Andorra",
+                         "ao" => "Angola",
+                         "ai" => "Anguilla",
+                         "aq" => "Antarctica",
+                         "ag" => "Antigua and Barbuda",
+                         "ar" => "Argentina",
+                         "am" => "Armenia",
+                         "aw" => "Aruba",
+                         "au" => "Australia",
+                         "at" => "Austria",
+                         "az" => "Azerbaijan",
+                         "bs" => "Bahamas",
+                         "bh" => "Bahrain",
+                         "bd" => "Bangladesh",
+                         "bb" => "Barbados",
+                         "by" => "Belarus",
+                         "be" => "Belgium",
+                         "bz" => "Belize",
+                         "bj" => "Benin",
+                         "bm" => "Bermuda",
+                         "bt" => "Bhutan",
+                         "bo" => "Bolivia (Plurinational State of)",
+                         "bq" => "Bonaire, Sint Eustatius and Saba",
+                         "ba" => "Bosnia and Herzegovina",
+                         "bw" => "Botswana",
+                         "bv" => "Bouvet Island",
+                         "br" => "Brazil",
+                         "io" => "British Indian Ocean Territory",
+                         "bn" => "Brunei Darussalam",
+                         "bg" => "Bulgaria",
+                         "bf" => "Burkina Faso",
+                         "bi" => "Burundi",
+                         "cv" => "Cabo Verde",
+                         "kh" => "Cambodia",
+                         "cm" => "Cameroon",
+                         "ca" => "Canada",
+                         "ky" => "Cayman Islands",
+                         "cf" => "Central African Republic",
+                         "td" => "Chad",
+                         "cl" => "Chile",
+                         "cn" => "China",
+                         "cx" => "Christmas Island",
+                         "cc" => "Cocos (Keeling) Islands",
+                         "co" => "Colombia",
+                         "km" => "Comoros",
+                         "cg" => "Congo",
+                         "cd" => "Congo (Democratic Republic of the)",
+                         "ck" => "Cook Islands",
+                         "cr" => "Costa Rica",
+                         "ci" => "Côte d'Ivoire",
+                         "hr" => "Croatia",
+                         "cu" => "Cuba",
+                         "cw" => "Curaçao",
+                         "cy" => "Cyprus",
+                         "cz" => "Czechia",
+                         "dk" => "Denmark",
+                         "dj" => "Djibouti",
+                         "dm" => "Dominica",
+                         "do" => "Dominican Republic",
+                         "ec" => "Ecuador",
+                         "eg" => "Egypt",
+                         "sv" => "El Salvador",
+                         "gq" => "Equatorial Guinea",
+                         "er" => "Eritrea",
+                         "ee" => "Estonia",
+                         "et" => "Ethiopia",
+                         "fk" => "Falkland Islands (Malvinas)",
+                         "fo" => "Faroe Islands",
+                         "fj" => "Fiji",
+                         "fi" => "Finland",
+                         "fr" => "France",
+                         "gf" => "French Guiana",
+                         "pf" => "French Polynesia",
+                         "tf" => "French Southern Territories",
+                         "ga" => "Gabon",
+                         "gm" => "Gambia",
+                         "ge" => "Georgia",
+                         "de" => "Germany",
+                         "gh" => "Ghana",
+                         "gi" => "Gibraltar",
+                         "gr" => "Greece",
+                         "gl" => "Greenland",
+                         "gd" => "Grenada",
+                         "gp" => "Guadeloupe",
+                         "gu" => "Guam",
+                         "gt" => "Guatemala",
+                         "gg" => "Guernsey",
+                         "gn" => "Guinea",
+                         "gw" => "Guinea-Bissau",
+                         "gy" => "Guyana",
+                         "ht" => "Haiti",
+                         "hm" => "Heard Island and McDonald Islands",
+                         "va" => "Holy See",
+                         "hn" => "Honduras",
+                         "hk" => "Hong Kong",
+                         "hu" => "Hungary",
+                         "is" => "Iceland",
+                         "in" => "India",
+                         "id" => "Indonesia",
+                         "ir" => "Iran (Islamic Republic of)",
+                         "iq" => "Iraq",
+                         "ie" => "Ireland",
+                         "im" => "Isle of Man",
+                         "il" => "Israel",
+                         "it" => "Italy",
+                         "jm" => "Jamaica",
+                         "jp" => "Japan",
+                         "je" => "Jersey",
+                         "jo" => "Jordan",
+                         "kz" => "Kazakhstan",
+                         "ke" => "Kenya",
+                         "ki" => "Kiribati",
+                         "kp" => "Korea (Democratic People's Republic of)",
+                         "kr" => "Korea (Republic of)",
+                         "kw" => "Kuwait",
+                         "kg" => "Kyrgyzstan",
+                         "la" => "Lao People's Democratic Republic",
+                         "lv" => "Latvia",
+                         "lb" => "Lebanon",
+                         "ls" => "Lesotho",
+                         "lr" => "Liberia",
+                         "ly" => "Libya",
+                         "li" => "Liechtenstein",
+                         "lt" => "Lithuania",
+                         "lu" => "Luxembourg",
+                         "mo" => "Macao",
+                         "mk" => "Macedonia (the former Yugoslav Republic of)",
+                         "mg" => "Madagascar",
+                         "mw" => "Malawi",
+                         "my" => "Malaysia",
+                         "mv" => "Maldives",
+                         "ml" => "Mali",
+                         "mt" => "Malta",
+                         "mh" => "Marshall Islands",
+                         "mq" => "Martinique",
+                         "mr" => "Mauritania",
+                         "mu" => "Mauritius",
+                         "yt" => "Mayotte",
+                         "mx" => "Mexico",
+                         "fm" => "Micronesia (Federated States of)",
+                         "md" => "Moldova (Republic of)",
+                         "mc" => "Monaco",
+                         "mn" => "Mongolia",
+                         "me" => "Montenegro",
+                         "ms" => "Montserrat",
+                         "ma" => "Morocco",
+                         "mz" => "Mozambique",
+                         "mm" => "Myanmar",
+                         "na" => "Namibia",
+                         "nr" => "Nauru",
+                         "np" => "Nepal",
+                         "nl" => "Netherlands",
+                         "nc" => "New Caledonia",
+                         "nz" => "New Zealand",
+                         "ni" => "Nicaragua",
+                         "ne" => "Niger",
+                         "ng" => "Nigeria",
+                         "nu" => "Niue",
+                         "nf" => "Norfolk Island",
+                         "mp" => "Northern Mariana Islands",
+                         "no" => "Norway",
+                         "om" => "Oman",
+                         "pk" => "Pakistan",
+                         "pw" => "Palau",
+                         "ps" => "Palestine, State of",
+                         "pa" => "Panama",
+                         "pg" => "Papua New Guinea",
+                         "py" => "Paraguay",
+                         "pe" => "Peru",
+                         "ph" => "Philippines",
+                         "pn" => "Pitcairn",
+                         "pl" => "Poland",
+                         "pt" => "Portugal",
+                         "pr" => "Puerto Rico",
+                         "qa" => "Qatar",
+                         "re" => "Réunion",
+                         "ro" => "Romania",
+                         "ru" => "Russian Federation",
+                         "rw" => "Rwanda",
+                         "bl" => "Saint Barthélemy",
+                         "sh" => "Saint Helena, Ascension and Tristan da Cunha",
+                         "kn" => "Saint Kitts and Nevis",
+                         "lc" => "Saint Lucia",
+                         "mf" => "Saint Martin (French part)",
+                         "pm" => "Saint Pierre and Miquelon",
+                         "vc" => "Saint Vincent and the Grenadines",
+                         "ws" => "Samoa",
+                         "sm" => "San Marino",
+                         "st" => "Sao Tome and Principe",
+                         "sa" => "Saudi Arabia",
+                         "sn" => "Senegal",
+                         "rs" => "Serbia",
+                         "sc" => "Seychelles",
+                         "sl" => "Sierra Leone",
+                         "sg" => "Singapore",
+                         "sx" => "Sint Maarten (Dutch part)",
+                         "sk" => "Slovakia",
+                         "si" => "Slovenia",
+                         "sb" => "Solomon Islands",
+                         "so" => "Somalia",
+                         "za" => "South Africa",
+                         "gs" => "South Georgia and South Sandwich Islands",
+                         "ss" => "South Sudan",
+                         "es" => "Spain",
+                         "lk" => "Sri Lanka",
+                         "sd" => "Sudan",
+                         "sr" => "Suriname",
+                         "sj" => "Svalbard and Jan Mayen",
+                         "sz" => "Swaziland",
+                         "se" => "Sweden",
+                         "ch" => "Switzerland",
+                         "sy" => "Syrian Arab Republic",
+                         "tw" => "Taiwan",
+                         "tj" => "Tajikistan",
+                         "tz" => "Tanzania, United Republic of",
+                         "th" => "Thailand",
+                         "tl" => "Timor-Leste",
+                         "tg" => "Togo",
+                         "tk" => "Tokelau",
+                         "to" => "Tonga",
+                         "tt" => "Trinidad and Tobago",
+                         "tn" => "Tunisia",
+                         "tr" => "Turkey",
+                         "tm" => "Turkmenistan",
+                         "tc" => "Turks and Caicos Islands",
+                         "tv" => "Tuvalu",
+                         "ug" => "Uganda",
+                         "ua" => "Ukraine",
+                         "ae" => "United Arab Emirates",
+                         "gb" => "United Kingdom",
+                         "us" => "United States of America",
+                         "um" => "United States Minor Outlying Islands",
+                         "uy" => "Uruguay",
+                         "uz" => "Uzbekistan",
+                         "vu" => "Vanuatu",
+                         "ve" => "Venezuela (Bolivarian Republic of)",
+                         "vn" => "Viet Nam",
+                         "vg" => "Virgin Islands (British)",
+                         "vi" => "Virgin Islands (U.S.)",
+                         "wf" => "Wallis and Futuna",
+                         "eh" => "Western Sahara",
+                         "ye" => "Yemen",
+                         "zm" => "Zambia",
+                         "zw" => "Zimbabwe"
+                     ]
+                 ],
+                 "region" => [
+                     "display" => "Region",
+                     "option" => [
+                         "any" => "Any region",
+                         "eu" => "European Union",
+                         "de" => "Germany",
+                         "fr" => "France",
+                         "uk" => "United Kingdom"
+                     ]
+                 ],
+                 "domain" => [
+                     "display" => "Results per domain",
+                     "option" => [
+                         "1" => "1 result",
+                         "2" => "2 results",
+                         "3" => "3 results",
+                         "4" => "4 results",
+                         "5" => "5 results",
+                         "10" => "10 results",
+                         "0" => "Unlimited",
+                     ]
+                 ]
+             ];
+
+         case "news":
+         default:
+             // no filters; unknown page types get an empty list
+             // instead of an implicit null
+             return [];
+     }
+ }
+
+ /*
+     Perform an HTTP GET with browser-like headers and return the
+     response body.
+
+     $url: target URL
+     $get: query parameters; when not empty they are encoded with
+           http_build_query() and appended after a "?"
+
+     Throws Exception carrying the curl error message when the
+     transfer fails.
+ */
+ private function get($url, $get = []){
+
+     $headers = [
+         "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"
+     ];
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message first, then release the handle so the
+         // error path does not leak it
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /*
+     Run a Mojeek web search, or continue one from a next-page token,
+     and scrape the returned HTML into the response array.
+
+     $get keys read: "npt" (next page token; falsy for a new search),
+     "s" (search term), "lang", "country", "region", "domain" and
+     "focus" (filter values as listed by getfilters()).
+
+     Returns an array with the keys "status", "spelling", "npt",
+     "answer", "web", "image", "video", "news" and "related". Only
+     "web", "answer", "news" and "npt" are filled in here.
+
+     Throws Exception when the search term is empty or the page
+     could not be fetched.
+ */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // the stored value is appended to the site origin as-is
+         $token = $this->nextpage->get($get["npt"], "web");
+
+         try{
+             $html =
+                 $this->get(
+                     "https://www.mojeek.com" . $token,
+                     []
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to get HTML");
+         }
+
+     }else{
+         $search = $get["s"];
+         $lang = $get["lang"];
+         $country = $get["country"];
+         $region = $get["region"];
+         $domain = $get["domain"];
+         $focus = $get["focus"];
+
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $params = [
+             "q" => $search,
+             "t" => 20, // number of results/page
+             "tn" => 7, // number of news results/page
+             "date" => 1, // show date
+             "tlen" => 128, // max length of title
+             "dlen" => 511, // max length of description
+             "arc" => ($country == "any" ? "none" : $country) // location. don't use autodetect!
+         ];
+
+         switch($focus){
+
+             case "any": break;
+
+             case "blogs":
+                 $params["fmt"] = "sst";
+                 $params["sst"] = "1";
+                 break;
+
+             default:
+                 $params["foc_t"] = $focus;
+                 break;
+         }
+
+         if($lang != "any"){
+
+             $params["lb"] = $lang;
+         }
+
+         if($region != "any"){
+
+             $params["reg"] = $region;
+         }
+
+         if($domain != "1"){
+
+             $params["si"] = $domain;
+         }
+
+         try{
+             $html =
+                 $this->get(
+                     "https://www.mojeek.com/search",
+                     $params
+                 );
+         }catch(Exception $error){
+
+             throw new Exception("Failed to get HTML");
+         }
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => null,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     $this->fuckhtml->load($html);
+
+     $results =
+         $this->fuckhtml
+         ->getElementsByClassName("results-standard", "ul");
+
+     if(count($results) === 0){
+
+         // no result list on the page: nothing else is scraped
+         return $out;
+     }
+
+     $this->fuckhtml->load($results[0]);
+
+     /*
+         Get search results
+     */
+     $results =
+         $this->fuckhtml
+         ->getElementsByTagName("li");
+
+     foreach($results as $result){
+
+         $data = [
+             "title" => null,
+             "description" => null,
+             "url" => null,
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+
+         $this->fuckhtml->load($result);
+
+         $title =
+             $this->fuckhtml
+             ->getElementsByClassName("title", "a");
+
+         if(count($title) === 0){
+
+             // list item without a title link: not a search result
+             continue;
+         }
+
+         $title = $title[0];
+
+         $data["title"] =
+             html_entity_decode(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $title["innerHTML"]
+                 )
+             );
+
+         $data["url"] =
+             html_entity_decode(
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $title["attributes"]["href"]
+                 )
+             );
+
+         $description =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 "s", "p"
+             );
+
+         if(count($description) !== 0){
+
+             $data["description"] =
+                 $this->titledots(
+                     html_entity_decode(
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $description[0]
+                         )
+                     )
+                 );
+         }
+
+         // the date is the last " - " separated field of the second
+         // <p class="i">; leave it null when that node is missing or
+         // the text is not parseable
+         $meta =
+             $this->fuckhtml
+             ->getElementsByClassName("i", "p");
+
+         if(isset($meta[1])){
+
+             $fields =
+                 explode(
+                     " - ",
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $meta[1]
+                     )
+                 );
+
+             $time = strtotime($fields[count($fields) - 1]);
+
+             if($time !== false){
+
+                 $data["date"] = $time;
+             }
+         }
+
+         $out["web"][] = $data;
+     }
+
+     /*
+         Get instant answers
+     */
+     $this->fuckhtml->load($html);
+
+     $infoboxes =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "infobox infobox-top",
+             "div"
+         );
+
+     foreach($infoboxes as $infobox){
+
+         $answer = [
+             "title" => null,
+             "description" => [],
+             "url" => null,
+             "thumb" => null,
+             "table" => [],
+             "sublink" => []
+         ];
+
+         // the infobox is cut at <hr>: [0] title + short definition,
+         // [1] thumbnail + body, [2] source link
+         $infobox_html =
+             explode(
+                 "<hr>",
+                 $infobox["innerHTML"]
+             );
+
+         $this->fuckhtml->load($infobox_html[0]);
+
+         // title
+         $h1 =
+             $this->fuckhtml
+             ->getElementsByTagName("h1");
+
+         if(count($h1) !== 0){
+
+             $answer["title"] =
+                 $this->fuckhtml
+                 ->getTextContent(
+                     $h1[0]
+                 );
+         }
+
+         // short definition
+         $definition =
+             $this->fuckhtml
+             ->getElementsByTagName(
+                 "p"
+             );
+
+         if(count($definition) !== 0){
+
+             $answer["description"][] = [
+                 "type" => "quote",
+                 "value" =>
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $definition[0]
+                     )
+             ];
+         }
+
+         // second segment is absent when the infobox has no <hr>
+         if(isset($infobox_html[1])){
+
+             // get thumbnail, if it exists
+             $this->fuckhtml->load($infobox_html[1]);
+
+             $thumb =
+                 $this->fuckhtml
+                 ->getElementsByClassName("float-right", "img");
+
+             if(count($thumb) !== 0){
+
+                 preg_match(
+                     '/\/image\?img=([^&]+)/i',
+                     $thumb[0]["attributes"]["src"],
+                     $thumb_match
+                 );
+
+                 if(count($thumb_match) === 2){
+
+                     $answer["thumb"] =
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $thumb_match[1]
+                         );
+                 }
+             }
+
+             // get description
+             $ps =
+                 $this->fuckhtml
+                 ->getElementsByTagName("p");
+
+             foreach($ps as $p){
+
+                 $this->fuckhtml->load($p);
+
+                 if(
+                     preg_match(
+                         '/^\s*<strong>/i',
+                         $p["innerHTML"]
+                     )
+                 ){
+
+                     /*
+                         Parse table
+                     */
+
+                     $strong =
+                         $this->fuckhtml
+                         ->getElementsByTagName("strong")[0];
+
+                     // drop the label so only the value text remains
+                     $p["innerHTML"] =
+                         str_replace($strong["innerHTML"], "", $p["innerHTML"]);
+
+                     $strong =
+                         preg_replace(
+                             '/:$/',
+                             "",
+                             ucfirst(
+                                 $this->fuckhtml
+                                 ->getTextContent(
+                                     $strong
+                                 )
+                             )
+                         );
+
+                     $answer["table"][trim($strong)] =
+                         trim(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $p
+                             )
+                         );
+
+                     continue;
+                 }
+
+                 $as =
+                     $this->fuckhtml
+                     ->getElementsByClassName("svg-icon");
+
+                 if(count($as) !== 0){
+
+                     /*
+                         Parse websites
+                     */
+                     foreach($as as $a){
+
+                         // the second class name is used as the label
+                         $classes = explode(" ", $a["attributes"]["class"], 2);
+
+                         if(!isset($classes[1])){
+
+                             continue;
+                         }
+
+                         $answer["sublink"][
+                             ucfirst($classes[1])
+                         ] =
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $a["attributes"]["href"]
+                             );
+                     }
+
+                     continue;
+                 }
+
+                 /*
+                     Parse text content
+                 */
+                 // NOTE(review): text after the last tag, and
+                 // paragraphs without any tag, are never appended
+                 // below. Confirm whether that is intended.
+                 $tags =
+                     $this->fuckhtml
+                     ->getElementsByTagName("*");
+
+                 $i = 0;
+                 foreach($tags as $tag){
+
+                     $c = count($answer["description"]);
+
+                     // remove tag from innerHTML
+                     $p["innerHTML"] =
+                         explode($tag["outerHTML"], $p["innerHTML"], 2);
+
+                     if(count($p["innerHTML"]) === 2){
+
+                         if(
+                             $i === 0 &&
+                             $c !== 0 &&
+                             $answer["description"][$c - 1]["type"] == "link"
+                         ){
+
+                             $append = "\n\n";
+                         }else{
+
+                             $append = "";
+                         }
+
+                         if($p["innerHTML"][0] != ""){
+                             $answer["description"][] = [
+                                 "type" => "text",
+                                 "value" => $append . trim($p["innerHTML"][0])
+                             ];
+                         }
+
+                         $p["innerHTML"] = $p["innerHTML"][1];
+                     }else{
+
+                         $p["innerHTML"] = $p["innerHTML"][0];
+                     }
+
+                     switch($tag["tagName"]){
+
+                         case "a":
+
+                             $value =
+                                 $this->fuckhtml
+                                 ->getTextContent(
+                                     $tag
+                                 );
+
+                             if(strtolower($value) == "wikipedia"){
+
+                                 // the "Wikipedia" link is not emitted
+                                 if($c !== 0){
+                                     $answer["description"][$c - 1]["value"] =
+                                         rtrim($answer["description"][$c - 1]["value"]);
+                                 }
+                                 break;
+                             }
+
+                             $answer["description"][] = [
+                                 "type" => "link",
+                                 "url" =>
+                                     $this->fuckhtml
+                                     ->getTextContent(
+                                         $tag["attributes"]["href"]
+                                     ),
+                                 "value" => $value
+                             ];
+                             break;
+                     }
+
+                     $i++;
+                 }
+             }
+         }
+
+         // get URL
+         if(isset($infobox_html[2])){
+
+             $this->fuckhtml->load($infobox_html[2]);
+
+             $links =
+                 $this->fuckhtml
+                 ->getElementsByTagName(
+                     "a"
+                 );
+
+             if(
+                 count($links) !== 0 &&
+                 isset($links[0]["attributes"]["href"])
+             ){
+
+                 $answer["url"] =
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $links[0]["attributes"]["href"]
+                     );
+             }
+         }
+
+         // append answer
+         $out["answer"][] = $answer;
+     }
+
+     /*
+         Get news
+     */
+     $this->fuckhtml->load($html);
+
+     $news =
+         $this->fuckhtml
+         ->getElementsByClassName(
+             "results news-results",
+             "div"
+         );
+
+     if(count($news) !== 0){
+
+         $this->fuckhtml->load($news[0]);
+
+         $lis =
+             $this->fuckhtml
+             ->getElementsByTagName("li");
+
+         foreach($lis as $li){
+
+             $this->fuckhtml->load($li);
+
+             $a =
+                 $this->fuckhtml
+                 ->getElementsByClassName(
+                     "ob",
+                     "a"
+                 );
+
+             if(count($a) === 0){
+
+                 continue;
+             }
+
+             $a = $a[0];
+
+             // the date is whatever follows the first " - " in the
+             // first <span>; null when missing or unparseable
+             $date = null;
+
+             $spans =
+                 $this->fuckhtml
+                 ->getElementsByTagName(
+                     "span"
+                 );
+
+             if(count($spans) !== 0){
+
+                 $fields =
+                     explode(
+                         " - ",
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $spans[0]
+                         ),
+                         2
+                     );
+
+                 if(isset($fields[1])){
+
+                     $time = strtotime($fields[1]);
+
+                     if($time !== false){
+
+                         $date = $time;
+                     }
+                 }
+             }
+
+             $out["news"][] = [
+                 "title" =>
+                     html_entity_decode(
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $a
+                         )
+                     ),
+                 "description" => null,
+                 "date" => $date,
+                 "thumb" => [
+                     "url" => null,
+                     "ratio" => null
+                 ],
+                 "url" =>
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $a["attributes"]["href"]
+                     )
+             ];
+         }
+     }
+
+     /*
+         Get next page
+     */
+     $this->fuckhtml->load($html);
+
+     $pagination =
+         $this->fuckhtml
+         ->getElementsByClassName("pagination");
+
+     // count() never returns false, so the former "!== false" test
+     // was always true and read $pagination[0] on pages without a
+     // pagination block
+     if(count($pagination) !== 0){
+
+         $this->fuckhtml->load($pagination[0]);
+         $as =
+             $this->fuckhtml
+             ->getElementsByTagName("a");
+
+         foreach($as as $a){
+
+             if($a["innerHTML"] == "Next"){
+
+                 $out["npt"] = $this->nextpage->store(
+                     $this->fuckhtml
+                     ->getTextContent(
+                         $a["attributes"]["href"]
+                     ),
+                     "web"
+                 );
+             }
+         }
+     }
+
+     return $out;
+ }
+
+ /*
+     Run a Mojeek news search and scrape the result page.
+
+     $get keys read: "s" (search term).
+
+     Returns an array with "status", "npt" (always null here) and
+     "news", a list of entries holding "title", "author",
+     "description", "date", "thumb" and "url".
+
+     Throws Exception when the search term is empty or the page
+     could not be fetched.
+ */
+ public function news($get){
+
+     $search = $get["s"];
+
+     if(strlen($search) === 0){
+
+         throw new Exception("Search term is empty!");
+     }
+
+     $out = [
+         "status" => "ok",
+         "npt" => null,
+         "news" => []
+     ];
+
+     try{
+         $html =
+             $this->get(
+                 "https://www.mojeek.com/search",
+                 [
+                     "q" => $search,
+                     "fmt" => "news"
+                 ]
+             );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to get HTML");
+     }
+
+     /*
+         Get big, standard and smaller nodes
+     */
+     foreach(
+         [
+             "results-extended",
+             "results-standard"
+         ]
+         as $categoryname
+     ){
+
+         // earlier iterations load sub-nodes, so start again from
+         // the whole page
+         $this->fuckhtml->load($html);
+
+         $categories =
+             $this->fuckhtml
+             ->getElementsByClassName(
+                 $categoryname,
+                 "ul"
+             );
+
+         foreach($categories as $category){
+
+             $this->fuckhtml->load($category);
+
+             $nodes =
+                 $this->fuckhtml
+                 ->getElementsByTagName("li");
+
+             foreach($nodes as $node){
+
+                 $data = [
+                     "title" => null,
+                     "author" => null,
+                     "description" => null,
+                     "date" => null,
+                     "thumb" =>
+                         [
+                             "url" => null,
+                             "ratio" => null
+                         ],
+                     "url" => null
+                 ];
+
+                 /*
+                     Parse the results
+                 */
+                 $this->fuckhtml->load($node);
+
+                 // get title + url
+                 $a =
+                     $this->fuckhtml
+                     ->getElementsByTagName("a");
+
+                 if(count($a) === 0){
+
+                     // list item without any link: not an article
+                     continue;
+                 }
+
+                 $a = $a[0];
+
+                 if(isset($a["attributes"]["title"])){
+
+                     $data["title"] =
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $a["attributes"]["title"]
+                         );
+                 }
+
+                 if(isset($a["attributes"]["href"])){
+
+                     $data["url"] =
+                         $this->fuckhtml
+                         ->getTextContent(
+                             $a["attributes"]["href"]
+                         );
+                 }
+
+                 // get image
+                 $image =
+                     $this->fuckhtml
+                     ->getElementsByTagName("img");
+
+                 if(count($image) !== 0){
+
+                     // the src wraps the real image URL as
+                     // /image?img=<urlencoded URL>
+                     $data["thumb"] = [
+                         "url" =>
+                             urldecode(
+                                 str_replace(
+                                     "/image?img=",
+                                     "",
+                                     $this->fuckhtml
+                                     ->getTextContent(
+                                         $image[0]["attributes"]["src"]
+                                     )
+                                 )
+                             ),
+                         "ratio" => "16:9"
+                     ];
+                 }
+
+                 // get description
+                 $description =
+                     $this->fuckhtml
+                     ->getElementsByClassName("s", "p");
+
+                 if(count($description) !== 0){
+
+                     $data["description"] =
+                         $this->titledots(
+                             $this->fuckhtml
+                             ->getTextContent(
+                                 $description[0]
+                             )
+                         );
+                 }
+
+                 // get date + time
+                 $date =
+                     $this->fuckhtml
+                     ->getElementsByClassName(
+                         "date",
+                         "p"
+                     );
+
+                 $i =
+                     $this->fuckhtml
+                     ->getElementsByClassName("i", "p");
+
+                 if(count($date) !== 0){
+
+                     // we're inside a big node
+                     $time = strtotime($date[0]["innerHTML"]);
+
+                     if(count($i) !== 0){
+
+                         $this->fuckhtml->load($i[0]);
+
+                         $author =
+                             $this->fuckhtml
+                             ->getElementsByTagName("a");
+
+                         if(count($author) !== 0){
+
+                             $data["author"] =
+                                 $this->fuckhtml
+                                 ->getTextContent($author[0]);
+                         }
+                     }
+                 }else{
+
+                     // we're inside a small node: "author - date"
+                     $time = false;
+
+                     if(count($i) !== 0){
+
+                         $i =
+                             explode(
+                                 " - ",
+                                 $this->fuckhtml
+                                 ->getTextContent($i[0])
+                             );
+
+                         $time = strtotime(array_pop($i));
+                         $data["author"] = implode(" - ", $i);
+                     }
+                 }
+
+                 // strtotime() yields false on unparseable text;
+                 // keep the documented null in that case
+                 if($time !== false){
+
+                     $data["date"] = $time;
+                 }
+
+                 $out["news"][] = $data;
+             }
+         }
+     }
+
+     return $out;
+ }
+
+ /*
+     Strip dots and whitespace (space, tab, newline, carriage return,
+     NUL, vertical tab) from both ends of a string.
+ */
+ private function titledots($title){
+
+     $strip = ". \t\n\r\0\x0B";
+
+     return rtrim(ltrim($title, $strip), $strip);
+ }
+}
+
diff --git a/scraper/wiby.php b/scraper/wiby.php
new file mode 100644
index 0000000..a1daf57
--- /dev/null
+++ b/scraper/wiby.php
@@ -0,0 +1,244 @@
+<?php
+
+/*
+    Scraper for wiby.me web search.
+*/
+class wiby{
+
+ // next-page token helper, created in the constructor. Declared
+ // here because creating undeclared properties is deprecated
+ // since PHP 8.2
+ public $nextpage;
+
+ /*
+     Load the nextpage library and attach an instance constructed
+     with "wiby".
+ */
+ public function __construct(){
+
+     // include_once: other scrapers include the same file, and a
+     // second plain include would redeclare its contents
+     include_once "lib/nextpage.php";
+     $this->nextpage = new nextpage("wiby");
+ }
+
+ /*
+     Describe the filters available for a page type. Only "web" has
+     filters ("nsfw" and "date"); any other value yields [].
+ */
+ public function getfilters($page){
+
+     if($page != "web"){
+
+         return [];
+     }
+
+     return [
+         "nsfw" => [
+             "display" => "NSFW",
+             "option" => [
+                 "yes" => "Yes",
+                 "no" => "No"
+             ]
+         ],
+         "date" => [
+             "display" => "Time posted",
+             "option" => [
+                 "any" => "Any time",
+                 "day" => "Past day",
+                 "week" => "Past week",
+                 "month" => "Past month",
+                 "year" => "Past year",
+             ]
+         ]
+     ];
+ }
+
+ /*
+     Perform an HTTP GET with browser-like headers and return the
+     response body.
+
+     $url:  target URL
+     $get:  query parameters; when not empty they are encoded with
+            http_build_query() and appended after a "?"
+     $nsfw: value sent in the "ws" cookie
+
+     Throws Exception carrying the curl error message when the
+     transfer fails.
+
+     $get used to carry a "= []" default, but a default placed before
+     the required $nsfw can never be used and is deprecated in PHP 8.
+ */
+ private function get($url, $get, $nsfw){
+
+     if($get !== []){
+
+         $url .= "?" . http_build_query($get);
+     }
+
+     $curlproc = curl_init();
+
+     curl_setopt($curlproc, CURLOPT_URL, $url);
+
+     curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+     curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+         ["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+         "Accept-Language: en-US,en;q=0.5",
+         "Accept-Encoding: gzip",
+         "Cookie: ws={$nsfw}",
+         "DNT: 1",
+         "Connection: keep-alive",
+         "Upgrade-Insecure-Requests: 1",
+         "Sec-Fetch-Dest: document",
+         "Sec-Fetch-Mode: navigate",
+         "Sec-Fetch-Site: none",
+         "Sec-Fetch-User: ?1"]
+     );
+
+     curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+     curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+     curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+     curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+     $data = curl_exec($curlproc);
+
+     if(curl_errno($curlproc)){
+
+         // read the message first, then release the handle so the
+         // error path does not leak it
+         $message = curl_error($curlproc);
+         curl_close($curlproc);
+
+         throw new Exception($message);
+     }
+
+     curl_close($curlproc);
+     return $data;
+ }
+
+ /*
+     Run a wiby.me web search, or continue one from a next-page
+     token, and scrape the HTML into the response array.
+
+     $get keys read: "npt" (next page token; falsy for a new search),
+     "s" (search term), "date" and "nsfw" (filter values as listed by
+     getfilters()).
+
+     Returns an array with the keys "status", "spelling", "npt",
+     "answer", "web", "image", "video", "news" and "related". Only
+     "web" and "npt" are filled in here.
+
+     Throws Exception when the search term is empty or the page
+     could not be fetched.
+ */
+ public function web($get){
+
+     if($get["npt"]){
+
+         // the token stores the JSON written further down:
+         // query, page number and cookie value
+         $q =
+             json_decode(
+                 $this->nextpage->get($get["npt"], "web"),
+                 true
+             );
+
+         $nsfw = $q["nsfw"];
+         unset($q["nsfw"]);
+     }else{
+
+         $search = $get["s"];
+         if(strlen($search) === 0){
+
+             throw new Exception("Search term is empty!");
+         }
+
+         $date = $get["date"];
+         $nsfw = $get["nsfw"] == "yes" ? "0" : "1";
+
+         // drop user-supplied bang commands before adding our own
+         $search = $this->stripbangs($search);
+
+         switch($date){
+
+             case "day": $search = "!td " . $search; break;
+             case "week": $search = "!tw " . $search; break;
+             case "month": $search = "!tm " . $search; break;
+             case "year": $search = "!ty " . $search; break;
+         }
+
+         $q = [
+             "q" => $search
+         ];
+     }
+
+     try{
+         $html = $this->get(
+             "https://wiby.me/",
+             $q,
+             $nsfw
+         );
+     }catch(Exception $error){
+
+         throw new Exception("Failed to fetch search page");
+     }
+
+     // the "Find more..." link carries the next page number
+     preg_match(
+         '/<p class="pin"><blockquote>(?:<\/p>)?<br><a class="more" href="\/\?q=[^"]+&p=([0-9]+)">Find more\.\.\.<\/a><\/blockquote>/',
+         $html,
+         $nextpage
+     );
+
+     if(count($nextpage) === 0){
+
+         $nextpage = null;
+     }else{
+
+         $nextpage =
+             $this->nextpage->store(
+                 json_encode([
+                     "q" => $q["q"],
+                     "p" => (int)$nextpage[1],
+                     "nsfw" => $nsfw
+                 ]),
+                 "web"
+             );
+     }
+
+     $out = [
+         "status" => "ok",
+         "spelling" => [
+             "type" => "no_correction",
+             "using" => null,
+             "correction" => null
+         ],
+         "npt" => $nextpage,
+         "answer" => [],
+         "web" => [],
+         "image" => [],
+         "video" => [],
+         "news" => [],
+         "related" => []
+     ];
+
+     // capture groups: 1 = href, 2 = link text, 3 = description
+     preg_match_all(
+         '/<blockquote>[\s]*<a .* href="(.*)">(.*)<\/a>.*<p>(.*)<\/p>[\s]*<\/blockquote>/Ui',
+         $html,
+         $links
+     );
+
+     for($i=0; $i<count($links[0]); $i++){
+
+         $out["web"][] = [
+             "title" => $this->unescapehtml(trim($links[2][$i])),
+             "description" => $this->unescapehtml(trim(strip_tags($links[3][$i]))),
+             "url" => trim($links[1][$i]),
+             "date" => null,
+             "type" => "web",
+             "thumb" => [
+                 "url" => null,
+                 "ratio" => null
+             ],
+             "sublink" => [],
+             "table" => []
+         ];
+     }
+
+     return $out;
+ }
+
+ /*
+     Remove every "!"/"&" bang command from a search string.
+
+     str_replace() applies its search list in order, so the longer
+     commands must come before their one-letter prefixes: with "!g"
+     listed first, "!gi" was reduced to a stray "i". The pass repeats
+     until nothing changes because a removal can splice a new command
+     together ("!!gg" -> "!g").
+ */
+ private function stripbangs($search){
+
+     $bangs = [
+         "!gi", "!gv", "!gm", "!g",
+         "!bi", "!bv", "!bm", "!b",
+         "!td", "!tw", "!tm", "!ty",
+         "&gi", "&gv", "&gm", "&g",
+         "&bi", "&bv", "&bm", "&b",
+         "&td", "&tw", "&tm", "&ty"
+     ];
+
+     do{
+         $before = $search;
+         $search = str_replace($bangs, "", $search);
+     }while($search !== $before);
+
+     return $search;
+ }
+
+ /*
+     Turn <br> variants into newlines, then decode HTML entities
+     using the ENT_QUOTES | ENT_XML1 flags and UTF-8.
+ */
+ private function unescapehtml($str){
+
+     return html_entity_decode(
+         str_replace(
+             [
+                 "<br>",
+                 "<br/>",
+                 "</br>",
+                 "<BR>",
+                 "<BR/>",
+                 "</BR>",
+             ],
+             "\n",
+             $str
+         ),
+         ENT_QUOTES | ENT_XML1, 'UTF-8'
+     );
+ }
+}
diff --git a/scraper/yandex.php b/scraper/yandex.php
new file mode 100644
index 0000000..437c8aa
--- /dev/null
+++ b/scraper/yandex.php
@@ -0,0 +1,530 @@
+<?php
+
+class yandex{
+
+ /*
+ curl functions
+ */
+ /**
+  * Load the HTML parser and the next-page token store used by this scraper.
+  */
+ public function __construct(){
+
+ 	// include_once: other scrapers include these same library files, and
+ 	// including a file that declares a class a second time is a fatal error
+ 	// (presumably each file declares the class instantiated right after it)
+ 	include_once "lib/fuckhtml.php";
+ 	$this->fuckhtml = new fuckhtml();
+
+ 	include_once "lib/nextpage.php";
+ 	$this->nextpage = new nextpage("yandex");
+ }
+
+ /**
+  * Send a GET request with browser-like headers and return the response body.
+  *
+  * @param string $url  Endpoint without a query string
+  * @param array  $get  Query parameters; the "text" entry, when present, is
+  *                     also echoed in the Referer header
+  * @param string $nsfw "yes", "maybe" or "no"; mapped to the family level
+  *                     written into the yp cookie
+  * @return string|bool Value returned by curl_exec()
+  * @throws Exception   Carrying the curl error message when the transfer fails
+  */
+ private function get($url, $get, $nsfw){
+
+ 	// $get no longer has a default: an optional parameter in front of the
+ 	// required $nsfw could never be omitted and is deprecated since PHP 8
+
+ 	// URL-encode the term before it goes into a header line, otherwise
+ 	// spaces or line breaks in the query corrupt the request headers
+ 	$search = isset($get["text"]) ? urlencode($get["text"]) : "";
+
+ 	if($get !== []){
+
+ 		$url .= "?" . http_build_query($get);
+ 	}
+
+ 	// only ever write a known digit into the cookie
+ 	switch($nsfw){
+ 		case "yes": $nsfw = "0"; break;
+ 		case "no": $nsfw = "2"; break;
+ 		default: $nsfw = "1"; break; // "maybe" and any unexpected value
+ 	}
+
+ 	$headers =
+ 		["User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/113.0",
+ 		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ 		"Accept-Encoding: gzip",
+ 		"Accept-Language: en-US,en;q=0.5",
+ 		"DNT: 1",
+ 		"Cookie: yp=1716337604.sp.family%3A{$nsfw}#1685406411.szm.1:1920x1080:1920x999",
+ 		"Referer: https://yandex.com/images/search?text={$search}",
+ 		"Connection: keep-alive",
+ 		"Upgrade-Insecure-Requests: 1",
+ 		"Sec-Fetch-Dest: document",
+ 		"Sec-Fetch-Mode: navigate",
+ 		"Sec-Fetch-Site: cross-site"];
+
+ 	$curlproc = curl_init();
+
+ 	curl_setopt($curlproc, CURLOPT_URL, $url);
+ 	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ 	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ 	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ 	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ 	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ 	$data = curl_exec($curlproc);
+
+ 	if(curl_errno($curlproc)){
+
+ 		// read the message first, then release the handle before throwing
+ 		$error = curl_error($curlproc);
+ 		curl_close($curlproc);
+
+ 		throw new Exception($error);
+ 	}
+
+ 	curl_close($curlproc);
+ 	return $data;
+ }
+
+ /**
+  * Describe the filters available for a page type.
+  *
+  * @param string $pagetype Page type; only "images" has filters
+  * @return array Filter name => ["display" => label, "option" => value => label],
+  *               or an empty array for every other page type
+  */
+ public function getfilters($pagetype){
+
+ 	if($pagetype != "images"){
+
+ 		return [];
+ 	}
+
+ 	$filters = [];
+
+ 	$filters["nsfw"] = [
+ 		"display" => "NSFW",
+ 		"option" => [
+ 			"yes" => "Yes",
+ 			"maybe" => "Maybe",
+ 			"no" => "No"
+ 		]
+ 	];
+
+ 	$filters["time"] = [
+ 		"display" => "Time posted",
+ 		"option" => [
+ 			"any" => "Any time",
+ 			"week" => "Last week"
+ 		]
+ 	];
+
+ 	$filters["size"] = [
+ 		"display" => "Size",
+ 		"option" => [
+ 			"any" => "Any size",
+ 			"small" => "Small",
+ 			"medium" => "Medium",
+ 			"large" => "Large",
+ 			"wallpaper" => "Wallpaper"
+ 		]
+ 	];
+
+ 	$filters["color"] = [
+ 		"display" => "Colors",
+ 		"option" => [
+ 			"any" => "All colors",
+ 			"color" => "Color images only",
+ 			"gray" => "Black and white",
+ 			"red" => "Red",
+ 			"orange" => "Orange",
+ 			"yellow" => "Yellow",
+ 			"cyan" => "Cyan",
+ 			"green" => "Green",
+ 			"blue" => "Blue",
+ 			"violet" => "Purple",
+ 			"white" => "White",
+ 			"black" => "Black"
+ 		]
+ 	];
+
+ 	$filters["type"] = [
+ 		"display" => "Type",
+ 		"option" => [
+ 			"any" => "All types",
+ 			"photo" => "Photos",
+ 			"clipart" => "White background",
+ 			"lineart" => "Drawings and sketches",
+ 			"face" => "People",
+ 			"demotivator" => "Demotivators"
+ 		]
+ 	];
+
+ 	$filters["layout"] = [
+ 		"display" => "Layout",
+ 		"option" => [
+ 			"any" => "All layouts",
+ 			"horizontal" => "Horizontal",
+ 			"vertical" => "Vertical",
+ 			"square" => "Square"
+ 		]
+ 	];
+
+ 	$filters["format"] = [
+ 		"display" => "Format",
+ 		"option" => [
+ 			"any" => "Any format",
+ 			"jpeg" => "JPEG",
+ 			"png" => "PNG",
+ 			"gif" => "GIF"
+ 		]
+ 	];
+
+ 	return $filters;
+ }
+
+ /**
+  * Search Yandex Images and normalise the result list.
+  *
+  * @param array $get Request values. A non-empty "npt" resumes a stored
+  *                   next-page request; otherwise "s" (search term) and the
+  *                   filters nsfw, time, size, color, type, layout and
+  *                   format are read.
+  * @return array ["status" => "ok", "npt" => next-page token or null,
+  *                "image" => list of ["title", "source", "url"]]
+  * @throws Exception When the search term is empty, the stored request or
+  *                   the response cannot be decoded, the page cannot be
+  *                   fetched, or Yandex answers with a captcha.
+  */
+ public function image($get){
+
+ 	if($get["npt"]){
+
+ 		$request =
+ 			json_decode(
+ 				$this->nextpage->get(
+ 					$get["npt"],
+ 					"images"
+ 				),
+ 				true
+ 			);
+
+ 		if(!is_array($request) || !isset($request["nsfw"])){
+
+ 			throw new Exception("Failed to decode next page token");
+ 		}
+
+ 		// nsfw travels inside the stored request but is not a query parameter
+ 		$nsfw = $request["nsfw"];
+ 		unset($request["nsfw"]);
+ 	}else{
+
+ 		$search = $get["s"];
+ 		if(strlen($search) === 0){
+
+ 			throw new Exception("Search term is empty!");
+ 		}
+
+ 		$nsfw = $get["nsfw"];
+ 		$time = $get["time"];
+ 		$size = $get["size"];
+ 		$color = $get["color"];
+ 		$type = $get["type"];
+ 		$layout = $get["layout"];
+ 		$format = $get["format"];
+
+ 		/*
+ 			Query parameters seen in captured browser requests
+ 			(search term "minecraft"):
+
+ 			size    isize=small | medium | large
+ 			        isize=wallpaper, sent together with wp=wh16x9_1920x1080
+ 			layout  iorient=horizontal | vertical | square
+ 			type    type=photo | clipart | lineart | face | demotivator
+ 			color   icolor=color | gray | red | orange | yellow | cyan |
+ 			        green | blue | violet | white | black
+ 			format  itype=jpg | png | gifan
+ 			recent  recent=7D
+
+ 			Every capture also carried text=<query> and
+ 			uinfo=sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080
+ 		*/
+
+ 		// every ajax block is requested with empty params and version 2
+ 		$blocks = [];
+ 		foreach(
+ 			[
+ 				"extra-content",
+ 				"i-global__params:ajax",
+ 				"search2:ajax",
+ 				"preview__isWallpaper",
+ 				"content_type_search",
+ 				"serp-controller",
+ 				"cookies_ajax",
+ 				"advanced-search-block"
+ 			]
+ 			as $blockname
+ 		){
+
+ 			$blocks[] = [
+ 				"block" => $blockname,
+ 				"params" => (object)[],
+ 				"version" => 2
+ 			];
+ 		}
+
+ 		$request = [
+ 			"format" => "json",
+ 			"request" => [
+ 				"blocks" => $blocks,
+ 				"metadata" => [
+ 					"bundles" => [
+ 						"lb" => "AS?(E<X120"
+ 					],
+ 					"assets" => [
+ 						// base value; the browser additionally appended
+ 						// "227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
+ 						"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
+ 					],
+ 					"extraContent" => [
+ 						"names" => [
+ 							"i-react-ajax-adapter"
+ 						]
+ 					]
+ 				]
+ 			]
+ 		];
+
+ 		/*
+ 			Apply filters
+ 		*/
+ 		if($time == "week"){
+
+ 			$request["recent"] = "7D";
+ 		}
+
+ 		if($size != "any"){
+
+ 			$request["isize"] = $size;
+
+ 			if($size == "wallpaper"){
+
+ 				// the captured wallpaper request carried this extra parameter
+ 				$request["wp"] = "wh16x9_1920x1080";
+ 			}
+ 		}
+
+ 		if($type != "any"){
+
+ 			$request["type"] = $type;
+ 		}
+
+ 		if($color != "any"){
+
+ 			$request["icolor"] = $color;
+ 		}
+
+ 		if($layout != "any"){
+
+ 			$request["iorient"] = $layout;
+ 		}
+
+ 		if($format != "any"){
+
+ 			// the filter keys differ from the values Yandex was seen using
+ 			switch($format){
+ 				case "jpeg": $format = "jpg"; break;
+ 				case "gif": $format = "gifan"; break;
+ 			}
+
+ 			$request["itype"] = $format;
+ 		}
+
+ 		$request["text"] = $search;
+ 		$request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";
+
+ 		// the nested request description is sent as a JSON string
+ 		$request["request"] = json_encode($request["request"]);
+ 	}
+
+ 	try{
+ 		$json = $this->get(
+ 			"https://yandex.com/images/search",
+ 			$request,
+ 			$nsfw
+ 		);
+ 	}catch(Exception $err){
+
+ 		throw new Exception("Failed to get JSON");
+ 	}
+
+ 	$json = json_decode($json, true);
+
+ 	if($json === null){
+
+ 		throw new Exception("Failed to decode JSON");
+ 	}
+
+ 	if(
+ 		isset($json["type"]) &&
+ 		$json["type"] == "captcha"
+ 	){
+
+ 		throw new Exception("Yandex blocked this 4get instance. Yandex blocks don't last very long, but the block timer gets reset everytime you make another unsuccessful request. Please try again in ~7 minutes.");
+ 	}
+
+ 	// concatenate the html of every returned block
+ 	$html = "";
+ 	if(
+ 		isset($json["blocks"]) &&
+ 		is_array($json["blocks"])
+ 	){
+
+ 		foreach($json["blocks"] as $block){
+
+ 			if(isset($block["html"])){
+
+ 				$html .= $block["html"];
+ 			}
+ 		}
+ 	}
+
+ 	$this->fuckhtml->load($html);
+ 	$div = $this->fuckhtml->getElementsByTagName("div");
+
+ 	$out = [
+ 		"status" => "ok",
+ 		"npt" => null,
+ 		"image" => []
+ 	];
+
+ 	// a "next" button in the markup means another page exists
+ 	$more =
+ 		$this->fuckhtml
+ 		->getElementsByClassName(
+ 			"more more_direction_next",
+ 			$div
+ 		);
+
+ 	if(count($more) !== 0){
+
+ 		// store the request with the page counter advanced; the first
+ 		// request carries no "p", so the follow-up page is 1
+ 		$request["nsfw"] = $nsfw;
+ 		$request["p"] = isset($request["p"]) ? $request["p"] + 1 : 1;
+
+ 		$out["npt"] = $this->nextpage->store(json_encode($request), "images");
+ 	}
+
+ 	// get search results
+ 	$items =
+ 		$this->fuckhtml
+ 		->getElementsByClassName(
+ 			"serp-item serp-item_type_search",
+ 			$div
+ 		);
+
+ 	foreach($items as $item){
+
+ 		if(!isset($item["attributes"]["data-bem"])){
+
+ 			continue;
+ 		}
+
+ 		// each result carries its metadata as JSON in the data-bem attribute
+ 		$bem = json_decode($item["attributes"]["data-bem"], true);
+
+ 		if(
+ 			!isset($bem["serp-item"]["snippet"]["title"]) ||
+ 			!isset($bem["serp-item"]["snippet"]["url"])
+ 		){
+
+ 			continue;
+ 		}
+
+ 		$image = $bem["serp-item"];
+
+ 		$title = [html_entity_decode($image["snippet"]["title"], ENT_QUOTES | ENT_HTML5)];
+
+ 		if(isset($image["snippet"]["text"])){
+
+ 			$title[] = html_entity_decode($image["snippet"]["text"], ENT_QUOTES | ENT_HTML5);
+ 		}
+
+ 		$tmp = [
+ 			"title" =>
+ 				$this->fuckhtml
+ 				->getTextContent(
+ 					$this->titledots(
+ 						implode(": ", $title)
+ 					)
+ 				),
+ 			"source" => [],
+ 			"url" => htmlspecialchars_decode($image["snippet"]["url"])
+ 		];
+
+ 		// full-size copies first
+ 		if(
+ 			isset($image["dups"]) &&
+ 			is_array($image["dups"])
+ 		){
+
+ 			foreach($image["dups"] as $dup){
+
+ 				$tmp["source"][] = [
+ 					"url" => htmlspecialchars_decode($dup["url"]),
+ 					"width" => (int)$dup["w"],
+ 					"height" => (int)$dup["h"],
+ 				];
+ 			}
+ 		}
+
+ 		// thumbnail last; its url is protocol-relative
+ 		if(isset($image["thumb"]["url"])){
+
+ 			$tmp["source"][] = [
+ 				"url" =>
+ 					preg_replace(
+ 						'/^\/\//',
+ 						"https://",
+ 						htmlspecialchars_decode($image["thumb"]["url"])
+ 					),
+ 				"width" => (int)$image["thumb"]["size"]["width"],
+ 				"height" => (int)$image["thumb"]["size"]["height"]
+ 			];
+ 		}
+
+ 		$out["image"][] = $tmp;
+ 	}
+
+ 	return $out;
+ }
+
+ /**
+  * Trim a title and drop a trailing ellipsis ("..." or "…").
+  *
+  * @param string $title
+  * @return string
+  */
+ private function titledots($title){
+
+ 	// both spellings are compared against the last three bytes
+ 	// (the "…" literal is three bytes when this file is saved as UTF-8)
+ 	$tail = substr($title, -3);
+
+ 	if(in_array($tail, ["...", "…"], true)){
+
+ 		$title = substr($title, 0, -3);
+ 	}
+
+ 	return trim($title);
+ }
+}
diff --git a/scraper/youtube.php b/scraper/youtube.php
new file mode 100644
index 0000000..83a68ba
--- /dev/null
+++ b/scraper/youtube.php
@@ -0,0 +1,1723 @@
+<?php
+
+//$yt = new youtube();
+//header("Content-Type: application/json");
+//echo json_encode($yt->video("minecraft", null, "today", "any", "any", "live", "relevance"));
+
+class youtube{
+
+ /**
+  * Load the next-page token store used by this scraper.
+  */
+ public function __construct(){
+
+ 	// include_once: other scrapers include this same library file, and
+ 	// including a file that declares a class a second time is a fatal error
+ 	// (presumably the file declares the nextpage class used below)
+ 	include_once "lib/nextpage.php";
+ 	$this->nextpage = new nextpage("yt");
+ }
+
+ /**
+  * Describe the filters available for a page type.
+  *
+  * @param string $page Page type; only "videos" has filters
+  * @return array Filter name => ["display" => label, "option" => value => label],
+  *               or an empty array for every other page type
+  */
+ public function getfilters($page){
+
+ 	if($page != "videos"){
+
+ 		return [];
+ 	}
+
+ 	return [
+ 		"date" => [
+ 			"display" => "Time posted",
+ 			"option" => [
+ 				"any" => "Any time",
+ 				"hour" => "Last hour",
+ 				"today" => "Today",
+ 				"week" => "This week",
+ 				"month" => "This month",
+ 				"year" => "This year"
+ 			]
+ 		],
+ 		"type" => [
+ 			"display" => "Type",
+ 			"option" => [
+ 				"video" => "Video",
+ 				"channel" => "Channel",
+ 				"playlist" => "Playlist",
+ 				// lowercase like every other key; ytfilter() switches on "movie"
+ 				"movie" => "Movie"
+ 			]
+ 		],
+ 		"duration" => [
+ 			"display" => "Duration",
+ 			"option" => [
+ 				"any" => "Any duration",
+ 				// short means under 4 minutes, long means over 20 minutes
+ 				"short" => "Short (<4min)",
+ 				"medium" => "Medium (4-20min)",
+ 				"long" => "Long (>20min)"
+ 			]
+ 		],
+ 		"feature" => [
+ 			"display" => "Feature",
+ 			"option" => [
+ 				"any" => "No features",
+ 				"live" => "Live",
+ 				"4k" => "4K",
+ 				"hd" => "HD",
+ 				"subtitles" => "Subtitles/CC",
+ 				"creativecommons" => "Creative Commons",
+ 				"360" => "VR 360°",
+ 				"vr180" => "VR 180°",
+ 				"3d" => "3D",
+ 				"hdr" => "HDR"
+ 			]
+ 		],
+ 		"sort" => [
+ 			"display" => "Sort by",
+ 			"option" => [
+ 				"relevance" => "Relevance",
+ 				"upload_date" => "Upload date",
+ 				"view_count" => "View count",
+ 				"rating" => "Rating"
+ 			]
+ 		]
+ 	];
+ }
+
+ /**
+  * Encode a filter combination into the value of YouTube's "sp" parameter.
+  *
+  * Byte layout, taken from YouTube's own result URLs:
+  *
+  *   [08 <sort>] [12 <length> <filter bytes>]
+  *
+  *   sort      relevance 00, rating 01, upload date 02, view count 03
+  *   date      08 01 hour, 08 02 today, 08 03 week, 08 04 month, 08 05 year
+  *   type      10 01 video, 10 02 channel, 10 03 playlist, 10 04 movie
+  *   duration  18 01 under 4 min, 18 03 4-20 min, 18 02 over 20 min
+  *   feature   20 01 HD, 28 01 subtitles, 30 01 creative commons, 38 01 3D,
+  *             40 01 live, 70 01 4K, 78 01 360, c8 01 01 HDR, d0 01 01 VR180
+  *             (location & purchased unused)
+  *
+  * Worked examples:
+  *   video                                  12 02 10 01
+  *   video + under 4 min + upload date      08 02 12 04 10 01 18 01
+  *   week + video + 20min + live + rating   08 01 12 08 08 03 10 01 18 02 40 01
+  *
+  * To inspect a value copied from a URL:
+  *   bin2hex(base64_decode(urldecode(urldecode("CAI%253D"))))
+  *
+  * @param string $date     any|hour|today|week|month|year
+  * @param string $type     video|channel|playlist|movie (any letter case)
+  * @param string $duration any|short|medium|long
+  * @param string $feature  any or one of the feature keys above
+  * @param string $sort     relevance|upload_date|view_count|rating
+  * @return string|null     urlencoded base64 payload, or null when the
+  *                         combination is YouTube's default search
+  */
+ private function ytfilter($date, $type, $duration, $feature, $sort){
+
+ 	// the filter list has used the key "Movie"; accept any casing
+ 	$type = strtolower($type);
+
+ 	/*
+ 		Resolve incompatible combinations
+ 	*/
+ 	switch($type){
+
+ 		case "channel":
+ 		case "playlist":
+ 			// these types support neither duration, features nor sorting
+ 			$duration = "any";
+ 			$feature = "any";
+ 			$sort = "any";
+
+ 			// an upload date only applies to videos, so the date wins
+ 			if($date != "any"){
+
+ 				$type = "video";
+ 			}
+ 			break;
+
+ 		case "movie":
+ 			// features that cannot be combined with movies
+ 			if(
+ 				in_array(
+ 					$feature,
+ 					[
+ 						"live",
+ 						"subtitles",
+ 						"creativecommons",
+ 						"3d"
+ 					],
+ 					true
+ 				)
+ 			){
+
+ 				$feature = "any";
+ 			}
+ 			break;
+ 	}
+
+ 	// default search: no sp parameter at all
+ 	if(
+ 		$date == "any" &&
+ 		$type == "video" &&
+ 		$duration == "any" &&
+ 		$feature == "any" &&
+ 		$sort == "relevance"
+ 	){
+
+ 		return null;
+ 	}
+
+ 	/*
+ 		Encode hex data
+ 	*/
+ 	$sortcodes = [
+ 		"relevance" => "0800",
+ 		"upload_date" => "0802",
+ 		"view_count" => "0803",
+ 		"rating" => "0801"
+ 	];
+
+ 	$datecodes = [
+ 		"hour" => "0801",
+ 		"today" => "0802",
+ 		"week" => "0803",
+ 		"month" => "0804",
+ 		"year" => "0805"
+ 	];
+
+ 	$typecodes = [
+ 		"video" => "1001",
+ 		"channel" => "1002",
+ 		"playlist" => "1003",
+ 		"movie" => "1004"
+ 	];
+
+ 	$durationcodes = [
+ 		"short" => "1801",
+ 		"medium" => "1803",
+ 		"long" => "1802"
+ 	];
+
+ 	$featurecodes = [
+ 		"live" => "4001",
+ 		"4k" => "7001",
+ 		"hd" => "2001",
+ 		"subtitles" => "2801",
+ 		"creativecommons" => "3001",
+ 		"360" => "7801",
+ 		"vr180" => "d00101",
+ 		"3d" => "3801",
+ 		"hdr" => "c80101"
+ 	];
+
+ 	// filter bytes are always written in this order: date, type, duration, feature
+ 	$filters = "";
+ 	if(isset($datecodes[$date])){ $filters .= $datecodes[$date]; }
+ 	if(isset($typecodes[$type])){ $filters .= $typecodes[$type]; }
+ 	if(isset($durationcodes[$duration])){ $filters .= $durationcodes[$duration]; }
+ 	if(isset($featurecodes[$feature])){ $filters .= $featurecodes[$feature]; }
+
+ 	$hex = isset($sortcodes[$sort]) ? $sortcodes[$sort] : "";
+
+ 	if($filters !== ""){
+
+ 		// the length byte is derived from the bytes actually written, so an
+ 		// unrecognised value can no longer produce a length that overruns
+ 		$hex .= "12" . sprintf("%02x", intdiv(strlen($filters), 2)) . $filters;
+ 	}
+
+ 	if($hex === ""){
+
+ 		return null;
+ 	}
+
+ 	return urlencode(base64_encode(hex2bin($hex)));
+ }
+
+ // me reading youtube's json
+ // https://imgur.com/X9hVlFX
+
+ const req_web = 0;
+ const req_xhr = 1;
+
+ /**
+  * Fetch a YouTube URL with browser-like headers and return the response body.
+  *
+  * @param string      $url          Endpoint without a query string
+  * @param array       $get          Query parameters appended to $url
+  * @param int         $reqtype      self::req_web for a page load (GET),
+  *                                  self::req_xhr for a JSON POST
+  * @param string|null $continuation JSON request body, used with req_xhr
+  * @return string|bool              Value returned by curl_exec()
+  * @throws Exception                Carrying the curl error message when the
+  *                                  transfer fails
+  */
+ private function get($url, $get = [], $reqtype = self::req_web, $continuation = null){
+
+ 	if($get !== []){
+
+ 		$url .= "?" . http_build_query($get);
+ 	}
+
+ 	// one user agent for both request kinds; Firefox 110 reports rv:109.0
+ 	$useragent = "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0";
+
+ 	$curlproc = curl_init();
+
+ 	curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ 	if($reqtype === self::req_xhr){
+
+ 		$continuation = (string)$continuation;
+
+ 		$headers =
+ 			[$useragent,
+ 			"Accept: */*",
+ 			"Accept-Language: en-US,en;q=0.5",
+ 			"Accept-Encoding: gzip",
+ 			"Cookie: PREF=tz=America.New_York",
+ 			"Referer: https://www.youtube.com/",
+ 			"Content-Type: application/json",
+ 			"Content-Length: " . strlen($continuation),
+ 			"DNT: 1",
+ 			"Connection: keep-alive",
+ 			"Sec-Fetch-Dest: empty",
+ 			"Sec-Fetch-Mode: same-origin",
+ 			"Sec-Fetch-Site: same-origin"];
+
+ 		curl_setopt($curlproc, CURLOPT_POST, true);
+ 		curl_setopt($curlproc, CURLOPT_POSTFIELDS, $continuation);
+ 	}else{
+
+ 		// req_web, and the fallback for any unknown request type so that
+ 		// $headers is always defined
+ 		$headers =
+ 			[$useragent,
+ 			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ 			"Accept-Language: en-US,en;q=0.5",
+ 			"Accept-Encoding: gzip",
+ 			"Cookie: PREF=tz=America.New_York",
+ 			"DNT: 1",
+ 			"Connection: keep-alive",
+ 			"Upgrade-Insecure-Requests: 1",
+ 			"Sec-Fetch-Dest: document",
+ 			"Sec-Fetch-Mode: navigate",
+ 			"Sec-Fetch-Site: none",
+ 			"Sec-Fetch-User: ?1"];
+ 	}
+
+ 	curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ 	curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);
+
+ 	curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ 	curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ 	curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ 	curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ 	$data = curl_exec($curlproc);
+
+ 	if(curl_errno($curlproc)){
+
+ 		// read the message first, then release the handle before throwing
+ 		$error = curl_error($curlproc);
+ 		curl_close($curlproc);
+
+ 		throw new Exception($error);
+ 	}
+
+ 	curl_close($curlproc);
+ 	return $data;
+ }
+
+ /**
+  * Search YouTube and return the collected results.
+  *
+  * With a non-empty $get["npt"] the stored continuation is replayed against
+  * the innertube search endpoint. Otherwise the HTML results page is
+  * fetched for $get["s"] with the date, type, duration, feature and sort
+  * filters, and its embedded ytcfg / ytInitialData JSON is parsed.
+  *
+  * Individual results are appended to $this->out by parsevideoobject().
+  *
+  * @param array $get Request values (npt, or s + date, type, duration,
+  *                   feature, sort)
+  * @return array $this->out: status, npt (stored token or null), video,
+  *               author, livestream, playlist, reel
+  * @throws Exception When the search term is empty, a page cannot be
+  *                   fetched, or the expected JSON cannot be found/decoded
+  */
+ public function video($get){
+
+ 	$this->out = [
+ 		"status" => "ok",
+ 		"npt" => null,
+ 		"video" => [],
+ 		"author" => [],
+ 		"livestream" => [],
+ 		"playlist" => [],
+ 		"reel" => []
+ 	];
+
+ 	if($get["npt"]){
+
+ 		// parse nextPage
+ 		// https://www.youtube.com/youtubei/v1/search?key=<INNERTUBE_API_KEY>&prettyPrint=false
+ 		$npt =
+ 			json_decode(
+ 				$this->nextpage->get(
+ 					$get["npt"],
+ 					"videos"
+ 				),
+ 				true
+ 			);
+
+ 		if(!isset($npt["key"], $npt["post"])){
+
+ 			throw new Exception("Could not decode next page token");
+ 		}
+
+ 		try{
+ 			$json = $this->get(
+ 				"https://www.youtube.com/youtubei/v1/search",
+ 				[
+ 					"key" => $npt["key"],
+ 					"prettyPrint" => "false"
+ 				],
+ 				self::req_xhr,
+ 				json_encode($npt["post"])
+ 			);
+ 		}catch(Exception $error){
+
+ 			throw new Exception("Could not fetch results page");
+ 		}
+
+ 		$json = json_decode($json);
+
+ 		if(!is_object($json)){
+
+ 			throw new Exception("Could not decode results page");
+ 		}
+
+ 		// item 0 holds the results, item 1 the next continuation token
+ 		$items = [];
+ 		if(
+ 			isset(
+ 				$json
+ 				->onResponseReceivedCommands[0]
+ 				->appendContinuationItemsAction
+ 				->continuationItems
+ 			)
+ 		){
+
+ 			$items =
+ 				$json
+ 				->onResponseReceivedCommands[0]
+ 				->appendContinuationItemsAction
+ 				->continuationItems;
+ 		}
+
+ 		if(isset($items[0]->itemSectionRenderer->contents)){
+
+ 			foreach($items[0]->itemSectionRenderer->contents as $video){
+
+ 				$this->parsevideoobject($video);
+ 			}
+ 		}
+
+ 		if(
+ 			isset(
+ 				$items[1]
+ 				->continuationItemRenderer
+ 				->continuationEndpoint
+ 				->continuationCommand
+ 				->token
+ 			)
+ 		){
+
+ 			// prepare nextpage for later..
+ 			$npt["post"]["continuation"] =
+ 				$items[1]
+ 				->continuationItemRenderer
+ 				->continuationEndpoint
+ 				->continuationCommand
+ 				->token;
+ 		}else{
+
+ 			$npt = null;
+ 		}
+
+ 		$this->out["npt"] = $npt;
+
+ 	}else{
+
+ 		$search = $get["s"];
+ 		if(strlen($search) === 0){
+
+ 			throw new Exception("Search term is empty!");
+ 		}
+
+ 		$query = [
+ 			"search_query" => $search
+ 		];
+
+ 		$filter =
+ 			$this->ytfilter(
+ 				$get["date"],
+ 				$get["type"],
+ 				$get["duration"],
+ 				$get["feature"],
+ 				$get["sort"]
+ 			);
+
+ 		if($filter !== null){
+
+ 			$query["sp"] = $filter;
+ 		}
+
+ 		try{
+ 			$html = $this->get(
+ 				"https://www.youtube.com/results",
+ 				$query
+ 			);
+ 		}catch(Exception $error){
+
+ 			throw new Exception("Could not fetch results page");
+ 		}
+
+ 		// client configuration, needed to build continuation requests
+ 		if(
+ 			!preg_match(
+ 				'/ytcfg\.set\(({".*})\); *window\.ytcfg/',
+ 				$html,
+ 				$match
+ 			)
+ 		){
+
+ 			throw new Exception("Could not get ytcfg");
+ 		}
+
+ 		$ytconfig = json_decode($match[1]);
+
+ 		if(!isset($ytconfig->INNERTUBE_CONTEXT->client)){
+
+ 			throw new Exception("Could not get ytcfg");
+ 		}
+
+ 		// parse ytInitialData
+ 		if(
+ 			!preg_match(
+ 				'/ytInitialData *= *({.*});<\/script>/',
+ 				$html,
+ 				$match
+ 			)
+ 		){
+
+ 			throw new Exception("Could not get ytInitialData");
+ 		}
+
+ 		$json = json_decode($match[1]);
+
+ 		if(!is_object($json)){
+
+ 			throw new Exception("Could not get ytInitialData");
+ 		}
+
+ 		// generate POST data for nextpage
+ 		$context = $ytconfig->INNERTUBE_CONTEXT;
+ 		$client = $context->client;
+
+ 		if(!isset($context->request)){
+
+ 			$context->request = new stdClass();
+ 		}
+
+ 		$client->screenWidthPoints = 1239;
+ 		$client->screenHeightPoints = 999;
+ 		$client->screenPixelDensity = 1;
+ 		$client->screenDensityFloat = 1;
+ 		$client->utcOffsetMinutes = -240;
+ 		$context->request->internalExperimentFlags = [];
+ 		$context->request->consistencyTokenJars = [];
+
+ 		$client->mainAppWebInfo = [
+ 			"graftUrl" => $client->originalUrl ?? null,
+ 			"webDisplayMode" => "WEB_DISPLAY_MODE_BROWSER",
+ 			"isWebNativeShareAvailable" => false
+ 		];
+
+ 		// browser signals, sent as a list of key/value pairs
+ 		$signals = [
+ 			"dt" => (string)($ytconfig->TIME_CREATED_MS ?? ""),
+ 			"flash" => "0",
+ 			"frm" => "0",
+ 			"u_tz" => "-240",
+ 			"u_his" => "3",
+ 			"u_h" => "1080",
+ 			"u_w" => "1920",
+ 			"u_ah" => "1080",
+ 			"u_cd" => "24",
+ 			"bc" => "31",
+ 			"bih" => "999",
+ 			"biw" => "1239",
+ 			"brdim" => "0,0,0,0,1920,0,1920,1061,1239,999",
+ 			"vis" => "1",
+ 			"wgl" => "true",
+ 			"ca_type" => "image"
+ 		];
+
+ 		$params = [];
+ 		foreach($signals as $key => $value){
+
+ 			$params[] = [
+ 				"key" => $key,
+ 				"value" => $value
+ 			];
+ 		}
+
+ 		$context->adSignalsInfo = [
+ 			"params" => $params
+ 		];
+
+ 		// primary column: the first section carries the results, a later
+ 		// one may carry the continuation token
+ 		$sections = [];
+ 		if(
+ 			isset(
+ 				$json
+ 				->contents
+ 				->twoColumnSearchResultsRenderer
+ 				->primaryContents
+ 				->sectionListRenderer
+ 				->contents
+ 			)
+ 		){
+
+ 			$sections =
+ 				$json
+ 				->contents
+ 				->twoColumnSearchResultsRenderer
+ 				->primaryContents
+ 				->sectionListRenderer
+ 				->contents;
+ 		}
+
+ 		if(isset($sections[0]->itemSectionRenderer->contents)){
+
+ 			foreach($sections[0]->itemSectionRenderer->contents as $video){
+
+ 				$this->parsevideoobject($video);
+ 			}
+ 		}
+
+ 		// get additional data from secondaryContents
+ 		if(
+ 			isset(
+ 				$json
+ 				->contents
+ 				->twoColumnSearchResultsRenderer
+ 				->secondaryContents
+ 				->secondarySearchContainerRenderer
+ 				->contents[0]
+ 				->universalWatchCardRenderer
+ 			)
+ 		){
+
+ 			$card =
+ 				$json
+ 				->contents
+ 				->twoColumnSearchResultsRenderer
+ 				->secondaryContents
+ 				->secondarySearchContainerRenderer
+ 				->contents[0]
+ 				->universalWatchCardRenderer;
+
+ 			$header = $card->header->watchCardRichHeaderRenderer;
+
+ 			$author = [
+ 				"name" => $header->title->simpleText,
+ 				"url" =>
+ 					"https://www.youtube.com/channel/" .
+ 					$header
+ 					->titleNavigationEndpoint
+ 					->browseEndpoint
+ 					->browseId,
+ 				"avatar" => null
+ 			];
+
+ 			if(isset($header->avatar->thumbnails[0]->url)){
+
+ 				$author["avatar"] = $header->avatar->thumbnails[0]->url;
+ 			}
+
+ 			// add video in callToAction if present
+ 			if(
+ 				isset(
+ 					$card
+ 					->callToAction
+ 					->watchCardHeroVideoRenderer
+ 					->lengthText
+ 				)
+ 			){
+
+ 				$hero = $card->callToAction->watchCardHeroVideoRenderer;
+
+ 				// the subtitle is a "•"-separated line: the date is read
+ 				// from its third part, the view count from everything after
+ 				// the first separator
+ 				$subtitle = $hero->subtitle->simpleText;
+ 				$parts = explode("•", $subtitle);
+ 				$rest = explode("•", $subtitle, 2);
+
+ 				$this->out["video"][] = [
+ 					"title" => $hero->title->simpleText,
+ 					"description" => null,
+ 					"author" => $author,
+ 					"date" =>
+ 						$this->textualdate2unix(
+ 							trim($parts[2] ?? "")
+ 						),
+ 					"duration" =>
+ 						$this->hms2int(
+ 							$hero->lengthText->simpleText
+ 						),
+ 					"views" =>
+ 						$this->truncatedcount2int(
+ 							trim($rest[1] ?? "")
+ 						),
+ 					"thumb" => [
+ 						"url" =>
+ 							$hero
+ 							->heroImage
+ 							->singleHeroImageRenderer
+ 							->thumbnail
+ 							->thumbnails[0]
+ 							->url,
+ 						"ratio" => "16:9"
+ 					],
+ 					"url" =>
+ 						"https://www.youtube.com/watch?v=" .
+ 						$hero
+ 						->navigationEndpoint
+ 						->watchEndpoint
+ 						->videoId
+ 				];
+ 			}
+
+ 			// get all playlists, ignore videos: take the first section
+ 			// that exposes a horizontal card list
+ 			$cards = null;
+
+ 			if(isset($card->sections)){
+
+ 				foreach($card->sections as $section){
+
+ 					if(
+ 						isset(
+ 							$section
+ 							->watchCardSectionSequenceRenderer
+ 							->lists[0]
+ 							->horizontalCardListRenderer
+ 							->cards
+ 						)
+ 					){
+
+ 						$cards =
+ 							$section
+ 							->watchCardSectionSequenceRenderer
+ 							->lists[0]
+ 							->horizontalCardListRenderer
+ 							->cards;
+ 						break;
+ 					}
+ 				}
+ 			}
+
+ 			if($cards !== null){
+
+ 				foreach($cards as $entry){
+
+ 					if(!isset($entry->searchRefinementCardRenderer)){
+
+ 						continue;
+ 					}
+
+ 					$entry = $entry->searchRefinementCardRenderer;
+
+ 					$this->out["playlist"][] = [
+ 						"title" => $entry->query->runs[0]->text,
+ 						"description" => null,
+ 						"author" => $author,
+ 						"date" => null,
+ 						"duration" => null,
+ 						"views" => null,
+ 						"thumb" => [
+ 							"url" => $entry->thumbnail->thumbnails[0]->url,
+ 							"ratio" => "1:1"
+ 						],
+ 						"url" =>
+ 							"https://www.youtube.com" .
+ 							$entry
+ 							->searchEndpoint
+ 							->commandMetadata
+ 							->webCommandMetadata
+ 							->url
+ 					];
+ 				}
+ 			}
+ 		}
+
+ 		// the section holding a continuationItemRenderer provides the token
+ 		// for the next page
+ 		foreach($sections as $cont){
+
+ 			if(isset($cont->continuationItemRenderer)){
+
+ 				$this->out["npt"] = [
+ 					"key" => $ytconfig->INNERTUBE_API_KEY,
+ 					"post" => [
+ 						"context" => $context,
+ 						"continuation" =>
+ 							$cont
+ 							->continuationItemRenderer
+ 							->continuationEndpoint
+ 							->continuationCommand
+ 							->token
+ 					]
+ 				];
+ 				break;
+ 			}
+ 		}
+ 	}
+
+ 	if($this->out["npt"] !== null){
+
+ 		$this->out["npt"] = $this->nextpage->store(json_encode($this->out["npt"]), "videos");
+ 	}
+
+ 	return $this->out;
+ }
+
+ /**
+  * Convert one search-result item into this scraper's output format and
+  * append it to the matching $this->out bucket.
+  *
+  * Handled renderer types:
+  *  - videoRenderer                 -> "video", "livestream" or "reel"
+  *  - watchCardCompactVideoRenderer -> "video"
+  *  - reelShelfRenderer             -> "reel" (one entry per shelf item)
+  *  - channelRenderer               -> "author"
+  *  - shelfRenderer                 -> recurses into each vertical list item
+  *  - radioRenderer                 -> "playlist"
+  *  - playlistRenderer              -> "playlist"
+  *
+  * Any other renderer type is silently ignored.
+  *
+  * @param object $video One decoded JSON item wrapping a single renderer
+  * @return void Results are written to $this->out
+  */
+ private function parsevideoobject($video){
+
+     if(isset($video->videoRenderer)){
+
+         $video = $video->videoRenderer;
+
+         // the description snippet arrives split into text runs
+         $description = null;
+
+         if(isset($video->detailedMetadataSnippets)){
+
+             foreach(
+                 $video->detailedMetadataSnippets[0]->snippetText->runs
+                 as $description_part
+             ){
+
+                 $description .= $description_part->text;
+             }
+         }
+
+         if(
+             isset($video->badges[0]->metadataBadgeRenderer->icon->iconType) &&
+             $video->badges[0]->metadataBadgeRenderer->icon->iconType == "LIVE"
+         ){
+
+             // livestreams have no publish date or length
+             $type = "livestream";
+             $date = null;
+             $duration = "_LIVE";
+
+             $views =
+                 isset($video->viewCountText->runs[0]->text) ?
+                 $this->views2int($video->viewCountText->runs[0]->text) :
+                 null;
+         }else{
+
+             $type = "video";
+
+             $date =
+                 isset($video->publishedTimeText->simpleText) ?
+                 $this->textualdate2unix($video->publishedTimeText->simpleText) :
+                 null;
+
+             $duration =
+                 isset($video->lengthText->simpleText) ?
+                 $this->hms2int($video->lengthText->simpleText) :
+                 null;
+
+             $views =
+                 isset($video->viewCountText->simpleText) ?
+                 $this->views2int($video->viewCountText->simpleText) :
+                 null;
+         }
+
+         // shorts are delivered as a videoRenderer, file them under "reel"
+         if(
+             isset(
+                 $video
+                     ->navigationEndpoint
+                     ->commandMetadata
+                     ->webCommandMetadata
+                     ->webPageType
+             ) &&
+             $video
+                 ->navigationEndpoint
+                 ->commandMetadata
+                 ->webCommandMetadata
+                 ->webPageType
+             == "WEB_PAGE_TYPE_SHORTS"
+         ){
+
+             $type = "reel";
+         }
+
+         array_push(
+             $this->out[$type],
+             [
+                 "title" => $video->title->runs[0]->text,
+                 "description" => $this->titledots($description),
+                 "author" => [
+                     "name" => $video->longBylineText->runs[0]->text,
+                     "url" =>
+                         "https://www.youtube.com/channel/" .
+                         $video
+                             ->longBylineText
+                             ->runs[0]
+                             ->navigationEndpoint
+                             ->browseEndpoint
+                             ->browseId,
+                     "avatar" =>
+                         $this->checkhttpspresence(
+                             $video
+                                 ->channelThumbnailSupportedRenderers
+                                 ->channelThumbnailWithLinkRenderer
+                                 ->thumbnail
+                                 ->thumbnails[0]
+                                 ->url
+                         )
+                 ],
+                 "date" => $date,
+                 "duration" => $duration,
+                 "views" => $views,
+                 "thumb" => [
+                     "url" => $video->thumbnail->thumbnails[0]->url,
+                     "ratio" => "16:9"
+                 ],
+                 "url" => "https://www.youtube.com/watch?v=" . $video->videoId
+             ]
+         );
+
+     }elseif(isset($video->watchCardCompactVideoRenderer)){
+
+         $video = $video->watchCardCompactVideoRenderer;
+
+         // subtitle is split on the bullet: first part is fed to the view
+         // counter, second part to the date parser
+         $subtitle = explode("•", $video->subtitle->simpleText, 2);
+
+         array_push(
+             $this->out["video"],
+             [
+                 "title" => $video->title->simpleText,
+                 "description" => null,
+                 "author" => [
+                     "name" => $video->byline->runs[0]->text,
+                     "url" =>
+                         "https://www.youtube.com/channel/" .
+                         $video
+                             ->byline
+                             ->runs[0]
+                             ->navigationEndpoint
+                             ->browseEndpoint
+                             ->browseId,
+                     "avatar" => null
+                 ],
+                 "date" =>
+                     isset($subtitle[1]) ?
+                     $this->textualdate2unix(trim($subtitle[1])) :
+                     null,
+                 "duration" => $this->hms2int($video->lengthText->simpleText),
+                 "views" => $this->truncatedcount2int(trim($subtitle[0])),
+                 "thumb" => [
+                     "url" => $video->thumbnail->thumbnails[0]->url,
+                     "ratio" => "16:9"
+                 ],
+                 "url" =>
+                     "https://www.youtube.com/watch?v=" .
+                     $video->navigationEndpoint->watchEndpoint->videoId
+             ]
+         );
+
+     }elseif(isset($video->reelShelfRenderer)){
+
+         foreach($video->reelShelfRenderer->items as $reel){
+
+             // skip shelf entries that are not reel items
+             if(!isset($reel->reelItemRenderer)){
+
+                 continue;
+             }
+
+             $reel = $reel->reelItemRenderer;
+
+             array_push(
+                 $this->out["reel"],
+                 [
+                     "title" => $reel->headline->simpleText,
+                     "description" => null,
+                     "author" => [
+                         "name" => null,
+                         "url" => null,
+                         "avatar" => null
+                     ],
+                     "date" => null,
+                     // the length is only available inside the accessibility label
+                     "duration" =>
+                         $this->textualtime2int(
+                             $reel->accessibility->accessibilityData->label
+                         ),
+                     "views" =>
+                         $this->truncatedcount2int(
+                             $reel->viewCountText->simpleText
+                         ),
+                     "thumb" => [
+                         "url" => $reel->thumbnail->thumbnails[0]->url,
+                         "ratio" => "9:16"
+                     ],
+                     "url" => "https://www.youtube.com/watch?v=" . $reel->videoId
+                 ]
+             );
+         }
+
+     }elseif(isset($video->channelRenderer)){
+
+         $video = $video->channelRenderer;
+
+         $description = null;
+
+         if(isset($video->descriptionSnippet)){
+
+             foreach($video->descriptionSnippet->runs as $description_part){
+
+                 $description .= $description_part->text;
+             }
+         }
+
+         // the last thumbnail of the list is used
+         $thumbnails = $video->thumbnail->thumbnails;
+
+         array_push(
+             $this->out["author"],
+             [
+                 "title" => $video->title->simpleText,
+                 // NOTE(review): follower count is read from videoCountText,
+                 // confirm this is the field carrying the subscriber count
+                 "followers" =>
+                     isset($video->videoCountText->simpleText) ?
+                     $this->truncatedcount2int($video->videoCountText->simpleText) :
+                     0,
+                 "description" => $this->titledots($description),
+                 "thumb" => [
+                     "url" =>
+                         $this->checkhttpspresence(
+                             $thumbnails[count($thumbnails) - 1]->url
+                         ),
+                     "ratio" => "1:1"
+                 ],
+                 "url" => "https://www.youtube.com/channel/" . $video->channelId
+             ]
+         );
+
+     }elseif(isset($video->shelfRenderer)){
+
+         // only vertical lists are supported
+         if(
+             !isset($video->shelfRenderer->content->verticalListRenderer) ||
+             !is_object($video->shelfRenderer->content->verticalListRenderer)
+         ){
+
+             return;
+         }
+
+         foreach(
+             $video->shelfRenderer->content->verticalListRenderer->items
+             as $shelfvideo
+         ){
+
+             $this->parsevideoobject($shelfvideo);
+         }
+
+     }elseif(isset($video->radioRenderer)){
+
+         $video = $video->radioRenderer;
+
+         // description: "<video count text>. <child title>, <child title>"
+         $description = $video->videoCountText->runs[0]->text . ".";
+
+         $tmp = [];
+         foreach($video->videos as $childvideo){
+
+             $tmp[] = $childvideo->childVideoRenderer->title->simpleText;
+         }
+
+         if(count($tmp) !== 0){
+
+             $description .= " " . implode(", ", $tmp);
+         }
+
+         $thumbnails = $video->thumbnail->thumbnails;
+
+         array_push(
+             $this->out["playlist"],
+             [
+                 "title" => $video->title->simpleText,
+                 "description" => $description,
+                 "author" => [
+                     "name" => $video->longBylineText->simpleText,
+                     "url" => null,
+                     "avatar" => null
+                 ],
+                 "date" => null,
+                 "duration" => null,
+                 "views" => null,
+                 "thumb" => [
+                     "url" => $thumbnails[count($thumbnails) - 1]->url,
+                     "ratio" => "16:9"
+                 ],
+                 "url" =>
+                     "https://www.youtube.com/watch?v=" .
+                     $video->videos[0]->childVideoRenderer->videoId .
+                     "&list=" .
+                     $video->playlistId .
+                     "&start_radio=1"
+             ]
+         );
+
+     }elseif(isset($video->playlistRenderer)){
+
+         $video = $video->playlistRenderer;
+
+         // description: "<n> videos. <child title>, <child title>"
+         $description = $video->videoCount . " videos.";
+
+         $tmp = [];
+         foreach($video->videos as $childvideo){
+
+             $tmp[] = $childvideo->childVideoRenderer->title->simpleText;
+         }
+
+         if(count($tmp) !== 0){
+
+             $description .= " " . implode(", ", $tmp);
+         }
+
+         $thumbnails = $video->thumbnails[0]->thumbnails;
+
+         array_push(
+             $this->out["playlist"],
+             [
+                 "title" => $video->title->simpleText,
+                 "description" => $description,
+                 "author" => [
+                     "name" => $video->longBylineText->runs[0]->text,
+                     "url" =>
+                         "https://www.youtube.com/channel/" .
+                         $video
+                             ->longBylineText
+                             ->runs[0]
+                             ->navigationEndpoint
+                             ->browseEndpoint
+                             ->browseId,
+                     // same key as every other author array ("avatar", not "picture")
+                     "avatar" => null
+                 ],
+                 "date" => null,
+                 "duration" => null,
+                 "views" => null,
+                 "thumb" => [
+                     "url" => $thumbnails[count($thumbnails) - 1]->url,
+                     "ratio" => "16:9"
+                 ],
+                 "url" =>
+                     "https://www.youtube.com/watch?v=" .
+                     $video->videos[0]->childVideoRenderer->videoId .
+                     "&list=" .
+                     $video->playlistId .
+                     "&start_radio=1"
+             ]
+         );
+     }
+ }
+
+ /**
+  * Convert a relative date such as "2 years ago" into a unix timestamp.
+  *
+  * The first "<number> <unit>" pair found anywhere in the text is used, so
+  * prefixed forms like "Streamed 3 weeks ago" are parsed as well. Months
+  * and years use fixed lengths of 2629746 and 31556952 seconds.
+  *
+  * @param string|null $number Relative date text
+  * @return int Timestamp; the current time when no "<number> <unit>" pair
+  *             can be found
+  */
+ private function textualdate2unix($number){
+
+     $seconds_per_unit = [
+         "second" => 1,
+         "minute" => 60,
+         "hour" => 3600,
+         "day" => 86400,
+         "week" => 604800,
+         "month" => 2629746,
+         "year" => 31556952
+     ];
+
+     // plural forms match too since only the singular stem is required
+     if(
+         !preg_match(
+             '/([0-9]+)\s*(second|minute|hour|day|week|month|year)/i',
+             (string)$number,
+             $match
+         )
+     ){
+
+         return time();
+     }
+
+     return time() - ((int)$match[1] * $seconds_per_unit[strtolower($match[2])]);
+ }
+
+ /**
+  * Turn a protocol-relative link ("//host/path") into an https link.
+  *
+  * @param string $link Link to check
+  * @return string The link prefixed with "https:" when it starts with
+  *                "//", the untouched link otherwise
+  */
+ private function checkhttpspresence($link){
+
+     $is_protocol_relative = substr($link, 0, 2) == "//";
+
+     return $is_protocol_relative ? "https:" . $link : $link;
+ }
+
+ /**
+  * Extract a duration in seconds from a " - " separated label.
+  *
+  * The second to last segment of the label is inspected (or the whole
+  * label when it has no " - " separator) and every "<number> <unit>" pair
+  * with a unit of second, minute or hour is summed up.
+  *
+  * @param string $number Label text, for example
+  *                       "title - 1 minute, 2 seconds - play video"
+  * @return int Duration in seconds, 0 when nothing matched
+  */
+ private function textualtime2int($number){
+
+     $segments = explode(" - ", $number);
+     $segment_count = count($segments);
+
+     $label =
+         $segment_count >= 2 ?
+         $segments[$segment_count - 2] :
+         $segments[0];
+
+     // remove spaces and singularize units so digits sit right before the unit
+     $label =
+         str_replace(
+             [" ", "seconds", "minutes", "hours"],
+             ["", "second", "minute", "hour"],
+             $label
+         );
+
+     preg_match_all(
+         '/([0-9]+)(second|minute|hour)/',
+         $label,
+         $matches,
+         PREG_SET_ORDER
+     );
+
+     $multipliers = [
+         "second" => 1,
+         "minute" => 60,
+         "hour" => 3600
+     ];
+
+     $seconds = 0;
+
+     foreach($matches as $match){
+
+         $seconds += (int)$match[1] * $multipliers[$match[2]];
+     }
+
+     return $seconds;
+ }
+
+ /**
+  * Parse a comma-grouped count such as "1,234,567 views" into an integer.
+  *
+  * Only the text before the first space is considered.
+  *
+  * @param string $views Count text
+  * @return int Parsed count, 0 when the text does not start with digits
+  */
+ private function views2int($views){
+
+     $pieces = explode(" ", $views, 2);
+     $digits = str_replace(",", "", $pieces[0]);
+
+     return (int)$digits;
+ }
+
+ /**
+  * Convert a "H:MM:SS", "M:SS" or "S" timestamp into seconds.
+  *
+  * @param string $time Colon separated timestamp
+  * @return int Number of seconds
+  */
+ private function hms2int($time){
+
+     $seconds = 0;
+
+     // every field to the left is worth 60 times the one to its right
+     foreach(explode(":", $time, 3) as $field){
+
+         $seconds = ($seconds * 60) + (int)$field;
+     }
+
+     return $seconds;
+ }
+
+ /**
+  * Convert an abbreviated count such as "1.2M views", "15K" or "1,234"
+  * into an integer.
+  *
+  * Only the first whitespace separated token is parsed. Thousands
+  * separators (commas) are dropped and the k, m and b suffixes (case
+  * insensitive) multiply by 1000, 1000000 and 1000000000. The fractional
+  * part is scaled with integer math and may have any number of digits;
+  * whatever is left below one unit is discarded.
+  *
+  * @param string|null $number Count text
+  * @return int Parsed count, 0 when the text does not start with a number
+  */
+ private function truncatedcount2int($number){
+
+     $tokens = preg_split('/\s+/', trim((string)$number), 2);
+     $token = str_replace(",", "", $tokens[0]);
+
+     if(
+         !preg_match(
+             '/^([0-9]+)(?:\.([0-9]+))?([kmb]\b)?/i',
+             $token,
+             $match
+         )
+     ){
+
+         return 0;
+     }
+
+     $multipliers = [
+         "" => 1,
+         "k" => 1000,
+         "m" => 1000000,
+         "b" => 1000000000
+     ];
+
+     $suffix = isset($match[3]) ? strtolower($match[3]) : "";
+     $multiplier = $multipliers[$suffix];
+
+     // 9 digits is all the largest multiplier can make use of
+     $fraction = isset($match[2]) ? substr($match[2], 0, 9) : "";
+
+     $scaled_fraction =
+         (int)floor(
+             ((int)$fraction * $multiplier) / pow(10, strlen($fraction))
+         );
+
+     return ((int)$match[1] * $multiplier) + $scaled_fraction;
+ }
+
+ /**
+  * Strip a trailing ellipsis ("..." or "…") and surrounding whitespace,
+  * including non-breaking spaces, from a piece of text.
+  *
+  * Trimming is done per UTF-8 character. Handing the two bytes of a
+  * non-breaking space (C2 A0) to trim() removes them one by one, which
+  * cuts multibyte characters ending in those bytes (for example "à",
+  * C3 A0) in half and leaves invalid UTF-8 behind.
+  *
+  * @param string|null $title Text to clean up
+  * @return string Cleaned text, an empty string for null input
+  */
+ private function titledots($title){
+
+     $title = (string)$title;
+
+     // "..." and "…" (E2 80 A6 in UTF-8) are both exactly 3 bytes long
+     $tail = substr($title, -3);
+
+     if(
+         $tail === "..." ||
+         $tail === "…"
+     ){
+
+         $title = (string)substr($title, 0, -3);
+     }
+
+     $trimmed =
+         preg_replace(
+             '/^[ \n\r\t\x0B\x00\x{00A0}]+|[ \n\r\t\x0B\x00\x{00A0}]+$/u',
+             "",
+             $title
+         );
+
+     if($trimmed === null){
+
+         // input is not valid UTF-8, only trim the single-byte characters
+         return trim($title, " \n\r\t\x0B\x00");
+     }
+
+     return $trimmed;
+ }
+}