summaryrefslogtreecommitdiff
path: root/scraper/yep.php
diff options
context:
space:
mode:
Diffstat (limited to 'scraper/yep.php')
-rw-r--r--scraper/yep.php394
1 files changed, 361 insertions, 33 deletions
diff --git a/scraper/yep.php b/scraper/yep.php
index 7a73635..5be3806 100644
--- a/scraper/yep.php
+++ b/scraper/yep.php
@@ -261,7 +261,8 @@ class yep{
"Connection: keep-alive",
"Sec-Fetch-Dest: empty",
"Sec-Fetch-Mode: cors",
- "Sec-Fetch-Site: same-site"]
+ "Sec-Fetch-Site: same-site",
+ "TE: trailers"]
);
curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
@@ -283,6 +284,212 @@ class yep{
return $data;
}
+
+
+ public function web($get){
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+
+ switch($nsfw){
+
+ case "yes": $nsfw = "off"; break;
+ case "maybe": $nsfw = "moderate"; break;
+ case "no": $nsfw = "strict"; break;
+ }
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ try{
+
+ // https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
+ $json =
+ json_decode(
+ $this->get(
+ $this->backend->get_ip(),
+ "https://api.yep.com/fs/2/search",
+ [
+ "client" => "web",
+ "gl" => $country == "all" ? $country : strtoupper($country),
+ "limit" => "99999",
+ "no_correct" => "false",
+ "q" => $search,
+ "safeSearch" => $nsfw,
+ "type" => "web"
+ ]
+ ),
+ true
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON");
+ }
+
+ //$json = json_decode(file_get_contents("scraper/yep.json"), true);
+
+ if($json === null){
+
+ throw new Exception("Failed to decode JSON");
+ }
+
+ if(isset($json[1]["correction"])){
+
+ $out["spelling"] = [
+ "type" => "not_many",
+ "using" => $search,
+ "correction" => $json[1]["correction"][1]
+ ];
+ }
+
+ if(isset($json[1]["results"])){
+ foreach($json[1]["results"] as $item){
+
+ switch(strtolower($item["type"])){
+
+ case "organic":
+ $sublinks = [];
+
+ if(isset($item["sitelinks"]["full"])){
+
+ foreach($item["sitelinks"]["full"] as $link){
+
+ $sublinks[] = [
+ "title" => $link["title"],
+ "date" => null,
+ "description" =>
+ $this->titledots(
+ strip_tags(
+ html_entity_decode(
+ $link["snippet"]
+ )
+ )
+ ),
+ "url" => $link["url"]
+ ];
+ }
+ }
+
+ $out["web"][] = [
+ "title" => $item["title"],
+ "description" =>
+ $this->titledots(
+ strip_tags(
+ html_entity_decode(
+ $item["snippet"]
+ )
+ )
+ ),
+ "url" => $item["url"],
+ "date" => strtotime($item["first_seen"]),
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => $sublinks,
+ "table" => []
+ ];
+ break;
+ }
+ }
+ }
+
+ if(isset($json[1]["featured_news"])){
+
+ foreach($json[1]["featured_news"] as $news){
+
+ $out["news"][] = [
+ "title" => $news["title"],
+ "description" =>
+ $this->titledots(
+ strip_tags(
+ html_entity_decode(
+ $news["snippet"]
+ )
+ )
+ ),
+ "date" => strtotime($news["first_seen"]),
+ "thumb" =>
+ isset($news["img"]) ?
+ [
+ "url" => $this->unshiturl($news["img"]),
+ "ratio" => "16:9"
+ ] :
+ [
+ "url" => null,
+ "ratio" => null
+ ],
+ "url" => $news["url"]
+ ];
+ }
+ }
+
+ if(isset($json[1]["featured_images"])){
+
+ foreach($json[1]["featured_images"] as $image){
+
+ if(
+ $image["width"] !== 0 &&
+ $image["height"] !== 0
+ ){
+
+ $thumb_width = $image["width"] >= 260 ? 260 : $image["width"];
+ $thumb_height = ceil($image["height"] * ($thumb_width / $image["width"]));
+
+ $width = $image["width"];
+ $height = $image["height"];
+ }else{
+
+ $thumb_width = null;
+ $thumb_height = null;
+ $width = null;
+ $height = null;
+ }
+
+ $out["image"][] = [
+ "title" => $image["title"],
+ "source" => [
+ [
+ "url" => $image["image_id"],
+ "width" => $width,
+ "height" => $height
+ ],
+ [
+ "url" => $image["src"],
+ "width" => $thumb_width,
+ "height" => $thumb_height
+ ]
+ ],
+ "url" => $image["host_page"]
+ ];
+ }
+ }
+
+ return $out;
+ }
+
+
+
public function image($get){
$search = $get["s"];
@@ -335,44 +542,165 @@ class yep{
throw new Exception("Failed to decode JSON");
}
- foreach($json[1]["results"] as $item){
-
- if(
- $item["width"] !== 0 &&
- $item["height"] !== 0
- ){
-
- $thumb_width = $item["width"] >= 260 ? 260 : $item["width"];
- $thumb_height = ceil($item["height"] * ($thumb_width / $item["width"]));
+ if(isset($json[1]["results"])){
+ foreach($json[1]["results"] as $item){
- $width = $item["width"];
- $height = $item["height"];
- }else{
+ if(
+ $item["width"] !== 0 &&
+ $item["height"] !== 0
+ ){
+
+ $thumb_width = $item["width"] >= 260 ? 260 : $item["width"];
+ $thumb_height = ceil($item["height"] * ($thumb_width / $item["width"]));
+
+ $width = $item["width"];
+ $height = $item["height"];
+ }else{
+
+ $thumb_width = null;
+ $thumb_height = null;
+ $width = null;
+ $height = null;
+ }
- $thumb_width = null;
- $thumb_height = null;
- $width = null;
- $height = null;
+ $out["image"][] = [
+ "title" => $item["title"],
+ "source" => [
+ [
+ "url" => $item["image_id"],
+ "width" => $width,
+ "height" => $height
+ ],
+ [
+ "url" => $item["src"],
+ "width" => $thumb_width,
+ "height" => $thumb_height
+ ]
+ ],
+ "url" => $item["host_page"]
+ ];
}
+ }
+
+ return $out;
+ }
+
+
+ public function news($get){
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
- $out["image"][] = [
- "title" => $item["title"],
- "source" => [
- [
- "url" => $item["image_id"],
- "width" => $width,
- "height" => $height
- ],
- [
- "url" => $item["src"],
- "width" => $thumb_width,
- "height" => $thumb_height
- ]
- ],
- "url" => $item["host_page"]
- ];
+ throw new Exception("Search term is empty!");
+ }
+
+ $country = $get["country"];
+ $nsfw = $get["nsfw"];
+
+ switch($nsfw){
+
+ case "yes": $nsfw = "off"; break;
+ case "maybe": $nsfw = "moderate"; break;
+ case "no": $nsfw = "strict"; break;
+ }
+
+ $out = [
+ "status" => "ok",
+ "npt" => null,
+ "news" => []
+ ];
+
+ try{
+
+ // https://api.yep.com/fs/2/search?client=web&gl=CA&no_correct=false&q=undefined+variable+javascript&safeSearch=off&type=web
+ $json =
+ json_decode(
+ $this->get(
+ $this->backend->get_ip(),
+ "https://api.yep.com/fs/2/search",
+ [
+ "client" => "web",
+ "gl" => $country == "all" ? $country : strtoupper($country),
+ "limit" => "99999",
+ "no_correct" => "false",
+ "q" => $search,
+ "safeSearch" => $nsfw,
+ "type" => "news"
+ ]
+ ),
+ true
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON");
+ }
+
+ //$json = json_decode(file_get_contents("scraper/yep.json"), true);
+
+ if($json === null){
+
+ throw new Exception("Failed to decode JSON");
+ }
+
+ if(isset($json[1]["results"])){
+ foreach($json[1]["results"] as $item){
+
+ $out["news"][] = [
+ "title" => $item["title"],
+ "author" => null,
+ "description" =>
+ $this->titledots(
+ strip_tags(
+ html_entity_decode(
+ $item["snippet"]
+ )
+ )
+ ),
+ "date" => strtotime($item["first_seen"]),
+ "thumb" =>
+ isset($item["img"]) ?
+ [
+ "url" => $this->unshiturl($item["img"]),
+ "ratio" => "16:9"
+ ] :
+ [
+ "url" => null,
+ "ratio" => null
+ ],
+ "url" => $item["url"]
+ ];
+ }
}
return $out;
}
+
+
+ private function titledots($title){
+
+ $substr = substr($title, -4);
+
+ if(
+ strpos($substr, "...") !== false ||
+ strpos($substr, "…") !== false
+ ){
+
+ return trim(substr($title, 0, -4));
+ }
+
+ return trim($title);
+ }
+
+ private function unshiturl($url){
+
+ $newurl = parse_url($url, PHP_URL_QUERY);
+ parse_str($newurl, $newurl);
+
+ if(isset($newurl["url"])){
+
+ return $newurl["url"];
+ }
+
+ return $url;
+ }
}