diff options
author | lolcat <will@lolcat.ca> | 2023-10-16 02:30:43 -0400 |
---|---|---|
committer | lolcat <will@lolcat.ca> | 2023-10-16 02:30:43 -0400 |
commit | 3aa01807741ffad103b9f6a74d878e58f2e828e8 (patch) | |
tree | a06816e71b059d96b5e08f9d304cbc3818e85b48 /scraper/imgur.php | |
parent | fa9dc4d6efc593b7301229c18b058d90024c939e (diff) |
captcha and imgur, findthatmeme, yep imagesearch
Diffstat (limited to 'scraper/imgur.php')
-rw-r--r-- | scraper/imgur.php | 249 |
1 files changed, 249 insertions, 0 deletions
<?php

/**
 * Image scraper for imgur's search page.
 *
 * Builds a search URL from the requested filters, downloads the HTML,
 * extracts one result per "post" element and hands back a token that
 * can be used to request the following page.
 */
class imgur{
	
	/**
	 * Next-page token store (class `nextpage` from lib/nextpage.php).
	 * Declared explicitly: creating it dynamically is deprecated as of PHP 8.2.
	 */
	public $nextpage;
	
	/**
	 * HTML parser (class `fuckhtml` from lib/fuckhtml.php).
	 * Declared explicitly for the same reason as $nextpage.
	 */
	public $fuckhtml;
	
	/**
	 * Loads the helper libraries and instantiates them.
	 */
	public function __construct(){
		
		// include_once: a second instantiation (or another class that
		// already pulled in the same helper) must not redeclare the class
		include_once "lib/nextpage.php";
		$this->nextpage = new nextpage("imgur");
		
		include_once "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * Describes the filters this scraper understands.
	 *
	 * @param mixed $page Unused by this scraper; every page type gets the same list.
	 * @return array Map of filter name => ["display" => label, "option" => [value => label]]
	 */
	public function getfilters($page){
		
		return [
			// becomes the first path segment after /search/
			"sort" => [
				"display" => "Sort by",
				"option" => [
					"score" => "Highest scoring",
					"relevance" => "Most relevant",
					"time" => "Newest first"
				]
			],
			// becomes the second path segment after /search/
			"time" => [
				"display" => "Time posted",
				"option" => [
					"all" => "All time",
					"day" => "Today",
					"week" => "This week",
					"month" => "This month",
					"year" => "This year"
				]
			],
			// sent as the q_type query parameter
			"format" => [
				"display" => "Format",
				"option" => [
					"any" => "Any format",
					"jpg" => "JPG",
					"png" => "PNG",
					"gif" => "GIF",
					"anigif" => "Animated GIF",
					"album" => "Albums"
				]
			],
			// sent as the q_size_px query parameter
			"size" => [
				"display" => "Size",
				"option" => [
					"any" => "Any size",
					"small" => "Small (500px or less)",
					"med" => "Medium (500px to 2000px)",
					"big" => "Big (2000px to 5000px)",
					"lrg" => "Large (5000px to 10000px)",
					"huge" => "Huge (10000px and above)"
				]
			]
		];
	}
	
	/**
	 * Performs a GET request with browser-like headers.
	 *
	 * @param string $url Target URL without query string
	 * @param array $get Query parameters; when non-empty they are appended
	 *                   after a leading "scrolled" flag
	 * @return string|bool Response body as returned by curl_exec()
	 * @throws Exception With the cURL error message when the transfer fails
	 */
	private function get($url, $get = []){
		
		if($get !== []){
			
			$url .= "?scrolled&" . http_build_query($get);
		}
		
		$curlproc = curl_init();
		
		try{
			curl_setopt($curlproc, CURLOPT_URL, $url);
			
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			
			// NOTE(review): the User-Agent pairs rv:107.0 with Firefox/110.0;
			// kept byte-for-byte since changing it alters the request
			curl_setopt(
				$curlproc,
				CURLOPT_HTTPHEADER,
				[
					"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
					"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
					"Accept-Language: en-US,en;q=0.5",
					"Accept-Encoding: gzip",
					"DNT: 1",
					"Referer: https://imgur.com/search/",
					"Connection: keep-alive",
					"Sec-Fetch-Dest: empty",
					"Sec-Fetch-Mode: cors",
					"Sec-Fetch-Site: same-origin",
					"TE: trailers",
					"X-Requested-With: XMLHttpRequest"
				]
			);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
			return $data;
			
		}finally{
			
			// release the handle on the error path as well
			curl_close($curlproc);
		}
	}
	
	/**
	 * Runs an image search, or continues one from a next-page token.
	 *
	 * @param array $get Request parameters. Either "npt" (a token previously
	 *                   returned in the "npt" field) or the keys "s", "sort",
	 *                   "time", "format" and "size".
	 * @return array ["status" => "ok", "npt" => token or null, "image" => list of results]
	 * @throws Exception When the token cannot be decoded or the page cannot be fetched
	 */
	public function image($get){
		
		if(!empty($get["npt"])){
			
			$filter =
				json_decode(
					$this->nextpage->get(
						$get["npt"],
						"images"
					),
					true
				);
			
			// a corrupt or unknown token would otherwise end up as a
			// non-array passed to http_build_query()
			if(!is_array($filter)){
				
				throw new Exception("Invalid next page token");
			}
			
			// the token carries the query parameters plus our own
			// bookkeeping keys; pull the latter out so only the former
			// are sent to imgur
			$state = [];
			
			foreach(["s", "sort", "time", "format", "size", "page"] as $key){
				
				if(!array_key_exists($key, $filter)){
					
					throw new Exception("Invalid next page token");
				}
				
				$state[$key] = $filter[$key];
				unset($filter[$key]);
			}
			
			$search = $state["s"];
			$sort = $state["sort"];
			$time = $state["time"];
			$format = $state["format"];
			$size = $state["size"];
			$page = (int)$state["page"];
		}else{
			
			$search = $get["s"];
			$sort = $get["sort"];
			$time = $get["time"];
			$format = $get["format"];
			$size = $get["size"];
			$page = 0;
			
			$filter = [
				"q" => $search
			];
			
			if($format !== "any"){
				
				$filter["q_type"] = $format;
			}
			
			if($size !== "any"){
				
				$filter["q_size_px"] = $size;
				$filter["q_size_is_mpx"] = "off";
			}
		}
		
		$out = [
			"status" => "ok",
			"npt" => null,
			"image" => []
		];
		
		// sort and time come from the request: encode them so they cannot
		// inject extra path segments or a query string. The values offered
		// by getfilters() are unaffected by the encoding.
		$url =
			"https://imgur.com/search/" .
			rawurlencode((string)$sort) . "/" .
			rawurlencode((string)$time) .
			"/page/" . $page;
		
		try{
			$html = $this->get($url, $filter);
			
		}catch(Exception $error){
			
			// same message as before, original failure kept as "previous"
			throw new Exception("Failed to fetch HTML", 0, $error);
		}
		
		$this->fuckhtml->load($html);
		
		$posts =
			$this->fuckhtml
			->getElementsByClassName(
				"post",
				"div"
			);
		
		foreach($posts as $post){
			
			// narrow the parser down to this post
			$this->fuckhtml->load($post);
			
			$images =
				$this->fuckhtml
				->getElementsByTagName("img");
			
			$links =
				$this->fuckhtml
				->getElementsByClassName(
					"image-list-link",
					"a"
				);
			
			// skip posts without a thumbnail or a link instead of
			// emitting a broken result
			if(
				!isset(
					$images[0]["attributes"]["src"],
					$links[0]["attributes"]["href"]
				)
			){
				
				continue;
			}
			
			$image = $images[0];
			
			// drop the last 5 characters of the thumbnail URL (presumably a
			// one-letter size suffix plus ".jpg") to get the base URL the
			// two variants below are built from
			$image_url =
				"https:" .
				substr(
					$this->fuckhtml->getTextContent($image["attributes"]["src"]),
					0,
					-5
				);
			
			$title =
				isset($image["attributes"]["alt"]) ?
				$this->fuckhtml->getTextContent($image["attributes"]["alt"]) :
				"";
			
			$out["image"][] = [
				"title" => $title,
				"source" => [
					[
						"url" => $image_url . ".jpg",
						"width" => null,
						"height" => null
					],
					[
						"url" => $image_url . "m.jpg",
						"width" => null,
						"height" => null
					]
				],
				"url" =>
					"https://imgur.com" .
					$this->fuckhtml
					->getTextContent(
						$links[0]["attributes"]["href"]
					)
			];
		}
		
		if(isset($out["image"][0])){
			
			// store nextpage: query parameters plus the state needed to
			// rebuild the URL for the following page
			$filter["s"] = $search;
			$filter["sort"] = $sort;
			$filter["time"] = $time;
			$filter["format"] = $format;
			$filter["size"] = $size;
			$filter["page"] = $page + 1;
			
			$out["npt"] =
				$this->nextpage->store(
					json_encode($filter),
					"images"
				);
		}
		
		return $out;
	}
}