summaryrefslogtreecommitdiff
path: root/scraper/imgur.php
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2023-10-16 02:30:43 -0400
committerlolcat <will@lolcat.ca>2023-10-16 02:30:43 -0400
commit3aa01807741ffad103b9f6a74d878e58f2e828e8 (patch)
treea06816e71b059d96b5e08f9d304cbc3818e85b48 /scraper/imgur.php
parentfa9dc4d6efc593b7301229c18b058d90024c939e (diff)
captcha and imgur, findthatmeme, yep imagesearch
Diffstat (limited to 'scraper/imgur.php')
-rw-r--r--scraper/imgur.php249
1 files changed, 249 insertions, 0 deletions
diff --git a/scraper/imgur.php b/scraper/imgur.php
new file mode 100644
index 0000000..4a16de7
--- /dev/null
+++ b/scraper/imgur.php
@@ -0,0 +1,249 @@
+<?php
+
+/**
+ * Image-search scraper for imgur.
+ *
+ * Requests imgur's HTML search listing, extracts every "post" element and
+ * turns it into a result entry. The state needed to request the following
+ * page is handed to the project's nextpage helper, which gives back a token
+ * ("npt") that a later call can pass in to continue the search.
+ */
+class imgur{
+
+    // Both properties are declared explicitly because creating undeclared
+    // (dynamic) properties is deprecated as of PHP 8.2. They stay public,
+    // which is the visibility dynamic properties had.
+
+    // nextpage helper instance, created in the constructor
+    public $nextpage;
+
+    // fuckhtml parser instance, created in the constructor
+    public $fuckhtml;
+
+    /**
+     * Loads the helper libraries and instantiates them.
+     */
+    public function __construct(){
+
+        // include_once instead of include: a second instantiation, or a
+        // library that was already loaded elsewhere, must not redeclare
+        // the helper classes (that is a fatal error).
+        include_once "lib/nextpage.php";
+        $this->nextpage = new nextpage("imgur");
+
+        include_once "lib/fuckhtml.php";
+        $this->fuckhtml = new fuckhtml();
+    }
+
+    /**
+     * Describes the filters this scraper understands.
+     *
+     * @param mixed $page Not used by this scraper; the same filters are
+     *                    returned whatever is passed.
+     * @return array Filter key => ["display" => label, "option" => [value => label]].
+     *               The first option of every filter is the one image()
+     *               falls back to when a filter is missing.
+     */
+    public function getfilters($page){
+
+        return [
+            // first path segment after /search/
+            "sort" => [
+                "display" => "Sort by",
+                "option" => [
+                    "score" => "Highest scoring",
+                    "relevance" => "Most relevant",
+                    "time" => "Newest first"
+                ]
+            ],
+            // second path segment after /search/
+            "time" => [
+                "display" => "Time posted",
+                "option" => [
+                    "all" => "All time",
+                    "day" => "Today",
+                    "week" => "This week",
+                    "month" => "This month",
+                    "year" => "This year"
+                ]
+            ],
+            // sent as the q_type query parameter
+            "format" => [
+                "display" => "Format",
+                "option" => [
+                    "any" => "Any format",
+                    "jpg" => "JPG",
+                    "png" => "PNG",
+                    "gif" => "GIF",
+                    "anigif" => "Animated GIF",
+                    "album" => "Albums"
+                ]
+            ],
+            // sent as the q_size_px query parameter
+            "size" => [
+                "display" => "Size",
+                "option" => [
+                    "any" => "Any size",
+                    "small" => "Small (500px or less)",
+                    "med" => "Medium (500px to 2000px)",
+                    "big" => "Big (2000px to 5000px)",
+                    "lrg" => "Large (5000px to 10000px)",
+                    "huge" => "Huge (10000px and above)"
+                ]
+            ]
+        ];
+    }
+
+    /**
+     * Performs an HTTP GET request and returns the response body.
+     *
+     * @param string $url Address to request, without a query string.
+     * @param array  $get Query parameters. When non-empty they are appended
+     *                    as "?scrolled&<encoded parameters>".
+     * @return string Response body.
+     * @throws Exception With cURL's error message when the transfer fails.
+     */
+    private function get($url, $get = []){
+
+        if($get !== []){
+
+            // "scrolled" goes out as a bare flag ahead of the real parameters
+            $url .= "?scrolled&" . http_build_query($get);
+        }
+
+        $curlproc = curl_init();
+
+        curl_setopt_array(
+            $curlproc,
+            [
+                CURLOPT_URL => $url,
+                CURLOPT_ENCODING => "", // default encoding
+                // NOTE(review): the User-Agent below mixes rv:107.0 with
+                // Firefox/110.0; left untouched, confirm whether intended.
+                CURLOPT_HTTPHEADER => [
+                    "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/110.0",
+                    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                    "Accept-Language: en-US,en;q=0.5",
+                    "Accept-Encoding: gzip",
+                    "DNT: 1",
+                    "Referer: https://imgur.com/search/",
+                    "Connection: keep-alive",
+                    "Sec-Fetch-Dest: empty",
+                    "Sec-Fetch-Mode: cors",
+                    "Sec-Fetch-Site: same-origin",
+                    "TE: trailers",
+                    "X-Requested-With: XMLHttpRequest"
+                ],
+                CURLOPT_RETURNTRANSFER => true,
+                CURLOPT_SSL_VERIFYHOST => 2,
+                CURLOPT_SSL_VERIFYPEER => true,
+                CURLOPT_CONNECTTIMEOUT => 30,
+                CURLOPT_TIMEOUT => 30
+            ]
+        );
+
+        $data = curl_exec($curlproc);
+
+        // Capture the error state first, then close, then throw: throwing
+        // before the close would skip releasing the handle.
+        $errno = curl_errno($curlproc);
+        $error = curl_error($curlproc);
+        curl_close($curlproc);
+
+        if($errno !== 0 || $data === false){
+
+            throw new Exception($error);
+        }
+
+        return $data;
+    }
+
+    /**
+     * Builds the query parameters for a fresh (first page) search.
+     *
+     * @param string $search Search terms, sent as "q".
+     * @param string $format Format filter value; "any" adds nothing.
+     * @param string $size   Size filter value; "any" adds nothing.
+     * @return array Query parameters to pass to get().
+     */
+    private function buildfilter($search, $format, $size){
+
+        $filter = [
+            "q" => $search
+        ];
+
+        if($format !== "any"){
+
+            $filter["q_type"] = $format;
+        }
+
+        if($size !== "any"){
+
+            $filter["q_size_px"] = $size;
+            $filter["q_size_is_mpx"] = "off";
+        }
+
+        return $filter;
+    }
+
+    /**
+     * Converts one "post" element into a result entry.
+     *
+     * Loads the post into the shared parser, so whatever document the parser
+     * held before is replaced.
+     *
+     * @param mixed $post One element returned by getElementsByClassName("post", "div").
+     * @return array|null Result entry, or null when the post has no image
+     *                    source or no "image-list-link" anchor.
+     */
+    private function parsepost($post){
+
+        $this->fuckhtml->load($post);
+
+        $images = $this->fuckhtml->getElementsByTagName("img");
+
+        if(!isset($images[0]["attributes"]["src"])){
+
+            // nothing to build a result from
+            return null;
+        }
+
+        $image = $images[0];
+
+        $links =
+            $this->fuckhtml
+            ->getElementsByClassName(
+                "image-list-link",
+                "a"
+            );
+
+        if(!isset($links[0]["attributes"]["href"])){
+
+            return null;
+        }
+
+        // The src is prefixed with "https:" and loses its last 5 characters;
+        // presumably it is protocol-relative and ends in a one-letter
+        // thumbnail suffix plus extension (e.g. "b.jpg") -- TODO confirm.
+        $image_url =
+            "https:" .
+            substr(
+                $this->fuckhtml->getTextContent($image["attributes"]["src"]),
+                0,
+                -5
+            );
+
+        $alt =
+            isset($image["attributes"]["alt"]) ?
+            $image["attributes"]["alt"] : "";
+
+        return [
+            "title" => $this->fuckhtml->getTextContent($alt),
+            "source" => [
+                [
+                    "url" => $image_url . ".jpg",
+                    "width" => null,
+                    "height" => null
+                ],
+                [
+                    "url" => $image_url . "m.jpg",
+                    "width" => null,
+                    "height" => null
+                ]
+            ],
+            "url" =>
+                "https://imgur.com" .
+                $this->fuckhtml
+                ->getTextContent(
+                    $links[0]["attributes"]["href"]
+                )
+        ];
+    }
+
+    /**
+     * Runs an image search, or continues one from a next-page token.
+     *
+     * @param array $get Request parameters. A non-empty "npt" continues a
+     *                   previous search and every other key is ignored.
+     *                   Otherwise "s" (search terms), "sort", "time",
+     *                   "format" and "size" are read; a missing filter falls
+     *                   back to the first option listed by getfilters().
+     * @return array ["status" => "ok", "npt" => token or null, "image" => list
+     *               of entries]. "npt" is only set when at least one image
+     *               was found.
+     * @throws Exception "Invalid next page token" when the stored state
+     *                   cannot be decoded, "Failed to fetch HTML" when the
+     *                   request fails (the cURL error is attached as the
+     *                   previous exception).
+     */
+    public function image($get){
+
+        if(!empty($get["npt"])){
+
+            $filter =
+                json_decode(
+                    $this->nextpage->get(
+                        $get["npt"],
+                        "images"
+                    ),
+                    true
+                );
+
+            if(!is_array($filter)){
+
+                throw new Exception("Invalid next page token");
+            }
+
+            // Take the bookkeeping keys out of the stored state; whatever
+            // is left is the query string to send to imgur.
+            $state = [];
+
+            foreach(["s", "sort", "time", "format", "size", "page"] as $key){
+
+                $state[$key] = isset($filter[$key]) ? $filter[$key] : null;
+                unset($filter[$key]);
+            }
+
+            $search = $state["s"];
+            $sort = $state["sort"];
+            $time = $state["time"];
+            $format = $state["format"];
+            $size = $state["size"];
+            $page = (int)$state["page"];
+        }else{
+
+            $search = isset($get["s"]) ? $get["s"] : "";
+            $sort = isset($get["sort"]) ? $get["sort"] : "score";
+            $time = isset($get["time"]) ? $get["time"] : "all";
+            $format = isset($get["format"]) ? $get["format"] : "any";
+            $size = isset($get["size"]) ? $get["size"] : "any";
+            $page = 0;
+
+            $filter = $this->buildfilter($search, $format, $size);
+        }
+
+        $out = [
+            "status" => "ok",
+            "npt" => null,
+            "image" => []
+        ];
+
+        try{
+            // sort and time end up in the URL path, so they are encoded;
+            // for every option getfilters() lists this changes nothing
+            $html =
+                $this->get(
+                    "https://imgur.com/search/" .
+                    rawurlencode((string)$sort) . "/" .
+                    rawurlencode((string)$time) .
+                    "/page/" . $page,
+                    $filter
+                );
+
+        }catch(Exception $error){
+
+            // same message as before, with the cause kept for debugging
+            throw new Exception("Failed to fetch HTML", 0, $error);
+        }
+
+        $this->fuckhtml->load($html);
+
+        $posts =
+            $this->fuckhtml
+            ->getElementsByClassName(
+                "post",
+                "div"
+            );
+
+        foreach($posts as $post){
+
+            $entry = $this->parsepost($post);
+
+            if($entry !== null){
+
+                $out["image"][] = $entry;
+            }
+        }
+
+        if(isset($out["image"][0])){
+
+            // store nextpage
+            $filter["s"] = $search;
+            $filter["sort"] = $sort;
+            $filter["time"] = $time;
+            $filter["format"] = $format;
+            $filter["size"] = $size;
+            $filter["page"] = $page + 1;
+
+            $out["npt"] =
+                $this->nextpage->store(
+                    json_encode($filter),
+                    "images"
+                );
+        }
+
+        return $out;
+    }
+}