summaryrefslogtreecommitdiff
path: root/scraper
diff options
context:
space:
mode:
Diffstat (limited to 'scraper')
-rw-r--r--scraper/crowdview.php145
1 files changed, 145 insertions, 0 deletions
diff --git a/scraper/crowdview.php b/scraper/crowdview.php
new file mode 100644
index 0000000..8fb267b
--- /dev/null
+++ b/scraper/crowdview.php
@@ -0,0 +1,145 @@
+<?php
+
+class crowdview{
+
+ public function __construct(){
+
+ include "lib/backend.php";
+ $this->backend = new backend("crowdview");
+
+ include "lib/fuckhtml.php";
+ $this->fuckhtml = new fuckhtml();
+ }
+
+ public function getfilters($page){
+
+ return [];
+ }
+
+ private function get($proxy, $url, $get = []){
+
+ $curlproc = curl_init();
+
+ if($get !== []){
+ $get = http_build_query($get);
+ $url .= "?" . $get;
+ }
+
+ curl_setopt($curlproc, CURLOPT_URL, $url);
+
+ curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+ curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+ ["User-Agent: " . config::USER_AGENT,
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept-Language: en-US,en;q=0.5",
+ "Accept-Encoding: gzip",
+ "DNT: 1",
+ "Connection: keep-alive",
+ "Upgrade-Insecure-Requests: 1",
+ "Sec-Fetch-Dest: document",
+ "Sec-Fetch-Mode: navigate",
+ "Sec-Fetch-Site: none",
+ "Sec-Fetch-User: ?1"]
+ );
+
+ curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+ curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+ curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+ curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+
+ $this->backend->assign_proxy($curlproc, $proxy);
+
+ $data = curl_exec($curlproc);
+
+ if(curl_errno($curlproc)){
+
+ throw new Exception(curl_error($curlproc));
+ }
+
+ curl_close($curlproc);
+ return $data;
+ }
+
+ public function web($get){
+
+ $search = $get["s"];
+ if(strlen($search) === 0){
+
+ throw new Exception("Search term is empty!");
+ }
+
+ $proxy = $this->backend->get_ip();
+
+ try{
+ $json = $this->get(
+ $proxy,
+ "https://crowdview-next-js.onrender.com/api/search-v3",
+ [
+ "query" => $search
+ ]
+ );
+ }catch(Exception $error){
+
+ throw new Exception("Failed to fetch JSON");
+ }
+
+ $out = [
+ "status" => "ok",
+ "spelling" => [
+ "type" => "no_correction",
+ "using" => null,
+ "correction" => null
+ ],
+ "npt" => null,
+ "answer" => [],
+ "web" => [],
+ "image" => [],
+ "video" => [],
+ "news" => [],
+ "related" => []
+ ];
+
+ $json = json_decode($json, true);
+
+ if($json === NULL){
+
+ throw new Exception("Failed to decode JSON");
+ }
+
+ foreach($json["results"] as $item){
+
+ $description = explode("<b>", $item["snippet"], 2);
+
+ $out["web"][] = [
+ "title" => $this->sanitize($item["title"]),
+ "description" => $this->sanitize($description[1]),
+ "url" => $item["link"],
+ "date" => strtotime($description[0]),
+ "type" => "web",
+ "thumb" => [
+ "url" => null,
+ "ratio" => null
+ ],
+ "sublink" => [],
+ "table" => []
+ ];
+ }
+
+ return $out;
+ }
+
+ private function sanitize($html){
+
+ return
+ trim(
+ $this->fuckhtml
+ ->getTextContent(
+ html_entity_decode(
+ $html
+ )
+ ),
+ ". "
+ );
+ }
+}