summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorlolcat <will@lolcat.ca>2024-02-27 04:27:18 +0000
committerlolcat <will@lolcat.ca>2024-02-27 04:27:18 +0000
commit5143d95014ac50a42cfe3e3b6c0a2e8466adf953 (patch)
tree5cd419ad1d5e530fd763114209774c848b86fe2b
parentba5cd270071023dff45151adb498974a47c53b80 (diff)
parenta20d4de1e4d1a00c0d152136e5d7cd81481ace7b (diff)
Merge branch 'master' into feature/fix_parse_instances
-rw-r--r--README.md295
-rw-r--r--data/config.php4
-rw-r--r--data/proxies/onion.txt4
-rw-r--r--docs/apache2.md221
-rw-r--r--docs/caddy.md58
-rw-r--r--docs/configure.md35
-rw-r--r--docs/docker.md50
-rw-r--r--docs/nginx.md103
-rw-r--r--docs/tor.md16
-rw-r--r--lib/frontend.php6
-rw-r--r--scraper/mwmbl.php168
-rw-r--r--settings.php4
12 files changed, 676 insertions, 288 deletions
diff --git a/README.md b/README.md
index 5c5d056..0867319 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,15 @@
[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/W7W2OZK5H)
-# 4get
-4get is a metasearch engine that doesn't suck (they live in our walls!)
+# 4get search
+**4get** is a proxy search engine that doesn't suck.
-# About 4get
+## About 4get
https://4get.ca/about
-# Try it out
+## Try it out
https://4get.ca
-# Totally unbiased comparison between alternatives
+## Totally unbiased comparison between alternatives
| | 4get | searx(ng) | librex | araa |
|----------------------------|-------------------------|-----------|-------------|----------|
@@ -18,7 +18,7 @@ https://4get.ca
| Does it work | ye | no | no | ye |
| Did the dev commit suicide | not until my 30s | idk | yes | no |
-## Supported websites
+# Supported websites
1. Web
- DuckDuckGo
- Brave
@@ -27,6 +27,7 @@ https://4get.ca
- Mojeek
- Marginalia
- wiby
+ - Curlie
2. Images
- DuckDuckGo
@@ -64,284 +65,8 @@ https://4get.ca
- YouTube
- SoundCloud
-More scrapers are coming soon. I currently want to add HackerNews (durr orange site!!), Qwant, Yep and other garbage. A shopping, files, tab and more music scrapers are also on my todo list.
-
# Installation
-This section is still to-do. You will need to figure shit out for some of the apache2 and nginx stuff. Everything else should be OK.
-
-## Install on Apache
-
-Login as root.
-
-```sh
-apt install php-mbstring apache2 certbot php-imagick imagemagick php-curl curl php-apcu git libapache2-mod-php python3-certbot-apache
-service apache2 start
-a2enmod rewrite
-```
-
-For all of the files in `/etc/apache2/sites-enabled/`, you must apply the following changes:
-- Uncomment `ServerName` directive, put your domain name there
-- Change `ServerAdmin` to your email
-- Change `DocumentRoot` to `/var/www/html/4get`
-- Change `ErrorLog` and `CustomLog` directives to log stuff out to `/dev/null/`
-
-Now open `/etc/apache2/apache2.conf` and change `ErrorLog` and `CustomLog` directives to have `/dev/null/` as a value
-
-This *should* disable logging completely, but I'm not 100% sure since I sort of had to troubleshoot alot of shit while writing this. So after we're done check if `/var/log/apache2/*` contains any personal info, and if it does, call me retarded trough email exchange.
-
-Blindly run the following shit
-
-```sh
-cd /var/www/html
-git clone https://git.lolcat.ca/lolcat/4get
-cd 4get
-mkdir icons
-chmod 777 -R icons/
-```
-
-Restart the service for good measure... `service apache2 restart`
-
-## Install on NGINX
-
-Login as root.
-
-Create a file in `/etc/nginx/sites-avaliable/` called `4get.conf` or any name you want and put this into the file:
-
-```
-server {
- # DO YOU REALLY NEED TO LOG SEARCHES?
- access_log /dev/null;
- error_log /dev/null;
- # Change this if you have 4get in other folder.
- root /var/www/4get;
- # Change yourdomain by your domain lol
- server_name www.yourdomain.com yourdomain.com;
-
- location @php {
- try_files $uri.php $uri/index.php =404;
- # Change the unix socket address if it's different for you.
- fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
- fastcgi_index index.php;
- # Change this to `fastcgi_params` if you use a debian based distro.
- include fastcgi.conf;
- fastcgi_intercept_errors on;
- }
-
- location / {
- try_files $uri @php;
- }
-
- location ~* ^(.*)\.php$ {
- return 301 $1;
- }
-
- listen 80;
-}
-```
-
-That is a very basic config so you will need to adapt it to your needs in case you have a more complicated nginx configuration. Anyways, you can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf)
-
-After you save the file you will need to do a symlink of the `4get.conf` file to `/etc/nignx/sites-enabled/`, you can do it with this command:
-
-```sh
-ln -s /etc/nginx/sites-available/4get.conf /etc/nginx/sites-available/4get.conf
-```
-
-Now test the nginx config with `nginx -t`, if it says that everything is good, restart nginx using `systemctl restart nginx`
-
-## Install using Docker (lol u lazy fuck)
-
-```
-docker run -d -p 80:80 -e FOURGET_SERVER_NAME="4get.ca" luuul/4get:latest
-```
-
-...Or with SSL:
-```
-docker run -d -p 443:443 -v /etc/letsencrypt/live/domain.tld:/etc/4get/certs -e FOURGET_SERVER_NAME="4get.ca" luuul/4get:latest
-```
-
-if the certificate files are not mounted to /etc/4get/certs the service listens to port 80
-
-the certificate directory expects files named `fullchain.pem` and `privkey.pem`
-
-
-## Install using Docker Compose
-
-copy `docker-compose.yaml`
-
-to serve custom banners create a directory named `banners` for example with images and mount to `/var/www/html/4get/banner`
-
-to serve captcha images create a directory named `captchas` for example containing subfolders with images and mount to `/var/www/html/4get/data/captcha`
-
-any environment variables prefixed with `FOURGET_` will be added to the generated config
-
-the entrypoint will automatically set the `CAPTCHA_DATASET` value for you based on directory names and number of files in each
-
-to set `INSTANCES` pass a comma separated string of urls (FOURGET_INSTANCES = "https://4get.ca,https://domain.tld")
-
-
-```
-version: "3.7"
-
-services:
- fourget:
- image: luuul/4get:latest
- restart: always
- environment:
- - FOURGET_SERVER_NAME=4get.ca
-
- ports:
- - "80:80"
- - "443:443"
-
- volumes:
- - /etc/letsencrypt/live/domain.tld:/etc/4get/certs
- - ./banners:/var/www/html/4get/banner
- - ./captchas:/var/www/html/4get/data/captcha
-```
-
-Replace relevant values and start with `docker compose up -d`
-
-## Install on Caddy
-
-1. Install dependencies:
-
-`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
-
-2. Clone this repository where you want to host this from:
-
-`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
-
-3. Set permission on the `icons` directory inside `4get`
-
-`cd /var/www/4get/ && sudo chmod 777 -R icons/`
-
-4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile`
-
-```sh
-4get.konakona.moe {
- root * /var/www/4get
- file_server
- encode gzip
- php_fastcgi unix//var/run/php/php8.2-fpm.sock {
- index index.php
- }
- redir /{path}.php{query} 301
- try_files {path} {path}.php
-}
-```
-
-Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
-
-5. Restart Caddy
-
-`sudo systemctl restart caddy`
-
-# Encryption setup
-I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour.
-
-## Encryption setup on Apache
-
-```sh
-certbot --apache --rsa-key-size 4096 -d www.yourdomain.com -d yourdomain.com
-```
-When it asks to choose a vhost, choose the option with "HTTPS" listed. Don't setup HTTPS for tor, we don't need it (it doesn't even work anyways with let's encrypt)
-
-Edit `000-default-le-ssl.conf`
-
-Add this at the end:
-```xml
-<Directory /var/www/html/4get>
- RewriteEngine On
- RewriteCond %{REQUEST_FILENAME}.php -f
- RewriteRule (.*) $1.php [L]
- Options Indexes FollowSymLinks
- AllowOverride All
- Require all granted
-</Directory>
-```
-
-Now since this file is located in `/etc/apache2/sites-enabled/`, you must change all of the logging shit as to make it not log anything, like we did earlier.
-
-Restart again
-```sh
-service apache2 restart
-```
-
-## Encryption setup on NGINX
-
-Generate a certificate for the domain using:
-
-```sh
-certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com
-```
-(Remember to install the nginx certbot plugin!!!)
-
-After doing that certbot should deploy the certificate automatically into your 4get nginx config file. It should be ready to use at that point.
-
-# Jesse it is time to configure the server the fucking bots are back
-
-Wohoo the awful piece of shit setup and fiddling with 3 gazillion files is GONE. All you need to do to configure your shit is to go in `data/config.php` and edit the self-documenting configuration file. You can also specify proxies in `data/proxies/whatever.txt` and captcha images in `data/captcha/category/1.png`... I further explain how to deal with that garbage in the config file I mentionned.
-
-# (Optional) Tor setup
-
-1. Install `tor`.
-2. Open `/etc/tor/torrc`
-3. Go to the line that contains `HiddenServiceDir` and `HiddenServicePort`
-4. Uncomment those 2 lines and set them like this:
- ```
- HiddenServiceDir /var/lib/tor/4get
- HiddenServicePort 80 127.0.0.1:80
- ```
-5. Start the tor service using `systemctl start tor`
-6. Wait some seconds...
-7. Login as root and execute this command: `cat /var/lib/tor/4get/hostname`
-8. That is your onion address.
-
-After you get your onion address you will need to configure your Apache or Nginx config or you will get 404 errors.
-
-I don't know to configure this shit on Apache so here is the NGINX one.
-
-## Tor setup on NGINX
-
-Important Note: Tor onion addresses are significantly longer than traditional domain names. Before proceeding with Nginx configuration, ensure you increase the `server_names_hash_bucket_size` value in your `nginx.conf` file. This setting in your Nginx configuration controls the internal data structure used to manage multiple server names (hostnames) associated with your web server. Each hostname requires a certain amount of memory within this structure. If the size is insufficient, Nginx will encounter errors.
-
-1. Open your `nginx.conf` file (that is under `/etc/nginx/nginx.conf`).
-2. Find the line containing `# server_names_hash_bucket_size 64;`.
-3. Uncomment the line and adjust the value. Start with 64, but if you encounter issues, incrementally increase it (e.g., 128, 256) until it accommodates your configuration.
-
-Open your current 4get NGINX config (that is under `/etc/nginx/sites-available/`) and append this to the end of the file:
-
-```
-server {
- access_log /dev/null;
- error_log /dev/null;
-
- listen 80;
- server_name <youronionaddress>;
- root /var/www/4get;
-
- location @php {
- try_files $uri.php $uri/index.php =404;
- # Change the unix socket address if it's different for you.
- fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
- fastcgi_index index.php;
- # Change this to `fastcgi_params` if you use a debian based distro.
- include fastcgi.conf;
- fastcgi_intercept_errors on;
- }
-
- location / {
- try_files $uri @php;
- }
-
- location ~* ^(.*)\.php$ {
- return 301 $1;
- }
-}
-```
-
-Obviously replace `<youronionaddress>` by the onion address of `/var/lib/tor/4get/hostname` and then check if the nginx config is valid with `nginx -t` if yes, then restart the nginx service and try opening the onion address into the Tor Browser. You can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf)
+Refer to the <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/">documentation index</a>!
-# Contact
-shit breaks all the time but I repair it all the time too. Email me here: will<at>lolcat(dot)ca
+## Contact
+Shit breaks all the time but I repair it all the time too! Email me here: will (at) lolcat.ca
diff --git a/data/config.php b/data/config.php
index 6327ba9..fd9071e 100644
--- a/data/config.php
+++ b/data/config.php
@@ -77,7 +77,8 @@ class config{
"https://4get.psily.garden",
"https://search.milivojevic.in.rs",
"https://4get.snine.nl",
- "https://4get.datura.network"
+ "https://4get.datura.network",
+ "https://4get.neco.lol"
];
// Default user agent to use for scraper requests. Sometimes ignored to get specific webpages
@@ -103,6 +104,7 @@ class config{
const PROXY_PINTEREST = false;
const PROXY_SEZNAM = false;
const PROXY_NAVER = false;
+ const PROXY_MWMBL = false;
const PROXY_FTM = false; // findthatmeme
const PROXY_IMGUR = false;
const PROXY_YANDEX_W = false; // yandex web
diff --git a/data/proxies/onion.txt b/data/proxies/onion.txt
index c9b03f0..28ab436 100644
--- a/data/proxies/onion.txt
+++ b/data/proxies/onion.txt
@@ -1,12 +1,12 @@
# Specify proxies by following this format:
-# <type>:<address>:<port>:<username>:<password>
+# <protocol>:<address>:<port>:<username>:<password>
#
# Examples:
# https:1.3.3.7:6969:abcd:efg
# socks4:1.2.3.4:8080::
# raw_ip::::
#
-# Available types:
+# Available protocols:
# raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname
# Local tor proxy
diff --git a/docs/apache2.md b/docs/apache2.md
new file mode 100644
index 0000000..28a066e
--- /dev/null
+++ b/docs/apache2.md
@@ -0,0 +1,221 @@
+# Install guide for Apache2 webserver
+Welcome to the new and revamped 4get install manual for apache2. Even if you already have services running on an existing installation of apache2, you should still be able to adapt this guide to your needs.
+
+For starters, login as `root`.
+
+Then, install the following dependencies:
+```sh
+apt update
+apt upgrade
+apt install php-mbstring apache2 certbot php-imagick imagemagick php-curl curl php-apcu git libapache2-mod-php
+```
+
+Enable the required modules:
+```sh
+a2enmod ssl
+a2enmod rewrite
+```
+
+And enable these optional ones, which might be useful to you later on. The `proxy` module is useful for setting up reverse proxies to services like gitea, and `headers` is useful to tweak global header values:
+```sh
+a2enmod proxy
+a2enmod headers
+```
+
+Now, restart apache2:
+```sh
+service apache2 restart
+```
+
+Just for good measure, please check if your webserver is running. Access it through HTTP, not HTTPS. You should see the apache2 default landing page.
+
+## 000-default.conf
+Now, edit the following file: `/etc/apache2/sites-available/000-default.conf`, remove everything and carefully add each rule specified here, while making sure to replace my domains with your own:
+
+1. The `VirtualHost` here instructs apache2 to redirect all **HTTP** traffic that specifies an unknown `Host` header to a specific domain of your choice. Configuring this is not required but highly recommended.
+```xml
+<VirtualHost *:80>
+ # no domain = go to 4get.ca
+ RedirectMatch 301 ^(.*)$ https://4get.ca$1
+</VirtualHost>
+```
+
+2. This instruction tells apache2 to redirect all HTTP traffic on `Host` lolcat.ca to the HTTPS version of the site. You should add a rule like this for all of your services explicitly.
+```xml
+<VirtualHost *:80>
+ ServerName lolcat.ca
+ RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
+</VirtualHost>
+```
+
+3. Subdomains won't be matched by the above rule, so I recommend you also add them to be more explicit:
+```xml
+<VirtualHost *:80>
+ ServerName www.lolcat.ca
+ RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
+</VirtualHost>
+```
+
+... Etc, for every service you own.
+
+4. And finally, append this configuration if you wish to host a tor or i2p access point. This configuration should not be bound to SSL (port 443), as Let's Encrypt does not let you create certificates for onion sites:
+```xml
+<VirtualHost *:80>
+ # tor site
+ ServerName 4getwebfrq5zr4sxugk6htxvawqehxtdgjrbcn2oslllcol2vepa23yd.onion
+
+ # compress
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ DocumentRoot /var/www/4get
+
+ Options +MultiViews
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+
+ # deny access to private resources
+ <Directory /var/www/4get/data/>
+ Order Deny,allow
+ Deny from all
+ </Directory>
+</VirtualHost>
+```
+To make the above snippet work, please refer to our <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/tor.md">tor site guide</a>.
+
+## default-ssl.conf
+Now, edit the file `/etc/apache2/sites-available/default-ssl.conf`, remove everything and, again, add each rule while modifying the relevant fields:
+
+This ruleset will redirect all clients that specify an unknown `Host` to the domain of our choice. I recommend you uncomment the `ErrorLog` directive while setting things up in case a problem occurs with PHP. Don't worry about the invalid SSL paths, we will generate our certificates later; Just make sure you specify the right domains in there:
+```xml
+<VirtualHost *:443>
+ RedirectMatch 301 ^(.*)$ https://4get.ca$1
+ ServerAdmin will@lolcat.ca
+
+ #ErrorLog ${APACHE_LOG_DIR}/error.log
+
+ SSLEngine on
+
+ <FilesMatch "\.(?:cgi|shtml|phtml|php)$">
+ SSLOptions +StdEnvVars
+ </FilesMatch>
+ <Directory /usr/lib/cgi-bin>
+ SSLOptions +StdEnvVars
+ </Directory>
+
+ AddOutputFilterByType DEFLATE application/json
+ AddOutputFilterByType DEFLATE application/javascript
+ AddOutputFilterByType DEFLATE application/x-javascript
+ AddOutputFilterByType DEFLATE text/html
+ AddOutputFilterByType DEFLATE text/plain
+ AddOutputFilterByType DEFLATE text/css
+
+ SSLCertificateFile /etc/letsencrypt/live/4get.ca/fullchain.pem
+ SSLCertificateKeyFile /etc/letsencrypt/live/4get.ca/privkey.pem
+</VirtualHost>
+```
+
+This ruleset tells apache2 where 4get is located (`/var/www/4get`), ensures that `4get.ca/settings` resolves to `4get.ca/settings.php` internally and that we deny access to `/data/*`, which may contain files you might want to keep private.
+```xml
+<VirtualHost *:443>
+ ServerName 4get.ca
+
+ DocumentRoot /var/www/4get
+
+ Options +MultiViews
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+
+ # deny access to private resources
+ <Directory /var/www/4get/data/>
+ Order Deny,allow
+ Deny from all
+ </Directory>
+</VirtualHost>
+```
+
+Don't forget to specify your other services here! Here's an example of a ruleset I use for `lolcat.ca`:
+```xml
+<VirtualHost *:443>
+ ServerName lolcat.ca
+
+ DocumentRoot /var/www/lolcat
+
+ Options +MultiViews
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME} !-d
+ RewriteCond %{REQUEST_FILENAME} !-f
+ RewriteRule ^([^\.]+)$ $1.php [NC,L]
+</VirtualHost>
+```
+
+... Along with its redirect rules.
+```xml
+<VirtualHost *:443>
+ ServerName www.lolcat.ca
+ RedirectMatch 301 ^(.*)$ https://lolcat.ca$1
+</VirtualHost>
+```
+
+## security.conf
+If you enabled the `headers` module, you can head over to `/etc/apache2/conf-enabled/security.conf` and edit:
+```sh
+ServerTokens Prod # instead of Full
+```
+and
+```sh
+ServerSignature Off #instead of On
+```
+This will ensure that the `Server` header apache2 returns is minimal and doesn't leak information like your host system's OS or apache2 version.
+
+You can also uncomment `Header set X-Content-Type-Options: "nosniff"` and `Header set Content-Security-Policy "frame-ancestors 'self';"` respectively.
+
+## charset.conf
+Head over to `/etc/apache2/conf-enabled/charset.conf` and uncomment `AddDefaultCharset UTF-8`.
+
+## other-vhost-access-log.conf
+Since none of our configuration files contains any `CustomLog` directives, all we need to do to disable logging entirely is comment out the `CustomLog` directive located in `/etc/apache2/conf-enabled/other-vhost-access-log.conf`. Only error logs will remain if you configured them.
+
+# Setup SSL
+Great, now we've configured the webserver, but we still don't have our security certificate. Let's generate one!
+
+First, stop `apache2`.
+```sh
+service apache2 stop
+```
+
+Now, run `certbot`, and specify all of your domains by prepending `-d` every time. Make sure the first domain you specify is your main domain, and the same domain you specified in the configuration above! We use ECDSA encryption here as it's better than RSA.
+```sh
+certbot certonly --standalone --key-type ecdsa -d 4get.ca -d www.4get.ca -d lolcat.ca -d www.lolcat.ca
+```
+
+Certbot should ask you a few questions, just play along. At the end of the setup, certbot should tell you about the location of the certificates. Double check to make sure they correspond to the paths we specified in `default-ssl.conf`. Your certificates should now update every 2-3 months automatically.
+
+After this is complete, create the directory `/var/www/4get`.
+
+Now, start `apache2`.
+```sh
+service apache2 start
+```
+
+Congratulations! You now have a... 404 error on your webserver, if everything went well. Now's the time to make sure all of our redirect rules work!
+
+# Import the fun junk
+Run these commands:
+```
+cd /var/www/4get
+git clone https://git.lolcat.ca/lolcat/4get .
+chmod 777 -R icons/
+```
+
+... And try accessing your webserver. You should now have a working 4get instance!
+
+Please make sure to check out how to further <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/configure.md">configure 4get</a> to your liking!
diff --git a/docs/caddy.md b/docs/caddy.md
new file mode 100644
index 0000000..90f438c
--- /dev/null
+++ b/docs/caddy.md
@@ -0,0 +1,58 @@
+# Install guide for Caddy webserver
+
+1. Install dependencies:
+
+`sudo apt install caddy php8.2-dom php8.2-imagick imagemagick php8.2-curl curl php8.2-apcu git`
+
+2. Clone this repository where you want to host this from:
+
+`cd /var/www && sudo git clone https://git.konakona.moe/diowo/4get`
+
+3. Set permission on the `icons` directory inside `4get`
+
+`cd /var/www/4get/ && sudo chmod 777 -R icons/`
+
+4. Add an entry for 4get on your Caddyfile at `/etc/caddy/Caddyfile`
+
+```sh
+4get.konakona.moe {
+ root * /var/www/4get
+ file_server
+ encode gzip
+ php_fastcgi unix//var/run/php/php8.2-fpm.sock {
+ index index.php
+ }
+ redir /{path}.php{query} 301
+ try_files {path} {path}.php
+}
+```
+
+Caddy deals with SSL certificates automatically so you don't have to mess with anything. Also if needed, a sample of my Caddyfile can be found [here](https://git.konakona.moe/diowo/misc/src/branch/master/etc/caddy/Caddyfile).
+
+5. Restart Caddy
+
+`sudo systemctl restart caddy`
+
+# Encryption setup
+I'm schizoid (as you should) so I'm gonna setup 4096bit key encryption. To complete this step, you need a domain or subdomain in your possession. Make sure that the DNS shit for your domain has propagated properly before continuing, because certbot is a piece of shit that will error out the ass once you reach 5 attempts under an hour.
+
+## Encryption setup on Apache
+
+```sh
+certbot --apache --rsa-key-size 4096 -d www.yourdomain.com -d yourdomain.com
+```
+When it asks to choose a vhost, choose the option with "HTTPS" listed. Don't setup HTTPS for tor, we don't need it (it doesn't even work anyways with let's encrypt)
+
+Edit `000-default-le-ssl.conf`
+
+Add this at the end:
+```xml
+<Directory /var/www/html/4get>
+ RewriteEngine On
+ RewriteCond %{REQUEST_FILENAME}.php -f
+ RewriteRule (.*) $1.php [L]
+ Options Indexes FollowSymLinks
+ AllowOverride All
+ Require all granted
+</Directory>
+```
diff --git a/docs/configure.md b/docs/configure.md
new file mode 100644
index 0000000..fc8b0bb
--- /dev/null
+++ b/docs/configure.md
@@ -0,0 +1,35 @@
+# 4get configuration options
+
+Welcome! This guide assumes that you have a working 4get instance. This will help you configure your instance to the best it can be!
+
+## Files location
+1. The main configuration file is located at `data/config.php`
+2. The proxies are located in `data/proxies/*.txt`
+3. The captcha imagesets are located in `data/captcha/your_image_set/*.png`
+4. The captcha font is located in `data/fonts/captcha.ttf`
+
+## Server listing
+To be listed on https://4get.ca/instances , you must contact *any* of the people in the server list and ask them to add you to their list of instances in their configuration. The instance list is distributed, and I don't have control over it.
+
+If you see spammy entries in your instances list, simply remove the instance from your list that pushes the offending entries.
+
+## Proxies
+4get supports rotating proxies for scrapers! Configuring one is really easy.
+
+1. Head over to the **proxies** folder and create a new file. Give it any name you want, like `myproxy`, but make sure it has the `txt` extension.
+2. Add your proxies to the file. Examples:
+ ```conf
+ # format -> <protocol>:<address>:<port>:<username>:<password>
+ # protocol list:
+ # raw_ip, http, https, socks4, socks5, socks4a, socks5_hostname
+ socks5:1.1.1.1:1080:juicy:cloaca00
+ http:1.3.3.7:8080::
+ raw_ip::::
+ ```
+3. Go to the **main configuration file**. Then, find which website you want to setup a proxy for.
+4. Replace the value `false` with `"myproxy"`, keeping the quotes and the semicolon at the end.
+
+Done! The scraper you chose should now be using the rotating proxies. When asking for the next page of results, it will use the same proxy to avoid detection!
+
+### Important!
+If you ever test out a `socks5` proxy locally on your machine and find out it works but doesn't on your server, try supplying the `socks5_hostname` protocol instead.
diff --git a/docs/docker.md b/docs/docker.md
new file mode 100644
index 0000000..2aabd9f
--- /dev/null
+++ b/docs/docker.md
@@ -0,0 +1,50 @@
+# Install guide for Docker
+
+```
+docker run -d -p 80:80 -e FOURGET_SERVER_NAME="4get.ca" luuul/4get:latest
+```
+
+...Or with SSL:
+```
+docker run -d -p 443:443 -v /etc/letsencrypt/live/domain.tld:/etc/4get/certs -e FOURGET_SERVER_NAME="4get.ca" luuul/4get:latest
+```
+
+if the certificate files are not mounted to /etc/4get/certs the service listens to port 80
+
+the certificate directory expects files named `fullchain.pem` and `privkey.pem`
+
+# Install using Docker Compose
+
+copy `docker-compose.yaml`
+
+to serve custom banners create a directory named `banners` for example with images and mount to `/var/www/html/4get/banner`
+
+to serve captcha images create a directory named `captchas` for example containing subfolders with images and mount to `/var/www/html/4get/data/captcha`
+
+any environment variables prefixed with `FOURGET_` will be added to the generated config
+
+the entrypoint will automatically set the `CAPTCHA_DATASET` value for you based on directory names and number of files in each
+
+to set `INSTANCES` pass a comma separated string of urls (FOURGET_INSTANCES = "https://4get.ca,https://domain.tld")
+
+```
+version: "3.7"
+
+services:
+ fourget:
+ image: luuul/4get:latest
+ restart: always
+ environment:
+ - FOURGET_SERVER_NAME=4get.ca
+
+ ports:
+ - "80:80"
+ - "443:443"
+
+ volumes:
+ - /etc/letsencrypt/live/domain.tld:/etc/4get/certs
+ - ./banners:/var/www/html/4get/banner
+ - ./captchas:/var/www/html/4get/data/captcha
+```
+
+Replace relevant values and start with `docker compose up -d`
diff --git a/docs/nginx.md b/docs/nginx.md
new file mode 100644
index 0000000..8693559
--- /dev/null
+++ b/docs/nginx.md
@@ -0,0 +1,103 @@
+# Install on NGINX
+
+>I do NOT recommend following this guide, only follow this if you *really* need to use nginx. I recommend you use the apache2 steps instead.
+
+Login as root.
+
+Create a file in `/etc/nginx/sites-available/` called `4get.conf` or any name you want and put this into the file:
+
+```
+server {
+ # DO YOU REALLY NEED TO LOG SEARCHES?
+ access_log /dev/null;
+ error_log /dev/null;
+ # Change this if you have 4get in other folder.
+ root /var/www/4get;
+ # Change yourdomain by your domain lol
+ server_name www.yourdomain.com yourdomain.com;
+
+ location @php {
+ try_files $uri.php $uri/index.php =404;
+ # Change the unix socket address if it's different for you.
+ fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
+ fastcgi_index index.php;
+ # Change this to `fastcgi_params` if you use a debian based distro.
+ include fastcgi.conf;
+ fastcgi_intercept_errors on;
+ }
+
+ location / {
+ try_files $uri @php;
+ }
+
+ location ~* ^(.*)\.php$ {
+ return 301 $1;
+ }
+
+ listen 80;
+}
+```
+
+That is a very basic config so you will need to adapt it to your needs in case you have a more complicated nginx configuration. Anyways, you can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf)
+
+After you save the file you will need to symlink the `4get.conf` file into `/etc/nginx/sites-enabled/`. You can do it with this command:
+
+```sh
+ln -s /etc/nginx/sites-available/4get.conf /etc/nginx/sites-enabled/4get.conf
+```
+
+Now test the nginx config with `nginx -t`, if it says that everything is good, restart nginx using `systemctl restart nginx`
+
+# Encryption setup
+
+Generate a certificate for the domain using:
+
+```sh
+certbot --nginx --key-type ecdsa -d www.yourdomain.com -d yourdomain.com
+```
+(Remember to install the nginx certbot plugin!!!)
+
+After doing that certbot should deploy the certificate automatically into your 4get nginx config file. It should be ready to use at that point.
+
+# Tor setup on NGINX
+
+Important Note: Tor onion addresses are significantly longer than traditional domain names. Before proceeding with Nginx configuration, ensure you increase the `server_names_hash_bucket_size` value in your `nginx.conf` file. This setting in your Nginx configuration controls the internal data structure used to manage multiple server names (hostnames) associated with your web server. Each hostname requires a certain amount of memory within this structure. If the size is insufficient, Nginx will encounter errors.
+
+1. Open your `nginx.conf` file (that is under `/etc/nginx/nginx.conf`).
+2. Find the line containing `# server_names_hash_bucket_size 64;`.
+3. Uncomment the line and adjust the value. Start with 64, but if you encounter issues, incrementally increase it (e.g., 128, 256) until it accommodates your configuration.
+
+Open your current 4get NGINX config (that is under `/etc/nginx/sites-available/`) and append this to the end of the file:
+
+```
+server {
+ access_log /dev/null;
+ error_log /dev/null;
+
+ listen 80;
+ server_name <youronionaddress>;
+ root /var/www/4get;
+
+ location @php {
+ try_files $uri.php $uri/index.php =404;
+ # Change the unix socket address if it's different for you.
+ fastcgi_pass unix:/var/run/php-fpm/php-fpm.sock;
+ fastcgi_index index.php;
+ # Change this to `fastcgi_params` if you use a debian based distro.
+ include fastcgi.conf;
+ fastcgi_intercept_errors on;
+ }
+
+ location / {
+ try_files $uri @php;
+ }
+
+ location ~* ^(.*)\.php$ {
+ return 301 $1;
+ }
+}
+```
+
+Obviously replace `<youronionaddress>` by the onion address of `/var/lib/tor/4get/hostname`, then check if the nginx config is valid with `nginx -t`. If it is, restart the nginx service and try opening the onion address in the Tor Browser. You can see a real world example [here](https://git.zzls.xyz/Fijxu/etc-configs/src/branch/selfhost/nginx/sites-available/4get.zzls.xyz.conf)
+
+Once you've done the above, refer to <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/tor.md">this tor guide</a> to set up your onionsite.
diff --git a/docs/tor.md b/docs/tor.md
new file mode 100644
index 0000000..b29ac3d
--- /dev/null
+++ b/docs/tor.md
@@ -0,0 +1,16 @@
+# Tor setup
+This guide assumes that there is already a configured webserver sitting on port 80 waiting for localhost connections. The <a href="https://git.lolcat.ca/lolcat/4get/src/branch/master/docs/apache2.md">apache2 guide</a> guides you through this.
+
+1. Login as `root`.
+2. Install `tor`.
+3. Edit `/etc/tor/torrc`
+4. Go to the lines that contain `HiddenServiceDir` and `HiddenServicePort`, uncomment those 2 lines and set them like this:
+ ```
+ HiddenServiceDir /var/lib/tor/4get
+ HiddenServicePort 80 127.0.0.1:80
+ ```
+5. Restart the tor service using `service tor restart`
+6. Wait for a while...
+7. Run `cat /var/lib/tor/4get/hostname`. That is your onion address!
+
+# Specify your own tor address
diff --git a/lib/frontend.php b/lib/frontend.php
index 738ad83..7e3b6fb 100644
--- a/lib/frontend.php
+++ b/lib/frontend.php
@@ -902,6 +902,7 @@ class frontend{
"yandex" => "Yandex",
"google" => "Google",
"yep" => "Yep",
+ "mwmbl" => "Mwmbl",
"mojeek" => "Mojeek",
"marginalia" => "Marginalia",
"wiby" => "wiby",
@@ -1018,6 +1019,11 @@ class frontend{
$lib = new facebook();
break;*/
+ case "mwmbl":
+ include "scraper/mwmbl.php";
+ $lib = new mwmbl();
+ break;
+
case "mojeek":
include "scraper/mojeek.php";
$lib = new mojeek();
diff --git a/scraper/mwmbl.php b/scraper/mwmbl.php
new file mode 100644
index 0000000..671ec78
--- /dev/null
+++ b/scraper/mwmbl.php
@@ -0,0 +1,168 @@
+<?php
+
+/**
+ * Scraper for the Mwmbl search engine (https://mwmbl.org).
+ *
+ * Downloads the HTML result page for a query and converts every
+ * result entry into the generic result array returned by web().
+ */
+class mwmbl{
+	
+	// Declared explicitly: creating properties on the fly is deprecated
+	// since PHP 8.2. Kept public so their visibility does not change.
+	public $backend;
+	public $fuckhtml;
+	
+	/**
+	 * Loads the helper libraries and creates the backend and HTML
+	 * parser instances used by the other methods.
+	 */
+	public function __construct(){
+		
+		// include_once: avoids redeclaring the classes when the
+		// libraries were already loaded earlier in the request
+		include_once "lib/backend.php";
+		$this->backend = new backend("mwmbl");
+		
+		include_once "lib/fuckhtml.php";
+		$this->fuckhtml = new fuckhtml();
+	}
+	
+	/**
+	 * Returns the filters available for a page.
+	 *
+	 * @param mixed $page Requested page type (unused).
+	 * @return array Always empty: this scraper defines no filters.
+	 */
+	public function getfilters($page){
+		
+		return [];
+	}
+	
+	/**
+	 * Performs a GET request and returns the raw response body.
+	 *
+	 * @param mixed  $proxy Proxy handed over to backend::assign_proxy().
+	 * @param string $url   URL to fetch, without query string.
+	 * @param array  $get   Query parameters appended to $url.
+	 * @return string|bool  Whatever curl_exec() returned.
+	 * @throws Exception    With the cURL error message when the
+	 *                      transfer fails.
+	 */
+	private function get($proxy, $url, $get = []){
+		
+		$curlproc = curl_init();
+		
+		if($get !== []){
+			$get = http_build_query($get);
+			$url .= "?" . $get;
+		}
+		
+		curl_setopt($curlproc, CURLOPT_URL, $url);
+		
+		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
+		curl_setopt($curlproc, CURLOPT_HTTPHEADER,
+			["User-Agent: " . config::USER_AGENT,
+			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+			"Accept-Language: en-US,en;q=0.5",
+			"Accept-Encoding: gzip",
+			"DNT: 1",
+			"Connection: keep-alive",
+			"Upgrade-Insecure-Requests: 1",
+			"Sec-Fetch-Dest: document",
+			"Sec-Fetch-Mode: navigate",
+			"Sec-Fetch-Site: none",
+			"Sec-Fetch-User: ?1"]
+		);
+		
+		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
+		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
+		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
+		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
+		
+		$this->backend->assign_proxy($curlproc, $proxy);
+		
+		$data = curl_exec($curlproc);
+		
+		if(curl_errno($curlproc)){
+			
+			// read the message first, then release the handle so it
+			// is not left open when we bail out
+			$error = curl_error($curlproc);
+			curl_close($curlproc);
+			
+			throw new Exception($error);
+		}
+		
+		curl_close($curlproc);
+		return $data;
+	}
+	
+	/**
+	 * Runs a web search on Mwmbl.
+	 *
+	 * @param array $get Request parameters; "s" holds the search term.
+	 * @return array Result array; only the "web" list is populated and
+	 *               "npt" stays null because no next page is requested.
+	 * @throws Exception When the search term is empty or the page
+	 *                   could not be fetched.
+	 */
+	public function web($get){
+		
+		$search = $get["s"];
+		if(strlen($search) === 0){
+			
+			throw new Exception("Search term is empty!");
+		}
+		
+		try{
+			$html = $this->get(
+				$this->backend->get_ip(), // no next page!
+				"https://mwmbl.org/app/home/",
+				[
+					"q" => $search
+				]
+			);
+		}catch(Exception $error){
+			
+			// same message as before, original error kept as "previous"
+			throw new Exception("Failed to fetch HTML", 0, $error);
+		}
+		
+		$out = [
+			"status" => "ok",
+			"spelling" => [
+				"type" => "no_correction",
+				"using" => null,
+				"correction" => null
+			],
+			"npt" => null,
+			"answer" => [],
+			"web" => [],
+			"image" => [],
+			"video" => [],
+			"news" => [],
+			"related" => []
+		];
+		
+		$this->fuckhtml->load($html);
+		
+		$results =
+			$this->fuckhtml
+			->getElementsByClassName(
+				"result",
+				"li"
+			);
+		
+		foreach($results as $result){
+			
+			$this->fuckhtml->load($result);
+			
+			$p =
+				$this->fuckhtml
+				->getElementsByTagName("p");
+			
+			$titles =
+				$this->fuckhtml
+				->getElementsByClassName(
+					"title",
+					$p
+				);
+			
+			$extracts =
+				$this->fuckhtml
+				->getElementsByClassName(
+					"extract",
+					$p
+				);
+			
+			$links =
+				$this->fuckhtml
+				->getElementsByTagName("a");
+			
+			if(
+				!isset($titles[0]) ||
+				!isset($links[0]["attributes"]["href"])
+			){
+				
+				// entry without title or link: nothing usable, skip it
+				// instead of indexing into a missing element
+				continue;
+			}
+			
+			// NOTE(review): a missing extract yields a null description,
+			// mirroring "date" => null below -- confirm consumers accept it
+			$description = null;
+			
+			if(isset($extracts[0])){
+				
+				$description =
+					$this->titledots(
+						$this->fuckhtml
+						->getTextContent(
+							$extracts[0]
+						)
+					);
+			}
+			
+			$out["web"][] = [
+				"title" =>
+					$this->titledots(
+						$this->fuckhtml
+						->getTextContent(
+							$titles[0]
+						)
+					),
+				"description" => $description,
+				"url" =>
+					$this->fuckhtml
+					->getTextContent(
+						$links[0]
+						["attributes"]
+						["href"]
+					),
+				"date" => null,
+				"type" => "web",
+				"thumb" => [
+					"url" => null,
+					"ratio" => null
+				],
+				"sublink" => [],
+				"table" => []
+			];
+		}
+		
+		return $out;
+	}
+	
+	/**
+	 * Removes trailing "…" (U+2026) characters from a string.
+	 *
+	 * rtrim() cannot be used here: its character list is a set of
+	 * bytes, so rtrim($title, "…") strips any trailing \xE2, \x80 or
+	 * \xA6 byte and corrupts text ending in characters such as "À"
+	 * (C3 80) or "Ц" (D0 A6). Only whole ellipsis sequences are
+	 * removed instead.
+	 *
+	 * @param string $title Text to clean.
+	 * @return string Text without trailing ellipsis characters.
+	 */
+	private function titledots($title){
+		
+		$title = (string)$title;
+		$ellipsis = "…";
+		$length = strlen($ellipsis);
+		
+		while(substr($title, -$length) === $ellipsis){
+			
+			// cast: older PHP versions return false for an empty slice
+			$title = (string)substr($title, 0, -$length);
+		}
+		
+		return $title;
+	}
+}
diff --git a/settings.php b/settings.php
index 5572b19..49ba166 100644
--- a/settings.php
+++ b/settings.php
@@ -126,6 +126,10 @@ $settings = [
"text" => "Yep"
],
[
+ "value" => "mwmbl",
+ "text" => "Mwmbl"
+ ],
+ [
"value" => "mojeek",
"text" => "Mojeek"
],