From 015f73f0271ca509c7b7eef6cb437158fbb29a21 Mon Sep 17 00:00:00 2001 From: Arnan de Gans Date: Mon, 26 Aug 2024 20:00:37 -0600 Subject: [PATCH] Version 1.7.1 --- assets/css/styles.css | 18 ++-- box-office.php | 21 ++-- changelog.md | 27 ++++- config.default.php | 22 ++-- engines/boxoffice/eztv.php | 24 +++-- engines/boxoffice/yts.php | 22 ++-- engines/image/openverse.php | 18 ++-- engines/image/pixabay.php | 14 +-- engines/image/qwant-images.php | 11 +- engines/image/yahoo-images.php | 17 +--- engines/magnet/glotorrents.php | 166 ++++++++++++++++++++++++++++++ engines/magnet/lime.php | 5 +- engines/search-image.php | 26 +++-- engines/search-magnet.php | 25 +++-- engines/search-news.php | 11 +- engines/search.php | 30 +++--- engines/search/duckduckgo.php | 7 +- engines/search/google.php | 6 ++ engines/search/mojeek.php | 22 ++-- engines/special/currency.php | 2 +- engines/special/definition.php | 8 +- engines/special/ipify.php | 9 +- error.php | 54 +--------- footer.php | 8 ++ functions/oauth-openverse.php | 11 +- functions/search_engine.php | 31 ++++-- functions/timeout-status.php | 77 ++++++++++++++ functions/tools-magnet.php | 94 ++++++++--------- functions/tools.php | 179 +++++++++++++++++++++++---------- help.php | 21 ++-- index.php | 27 ++--- readme.md | 38 +++---- results.php | 109 ++++++++++---------- stats.php | 21 ++-- 34 files changed, 743 insertions(+), 438 deletions(-) create mode 100644 engines/magnet/glotorrents.php create mode 100644 footer.php create mode 100644 functions/timeout-status.php diff --git a/assets/css/styles.css b/assets/css/styles.css index 5f977c9..6e0adaa 100644 --- a/assets/css/styles.css +++ b/assets/css/styles.css @@ -130,13 +130,15 @@ input[type="search"]::-webkit-search-cancel-button { -webkit-appearance:none; -w .result-grid .result, .result-grid .meta { margin:0; padding:0; } /* Magnet highlight/Share/Boxoffice popup */ -.goosebox { display:none; position:fixed; inset:0; z-index:10000; overflow:auto; background-color:rgb(0, 
0, 0, .75); } +.goosebox { display:none; position:fixed; inset:0; z-index:2; overflow:auto; background-color:rgb(0, 0, 0, .75); } .goosebox.open { display:block; } .goosebox-body { margin:50px auto; padding:20px; width:50%; background:var(--background-popup); border:1px solid var(--border); border-radius:10px; } .goosebox-body h2 { padding:0 0 .3em 0; } .goosebox-body h3 { font-size:1.2rem; } .goosebox-body a { cursor:pointer; } .goosebox-body a:visited { color:var(--link); } +.goosebox-body img { margin:0; padding:0 5px 5px 0; width:8em; border:0; border-radius:10px; float:left; } +.goosebox-body hr { clear:both; content:" "; border:0; } .goosebox-body button { margin:5px auto; padding:5px 10px; width:100%; height:35px; border:1px solid var(--border-alt); border-radius:10px; color:var(--button-text); background-color:var(--button-bg); text-align:center; font-size:1rem; } .goosebox-body button:hover { text-decoration:none; background-color:var(--button-hover); } .goosebox-body .share-field, .goosebox-body .share-button { box-sizing:border-box; position:relative; padding:5px 10px; width:100%; height:40px; font-size:1rem; } @@ -147,15 +149,14 @@ input[type="search"]::-webkit-search-cancel-button { -webkit-appearance:none; -w /* Stats display (stats page) */ .statspage .content h1 { margin-bottom:10px; padding:0; text-align:center; font-size:2.5em; font-weight:400; } .statspage .content h2 { margin-bottom:10px; padding:0; text-align:center; font-size:1.5em; } -.statspage p { font-family:'Courier New'; } +.statspage p { font-family:'American Typewriter', 'Courier New', serif; } /* oAUTH page */ -.oauthpage { background-color:var(--background-alt); color:var(--text-alt); } -.oauthpage .oauth-form { text-align:center; margin-top:20px; } -.oauthpage .oauth-form p, .oauth-form small { margin-bottom:15px; color:var(--text-alt); } -.oauthpage .oauth-form .field { padding:5px 10px; width:300px; color:#f0f6fc; background-color:var(--background-alt); font-size:1.2rem; 
border:1px solid var(--startpage-border); border-radius:10px; } -.oauthpage .oauth-buttons button { margin:30px 20px 10px 20px; padding:13px 10px; min-width:130px; color:var(--text-alt); background-color:var(--startpage-button-bg); border:1px solid var(--startpage-border); font-size:1.2rem; border-radius:6px; } -.oauthpage .oauth-buttons button:hover { border:1px solid var(--startpage-border-alt); background-color:var(--startpage-button-bg-alt); text-decoration:none; } +.oauth-form { margin-top:20px; } +.oauth-form p, .oauth-form small { margin-bottom:15px; color:var(--text-alt); } +.oauth-form .field { padding:5px 10px; width:300px; color:#f0f6fc; background-color:var(--background-alt); font-size:1.2rem; border:1px solid var(--startpage-border); border-radius:10px; } +.oauth-buttons button { margin:30px 0 10px 0; padding:13px 10px; min-width:130px; color:var(--text-alt); background-color:var(--startpage-button-bg); border:1px solid var(--startpage-border); font-size:1.2rem; border-radius:6px; } +.oauth-buttons button:hover { border:1px solid var(--startpage-border-alt); background-color:var(--startpage-button-bg-alt); text-decoration:none; } /* Tooltips */ .tooltip-question::before { content:""; display:inline-block; width:1.1em; height:1.1em; background:var(--link); vertical-align:text-bottom; 
mask-image:url('data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiA/Pjxzdmcgdmlld0JveD0iMCAwIDI0IDI0IiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciPjxwYXRoIGQ9Ik0xMS4yOSwxNS4yOWExLjU4LDEuNTgsMCwwLDAtLjEyLjE1Ljc2Ljc2LDAsMCwwLS4wOS4xOC42NC42NCwwLDAsMC0uMDYuMTgsMS4zNiwxLjM2LDAsMCwwLDAsLjIuODQuODQsMCwwLDAsLjA4LjM4LjkuOSwwLDAsMCwuNTQuNTQuOTQuOTQsMCwwLDAsLjc2LDAsLjkuOSwwLDAsMCwuNTQtLjU0QTEsMSwwLDAsMCwxMywxNmExLDEsMCwwLDAtLjI5LS43MUExLDEsMCwwLDAsMTEuMjksMTUuMjlaTTEyLDJBMTAsMTAsMCwxLDAsMjIsMTIsMTAsMTAsMCwwLDAsMTIsMlptMCwxOGE4LDgsMCwxLDEsOC04QTgsOCwwLDAsMSwxMiwyMFpNMTIsN0EzLDMsMCwwLDAsOS40LDguNWExLDEsMCwxLDAsMS43MywxQTEsMSwwLDAsMSwxMiw5YTEsMSwwLDAsMSwwLDIsMSwxLDAsMCwwLTEsMXYxYTEsMSwwLDAsMCwyLDB2LS4xOEEzLDMsMCwwLDAsMTIsN1oiIGZpbGw9IiM2NTYzZmYiLz48L3N2Zz4='); } @@ -191,6 +192,7 @@ input[type="search"]::-webkit-search-cancel-button { -webkit-appearance:none; -w /* Misc */ .goosle-g { color:var(--color-accent); } +.plainpage { background-color:var(--background-alt); color:var(--text-alt); } .green { color:var(--green); } .red { color:var(--red); } .yellow { color:var(--yellow); } diff --git a/box-office.php b/box-office.php index 464c74c..8eb1ef3 100644 --- a/box-office.php +++ b/box-office.php @@ -55,7 +55,7 @@

Goosle

- " name="q" /> + " name="q" /> @@ -140,19 +140,12 @@

Goosle does not index, offer or distribute torrent files.

- - - -
Redirecting
- - + diff --git a/changelog.md b/changelog.md index f7a37c2..c906e19 100644 --- a/changelog.md +++ b/changelog.md @@ -1,6 +1,30 @@ # Goosle ## The best Meta Search Engine to find everything +### 1.7.1 - August 26, 2024 +- NOTICE: config.default.php has changed, update your config.php!! +- [new] Engine timeouts for specific response codes when a search engine returns some kind of error or quota limitation +- [new] Engine timeouts status page at /functions/timeout-status.php +- [new] GloTorrents (Glodls) Magnet results +- [new] Use multiple search overrides (eg: 'size:large safe:off goose gone wild') +- [fix] Search suggestions re-added and they now work (Scraped from Mojeek, Google, DuckduckGo and Yahoo Images) +- [fix] Limetorrents search query is now properly formatted +- [fix] Search overrides (Safe search, image size, etc.) trigger no longer included in search query +- [fix] Search query no longer sent out double encoded +- [fix] Strings (Search queries, seo texts, etc.) are now properly limited to max length where required +- [fix] Multiple currency conversions now also works when caching is enabled +- [fix] Multiple ip lookups (from different user IPs) now also works when caching is enabled +- [update] Oauth page no longer center aligned +- [update] Updated Language explanation in config.default.php +- [change] Moved footer code to footer.php +- [change] Redid error.php to only include the error html +- [change] Openverse adds image category tags to image alt text +- [change] Pixabay uses image tags as image alt text +- [change] Image alt text now has a 1.5x multiplier for result ranking (was 1x) +- [change] Added imdb.com link to Box office popup +- [change] Added movie poster to Box office popup +- [change] Simplified verified uploader popup text + ### 1.7 - August 7, 2024 - NOTICE: config.default.php has changed, update your config.php!! 
- [new] Mojeek search results @@ -18,9 +42,10 @@ - [change] Replaced 'porn' with 'nsfw' for safe search switch - [change] Removed 'xxx' as an keyword to disable safe search - [change] Don't search on nyaa.si and YTS if you search with safemode off +- [change] Only search on sukebei.nyaa.si if you search with safemode off - [change] Moved image size override into search object - [change] Added a little space between rows for image results on mobile -- [change] Stats font is now 'Courier' +- [change] Stats font is now 'Courier New' - [fix] Google search query not providing good results - [fix] Search query not always properly urlencoded - [removed] Removed search suggestions as they didn't work diff --git a/config.default.php b/config.default.php index 80effed..756271b 100644 --- a/config.default.php +++ b/config.default.php @@ -63,18 +63,20 @@ /* ------------------------------------------------------------------------------------ LANGUAGE: To not fit the USA mold, Goosle defaults to the United Kingdom for english results. - DuckDuckGo and Google are language agnostic. + DuckDuckGo and Google are mostly language agnostic. Invalid values either cause the search engine to fail or will default to English depending on how wrong the value is. - Google has no language setting because as soon as you specify it all 'anonymous' settings stop working. + Google uses a search region and defaults to the United Kingdom. This usually applies to a country (us, uk, es, fr, nl, etc.) - DuckDuckGo uses language regions and defaults to the United Kingdom. To change it see if your region is available - https://duckduckgo.com/duckduckgo-help-pages/settings/params/. - - Wikipedia needs to be told which language you want. This changes the search url. Use any of their supported languages (en, es, fr, nl, etc.) + DuckDuckGo uses language regions and defaults to the United Kingdom. A list of all regions: https://duckduckgo.com/duckduckgo-help-pages/settings/params/. 
Qwant uses a locale similar to DuckDuckGo and defaults to the United Kingdom as well. Available locales are: bg_bg, br_fr, ca_ad, ca_es, ca_fr, co_fr, cs_cz, cy_gb, da_dk, de_at, de_ch, de_de, ec_ca, el_gr, en_au, en_ca, en_gb, en_ie, en_my, en_nz, en_us, es_ad, es_ar, es_cl, es_co, es_es, es_mx, es_pe, et_ee, eu_es, eu_fr, fc_ca, fi_fi, fr_ad, fr_be, fr_ca, fr_ch, fr_fr, gd_gb, he_il, hu_hu, it_ch, it_it, ko_kr, nb_no, nl_be, nl_nl, pl_pl, pt_ad, pt_pt, ro_ro, sv_se, th_th, zh_cn, zh_hk. + Mojeek supports a few search regions: uk, de, fr, eu and '' (empty, no preference) + + Wikipedia needs to be told which language you want. This changes the search url. Use any of their supported languages (en, es, fr, nl, etc.) + SOCIAL MEDIA RELEVANCE: Show social media results lower in results if you don't value such results. This includes websites like Facebook, Instagram, Twitter/X, Snapchat, TikTok, LinkedIn and Reddit. @@ -90,13 +92,14 @@ Chrome may attract attention because of the lack of Chrome information (tracking) aside from the user agent. The search engine may know something is 'weird'. Opera/Edge/Brave and many others use Chrome under the hood and are not a good pick for that reason. - Do not use user agents for mobile devices. Where possible Goosle explicitly tells the service it's a desktop computer to get a certain format for results. + Do NOT use user agents for mobile devices or tablets. Where possible Goosle explicitly tells the service it's a desktop computer to get a certain format for results. Contradicting the request with a mobile user agent may get your banned. MAGNET TRACKERS: Add more or less magnet trackers to the list but keep at least five or so! - These are added to the magnet links Goosle creates by itself. - Generally you do not need to change these. + No one tracker knows everything, more trackers is usually better for faster discovery and downloads. + Some search engines only provide torrent hashes. 
Goosle then uses these trackers to create a magnet link. + Generally you do not need to change these unless you need some specific tracker. ------------------------------------------------------------------------------------ */ return (object) array( @@ -138,6 +141,7 @@ 'magnet' => array( 'limetorrents' => 'on', // Default: on (Anything) 'piratebay' => 'on', // Default: on (Anything) + 'glotorrents' => 'on', // Default: on (Anything) 'yts' => 'on', // Default: on (Movies) 'eztv' => 'on', // Default: on (TV-Shows) 'nyaa' => 'on', // Default: on (Anime) @@ -170,7 +174,7 @@ ), 'show_nsfw_magnets' => 'off', // Default: off (Set to 'off' to try and hide adult content. Override with 'safe:off' or 'nsfw') - 'show_zero_seeders' => 'off', // Default: off + 'show_zero_seeders' => 'off', // Default: off (Set to 'off' to hide torrents with 0 seeds) 'show_yts_highlight' => 'on', // Default: off (Show latest YTS movies above Magnet search results) 'show_share_option' => 'on', // Default: on (Show a share option for Magnet results) 'piratebay_categories_blocked' => array(206, 210), // Default: 206, 210 (Comma separated numbers, see /engines/magnet/thepiratebay.php for all categories) diff --git a/engines/boxoffice/eztv.php b/engines/boxoffice/eztv.php index a394f11..8336d36 100644 --- a/engines/boxoffice/eztv.php +++ b/engines/boxoffice/eztv.php @@ -6,7 +6,7 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. 
------------------------------------------------------------------------------------ */ function eztv_boxoffice($opts) { @@ -17,7 +17,7 @@ function eztv_boxoffice($opts) { return fetch_cached_results($opts->cache_type, $opts->hash, $api_url); } - $response = do_curl_request( + $response = do_curl_request( $api_url, // (string) Where? array('Accept: application/json, */*;q=0.7', 'User-Agent: '.$opts->user_agents[0].';'), // (array) User agent + Headers 'get', // (string) post/get @@ -25,7 +25,7 @@ function eztv_boxoffice($opts) { ); $json_response = json_decode($response, true); $results = $results_temp = array(); - + // No response if(empty($json_response)) { if($opts->querylog == 'on') querylog('BoxofficeEZTV', 'a', $api_url, 'No response', 0); @@ -37,9 +37,11 @@ function eztv_boxoffice($opts) { if($opts->querylog == 'on') querylog('BoxofficeEZTV', 'a', $api_url, 'No Results', 0); return $results; } - + foreach($json_response['torrents'] as $result) { $title = (!empty($result['title'])) ? sanitize($result['title']) : null; + $imdb = sanitize($result['imdb_id']); + $year = (!empty($result['date_released_unix'])) ? gmdate('Y', sanitize($result['date_released_unix'])) : null; $hash = (!empty($result['hash'])) ? strtolower(sanitize($result['hash'])) : null; $thumbnail = (!empty($result['small_screenshot'])) ? sanitize($result['small_screenshot']) : null; @@ -54,9 +56,10 @@ function eztv_boxoffice($opts) { // Add codec to quality if(!empty($codec)) $quality = $quality.' '.$codec; - // Clean up show name + // Clean up show name and fix up the imdb ID $title = (preg_match('/.+?(?=[0-9]{3,4}p|xvid|divx|(x|h)26(4|5))/i', $title, $clean_name)) ? 
$clean_name[0] : $title; // Break off show name before video resolution $title = trim(str_replace(array('S0E0', 'S00E00'), '', $title)); // Strip spaces and empty season/episode indicator from name + $imdb = 'tt'.$imdb; // Group the same episodes in one result if(count($results) > 0) { @@ -70,9 +73,9 @@ function eztv_boxoffice($opts) { if($found_id !== false) { // Add the download to a previous result $results[$found_id]['magnet_links'][] = array( - 'hash' => $hash, - 'magnet' => $magnet_link, - 'filesize' => $filesize, + 'hash' => $hash, + 'magnet' => $magnet_link, + 'filesize' => $filesize, 'quality' => $quality, 'audio' => $audio ); @@ -83,11 +86,12 @@ function eztv_boxoffice($opts) { $results[$result_id] = array ( 'id' => $result_id, // string 'title' => $title, // string + 'imdb_id' => $imdb, // string 'year' => $year, // int(4) 'thumbnail' => $thumbnail, // string 'magnet_links' => array(array( // Yes, two array (For merging results)... 'hash' => $hash, // string - 'magnet' => $magnet_link, // string + 'magnet' => $magnet_link, // string 'filesize' => $filesize, // int 'quality' => $quality, // string 'audio' => $audio // string @@ -107,4 +111,4 @@ function eztv_boxoffice($opts) { return $results; } -?> \ No newline at end of file +?> diff --git a/engines/boxoffice/yts.php b/engines/boxoffice/yts.php index 7864a30..85b7f38 100644 --- a/engines/boxoffice/yts.php +++ b/engines/boxoffice/yts.php @@ -6,7 +6,7 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. 
------------------------------------------------------------------------------------ */ function yts_boxoffice($opts, $what) { @@ -17,7 +17,7 @@ function yts_boxoffice($opts, $what) { return fetch_cached_results($opts->cache_type, $opts->hash, $api_url); } - $response = do_curl_request( + $response = do_curl_request( $api_url, // (string) Where? array('Accept: application/json, */*;q=0.7', 'User-Agent: '.$opts->user_agents[0].';'), // (array) User agent + Headers 'get', // (string) post/get @@ -40,6 +40,7 @@ function yts_boxoffice($opts, $what) { foreach($json_response['data']['movies'] as $result) { $title = sanitize($result['title']); + $imdb = sanitize($result['imdb_code']); $year = (!empty($result['year'])) ? sanitize($result['year']) : 0; $category = (!empty($result['genres'])) ? $result['genres'] : null; @@ -55,7 +56,7 @@ function yts_boxoffice($opts, $what) { if(is_array($category)) { // Block these categories if(count(array_uintersect($category, $opts->yts_categories_blocked, 'strcasecmp')) > 0) continue; - + // Set actual category $category = sanitize(implode(', ', $category)); } @@ -76,10 +77,10 @@ function yts_boxoffice($opts, $what) { if(!empty($bitrate)) $quality = $quality.' 
'.$bitrate.'bit'; $downloads[] = array ( - 'hash' => $hash, - 'magnet' => $magnet, - 'filesize' => $filesize, - 'type' => $type, + 'hash' => $hash, + 'magnet' => $magnet, + 'filesize' => $filesize, + 'type' => $type, 'quality' => $quality, 'audio' => $audio ); @@ -91,6 +92,7 @@ function yts_boxoffice($opts, $what) { $results[$result_id] = array ( 'id' => $result_id, // Semi random string to separate results 'title' => $title, // string + 'imdb_id' => $imdb, // string 'year' => $year, // int(4) 'category' => $category, // string|null 'language' => $language, // string|null @@ -100,8 +102,8 @@ function yts_boxoffice($opts, $what) { 'thumbnail' => $thumbnail, // string|empty 'magnet_links' => $downloads // array ); - - unset($result, $title, $thumbnail, $year, $category, $language, $rating, $url, $summary, $downloads); + + unset($result, $title, $imdb, $thumbnail, $year, $category, $language, $rating, $url, $summary, $downloads); } unset($response, $json_response); @@ -114,4 +116,4 @@ function yts_boxoffice($opts, $what) { return $results; } -?> \ No newline at end of file +?> diff --git a/engines/image/openverse.php b/engines/image/openverse.php index 719637c..1f6be04 100644 --- a/engines/image/openverse.php +++ b/engines/image/openverse.php @@ -11,17 +11,14 @@ ------------------------------------------------------------------------------------ */ class OpenverseRequest extends EngineRequest { public function get_request_url() { - $query = $this->search->query; - - // Max 200 chars - $query = (strlen($query) > 200) ? 
substr($query, 0, 200) : $query; - $query = implode(',', make_tags_from_string($query)); + // Format query & max 200 chars + $query = implode(' ', make_terms_array_from_string(limit_string_length($this->search->query, 200, ''))); // Safe search override if($this->search->safe == 0) { - $safe = '1'; + $safe = true; } else { - $safe = '0'; + $safe = false; } // Size override @@ -87,8 +84,8 @@ public function parse_results($response) { $image_full = (!empty($result['url'])) ? sanitize($result['url']) : null; $url = (!empty($result['foreign_landing_url'])) ? sanitize($result['foreign_landing_url']) : null; $alt = (!empty($result['title'])) ? sanitize($result['title']) : null; + $tags = (count($result['tags']) > 0) ? implode(', ', array_unique(array_column($result['tags'], 'name'))) : null; $creator = (!empty($result['creator'])) ? " by ".sanitize($result['creator']) : null; - $tags = (count($result['tags']) > 0) ? array_column($result['tags'], 'name') : make_tags_from_string($alt); // Skip broken results if(empty($image_thumb)) continue; @@ -100,10 +97,10 @@ public function parse_results($response) { $dimensions_h = (!empty($result['height'])) ? 
sanitize($result['height']) : null; // Prepare data + if(!is_null($tags)) $alt = $alt.$tags; if(!is_null($creator)) $alt = $alt.$creator; - $tags = array_unique($tags); - // Skip duplicate IMAGE urls/results + // Skip duplicate IMAGE urls/results if(!empty($engine_temp)) { if(in_array($image_full, array_column($engine_temp, 'image_full'))) continue; } @@ -114,7 +111,6 @@ public function parse_results($response) { 'image_full' => $image_full, // string 'url' => $url, // string 'alt' => $alt, // string - 'tags' => $tags, // array 'engine_rank' => $rank, // int // Optional 'width' => $dimensions_w, // int | null diff --git a/engines/image/pixabay.php b/engines/image/pixabay.php index 8dc8136..47b75b0 100644 --- a/engines/image/pixabay.php +++ b/engines/image/pixabay.php @@ -11,13 +11,10 @@ ------------------------------------------------------------------------------------ */ class PixabayRequest extends EngineRequest { public function get_request_url() { - $query = $this->search->query; + // Format query & max 100 chars + $query = implode(' ', make_terms_array_from_string(limit_string_length($this->search->query, 100, ''))); - // Max 100 chars - $query = (strlen($query) > 100) ? substr($query, 0, 100) : $query; - $query = implode(',', make_tags_from_string($query)); - - // Safe search override + // Safe search override if($this->search->safe == 0) { $safe = true; } else { @@ -97,9 +94,8 @@ public function parse_results($response) { $image_thumb = (!empty($result['previewURL'])) ? sanitize($result['previewURL']) : null; $image_full = (!empty($result['largeImageURL'])) ? sanitize($result['largeImageURL']) : null; $url = (!empty($result['pageURL'])) ? sanitize($result['pageURL']) : null; - $alt = (!empty($image_thumb)) ? substr(strrchr($image_thumb, "/"), 1) : null; + $alt = (!empty($result['tags'])) ? $result['tags'] : null; $creator = (!empty($result['user'])) ? " by ".sanitize($result['user']) : null; - $tags = (!empty($result['tags'])) ? 
explode(', ', $result['tags']) : make_tags_from_string($alt); // Skip broken results if(empty($image_thumb)) continue; @@ -112,7 +108,6 @@ public function parse_results($response) { // Process data if(!is_null($creator)) $alt = $alt.$creator; - $tags = array_unique($tags); // Skip duplicate IMAGE urls/results if(!empty($engine_temp)) { @@ -125,7 +120,6 @@ public function parse_results($response) { 'image_full' => $image_full, // string 'url' => $url, // string 'alt' => $alt, // string - 'tags' => $tags, // array 'engine_rank' => $rank, // int // Optional 'width' => $dimensions_w, // int | null diff --git a/engines/image/qwant-images.php b/engines/image/qwant-images.php index 05a5cc3..7162d00 100644 --- a/engines/image/qwant-images.php +++ b/engines/image/qwant-images.php @@ -11,8 +11,6 @@ ------------------------------------------------------------------------------------ */ class QwantImageRequest extends EngineRequest { public function get_request_url() { - $query = $this->search->query; - // Size override $size = 'all'; if($this->search->size == 1) $size = 'small'; @@ -24,7 +22,7 @@ public function get_request_url() { // Based on https://github.com/locness3/qwant-api-docs and variables from qwant website $url = 'https://api.qwant.com/v3/search/images?'.http_build_query(array( - 'q' => $query, // Search query + 'q' => $this->search->query, // Search query 't' => 'images', // Type of search, Images 'count' => 150, // Up-to how many images to return (Max 150) 'size' => $size, // General image size @@ -33,7 +31,7 @@ public function get_request_url() { 'safesearch' => $this->search->safe // Safe search filter (0 = off, 1 = normal, 2 = strict) )); - unset($query, $size, $language); + unset($size, $language); return $url; } @@ -74,7 +72,6 @@ public function parse_results($response) { $image_full = (!empty($result['media'])) ? sanitize($result['media']) : null; $url = (!empty($result['url'])) ? sanitize($result['url']) : null; $alt = (!empty($result['title'])) ? 
sanitize($result['title']) : null; - $tags = (!empty($alt)) ? make_tags_from_string($alt) : array(); // Skip broken results if(empty($image_thumb)) continue; @@ -85,9 +82,6 @@ public function parse_results($response) { $dimensions_w = (!empty($result['width'])) ? sanitize($result['width']) : null; $dimensions_h = (!empty($result['height'])) ? sanitize($result['height']) : null; - // Process data - $tags = array_unique($tags); - // Skip duplicate IMAGE urls/results if(!empty($engine_temp)) { if(in_array($image_full, array_column($engine_temp, 'image_full'))) continue; @@ -99,7 +93,6 @@ public function parse_results($response) { 'image_full' => $image_full, // string 'url' => $url, // string 'alt' => $alt, // string - 'tags' => $tags, // array 'engine_rank' => $rank, // int // Optional 'width' => $dimensions_w, // int | null diff --git a/engines/image/yahoo-images.php b/engines/image/yahoo-images.php index 1b19874..8491b93 100644 --- a/engines/image/yahoo-images.php +++ b/engines/image/yahoo-images.php @@ -11,8 +11,6 @@ ------------------------------------------------------------------------------------ */ class YahooImageRequest extends EngineRequest { public function get_request_url() { - $query = $this->search->query; - // Safe search override if($this->search->safe == 0) { $safe = '0'; @@ -28,12 +26,12 @@ public function get_request_url() { if($this->search->size == 4) $size = 'wallpaper'; $url = 'https://images.search.yahoo.com/search/images?'.http_build_query(array( - 'p' => $query, // Search query + 'p' => $this->search->query, // Search query 'imgsz' => $size, // Image size (small|medium|large|wallpaper) 'safe' => $safe // Safe search filter (0 = off, "" = on) )); - unset($query, $size, $safe); + unset($size, $safe); return $url; } @@ -68,14 +66,10 @@ public function parse_results($response) { } // Scrape recommended - $didyoumean = $xpath->query(".//section[@class='dym-c']/section/h3/a")[0]; + $didyoumean = 
$xpath->query("//section[@class='dym-c']/section/h3/a")[0]; if(!is_null($didyoumean)) { - $engine_result['did_you_mean'] = $didyoumean->textContent; + $engine_result['did_you_mean'][] = $didyoumean->textContent; } - $search_specific = $xpath->query(".//section[@class='dym-c']/section/h5/a")[0]; - if(!is_null($search_specific)) { - $engine_result['search_specific'] = $search_specific->textContent; - } foreach($scrape as $result) { // Find data @@ -110,7 +104,6 @@ public function parse_results($response) { $image_full = (array_key_exists('imgurl', $usable_data)) ? sanitize($usable_data['imgurl']) : null; $url = (array_key_exists('rurl', $usable_data)) ? sanitize($usable_data['rurl']) : null; $alt = (array_key_exists('tt', $usable_data)) ? sanitize($usable_data['tt']) : null; - $tags = (!empty($alt)) ? make_tags_from_string($alt) : array(); // Skip broken results if(empty($image_full)) continue; @@ -132,7 +125,6 @@ public function parse_results($response) { $image_full = '//'.$image_full; } } - $tags = array_unique($tags); // Skip duplicate IMAGE urls/results if(!empty($engine_temp)) { @@ -145,7 +137,6 @@ public function parse_results($response) { 'image_full' => $image_full, // string 'url' => $url, // string 'alt' => $alt, // string - 'tags' => $tags, // array 'engine_rank' => $rank, // int // Optional 'width' => $dimensions_w, // int | null diff --git a/engines/magnet/glotorrents.php b/engines/magnet/glotorrents.php new file mode 100644 index 0000000..80de875 --- /dev/null +++ b/engines/magnet/glotorrents.php @@ -0,0 +1,166 @@ +search->query); + + unset($query); + + return $url; + } + + public function get_request_headers() { + return array( + 'Accept' => 'text/html, application/xhtml+xml, application/xml;q=0.8, */*;q=0.7', + ); + } + + public function parse_results($response) { + $engine_temp = $engine_result = array(); + $xpath = get_xpath($response); + + // No response + if(!$xpath) { + if($this->opts->querylog == 'on') querylog(get_class($this), 's', 
$this->url, 'No response', 0); + return $engine_result; + } + + // Scrape the results + $scrape = $xpath->query('//div[@class="myBlock-con"]/table//tr'); + + // No results + if(count($scrape) == 0) { + if($this->opts->querylog == 'on') querylog(get_class($this), 's', $this->url, 'No results', 0); + return $engine_result; + } + + $categories = array( + 1 => 'Movies', + 5 => 'Android', + 10 => 'Games', + 18 => 'Software/Apps', + 22 => 'Music', + 28 => 'Anime', + 33 => 'Other', + 41 => 'TV', + 51 => 'Books', + 52 => 'Mobile Apps/Games', + 54 => 'Windows', + 55 => 'Macintosh', + 70 => 'Pictures', + 71 => 'Video', + 72 => 'TV/Movie Packs', + 74 => 'Tutorials', + 75 => 'FLAC', + 76 => 'Sports' + ); + + foreach($scrape as $result) { + // Find data + $title = $xpath->evaluate(".//td[2]//a[2]/@title", $result); + $magnet = $xpath->evaluate(".//td[4]/a/@href", $result); + $seeders = $xpath->evaluate(".//td[6]//b", $result); + $leechers = $xpath->evaluate(".//td[7]//b", $result); + $filesize = $xpath->evaluate(".//td[5]", $result); + + // Skip broken results + if($title->length == 0) continue; + if($magnet->length == 0) continue; + + // Process data + $title = sanitize($title[0]->textContent); + $magnet = sanitize($magnet[0]->textContent); + parse_str(parse_url($magnet, PHP_URL_QUERY), $hash_parameters); + $hash = strtolower(str_replace('urn:btih:', '', $hash_parameters['xt'])); + $seeders = ($seeders->length > 0) ? sanitize($seeders[0]->textContent) : 0; + $leechers = ($leechers->length > 0) ? sanitize($leechers[0]->textContent) : 0; + $filesize = ($filesize->length > 0) ? filesize_to_bytes(sanitize($filesize[0]->textContent)) : 0; + + // Ignore results with 0 seeders? 
+ if($this->opts->show_zero_seeders == 'off' AND $seeders == 0) continue; + + // Throw out mismatched tv-show episodes when searching for tv shows + if(!is_season_or_episode($this->search->query, $title)) continue; + + // Find extra data + $category = $xpath->evaluate(".//td[1]/a/@href", $result); + $url = $xpath->evaluate(".//td[2]//a[2]/@href", $result); + + // Process extra data + if($category->length > 0) { + $category = str_replace('/search.php?cat=', '', sanitize($category[0]->textContent)); + $category = (preg_match('/[0-9]+/', $category, $category)) ? $category[0] : null; + } else { + $category = null; + } + $url = ($url->length > 0) ? 'https://glodls.to'.sanitize($url[0]->textContent) : null; + + // Find meta data for certain categories + if(!is_null($category)) { + $nsfw = (detect_nsfw($title)) ? true : false; + $quality = $codec = $audio = null; + if($category == 1 || $category == 28 || $category == 41 || $category == 71 || $category == 72 || $category == 74) { + $quality = find_video_quality($title); + $codec = find_video_codec($title); + $audio = find_audio_codec($title); + + // Add codec to quality + if(!empty($codec)) $quality = $quality.' 
'.$codec; + } else if($category == 22 || $category == 75) { + $audio = find_audio_codec($title); + } + + // Set actual category + $category = $categories[$category]; + } + + $engine_temp[] = array ( + // Required + 'hash' => $hash, // string + 'title' => $title, // string + 'magnet' => $magnet, // string + 'seeders' => $seeders, // int + 'leechers' => $leechers, // int + 'filesize' => $filesize, // int + // Optional + 'verified_uploader' => null, // string|null + 'nsfw' => $nsfw, // bool + 'quality' => $quality, // string|null + 'type' => null, // string|null + 'audio' => $audio, // string|null + 'runtime' => null, // int(timestamp)|null + 'year' => null, // int(4)|null + 'timestamp' => null, // int(timestamp)|null + 'category' => $category, // string|null + 'mpa_rating' => null, // string|null + 'language' => null, // string|null + 'url' => $url // string|null + ); + + unset($result, $title, $hash, $magnet, $seeders, $leechers, $filesize, $quality, $codec, $audio, $category, $url); + } + + // Base info + if(!empty($engine_temp)) { + $engine_result['source'] = 'glodls.to'; + $engine_result['search'] = $engine_temp; + } + + if($this->opts->querylog == 'on') querylog(get_class($this), 's', $this->url, count($scrape), count($engine_temp)); + + unset($response, $xpath, $scrape, $engine_temp); + + return $engine_result; + } +} +?> diff --git a/engines/magnet/lime.php b/engines/magnet/lime.php index 5f4e7cf..1d39250 100644 --- a/engines/magnet/lime.php +++ b/engines/magnet/lime.php @@ -11,10 +11,9 @@ ------------------------------------------------------------------------------------ */ class LimeRequest extends EngineRequest { public function get_request_url() { - $query = preg_replace('/[^a-z0-9- ]+/', '', $this->search->query); - $query = strtolower(str_replace(' ', '-', $query)); + $query = preg_replace('/[^a-z0-9- ]/', '', strtolower($this->search->query)); - $url = 'https://www.limetorrents.lol/search/all/'.$query.'/'; + $url = 
'https://www.limetorrents.lol/search/all/'.urlencode($query).'/'; unset($query); diff --git a/engines/search-image.php b/engines/search-image.php index 6d4c555..f04af75 100644 --- a/engines/search-image.php +++ b/engines/search-image.php @@ -16,22 +16,22 @@ public function __construct($search, $opts, $mh) { $this->requests = array(); if($opts->enable_image_search == 'on') { - if($opts->image['yahooimages'] == 'on') { + if($opts->image['yahooimages'] == 'on' && !has_timeout('YahooImageRequest')) { require ABSPATH.'engines/image/yahoo-images.php'; $this->requests[] = new YahooImageRequest($search, $opts, $mh); } - if($opts->image['qwantimages'] == 'on') { + if($opts->image['qwantimages'] == 'on' && !has_timeout('QwantImageRequest')) { require ABSPATH.'engines/image/qwant-images.php'; $this->requests[] = new QwantImageRequest($search, $opts, $mh); } - if($opts->image['pixabay'] == 'on') { + if($opts->image['pixabay'] == 'on' && !has_timeout('PixabayRequest')) { require ABSPATH.'engines/image/pixabay.php'; $this->requests[] = new PixabayRequest($search, $opts, $mh); } - if($opts->image['openverse'] == 'on') { + if($opts->image['openverse'] == 'on' && !has_timeout('OpenverseRequest')) { require ABSPATH.'engines/image/openverse.php'; $this->requests[] = new OpenverseRequest($search, $opts, $mh); } @@ -77,8 +77,8 @@ public function parse_results($response) { $goosle_results['search'][$found_id]['combo_source'][] = $engine_result['source']; } else { // First find, rank and add to results - $match_rank = match_count($result['tags'], $request->search->query_terms, 2); -// $match_rank += match_count($result['alt'], $request->search->query_terms); + $match_rank = 0; + $match_rank += match_count($result['alt'], $request->search->query_terms, 1.5); $match_rank += match_count($result['url'], $request->search->query_terms, 0.5); $result['goosle_rank'] = $goosle_rank + $match_rank; @@ -166,6 +166,14 @@ public static function print_results($goosle_results, $search, $opts) { } echo 
""; + // Search suggestions + if(array_key_exists('did_you_mean', $goosle_results)) { + echo "
  • "; + echo "

    ".search_suggestion($search->type, $opts->hash, $goosle_results['did_you_mean'])."

    "; + echo "

    Or instead search for query)."%22&t=".$search->type."&a=".$opts->hash."\">\"".$search->query."\"

    "; + echo "
  • "; + } + echo ""; // Search results @@ -178,7 +186,9 @@ public static function print_results($goosle_results, $search, $opts) { echo " \"".$result['alt']."\""; echo " "; echo "
    "; - if(!empty($result['height']) && !empty($result['width'])) echo "

    ".$result['width']."×".$result['height']."

    "; + if(!empty($result['height']) && !empty($result['width'])) { + echo "

    ".$result['width']."×".$result['height']."

    "; + } echo "

    WebsiteImage

    "; if($opts->show_search_rank == 'on') echo "

    Rank: ".$result['goosle_rank']."

    "; echo "
    "; @@ -194,7 +204,7 @@ public static function print_results($goosle_results, $search, $opts) { echo "

    ".search_pagination($search, $opts, $goosle_results['number_of_results'])."

    "; } - echo "

    Goosle does not store or distribute image files.

    "; + echo "

    Goosle does not store or distribute image files. Images may be subject to copyright.

    "; } // No results found diff --git a/engines/search-magnet.php b/engines/search-magnet.php index 94d9628..8c002f0 100644 --- a/engines/search-magnet.php +++ b/engines/search-magnet.php @@ -19,38 +19,43 @@ public function __construct($search, $opts, $mh) { // Extra functions to process magnet results require ABSPATH.'functions/tools-magnet.php'; - if($opts->magnet['limetorrents'] == 'on') { + if($opts->magnet['limetorrents'] == 'on' && !has_timeout('LimeRequest')) { require ABSPATH.'engines/magnet/lime.php'; $this->requests[] = new LimeRequest($search, $opts, $mh); } - if($opts->magnet['piratebay'] == 'on') { + if($opts->magnet['piratebay'] == 'on' && !has_timeout('PirateBayRequest')) { require ABSPATH.'engines/magnet/thepiratebay.php'; $this->requests[] = new PirateBayRequest($search, $opts, $mh); } - if($opts->magnet['yts'] == 'on') { + if($opts->magnet['glotorrents'] == 'on' && !has_timeout('GlodlsRequest')) { + require ABSPATH.'engines/magnet/glotorrents.php'; + $this->requests[] = new GlodlsRequest($search, $opts, $mh); + } + + if($opts->magnet['yts'] == 'on' && !has_timeout('YTSRequest')) { if($search->safe !== 0) { require ABSPATH.'engines/magnet/yts.php'; $this->requests[] = new YTSRequest($search, $opts, $mh); } } - if($opts->magnet['nyaa'] == 'on') { + if($opts->magnet['nyaa'] == 'on' && !has_timeout('NyaaRequest')) { if($search->safe !== 0) { require ABSPATH.'engines/magnet/nyaa.php'; $this->requests[] = new NyaaRequest($search, $opts, $mh); } } - if($opts->magnet['sukebei'] == 'on') { + if($opts->magnet['sukebei'] == 'on' && !has_timeout('SukebeiRequest')) { if($opts->show_nsfw_magnets == 'on' || ($opts->show_nsfw_magnets == 'off' && $search->safe === 0)) { require ABSPATH.'engines/magnet/sukebei.php'; $this->requests[] = new SukebeiRequest($search, $opts, $mh); } } - if($opts->magnet['eztv'] == 'on') { + if($opts->magnet['eztv'] == 'on' && !has_timeout('EZTVRequest')) { if(substr(strtolower($search->query), 0, 2) == 'tt') { require 
ABSPATH.'engines/magnet/eztv.php'; $this->requests[] = new EZTVRequest($search, $opts, $mh); @@ -315,7 +320,7 @@ public static function print_results($goosle_results, $search, $opts) { echo "

    ".search_pagination($search, $opts, $goosle_results['number_of_results'])."

    "; } - echo "

    Goosle does not index, offer or distribute torrent files.

    "; + echo "

    Goosle does not index, offer or distribute torrent files. Found content may be subject to copyright.

    "; // Torrent site warning popup (Normally hidden) echo "
    "; @@ -330,9 +335,9 @@ public static function print_results($goosle_results, $search, $opts) { echo "
    "; echo "
    "; echo "

    Trusted uploaders

    "; - echo "

    Some websites have a group of verified and/or trusted uploaders. These are users that are known to provide good quality downloads.

    "; - echo "

    Downloads with a blue shield and checkmark are uploaded by a verified or trusted user according to the torrent site.

    "; - echo "

    Downloads with a red shield and questionmark indicate that the user is not verified by the website providing the download. This can mean this is a new user, or that the file is provided from an anonymous source. Unverified magnet links are not necessarily bad but may contain low quality or misleading content or simply have a poorly written title.

    "; + echo "

    Some websites have a group of verified and/or trusted uploaders. These are persons or groups that are known to provide good quality downloads.

    "; + echo "

    Downloads with a blue shield and checkmark are uploaded by a verified or trusted uploader according to the torrent site.

    "; + echo "

    Downloads with a red shield and question mark indicate that the uploader is not verified by the website providing the download. Unverified magnet links are not necessarily bad but may contain low quality or misleading content.

    "; echo "

    Close

    "; echo "
    "; echo "
    "; diff --git a/engines/search-news.php b/engines/search-news.php index e1ac455..ca70e3a 100644 --- a/engines/search-news.php +++ b/engines/search-news.php @@ -16,22 +16,22 @@ public function __construct($search, $opts, $mh) { $this->requests = array(); if($opts->enable_news_search == 'on') { - if($opts->news['qwantnews'] == 'on') { + if($opts->news['qwantnews'] == 'on' && !has_timeout('QwantNewsRequest')) { require ABSPATH.'engines/news/qwant-news.php'; $this->requests[] = new QwantNewsRequest($search, $opts, $mh); } - if($opts->news['yahoonews'] == 'on') { + if($opts->news['yahoonews'] == 'on' && !has_timeout('YahooNewsRequest')) { require ABSPATH.'engines/news/yahoo-news.php'; $this->requests[] = new YahooNewsRequest($search, $opts, $mh); } - if($opts->news['bravenews'] == 'on') { + if($opts->news['bravenews'] == 'on' && !has_timeout('BraveNewsRequest')) { require ABSPATH.'engines/news/brave-news.php'; $this->requests[] = new BraveNewsRequest($search, $opts, $mh); } - if($opts->news['hackernews'] == 'on') { + if($opts->news['hackernews'] == 'on' && !has_timeout('HackernewsRequest')) { require ABSPATH.'engines/news/hackernews.php'; $this->requests[] = new HackernewsRequest($search, $opts, $mh); } @@ -70,7 +70,8 @@ public function parse_results($response) { $goosle_results['search'][$found_id]['combo_source'][] = $engine_result['source']; } else { // First find, rank and add to results - $match_rank = match_count($result['title'], $request->search->query_terms, 1.2); + $match_rank = 0; + $match_rank += match_count($result['title'], $request->search->query_terms, 1.2); $match_rank += match_count($result['description'], $request->search->query_terms); $match_rank += match_count($result['url'], $request->search->query_terms, 0.5); diff --git a/engines/search.php b/engines/search.php index 5afa1c8..cf3ee08 100644 --- a/engines/search.php +++ b/engines/search.php @@ -16,36 +16,37 @@ public function __construct($search, $opts, $mh) { $this->requests = array(); 
if($opts->enable_web_search == 'on') { - if($opts->web['duckduckgo'] == 'on') { + if($opts->web['duckduckgo'] == 'on' && !has_timeout('DuckDuckGoRequest')) { require ABSPATH.'engines/search/duckduckgo.php'; $this->requests[] = new DuckDuckGoRequest($search, $opts, $mh); } - if($opts->web['mojeek'] == 'on') { + if($opts->web['mojeek'] == 'on' && !has_timeout('MojeekRequest')) { require ABSPATH.'engines/search/mojeek.php'; $this->requests[] = new MojeekRequest($search, $opts, $mh); } - if($opts->web['google'] == 'on') { + if($opts->web['google'] == 'on' && !has_timeout('GoogleRequest')) { require ABSPATH.'engines/search/google.php'; $this->requests[] = new GoogleRequest($search, $opts, $mh); } - if($opts->web['qwant'] == 'on') { + if($opts->web['qwant'] == 'on' && !has_timeout('QwantRequest')) { require ABSPATH.'engines/search/qwant.php'; $this->requests[] = new QwantRequest($search, $opts, $mh); } - if($opts->web['brave'] == 'on') { + if($opts->web['brave'] == 'on' && !has_timeout('BraveRequest')) { require ABSPATH.'engines/search/brave.php'; $this->requests[] = new BraveRequest($search, $opts, $mh); } - if($opts->web['wikipedia'] == 'on') { + if($opts->web['wikipedia'] == 'on' && !has_timeout('WikiRequest')) { require ABSPATH.'engines/search/wikipedia.php'; $this->requests[] = new WikiRequest($search, $opts, $mh); } } + /* --- SPECIAL SEARCHES --- */ // Currency converter @@ -99,11 +100,7 @@ public function parse_results($response) { if(!empty($engine_result)) { if(isset($engine_result['did_you_mean'])) { - $goosle_results['did_you_mean'] = $engine_result['did_you_mean']; - } - - if(isset($engine_result['search_specific'])) { - $goosle_results['search_specific'][] = $engine_result['search_specific']; + $goosle_results['did_you_mean'][] = $engine_result['did_you_mean']; } if(isset($engine_result['search'])) { @@ -128,7 +125,8 @@ public function parse_results($response) { $goosle_results['search'][$found_id]['combo_source'][] = $engine_result['source']; } else { // 
First find, rank and add to results - $match_rank = match_count($result['title'], $request->search->query_terms); + $match_rank = 0; + $match_rank += match_count($result['title'], $request->search->query_terms); $match_rank += match_count($result['description'], $request->search->query_terms, 2);; $match_rank += match_count($result['url'], $request->search->query_terms, 0.5); @@ -228,6 +226,14 @@ public static function print_results($goosle_results, $search, $opts) { } echo ""; + // Search suggestions + if(array_key_exists('did_you_mean', $goosle_results)) { + echo "
  • "; + echo "

    ".search_suggestion($search->type, $opts->hash, $goosle_results['did_you_mean'])."

    "; + echo "

    Or instead search for query)."%22&t=".$search->type."&a=".$opts->hash."\">\"".$search->query."\"

    "; + echo "
  • "; + } + // Special result if(array_key_exists('special', $goosle_results)) { echo "
  • "; diff --git a/engines/search/duckduckgo.php b/engines/search/duckduckgo.php index 5d026cb..65186c8 100644 --- a/engines/search/duckduckgo.php +++ b/engines/search/duckduckgo.php @@ -58,7 +58,6 @@ public function parse_results($response) { } // Scrape the results -// $scrape = $xpath->query("/html/body/div[1]/div[".count($xpath->query("/html/body/div[1]/div"))."]/div/div/div[contains(@class, 'web-result')]/div"); $scrape = $xpath->query("//div[contains(@class, 'result__body')]"); // Figure out results and base rank @@ -70,6 +69,12 @@ public function parse_results($response) { return $engine_result; } + // Scrape recommended + $didyoumean = $xpath->query('//div[contains(@class, "msg--spelling")]/div/a[1]')[0]; + if(!is_null($didyoumean)) { + $engine_result['did_you_mean'] = strip_tags($didyoumean->textContent); + } + foreach($scrape as $result) { // Find data $url = $xpath->evaluate(".//h2[@class='result__title']//a/@href", $result); diff --git a/engines/search/google.php b/engines/search/google.php index 1ddac59..2a67ff6 100644 --- a/engines/search/google.php +++ b/engines/search/google.php @@ -55,6 +55,12 @@ public function parse_results($response) { return $engine_result; } + // Scrape recommended + $didyoumean = $xpath->query("//a[@class='gL9Hy']")[0]; + if(!is_null($didyoumean)) { + $engine_result['did_you_mean'] = strip_tags($didyoumean->textContent); + } + foreach($scrape as $result) { // Find data $url = $xpath->evaluate(".//div[@class='yuRUbf']//a/@href", $result); diff --git a/engines/search/mojeek.php b/engines/search/mojeek.php index 3581b7f..8135b89 100644 --- a/engines/search/mojeek.php +++ b/engines/search/mojeek.php @@ -6,7 +6,7 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. 
------------------------------------------------------------------------------------ */ class MojeekRequest extends EngineRequest { @@ -19,7 +19,7 @@ public function get_request_url() { } else { $safe = ''; } - + // All parameters and values: https://www.mojeek.com/preferences $url = 'https://www.mojeek.com/search?'.http_build_query(array( 'q' => $this->search->query, // Search query @@ -65,10 +65,16 @@ public function parse_results($response) { // No results if($number_of_results == 0) { - if($this->opts->querylog == 'on') querylog(get_class($this), 's', $this->url, 'No results', 0); + if($this->opts->querylog == 'on') querylog(get_class($this), 's', $this->url, 'No results', 0); return $engine_result; } + // Scrape recommended + $didyoumean = $xpath->query("//p[contains(@class, 'spell')]//a")[0]; + if(!is_null($didyoumean)) { + $engine_result['did_you_mean'] = strip_tags($didyoumean->textContent); + } + foreach($scrape as $result) { // Find data $url = $xpath->evaluate(".//h2/a/@href", $result); @@ -78,21 +84,21 @@ public function parse_results($response) { // Skip broken results if($url->length == 0) continue; if($title->length == 0) continue; - + // Process data $url = sanitize($url[0]->textContent); $title = strip_newlines(sanitize($title[0]->textContent)); $description = ($description->length == 0) ? "No description was provided for this site." 
: limit_string_length(strip_newlines(sanitize($description[0]->textContent))); - + // filter duplicate urls/results if(!empty($engine_temp)) { if(in_array($url, array_column($engine_temp, 'url'))) continue; } $engine_temp[] = array( - 'title' => $title, - 'url' => $url, - 'description' => $description, + 'title' => $title, + 'url' => $url, + 'description' => $description, 'engine_rank' => $rank ); $rank -= 1; diff --git a/engines/special/currency.php b/engines/special/currency.php index 2bfd588..eb52683 100644 --- a/engines/special/currency.php +++ b/engines/special/currency.php @@ -11,7 +11,7 @@ ------------------------------------------------------------------------------------ */ class CurrencyRequest extends EngineRequest { public function get_request_url() { - $url = 'https://cdn.moneyconvert.net/api/latest.json'; + $url = 'https://cdn.moneyconvert.net/api/latest.json?to='.urlencode($this->search->query_terms[3]); return $url; } diff --git a/engines/special/definition.php b/engines/special/definition.php index 6827899..4eb4ac7 100644 --- a/engines/special/definition.php +++ b/engines/special/definition.php @@ -6,15 +6,15 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. 
------------------------------------------------------------------------------------ */ class DefinitionRequest extends EngineRequest { public function get_request_url() { // [0] = (define|meaning) // [1] = WORD - $url = 'https://api.dictionaryapi.dev/api/v2/entries/en/'.$this->search->query_terms[1]; - + $url = 'https://api.dictionaryapi.dev/api/v2/entries/en/'.urlencode($this->search->query_terms[1]); + return $url; } @@ -81,7 +81,7 @@ public function parse_results($response) { unset($meaning); } - + // Return result $engine_result = array( 'title' => "Definition for: ".sanitize($result['word'])." [".sanitize($phonetic)."]", diff --git a/engines/special/ipify.php b/engines/special/ipify.php index 6782ce5..9cbe039 100644 --- a/engines/special/ipify.php +++ b/engines/special/ipify.php @@ -6,16 +6,16 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. ------------------------------------------------------------------------------------ */ class ipRequest extends EngineRequest { public function get_request_url() { - $url = 'https://api64.ipify.org?format=json'; - + $url = 'https://api64.ipify.org?format=json&cache='.rand(1000, 9999); + return $url; } - + public function get_request_headers() { return array( 'Accept' => 'application/json, */*;q=0.8', @@ -37,7 +37,6 @@ public function parse_results($response) { return $engine_result; } - // Return result $engine_result = array( 'title' => "Your IP Address: ".$_SERVER["REMOTE_ADDR"], diff --git a/error.php b/error.php index 18c7224..4badf24 100644 --- a/error.php +++ b/error.php @@ -1,50 +1,4 @@ - - - - - Goosle Search - - - - - - - - - - - - - - - - - - - - - - -
    -

    You can't use Goosle without an authorization key!

    -

    Contact the website administrator for more information.

    -
    - - \ No newline at end of file +
    +

    You can't use Goosle without an authorization key!

    +

    Contact the website administrator for more information.

    +
    diff --git a/footer.php b/footer.php new file mode 100644 index 0000000..d982bd2 --- /dev/null +++ b/footer.php @@ -0,0 +1,8 @@ + diff --git a/functions/oauth-openverse.php b/functions/oauth-openverse.php index 2aa3565..4540d00 100644 --- a/functions/oauth-openverse.php +++ b/functions/oauth-openverse.php @@ -35,7 +35,7 @@ - + hash_auth, $opts->hash, $auth)) { ?> @@ -122,10 +122,11 @@
  • - -
    Redirecting
    - - + diff --git a/functions/search_engine.php b/functions/search_engine.php index 423d43c..aeb8e26 100644 --- a/functions/search_engine.php +++ b/functions/search_engine.php @@ -6,7 +6,7 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. ------------------------------------------------------------------------------------ */ abstract class EngineRequest { @@ -20,8 +20,8 @@ function __construct($search, $opts, $mh) { // No search engine url if(!$this->url) return; - - // Skip if there is a cached result (from earlier search) + + // Skip if there is a cached result if($this->opts->cache_type !== 'off' && has_cached_results($this->opts->cache_type, $this->opts->hash, $this->url, $this->opts->cache_time)) return; // Default headers for the curl request @@ -55,7 +55,7 @@ function __construct($search, $opts, $mh) { // Curl $this->ch = curl_init(); - + curl_setopt($this->ch, CURLOPT_URL, $this->url); curl_setopt($this->ch, CURLOPT_HTTPGET, 1); // Redundant? Probably... 
curl_setopt($this->ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTPS | CURLPROTO_HTTP); @@ -78,18 +78,24 @@ function __construct($search, $opts, $mh) { public function get_request_url() { return ''; } - + /*-------------------------------------- // Check if a request to a search engine was successful --------------------------------------*/ public function request_successful() { if((isset($this->ch) && curl_getinfo($this->ch)['http_code'] == '200') || ($this->opts->cache_type !== 'off' && has_cached_results($this->opts->cache_type, $this->opts->hash, $this->url, $this->opts->cache_time))) { return true; - } + } + + // Set a timeout if Goosle is (temporarily) unable to use engine + set_timeout(get_class($this), curl_getinfo($this->ch)['http_code']); return false; } - + + /*-------------------------------------- + // Process results so Goosle can use it + --------------------------------------*/ abstract function parse_results($response); /*-------------------------------------- @@ -99,7 +105,7 @@ public function get_results() { if(!isset($this->url)) { return $this->parse_results(null); } - + // If there is a cached result from an earlier search use that instead if($this->opts->cache_type !== 'off' && has_cached_results($this->opts->cache_type, $this->opts->hash, $this->url, $this->opts->cache_time)) { return fetch_cached_results($this->opts->cache_type, $this->opts->hash, $this->url); @@ -116,13 +122,16 @@ public function get_results() { // Cache last request if there is something to cache if($this->opts->cache_type !== 'off') { - $ttl = ($this->search->type == 2) ? 1 : $this->opts->cache_time; + $ttl = ($this->search->type == 2) ? 
1 : $this->opts->cache_time; // Cache news (type 2) for 1 hour only if(count($results) > 0) store_cached_results($this->opts->cache_type, $this->opts->hash, $this->url, $results, $ttl); } return $results; } - + + /*-------------------------------------- + // Output search results after processing + --------------------------------------*/ public static function print_results($results, $search, $opts) {} } -?> \ No newline at end of file +?> diff --git a/functions/timeout-status.php b/functions/timeout-status.php new file mode 100644 index 0000000..c19cb6e --- /dev/null +++ b/functions/timeout-status.php @@ -0,0 +1,77 @@ +user_auth; +/* ------------------------------------------------------------------------------------ +* Goosle - The fast, privacy oriented search tool that just works. +* +* COPYRIGHT NOTICE +* Copyright 2023-2024 Arnan de Gans. All Rights Reserved. +* +* COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. +* By using this code you agree to indemnify Arnan de Gans from any +* liability that might arise from its use. +------------------------------------------------------------------------------------ */ +?> + + + + Goosle Search Timeouts + + + + + + + + + + + + + + + + +hash_auth, $opts->hash, $auth)) { +?> + +
    +

    Goosle

    +

    This page lists all recorded timeouts, currently active and from the past.
    + If a search engine doesn't work, or results are missing, check here to see whether it is simply in a timeout.

    + +

    A timeout will be set by Goosle if a search engine blocks your request or you make too many requests. Depending on the response code, a timeout of 15 minutes up to 12 hours can be set. Dates in red are still in effect.

    + +

    Timeouts

    + "; + foreach($timeouts as $engine => $expiry) { + $class = ($expiry > time()) ? "red" : "green"; + echo "
  • ".$engine.": ".the_date('M d, Y H:i:s', $expiry).""; + } + echo "
      "; + } else { + echo "No timeouts have been set"; + } + ?> +

      Back to Goosle

      +
  • + + + + + diff --git a/functions/tools-magnet.php b/functions/tools-magnet.php index 25f9d9c..dd930eb 100644 --- a/functions/tools-magnet.php +++ b/functions/tools-magnet.php @@ -6,7 +6,7 @@ * Copyright 2023-2024 Arnan de Gans. All Rights Reserved. * * COPYRIGHT NOTICES AND ALL THE COMMENTS SHOULD REMAIN INTACT. -* By using this code you agree to indemnify Arnan de Gans from any +* By using this code you agree to indemnify Arnan de Gans from any * liability that might arise from its use. ------------------------------------------------------------------------------------ */ @@ -17,6 +17,7 @@ function highlight_popup($opts_hash, $highlight) { $meta = $magnet_meta = array(); $search_query = urlencode($highlight['title']." ".$highlight['year']); + $thumb = (!empty($highlight['thumbnail'])) ? $highlight['thumbnail'] : $opts-pixel; if(isset($highlight['category'])) $meta[] = "Genre: ".$highlight['category']; if(isset($highlight['language'])) $meta[] = "Language: ".get_language($highlight['language']); @@ -28,9 +29,10 @@ function highlight_popup($opts_hash, $highlight) { $output .= "
    "; $output .= "

    ".$highlight['title']."

    "; if(isset($highlight['summary'])) { - $output .= "

    ".$highlight['summary']."

    "; + $output .= "

    \"".$highlight['title']."\"".$highlight['summary']."

    "; + $output .= "
    "; } - $output .= "

    Search on GoosleFind more Magnet links

    "; + $output .= "

    View on imdb.comSearch on GoosleFind more Magnet links

    "; if(!empty($meta)) { $output .= "

    ".implode('
    ', $meta)."

    "; } @@ -55,7 +57,7 @@ function highlight_popup($opts_hash, $highlight) { $output .= "
    "; unset($highlight, $magnet, $magnet_meta); - + return $output; } @@ -67,42 +69,42 @@ function detect_nsfw($string) { $string = strtolower($string); // Forbidden terms - //Basic pattern: ^cum[-_\s]?play(ing|ed|s)? + // Basic pattern: ^cum[-_\s]?play(ing|ed|s)? $nsfw_keywords = array( - '/(deepthroat|gangbang|cowgirl|dildo|fuck|cuckold|anal|hump|finger|pegg|fist|ballbust|twerk|dogg|squirt|dick|orgasm)(ing|ed|s)?/', - '/(yaoi|porn|gonzo|erotica|blowbang|bukkake|gokkun|softcore|hardcore|latex|lingerie|interracial|bdsm|chastity|kinky|bondage|shibari|hitachi|upskirt)/', - '/(cock|creampie|cameltoe|enema|nipple|sybian|vibrator|cougar|threesome|foursome|pornstar|escort)(s)?/', - '/(cmnf|cfnm|pov|cbt|bbw|pawg|ssbbw|joi|cei)/', - '/(blow|rim|foot|hand)job(s)?/', - '/(org|puss)(y|ies)\s?/', - '/hentai(ed)?/', - '/jerk(ing)?[-_\s]?off/', + '/(deepthroat|gangbang|cowgirl|dildo|fuck|cuckold|anal|hump|finger|pegg|fist|ballbust|twerk|dogg|squirt|dick|orgasm)(ing|ed|s)?/', + '/(yaoi|porn|gonzo|erotica|blowbang|bukkake|gokkun|softcore|hardcore|latex|lingerie|interracial|bdsm|chastity|kinky|bondage|shibari|hitachi|upskirt)/', + '/(cock|creampie|cameltoe|enema|nipple|sybian|vibrator|cougar|threesome|foursome|pornstar|escort)(s)?/', + '/(cmnf|cfnm|pov|cbt|bbw|pawg|ssbbw|joi|cei)/', + '/(blow|rim|foot|hand)job(s)?/', + '/(org|puss)(y|ies)\s?/', + '/hentai(ed)?/', + '/jerk(ing)?[-_\s]?off/', '/tw(i|u)nk(s)?/', - '/cum(bot|ming|s)?/', + '/cum(bot|ming|s)?/', 
'/porn(hub)?|xhamster|youporn|faphouse|sexually(\s)?broken|adulttime|transfixed|tsseduction|waterbondage|fuckingmachines|monstersofcock|deeplush|hotandmean|onlyfans|fansly|manyvids|transangels|premiumhdv|genderx|evil(\s)?angel|thetrainingofo|rocco(\s)?siffredi|electrosluts|ultimatesurrender|whippedass|insex|herlimit|analdays|bangbus|faketaxi|horrorporn|neighboraffair|naughtybookworms|sexandsubmission|housewife1on1|devicebondage|tspussyhunters|everythingbutt|theupperfloor|public(\s)?disgrace|fuckedandbound|alterotic|divinebitches|wiredpussy/', - '/(m|g)ilf(s)?/', - '/clit(oris|s)?/', - '/tit(ties|s)/', - '/strap[-_\s]?on(ed|s)?/', - '/webcam(ming|s)?/', - '/doggy(style)?/', - '/(masturbat|penetrat)(e|ion|ing|ed)/', - '/face(fuck|sit)?(ing|ting|ed|s)?/', - '/(gap|scissor)(e|ing|ed)?/', - '/(fetish|penis|ass)(es)?/', - '/(fem|lez|male)dom/', - '/futa(nari)?/', - '/(slave|pet)[-_\s]?play(ing|ed|s)?/', - '/submissive(d|s)?/', - '/tied[-_\s]?(up)?/', - '/glory[-_\s]?hole(d|s)?/', - '/swing(er|ers|ing)?/', + '/(m|g)ilf(s)?/', + '/clit(oris|s)?/', + '/tit(ties|s)/', + '/strap[-_\s]?on(ed|s)?/', + '/webcam(ming|s)?/', + '/doggy(style)?/', + '/(masturbat|penetrat)(e|ion|ing|ed)/', + '/face(fuck|sit)?(ing|ting|ed|s)?/', + '/(gap|scissor)(e|ing|ed)?/', + '/(fetish|penis|ass)(es)?/', + '/(fem|lez|male)dom/', + '/futa(nari)?/', + '/(slave|pet)[-_\s]?play(ing|ed|s)?/', + '/submissive(d|s)?/', + '/tied[-_\s]?(up)?/', + '/glory[-_\s]?hole(d|s)?/', + '/swing(er|ers|ing)?/', ); // Replace everything but alphanumeric with a space $string = preg_replace('/\s{2,}|[^a-z0-9]+/', ' ', $string); - preg_replace($nsfw_keywords, '*', $string, -1 , $count); + preg_replace($nsfw_keywords, '*', $string, -1 , $count); return ($count > 0) ? 
true : false; } @@ -123,7 +125,7 @@ function find_video_quality($string) { if($match == '5k') $match = '2880p (5K)'; if($match == '8k') $match = '4320p (8K)'; } - + return $match; } @@ -155,23 +157,23 @@ function find_video_codec($string) { $return[] = $codec; } - + // Maybe a bitrate? - $bitrate = (preg_match('/\b(8|10|12)-?bit\b/i', $string, $bitrate)) ? $bitrate[0] : null; + $bitrate = (preg_match('/\b(8|10|12)-?bit\b/i', $string, $bitrate)) ? $bitrate[0] : null; if(!is_null($bitrate)) { $return[] = trim(strtolower($bitrate)); } // Maybe HDR? - $hdr = (preg_match('/\bhdr|uhd|imax\b/i', $string, $hdr)) ? $hdr[0] : null; + $hdr = (preg_match('/\bhdr|uhd|imax\b/i', $string, $hdr)) ? $hdr[0] : null; if(!is_null($hdr)) { $return[] = trim(strtoupper($hdr)); } if(count($return) > 0) return implode(' ', $return); - + return null; } @@ -198,7 +200,7 @@ function find_audio_codec($string) { if($codec == 'TRUEHD') $codec = 'TrueHD'; $return[] = $codec; - } + } // Try to add channels $channels = (preg_match('/(2|5|7|9)[ \.](0|1|2)\b/i', $string, $channels)) ? $channels[0] : null; @@ -216,14 +218,14 @@ function find_audio_codec($string) { } // Maybe sub-codec? - $codec2 = (preg_match('/\batmos\b/i', $string, $codec2)) ? $codec2[0] : null; + $codec2 = (preg_match('/\batmos\b/i', $string, $codec2)) ? $codec2[0] : null; if(!is_null($codec2)) { $return[] = ucfirst(trim(strtolower($codec2))); } if(count($return) > 0) return implode(' ', $return); - + return null; } @@ -232,12 +234,12 @@ function find_audio_codec($string) { --------------------------------------*/ function movie_star_rating($rating) { $rating = round($rating); - + $star_rating = ''; for($i = 1; $i <= 10; $i++) { $star_rating .= ($i <= $rating) ? 
"" : ""; } - + return $star_rating; } @@ -258,8 +260,8 @@ function movie_mpa_rating($rating) { $rating = "NC-17 - Adults OnlyNot suitable for persons under 17."; } else { $rating = "".$rating.""; - } - + } + return $rating; } @@ -268,7 +270,7 @@ function movie_mpa_rating($rating) { --------------------------------------*/ function get_language($string) { $languages = array("ab" => "Abkhaz", "aa" => "Afar", "af" => "Afrikaans", "ak" => "Akan", "sq" => "Albanian", "am" => "Amharic", "ar" => "Arabic", "an" => "Aragonese", "hy" => "Armenian", "as" => "Assamese", "av" => "Avaric", "ae" => "Avestan", "ay" => "Aymara", "az" => "Azerbaijani", "bm" => "Bambara", "ba" => "Bashkir", "eu" => "Basque", "be" => "Belarusian", "bn" => "Bengali", "bh" => "Bihari", "bi" => "Bislama", "bs" => "Bosnian", "br" => "Breton", "bg" => "Bulgarian", "my" => "Burmese", "ca" => "Catalan", "ch" => "Chamorro", "ce" => "Chechen", "ny" => "Nyanja", "zh" => "Chinese", "cn" => "Chinese", "cv" => "Chuvash", "kw" => "Cornish", "co" => "Corsican", "cr" => "Cree", "hr" => "Croatian", "cs" => "Czech", "da" => "Danish", "dv" => "Maldivian;", "nl" => "Dutch", "en" => "English", "eo" => "Esperanto", "et" => "Estonian", "ee" => "Ewe", "fo" => "Faroese", "fj" => "Fijian", "fi" => "Finnish", "fr" => "French", "ff" => "Fulah", "gl" => "Galician", "ka" => "Georgian", "de" => "German", "el" => "Greek, Modern", "gn" => "Guaraní", "gu" => "Gujarati", "ht" => "Haitian Creole", "ha" => "Hausa", "he" => "Hebrew (modern)", "hz" => "Herero", "hi" => "Hindi", "ho" => "Hiri Motu", "hu" => "Hungarian", "ia" => "Interlingua", "id" => "Indonesian", "ie" => "Interlingue", "ga" => "Irish", "ig" => "Igbo", "ik" => "Inupiaq", "io" => "Ido", "is" => "Icelandic", "it" => "Italian", "iu" => "Inuktitut", "ja" => "Japanese", "jv" => "Javanese", "kl" => "Kalaallisut", "kn" => "Kannada", "kr" => "Kanuri", "ks" => "Kashmiri", "kk" => "Kazakh", "km" => "Khmer", "ki" => "Kikuyu", "rw" => "Kinyarwanda", "ky" => "Kirghiz, Kyrgyz", "kv" => 
"Komi", "kg" => "Kongo", "ko" => "Korean", "ku" => "Kurdish", "kj" => "Kwanyama", "la" => "Latin", "lb" => "Luxembourgish", "lg" => "Luganda", "li" => "Limburgish, Limburgan, Limburger", "ln" => "Lingala", "lo" => "Lao", "lt" => "Lithuanian", "lu" => "Luba-Katanga", "lv" => "Latvian", "gv" => "Manx", "mk" => "Macedonian", "mg" => "Malagasy", "ms" => "Malay", "ml" => "Malayalam", "mt" => "Maltese", "mi" => "Māori", "mr" => "Marathi", "mh" => "Marshallese", "mn" => "Mongolian", "na" => "Nauru", "nv" => "Navajo, Navaho", "nb" => "Norwegian Bokmål", "nd" => "North Ndebele", "ne" => "Nepali", "ng" => "Ndonga", "nn" => "Norwegian Nynorsk", "no" => "Norwegian", "ii" => "Nuosu", "nr" => "South Ndebele", "oc" => "Occitan", "oj" => "Ojibwe, Ojibwa", "cu" => "Old Slavonic", "om" => "Oromo", "or" => "Oriya", "os" => "Ossetian", "pa" => "Punjabi", "pi" => "Pāli", "fa" => "Persian", "pl" => "Polish", "ps" => "Pashto, Pushto", "pt" => "Portuguese", "qu" => "Quechua", "rm" => "Romansh", "rn" => "Kirundi", "ro" => "Romanian", "ru" => "Russian", "sa" => "Sanskrit", "sc" => "Sardinian", "sd" => "Sindhi", "se" => "Northern Sami", "sm" => "Samoan", "sg" => "Sango", "sr" => "Serbian", "gd" => "Gaelic", "sn" => "Shona", "si" => "Sinhala", "sk" => "Slovak", "sl" => "Slovene", "so" => "Somali", "st" => "Southern Sotho", "es" => "Spanish", "su" => "Sundanese", "sw" => "Swahili", "ss" => "Swati", "sv" => "Swedish", "ta" => "Tamil", "te" => "Telugu", "tg" => "Tajik", "th" => "Thai", "ti" => "Tigrinya", "bo" => "Tibetan Standard, Tibetan, Central", "tk" => "Turkmen", "tl" => "Tagalog", "tn" => "Tswana", "to" => "Tonga", "tr" => "Turkish", "ts" => "Tsonga", "tt" => "Tatar", "tw" => "Twi", "ty" => "Tahitian", "ug" => "Uighur, Uyghur", "uk" => "Ukrainian", "ur" => "Urdu", "uz" => "Uzbek", "ve" => "Venda", "vi" => "Vietnamese", "vo" => "Volapük", "wa" => "Walloon", "cy" => "Welsh", "wo" => "Wolof", "fy" => "Western Frisian", "xh" => "Xhosa", "yi" => "Yiddish", "yo" => "Yoruba", "za" => "Zhuang, 
Chuang"); - + return $languages[$string]; } @@ -286,4 +288,4 @@ function is_season_or_episode($search_query, $result_query) { return true; } -?> \ No newline at end of file +?> diff --git a/functions/tools.php b/functions/tools.php index d02e278..2273492 100644 --- a/functions/tools.php +++ b/functions/tools.php @@ -11,7 +11,7 @@ ------------------------------------------------------------------------------------ */ // Current Goosle version -$current_version = '1.7'; +$current_version = '1.7.1'; /*-------------------------------------- // Verify the hash, or not, and let people in, or not @@ -41,6 +41,15 @@ function load_opts() { $opts->user_auth = (isset($_REQUEST['a'])) ? sanitize($_REQUEST['a']) : ''; $opts->pixel = 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='; + // Set up engine timeouts + $timeout_file = ABSPATH.'cache/timeout.data'; + + if(is_file($timeout_file)) { + $opts->timeouts = unserialize(file_get_contents($timeout_file)); + } else { + $opts->timeouts = array(); + } + // Force a few defaults and safeguards if($opts->cache_type == 'file' && !is_dir(ABSPATH.'cache/')) $opts->cache_type = 'off'; if($opts->cache_type == 'apcu' && !function_exists('apcu_exists')) $opts->cache_type = 'off'; @@ -90,56 +99,56 @@ function load_search() { $search->page = (isset($_REQUEST['p'])) ? sanitize($_REQUEST['p']) : 1; // Remove ! at the start of queries to prevent DDG Bangs (!g, !c and crap like that) - if(substr($search->query, 0, 1) == '!') $search->query = substr($search->query, 1); + if(substr($search->query, 0, 1) === '!') $search->query = substr($search->query, 1); + // Preserve quotes $search->query = str_replace('%22', '\"', $search->query); + $search->query = str_replace('%27', '\'', $search->query); // Special searches and filters - $search->query_terms = explode(' ', strtolower($search->query)); // Break up query - $search->count_terms = count($search->query_terms); // How many keywords? 
+ $search->query_terms = make_terms_array_from_string($search->query); // Break up query + $search->count_terms = count($search->query_terms); // How many terms? // Safe search override // 0 = off, 1 = normal (default), 2 = on/strict $search->safe = 1; - if($search->query_terms[0] == 'safe:on') { - $search->safe = 2; - $search->query = trim(str_replace($search->query_terms[0], '', $search->query)); - } + if($search->count_terms > 1) { + if(in_array('safe:on', $search->query_terms)) { + $search->safe = 2; + $search->query = trim(str_ireplace('safe:on', '', $search->query)); + } - if($search->query_terms[0] == 'safe:off' || $search->query_terms[0] == 'nsfw') { - $search->safe = 0; - $search->query = trim(str_replace($search->query_terms[0], '', $search->query)); + if(in_array('safe:off', $search->query_terms) || in_array('nsfw', $search->query_terms)) { + $search->safe = 0; + $search->query = trim(str_ireplace(array('safe:off', 'nsfw'), '', $search->query)); + } } // Size search override (For image search only) - // 0 = all, 1 = small, 2 = medium, 3 = large, 4 extra large + // 0 = all (default), 1 = small, 2 = medium, 3 = large, 4 extra large $search->size = 0; - if($search->type == 1) { - if($search->query_terms[0] == 'size:small') { + if($search->type == 1 && $search->count_terms > 1) { + if(in_array('size:small', $search->query_terms)) { $search->size = 1; - $search->query = trim(str_replace($search->query_terms[0], '', $search->query)); + $search->query = trim(str_ireplace('size:small', '', $search->query)); } - if($search->query_terms[0] == 'size:medium') { + if(in_array('size:medium', $search->query_terms)) { $search->size = 2; - $search->query = trim(str_replace($search->query_terms[0], '', $search->query)); + $search->query = trim(str_ireplace('size:medium', '', $search->query)); } - if($search->query_terms[0] == 'size:large') { + if(in_array('size:large', $search->query_terms)) { $search->size = 3; - $search->query = trim(str_replace($search->query_terms[0], 
'', $search->query)); + $search->query = trim(str_ireplace('size:large', '', $search->query)); } - if($search->query_terms[0] == 'size:xlarge') { + if(in_array('size:xlarge', $search->query_terms)) { $search->size = 4; - $search->query = trim(str_replace($search->query_terms[0], '', $search->query)); + $search->query = trim(str_ireplace('size:xlarge', '', $search->query)); } } - // Create a 'human-readable' and Urlencoded query - $search->nice_query = $search->query; - $search->query = urlencode($search->query); - // Maybe count stats? if(!empty($search->query)) count_stats(); @@ -234,6 +243,54 @@ function do_curl_request($url, $headers, $method, $post_fields) { return $response; } +/*-------------------------------------- +// Set a timeout if an engine is being mean to us +--------------------------------------*/ +function set_timeout($engine, $http_code) { + $timeout_file = ABSPATH.'cache/timeout.data'; + + if(is_file($timeout_file)) { + $timeouts = unserialize(file_get_contents($timeout_file)); + } else { + $timeouts = array(); + } + + if($http_code == 401 || $http_code == 403) { + // Unauthorized / banned + $timeout = 21600; // 6 hours + } else if($http_code == 410) { + // Resource no longer available + $timeout = 3600; // 1 hour + } else if($http_code == 429) { + // Too many requests + $timeout = 1800; // 30 minutes + } else if($http_code >= 500 || $http_code < 600) { + // Some kind of server error + $timeout = 43200; // 12 hours + } else { + // Unspecified error/status + $timeout = 900; // 15 minutes + } + + $timeouts[$engine] = time() + $timeout; + + file_put_contents($timeout_file, serialize($timeouts)); +} + +/*-------------------------------------- +// Engine has a timeout? 
+--------------------------------------*/ +function has_timeout($engine) { + global $opts; + + if(isset($opts->timeouts)) { + if(isset($opts->timeouts[$engine])) { + if($opts->timeouts[$engine] > time()) return true; + } + } + + return false; +} /*-------------------------------------- // Load pages into a DOM @@ -396,15 +453,26 @@ function strip_newlines($string) { function limit_string_length($string, $length = 200, $append = '…') { $string = trim($string); - if(str_word_count($string, 0) > $length) { - $words = str_word_count($string, 2); - $pos = array_keys($words); - $string = substr($string, 0, $pos[$length]) . $append; + if(strlen($string) > $length) { + preg_match('/(.{' . $length . '}.*?)\b/', $string, $matches); + $string = rtrim($matches[1]) . $append; } return $string; } +function make_terms_array_from_string($string) { + if(empty($string)) return array(); + + $string = strtolower($string); + + // Replace anything but alphanumeric with a space + $string = preg_replace('/\s{2,}|[^a-z0-9]+/', ' ', $string); + $keywords = array_filter(array_unique(explode(' ', $string))); + + return $keywords; +} + /*-------------------------------------- // Count matching keywords between result and search query --------------------------------------*/ @@ -412,39 +480,16 @@ function match_count($result_terms, $query_terms, $multiplier = 1) { if(empty($result_terms)) return 0; if(!is_array($result_terms)) { - $result_terms = make_tags_from_string($result_terms); + $result_terms = make_terms_array_from_string($result_terms); } + // Get matching keywords and apply multiplier $matches = array_intersect($result_terms, $query_terms); $matches = count($matches) * $multiplier; return $matches; } -/*-------------------------------------- -// Turn a string (title or something) into an array of words (tags) ---------------------------------------*/ -function make_tags_from_string($string) { - if(empty($string)) return array(); - - $string = strtolower($string); - - // Replace 
anything but alphanumeric with a space - $string = preg_replace('/\s{2,}|[^a-z0-9]+/', ' ', $string); - $keywords = array_filter(array_unique(explode(' ', $string))); - - // Get rid of short words and letters - foreach($keywords as $k => $word) { - if(strlen($word) < 3) unset($keywords[$k]); - } - - // Get rid of filler words (English) - $filler_words = array('and', 'ago', 'but', 'for', 'get', 'gets', 'have', 'haves', 'has', 'into', 'nor', 'off', 'onto', 'the', 'with', 'yet'); - $keywords = array_diff($keywords, $filler_words); - - return $keywords; -} - /*-------------------------------------- // Output and format dates in local time --------------------------------------*/ @@ -497,6 +542,32 @@ function detect_social_media($string) { return ($count > 0) ? true : false; } +/*-------------------------------------- +// Search suggestions +--------------------------------------*/ +function search_suggestion($search_type, $hash, $suggestions) { + // Remove duplicate suggestions + $suggestions = array_unique($suggestions); + + if(count($suggestions) > 1) { + // List multiple suggestions and format them as usable links + foreach($suggestions as $key => $suggestion) { + $suggestions[$key] = "".$suggestion.""; + + unset($key, $suggestion); + } + + $result = "Did you mean ".implode(' or ', $suggestions)."?"; + } else { + // Format the one suggestion + $result = "Did you mean ".$suggestions[0]."?"; + } + + unset($suggestions); + + return $result; +} + /*-------------------------------------- // Count and format search sources --------------------------------------*/ diff --git a/help.php b/help.php index 883d641..564f790 100644 --- a/help.php +++ b/help.php @@ -50,7 +50,7 @@

    Goosle

    - " name="q" /> + " name="q" /> @@ -207,19 +207,12 @@ Goosle started as a fork of LibreY, and takes some design cues from DuckDuckGo.com.

    - - - -
    Redirecting
    - - + diff --git a/index.php b/index.php index 46a92aa..f42833c 100644 --- a/index.php +++ b/index.php @@ -45,12 +45,14 @@ hash_auth, $opts->hash, $opts->user_auth)) { ?> -

    Goosle

    - +
    + +
    +
    @@ -71,25 +73,18 @@ password_generator == "on") { ?>
    - Password Generator
    + Password Generator
    - - - -
    Redirecting
    - - + diff --git a/readme.md b/readme.md index cdec413..1c099a4 100644 --- a/readme.md +++ b/readme.md @@ -18,12 +18,12 @@ After-all, finding things should be easy and not turn into a chore. ## Features - Works on **any** hosting package that does PHP7.4 or newer -- Search results from DuckDuckGo, Google, Qwant, Brave, Wikipedia +- Search results from DuckDuckGo, Google, Qwant, Brave and Wikipedia - Image search through Yahoo! Images, Qwant, Pixabay and Openverse - Recent news via Qwant news, Yahoo! News, Brave and Hackernews - Search for magnet links on popular Torrent sites - Algorithm for ranking search results for relevancy -- Option to down-rank the biggest social media sites such as facebook, instagram, twitter, tiktok, reddit, snapchat and a few others. +- Option to down-rank the biggest social media sites such as facebook, instagram, twitter, tiktok, reddit, snapchat and a few others - Special searches for; Currency conversion, Dictionary, IP Lookup and php.net - Randomized user-agents for to prevent profiling by search providers - Non-personalized Google results without instant results or other non-sense @@ -72,7 +72,15 @@ Developed on Apache with PHP8.2. 4. Load Goosle in your browser. If you've enabled the access hash don't forget to add *?a=YOUR_HASH* to the url. 5. Enjoy your updated search experience! -Take a look at the [changelog](changelog.md) for every update here. \ +Take a look at the [changelog](changelog.md) for every update here. + +## Installation and setup notes +- When using file caching you should set up a cronjob to execute goosle-cron.php every few hours. This deletes cached results. +- When you use Openverse for your image searches you should set up a cron job to execute goosle-cron.php every 11 hours or less. This will automagically renew the access token. +- If you want update notifications in the footer of Goosle set up the cron job so Goosle can ping Github weekly to see what's new. 
+- The .htaccess file has a redirect to force HTTPS, catch 404 errors with a redirect as well as browser caching rules ready to go. +- The robots.txt has a rule to tell all crawlers to not crawl Goosle. But keep in mind that not every crawler obeys this file. +- The access hash is NOT meant as a super secure measure and only works for surface level prying eyes. ## Setting up a Cronjob / background task For a number of background tasks like clearing up the file cache and/or renewing your Openverse access token you need to set up a cronjob. \ @@ -98,16 +106,17 @@ Example for 5 minutes past every 8 hours (I use this on my Goosle) \ Example for every midnight \ `0 0 * * * wget -qO - https://example.com/goosle-cron.php?a=YOUR_HASH` -Why a few minutes past the hour? Because most people run stuff exactly on the hour or some other predictable interval like 15 or 30 minutes. Running things a few minutes later spreads server load. +Why a few minutes past the hour? Because most people run stuff exactly on the hour or some other predictable interval like 15 or 30 minutes. Running things a few minutes offset helps spread server load. ## Authorizing access to the Openverse search API -OpenVerse image search provides (mostly) royalty free images. \ +Openverse image search provides (mostly) royalty free images. \ Millions of high quality photos from photographers from all over the world. \ -If you're into high quality photo backgrounds, need images for blogs and articles or just like to look at high-res anything, then Openverse is a useful engine to use. +If you're into high quality photo backgrounds, need images for blogs and articles or just like to look at high-res anything, then Openverse is a useful engine to use. \ +Check out Openverse here: [https://www.openverse.com](https://www.openverse.com) -To use Openverse Image Search you'll need to register Goosle for an oAUTH access token. +To use Openverse Image Search you'll need to register Goosle for an oAUTH access token. 
\ +Goosle includes a oAuth routine to easily register for an access token. -Goosle includes a oAuth routine to easily register for an access token. \ - In your browser navigate to your goosle setup and add /functions/oauth-openverse.php to the url (ex. example.com/functions/oauth-openverse.php or example.com/functions/oauth-openverse.php?a=YOUR_HASH). - Follow the onscreen prompts to get an authorization token to use Openverse. - When prompted save the Client ID and Client Secret somewhere on your computer, in a note or something. Should the token file that Goosle creates get lost you'll need these strings to continue using Openverse. @@ -130,15 +139,8 @@ Once registered and logged in, you can find your API key in the Documentation he ## Support You can post your questions on Github Discussions or say hi on [Mastodon](https://mas.to/@arnan) or through my [website](https://www.arnan.me). -## Notes -- When using file caching you should set up a cronjob to execute goosle-cron.php every few hours. This deletes 'old' results. -- When you use Openverse for your image searches you should set up a cron job to execute goosle-cron.php every 11 hours or so. This will automagically renew the access token. -- If you want update notifications in the footer of Goosle set up the cron job so Goosle can ping Github weekly to see what's new. -- The .htaccess file has a redirect to force HTTPS, catch 404 errors with a redirect as well as browser caching rules ready to go. -- The robots.txt has a rule to tell all crawlers to not crawl Goosle. But keep in mind that not every crawler obeys this file. -- The access hash is NOT meant as a super secure measure and only works for surface level prying eyes. -- Results provided by Openverse and Pixabay are simplistic keyword matches which are not necessarily accurately sorted by relevancy. - ## Known "issues" -- Duckduckgo sometimes returns a 202 header and no results. 
I'm not sure what causes that but suspect it's something to do with quotas or a service limitation on their end. +- Duckduckgo sometimes returns a '202' header and no results. I'm not sure what causes that but suspect it's something to do with quotas or a service limitation on their end. +- YTS api does not reliably provide complete movie information for new additions, mostly missing movie summaries. +- Mojeek is very picky on who they respond to. Goosle can get randomly banned for days because of it. - Some crawlers for Magnet searches may return empty results. These are likely quota limits on their end. diff --git a/results.php b/results.php index ddf1023..d9bdce9 100644 --- a/results.php +++ b/results.php @@ -21,7 +21,7 @@ $start_time = microtime(true); // SEO description -$description = (strlen($search->nice_query) > 0) ? "Check out these Goosle search results about: '".urldecode($search->nice_query)."'." : "Check out these Goosle search results!"; +$description = (strlen($search->query) > 0) ? "Check out these Goosle search results about: '".urldecode($search->query)."'." : "Check out these Goosle search results!"; ?> @@ -56,7 +56,7 @@

    Goosle

    - " name="q" /> + " name="q" /> @@ -82,66 +82,59 @@
    + query)) { + // Curl + $mh = curl_multi_init(); + + // Load search script + if($search->type == 0) { + require ABSPATH.'engines/search.php'; + $search_results = new Search($search, $opts, $mh); + } else if($search->type == 1) { + require ABSPATH.'engines/search-image.php'; + $search_results = new ImageSearch($search, $opts, $mh); + } else if($search->type == 2) { + require ABSPATH.'engines/search-news.php'; + $search_results = new NewsSearch($search, $opts, $mh); + } else if($search->type == 9) { + require ABSPATH.'engines/search-magnet.php'; + $search_results = new MagnetSearch($search, $opts, $mh); + } + + $running = null; + + do { + $status = curl_multi_exec($mh, $running); + if($running) { + curl_multi_select($mh); + } + } while ($running && $status == CURLM_OK); + + $results = $search_results->get_results(); + + curl_multi_close($mh); + + // Add elapsed time to results + $results['time'] = number_format(microtime(true) - $start_time, 5, '.', ''); + + // Echoes results and special searches + $search_results->print_results($results, $search, $opts); + } else { + echo "
    "; + echo "

    Search query can not be empty!

    "; + echo "

    Not sure what went wrong? Learn more about user_auth."\" title=\"how to use Goosle!\">how to use Goosle.

    "; + echo "
    "; + } + ?> +
    + query)) { - // Curl - $mh = curl_multi_init(); - - // Load search script - if($search->type == 0) { - require ABSPATH.'engines/search.php'; - $search_results = new Search($search, $opts, $mh); - } else if($search->type == 1) { - require ABSPATH.'engines/search-image.php'; - $search_results = new ImageSearch($search, $opts, $mh); - } else if($search->type == 2) { - require ABSPATH.'engines/search-news.php'; - $search_results = new NewsSearch($search, $opts, $mh); - } else if($search->type == 9) { - require ABSPATH.'engines/search-magnet.php'; - $search_results = new MagnetSearch($search, $opts, $mh); - } - - $running = null; - - do { - $status = curl_multi_exec($mh, $running); - if($running) { - curl_multi_select($mh); - } - } while ($running && $status == CURLM_OK); - - $results = $search_results->get_results(); - - curl_multi_close($mh); - - // Add elapsed time to results - $results['time'] = number_format(microtime(true) - $start_time, 5, '.', ''); - - // Echoes results and special searches - $search_results->print_results($results, $search, $opts); + include_once('footer.php'); } else { - echo "
    "; - echo "

    Search query can not be empty!

    "; - echo "

    Not sure what went wrong? Learn more about user_auth."\" title=\"how to use Goosle!\">how to use Goosle.

    "; - echo "
    "; + include_once('error.php'); } ?> -
    - - - - -
    Redirecting
    - - diff --git a/stats.php b/stats.php index 88942f8..9644bc4 100644 --- a/stats.php +++ b/stats.php @@ -51,7 +51,7 @@

    Goosle

    - " name="q" /> + " name="q" /> @@ -83,19 +83,12 @@

    - - - -
    Redirecting
    - - +