From 5ff2b87c2eca1defceb73dac0989c212c776cbfe Mon Sep 17 00:00:00 2001 From: vdbhb59 Date: Fri, 18 Apr 2025 16:33:34 +0530 Subject: [PATCH] Synced fixed bing images failing to load, added flickr(https://git.lolcat.ca/lolcat/4get/commit/ff8b1addf7059a8289049e937475c289e1ba407d) fixed bing images failing to load, added flickr (https://git.lolcat.ca/lolcat/4get/commit/ff8b1addf7059a8289049e937475c289e1ba407d) // (https://git.lolcat.ca/lolcat/4get/commit/077692db4904df65302be05da64a0cbcddf18fbb // (https://git.lolcat.ca/lolcat/4get/raw/commit/ff8b1addf7059a8289049e937475c289e1ba407d/scraper/qwant.php // (https://git.lolcat.ca/lolcat/4get/commit/566680fe3603f8fc12161612b5857ec024ec63e3) // (https://git.lolcat.ca/lolcat/4get/commit/8d07e72dfe3964f4352e70dcc680b301acc28569) // (https://git.lolcat.ca/lolcat/4get/commit/4b85841a3e3485874e1b58dd9c839528e1293782) --- lib/frontend.php | 7 +- proxy.php | 58 +++++-- scraper/ddg.php | 29 +++- scraper/flickr.php | 415 +++++++++++++++++++++++++++++++++++++++++++++ scraper/qwant.php | 84 +++++++-- settings.php | 4 + 6 files changed, 561 insertions(+), 36 deletions(-) create mode 100644 scraper/flickr.php diff --git a/lib/frontend.php b/lib/frontend.php index 6b439e2..4156a76 100644 --- a/lib/frontend.php +++ b/lib/frontend.php @@ -939,6 +939,7 @@ class frontend{ "brave" => "Brave", "yandex" => "Yandex", "google" => "Google", + //"google_api" => "Google API", "google_cse" => "Google CSE", "startpage" => "Startpage", "qwant" => "Qwant", @@ -970,10 +971,12 @@ class frontend{ "yep" => "Yep", "solofield" => "Solofield", "pinterest" => "Pinterest", + "flickr" => "Flickr", "fivehpx" => "500px", "vsco" => "VSCO", "imgur" => "Imgur", - "ftm" => "FindThatMeme" + "ftm" => "FindThatMeme", + //"sankakucomplex" => "SankakuComplex" ] ]; break; @@ -1339,4 +1342,4 @@ class frontend{ return $page . "?" . $query . "&npt=" . $npt; } -} +} \ No newline at end of file diff --git a/proxy.php b/proxy.php index e4e8541..c782a24 100644 --- a/proxy.php +++ b/proxy.php @@ -34,36 +34,60 @@ try{ ) ){ - if( - !isset($image["query"]) || - !isset($image["path"]) || - $image["path"] != "/th" - ){ + if(!isset($image["path"])){ - header("X-Error: Invalid bing image path"); + header("X-Error: Missing bing image path"); $proxy->do404(); die(); } - parse_str($image["query"], $str); - - if(!isset($str["id"])){ + // + // get image ID + // formations: + // https://tse2.mm.bing.net/th/id/OIP.3yLBkUPn8EXA1wlhWP2BHwHaE3 + // https://tse2.mm.bing.net/th?id=OIP.3yLBkUPn8EXA1wlhWP2BHwHaE3 + // + $id = null; + if(isset($image["query"])){ - header("X-Error: Missing bing ID"); + parse_str($image["query"], $str); + + if(isset($str["id"])){ + + $id = $str["id"]; + } + } + + if($id === null){ + + $id = explode("/th/id/", $image["path"], 2); + + if(count($id) !== 2){ + + // malformed + return $url; + } + + $id = $id[1]; + } + + if(is_array($id)){ + + header("X-Error: Missing bing id parameter"); $proxy->do404(); die(); } switch($_GET["s"]){ - case "portrait": $req = "&w=50&h=90&p=0&qlt=90"; break; - case "landscape": $req = "&w=160&h=90&p=0&qlt=90"; break; - case "square": $req = "&w=90&h=90&p=0&qlt=90"; break; - case "thumb": $req = "&w=236&h=180&p=0&qlt=90"; break; - case "cover": $req = "&w=207&h=270&p=0&qlt=90"; break; + case "portrait": $req = "?w=50&h=90&p=0&qlt=90"; break; + case "landscape": $req = "?w=160&h=90&p=0&qlt=90"; break; + case "square": $req = "?w=90&h=90&p=0&qlt=90"; break; + case "thumb": $req = "?w=236&h=180&p=0&qlt=90"; break; + case "cover": $req = "?w=207&h=270&p=0&qlt=90"; break; } - $proxy->stream_linear_image("https://" . $image["host"] . "/th?id=" . urlencode($str["id"]) . $req, "https://www.bing.com"); + $proxy->stream_linear_image("https://" . $image["host"] . "/th/id/" . urlencode($id) . $req, "https://www.bing.com"); die(); } @@ -153,4 +177,4 @@ try{ header("X-Error: " . $error->getMessage()); $proxy->do404(); die(); -} +} \ No newline at end of file diff --git a/scraper/ddg.php b/scraper/ddg.php index bae02e5..6231889 100644 --- a/scraper/ddg.php +++ b/scraper/ddg.php @@ -1943,10 +1943,33 @@ class ddg{ private function bingimg($url){ - $parse = parse_url($url); - parse_str($parse["query"], $parts); + $image = parse_url($url); - return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]); + $id = null; + if(isset($image["query"])){ + + parse_str($image["query"], $str); + + if(isset($str["id"])){ + + $id = $str["id"]; + } + } + + if($id === null){ + + $id = explode("/th/id/", $image["path"], 2); + + if(count($id) !== 2){ + + // malformed + return $url; + } + + $id = $id[1]; + } + + return "https://" . $image["host"] . "/th/id/" . $id; } private function bingratio($width, $height){ diff --git a/scraper/flickr.php b/scraper/flickr.php new file mode 100644 index 0000000..b13c050 --- /dev/null +++ b/scraper/flickr.php @@ -0,0 +1,415 @@ +backend = new backend("flickr"); + + include "lib/fuckhtml.php"; + $this->fuckhtml = new fuckhtml(); + } + + public function getfilters($page){ + + return [ + "nsfw" => [ + "display" => "NSFW", + "option" => [ + "yes" => "Yes", + "maybe" => "Maybe", + "no" => "No", + ] + ], + "sort" => [ + "display" => "Sort by", + "option" => [ + "relevance" => "Relevance", + "date-posted-desc" => "Newest uploads", + "date-posted-asc" => "Oldest uploads", + "date-taken-desc" => "Newest taken", + "date-taken-asc" => "Oldest taken", + "interestingness-desc" => "Interesting" + ] + ], + "color" => [ + "display" => "Color", + "option" => [ + "any" => "Any color", + // color_codes= + "0" => "Red", + "1" => "Brown", + "2" => "Orange", + "b" => "Pink", + "4" => "Yellow", + "3" => "Golden", + "5" => "Lime", + "6" => "Green", + "7" => "Sky blue", + "8" => "Blue", + "9" => "Purple", + "a" => "Hot pink", + "c" => "White", + "d" => "Gray", + "e" => "Black", + // styles= override + "blackandwhite" => "Black & white", + ] + ], + "style" => [ // styles= + "display" => "Style", + "option" => [ + "any" => "Any style", + "depthoffield" => "Depth of field", + "minimalism" => "Minimalism", + "pattern" => "Patterns" + ] + ], + "license" => [ + "display" => "License", + "option" => [ + "any" => "Any license", + "1,2,3,4,5,6,9,11,12,13,14,15,16" => "All creative commons", + "4,5,6,9,10,11,12,13" => "Commercial use allowed", + "1,2,4,5,9,10,11,12,14,15" => "Modifications allowed", + "4,5,9,10,11,12" => "Commercial use & mods allowed", + "7,9,10" => "No known copyright restrictions", + "8" => "U.S Government works" + ] + ] + ]; + } + + private function get($proxy, $url, $get = [], $reqtype){ + + $curlproc = curl_init(); + + if($get !== []){ + $get = http_build_query($get); + $url .= "?" . $get; + } + + curl_setopt($curlproc, CURLOPT_URL, $url); + + curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding + + if($reqtype === flickr::req_web){ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "DNT: 1", + "Sec-GPC: 1", + "Connection: keep-alive", + "Upgrade-Insecure-Requests: 1", + "Sec-Fetch-Dest: document", + "Sec-Fetch-Mode: navigate", + "Sec-Fetch-Site: same-origin", + "Sec-Fetch-User: ?1", + "Priority: u=0, i", + "TE: trailers"] + ); + }else{ + + curl_setopt($curlproc, CURLOPT_HTTPHEADER, + ["User-Agent: " . config::USER_AGENT, + "Accept: */*", + "Accept-Language: en-US,en;q=0.5", + "Accept-Encoding: gzip", + "Origin: https://www.flickr.com", + "DNT: 1", + "Sec-GPC: 1", + "Connection: keep-alive", + "Referer: https://www.flickr.com/", + // Cookie: + "Sec-Fetch-Dest: empty", + "Sec-Fetch-Mode: cors", + "Sec-Fetch-Site: same-site", + "TE: trailers"] + ); + } + + curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2); + curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true); + curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($curlproc, CURLOPT_TIMEOUT, 30); + + // http2 bypass + curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); + + $this->backend->assign_proxy($curlproc, $proxy); + + $data = curl_exec($curlproc); + + if(curl_errno($curlproc)){ + + throw new Exception(curl_error($curlproc)); + } + + curl_close($curlproc); + return $data; + } + + public function image($get){ + + if($get["npt"]){ + + [$filters, $proxy] = + $this->backend->get( + $get["npt"], "images" + ); + + $filters = json_decode($filters, true); + + // Workaround for the future, if flickr deprecates &page argument on html page + /* + try{ + + $json = + $this->get( + $proxy, + "https://api.flickr.com/services/rest", + [ + "sort" => $data["sort"], + "parse_tags" => 1, + // url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o + "extras" => "can_comment,can_print,count_comments,count_faves,description,isfavorite,license,media,needs_interstitial,owner_name,path_alias,realname,rotation,url_sq,url_q,url_t,url_s,url_n,url_w,url_m,url_z,url_c,url_l", + "per_page" => 100, + "page" => $data["page"], + "lang" => "en-US", + "text" => $data["search"], + "viewerNSID" => "", + "method" => "flickr.photos.search", + "csrf" => "", + "api_key" => $data["api_key"], + "format" => "json", + "hermes" => 1, + "hermesClient" => 1, + "reqId" => $data["reqId"], + "nojsoncallback" => 1 + ] + ); + }catch(Exception $error){ + + throw new Exception("Failed to fetch JSON"); + }*/ + + }else{ + + if(strlen($get["s"]) === 0){ + + throw new Exception("Search term is empty!"); + } + + $proxy = $this->backend->get_ip(); + + // compute filters + $filters = [ + "page" => 1, + "sort" => $get["sort"] + ]; + + if($get["style"] != "any"){ + + $filters["styles"] = $get["style"]; + } + + if($get["color"] != "any"){ + + if($get["color"] != "blackandwhite"){ + + $filters["color_codes"] = $get["color"]; + }else{ + + $filters["styles"] = "blackandwhite"; + } + } + + if($get["license"] != "any"){ + + $filters["license"] = $get["license"]; + } + + switch($get["nsfw"]){ + + case "yes": $filters["safe_search"] = 0; break; + case "maybe": $filters["safe_search"] = 2; break; + case "no": $filters["safe_search"] = 1; break; + } + } + + $get_params = [ + "text" => $get["s"], + "per_page" => 50, + // scrape highest resolution + "extras" => "url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_5k,url_6k,url_o", + "view_all" => 1 + ]; + + $get_params = array_merge($get_params, $filters); + + $html = + $this->get( + $proxy, + "https://www.flickr.com/search/", + $get_params, + flickr::req_web + ); + + // @TODO + // get api_key and reqId, if flickr deprecates &page + + $this->fuckhtml->load($html); + + // + // get response JSON + // + $scripts = + $this->fuckhtml + ->getElementsByClassName( + "modelExport", + "script" + ); + + $found = false; + foreach($scripts as $script){ + + $json = + preg_split( + '/modelExport: ?/', + $script["innerHTML"], + 2 + ); + + if(count($json) !== 0){ + + $found = true; + $json = $json[1]; + break; + } + } + + if($found === false){ + + throw new Exception("Failed to grep JSON"); + } + + $json = + json_decode( + $this->fuckhtml + ->extract_json( + $json + ), + true + ); + + if($json === null){ + + throw new Exception("Failed to decode JSON"); + } + + $out = [ + "status" => "ok", + "npt" => null, + "image" => [] + ]; + + if(!isset($json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"]["_data"])){ + + throw new Exception("Failed to access data object"); + } + + foreach($json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"]["_data"] as $image){ + + if(!isset($image["data"])){ + + // flickr likes to gives us empty array objects + continue; + } + + $image = $image["data"]; + + $title = []; + + if(isset($image["title"])){ + + $title[] = + $this->fuckhtml + ->getTextContent( + $image["title"] + ); + } + + if(isset($image["description"])){ + + $title[] = + $this->fuckhtml + ->getTextContent( + str_replace( + "\n", + " ", + $image["description"] + ) + ); + } + + $title = implode(": ", $title); + + $sources = array_values($image["sizes"]["data"]); + + $suitable_sizes = ["n", "m", "w", "s"]; + + $thumb = &$sources[0]["data"]; + foreach($suitable_sizes as $testing_size){ + + if(isset($image["sizes"]["data"][$testing_size])){ + + $thumb = &$image["sizes"]["data"][$testing_size]["data"]; + break; + } + } + + $og = &$sources[count($sources) - 1]["data"]; + + $out["image"][] = [ + "title" => $title, + "source" => [ + [ + "url" => "https:" . $og["displayUrl"], + "width" => (int)$og["width"], + "height" => (int)$og["height"] + ], + [ + "url" => "https:" . $thumb["displayUrl"], + "width" => (int)$thumb["width"], + "height" => (int)$thumb["height"] + ] + ], + "url" => "https://www.flickr.com/photos/" . $image["ownerNsid"] . "/" . $image["id"] . "/" + ]; + } + + $total_items = (int)$json["main"]["search-photos-lite-models"][0]["data"]["photos"]["data"]["totalItems"]; + + if(($filters["page"]) * 50 < $total_items){ + + $filters["page"]++; + + $out["npt"] = + $this->backend->store( + json_encode($filters), + "images", + $proxy + ); + } + + return $out; + } +} \ No newline at end of file diff --git a/scraper/qwant.php b/scraper/qwant.php index beaa57c..702ffd1 100644 --- a/scraper/qwant.php +++ b/scraper/qwant.php @@ -410,10 +410,7 @@ class qwant{ "thumb" => $answer["data"]["result"]["thumbnail"]["landscape"] == null ? null : - $this->unshitimage( - $answer["data"]["result"]["thumbnail"]["landscape"], - false - ), + $this->unshitimage($answer["data"]["result"]["thumbnail"]["landscape"]), "table" => [], "sublink" => [] ]; @@ -770,7 +767,7 @@ class qwant{ }else{ $thumb = [ - "url" => $this->unshitimage($video["thumbnail"], false), + "url" => $this->unshitimage($video["thumbnail"]), "ratio" => "16:9" ]; } @@ -870,7 +867,7 @@ class qwant{ }else{ $thumb = [ - "url" => $this->unshitimage($news["media"][0]["pict_big"]["url"], false), + "url" => $this->unshitimage($news["media"][0]["pict_big"]["url"]), "ratio" => "16:9" ]; } @@ -920,18 +917,77 @@ class qwant{ return trim($text, ". "); } - private function unshitimage($url, $is_bing = true){ + private function unshitimage($url){ // https://s1.qwant.com/thumbr/0x0/8/d/f6de4deb2c2b12f55d8bdcaae576f9f62fd58a05ec0feeac117b354d1bf5c2/th.jpg?u=https%3A%2F%2Fwww.bing.com%2Fth%3Fid%3DOIP.vvDWsagzxjoKKP_rOqhwrQAAAA%26w%3D160%26h%3D160%26c%3D7%26pid%3D5.1&q=0&b=1&p=0&a=0 - parse_str(parse_url($url)["query"], $parts); + // https://s2.qwant.com/thumbr/474x289/7/f/412d13b3fe3a03eb2b89633c8e88b609b7d0b93cdd9a5e52db3c663e41e65e/th.jpg?u=https%3A%2F%2Ftse.mm.bing.net%2Fth%3Fid%3DOIP.9Tm_Eo6m7V7ltN19mxduDgHaEh%26pid%3DApi&q=0&b=1&p=0&a=0 - if($is_bing){ - $parse = parse_url($parts["u"]); - parse_str($parse["query"], $parts); + $image = parse_url($url); + + if( + !isset($image["host"]) || + !isset($image["query"]) + ){ - return "https://" . $parse["host"] . "/th?id=" . urlencode($parts["id"]); + // cant do anything + return $url; } - return $parts["u"]; + $id = null; + + if( + preg_match( + '/s[0-9]+\.qwant\.com$/', + $image["host"] + ) + ){ + + parse_str($image["query"], $str); + + // we're being served a proxy URL + if(isset($str["u"])){ + + $bing_url = $str["u"]; + }else{ + + // give up + return $url; + } + } + + // parse bing URL + $id = null; + $image = parse_url($bing_url); + + if(isset($image["query"])){ + + parse_str($image["query"], $str); + + if(isset($str["id"])){ + + $id = $str["id"]; + } + } + + if($id === null){ + + $id = explode("/th/id/", $image["path"], 2); + + if(count($id) !== 2){ + + // malformed + return $url; + } + + $id = $id[1]; + } + + if(is_array($id)){ + + // fuck off, let proxy.php deal with it + return $url; + } + + return "https://" . $image["host"] . "/th/id/" . $id; } -} +} \ No newline at end of file diff --git a/settings.php b/settings.php index b150e10..f480f1d 100644 --- a/settings.php +++ b/settings.php @@ -231,6 +231,10 @@ $settings = [ "value" => "pinterest", "text" => "Pinterest" ], + [ + "value" => "flickr", + "text" => "Flickr" + ], [ "value" => "fivehpx", "text" => "500px"