mirror of
https://git.vern.cc/cobra/rural-dict.git
synced 2025-11-18 17:58:36 +05:30
87 lines
3.6 KiB
Python
87 lines
3.6 KiB
Python
#!/usr/bin/env python
|
|
## Copyright (C) 2023-2025 Skylar Astaroth <cobra@vern.cc>
|
|
## Copyright (C) 2024 Zubarev Grigoriy <thirtysix@thirtysix.pw>
|
|
## Copyright (C) 2024 Blair Noctis <ncts@debian.org>
|
|
##
|
|
## This file is part of Rural Dictionary (rural-dict)
|
|
##
|
|
## rural-dict is free software: you can redistribute it and/or modify it under
|
|
## the terms of the GNU Affero General Public License as published by the Free
|
|
## Software Foundation, either version 3 of the License, or (at your option) any
|
|
## later version.
|
|
##
|
|
## This program is distributed in the hope that it will be useful, but WITHOUT
|
|
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
|
## for more details.
|
|
##
|
|
## You should have received a copy of the GNU Affero General Public License
|
|
## along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
from flask import Flask, render_template, request, redirect
|
|
import requests
|
|
import html
|
|
import re
|
|
from bs4 import BeautifulSoup
|
|
from urllib.parse import quote, unquote
|
|
import os
|
|
|
|
def _fetch_votes(soup, defs, timeout):
    """Return the upstream vote counts for *defs*, or ``{}`` on any failure.

    Vote counts are optional decoration: a network error, a non-JSON reply,
    a reply without a ``votes`` key, or a page whose ``<body>`` carries no
    ``data-vote-signature`` attribute all result in an empty dict rather
    than an error.
    """
    try:
        return requests.get(
            'https://www.urbandictionary.com/api/vote?defids='
            + ','.join(defid for (_, defid) in defs)
            + '&signature=' + soup.body.get('data-vote-signature'),
            timeout=timeout,
        ).json()['votes']
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return {}


def scrape(url, timeout=10):
    """Fetch *url* and extract the definitions found in the returned HTML.

    Must be called while a Flask request is active, because the current
    request's URL is compared with the URL the upstream fetch ended on.

    Parameters:
        url: address of the upstream page to download.
        timeout: seconds passed as ``timeout=`` to both upstream requests,
            so a stalled upstream raises instead of blocking forever.

    Returns one of three shapes:
        * ``"REDIRECT <path>"`` (str) when the upstream fetch ended on a
          different path than the one the client asked us for;
        * ``["SIMILAR", word, ...]`` (list) when the page contains no
          definitions; the words are the link texts of the
          ``mt-5 list-none`` list, if that list is present;
        * ``(results, pagination)`` (tuple) otherwise. Each result is
          ``[defid, word, meaning, example, contributor, votes_up,
          votes_down]``: ``meaning`` and ``example`` are inner-HTML strings,
          ``contributor`` is the matched element (or ``None``), and the vote
          counts are ``None`` when unavailable. ``pagination`` is the
          matched element, or ``""`` when the page has none.
    """
    data = requests.get(url, timeout=timeout)

    # Reduce both URLs to "/" + whatever follows their last slash.
    # NOTE(review): the pattern is greedy, so a "/" inside the query string
    # truncates the result; kept as-is because the redirect check below
    # depends on both sides being reduced the same way.
    our_path = re.sub(r".*://.*/", "/", request.url)
    path = re.sub(r".*://.*/", "/", data.url)

    # Redirect only when the paths differ AND still differ once the "?", "&"
    # and "=" delimiters are dropped and our side's percent-encoding has been
    # normalised (unquote, then quote).
    if our_path != path and (
        quote(unquote(re.sub(r"[?&=]", "", our_path))) != re.sub(r"[?&=]", "", path)
    ):
        return f"REDIRECT {path}"

    soup = BeautifulSoup(data.text, "html.parser")

    # Every <div> with a data-word attribute is one definition card.
    defs = [(div, div.get('data-defid')) for div in soup.find_all("div") if div.get('data-word')]

    if not defs:
        # Nothing to render: return the suggestion list instead, without
        # asking upstream for votes that would never be used.
        similar = ["SIMILAR"]
        words = soup.find("ul", attrs={"class": ["mt-5 list-none"]})
        if words:
            similar.extend(link.get_text() for link in words.find_all("a"))
        return similar

    votes_data = _fetch_votes(soup, defs, timeout)

    ret = []
    for definition, defid in defs:
        word = definition.select("div div h1 a, div div h2 a")[0].get_text()
        meaning = definition.find(attrs={"class": ["break-words meaning mb-4"]}).decode_contents()
        example = definition.find(attrs={"class": ["break-words example italic mb-4"]}).decode_contents()
        contributor = definition.find(attrs={"class": ["contributor font-bold"]})
        votes = votes_data.get(str(defid), {})
        ret.append([defid, word, meaning, example, contributor, votes.get('up'), votes.get('down')])

    pages = soup.find(attrs={"class": ["pagination text-xl text-center"]})
    if pages is None:
        pages = ""

    return (ret, pages)
|
|
|
|
# WSGI application object; the route handlers in this file register on it.
app = Flask(
    __name__,
    template_folder="templates",
    static_folder="static",
)
|
|
|
|
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    """Serve any URL by scraping the same path and query string upstream.

    *path* is captured by the routes but unused by the body; the upstream
    address is rebuilt from ``request.url`` instead.

    Depending on what ``scrape`` returns, the response is a 302 redirect,
    the ``similar.html`` template (suggested words), or the ``index.html``
    template (definitions plus pagination).
    """
    # Everything after "scheme://host/" is forwarded verbatim. Splitting on
    # the FIRST slash after the host keeps multi-segment paths and query
    # values that contain "/" intact, and avoids a doubled slash after the
    # upstream host name.
    path_and_query = request.url.partition("://")[2].partition("/")[2]
    scraped = scrape(f"https://urbandictionary.com/{path_and_query}")

    if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
        # Strip only the leading marker, not later occurrences in the path.
        return redirect(scraped.replace("REDIRECT ", "", 1), 302)

    if scraped[0] == "SIMILAR":
        return render_template('similar.html', similar_words=scraped[1:], term=request.args.get("term"))

    return render_template('index.html', results=scraped[0], pagination=scraped[1], term=request.args.get("term"))
|
|
|
|
if __name__ == '__main__':
    # Run under waitress when executed as a script; the import stays local
    # so that merely importing this module does not require waitress.
    from waitress import serve

    # Bind address and port come from the environment, with defaults.
    bind_host = os.environ.get('RD_BIND', "0.0.0.0")
    bind_port = int(os.environ.get('RD_PORT', 2944))
    serve(app, host=bind_host, port=bind_port)
|