mirror of
https://github.com/parchlinuxB/Gitee.git
synced 2025-02-23 02:15:43 -05:00
Partial reverse engineering of the DuckDuckGo (DDG) engines including an improved language and region handling based on the engine.traits_v1 data. - DDG Lite - DDG Instant Answer API - DDG Images - DDG Weather docs/src/searx.engine.duckduckgo.rst: Online documentation of the DDG engines (make docs.live) searx/data/engine_traits.json Add data type "traits_v1" generated by the fetch_traits() functions from: - "duckduckgo" (WEB), - "duckduckgo images" and - "duckduckgo weather" and remove data from obsolete data type "supported_languages". searx/autocomplete.py: Reverse engineered Autocomplete from DDG. Supports DDG's languages. searx/engines/duckduckgo.py: - fetch_traits(): Fetch languages & regions from DDG. - get_ddg_lang(): Get DDG's language identifier from SearXNG's locale. DDG defines its languages by region codes. DDG-Lite does not offer a language selection to the user, only a region can be selected by the user. - Cache ``vqd`` value: The vqd value depends on the query string and is needed for the follow up pages or the images loaded by a XMLHttpRequest (DDG images). The ``vqd`` value of a search term is stored for 10min in the redis DB. - DDG Lite engine: reverse engineered request method with improved language and region support and better ``vqd`` handling. searx/engines/duckduckgo_definitions.py: DDG Instant Answer API The *instant answers* API does not support languages, or at least we could not find out how language support should work. It seems that most of the features are based on English terms. searx/engines/duckduckgo_images.py: DDG Images Reverse engineered request method. Improved language and region handling based on cookies and the engine.traits_v1 data. Response: add image format to the result list searx/engines/duckduckgo_weather.py: DDG Weather Improved language and region handling based on cookies and the engine.traits_v1 data. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
222 lines
5.9 KiB
Python
222 lines
5.9 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
# lint: pylint
|
|
"""This module implements functions needed for the autocompleter.
|
|
|
|
"""
|
|
# pylint: disable=use-dict-literal
|
|
|
|
from json import loads
|
|
from urllib.parse import urlencode
|
|
|
|
from lxml import etree
|
|
from httpx import HTTPError
|
|
|
|
from searx import settings
|
|
from searx.engines import engines
|
|
from searx.network import get as http_get
|
|
from searx.exceptions import SearxEngineResponseException
|
|
|
|
# a fetch_supported_languages() for XPath engines isn't available right now
|
|
# _brave = ENGINES_LANGUAGES['brave'].keys()
|
|
|
|
|
|
def get(*args, **kwargs):
    """Issue a GET request via ``http_get`` with autocompleter defaults.

    If the caller passes no ``timeout``, the value of
    ``settings['outgoing']['request_timeout']`` is used.  The keyword
    ``raise_for_httperror`` is always forced to ``True``, overriding whatever
    the caller passed.

    All positional and keyword arguments are forwarded to ``http_get`` and its
    return value is returned unchanged.
    """
    if 'timeout' in kwargs:
        timeout = kwargs['timeout']
    else:
        timeout = settings['outgoing']['request_timeout']

    kwargs.update(timeout=timeout, raise_for_httperror=True)
    return http_get(*args, **kwargs)
|
|
|
|
|
|
def brave(query, _lang):
    """Autocomplete from Brave search.

    :param query: the (partial) search term typed by the user.
    :param _lang: ignored, the request is always sent with the ``country``
        cookie set to ``all``.
    :return: list of suggestions; an empty list when the response is not OK
        or does not carry a suggestion list.
    """
    url = 'https://search.brave.com/api/suggest?' + urlencode({'q': query})

    # Language / country specific suggestions are not implemented for this
    # backend, the country cookie is therefore fixed to 'all'.
    resp = get(url, cookies={'country': 'all'})

    if not resp.ok:
        return []

    data = resp.json()
    # The suggestions are read from the second element of the response.  Guard
    # against a shorter response (same check the duckduckgo, wikipedia and
    # yandex backends do) instead of failing with an IndexError, which is not
    # caught by search_autocomplete().
    if len(data) > 1:
        return list(data[1])
    return []
|
|
|
|
|
|
def dbpedia(query, _lang):
    """Autocomplete from the DBpedia lookup service.

    The ``_lang`` argument is ignored.  The XML response is parsed and the
    text nodes below ``//Result/Label`` are returned; when the response is not
    OK an empty list is returned.
    """
    params = urlencode(dict(QueryString=query))
    response = get('https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?' + params)

    if not response.ok:
        return []

    xml_root = etree.fromstring(response.content)
    return xml_root.xpath('//Result/Label//text()')
|
|
|
|
|
|
def duckduckgo(query, sxng_locale):
    """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages.

    The region sent as ``kl`` argument is looked up in the traits of the
    ``duckduckgo`` engine; the traits' ``all_locale`` is the fallback.  The
    suggestions are taken from the second element of the JSON response, an
    empty list is returned when the response is not OK or has less than two
    elements.
    """
    ddg_traits = engines['duckduckgo'].traits
    region = ddg_traits.get_region(sxng_locale, ddg_traits.all_locale)

    resp = get('https://duckduckgo.com/ac/?type=list&' + urlencode({'q': query, 'kl': region}))
    if not resp.ok:
        return []

    payload = resp.json()
    return payload[1] if len(payload) > 1 else []
|
|
|
|
|
|
def google(query, lang):
    """Autocomplete from Google's ``toolbar`` suggestion endpoint.

    ``lang`` is sent as the ``hl`` argument.  The XML response is parsed and
    the ``data`` attributes of all ``suggestion`` elements are returned; when
    the response is not OK an empty list is returned.
    """
    params = urlencode(dict(hl=lang, q=query))
    response = get('https://suggestqueries.google.com/complete/search?client=toolbar&' + params)

    if not response.ok:
        return []

    xml_root = etree.fromstring(response.text)
    return xml_root.xpath('//suggestion/@data')
|
|
|
|
|
|
def seznam(query, _lang):
    """Autocomplete from Seznam (``_lang`` is ignored).

    Only items of type ``ItemType.TEXT`` are used.  Each suggestion is the
    concatenation of the ``text`` values of the parts in the item's ``text``
    list.  When the response is not OK an empty list is returned.
    """
    params = {
        'phrase': query,
        'cursorPosition': len(query),
        'format': 'json-2',
        'highlight': '1',
        'count': '6',
    }
    resp = get('https://suggest.seznam.cz/fulltext/cs?' + urlencode(params))

    if not resp.ok:
        return []

    suggestions = []
    for item in resp.json().get('result', []):
        if item.get('itemType', None) != 'ItemType.TEXT':
            continue
        # a suggestion is split into several parts, glue them together again
        suggestions.append(''.join(part.get('text', '') for part in item.get('text', [])))
    return suggestions
|
|
|
|
|
|
def startpage(query, sxng_locale):
    """Autocomplete from Startpage. Supports Startpage's languages.

    The language sent as ``lui`` argument is looked up in the traits of the
    ``startpage`` engine (fallback ``english``).  Returns the ``text`` value
    of every entry in the response's ``suggestions`` list that has one.
    """
    language = engines['startpage'].traits.get_language(sxng_locale, 'english')
    params = urlencode({'q': query, 'segment': 'startpage.udog', 'lui': language})

    data = get('https://startpage.com/suggestions?' + params).json()

    suggestions = []
    for entry in data.get('suggestions', []):
        if 'text' in entry:
            suggestions.append(entry['text'])
    return suggestions
|
|
|
|
|
|
def swisscows(query, _lang):
    """Autocomplete from Swisscows (``_lang`` is ignored).

    Requests five items and returns the decoded JSON body of the response as
    it is.
    """
    params = urlencode({'query': query})
    response = get('https://swisscows.ch/api/suggest?' + params + '&itemsCount=5')
    return loads(response.text)
|
|
|
|
|
|
def qwant(query, sxng_locale):
    """Autocomplete from Qwant. Supports Qwant's regions.

    The region sent as ``locale`` argument is looked up in the traits of the
    ``qwant`` engine (fallback ``en_US``).  Returns the ``value`` of each item
    in ``data.items`` when the response is OK and its ``status`` is
    ``success``, otherwise an empty list.
    """
    region = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
    params = urlencode({'q': query, 'locale': region, 'version': '2'})
    resp = get('https://api.qwant.com/v3/suggest?' + params)

    if not resp.ok:
        return []

    data = resp.json()
    if data['status'] != 'success':
        return []

    return [item['value'] for item in data['data']['items']]
|
|
|
|
|
|
def wikipedia(query, sxng_locale):
    """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc).

    The language is looked up in the traits of the ``wikipedia`` engine
    (fallback ``en``) and mapped to a network location by the traits' custom
    ``wiki_netloc`` table (fallback ``en.wikipedia.org``).  The suggestions
    are taken from the second element of the ``opensearch`` response, an empty
    list is returned when the response is not OK or has less than two
    elements.
    """
    wiki_traits = engines['wikipedia'].traits
    language = wiki_traits.get_language(sxng_locale, 'en')
    netloc = wiki_traits.custom['wiki_netloc'].get(language, 'en.wikipedia.org')

    params = {
        'action': 'opensearch',
        'format': 'json',
        'formatversion': '2',
        'search': query,
        'namespace': '0',
        'limit': '10',
    }
    resp = get('https://' + netloc + '/w/api.php?' + urlencode(params))

    if not resp.ok:
        return []

    data = resp.json()
    return data[1] if len(data) > 1 else []
|
|
|
|
|
|
def yandex(query, _lang):
    """Autocomplete from Yandex (``_lang`` is ignored).

    The suggestions are taken from the second element of the decoded JSON
    body; an empty list is returned when the body has less than two elements.
    """
    params = urlencode(dict(part=query))
    response = get("https://suggest.yandex.com/suggest-ff.cgi?" + params)

    data = loads(response.text)
    return data[1] if len(data) > 1 else []
|
|
|
|
|
|
# Registry of the autocompleters: maps the name of a backend to the function
# implementing it.  Each function is called with the query string and a
# locale and returns a list of suggestions.
backends = dict(
    dbpedia=dbpedia,
    duckduckgo=duckduckgo,
    google=google,
    seznam=seznam,
    startpage=startpage,
    swisscows=swisscows,
    qwant=qwant,
    wikipedia=wikipedia,
    brave=brave,
    yandex=yandex,
)
|
|
|
|
|
|
def search_autocomplete(backend_name, query, sxng_locale):
    """Return the suggestions of the autocompleter ``backend_name``.

    :param backend_name: key of the autocompleter in ``backends``.
    :param query: the (partial) search term typed by the user.
    :param sxng_locale: locale passed to the backend.  For backends whose
        engine does not provide ``traits_v1`` data (or has no entry in
        ``engines`` at all) the locale is reduced to its language part and
        ``en`` is used when it is empty or ``all``.
    :return: list of suggestions; an empty list when the backend is unknown or
        the request fails with ``HTTPError`` or
        ``SearxEngineResponseException``.
    """
    backend = backends.get(backend_name)
    if backend is None:
        return []

    # Not every autocompleter has an entry of the same name in ``engines``.
    # A missing engine must not break the autocompleter with a KeyError, it is
    # handled like an engine without traits_v1 data.
    try:
        engine = engines[backend_name]
    except KeyError:
        engine = None

    if engine is None or engine.traits.data_type != "traits_v1":
        # vintage / deprecated
        if not sxng_locale or sxng_locale == 'all':
            sxng_locale = 'en'
        else:
            sxng_locale = sxng_locale.split('-')[0]

    try:
        return backend(query, sxng_locale)
    except (HTTPError, SearxEngineResponseException):
        return []
|