tipuesearch_content.json: filter only articles and improve the JSON API
parent ed201535a3
commit 9d22ec021f
@@ -62,9 +62,9 @@ if (isset($_GET['q'])) {
     $keywords = explode(' ', $keywords);
     $found_results = [];
 
-    foreach ($web_content["pages"] as $page) {
+    foreach ($web_content["videos"] as $page) {
         $score = 0;
-        $page['text'] = htmlentities($page['text']);
+        $page['description'] = htmlentities($page['description']);
 
         foreach ($keywords as $word) {
             if (preg_match("/$word/i", $page['url'])) {
@@ -78,16 +78,16 @@ if (isset($_GET['q'])) {
             }
             // It replaces uppercase matches with lowercase matches, but it's fine for now.
             if ($stop_words_ignored == 1) {
-                $page['text'] = preg_replace("/$word/i", $word, $page['text'], -1, $match_count);
+                $page['description'] = preg_replace("/$word/i", $word, $page['description'], -1, $match_count);
             } else {
-                $page['text'] = preg_replace("/$word/i", '<span class="tipue_search_content_bold highlighted">' . $word . '</span>', $page['text'], -1, $match_count);
+                $page['description'] = preg_replace("/$word/i", '<span class="tipue_search_content_bold highlighted">' . $word . '</span>', $page['description'], -1, $match_count);
             }
             if ($match_count > 0) {
                 $score += 10 * $match_count;
             }
         }
         if ($score != 0) {
-            $found_results[] = ['score' => $score, 'title' => $page['title'], 'url' => $page['url'], 'text' => $page['text']];
+            $found_results[] = ['score' => $score, 'title' => $page['title'], 'url' => $page['url'], 'description' => $page['description']];
         }
     }
 
@@ -123,7 +123,7 @@ if (isset($_GET['q'])) {
         printf('<div class="tipue_search_content_title"><a class="is-cyan" href="%s">%s</a></div>', $found_result['url'], $found_result['title']);
         printf('<div class="tipue_search_content_url"><a href="%s">%s</a></div>', $found_result['url'], $found_result['url']);
 
-        $description_words = explode(' ', $found_result['text']);
+        $description_words = explode(' ', $found_result['description']);
         $description_words_count = count($description_words);
         $first_match = false;
         for ($i = 0; $i < $description_words_count; $i++) {
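The handler above now walks the "videos" array and scores and highlights the "description" field instead of "text". As an illustration of that consumption path only, here is a minimal Python sketch: the file name, the highlight markup, and the 10-points-per-match weight come from the diff, while query splitting, regex escaping, and the final sort by score are assumptions.

import json
import re

def search(query, content_path='tipuesearch_content.json'):
    # Sketch of the PHP endpoint's logic, not the project's actual code.
    with open(content_path, encoding='utf-8') as fd:
        web_content = json.load(fd)

    keywords = query.split()
    found_results = []

    for video in web_content['videos']:          # was web_content['pages']
        score = 0
        description = video['description']       # was video['text']
        for word in keywords:
            pattern = re.compile(re.escape(word), re.IGNORECASE)
            # Wrap each match in the highlight span and count the matches.
            description, match_count = pattern.subn(
                '<span class="tipue_search_content_bold highlighted">%s</span>' % word,
                description)
            score += 10 * match_count
        if score:
            found_results.append({'score': score,
                                  'title': video['title'],
                                  'url': video['url'],
                                  'description': description})

    return sorted(found_results, key=lambda r: r['score'], reverse=True)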
@@ -32,66 +32,107 @@ class Tipue_Search_JSON_Generator(object):
         self.siteurl = settings.get('SITEURL')
         self.relative_urls = settings.get('RELATIVE_URLS')
         self.tpages = settings.get('TEMPLATE_PAGES')
+        self.tstatic = settings.get('THEME_STATIC_DIR')
         self.output_path = output_path
         self.json_nodes = []
 
-    def create_json_node(self, page):
+    def create_json_node(self, article):
 
-        if getattr(page, 'status', 'published') != 'published':
+        if getattr(article, 'status', 'published') != 'published':
             return
 
-        soup_title = BeautifulSoup(page.title.replace('&nbsp;', ' '), 'html.parser')
-        page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '&#94;')
+        soup_title = BeautifulSoup(article.title.replace('&nbsp;', ' '), 'html.parser')
+        video_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '&#94;')
 
-        soup_text = BeautifulSoup(page.content, 'html.parser')
-        page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '&#94;')
-        page_text = ' '.join(page_text.split())
+        soup_text = BeautifulSoup(article.content, 'html.parser')
+        video_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '&#94;')
+        video_text = ' '.join(video_text.split())
 
-        page_category = page.category.name if getattr(page, 'category', 'None') != 'None' else ''
+        if self.relative_urls:
+            image_url = '.'
+        else:
+            image_url = self.siteurl
 
-        page_url = '.'
-        if page.url:
-            page_url = page.url if self.relative_urls else (self.siteurl + '/' + page.url)
+        # thumbnail
+        video_image = article.image if getattr(
+            article, 'image', 'None') != 'None' else ''
 
-        node = {'title': page_title,
-                'text': page_text,
-                'tags': page_category,
-                'url': page_url}
+        url_image = "%s/%s/../wp-content/uploads/article/poster/%s" % (
+            image_url, self.tstatic, video_image
+        )
+
+        # publish
+        video_publish = article.date.strftime("%a, %d %B, %Y") if getattr(
+            article, 'date', 'None') != 'None' else ''
+
+        # author
+        video_author = str(article.author) if getattr(
+            article, 'author', 'None') != 'None' else ''
+
+        # time
+        video_time = article.time if getattr(
+            article, 'time', 'None') != 'None' else ''
+
+        video_url = '.'
+        if article.url:
+            video_url = article.url if self.relative_urls else (
+                self.siteurl + '/' + article.url)
+
+        video_src = article.og_video if getattr(
+            article, 'og_video', 'None') != 'None' else ''
+
+        video_category = article.category.name if getattr(
+            article, 'category', 'None') != 'None' else ''
+
+        node = {'title': video_title,
+                'description': video_text,
+                'videoThumbnail': url_image,
+                'formatStreams': {
+                    'url': video_src,
+                },
+                'author': video_author,
+                'publishedText': video_publish,
+                'time': video_time,
+                'tags': video_category,
+                'url': video_url}
 
         self.json_nodes.append(node)
 
     def create_tpage_node(self, srclink):
 
-        srcfile = open(os.path.join(self.output_path, self.tpages[srclink]), encoding='utf-8')
+        srcfile = open(os.path.join(self.output_path,
+                                    self.tpages[srclink]),
+                       encoding='utf-8')
         soup = BeautifulSoup(srcfile, 'html.parser')
-        page_title = soup.title.string if soup.title is not None else ''
-        page_text = soup.get_text()
+        video_title = soup.title.string if soup.title is not None else ''
+        video_text = soup.get_text()
 
         # Should set default category?
-        page_category = ''
-        page_url = urljoin(self.siteurl, self.tpages[srclink])
+        video_category = ''
+        video_url = urljoin(self.siteurl, self.tpages[srclink])
 
-        node = {'title': page_title,
-                'text': page_text,
-                'tags': page_category,
-                'url': page_url}
+        node = {'title': video_title,
+                'text': video_text,
+                'tags': video_category,
+                'url': video_url}
 
         self.json_nodes.append(node)
 
     def generate_output(self, writer):
         path = os.path.join(self.output_path, 'tipuesearch_content.json')
 
-        pages = self.context['pages'] + self.context['articles']
+        articles = self.context['articles']
 
         for article in self.context['articles']:
-            pages += article.translations
+            articles += article.translations
 
         for srclink in self.tpages:
             self.create_tpage_node(srclink)
 
-        for page in pages:
-            self.create_json_node(page)
+        for article in articles:
+            self.create_json_node(article)
 
-        root_node = {'pages': self.json_nodes}
+        root_node = {'videos': self.json_nodes}
 
         with open(path, 'w', encoding='utf-8') as fd:
             json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
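After this change the generated tipuesearch_content.json is keyed by "videos" instead of "pages", and each entry carries the richer fields built in create_json_node (description, videoThumbnail, formatStreams, author, publishedText, time). A rough sketch of the resulting shape is below; the keys come from the node dict in the diff, while every value is an invented placeholder (the date string simply follows the "%a, %d %B, %Y" format used above, and the thumbnail path follows the THEME_STATIC_DIR pattern).

import json

# Illustrative only: the approximate shape of one entry in the rewritten
# tipuesearch_content.json. All values are made-up examples.
example_output = {
    'videos': [
        {
            'title': 'Some article title',
            'description': 'Plain-text body extracted with BeautifulSoup ...',
            'videoThumbnail': './theme/../wp-content/uploads/article/poster/example.jpg',
            'formatStreams': {'url': 'https://example.com/videos/example.mp4'},
            'author': 'Author Name',
            'publishedText': 'Mon, 01 January, 2024',
            'time': '12:34',
            'tags': 'Category name',
            'url': 'https://example.com/some-article.html',
        }
    ]
}

print(json.dumps(example_output, indent=2, ensure_ascii=False))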