176 lines
7.2 KiB
Markdown
176 lines
7.2 KiB
Markdown
Author: Jorge Maldonado Ventura
|
|
Date: 2017-04-22 20:38
|
|
Modified: 2018-01-11 16:38
|
|
Save_as: buscar.php
|
|
Status: hidden
|
|
Title: Resultados
|
|
|
|
<div id="tipue_search_content">
|
|
<?php
|
|
// Spanish stop words that are dropped from the search query before matching.
// Entries are lowercase and, apart from 'también', written without accents.
// Fixes relative to the previous list: the duplicated 'por' was removed and
// the typo 'ampleamos' was corrected to 'empleamos'.
// NOTE(review): 'gueno' looks like a misspelling of 'bueno' — confirm before changing it.
$STOP_WORDS = [
    'a', 'un', 'una', 'unas', 'unos', 'uno', 'sobre', 'todo',
    'también', 'tras', 'otro', 'algun', 'alguno', 'alguna', 'algunos',
    'algunas', 'ser', 'es', 'soy', 'eres', 'somos', 'sois', 'estoy', 'esta',
    'estamos', 'estais', 'estan', 'como', 'en', 'para', 'atras', 'porque',
    'por', 'que', 'estado', 'estaba', 'ante', 'antes', 'siendo', 'ambos',
    'pero', 'poder', 'puede', 'puedo', 'podemos', 'podeis', 'pueden',
    'fui', 'fue', 'fuimos', 'fueron', 'hacer', 'hago', 'hace', 'hacemos',
    'haceis', 'hacen', 'cada', 'fin', 'incluso', 'primero', 'desde',
    'conseguir', 'consigo', 'consigue', 'consigues', 'conseguimos',
    'consiguen', 'ir', 'voy', 'va', 'vamos', 'vais', 'van', 'vaya', 'gueno',
    'ha', 'tener', 'tengo', 'tiene', 'tenemos', 'teneis', 'tienen', 'el', 'la',
    'lo', 'las', 'los', 'su', 'aqui', 'mio', 'tuyo', 'ellos', 'ellas', 'nos',
    'nosotros', 'vosotros', 'vosotras', 'si', 'dentro', 'solo', 'solamente',
    'saber', 'sabes', 'sabe', 'sabemos', 'sabeis', 'saben', 'ultimo', 'largo',
    'bastante', 'haces', 'muchos', 'aquellos', 'aquellas', 'sus', 'entonces',
    'tiempo', 'verdad', 'verdadero', 'verdadera', 'cierto', 'ciertos',
    'cierta', 'ciertas', 'intentar', 'intento', 'intenta', 'intentas',
    'intentamos', 'intentais', 'intentan', 'dos', 'bajo', 'arriba', 'encima',
    'usar', 'uso', 'usas', 'usa', 'usamos', 'usais', 'usan', 'emplear',
    'empleo', 'empleas', 'emplean', 'empleamos', 'empleais', 'valor', 'muy',
    'era', 'eras', 'eramos', 'eran', 'modo', 'bien', 'cual', 'cuando', 'donde',
    'mientras', 'quien', 'con', 'entre', 'sin', 'trabajo', 'trabajar',
    'trabajas', 'trabaja', 'trabajamos', 'trabajais', 'trabajan', 'podria',
    'podrias', 'podriamos', 'podrian', 'podriais', 'yo', 'aquel',
];
|
|
|
|
// Maximum number of words shown in a result excerpt.
$DESCRIPTION_LENGTH = 25;
// Half of the excerpt length as an integer (floor() alone returns a float).
$HALF_DESCRIPTION_LENGTH = (int) floor($DESCRIPTION_LENGTH / 2);

// Load the search index. If the file is missing, unreadable or not valid JSON
// with a "pages" list, fall back to an empty index so the page loop further
// down iterates over an array instead of null/false.
$web_content = ['pages' => []];
$index_path = 'tipuesearch_content.json';
if (is_readable($index_path)) {
    $index_json = file_get_contents($index_path);
    $decoded_index = ($index_json === false) ? null : json_decode($index_json, true);
    if (is_array($decoded_index) && isset($decoded_index['pages']) && is_array($decoded_index['pages'])) {
        $web_content = $decoded_index;
    }
}

// Set to true as soon as a stop word is dropped from the query.
$stop_words_ignored = false;
|
|
|
|
if (isset($_GET['q'])) {
|
|
|
|
// Read the query from $_GET, the same superglobal the surrounding isset()
// guard checks. A non-string value (e.g. ?q[]=x) is treated as an empty query
// instead of being passed to trim().
$raw_query = isset($_GET['q']) ? $_GET['q'] : '';
$search_str = is_string($raw_query) ? trim($raw_query) : '';

// Split on runs of whitespace and drop empty pieces: an empty keyword would
// later turn into an empty regular expression that matches every page.
$query_words = preg_split('/[ \t\r\n]+/', $search_str, -1, PREG_SPLIT_NO_EMPTY);
if ($query_words === false) {
    $query_words = [];
}

// Keep every word that is not a stop word. The comparison is done on the
// lowercased word because the later matching is case-insensitive as well.
$keywords = [];
foreach ($query_words as $keyword) {
    $lowered = function_exists('mb_strtolower')
        ? mb_strtolower($keyword, 'UTF-8')
        : strtolower($keyword);

    if (in_array($lowered, $STOP_WORDS, true)) {
        $stop_words_ignored = true;
        continue;
    }

    $keywords[] = $keyword;
}

// Filled by the page-scoring loop with one entry per matching page.
$found_results = [];
|
|
|
|
/*
 * Score every indexed page against the keywords and collect the hits in
 * $found_results (keys: score, title, url, text).
 *
 * Scoring per keyword: +35 if it occurs in the URL, +35 in the title, +30 in
 * the tags, and +10 for every occurrence in the page text.
 *
 * Keywords are user input, so they are quoted with preg_quote() before being
 * used as patterns and are never written to the output themselves: the text
 * is matched in its raw form, split around the matches, and every piece is
 * entity-encoded on its own. The highlighted text therefore keeps the page's
 * original casing, and inserted markup can never be matched by a later
 * keyword.
 */
$highlight_open = '<span class="tipue_search_content_bold highlighted">';
$highlight_close = '</span>';

// One case-insensitive, UTF-8 aware pattern per usable keyword.
$keyword_patterns = [];
$quoted_keywords = [];
foreach ($keywords as $word) {
    // Skip empty keywords (they would match everything) and keywords that are
    // not valid UTF-8 (PCRE rejects them when the "u" modifier is used).
    if (!is_string($word) || $word === '' || preg_match('//u', $word) !== 1) {
        continue;
    }
    $quoted = preg_quote($word, '/');
    $quoted_keywords[] = $quoted;
    $keyword_patterns[] = '/' . $quoted . '/iu';
}

// Single alternation used for highlighting; longer keywords go first so that
// the longest keyword wins where two of them start at the same position.
$highlight_pattern = null;
if (count($quoted_keywords) > 0) {
    usort($quoted_keywords, function ($left, $right) {
        return strlen($right) - strlen($left);
    });
    $highlight_pattern = '/(' . implode('|', $quoted_keywords) . ')/iu';
}

$pages = (is_array($web_content) && isset($web_content['pages']) && is_array($web_content['pages']))
    ? $web_content['pages']
    : [];

foreach ($pages as $page) {
    $page_url = isset($page['url']) ? (string) $page['url'] : '';
    $page_title = isset($page['title']) ? (string) $page['title'] : '';
    $page_tags = isset($page['tags']) ? (string) $page['tags'] : '';
    $page_text = isset($page['text']) ? (string) $page['text'] : '';

    $score = 0;
    foreach ($keyword_patterns as $pattern) {
        if (preg_match($pattern, $page_url)) {
            $score += 35;
        }
        if (preg_match($pattern, $page_title)) {
            $score += 35;
        }
        if (preg_match($pattern, $page_tags)) {
            $score += 30;
        }
        // preg_match_all() returns false on error, which counts as no match.
        $match_count = preg_match_all($pattern, $page_text);
        if ($match_count > 0) {
            $score += 10 * $match_count;
        }
    }

    if ($score === 0) {
        continue;
    }

    // Existing behaviour kept: when stop words were dropped from the query,
    // matches in the text are not highlighted.
    $pieces = false;
    if (!$stop_words_ignored && $highlight_pattern !== null) {
        $pieces = preg_split($highlight_pattern, $page_text, -1, PREG_SPLIT_DELIM_CAPTURE);
    }

    if ($pieces === false) {
        $display_text = htmlentities($page_text);
    } else {
        // With PREG_SPLIT_DELIM_CAPTURE the even entries are plain text and
        // the odd entries are the matched keywords.
        $display_text = '';
        foreach ($pieces as $index => $piece) {
            $escaped_piece = htmlentities($piece);
            $display_text .= ($index % 2 === 1)
                ? $highlight_open . $escaped_piece . $highlight_close
                : $escaped_piece;
        }
    }

    $found_results[] = ['score' => $score, 'title' => $page_title, 'url' => $page_url, 'text' => $display_text];
}
|
|
|
|
/**
 * usort() callback that orders search results by descending score.
 *
 * @param array $a Result entry holding a 'score' key.
 * @param array $b Result entry holding a 'score' key.
 * @return int 0 when both scores are equal, -1 when $a scores higher, 1 otherwise.
 */
function comp_result_score($a, $b) {
    $left = $a['score'];
    $right = $b['score'];

    if ($left != $right) {
        // A higher score sorts first, hence the inverted sign.
        return ($left > $right) ? -1 : 1;
    }

    return 0;
}
|
|
// Tell the visitor when part of the query was dropped as stop words.
if ($stop_words_ignored) {
    echo '<div id="tipue_search_warning">' . 'Las palabras comunes se ignoran en gran parte' . '</div>';
}

// Sort the hits by descending score and build the "N results" label;
// with no hits a warning is printed and the label stays empty.
$found_results_count = count($found_results);
if ($found_results_count === 0) {
    $found_results_count_str = NULL;
    echo '<div id="tipue_search_warning">' . 'No se ha encontrado nada' . '</div>';
} else {
    usort($found_results, 'comp_result_score');
    $found_results_count_str = ($found_results_count === 1)
        ? '1 resultado'
        : $found_results_count . ' resultados';
}

echo '<div id="tipue_search_results_count">' . $found_results_count_str . '</div>';
|
|
|
|
/*
 * Print every result: title link, URL link and an excerpt of at most
 * $DESCRIPTION_LENGTH words. When the text contains a highlighted match the
 * excerpt starts half a window before the first one; otherwise it starts at
 * the beginning. An ellipsis marks each side on which words were cut off.
 *
 * The opening highlight tag contains spaces, so it is swapped for a
 * space-free marker while the text is split into words. This keeps each
 * highlighted keyword in a single token, so the word window can never cut a
 * tag in half.
 */
$highlight_open_tag = '<span class="tipue_search_content_bold highlighted">';
$highlight_marker = "\x01";
$half_window = (int) floor($DESCRIPTION_LENGTH / 2);

foreach ($found_results as $found_result) {
    printf('<div class="tipue_search_content_title"><a class="is-cyan" href="%s">%s</a></div>', $found_result['url'], $found_result['title']);
    printf('<div class="tipue_search_content_url"><a href="%s">%s</a></div>', $found_result['url'], $found_result['url']);

    // Drop any stray marker byte from the text before using it as a stand-in.
    $marked_text = str_replace($highlight_marker, '', $found_result['text']);
    $marked_text = str_replace($highlight_open_tag, $highlight_marker, $marked_text);

    $description_words = preg_split('/[ \t\r\n]+/', $marked_text, -1, PREG_SPLIT_NO_EMPTY);
    if ($description_words === false) {
        $description_words = [];
    }
    $description_words_count = count($description_words);

    if ($description_words_count === 0) {
        printf('<div class="tipue_search_content_text"><p>%s</p></div>', 'No hay palabras en el artículo');
        // Go on with the next result; the remaining results must still be printed.
        continue;
    }

    // Index of the first word that carries a highlight, if any.
    $first_match = false;
    foreach ($description_words as $word_index => $description_word) {
        if (strpos($description_word, $highlight_marker) !== false) {
            $first_match = $word_index;
            break;
        }
    }

    $start = ($first_match === false) ? 0 : max(0, $first_match - $half_window);
    // array_slice() stops at the end of the list, so short texts are safe.
    $excerpt_words = array_slice($description_words, $start, $DESCRIPTION_LENGTH);
    $excerpt = str_replace($highlight_marker, $highlight_open_tag, implode(' ', $excerpt_words));

    echo '<div class="tipue_search_content_text">';
    if ($start > 0) {
        echo '... ';
    }
    echo $excerpt;
    if ($start + count($excerpt_words) < $description_words_count) {
        echo ' ...';
    }
    echo '</div>';
}
|
|
|
|
} else {
|
|
printf('<div id="tipue_search_warning">%s</div>', 'Aún no has buscado');
|
|
}
|
|
?>
|
|
</div>
|