$value)
	{
		// If the word shouldn't be indexed, remove it
		if (!validate_search_word($value, $idx))
			unset($words[$key]);
	}

	return $words;
}


//
// Checks if a word is a valid searchable word
//
// $word is the candidate word. $idx is true when the word is being validated
// for indexing and false when it is being validated for searching: keywords
// and CJK words are rejected for indexing but accepted for searching.
// Returns true if the word is acceptable, false otherwise.
//
function validate_search_word($word, $idx)
{
	static $stopwords;

	// If the word is a keyword we don't want to index it, but we do want to be allowed to search it
	if (is_keyword($word))
		return !$idx;

	// Load the stopword list once per request; it is kept in the static variable afterwards
	if (!isset($stopwords))
	{
		if (file_exists(FORUM_CACHE_DIR.'cache_stopwords.php'))
			include FORUM_CACHE_DIR.'cache_stopwords.php';

		// The cache file is expected to define PUN_STOPWORDS_LOADED; if it did not, rebuild it
		if (!defined('PUN_STOPWORDS_LOADED'))
		{
			if (!defined('FORUM_CACHE_FUNCTIONS_LOADED'))
				require PUN_ROOT.'include/cache.php';

			generate_stopwords_cache();
			require FORUM_CACHE_DIR.'cache_stopwords.php';
		}

		// Never hand a non-array to in_array() (a TypeError on PHP 8), and normalise
		// the entries to strings so that the strict comparison below is reliable
		$stopwords = is_array($stopwords) ? array_map('strval', $stopwords) : array();
	}

	// If it is a stopword it isn't valid. The comparison is strict so that
	// numeric-looking words (e.g. '1e3' and '1000') are not treated as equal
	if (in_array((string) $word, $stopwords, true))
		return false;

	// If the word is CJK we don't want to index it, but we do want to be allowed to search it
	if (is_cjk($word))
		return !$idx;

	// Exclude % and * when checking whether current word is valid
	$word = str_replace(array('%', '*'), '', $word);

	// Check the word is within the min/max length
	$num_chars = pun_strlen($word);
	return $num_chars >= PUN_SEARCH_MIN_WORD && $num_chars <= PUN_SEARCH_MAX_WORD;
}


//
// Check a given word is a search keyword.
//
// Returns true only for the exact strings 'and', 'or' and 'not'.
//
function is_keyword($word)
{
	return in_array($word, array('and', 'or', 'not'), true);
}


//
// Check if a given word is CJK or Hangul.
//
// Returns true when the whole word consists of characters matched by
// PUN_CJK_HANGUL_REGEX, false otherwise (including when the match fails).
//
function is_cjk($word)
{
	return preg_match('%^'.PUN_CJK_HANGUL_REGEX.'+$%u', $word) === 1;
}


//
// Strip [img] [url] and [email] out of the message so we don't index their contents
//
// The tag markup is removed while the URL/address and any description text
// are kept; [topic], [post], [forum] and [user] ID tags are replaced by a space.
// Returns the result of preg_replace() on $text.
//
function strip_bbcode($text)
{
	static $patterns;

	// Build the pattern => replacement map only once
	if (!isset($patterns))
	{
		$patterns = array(
			'%\[img=([^\]]*+)\]([^[]*+)\[/img\]%' => '$2 $1', // Keep the url and description
			'%\[(url|email)=([^\]]*+)\]([^[]*+(?:(?!\[/\1\])\[[^[]*+)*)\[/\1\]%' => '$2 $3', // Keep the url and text
			'%\[(img|url|email)\]([^[]*+(?:(?!\[/\1\])\[[^[]*+)*)\[/\1\]%' => '$2', // Keep the url
			'%\[(topic|post|forum|user)\][1-9]\d*\[/\1\]%' => ' ', // Do not index topic/post/forum/user ID
		);
	}

	return preg_replace(array_keys($patterns), array_values($patterns), $text);
}


//
// Updates the search index with the contents of $post_id (and $subject)
//
// $mode    - 'edit' diffs the new words against the matches already stored for
//            the post; any other value indexes all words as new.
// $post_id - ID of the post. NOTE(review): interpolated into SQL unescaped, so
//            callers must pass an integer - confirm at the call sites.
// $message - post body.
// $subject - topic subject, or null when there is no subject to index.
//
function update_search_index($mode, $post_id, $message, $subject = null)
{
	global $db_type, $db;

	$words = array();
	$cur_words = array('post' => array(), 'subject' => array());

	$message = utf8_strtolower($message);

	// Only lowercase a subject that was actually supplied, so that null is
	// never passed on to the string functions (deprecated since PHP 8.1)
	if ($subject !== null)
		$subject = utf8_strtolower($subject);

	// Remove any bbcode that we shouldn't index
	$message = strip_bbcode($message);

	// Split old and new post/subject to obtain array of 'words'
	$words_message = split_words($message, true);
	$words_subject = ($subject) ? split_words($subject, true) : array();

	if ($mode == 'edit')
	{
		$result = $db->query('SELECT w.id, w.word, m.subject_match FROM '.$db->prefix.'search_words AS w INNER JOIN '.$db->prefix.'search_matches AS m ON w.id=m.word_id WHERE m.post_id='.$post_id, true) or error('Unable to fetch search index words', __FILE__, __LINE__, $db->error());

		// Map each currently indexed word to its word ID, separately for subject and post
		while ($row = $db->fetch_row($result))
		{
			$match_in = ($row[2]) ? 'subject' : 'post';
			$cur_words[$match_in][$row[1]] = $row[0];
		}

		$db->free_result($result);

		// Words that are new to the post get added, words no longer present get deleted
		$words['add']['post'] = array_diff($words_message, array_keys($cur_words['post']));
		$words['add']['subject'] = array_diff($words_subject, array_keys($cur_words['subject']));
		$words['del']['post'] = array_diff(array_keys($cur_words['post']), $words_message);
		$words['del']['subject'] = array_diff(array_keys($cur_words['subject']), $words_subject);
	}
	else
	{
		$words['add']['post'] = $words_message;
		$words['add']['subject'] = $words_subject;
		$words['del']['post'] = array();
		$words['del']['subject'] = array();
	}

	unset($words_message);
	unset($words_subject);

	// Get unique words from the above arrays
	$unique_words = array_unique(array_merge($words['add']['post'], $words['add']['subject']));

	if (!empty($unique_words))
	{
		$result = $db->query('SELECT id, word FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', array_map(array($db, 'escape'), $unique_words)).'\')', true) or error('Unable to fetch search index words', __FILE__, __LINE__, $db->error());

		$word_ids = array();
		while ($row = $db->fetch_row($result))
			$word_ids[$row[1]] = $row[0];

		$db->free_result($result);

		// Words not yet present in the search_words table
		$new_words = array_diff($unique_words, array_keys($word_ids));
		unset($unique_words);

		if (!empty($new_words))
		{
			// NOTE(review): these INSERTs have no "or error()" handler, unlike the
			// other queries here - presumably deliberate so that a failed insert
			// does not abort posting; confirm before changing.
			switch ($db_type)
			{
				case 'mysql':
				case 'mysqli':
				case 'mysql_innodb':
				case 'mysqli_innodb':
					// MySQL variants: insert all new words with one multi-row statement
					$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.implode('\'),(\'', array_map(array($db, 'escape'), $new_words)).'\')');
					break;

				default:
					foreach ($new_words as $word)
						$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.$db->escape($word).'\')');
					break;
			}
		}

		unset($new_words);
	}

	// Delete matches (only if editing a post)
	foreach ($words['del'] as $match_in => $wordlist)
	{
		$subject_match = ($match_in == 'subject') ? 1 : 0;

		if (!empty($wordlist))
		{
			// Translate the words to delete into their word IDs
			$del_ids = array();
			foreach ($wordlist as $word)
				$del_ids[] = $cur_words[$match_in][$word];

			$db->query('DELETE FROM '.$db->prefix.'search_matches WHERE word_id IN('.implode(',', $del_ids).') AND post_id='.$post_id.' AND subject_match='.$subject_match) or error('Unable to delete search index word matches', __FILE__, __LINE__, $db->error());
		}
	}

	// Add new matches
	foreach ($words['add'] as $match_in => $wordlist)
	{
		$subject_match = ($match_in == 'subject') ? 1 : 0;

		if (!empty($wordlist))
			$db->query('INSERT INTO '.$db->prefix.'search_matches (post_id, word_id, subject_match) SELECT '.$post_id.', id, '.$subject_match.' FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', array_map(array($db, 'escape'), $wordlist)).'\')') or error('Unable to insert search index word matches', __FILE__, __LINE__, $db->error());
	}

	unset($words);
}


//
// Strip search index of indexed words in $post_ids
//
// $post_ids is placed verbatim inside SQL IN(...) lists.
// NOTE(review): presumably a comma-separated list of integer post IDs built
// by the caller - it is not escaped here, so confirm at the call sites.
//
// Words referenced by the given posts that have exactly one row in
// search_matches are deleted from search_words; afterwards all matches for
// the given posts are deleted.
//
function strip_search_index($post_ids)
{
	global $db_type, $db;

	switch ($db_type)
	{
		case 'mysql':
		case 'mysqli':
		case 'mysql_innodb':
		case 'mysqli_innodb':
		{
			// MySQL variants: resolve the word IDs with separate queries instead of nested subselects
			$result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id') or error('Unable to fetch search index word match', __FILE__, __LINE__, $db->error());

			if ($db->num_rows($result))
			{
				$id_list = array();
				while ($row = $db->fetch_row($result))
					$id_list[] = $row[0];

				$word_ids = implode(',', $id_list);

				// Of those words, keep only the ones with a single match row
				$result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN('.$word_ids.') GROUP BY word_id HAVING COUNT(word_id)=1') or error('Unable to fetch search index word match', __FILE__, __LINE__, $db->error());

				if ($db->num_rows($result))
				{
					$id_list = array();
					while ($row = $db->fetch_row($result))
						$id_list[] = $row[0];

					$word_ids = implode(',', $id_list);

					$db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN('.$word_ids.')') or error('Unable to delete search index word', __FILE__, __LINE__, $db->error());
				}
			}

			break;
		}

		default:
			$db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id) GROUP BY word_id HAVING COUNT(word_id)=1)') or error('Unable to delete from search index', __FILE__, __LINE__, $db->error());
			break;
	}

	// Finally remove every match row belonging to the given posts
	$db->query('DELETE FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.')') or error('Unable to delete search index word match', __FILE__, __LINE__, $db->error());
}