Changeset 45253
- Timestamp:
- 05/11/08 16:55:03 (2 months ago)
- Files:
-
- similar-posts/trunk/readme.txt (modified) (2 diffs)
- similar-posts/trunk/similar-posts-admin.php (modified) (9 diffs)
- similar-posts/trunk/similar-posts.php (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
similar-posts/trunk/readme.txt
r45217 r45253 5 5 Requires at least: 1.5 6 6 Tested up to: 2.5.1 7 Stable tag: 2.5b2 77 Stable tag: 2.5b28 8 8 Displays a list of posts similar to the current one based on content, title and/or tags. 9 9 … … 32 32 == Version History == 33 33 34 * 2.5b28 35 * improvements to Similar Posts matching 36 * experiment with Chinese/Korean/Japanese matching 34 37 * 2.5b27 35 38 * fixed bug with bulk indexing of tags similar-posts/trunk/similar-posts-admin.php
r45217 r45253 1 1 <?php 2 2 3 // Admin stuff for Similar Posts Plugin, Version 2.5b2 73 // Admin stuff for Similar Posts Plugin, Version 2.5b28 4 4 5 5 function similar_posts_option_menu() { … … 216 216 $options['utf8'] = 'false'; 217 217 } 218 $options['cjk'] = $_POST['cjk']; 219 if (!function_exists('mb_internal_encoding')) { 220 $options['cjk'] = 'false'; 221 } 218 222 $options['use_stemmer'] = $_POST['use_stemmer']; 219 223 $options['batch'] = ppl_check_cardinal($_POST['batch']); 220 224 if ($options['batch'] === 0) $options['batch'] = 100; 221 225 flush(); 222 $termcount = save_index_entries (($options['utf8']==='true'), ($options['use_stemmer']==='true'), $options['batch'] );226 $termcount = save_index_entries (($options['utf8']==='true'), ($options['use_stemmer']==='true'), $options['batch'], ($options['cjk']==='true')); 223 227 update_option('similar-posts', $options); 224 228 //show a message … … 234 238 The index is created when the plugin is activated and then kept up-to-date 235 239 automatically when posts are added, edited, or deleted.</p> 236 <p>The twooptions that affect the index can be set below.</p>', 'post_plugins');240 <p>The options that affect the index can be set below.</p>', 'post_plugins'); 237 241 _e('<p>If you are using a language other than english you may find that the plugin 238 242 mangles some characters since PHP is normally blind to multibyte characters. You … … 240 244 of a little speed. <em>This facility is only available if your 241 245 installation of PHP supports the mbstring functions.</em></p>', 'post_plugins'); 246 _e('<p>Languages like Chinese, Korean and Japanese pose a special difficulty for 247 the full-text search algorithm. As an <em>experiment</em> I have introduced an option below 248 to work around some of these issues. The text must be encoded as UTF-8. I would be very grateful for feedback from any 249 users knowledgeable in these languages.</em></p>', 'post_plugins'); 242 250 _e('<p>Some related word forms should really be counted together, e.g., "follow", 243 251 "follows", and "following". The plugin can use a <em>stemming</em> algorithm to … … 261 269 <option <?php if($options['utf8'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option> 262 270 <option <?php if($options['utf8'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option> 271 </select> 272 </td> 273 </tr> 274 <tr valign="top"> 275 <th scope="row"><?php _e('Treat as Chinese, Korean, or Japanese?', 'post_plugins') ?></th> 276 <td> 277 <select name="cjk" id="cjk" <?php if (!function_exists('mb_split')) echo 'disabled="true"'; ?> > 278 <option <?php if($options['cjk'] == 'false') { echo 'selected="selected"'; } ?> value="false">No</option> 279 <option <?php if($options['cjk'] == 'true') { echo 'selected="selected"'; } ?> value="true">Yes</option> 263 280 </select> 264 281 </td> … … 493 510 494 511 // sets up the index for the blog 495 function save_index_entries ($utf8=false, $use_stemmer=false, $batch=100 ) {512 function save_index_entries ($utf8=false, $use_stemmer=false, $batch=100, $cjk=false) { 496 513 global $wpdb, $table_prefix; 497 514 $table_name = $table_prefix.'similar_posts'; … … 503 520 reset($posts); 504 521 while (list($dummy, $post) = each($posts)) { 505 $content = sp_get_post_terms($post['post_content'], $utf , $use_stemmer);506 $title = sp_get_title_terms($post['post_title'], $utf , $use_stemmer);522 $content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk); 523 $title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk); 507 524 $postID = $post['ID']; 508 $tags = sp_get_tag_terms($postID, $utf );525 $tags = sp_get_tag_terms($postID, $utf8); 509 526 $wpdb->query("INSERT INTO `$table_name` (pID, content, title, tags) VALUES ($postID, \"$content\", \"$title\", \"$tags\")"); 510 527 $termcount = $termcount + 1; … … 631 648 if (!isset($options['utf8'])) $options['utf8'] = 'false'; 632 649 if (!function_exists('mb_internal_encoding')) $options['utf8'] = 'false'; 650 if (!isset($options['cjk'])) $options['cjk'] = 'false'; 651 if (!function_exists('mb_internal_encoding')) $options['cjk'] = 'false'; 633 652 if (!isset($options['use_stemmer'])) $options['use_stemmer'] = 'false'; 634 653 if (!isset($options['batch'])) $options['batch'] = '100'; … … 638 657 // initial creation of the index, if the table is empty 639 658 $num_index_posts = $wpdb->get_var("SELECT COUNT(*) FROM `$table_name`"); 640 if ($num_index_posts == 0) save_index_entries (($options['utf8'] === 'true'), false );659 if ($num_index_posts == 0) save_index_entries (($options['utf8'] === 'true'), false, $options['batch'], ($options['cjk'] === 'true')); 641 660 642 661 // deactivate legacy Similar Posts Feed if present similar-posts/trunk/similar-posts.php
r45217 r45253 4 4 Plugin URI: http://rmarsh.com/plugins/similar-posts/ 5 5 Description: Displays a <a href="options-general.php?page=similar-posts.php">highly configurable</a> list of related posts. Similarity can be based on any combination of word usage in the content, title, or tags. Don't be disturbed if it takes a few moments to complete the installation -- the plugin is indexing your posts. <a href="http://rmarsh.com/plugins/post-options/">Instructions and help online</a>. Requires the latest version of the <a href="http://wordpress.org/extend/plugins/post-plugin-library/">Post-Plugin Library</a> to be installed. 6 Version: 2.5b2 76 Version: 2.5b28 7 7 Author: Rob Marsh, SJ 8 8 Author URI: http://rmarsh.com/ … … 23 23 */ 24 24 25 $similar_posts_version = $similar_posts_feed_version= '2.5b2 7';25 $similar_posts_version = $similar_posts_feed_version= '2.5b28'; 26 26 27 27 /* … … 291 291 $options = get_option('similar-posts'); 292 292 $utf8 = ($options['utf8'] === 'true'); 293 $cjk = ($options['cjk'] === 'true'); 293 294 $use_stemmer = ($options['use_stemmer'] === 'true'); 294 $content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer );295 $title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer );295 $content = sp_get_post_terms($post['post_content'], $utf8, $use_stemmer, $cjk); 296 $title = sp_get_title_terms($post['post_title'], $utf8, $use_stemmer, $cjk); 296 297 $tags = sp_get_tag_terms($postID, $utf8); 297 298 //check to see if the field is set … … 340 341 } 341 342 342 function sp_get_post_terms($text, $utf8, $use_stemmer) { 343 // takes a cjk string and insert spaces between each character -- any ascii text is left unspaced 344 function sp_cjk_spacer($string) { 345 mb_internal_encoding("UTF-8"); 346 $strlen = mb_strlen($string); 347 $ascii = ''; 348 $result = array(); 349 for ($i = 0; $i < $strlen; $i++) { 350 $c = mb_substr($string, $i, 1); 351 if (strlen($c) > 1) { 352 if ($ascii) { 353 $result[] = $ascii; 354 $ascii = ''; 355 } 356 $result[] = sp_mb_str_pad($c, 4, '_'); 357 } else { 358 $ascii .= $c; 359 } 360 } 361 if ($ascii) $result[] = $ascii; 362 return implode(' ', $result); 363 } 364 365 function sp_get_post_terms($text, $utf8, $use_stemmer, $cjk) { 366 if ($cjk) return sp_cjk_spacer(sp_mb_clean_words($text)); 343 367 global $overusedwords; 344 if ($utf ) {368 if ($utf8) { 345 369 if ($use_stemmer) { 346 370 mb_regex_encoding('UTF-8'); … … 401 425 $tinywords = array('the' => 1, 'and' => 1, 'of' => 1, 'a' => 1, 'for' => 1, 'on' => 1); 402 426 403 function sp_get_title_terms($text, $utf8, $use_stemmer ) {427 function sp_get_title_terms($text, $utf8, $use_stemmer, $cjk) { 404 428 global $tinywords; 405 if ($utf) { 429 if ($cjk) return sp_cjk_spacer(sp_mb_clean_words($text)); 430 if ($utf8) { 406 431 if ($use_stemmer) { 407 432 mb_regex_encoding('UTF-8');
