ことのおこり
ひさしぶりに管理者でDokuWikiにログインしたら上のほうがうるさいの。
ぐぐる…
DokuWiki はぐぐっても情報ないんだよね…。
ちょうさ
うちのDokuWikiは ここを参考に MeCab 入れて,inc/indexer.php に手を加えている。
indexer.php が変わると,ただコピペしているだけの身としては手が出なくなる…。
ええと,
idx_listIndexLengths() が増えた。
idx_indexLengths(&$filter) が idx_listIndexLengths() を使って書き換えられた。
idx_upgradePageWords() がなくなった。
かな?これならいけそう??
でぃふ
--- E:/dokuwiki/dokuwiki-2009-12-25c/dokuwiki-2009-12-25/inc/indexer.php Sun Jan 17 19:35:46 2010 +++ E:/dokuwiki/dokuwiki-2010-11-07a/inc/indexer.php Mon Jan 17 03:04:09 2011 @@ -1,15 +1,12 @@
<?php /** - * Common DokuWiki functions + * Functions to create the fulltext search index * * @license GPL 2 (http://www.gnu.org/licenses/gpl.html) * @author Andreas Gohr <andi@splitbrain.org> */ if(!defined('DOKU_INC')) die('meh.'); -require_once(DOKU_INC.'inc/io.php'); -require_once(DOKU_INC.'inc/utf8.php'); -require_once(DOKU_INC.'inc/parserutils.php'); // set the minimum token length to use in the index (note, this doesn't apply to numeric tokens) if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2); @@ -308,6 +305,8 @@ } unset($page_idx); // free memory + idx_saveIndexLine('title', '', $pid, p_get_first_heading($page, false)); + $pagewords = array(); // get word usage in page $words = idx_getPageWords($page); @@ -413,7 +412,58 @@ return join(':',$updated)."\n"; } +/** + * Get the list of lenghts indexed in the wiki + * + * Read the index directory or a cache file and returns + * a sorted array of lengths of the words used in the wiki. + * + * @author YoBoY <yoboy.leguesh@gmail.com> + */ +function idx_listIndexLengths() { + global $conf; + // testing what we have to do, create a cache file or not. + if ($conf['readdircache'] == 0) { + $docache = false; + } else { + clearstatcache(); + if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) { + if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) { + $idx = array(); + foreach ( $lengths as $length) { + $idx[] = (int)$length; + } + return $idx; + } + } + $docache = true; + } + if ($conf['readdircache'] == 0 or $docache ) { + $dir = @opendir($conf['indexdir']); + if($dir===false) + return array(); + $idx[] = array(); + while (($f = readdir($dir)) !== false) { + if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ + $i = substr($f,1,-4); + if (is_numeric($i)) + $idx[] = (int)$i; + } + } + closedir($dir); + sort($idx); + // we save this in a file. 
+ if ($docache === true) { + $handle = @fopen($conf['indexdir'].'/lengths.idx','w'); + @fwrite($handle, implode("\n",$idx)); + @fclose($handle); + } + return $idx; + } + + return array(); +} /** * Get the word lengths that have been indexed. @@ -421,35 +471,27 @@ * Reads the index directory and returns an array of lengths * that there are indices for. * - * @author Tom N Harris <tnharris@whoopdedo.org> + * @author YoBoY <yoboy.leguesh@gmail.com> */ function idx_indexLengths(&$filter){ global $conf; - $dir = @opendir($conf['indexdir']); - if($dir===false) - return array(); $idx = array(); if(is_array($filter)){ - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && isset($filter[(int)$i])) - $idx[] = (int)$i; + // testing if index files exists only + foreach ($filter as $key => $value) { + if (@file_exists($conf['indexdir']."/i$key.idx")) { + $idx[] = $key; } } }else{ - // Exact match first. - if(@file_exists($conf['indexdir']."/i$filter.idx")) - $idx[] = $filter; - while (($f = readdir($dir)) !== false) { - if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){ - $i = substr($f,1,-4); - if (is_numeric($i) && $i > $filter) - $idx[] = (int)$i; + $lengths = idx_listIndexLengths(); + foreach ( $lengths as $key => $length) { + // we keep all the values equal or superior + if ((int)$length >= (int)$filter) { + $idx[] = $length; } } } - closedir($dir); return $idx; } @@ -657,51 +699,6 @@ } return $words; -} - -/** - * Create a pagewords index from the existing index. 
- * - * @author Tom N Harris <tnharris@whoopdedo.org> - */ -function idx_upgradePageWords(){ - global $conf; - $page_idx = idx_getIndex('page',''); - if (empty($page_idx)) return; - $pagewords = array(); - $len = count($page_idx); - for ($n=0;$n<$len;$n++){ - $pagewords[] = array(); - } - unset($page_idx); - - $n=0; - foreach (idx_indexLengths($n) as $wlen) { - $lines = idx_getIndex('i',$wlen); - $len = count($lines); - for ($wid=0;$wid<$len;$wid++) { - $wkey = "$wlen*$wid"; - foreach (explode(':',trim($lines[$wid])) as $part) { - if($part == '') continue; - list($doc,$cnt) = explode('*',$part); - $pagewords[(int)$doc][] = $wkey; - } - } - } - - $fn = $conf['indexdir'].'/pageword'; - $fh = @fopen($fn.'.tmp','w'); - if (!$fh){ - trigger_error("Failed to write word index", E_USER_ERROR); - return false; - } - foreach ($pagewords as $line){ - fwrite($fh, join(':',$line)."\n"); - } - fclose($fh); - if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']); - io_rename($fn.'.tmp', $fn.'.idx'); - return true; } //Setup VIM: ex: et ts=4 enc=utf-8 :
つづく
0 件のコメント:
コメントを投稿