ことのおこり
ひさしぶりに管理者でDokuWikiにログインしたら上のほうがうるさいの。
ぐぐる…
DokuWiki はぐぐっても情報ないんだよね…。
ちょうさ
うちのDokuWikiは ここを参考に MeCab 入れて,inc/indexer.php
に手を加えている。indexer.php
が変わると,ただコピペしているだけの身としては手が出なくなる…。
ええと,
かな?これならいけそう??
でぃふ
--- E:/dokuwiki/dokuwiki-2009-12-25c/dokuwiki-2009-12-25/inc/indexer.php Sun Jan 17 19:35:46 2010
+++ E:/dokuwiki/dokuwiki-2010-11-07a/inc/indexer.php Mon Jan 17 03:04:09 2011
@@ -1,15 +1,12 @@
<?php
/**
- * Common DokuWiki functions
+ * Functions to create the fulltext search index
*
* @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
* @author Andreas Gohr <andi@splitbrain.org>
*/
if(!defined('DOKU_INC')) die('meh.');
-require_once(DOKU_INC.'inc/io.php');
-require_once(DOKU_INC.'inc/utf8.php');
-require_once(DOKU_INC.'inc/parserutils.php');
// set the minimum token length to use in the index (note, this doesn't apply to numeric tokens)
if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2);
@@ -308,6 +305,8 @@
}
unset($page_idx); // free memory
+ idx_saveIndexLine('title', '', $pid, p_get_first_heading($page, false));
+
$pagewords = array();
// get word usage in page
$words = idx_getPageWords($page);
@@ -413,7 +412,58 @@
return join(':',$updated)."\n";
}
+/**
+ * Get the list of lengths indexed in the wiki
+ *
+ * Read the index directory or a cache file and returns
+ * a sorted array of lengths of the words used in the wiki.
+ * @return array list of int lengths; empty when the index directory cannot be opened
+ * @author YoBoY <yoboy.leguesh@gmail.com>
+ */
+function idx_listIndexLengths() {
+ global $conf;
+ // testing what we have to do, create a cache file or not.
+ if ($conf['readdircache'] == 0) {
+ $docache = false;
+ } else {
+ clearstatcache();
+ if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) {
+ if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) {
+ $idx = array();
+ foreach ( $lengths as $length) {
+ $idx[] = (int)$length;
+ }
+ return $idx;
+ }
+ }
+ $docache = true;
+ }
+ if ($conf['readdircache'] == 0 or $docache ) {
+ $dir = @opendir($conf['indexdir']);
+ if($dir===false)
+ return array();
+ $idx = array(); // initialise the list; "$idx[] = array()" would push an empty array as a bogus first entry
+ while (($f = readdir($dir)) !== false) {
+ if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
+ $i = substr($f,1,-4);
+ if (is_numeric($i))
+ $idx[] = (int)$i;
+ }
+ }
+ closedir($dir);
+ sort($idx);
+ // we save this in a file.
+ if ($docache === true) {
+ $handle = @fopen($conf['indexdir'].'/lengths.idx','w');
+ @fwrite($handle, implode("\n",$idx));
+ @fclose($handle);
+ }
+ return $idx;
+ }
+
+ return array();
+}
/**
* Get the word lengths that have been indexed.
@@ -421,35 +471,27 @@
* Reads the index directory and returns an array of lengths
* that there are indices for.
*
- * @author Tom N Harris <tnharris@whoopdedo.org>
+ * @author YoBoY <yoboy.leguesh@gmail.com>
*/
function idx_indexLengths(&$filter){
global $conf;
- $dir = @opendir($conf['indexdir']);
- if($dir===false)
- return array();
$idx = array();
if(is_array($filter)){
- while (($f = readdir($dir)) !== false) {
- if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
- $i = substr($f,1,-4);
- if (is_numeric($i) && isset($filter[(int)$i]))
- $idx[] = (int)$i;
+ // testing if index files exists only
+ foreach ($filter as $key => $value) {
+ if (@file_exists($conf['indexdir']."/i$key.idx")) {
+ $idx[] = $key;
}
}
}else{
- // Exact match first.
- if(@file_exists($conf['indexdir']."/i$filter.idx"))
- $idx[] = $filter;
- while (($f = readdir($dir)) !== false) {
- if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
- $i = substr($f,1,-4);
- if (is_numeric($i) && $i > $filter)
- $idx[] = (int)$i;
+ $lengths = idx_listIndexLengths();
+ foreach ( $lengths as $key => $length) {
+ // we keep all the values equal or superior
+ if ((int)$length >= (int)$filter) {
+ $idx[] = $length;
}
}
}
- closedir($dir);
return $idx;
}
@@ -657,51 +699,6 @@
}
return $words;
-}
-
-/**
- * Create a pagewords index from the existing index.
- *
- * @author Tom N Harris <tnharris@whoopdedo.org>
- */
-function idx_upgradePageWords(){
- global $conf;
- $page_idx = idx_getIndex('page','');
- if (empty($page_idx)) return;
- $pagewords = array();
- $len = count($page_idx);
- for ($n=0;$n<$len;$n++){
- $pagewords[] = array();
- }
- unset($page_idx);
-
- $n=0;
- foreach (idx_indexLengths($n) as $wlen) {
- $lines = idx_getIndex('i',$wlen);
- $len = count($lines);
- for ($wid=0;$wid<$len;$wid++) {
- $wkey = "$wlen*$wid";
- foreach (explode(':',trim($lines[$wid])) as $part) {
- if($part == '') continue;
- list($doc,$cnt) = explode('*',$part);
- $pagewords[(int)$doc][] = $wkey;
- }
- }
- }
-
- $fn = $conf['indexdir'].'/pageword';
- $fh = @fopen($fn.'.tmp','w');
- if (!$fh){
- trigger_error("Failed to write word index", E_USER_ERROR);
- return false;
- }
- foreach ($pagewords as $line){
- fwrite($fh, join(':',$line)."\n");
- }
- fclose($fh);
- if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']);
- io_rename($fn.'.tmp', $fn.'.idx');
- return true;
}
//Setup VIM: ex: et ts=4 enc=utf-8 :
つづく