ひまつぶし: DokuWiki-2010-11-07a その1

ことのおこり

ひさしぶりに管理者でDokuWikiにログインしたら上のほうがうるさいの。

ぐぐる…

DokuWiki はぐぐっても情報ないんだよね…。

ちょうさ

うちのDokuWikiはここを参考に MeCab 入れて，inc/indexer.php に手を加えている。indexer.php が変わると，ただコピペしているだけの身としては手が出なくなる…。

ええと，

```
idx_listIndexLengths()
```
が増えた。

```
idx_indexLengths(&$filter)
```
が
```
idx_listIndexLengths()
```
を使って書き換えられた。

```
idx_upgradePageWords()
```
がなくなった。

かな？これならいけそう？？

でぃふ

--- E:/dokuwiki/dokuwiki-2009-12-25c/dokuwiki-2009-12-25/inc/indexer.php    Sun Jan 17 19:35:46 2010
+++ E:/dokuwiki/dokuwiki-2010-11-07a/inc/indexer.php    Mon Jan 17 03:04:09 2011
@@ -1,15 +1,12 @@

 <?php
 /**
- * Common DokuWiki functions
+ * Functions to create the fulltext search index

  *
  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
  * @author     Andreas Gohr <andi@splitbrain.org>
  */

 if(!defined('DOKU_INC')) die('meh.');
-require_once(DOKU_INC.'inc/io.php');
-require_once(DOKU_INC.'inc/utf8.php');
-require_once(DOKU_INC.'inc/parserutils.php');

 // set the minimum token length to use in the index (note, this doesn't apply to numeric tokens)
 if (!defined('IDX_MINWORDLENGTH')) define('IDX_MINWORDLENGTH',2);
@@ -308,6 +305,8 @@

     }
     unset($page_idx); // free memory

+    idx_saveIndexLine('title', '', $pid, p_get_first_heading($page, false));
+
     $pagewords = array();
     // get word usage in page
     $words = idx_getPageWords($page);
@@ -413,7 +412,58 @@
     return join(':',$updated)."\n";
 }

+/**
+ * Get the list of lenghts indexed in the wiki
+ *

+ * Read the index directory or a cache file and returns
+ * a sorted array of lengths of the words used in the wiki.
+ *
+ * @author YoBoY <yoboy.leguesh@gmail.com>
+ */
+function idx_listIndexLengths() {
+    global $conf;
+    // testing what we have to do, create a cache file or not.

+    if ($conf['readdircache'] == 0) {
+        $docache = false;
+    } else {
+        clearstatcache();
+        if (@file_exists($conf['indexdir'].'/lengths.idx') and (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) {
+            if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ) !== false) {
+                $idx = array();
+                foreach ( $lengths as $length) {

+                    $idx[] = (int)$length;
+                }
+                return $idx;
+            }
+        }
+        $docache = true;
+    }

+    if ($conf['readdircache'] == 0 or $docache ) {

+        $dir = @opendir($conf['indexdir']);
+        if($dir===false)
+            return array();
+        $idx[] = array();
+        while (($f = readdir($dir)) !== false) {
+            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
+                $i = substr($f,1,-4);
+                if (is_numeric($i))

+                    $idx[] = (int)$i;
+            }
+        }
+        closedir($dir);
+        sort($idx);
+        // we save this in a file.
+        if ($docache === true) {
+            $handle = @fopen($conf['indexdir'].'/lengths.idx','w');
+            @fwrite($handle, implode("\n",$idx));

+            @fclose($handle);
+        }
+        return $idx;
+    }
+
+    return array();
+}

 /**
  * Get the word lengths that have been indexed.

@@ -421,35 +471,27 @@
  * Reads the index directory and returns an array of lengths
  * that there are indices for.
  *
- * @author Tom N Harris <tnharris@whoopdedo.org>
+ * @author YoBoY <yoboy.leguesh@gmail.com>
  */
 function idx_indexLengths(&$filter){
     global $conf;
-    $dir = @opendir($conf['indexdir']);
-    if($dir===false)

-        return array();
     $idx = array();
     if(is_array($filter)){
-        while (($f = readdir($dir)) !== false) {
-            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
-                $i = substr($f,1,-4);
-                if (is_numeric($i) && isset($filter[(int)$i]))
-                    $idx[] = (int)$i;

+        // testing if index files exists only
+        foreach ($filter as $key => $value) {
+            if (@file_exists($conf['indexdir']."/i$key.idx")) {
+                $idx[] = $key;
             }
         }
     }else{
-        // Exact match first.
-        if(@file_exists($conf['indexdir']."/i$filter.idx"))
-            $idx[] = $filter;

-        while (($f = readdir($dir)) !== false) {
-            if (substr($f,0,1) == 'i' && substr($f,-4) == '.idx'){
-                $i = substr($f,1,-4);
-                if (is_numeric($i) && $i > $filter)
-                    $idx[] = (int)$i;
+        $lengths = idx_listIndexLengths();

+        foreach ( $lengths as $key => $length) {
+            // we keep all the values equal or superior
+            if ((int)$length >= (int)$filter) {
+                $idx[] = $length;
             }
         }
     }
-    closedir($dir);
     return $idx;
 }


@@ -657,51 +699,6 @@
     }

     return $words;
-}
-
-/**
- * Create a pagewords index from the existing index.
- *
- * @author Tom N Harris <tnharris@whoopdedo.org>

- */
-function idx_upgradePageWords(){
-    global $conf;
-    $page_idx = idx_getIndex('page','');
-    if (empty($page_idx)) return;
-    $pagewords = array();
-    $len = count($page_idx);
-    for ($n=0;$n<$len;$n++){

-        $pagewords[] = array();
-    }
-    unset($page_idx);
-
-    $n=0;
-    foreach (idx_indexLengths($n) as $wlen) {
-        $lines = idx_getIndex('i',$wlen);
-        $len = count($lines);
-        for ($wid=0;$wid<$len;$wid++) {

-            $wkey = "$wlen*$wid";
-            foreach (explode(':',trim($lines[$wid])) as $part) {
-                if($part == '') continue;
-                list($doc,$cnt) = explode('*',$part);
-                $pagewords[(int)$doc][] = $wkey;
-            }
-        }
-    }
-

-    $fn = $conf['indexdir'].'/pageword';
-    $fh = @fopen($fn.'.tmp','w');
-    if (!$fh){
-        trigger_error("Failed to write word index", E_USER_ERROR);
-        return false;
-    }
-    foreach ($pagewords as $line){
-        fwrite($fh, join(':',$line)."\n");
-    }

-    fclose($fh);
-    if($conf['fperm']) chmod($fn.'.tmp', $conf['fperm']);
-    io_rename($fn.'.tmp', $fn.'.idx');
-    return true;
 }

 //Setup VIM: ex: et ts=4 enc=utf-8 :

つづく

ひまつぶし

2011年3月3日木曜日

DokuWiki-2010-11-07a その1

ことのおこり

ぐぐる…

ちょうさ

でぃふ

0 件のコメント:

わるさんとは

twitter

ラベル

アーカイブ