Zend/Search/Lucene.php 相关示例

PHP语言基础

下载此实例

开发语言：PHP
实例大小：0.01M
下载次数：14
浏览次数：413
发布时间：2013-01-02
实例类别：PHP语言基础
发布人：chaogu
文件格式：.zip
所需积分：2

实例介绍

【实例简介】使用 Zend/Search/Lucene.php 构建文件索引并进行检索；使用 Zend/Search/Lucene/Analysis/Analyzer.php 自定义文本分析（分词）程序。
【实例截图】
【核心代码】

代码一：

<?php
/**
 * Example 1: create a new Lucene index in ./index and add one document to it.
 *
 * The document is described by two variables, $docUrl and $docContent. The
 * original sample never defined them, which indexed null values and raised
 * "undefined variable" notices. They now fall back to sample values, but a
 * script that includes this file may still set them beforehand.
 */
require_once "Zend/Search/Lucene.php";
require_once "Zend/Search/Lucene/Document.php";

// Document to index; values supplied by an including script take precedence.
$docUrl = isset($docUrl) ? $docUrl : 'http://www.example.com/docs/sample.html';
$docContent = isset($docContent) ? $docContent : 'Sample document content to be indexed.';

// Passing true as the second constructor argument creates a new index.
$index = new Zend_Search_Lucene('index', true);

// Build the document object.
$doc = new Zend_Search_Lucene_Document();
// Keep the document location in a Text field named "url".
$doc->addField(Zend_Search_Lucene_Field::Text('url', $docUrl));
// Index the body through an UnStored field named "contents".
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $docContent));

// Add the document to the index, then commit() to update the index.
$index->addDocument($doc);
$index->commit();
?>

代码二：

<?php
/**
 * Example 2: create an index in ./index and dump the directory object
 * returned by getDirectory().
 */
require_once 'Zend/Search/Lucene.php';

// First argument: the "index" directory under the current path.
// Second argument (true): create a new index there.
$luceneIndex = new Zend_Search_Lucene('index', true);

echo '索引指定的路径信息为：<p>';
// Print the directory object straight from the index.
print_r($luceneIndex->getDirectory());
?>

代码三：

<?php
/**
 * Example 3: open the existing index in ./index, append one document and
 * print every field name returned by getFieldNames().
 *
 * $docUrl and $docContent describe the document. The original sample never
 * defined them; they now fall back to sample values unless an including
 * script has already set them.
 */
require_once "Zend/Search/Lucene.php";

// Document to index; values supplied by an including script take precedence.
$docUrl = isset($docUrl) ? $docUrl : 'http://www.example.com/docs/sample.html';
$docContent = isset($docContent) ? $docContent : 'Sample document content to be indexed.';

// Open an existing index (no second argument).
$index = new Zend_Search_Lucene('index');

// Build the document object.
$doc = new Zend_Search_Lucene_Document();
// Keep the document location in a Text field named "url".
$doc->addField(Zend_Search_Lucene_Field::Text('url', $docUrl));
// Index the body through an UnStored field named "contents".
$doc->addField(Zend_Search_Lucene_Field::UnStored('contents', $docContent));

// Add the document to the index, then commit() to update the index.
$index->addDocument($doc);
$index->commit();

// Fetch the list of field names and print one per paragraph.
$result = $index->getFieldNames();
echo "\$index索引的全部字段为：<p>";
foreach ($result as $value) {
	echo $value;
	echo "<p>";
}
?>

代码四：

<?php
/**
 * Example 4: open the existing "test" index, report how many documents it
 * holds and print every hit for the query "beijing".
 *
 * Uses the full "<?php" open tag: the short "<?" form is only parsed when the
 * short_open_tag ini setting is enabled, otherwise the source is sent as text.
 */
require_once 'Zend/Search/Lucene.php';

// Open the existing index (the original note says it was created by 23-7.php).
$index = new Zend_Search_Lucene('test');
echo "索引包括{$index->count()}个文档<p>\n";

// Run the query through find() and walk the returned hits.
$search = "beijing";
$hits = $index->find($search);
foreach ($hits as $hit) {
	echo str_repeat('-', 80)."--<p>";                       // separator line
	echo '结果ID为：'.$hit->id."<p>";                        // hit id
	echo '分值为：'.sprintf('%.2f', $hit->score)."<p>";      // hit score, two decimals
	$document = $hit->getDocument();                        // document behind the hit
	echo "标题为：".$hit->title;                             // title read through the hit
	echo "<p>";
	echo $document->getFieldValue('contents');              // contents read from the document
}
?>

代码五：

<?php
/**
 * Example 5: create a new index named "test" and fill it with three sample
 * documents, each having "title", "auth" and "contents" Text fields.
 *
 * Uses the full "<?php" open tag: the short "<?" form is only parsed when the
 * short_open_tag ini setting is enabled, otherwise the source is sent as text.
 */
require_once 'Zend/Search/Lucene.php';

// true as the second argument creates a new index.
$index = new Zend_Search_Lucene('test', true);

// Sample records to index.
$data = array(
	array(
		'title'    => 'hello',
		'auth'     => 'h',
		'contents' => 'hello world!'
	),
	array(
		'title'    => 'test',
		'auth'     => 't',
		'contents' => 'this is a test'
	),
	array(
		'title'    => 'hello',
		'auth'     => 'h',
		'contents' => 'hello sky!'
	)
);

// Turn each record into a document and add it to the index.
foreach ($data as $temp) {
	$doc = new Zend_Search_Lucene_Document();
	$doc->addField(Zend_Search_Lucene_Field::Text('title', $temp['title']));
	$doc->addField(Zend_Search_Lucene_Field::Text('auth', $temp['auth']));
	$doc->addField(Zend_Search_Lucene_Field::Text('contents', $temp['contents']));
	$index->addDocument($doc);
}

// Commit the index, then report the document count.
$index->commit();
echo $index->count()."个文档已经被索引";
?>

代码六：

<?php
/**
 * Example 6: search the existing "test" index with the query "world or sky"
 * and print the number of hits followed by each hit's id, score, title,
 * author and contents.
 *
 * Uses the full "<?php" open tag: the short "<?" form is only parsed when the
 * short_open_tag ini setting is enabled, otherwise the source is sent as text.
 */
require_once 'Zend/Search/Lucene.php';

// Open the existing index and run the query.
$index = new Zend_Search_Lucene('test');
$search = "world or sky";
$hits = $index->find($search);

// Summary line and separator.
echo "符合包含world或者sky的结果数为：";
echo count($hits);
echo "<p>";
echo str_repeat('-', 40)."<p>";

// One paragraph per attribute of every hit.
foreach ($hits as $hit) {
	echo '结果ID为：'.$hit->id."<p>";
	echo '分值为：'.sprintf('%.2f', $hit->score)."<p>";
	$document = $hit->getDocument();
	echo "标题为：".$hit->title;
	echo "<p>";
	echo "作者为：".$hit->auth;
	echo "<p>";
	echo "内容为：".$document->getFieldValue('contents');
	echo "<p>";
}
?>

代码七：

<?php
/**
 * Example 7: search the existing "test" index with the query
 * "title:hello AND NOT sky" and print the number of hits followed by each
 * hit's id, score, title, author and contents.
 *
 * Uses the full "<?php" open tag: the short "<?" form is only parsed when the
 * short_open_tag ini setting is enabled, otherwise the source is sent as text.
 */
require_once 'Zend/Search/Lucene.php';

// Open the existing index and run the query.
$index = new Zend_Search_Lucene('test');
$search = "title:hello AND NOT sky";
$hits = $index->find($search);

// Summary line and separator.
echo "符合标题包含hello同时内容不含sky的结果数为：";
echo count($hits);
echo "<p>";
echo str_repeat('-', 40)."<p>";

// One paragraph per attribute of every hit.
foreach ($hits as $hit) {
	echo '结果ID为：'.$hit->id."<p>";
	echo '分值为：'.sprintf('%.2f', $hit->score)."<p>";
	$document = $hit->getDocument();
	echo "标题为：".$hit->title;
	echo "<p>";
	echo "作者为：".$hit->auth;
	echo "<p>";
	echo "内容为：".$document->getFieldValue('contents');
	echo "<p>";
}
?>

代码八：

<?php
// Declare the page encoding. header() takes the complete header line as its
// first argument (the second argument is the boolean $replace flag), so the
// charset has to be part of that string.
header('Content-Type: text/html; charset=utf-8');
// Load the analyzer base classes plus the token and stop-word filter classes
// that the code below instantiates or extends.
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
require_once 'Zend/Search/Lucene/Analysis/Token.php';
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/StopWords.php';
/**
 * Custom text analyzer for mixed ASCII / multi-byte (e.g. Chinese) text.
 *
 * The input is scanned byte by byte:
 *  - a run of ASCII alphanumerics becomes one token;
 *  - a run of multi-byte characters is emitted as overlapping two-character
 *    tokens (bigrams); a multi-byte character standing alone is skipped.
 *
 * The scanner works on raw bytes and decodes character widths as UTF-8; the
 * encoding argument of setInput() is not used for any conversion here, so the
 * input is assumed to already be UTF-8.
 */
class Phpbean_Lucene_Analyzer extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
	/** @var int Current byte offset into $this->_input. */
	private $_position;

	/** @var array Extra stop words that reset() replaces with a space. */
	private $_cnStopWords=array();

	/**
	 * Sets the stop words that reset() blanks out of the input.
	 *
	 * @param array $cnStopWords List of strings to remove.
	 * @return void
	 */
	public function setCnStopWords($cnStopWords)
	{
		$this->_cnStopWords=$cnStopWords;
	}

	/**
	 * Resets the token stream: rewinds to offset 0 and replaces punctuation
	 * and the configured stop words with spaces.
	 *
	 * @return void
	 */
	public function reset()
	{
		$this->_position=0;
		if ($this->_input === null)
		{
			// Nothing to clean up until input has been assigned.
			return;
		}
		// ASCII and full-width punctuation (plus the particle at the end of
		// the list) that act as token separators.
		$search=array(",", "/", "\\", ".", ";", ":", "\"", "!", "~", "`", "^", "(", ")", "?", "-", "'", "<", ">", "$", "&", "%", "#", "@", " ", "=", "{", "}", "[", "]", "：", "）", "（", "．", "。", "，", "！", "；", "“", "”", "‘", "’", "［", "］", "、", "—", "　", "《", "》", "－", "…", "【", "】","的");
		// Turn every separator into a plain space.
		$this->_input=str_replace($search,' ',$this->_input);
		// Blank out the caller-supplied stop words as well.
		$this->_input=str_replace($this->_cnStopWords,' ',$this->_input);
	}

	/**
	 * Tokenization stream API.
	 * Returns the next token, or null at the end of the stream.
	 *
	 * @return Zend_Search_Lucene_Analysis_Token|null
	 */
	public function nextToken()
	{
		if ($this->_input === null)
		{
			return null;
		}
		$len=strlen($this->_input);
		while ($this->_position < $len)
		{
			// Skip separator spaces.
			while ($this->_position < $len && $this->_input[$this->_position]==' ')
			{
				$this->_position++;
			}
			if ($this->_position >= $len)
			{
				// Only trailing spaces were left; do not read past the end.
				break;
			}
			$termStartPosition=$this->_position;
			$isCnWord=false;
			$lastCharStart=$termStartPosition;
			if (ord($this->_input[$this->_position]) > 127)
			{
				// Multi-byte run: consume at most two characters.
				$i=0;
				while ($this->_position < $len && ord($this->_input[$this->_position]) > 127)
				{
					$lastCharStart=$this->_position;
					// Advance by the real width of the character, clamped so a
					// truncated sequence cannot move past the end of the input.
					$this->_position=min($len, $this->_position + $this->_utf8CharLength($this->_input[$this->_position]));
					$i++;
					if ($i==2)
					{
						$isCnWord=true;
						break;
					}
				}
				// A single multi-byte character on its own is not a token.
				if ($i==1) continue;
			}
			else
			{
				// ASCII run: consume consecutive alphanumerics.
				while ($this->_position < $len && ctype_alnum($this->_input[$this->_position]))
				{
					$this->_position++;
				}
			}
			if ($this->_position == $termStartPosition)
			{
				// Unrecognised byte (e.g. a control character): step over it.
				$this->_position++;
				continue;
			}
			$token=new Zend_Search_Lucene_Analysis_Token(substr($this->_input,$termStartPosition,$this->_position - $termStartPosition),$termStartPosition,$this->_position);
			// Run the token through the filters added with addFilter();
			// a filter may drop it by returning null.
			$token=$this->normalize($token);
			// Rewind to the second character so consecutive bigrams overlap.
			if ($isCnWord) $this->_position=$lastCharStart;
			if ($token !== null)
			{
				return $token;
			}
		}
		return null;
	}

	/**
	 * Returns the byte length of the UTF-8 character introduced by $leadByte.
	 * Stray continuation bytes and invalid lead bytes count as one byte so the
	 * scanner always makes progress.
	 *
	 * @param string $leadByte Single byte.
	 * @return int
	 */
	private function _utf8CharLength($leadByte)
	{
		$code=ord($leadByte);
		if ($code >= 0xF0 && $code <= 0xF7) return 4;
		if ($code >= 0xE0 && $code <= 0xEF) return 3;
		if ($code >= 0xC0 && $code <= 0xDF) return 2;
		return 1;
	}
}
// English stop words, removed by the stop-word token filter.
$stopWords=array('a', 'an', 'at', 'the', 'and', 'or', 'is', 'am');
$stopWordsFilter=new Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);

// Configure the custom analyzer with its own stop words and the filter.
$analyzer=new Phpbean_Lucene_Analyzer();
$cnStopWords=array('的');
$analyzer->setCnStopWords($cnStopWords);
$analyzer->addFilter($stopWordsFilter);

// Tokenize a mixed English/Chinese sample string.
$value='this is a 中文的内容的测试';
$analyzer->setInput($value, 'utf-8');

$tokenCounter=0;
// Initialised up front so print_r() still has an array when no token is produced.
$tokens=array();
while (($token=$analyzer->nextToken())!==null)
{
	$tokenCounter++;
	$tokens[]=$token;
}
print_r($tokens);
?>

更多实例代码详见压缩包

标签： Zend Lucene

实例下载地址

Zend/Search/Lucene.php 相关示例

点此下载实例

不能下载？内容有错？点击这里报错 + 投诉 + 提问

好例子网口号：伸出你的我的手 — 分享！

网友评论

我要评论

小贴士

感谢您为本站写下的评论，您的评论对其它用户来说具有重要的参考价值，所以请认真填写。

类似“顶”、“沙发”之类没有营养的文字，对勤劳贡献的楼主来说是令人沮丧的反馈信息。
相信您也不想看到一排文字/表情墙，所以请不要反馈意义不大的重复字符，也请尽量不要纯表情的回复。
提问之前请再仔细看一遍楼主的说明，或许是您遗漏了。
请勿到处挖坑绊人、招贴广告。既占空间让人厌烦，又没人会搭理，于人于己都无利。

关于好例子网

本站旨在为广大IT学习爱好者提供一个非营利性互相学习交流分享平台。本站所有资源都可以被免费获取学习研究。本站资源来自网友分享，对搜索内容的合法性不具有预见性、识别性、控制性，仅供学习研究，请务必在下载后24小时内给予删除，不得用于其他任何用途，否则后果自负。基于互联网的特殊性，平台无法对用户传输的作品、信息、内容的权属或合法性、安全性、合规性、真实性、科学性、完整权、有效性等进行实质审查；无论平台是否已进行审查，用户均应自行承担因其传输的作品、信息、内容而可能或已经产生的侵权或权属纠纷等法律责任。本站所有资源不代表本站的观点或立场，基于网友分享，根据中国法律《信息网络传播权保护条例》第二十二条与第二十三条之规定，若资源存在侵权或相关问题请联系本站客服人员，点此联系我们。关于更多版权及免责声明，参见版权及免责声明。

Zend/Search/Lucene.php 相关示例

同类人气实例

实例介绍

实例下载地址

Zend/Search/Lucene.php 相关示例

相关软件

相关文章

网友评论

小贴士

关于好例子网

下载周排行

下载总排行