实例介绍
【实例简介】
关键字挖掘机源码
【实例截图】
【核心代码】
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;
namespace KeyWordsExc
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and initializes it by calling <c>InitializeComponent</c>.
/// </summary>
public Form1()
{
    this.InitializeComponent();
}
Thread t1;
/// <summary>
/// Handles the Baidu button: saves the current keyword, filter rule, page
/// count and maximum keyword length to config\config.ini under the
/// application start-up folder, clears the result grid and starts the
/// worker thread that runs <see cref="Thread1"/>.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void baidu_Click(object sender, EventArgs e)
{
    // Ignore the click while a previous run is still alive: overwriting t1
    // would orphan that worker so the Stop button could no longer reach it.
    if (t1 != null && t1.IsAlive)
    {
        return;
    }

    string iniPath = Application.StartupPath + @"\config\config.ini";
    Config.WriteIniData("KeyWord", "key", txtInputKey.Text.Trim(), iniPath);
    Config.WriteIniData("Rule", "rule", txtFilter.Text.Trim(), iniPath);
    Config.WriteIniData("MaxPage", "maxpage", txtCount.Text.Trim(), iniPath);
    Config.WriteIniData("MaxChar", "maxchar", txtMaxKey.Text.Trim(), iniPath);

    dataGridView1.Rows.Clear();

    t1 = new Thread(new ThreadStart(Thread1));
    t1.Priority = ThreadPriority.BelowNormal;
    // Background thread: closing the form must not leave the process running
    // until the search loop finishes.
    t1.IsBackground = true;
    t1.Start();
}
/// <summary>
/// Handles the Stop button: aborts the worker thread if one is running.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void stop_Click(object sender, EventArgs e)
{
    // t1 is only assigned once a search has been started; clicking Stop
    // before that used to throw a NullReferenceException.
    if (t1 == null || !t1.IsAlive)
    {
        return;
    }

    // NOTE(review): Thread.Abort is documented as unsupported on .NET Core / .NET 5+;
    // if this project is ever retargeted, switch to a cooperative stop flag.
    t1.Abort();
}
/// <summary>
/// Worker-thread body. For every comma-separated keyword in the keyword box
/// it requests Baidu result pages, extracts the keywords found in each page
/// and appends the ones that pass the length limit, the filter list and the
/// duplicate check to the result grid. Shows a message box when finished.
/// </summary>
public void Thread1()
{
    int pageCount;
    int maxKeywordLength;
    // Validate the numeric inputs up front: an unhandled FormatException on
    // a worker thread would take the whole process down.
    if (!int.TryParse(txtCount.Text, out pageCount) ||
        !int.TryParse(txtMaxKey.Text, out maxKeywordLength))
    {
        MessageBox.Show("请输入有效的数字!");
        return;
    }

    // RemoveEmptyEntries yields an empty array for a blank filter box (the
    // array used to stay null and every row insertion failed silently) and
    // drops empty entries, which would otherwise match every keyword.
    char[] separator = new char[] { ',' };
    string[] filters = txtFilter.Text.Trim().Split(separator, StringSplitOptions.RemoveEmptyEntries);
    string[] searchKeys = txtInputKey.Text.Trim().Split(separator, StringSplitOptions.RemoveEmptyEntries);

    BaiduSearch baidu = new BaiduSearch();
    int total = 0;

    foreach (string rawKey in searchKeys)
    {
        string key = rawKey.Trim();
        if (key.Length == 0)
        {
            continue;
        }

        for (int page = 0; page < pageCount; page++)
        {
            // The keyword is escaped so characters such as '&' or '#' cannot
            // break the query string.
            // NOTE(review): rn is set to the page count and pn advances by 9
            // per page, exactly as in the original code — confirm intent.
            string url = "http://www.baidu.com/s?wd=" + Uri.EscapeDataString(key)
                + "&rn=" + pageCount + "&pn=" + page * 9;
            string html = search(url, "utf-8");
            if (string.IsNullOrEmpty(html) || baidu.GetSearchCount(html) <= 0)
            {
                continue;
            }

            List<Keyword> keywords = baidu.GetKeywords(html, key);
            foreach (Keyword found in keywords)
            {
                total++;
                int rowId = total;
                string text = found.KeyWord;
                try
                {
                    // Invoke is synchronous, so rows are appended in order
                    // on the UI thread.
                    this.Invoke((MethodInvoker)delegate
                    {
                        AppendKeywordRow(rowId, text, maxKeywordLength, filters);
                    });
                }
                catch (ObjectDisposedException)
                {
                    // The form has been closed; nothing left to update.
                    return;
                }
                catch (Exception ex)
                {
                    // Best effort per row, but leave a trace instead of
                    // swallowing the failure silently.
                    System.Diagnostics.Debug.WriteLine(ex);
                }
            }
        }
    }

    MessageBox.Show("生成完成!");
}

/// <summary>
/// Runs on the UI thread: updates the running total label and appends one
/// keyword row unless the keyword is too long, contains a filter term or is
/// already present in the grid.
/// </summary>
/// <param name="rowId">Running number shown in the first cell and the total label.</param>
/// <param name="keyword">Keyword text to add.</param>
/// <param name="maxKeywordLength">Keywords longer than this are skipped.</param>
/// <param name="filters">Terms that disqualify a keyword when contained in it.</param>
private void AppendKeywordRow(int rowId, string keyword, int maxKeywordLength, string[] filters)
{
    ltotal.Text = rowId.ToString();

    if (keyword.Length > maxKeywordLength)
    {
        return;
    }

    foreach (string filter in filters)
    {
        if (keyword.IndexOf(filter, StringComparison.Ordinal) >= 0)
        {
            return;
        }
    }

    string trimmed = keyword.Trim();
    foreach (DataGridViewRow existing in dataGridView1.Rows)
    {
        // Rows without a value (e.g. the new-row placeholder) are skipped.
        object cellValue = existing.Cells["KeyWord"].Value;
        if (cellValue != null && cellValue.ToString().Trim() == trimmed)
        {
            return;
        }
    }

    DataGridViewTextBoxCell idCell = new DataGridViewTextBoxCell();
    idCell.Value = rowId;
    DataGridViewTextBoxCell wordCell = new DataGridViewTextBoxCell();
    wordCell.Value = keyword;

    DataGridViewRow row = new DataGridViewRow();
    row.Cells.Add(idCell);
    row.Cells.Add(wordCell);
    dataGridView1.Rows.Add(row);
}
/// <summary>
/// Handles the Google button: requests one Google result page (100 results)
/// for the text in the keyword box and binds the extracted keywords to the
/// result grid. The request runs synchronously on the calling thread.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void google_Click(object sender, EventArgs e)
{
    const int num = 100;
    string key = txtInputKey.Text.Trim();

    // Escape the keyword so characters such as '&' or '#' cannot break the
    // query string.
    string url = "http://www.google.com.hk/search?hl=zh-CN&source=hp&q=" + Uri.EscapeDataString(key)
        + "&aq=f&aqi=&aql=&oq=&num=" + num;
    string html = search(url, "utf-8");
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    googleSearch google = new googleSearch();
    List<Keyword> keywords = google.GetKeywords(html, key);
    dataGridView1.DataSource = keywords;
}
/// <summary>
/// Downloads a search result page with an HTTP GET request and returns its
/// HTML after it has been passed through <see cref="formatHTML"/>.
/// </summary>
/// <param name="url">Search URL to request.</param>
/// <param name="Chareset">Name of the encoding used to decode the response body.</param>
/// <returns>
/// The formatted HTML, or an empty string when the request or the read
/// fails, so callers can test the result with <c>string.IsNullOrEmpty</c>.
/// </returns>
public string search(string url, string Chareset)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.UseDefaultCredentials = true;
    request.ContentType = "text/html";
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215;)";
    request.Method = "GET";
    request.CookieContainer = new CookieContainer();

    try
    {
        // Dispose the response so the underlying connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            return readResponseStream(response, Chareset);
        }
    }
    catch (Exception ex)
    {
        // Returning the exception text (the old behaviour) made callers
        // parse an error dump as if it were HTML.
        System.Diagnostics.Debug.WriteLine(ex);
        return string.Empty;
    }
}
/// <summary>
/// Reads the whole body of an HTTP response as text and returns it after
/// passing it through <see cref="formatHTML"/>.
/// </summary>
/// <param name="response">Response whose body stream is read to the end.</param>
/// <param name="Chareset">Name of the encoding used to decode the body.</param>
/// <returns>The formatted response text.</returns>
public string readResponseStream(HttpWebResponse response, string Chareset)
{
    Stream body = response.GetResponseStream();
    Encoding bodyEncoding = Encoding.GetEncoding(Chareset);

    // Disposing the reader also closes the response stream.
    using (StreamReader reader = new StreamReader(body, bodyEncoding))
    {
        string rawHtml = reader.ReadToEnd();
        return formatHTML(rawHtml);
    }
}
/// <summary>
/// Cleans up downloaded page source: removes the raquo, nbsp and copy
/// HTML entities, removes the literal two-character sequences "/r", "/t"
/// and "/n", and finally decodes the amp entity to a plain ampersand.
/// </summary>
/// <param name="htmlContent">Raw page source.</param>
/// <returns>The cleaned page source.</returns>
public string formatHTML(string htmlContent)
{
    // The entity names are spelled out explicitly; with the decoded
    // characters in their place the last replacement was a no-op
    // ("&" -> "&") and the second one no longer targeted the nbsp entity.
    //
    // NOTE(review): "/r", "/t" and "/n" look like they were meant to be the
    // escapes "\r", "\t" and "\n". They are kept as-is on purpose: the
    // keyword regexes in this file use '.', which stops at line breaks, so
    // stripping real newlines would change what they match. Confirm before
    // changing.
    //
    // The amp entity is decoded last so that text such as "&amp;nbsp;" is
    // not decoded twice.
    return htmlContent
        .Replace("&raquo;", "")
        .Replace("&nbsp;", "")
        .Replace("&copy;", "")
        .Replace("/r", "")
        .Replace("/t", "")
        .Replace("/n", "")
        .Replace("&amp;", "&");
}
/// <summary>
/// Extracts the reported result count and the result titles from a Baidu
/// search result page.
/// </summary>
public class BaiduSearch
{
    protected string uri = "http://www.baidu.com/s?wd=";
    protected Encoding queryEncoding = Encoding.GetEncoding("gb2312");
    protected Encoding pageEncoding = Encoding.GetEncoding("gb2312");
    // Matches the digits (with thousands separators) between
    // "找到相关结果[约]" and "个" in the page's result summary.
    protected string resultPattern = @"(?<=找到相关结果[约]?)[0-9,]*?(?=个)";

    /// <summary>
    /// Reads the number of results reported by the page.
    /// </summary>
    /// <param name="html">Result page source.</param>
    /// <returns>
    /// The reported count, or 0 when the page is null/empty, contains no
    /// result summary or the number cannot be parsed.
    /// </returns>
    public int GetSearchCount(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return 0;
        }

        Match match = Regex.Match(html, resultPattern);
        if (!match.Success)
        {
            return 0;
        }

        // Drop the thousands separators before parsing; TryParse leaves the
        // result at 0 when the text is not a valid number.
        int result;
        int.TryParse(match.Value.Replace(",", string.Empty), out result);
        return result;
    }

    /// <summary>
    /// Collects the text of every anchor that is closed by
    /// <c>&lt;/a&gt;&lt;/h3&gt;</c>, with nested tags stripped, as keywords
    /// numbered from 1.
    /// </summary>
    /// <param name="html">Result page source.</param>
    /// <param name="word">The searched keyword (currently unused).</param>
    /// <returns>The extracted keywords; empty when nothing matches.</returns>
    public List<Keyword> GetKeywords(string html, string word)
    {
        List<Keyword> keywords = new List<Keyword>();
        if (string.IsNullOrEmpty(html))
        {
            return keywords;
        }

        int nextId = 1;
        string titlePattern = ">(?<content>.*?)</a></h3>";
        foreach (Match title in Regex.Matches(html, titlePattern))
        {
            Keyword keyword = new Keyword();
            // Post-increment so IDs run 1, 2, 3...; the increment had been
            // lost and every keyword received ID 1.
            keyword.ID = nextId++;
            keyword.KeyWord = Regex.Replace(title.Groups["content"].Value, "<[^>]*>", string.Empty);
            keywords.Add(keyword);
        }

        return keywords;
    }
}
/// <summary>
/// Extracts the result titles from a Google search result page.
/// </summary>
public class googleSearch
{
    /// <summary>
    /// Collects the anchor text of every <c>h3 class="r"</c> result heading,
    /// with nested tags stripped, as keywords numbered from 1.
    /// </summary>
    /// <param name="html">Result page source.</param>
    /// <param name="word">The searched keyword (currently unused).</param>
    /// <returns>The extracted keywords; empty when nothing matches.</returns>
    public List<Keyword> GetKeywords(string html, string word)
    {
        List<Keyword> keywords = new List<Keyword>();
        if (string.IsNullOrEmpty(html))
        {
            return keywords;
        }

        int nextId = 1;
        Regex headingPattern = new Regex("<h3 class=\"r\"><a.*?href=\"(?<url>.*?)\".*?>(?<content>.*?)</a>", RegexOptions.IgnoreCase);
        foreach (Match heading in headingPattern.Matches(html))
        {
            Keyword keyword = new Keyword();
            // Post-increment so IDs run 1, 2, 3...; the increment had been
            // lost and every keyword received ID 1.
            keyword.ID = nextId++;
            keyword.KeyWord = Regex.Replace(heading.Groups["content"].Value, "<[^>]*>", string.Empty);
            keywords.Add(keyword);
        }

        return keywords;
    }
}
/// <summary>
/// Handles the Export button: asks for a .txt file name and writes the
/// "KeyWord" cell of every grid row to it, one keyword per line,
/// overwriting an existing file.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btnOutPut_Click(object sender, EventArgs e)
{
    using (SaveFileDialog saveFile1 = new SaveFileDialog())
    {
        saveFile1.Filter = "文本文件(.txt)|*.txt";
        saveFile1.FilterIndex = 1;
        if (saveFile1.ShowDialog() != System.Windows.Forms.DialogResult.OK || saveFile1.FileName.Length == 0)
        {
            return;
        }

        try
        {
            using (System.IO.StreamWriter sw = new System.IO.StreamWriter(saveFile1.FileName, false))
            {
                foreach (DataGridViewRow dr in dataGridView1.Rows)
                {
                    // The new-row placeholder and empty cells have no value;
                    // dereferencing them used to abort the export silently.
                    object value = dr.IsNewRow ? null : dr.Cells["KeyWord"].Value;
                    if (value == null)
                    {
                        continue;
                    }

                    sw.WriteLine(value.ToString());
                }
            }
        }
        catch (Exception ex)
        {
            // Tell the user instead of swallowing the failure.
            MessageBox.Show("导出失败:" + ex.Message);
        }
    }
}
/// <summary>
/// Handles the form's Load event: applies the skin file and restores the
/// keyword, filter rule, page count and maximum keyword length from
/// config\config.ini under the application start-up folder.
/// </summary>
/// <param name="sender">The form that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    // NOTE(review): this path is relative, while the ini path below is built
    // from Application.StartupPath — confirm the skin loads when the working
    // directory differs from the start-up folder.
    this.skinEngine1.SkinFile = "config\\MP10.ssk";

    string iniPath = Application.StartupPath + @"\config\config.ini";
    txtInputKey.Text = Config.ReadIniData("KeyWord", "key", "", iniPath);
    txtFilter.Text = Config.ReadIniData("Rule", "rule", "", iniPath);
    txtCount.Text = Config.ReadIniData("MaxPage", "maxpage", "", iniPath);
    txtMaxKey.Text = Config.ReadIniData("MaxChar", "maxchar", "", iniPath);
}
}
}
标签: 源码
小贴士
感谢您为本站写下的评论,您的评论对其它用户来说具有重要的参考价值,所以请认真填写。
- 类似“顶”、“沙发”之类没有营养的文字,对勤劳贡献的楼主来说是令人沮丧的反馈信息。
- 相信您也不想看到一排文字/表情墙,所以请不要反馈意义不大的重复字符,也请尽量不要纯表情的回复。
- 提问之前请再仔细看一遍楼主的说明,或许是您遗漏了。
- 请勿到处挖坑绊人、招贴广告。既占空间让人厌烦,又没人会搭理,于人于己都无利。
关于好例子网
本站旨在为广大IT学习爱好者提供一个非营利性互相学习交流分享平台。本站所有资源都可以被免费获取学习研究。本站资源来自网友分享,对搜索内容的合法性不具有预见性、识别性、控制性,仅供学习研究,请务必在下载后24小时内给予删除,不得用于其他任何用途,否则后果自负。基于互联网的特殊性,平台无法对用户传输的作品、信息、内容的权属或合法性、安全性、合规性、真实性、科学性、完整性、有效性等进行实质审查;无论平台是否已进行审查,用户均应自行承担因其传输的作品、信息、内容而可能或已经产生的侵权或权属纠纷等法律责任。本站所有资源不代表本站的观点或立场,基于网友分享,根据中国法律《信息网络传播权保护条例》第二十二与二十三条之规定,若资源存在侵权或相关问题请联系本站客服人员,点此联系我们。关于更多版权及免责声明参见 版权及免责声明


网友评论
我要评论