实例介绍
【实例简介】
关键字挖掘机源码
【实例截图】
【核心代码】
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace KeyWordsExc
{
    /// <summary>
    /// Main window of the keyword miner: runs search-engine queries for the
    /// entered keywords, extracts result titles and lists them in a grid that
    /// can be exported to a text file.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Worker thread that runs <see cref="Thread1"/>.</summary>
        Thread t1;

        /// <summary>
        /// Set by <see cref="stop_Click"/>; polled by <see cref="Thread1"/> so the
        /// worker can finish cooperatively instead of being aborted.
        /// </summary>
        private volatile bool stopRequested;

        /// <summary>Full path of the INI file that stores the form's input values.</summary>
        private static string ConfigFilePath
        {
            get { return Path.Combine(Application.StartupPath, @"config\config.ini"); }
        }

        /// <summary>
        /// Saves the current inputs to the INI file, clears the grid and starts
        /// the Baidu search on a background worker thread.
        /// </summary>
        private void baidu_Click(object sender, EventArgs e)
        {
            string configPath = ConfigFilePath;
            Config.WriteIniData("KeyWord", "key", txtInputKey.Text.Trim(), configPath);
            Config.WriteIniData("Rule", "rule", txtFilter.Text.Trim(), configPath);
            Config.WriteIniData("MaxPage", "maxpage", txtCount.Text.Trim(), configPath);
            Config.WriteIniData("MaxChar", "maxchar", txtMaxKey.Text.Trim(), configPath);

            // A second worker would add rows to the same grid concurrently with
            // the first one, so ignore the click while a search is still running.
            if (t1 != null && t1.IsAlive)
            {
                return;
            }

            dataGridView1.Rows.Clear();

            stopRequested = false;
            t1 = new Thread(new ThreadStart(Thread1));
            t1.Priority = ThreadPriority.BelowNormal;
            // Background thread: closing the form must not keep the process alive.
            t1.IsBackground = true;
            t1.Start();
        }

        /// <summary>
        /// Asks the running search to stop. The worker exits at its next check,
        /// i.e. after the HTTP request currently in flight has returned.
        /// </summary>
        private void stop_Click(object sender, EventArgs e)
        {
            stopRequested = true;
        }

        /// <summary>
        /// Worker-thread body: for every comma-separated keyword, requests Baidu
        /// result pages, extracts the result titles and adds the ones that pass
        /// the length limit, the filter words and the duplicate check to the grid.
        /// </summary>
        public void Thread1()
        {
            int num; // value of the "max page" box; used both as page count and as rn=
            if (!int.TryParse(txtCount.Text, out num))
            {
                MessageBox.Show("请输入有效的搜索页数!");
                return;
            }

            int maxLength; // longest keyword (in characters) that may be added
            if (!int.TryParse(txtMaxKey.Text, out maxLength))
            {
                MessageBox.Show("请输入有效的关键字最大长度!");
                return;
            }

            char[] comma = new char[] { ',' };

            // Empty entries are dropped: an empty filter word would match every
            // keyword (IndexOf("") is 0), and an empty filter box means "no filter".
            string[] filters = txtFilter.Text.Trim().Split(comma, StringSplitOptions.RemoveEmptyEntries);
            string[] arrKey = txtInputKey.Text.Trim().Split(comma, StringSplitOptions.RemoveEmptyEntries);

            BaiduSearch baidu = new BaiduSearch();
            int itotal = 0; // running count of every extracted keyword, added or not

            foreach (string rawKey in arrKey)
            {
                string key = rawKey.Trim();
                if (key.Length == 0)
                {
                    continue;
                }

                for (int ipage = 0; ipage < num; ipage++)
                {
                    if (stopRequested)
                    {
                        return;
                    }

                    // NOTE(review): rn= receives the page count and pn= advances in
                    // steps of 9; kept exactly as in the original request - confirm
                    // against the intended paging behaviour.
                    string url = "http://www.baidu.com/s?wd=" + Uri.EscapeDataString(key)
                        + "&rn=" + num + "&pn=" + ipage * 9;

                    string html = search(url, "utf-8");
                    if (string.IsNullOrEmpty(html))
                    {
                        continue;
                    }

                    // Skip pages that report no results.
                    if (baidu.GetSearchCount(html) <= 0)
                    {
                        continue;
                    }

                    List<Keyword> keywords = baidu.GetKeywords(html, key);
                    foreach (Keyword found in keywords)
                    {
                        if (stopRequested)
                        {
                            return;
                        }

                        itotal++;
                        int rowId = itotal;
                        string text = found.KeyWord;

                        try
                        {
                            // Invoke is synchronous, so rows are added in order on
                            // the UI thread.
                            this.Invoke((MethodInvoker)delegate
                            {
                                ltotal.Text = rowId.ToString();
                                AddKeywordRow(rowId, text, filters, maxLength);
                            });
                        }
                        catch (InvalidOperationException)
                        {
                            // Also covers ObjectDisposedException: the form was
                            // closed (or the grid rejected the row), so there is
                            // nothing left to update.
                            return;
                        }
                    }
                }
            }

            MessageBox.Show("生成完成!");
        }

        /// <summary>
        /// Adds one keyword row to the grid unless the keyword is longer than
        /// <paramref name="maxLength"/>, contains one of the filter words, or is
        /// already listed. Must be called on the UI thread.
        /// </summary>
        /// <param name="id">Value for the first cell of the new row.</param>
        /// <param name="keyword">Keyword text for the second cell.</param>
        /// <param name="filters">Substrings that exclude a keyword.</param>
        /// <param name="maxLength">Maximum accepted keyword length.</param>
        private void AddKeywordRow(int id, string keyword, string[] filters, int maxLength)
        {
            if (keyword.Length > maxLength)
            {
                return;
            }

            foreach (string filter in filters)
            {
                if (keyword.IndexOf(filter, StringComparison.Ordinal) >= 0)
                {
                    return;
                }
            }

            string trimmed = keyword.Trim();
            foreach (DataGridViewRow existing in dataGridView1.Rows)
            {
                if (existing.IsNewRow)
                {
                    continue;
                }

                object value = existing.Cells["KeyWord"].Value;
                if (value != null && value.ToString().Trim() == trimmed)
                {
                    return; // duplicate
                }
            }

            DataGridViewTextBoxCell idCell = new DataGridViewTextBoxCell();
            idCell.Value = id;
            DataGridViewTextBoxCell wordCell = new DataGridViewTextBoxCell();
            wordCell.Value = keyword;

            DataGridViewRow row = new DataGridViewRow();
            row.Cells.Add(idCell);
            row.Cells.Add(wordCell);
            dataGridView1.Rows.Add(row);
        }

        /// <summary>
        /// Runs a single Google query for the entered text and binds the
        /// extracted result titles to the grid.
        /// </summary>
        private void google_Click(object sender, EventArgs e)
        {
            int num = 100;
            string query = txtInputKey.Text.Trim();
            string url = "http://www.google.com.hk/search?hl=zh-CN&source=hp&q="
                + Uri.EscapeDataString(query) + "&aq=f&aqi=&aql=&oq=&num=" + num;

            string html = search(url, "utf-8");
            if (!string.IsNullOrEmpty(html))
            {
                googleSearch google = new googleSearch();
                dataGridView1.DataSource = google.GetKeywords(html, query);
            }
        }

        /// <summary>
        /// Downloads a page with an HTTP GET and returns its formatted source.
        /// </summary>
        /// <param name="url">Search URL.</param>
        /// <param name="Chareset">Name of the encoding used to decode the response.</param>
        /// <returns>
        /// The page source after <see cref="formatHTML"/>, or an empty string when
        /// the request or the read fails, so callers' IsNullOrEmpty checks skip it.
        /// </returns>
        public string search(string url, string Chareset)
        {
            HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            myHttpWebRequest.UseDefaultCredentials = true;
            myHttpWebRequest.ContentType = "text/html";
            myHttpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215;)";
            myHttpWebRequest.Method = "GET";
            myHttpWebRequest.CookieContainer = new CookieContainer();

            try
            {
                using (HttpWebResponse response = (HttpWebResponse)myHttpWebRequest.GetResponse())
                {
                    // Read the HTML source from the response stream and format it.
                    return readResponseStream(response, Chareset);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failed page is skipped rather than parsed as HTML.
                System.Diagnostics.Trace.WriteLine(ex);
                return string.Empty;
            }
        }

        /// <summary>
        /// Reads the whole response body using the given encoding and formats it.
        /// </summary>
        /// <param name="response">Response whose stream is read to the end.</param>
        /// <param name="Chareset">Encoding name passed to Encoding.GetEncoding.</param>
        /// <returns>The body text after <see cref="formatHTML"/>.</returns>
        public string readResponseStream(HttpWebResponse response, string Chareset)
        {
            using (StreamReader responseReader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding(Chareset)))
            {
                return formatHTML(responseReader.ReadToEnd());
            }
        }

        /// <summary>
        /// Formats page source: removes a few HTML entities and decodes
        /// <c>&amp;amp;</c> to <c>&amp;</c>.
        /// </summary>
        /// <param name="htmlContent">Raw page source; null yields an empty string.</param>
        /// <returns>The cleaned text.</returns>
        public string formatHTML(string htmlContent)
        {
            if (htmlContent == null)
            {
                return string.Empty;
            }

            // NOTE(review): "/r", "/t" and "/n" are literal slash sequences, not
            // escapes. They may have been meant as \r, \t and \n, but removing real
            // line breaks would change what the '.'-based result patterns match,
            // so they are left unchanged here - confirm the intent.
            return htmlContent
                .Replace("&raquo;", "").Replace("&nbsp;", "")
                .Replace("&copy;", "").Replace("/r", "").Replace("/t", "")
                .Replace("/n", "").Replace("&amp;", "&");
        }

        /// <summary>Extracts the result count and result titles from a Baidu result page.</summary>
        public class BaiduSearch
        {
            protected string uri = "http://www.baidu.com/s?wd=";
            protected Encoding queryEncoding = Encoding.GetEncoding("gb2312");
            protected Encoding pageEncoding = Encoding.GetEncoding("gb2312");
            protected string resultPattern = @"(?<=找到相关结果[约]?)[0-9,]*?(?=个)";

            /// <summary>
            /// Reads the reported number of results from the page.
            /// </summary>
            /// <param name="html">Page source.</param>
            /// <returns>The parsed count, or 0 when it is missing or not a number.</returns>
            public int GetSearchCount(string html)
            {
                if (string.IsNullOrEmpty(html))
                {
                    return 0;
                }

                Match match = Regex.Match(html, resultPattern);
                if (!match.Success)
                {
                    return 0;
                }

                // The count is written with thousands separators, e.g. "1,230,000".
                int result;
                int.TryParse(match.Value.Replace(",", string.Empty), out result);
                return result;
            }

            /// <summary>
            /// Collects the text of every anchor that closes an h3 element, with
            /// inner tags stripped.
            /// </summary>
            /// <param name="html">Page source.</param>
            /// <param name="word">The searched keyword (not used by the extraction).</param>
            /// <returns>Keywords with IDs numbered from 1 in document order.</returns>
            public List<Keyword> GetKeywords(string html, string word)
            {
                int i = 1;
                List<Keyword> keywords = new List<Keyword>();
                string ss = ">(?<content>.*?)</a></h3>";

                foreach (Match mTable in Regex.Matches(html, ss))
                {
                    Keyword keyword = new Keyword();
                    keyword.ID = i++;
                    keyword.KeyWord = Regex.Replace(mTable.Groups["content"].Value, "<[^>]*>", string.Empty);
                    keywords.Add(keyword);
                }

                return keywords;
            }
        }

        /// <summary>Extracts result titles from a Google result page.</summary>
        public class googleSearch
        {
            /// <summary>
            /// Collects the anchor text of every <c>h3 class="r"</c> result heading,
            /// with inner tags stripped.
            /// </summary>
            /// <param name="html">Page source.</param>
            /// <param name="word">The searched keyword (not used by the extraction).</param>
            /// <returns>Keywords with IDs numbered from 1 in document order.</returns>
            public List<Keyword> GetKeywords(string html, string word)
            {
                int i = 1;
                List<Keyword> keywords = new List<Keyword>();
                Regex regTable = new Regex("<h3 class=\"r\"><a.*?href=\"(?<url>.*?)\".*?>(?<content>.*?)</a>", RegexOptions.IgnoreCase);

                foreach (Match mTable in regTable.Matches(html))
                {
                    Keyword keyword = new Keyword();
                    keyword.ID = i++;
                    keyword.KeyWord = Regex.Replace(mTable.Groups["content"].Value, "<[^>]*>", string.Empty);
                    keywords.Add(keyword);
                }

                return keywords;
            }
        }

        /// <summary>
        /// Writes the "KeyWord" cell of every grid row to a text file chosen by
        /// the user, one keyword per line.
        /// </summary>
        private void btnOutPut_Click(object sender, EventArgs e)
        {
            using (SaveFileDialog saveFile1 = new SaveFileDialog())
            {
                saveFile1.Filter = "文本文件(.txt)|*.txt";
                saveFile1.FilterIndex = 1;

                if (saveFile1.ShowDialog() != System.Windows.Forms.DialogResult.OK || saveFile1.FileName.Length == 0)
                {
                    return;
                }

                try
                {
                    using (StreamWriter sw = new StreamWriter(saveFile1.FileName, false))
                    {
                        foreach (DataGridViewRow dr in dataGridView1.Rows)
                        {
                            // The grid's trailing placeholder row has no value.
                            if (dr.IsNewRow)
                            {
                                continue;
                            }

                            object value = dr.Cells["KeyWord"].Value;
                            if (value != null)
                            {
                                sw.WriteLine(value.ToString());
                            }
                        }
                    }
                }
                catch (IOException ex)
                {
                    MessageBox.Show("导出失败:" + ex.Message);
                }
                catch (UnauthorizedAccessException ex)
                {
                    MessageBox.Show("导出失败:" + ex.Message);
                }
            }
        }

        /// <summary>
        /// Applies the skin file and restores the input boxes from the INI file.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            this.skinEngine1.SkinFile = "config\\MP10.ssk";

            string configPath = ConfigFilePath;
            txtInputKey.Text = Config.ReadIniData("KeyWord", "key", "", configPath);
            txtFilter.Text = Config.ReadIniData("Rule", "rule", "", configPath);
            txtCount.Text = Config.ReadIniData("MaxPage", "maxpage", "", configPath);
            txtMaxKey.Text = Config.ReadIniData("MaxChar", "maxchar", "", configPath);
        }
    }
}
标签: 源码
小贴士
感谢您为本站写下的评论,您的评论对其它用户来说具有重要的参考价值,所以请认真填写。
- 类似“顶”、“沙发”之类没有营养的文字,对勤劳贡献的楼主来说是令人沮丧的反馈信息。
- 相信您也不想看到一排文字/表情墙,所以请不要反馈意义不大的重复字符,也请尽量不要纯表情的回复。
- 提问之前请再仔细看一遍楼主的说明,或许是您遗漏了。
- 请勿到处挖坑绊人、招贴广告。既占空间让人厌烦,又没人会搭理,于人于己都无利。
关于好例子网
本站旨在为广大IT学习爱好者提供一个非营利性互相学习交流分享平台。本站所有资源都可以被免费获取学习研究。本站资源来自网友分享,对搜索内容的合法性不具有预见性、识别性、控制性,仅供学习研究,请务必在下载后24小时内给予删除,不得用于其他任何用途,否则后果自负。基于互联网的特殊性,平台无法对用户传输的作品、信息、内容的权属或合法性、安全性、合规性、真实性、科学性、完整性、有效性等进行实质审查;无论平台是否已进行审查,用户均应自行承担因其传输的作品、信息、内容而可能或已经产生的侵权或权属纠纷等法律责任。本站所有资源不代表本站的观点或立场,基于网友分享,根据中国法律《信息网络传播权保护条例》第二十二与二十三条之规定,若资源存在侵权或相关问题请联系本站客服人员,点此联系我们。关于更多版权及免责声明参见 版权及免责声明
网友评论
我要评论