敏感词分析

发布于 2013-12-20  187 次阅读


using System;
using System.IO;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;

namespace MGC
{
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and calls <c>InitializeComponent</c>, which is not defined in this
        /// file (presumably the designer-generated half of this partial class).
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }
        /// <summary>
        /// Log text built up by <c>WriteLog</c>; reset to empty at the start of each run in
        /// <c>btnStart_Click</c> and copied into <c>txtOutput</c> when the run finishes.
        /// </summary>
        string SeekResult = "";
        /// <summary>
        /// Placeholder hint put into <c>txtOText</c> by <c>Form1_Load</c>;
        /// <c>txtOText_MouseClick</c> clears the box while it still shows exactly this text.
        /// </summary>
        string notice1 = "请先在关键字设置选项卡中选择一个关键词列表,在这里放置要进行检测的文本,然后点击【开始检测】";
        /// <summary>
        /// Reads the whole file at <paramref name="url"/>, decoded as GB2312, into a text box.
        /// When the file does not exist an error message box is shown and the text box is
        /// left unchanged.
        /// </summary>
        /// <param name="url">Path of the file to read.</param>
        /// <param name="t">Text box whose <c>Text</c> receives the file content.</param>
        public void ReadFileTotxtBox(string url, TextBox t)
        {
            if (!File.Exists(url))
            {
                MessageBox.Show("指定的URL无法访问,请检查路径 - \"" + url + "\"", "文件无法访问", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Open read-only: the (path, mode) FileStream constructor requests read/write
            // access, which fails on read-only files even though we only read here.
            // The using blocks release the file handle even if reading or assigning throws.
            using (FileStream stream = new FileStream(url, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
            {
                t.Text = reader.ReadToEnd();
            }
        }
        /// <summary>
        /// Reads the whole file at <paramref name="url"/>, decoded as GB2312, and returns it.
        /// </summary>
        /// <param name="url">Path of the file to read.</param>
        /// <returns>
        /// The file content, or the literal string <c>"-1"</c> (after showing an error
        /// message box) when the file does not exist.
        /// </returns>
        public string ReadFileToReturn(string url)
        {
            if (!File.Exists(url))
            {
                MessageBox.Show("指定的URL无法访问,请检查路径 - \"" + url + "\"", "文件无法访问", MessageBoxButtons.OK, MessageBoxIcon.Error);
                // Sentinel kept for backward compatibility with existing callers.
                return "-1";
            }

            // Open read-only: the (path, mode) FileStream constructor requests read/write
            // access, which fails on read-only files even though we only read here.
            // The using blocks release the file handle even if reading throws.
            using (FileStream stream = new FileStream(url, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
            {
                return reader.ReadToEnd();
            }
        }
        /// <summary>
        /// Builds the list of distinct, non-empty keywords for a run.
        /// The text comes from <c>txtKeyFilePreview</c> when that box is non-empty, otherwise
        /// from the file at <paramref name="url"/>. One keyword per line; a fixed marker
        /// entry is appended after the real keywords.
        /// </summary>
        /// <param name="url">Keyword file path, used only when the preview box is empty.</param>
        /// <returns>Distinct non-empty lines in first-seen order, followed by the marker entry.</returns>
        public IList<string> GetKeyWords(string url)
        {
            string previewText = txtKeyFilePreview.Text;
            string rawText = previewText != "" ? previewText : ReadFileToReturn(url);

            // Splitting on both '\r' and '\n' turns every "\r\n" into an empty entry;
            // those are filtered out below.
            char[] lineBreaks = new char[] { '\r', '\n' };
            string[] lines = (rawText + "\r\n#####面包屑#####").Split(lineBreaks);

            return lines
                .Where(line => line != "")
                .Distinct()
                .ToList();
        }
        /// <summary>
        /// Appends one result line for a keyword to <c>SeekResult</c>.
        /// Keywords that were found are always logged; keywords with no hits are logged
        /// only while the <c>ckbUncheck</c> box is checked.
        /// </summary>
        /// <param name="KeyWord">The keyword that was searched for.</param>
        /// <param name="Times">How many times it was found.</param>
        public void WriteLog(string KeyWord, long Times)
        {
            // The check box is consulted only when there were no hits.
            bool shouldLog = Times > 0 || ckbUncheck.Checked;
            if (!shouldLog)
            {
                return;
            }

            string line = "敏感词\"" + KeyWord + "\"出现了" + Times.ToString() + "次;\r\n";
            SeekResult = SeekResult + line;
        }
        /// <summary>
        /// Counts how many times <paramref name="keyword"/> occurs in the text of
        /// <c>txtOText</c> and passes the count to <c>WriteLog</c>. Nothing is returned.
        /// Overlapping occurrences are each counted, and matching is ordinal (exact
        /// character-by-character, independent of the current culture).
        /// </summary>
        /// <param name="keyword">Keyword to look for; a null or empty keyword is logged with a count of 0.</param>
        public void SeekKeyword(string keyword)
        {
            string text = txtOText.Text;
            long count = 0;

            // An empty keyword "matches" at every index and would walk the start index past
            // the end of the text, so it is never scanned.
            if (!string.IsNullOrEmpty(keyword))
            {
                // Start at index 0 so a match at the very beginning of the text is counted,
                // and so an empty text is a valid (zero-hit) input rather than an
                // out-of-range start index.
                int position = text.IndexOf(keyword, 0, StringComparison.Ordinal);
                while (position >= 0)
                {
                    count++;
                    // A hit always leaves at least one character after `position`, so
                    // position + 1 never exceeds text.Length.
                    position = text.IndexOf(keyword, position + 1, StringComparison.Ordinal);
                }
            }

            WriteLog(keyword, count);
        }
        /// <summary>
        /// Gives <c>txtOText</c> the regular (non-italic) 9pt font and black text colour.
        /// Named as the TextChanged handler of that box; the event wiring is not in this file.
        /// </summary>
        private void txtOText_TextChanged(object sender, EventArgs e)
        {
            const float pointSize = 9F;
            const byte charSet = 134;

            Font regularFont = new Font("微软雅黑", pointSize, FontStyle.Regular, GraphicsUnit.Point, charSet);
            txtOText.Font = regularFont;
            txtOText.ForeColor = Color.Black;
        }
        /// <summary>
        /// Runs a full scan: builds the keyword list, counts each keyword in the input text
        /// and shows the accumulated log in <c>txtOutput</c>.
        /// If neither an existing keyword file nor preview text is available, a message box
        /// explains what is missing and nothing else happens.
        /// </summary>
        private void btnStart_Click(object sender, EventArgs e)
        {
            bool hasKeywordSource = File.Exists(txtKeywordFileURL.Text) || txtKeyFilePreview.Text != "";
            if (!hasKeywordSource)
            {
                if (txtKeywordFileURL.Text == "")
                {
                    MessageBox.Show("请先加载一个敏感词文件!", "未加载敏感词文件", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                }
                else
                {
                    MessageBox.Show("指定的URL不存在或不是有效的敏感词文件,请选择其他的敏感词文件!", "敏感词文件未找到", MessageBoxButtons.OK, MessageBoxIcon.Error);
                }
                return;
            }

            // Hide the Start button and show the wait image while the scan runs.
            picWait.Visible = true;
            btnStart.Visible = false;
            try
            {
                SeekResult = "";

                // Build the keyword list used for this run.
                IList<string> KeyWords = GetKeyWords(txtKeywordFileURL.Text);

                // GetKeyWords appends a marker entry after the real keywords; stop one
                // short of the end so that marker is not searched for.
                for (int i = 0; i < KeyWords.Count - 1; i++)
                {
                    SeekKeyword(KeyWords[i]);
                }
            }
            finally
            {
                // Always restore the controls; otherwise an exception during the scan
                // would leave the Start button permanently hidden.
                picWait.Visible = false;
                btnStart.Visible = true;
            }

            txtOutput.Font = new System.Drawing.Font("微软雅黑", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(134)));
            txtOutput.ForeColor = Color.Black;
            txtOutput.Text = SeekResult;
        }
        /// <summary>
        /// Puts the placeholder hint into <c>txtOText</c> and styles it grey and italic.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // Set the text before the styling: if txtOText_TextChanged is wired to this box
            // (wiring not visible here), assigning Text applies the regular/black style,
            // which the two lines below then override.
            txtOText.Text = notice1;

            Font hintFont = new Font("微软雅黑", 9F, FontStyle.Italic, GraphicsUnit.Point, (byte)134);
            txtOText.ForeColor = Color.Gray;
            txtOText.Font = hintFont;
        }
        /// <summary>
        /// Clears the placeholder hint from <c>txtOText</c> on click and switches the box to
        /// the regular black style. Does nothing once the box holds anything other than the hint.
        /// </summary>
        private void txtOText_MouseClick(object sender, MouseEventArgs e)
        {
            bool showingHint = txtOText.Text == notice1;
            if (!showingHint)
            {
                return;
            }

            txtOText.Text = "";
            Font regularFont = new Font("微软雅黑", 9F, FontStyle.Regular, GraphicsUnit.Point, (byte)134);
            txtOText.Font = regularFont;
            txtOText.ForeColor = Color.Black;
        }
        /// <summary>
        /// Named as the Leave handler of <c>txtOText</c>; currently does nothing.
        /// </summary>
        private void txtOText_Leave(object sender, EventArgs e)
        {
            // Disabled logic, kept for reference: it restored the grey italic placeholder
            // hint whenever the box was left empty.
            /*if (txtOText.Text == "")
            {
                txtOText.Text = notice1;
                txtOText.Font = new System.Drawing.Font("微软雅黑", 9F, System.Drawing.FontStyle.Italic, System.Drawing.GraphicsUnit.Point, ((byte)(134)));
                txtOText.ForeColor = Color.Gray;
            }*/
        }

        /// <summary>
        /// Lets the user pick a text file and loads its content into <c>txtOText</c>.
        /// </summary>
        private void linkLabel1_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e)
        {
            // Configure the dialog BEFORE showing it. Setting Title/Filter after ShowDialog
            // has returned cannot affect the dialog the user has just seen.
            dlgSelectFile.Title = "选择要导入的文本:";
            dlgSelectFile.Filter = "文本文件(*.txt)|*.txt|所有文件|*.*";

            if (dlgSelectFile.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            ReadFileTotxtBox(dlgSelectFile.FileName, txtOText);
        }

        /// <summary>
        /// When an existing file is selected in <c>txtKeywordFileURL</c>, shows its path in
        /// the window title and loads its content into <c>txtKeyFilePreview</c>.
        /// </summary>
        private void txtKeywordFileURL_SelectedIndexChanged(object sender, EventArgs e)
        {
            // SelectedItem is null when the selection has been cleared; calling ToString()
            // on it would throw a NullReferenceException.
            object selected = txtKeywordFileURL.SelectedItem;
            if (selected == null)
            {
                return;
            }

            string FileName = selected.ToString();
            if (File.Exists(FileName))
            {
                this.Text = "敏感词检测系统 - 列表[" + FileName + "]";
                ReadFileTotxtBox(FileName, txtKeyFilePreview);
            }
        }

        /// <summary>
        /// Lets the user pick a keyword file, remembers its path in the
        /// <c>txtKeywordFileURL</c> list, shows the path in the window title and loads the
        /// file into <c>txtKeyFilePreview</c>.
        /// </summary>
        private void button2_Click_1(object sender, EventArgs e)
        {
            if (dlgSelectFile.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            string FileName = dlgSelectFile.FileName;
            txtKeywordFileURL.Text = FileName;

            // Add the path only once; picking the same file again used to append a
            // duplicate entry to the list every time.
            if (!txtKeywordFileURL.Items.Contains(FileName))
            {
                txtKeywordFileURL.Items.Add(FileName);
            }

            this.Text = "敏感词检测系统 - 列表[" + FileName + "]";
            ReadFileTotxtBox(FileName, txtKeyFilePreview);
        }
    }
}