匹配关键词和敏感词

前一段时间被大佬问到如何实现一个敏感词的匹配,所以写了一个简版的敏感词匹配结构,还待后期优化!

代码地址:sensitive_words

github.com/tianye/sensitive_words

注:暂时不可以投放到生产环境使用

有喜欢的可以提交代码给我,或者拿走自己维护

希望各位大佬多多指点,谢谢!关键词是从网上找到的,所以不是很全,望大家海涵!

使用方法:

#### 1. 先构建敏感关键词树(因为树在初始化时就建立完成,后期可以直接使用,减少后期的构建过程)

// tree is the package-level sensitive-word tree. It is created once here and
// filled in init so that later matching calls can reuse it.
var tree = decision.CreateTree()

// init fills the package-level tree with the sample sensitive words so the
// tree is ready before any matching is done.
func init() {
    words := []string{"考前答案", "答案", "前答"}

    // Insert every word into the shared tree, in list order.
    for idx := range words {
        decision.BuildTrue(words[idx], tree)
    }
}

#### 2. 匹配敏感关键词

    // Text to be scanned for sensitive words.
    str := "啊前答我是一个文字啦啦啦考前答案答案前答我也是文答案字啦"

    // Scan the text against the prebuilt tree: res reports whether any
    // sensitive word was found, loc lists the positions of the matched characters.
    res, loc := decision.MatchingSensitiveWords(tree, str)

    fmt.Println("\n结果:", "是否存在敏感词:", res, "敏感词每个字所在位置:", loc)

输出结果:

结果: 是否存在敏感词: true 敏感词每个字所在位置: [2 3 13 14 15 16 17 18 19 20 25 26]

使用实例:请查看 build_decision.go 文件,谢谢

源码实现方式:

package decision

import (
    "fmt"
    "strings"
)

// Node is one node of the sensitive-word tree. BuildTrue creates one Node per
// character of an inserted word.
type Node struct {
    // Word is the single character held by this node, stored as a string.
    Word     string
    // Node lists the child nodes, i.e. the characters that may follow this one.
    Node     [] *Node
    // Location is the 1-based position of the character inside the word that
    // created this node.
    Location int

    // IsSensitive marks the node holding the last character of an inserted
    // word; the matcher treats reaching such a node as a hit.
    IsSensitive bool
}

// Tree is the root of the sensitive-word tree.
type Tree struct {
    // TreeNode holds the first-level nodes: one node per distinct first
    // character of the words inserted with BuildTrue.
    TreeNode [] *Node
}

// CreateNode allocates a new childless node.
//
// word is the character the node stands for, location its position inside the
// word being inserted, and isSensitive whether it terminates a sensitive word.
func CreateNode(word string, location int, isSensitive bool) *Node {
    leaf := new(Node)
    leaf.Word = word
    leaf.Location = location
    leaf.IsSensitive = isSensitive

    return leaf
}

// CreateTree returns a new, empty sensitive-word tree.
func CreateTree() *Tree {
    return new(Tree)
}

// SearchNode looks through a single level of nodes for the one whose Word
// equals str. It returns the first such node, or nil when none matches.
func SearchNode(str string, nodeList [] *Node) *Node {
    for idx := range nodeList {
        if candidate := nodeList[idx]; candidate.Word == str {
            return candidate
        }
    }

    // Nothing on this level carries the requested character.
    return nil
}

// AppendNode adds newNode as the last child of nowNode and returns newNode,
// so the caller can keep descending from the node it just attached.
func AppendNode(nowNode, newNode *Node) *Node {
    children := append(nowNode.Node, newNode)
    nowNode.Node = children

    return newNode
}

// BuildTrue inserts the word str into tree, one node per character (rune),
// and returns the same tree.
//
// Nodes that already exist for a shared prefix are reused and the walk
// continues from them, so words with a common prefix share one branch. The
// node holding the final character is flagged IsSensitive whether it was
// newly created or already present, which means a word that is a prefix of a
// previously inserted word is still recorded. An empty str leaves the tree
// unchanged.
func BuildTrue(str string, tree *Tree) *Tree {
    // strings.Count(s, "") reports the number of runes plus one, so end is
    // the 1-based position of the last character of str.
    end := strings.Count(str, "") - 1

    // nowNode is the node of the previously processed character; it stays nil
    // while we are still at the root level.
    var nowNode *Node

    i := 0
    for _, val := range str {
        i++
        word := string(val)

        // The level to look in: the tree's first level for the first
        // character, otherwise the children of the previous character.
        siblings := tree.TreeNode
        if nowNode != nil {
            siblings = nowNode.Node
        }

        next := SearchNode(word, siblings)
        if next == nil {
            // The character is not on this level yet: create and attach it.
            next = CreateNode(word, i, false)
            if nowNode == nil {
                tree.TreeNode = append(tree.TreeNode, next)
            } else {
                AppendNode(nowNode, next)
            }
        }

        // Flag the end of the word on the node actually stored in the tree,
        // not on a throw-away copy, so reused nodes are marked as well.
        if i == end {
            next.IsSensitive = true
        }

        // Always descend, including when the node already existed.
        nowNode = next
    }

    return tree
}

// MatchingSensitiveWords scans str for the words stored in tree.
//
// It returns whether at least one sensitive word was found, together with the
// 1-based character (rune) positions of every matched character, in ascending
// order. The returned slice is empty but non-nil when nothing matches.
//
// The scan is leftmost-first and non-overlapping: from each start position
// the tree is walked until the first node flagged IsSensitive is reached
// (shortest match). On a hit, scanning resumes right after the matched word.
// On a miss, it resumes one character after the start of the failed attempt,
// so a word that begins inside an abandoned partial match is still found.
func MatchingSensitiveWords(tree *Tree, str string) (isSensitive bool, allLocationStr []int) {
    allLocationStr = make([]int, 0)

    // Work on runes so that positions count characters and we can step back
    // to an earlier character after a failed partial match.
    runes := []rune(str)

    for start := 0; start < len(runes); {
        level := tree.TreeNode
        matchedEnd := -1

        // Walk down the tree for as long as there is a level to search.
        for j := start; j < len(runes) && level != nil; j++ {
            var hit bool
            level, hit = SearchLeavesNode(string(runes[j]), level)
            if hit {
                matchedEnd = j
                break
            }
        }

        if matchedEnd < 0 {
            // No word starts here; retry from the very next character.
            start++
            continue
        }

        // Record the 1-based position of each character of the matched word.
        for p := start; p <= matchedEnd; p++ {
            allLocationStr = append(allLocationStr, p+1)
        }
        start = matchedEnd + 1
    }

    return len(allLocationStr) > 0, allLocationStr
}

// SearchLeavesNode searches one level of the tree (params) for the node whose
// Word equals str.
//
// On a hit it returns that node's children and its IsSensitive flag; the
// children may be nil when the node has none. On a miss it returns nil and
// false.
func SearchLeavesNode(str string, params []*Node) (node []*Node, isSensitive bool) {
    for idx := range params {
        if candidate := params[idx]; candidate.Word == str {
            return candidate.Node, candidate.IsSensitive
        }
    }

    return nil, false
}

// WatchPrint dumps the given nodes and all of their descendants to standard
// output, depth first, one line per node. It is a debugging aid for
// inspecting the tree structure.
func WatchPrint(params []*Node) {
    for idx := range params {
        current := params[idx]
        fmt.Print("watch.Word:", " ", current.Word, " ", current.Location, current.IsSensitive, "-----", current, "\n")

        // Descend only when the node actually has a child list.
        if children := current.Node; children != nil {
            WatchPrint(children)
        }
    }
}
本作品采用《CC 协议》,转载必须注明作者和本文链接
讨论数量: 0
(= ̄ω ̄=)··· 暂无内容!

讨论应以学习和精进为目的。请勿发布不友善或者负能量的内容,与人为善,比聪明更重要!