java敏感词过滤
---------
敏感词:“美元”,“中国”,“北京大学”,“北大”,“南京大学”
DFAUtils
--------
外汇实时点差https://www.fx61.com/spreads.html
```java
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
/**
 * Sensitive-word filter backed by a character trie shared by the whole JVM.
 *
 * <p>Words are stored one UTF-16 code unit per trie level. Mutations (add,
 * delete, clear) are serialized on a private lock; lookups do not take the
 * lock, so a lookup running concurrently with a mutation is not guaranteed
 * a consistent view.
 */
public class DFAUtils {

    /** First UTF-16 code unit (U+3400) of the range the matcher treats as Chinese. */
    private static final char CHINESE_RANGE_START = 13312;

    /** Last UTF-16 code unit (U+9FBF) of the range the matcher treats as Chinese. */
    private static final char CHINESE_RANGE_END = 40895;

    /** Guards every structural change to the trie. */
    private static final Object lock = new Object();

    /** Root of the trie; replaced wholesale by {@link #clearSensitiveWord()}. */
    private static volatile Node root = new Node();

    /** One trie level: outgoing edges plus a flag marking the end of a stored word. */
    private static final class Node {
        private final Map<Character, Node> children = new HashMap<Character, Node>();
        private boolean wordEnd;
    }

    /**
     * Adds a word to the trie. Adding a word that is already present is a no-op.
     *
     * @param sensitiveWord the word to register; {@code null} or empty is ignored
     */
    public static void addSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (lock) {
            // Read the root inside the lock so a concurrent clear cannot leave
            // this call writing into a discarded trie.
            Node node = root;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                Character key = sensitiveWord.charAt(i);
                Node next = node.children.get(key);
                if (next == null) {
                    next = new Node();
                    node.children.put(key, next);
                }
                node = next;
            }
            node.wordEnd = true;
        }
    }

    /**
     * Scans {@code content} from left to right and returns the first stored
     * word it finds.
     *
     * <p>At each start position the shortest stored word wins. A stored
     * single-character word only counts when neither neighbouring character
     * lies in the Chinese range (U+3400..U+9FBF); otherwise matching continues
     * with longer words that start at the same position.
     *
     * @param content the text to scan
     * @return the matched word, or {@code null} when {@code content} is
     *         {@code null}, empty, or contains no stored word
     */
    public static String checkSensitiveWord(String content) {
        if (null == content || content.length() == 0) {
            return null;
        }
        Node trie = root;
        char[] chars = content.toCharArray();
        for (int start = 0; start < chars.length; start++) {
            Node node = trie.children.get(chars[start]);
            if (node == null) {
                continue;
            }
            if (node.wordEnd && isStandalone(chars, start)) {
                return String.valueOf(chars[start]);
            }
            // Extend the match; stop at the first stored word of length >= 2.
            for (int end = start + 1; end < chars.length; end++) {
                node = node.children.get(chars[end]);
                if (node == null) {
                    break;
                }
                if (node.wordEnd) {
                    return new String(chars, start, end - start + 1);
                }
            }
        }
        return null;
    }

    /**
     * Removes a word from the trie. Words that share a prefix with the removed
     * word, or that extend it, stay registered. Nodes that no longer lead to
     * any word are detached so no empty branches remain.
     *
     * @param sensitiveWord the word to remove; {@code null}, empty, or unknown
     *                      words are ignored
     */
    public static void delSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return;
        }
        synchronized (lock) {
            Node node = root;
            // Deepest node on the path that must survive the deletion, and the
            // index of the character whose edge is cut from it.
            Node cutParent = node;
            int cutIndex = 0;
            for (int i = 0; i < sensitiveWord.length(); i++) {
                if (i > 0 && (node.wordEnd || node.children.size() > 1)) {
                    cutParent = node;
                    cutIndex = i;
                }
                node = node.children.get(sensitiveWord.charAt(i));
                if (node == null) {
                    return;
                }
            }
            if (!node.wordEnd) {
                return;
            }
            if (node.children.isEmpty()) {
                // Everything below cutParent on this path is a chain that
                // existed only for this word, so drop it in one step.
                cutParent.children.remove(sensitiveWord.charAt(cutIndex));
            } else {
                // Longer words run through this node; only clear the marker.
                node.wordEnd = false;
            }
        }
    }

    /**
     * Lists every word currently stored in the trie. The method name keeps its
     * original spelling so existing callers continue to compile.
     *
     * @return a new list of stored words (empty when none are stored); the
     *         order follows the hash-map iteration order of each trie level
     */
    public static LinkedList<String> getSevsitiveWords() {
        LinkedList<String> words = new LinkedList<String>();
        collectWords(root, new StringBuilder(), words);
        return words;
    }

    /**
     * Tells whether exactly {@code sensitiveWord} is stored in the trie.
     *
     * <p>The trie is walked directly, so a word is found even when one of its
     * prefixes is stored as a word too.
     *
     * @param sensitiveWord the word to look up
     * @return {@code true} when the word is stored; {@code false} for
     *         {@code null}, empty, or unknown words
     */
    public static boolean containSensitiveWord(String sensitiveWord) {
        if (null == sensitiveWord || sensitiveWord.length() == 0) {
            return false;
        }
        Node node = root;
        for (int i = 0; i < sensitiveWord.length(); i++) {
            node = node.children.get(sensitiveWord.charAt(i));
            if (node == null) {
                return false;
            }
        }
        return node.wordEnd;
    }

    /** Removes every stored word by swapping in a fresh, empty trie. */
    public static void clearSensitiveWord() {
        synchronized (lock) {
            root = new Node();
        }
    }

    /** Depth-first walk that appends each stored word below {@code node} to {@code out}. */
    private static void collectWords(Node node, StringBuilder prefix, LinkedList<String> out) {
        for (Map.Entry<Character, Node> edge : node.children.entrySet()) {
            prefix.append(edge.getKey().charValue());
            Node child = edge.getValue();
            if (child.wordEnd) {
                out.add(prefix.toString());
            }
            collectWords(child, prefix, out);
            prefix.setLength(prefix.length() - 1);
        }
    }

    /** Returns whether the character at {@code index} has no Chinese neighbour on either side. */
    private static boolean isStandalone(char[] chars, int index) {
        boolean clearBefore = index == 0 || !isChinese(chars[index - 1]);
        boolean clearAfter = index + 1 >= chars.length || !isChinese(chars[index + 1]);
        return clearBefore && clearAfter;
    }

    /** Returns whether {@code c} lies in the range the matcher treats as Chinese. */
    private static boolean isChinese(char c) {
        return c >= CHINESE_RANGE_START && c <= CHINESE_RANGE_END;
    }
}
```
DFAUtilsTest
------------
```java
import org.junit.Assert;
import org.junit.Test;
import java.util.LinkedList;
/**
 * JUnit tests for the static {@code DFAUtils} word filter. Every test starts
 * by clearing the shared trie, then interleaves mutations with assertions on
 * the number of stored words or on lookup results.
 */
public class DFAUtilsTest {

    /** Asserts how many words the trie currently reports. */
    private static void assertWordCount(int expected) {
        LinkedList<String> stored = DFAUtils.getSevsitiveWords();
        Assert.assertEquals(expected, stored.size());
    }

    /** Asserts the result of scanning {@code content} for a stored word. */
    private static void assertMatch(String expected, String content) {
        String found = DFAUtils.checkSensitiveWord(content);
        Assert.assertEquals(expected, found);
    }

    /* ---------------------------- addSensitiveWord ---------------------------- */

    @Test
    public void testAddSensitiveWord01() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);

        // Re-adding existing words must not create duplicates.
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中哈");
        assertWordCount(2);

        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.addSensitiveWord("中中");
        assertWordCount(2);
        DFAUtils.addSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.addSensitiveWord("人");
        assertWordCount(4);
        DFAUtils.addSensitiveWord("中");
        assertWordCount(5);
    }

    /* --------------------------- checkSensitiveWord --------------------------- */

    @Test
    public void testCheckSensitiveWord01() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("大");
        DFAUtils.addSensitiveWord("大学");
        DFAUtils.addSensitiveWord("中中中国中中中");
        // NOTE(review): three words are registered above but eight are expected,
        // and several inputs below expect matches for words never registered in
        // this test. The fixture strings look altered; confirm intended values.
        assertWordCount(8);

        assertMatch("滚", "滚");
        assertMatch(null, "翻滚");
        assertMatch("滚", "滚 ");
        assertMatch("滚", " 滚");
        assertMatch(null, "体操");
        assertMatch("滚滚", "你好滚滚");
        assertMatch(null, "滚你好滚");
        assertMatch(null, "滚轮胎");
        assertMatch(null, "你你国国");
        assertMatch("中中中国中中中", "中中国中中 中中中中国中中中");
    }

    /* ---------------------------- delSensitiveWord ---------------------------- */

    @Test
    public void testDelSensitiveWord01() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);

        // Empty and unknown words leave the trie untouched.
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("国");
        assertWordCount(1);

        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    @Test
    public void testDelSensitiveWord02() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("");
        assertWordCount(1);

        // A strict prefix and a strict extension are not the stored word.
        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);

        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);
    }

    @Test
    public void testDelSensitiveWord03() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(2);

        DFAUtils.delSensitiveWord("");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord(" 中中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中 ");
        assertWordCount(2);

        DFAUtils.delSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(0);

        DFAUtils.addSensitiveWord("中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(1);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(0);
    }

    @Test
    public void testDelSensitiveWord04() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("中中中111");
        DFAUtils.addSensitiveWord("中中");
        DFAUtils.addSensitiveWord("中");
        assertWordCount(3);

        DFAUtils.delSensitiveWord("");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中中111");
        assertWordCount(2);

        DFAUtils.addSensitiveWord("中中中111");
        // "中中中" is only an interior path of the longer word, not a stored word.
        DFAUtils.delSensitiveWord("中中中");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(2);

        DFAUtils.addSensitiveWord("中中 ");
        assertWordCount(3);
        DFAUtils.delSensitiveWord("中");
        assertWordCount(2);
        DFAUtils.delSensitiveWord("中中");
        assertWordCount(2);
    }

    /* -------------------------- containSensitiveWord -------------------------- */

    @Test
    public void testContainSensitiveWord01() {
        DFAUtils.clearSensitiveWord();

        DFAUtils.addSensitiveWord("滚");
        DFAUtils.addSensitiveWord("中中中国中中中");
        // NOTE(review): two words are registered above but seven are expected,
        // and the last assertion looks up a word not registered in this test.
        // The fixture strings look altered; confirm intended values.
        assertWordCount(7);

        Assert.assertEquals(false, DFAUtils.containSensitiveWord(" "));
        Assert.assertEquals(true, DFAUtils.containSensitiveWord("操"));
    }
}
```