\u4f7f\u7528DFA(Deterministic Finite Automaton)\u8fdb\u884c\u5b9e\u73b0<\/strong><\/div>\nDFA\u5373Deterministic Finite Automaton\uff0c\u4e5f\u5c31\u662f\u786e\u5b9a\u6709\u7a77\u81ea\u52a8\u673a\u3002<\/p>\n
\u4f7f\u7528\u4e86\u5d4c\u5957\u7684\u5b57\u5178\u6765\u5b9e\u73b0\u3002<\/p>\n
\r\nclass DFAFilter():\r\n \r\n '''Filter Messages from keywords\r\n \r\n Use DFA to keep algorithm perform constantly\r\n \r\n >>> f = DFAFilter()\r\n >>> f.add(\"sexy\")\r\n >>> f.filter(\"hello sexy baby\")\r\n hello **** baby\r\n '''\r\n \r\n def __init__(self):\r\n self.keyword_chains = {}\r\n self.delimit = '\\x00'\r\n \r\n def add(self, keyword):\r\n if not isinstance(keyword, str):\r\n keyword = keyword.decode('utf-8')\r\n keyword = keyword.lower()\r\n chars = keyword.strip()\r\n if not chars:\r\n return\r\n level = self.keyword_chains\r\n for i in range(len(chars)):\r\n if chars[i] in level:\r\n level = level[chars[i]]\r\n else:\r\n if not isinstance(level, dict):\r\n break\r\n for j in range(i, len(chars)):\r\n level[chars[j]] = {}\r\n last_level, last_char = level, chars[j]\r\n level = level[chars[j]]\r\n last_level[last_char] = {self.delimit: 0}\r\n break\r\n if i == len(chars) - 1:\r\n level[self.delimit] = 0\r\n \r\n def parse(self, path):\r\n with open(path,encoding='UTF-8') as f:\r\n for keyword in f:\r\n self.add(keyword.strip())\r\n \r\n def filter(self, message, repl=\"*\"):\r\n if not isinstance(message, str):\r\n message = message.decode('utf-8')\r\n message = message.lower()\r\n ret = []\r\n start = 0\r\n while start < len(message):\r\n level = self.keyword_chains\r\n step_ins = 0\r\n for char in message[start:]:\r\n if char in level:\r\n step_ins += 1\r\n if self.delimit not in level[char]:\r\n level = level[char]\r\n else:\r\n ret.append(repl * step_ins)\r\n start += step_ins - 1\r\n break\r\n else:\r\n ret.append(message[start])\r\n break\r\n else:\r\n ret.append(message[start])\r\n start += 1\r\n \r\n return ''.join(ret)<\/pre>\n\u5230\u6b64\u8fd9\u7bc7\u5173\u4e8ePython \u654f\u611f\u8bcd\u8fc7\u6ee4\u7684\u5b9e\u73b0\u793a\u4f8b\u7684\u6587\u7ae0\u5c31\u4ecb\u7ecd\u5230\u8fd9\u4e86\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"
\u4e3b\u8981\u662f\u901a\u8fc7\u5faa\u73af\u548creplace\u7684\u65b9\u5f0f\u8fdb\u884c\u654f\u611f\u8bcd\u7684\u66ff\u6362 class NaiveFilter(): ”’Filt […]<\/p>\n","protected":false},"author":1482,"featured_media":176468,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-225763","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/users\/1482"}],"replies":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/comments?post=225763"}],"version-history":[{"count":3,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763\/revisions"}],"predecessor-version":[{"id":225766,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763\/revisions\/225766"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media\/176468"}],"wp:attachment":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media?parent=225763"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/categories?post=225763"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/tags?post=225763"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}