{"id":225763,"date":"2021-09-16T09:32:37","date_gmt":"2021-09-16T01:32:37","guid":{"rendered":"https:\/\/gulass.cn\/?p=225763"},"modified":"2021-09-02T10:33:10","modified_gmt":"2021-09-02T02:33:10","slug":"python-linux-language","status":"publish","type":"post","link":"https:\/\/gulass.cn\/python-linux-language.html","title":{"rendered":"\u6559\u4f60\u5982\u4f55\u5b9e\u73b0Python \u8fc7\u6ee4\u654f\u611f\u8bcd"},"content":{"rendered":"\n\n\n
\u5bfc\u8bfb<\/td>\n\u672c\u6587\u4e3b\u8981\u4ecb\u7ecd\u4e86Python \u654f\u611f\u8bcd\u8fc7\u6ee4\u7684\u5b9e\u73b0\u793a\u4f8b\uff0c\u6587\u4e2d\u901a\u8fc7\u793a\u4f8b\u4ee3\u7801\u4ecb\u7ecd\u5f97\u975e\u5e38\u8be6\u7ec6\uff0c\u5177\u6709\u4e00\u5b9a\u7684\u53c2\u8003\u4ef7\u503c\uff0c\u611f\u5174\u8da3\u7684\u5c0f\u4f19\u4f34\u4eec\u53ef\u4ee5\u53c2\u8003\u4e00\u4e0b\u3002<\/strong><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n
\u4e00\u4e2a\u7b80\u5355\u7684\u5b9e\u73b0<\/strong><\/div>\n

\u4e3b\u8981\u662f\u901a\u8fc7\u5faa\u73af\u548creplace\u7684\u65b9\u5f0f\u8fdb\u884c\u654f\u611f\u8bcd\u7684\u66ff\u6362<\/p>\n

\r\nclass NaiveFilter():\r\n \r\n    '''Filter Messages from keywords\r\n \r\n    very simple filter implementation\r\n \r\n    >>> f = NaiveFilter()\r\n    >>> f.parse(\"filepath\")\r\n    >>> f.filter(\"hello sexy baby\")\r\n    hello **** baby\r\n    '''\r\n \r\n    def __init__(self):\r\n        self.keywords = set([])\r\n \r\n    def parse(self, path):\r\n        for keyword in open(path):\r\n            self.keywords.add(keyword.strip().decode('utf-8').lower())\r\n \r\n    def filter(self, message, repl=\"*\"):\r\n        message = str(message).lower()\r\n        for kw in self.keywords:\r\n            message = message.replace(kw, repl)\r\n        return message<\/pre>\n
\u4f7f\u7528BFS(\u5bbd\u5ea6\u4f18\u5148\u641c\u7d22)\u8fdb\u884c\u5b9e\u73b0<\/strong><\/div>\n

\u5bf9\u4e8e\u641c\u7d22\u67e5\u627e\u8fdb\u884c\u4e86\u4f18\u5316\uff0c\u5bf9\u4e8e\u82f1\u8bed\u5355\u8bcd\uff0c\u76f4\u63a5\u8fdb\u884c\u4e86\u6309\u8bcd\u7d22\u5f15\u5b57\u5178\u67e5\u627e\u3002\u5bf9\u4e8e\u5176\u4ed6\u8bed\u8a00\u6a21\u5f0f\uff0c\u6211\u4eec\u91c7\u7528\u9010\u5b57\u7b26\u67e5\u627e\u5339\u914d\u7684\u4e00\u79cd\u6a21\u5f0f\u3002<\/p>\n

BFS\uff1a\u5bbd\u5ea6\u4f18\u5148\u641c\u7d22\u65b9\u5f0f<\/p>\n

\r\nclass BSFilter:\r\n \r\n    '''Filter Messages from keywords\r\n \r\n    Use Back Sorted Mapping to reduce replacement times\r\n \r\n    >>> f = BSFilter()\r\n    >>> f.add(\"sexy\")\r\n    >>> f.filter(\"hello sexy baby\")\r\n    hello **** baby\r\n    '''\r\n \r\n    def __init__(self):\r\n        self.keywords = []\r\n        self.kwsets = set([])\r\n        self.bsdict = defaultdict(set)\r\n        self.pat_en = re.compile(r'^[0-9a-zA-Z]+$')  # english phrase or not\r\n \r\n    def add(self, keyword):\r\n        if not isinstance(keyword, str):\r\n            keyword = keyword.decode('utf-8')\r\n        keyword = keyword.lower()\r\n        if keyword not in self.kwsets:\r\n            self.keywords.append(keyword)\r\n            self.kwsets.add(keyword)\r\n            index = len(self.keywords) - 1\r\n            for word in keyword.split():\r\n                if self.pat_en.search(word):\r\n                    self.bsdict[word].add(index)\r\n                else:\r\n                    for char in word:\r\n                        self.bsdict[char].add(index)\r\n \r\n    def parse(self, path):\r\n        with open(path, \"r\") as f:\r\n            for keyword in f:\r\n                self.add(keyword.strip())\r\n \r\n    def filter(self, message, repl=\"*\"):\r\n        if not isinstance(message, str):\r\n            message = message.decode('utf-8')\r\n        message = message.lower()\r\n        for word in message.split():\r\n            if self.pat_en.search(word):\r\n                for index in self.bsdict[word]:\r\n                    message = message.replace(self.keywords[index], repl)\r\n            else:\r\n                for char in word:\r\n                    for index in self.bsdict[char]:\r\n                        message = message.replace(self.keywords[index], repl)\r\n        return message<\/pre>\n
\u4f7f\u7528DFA(Deterministic Finite Automaton)\u8fdb\u884c\u5b9e\u73b0<\/strong><\/div>\n

DFA\u5373Deterministic Finite Automaton\uff0c\u4e5f\u5c31\u662f\u786e\u5b9a\u6709\u7a77\u81ea\u52a8\u673a\u3002<\/p>\n

\u4f7f\u7528\u4e86\u5d4c\u5957\u7684\u5b57\u5178\u6765\u5b9e\u73b0\u3002<\/p>\n

\r\nclass DFAFilter():\r\n \r\n    '''Filter Messages from keywords\r\n \r\n    Use DFA to keep algorithm perform constantly\r\n \r\n    >>> f = DFAFilter()\r\n    >>> f.add(\"sexy\")\r\n    >>> f.filter(\"hello sexy baby\")\r\n    hello **** baby\r\n    '''\r\n \r\n    def __init__(self):\r\n        self.keyword_chains = {}\r\n        self.delimit = '\\x00'\r\n \r\n    def add(self, keyword):\r\n        if not isinstance(keyword, str):\r\n            keyword = keyword.decode('utf-8')\r\n        keyword = keyword.lower()\r\n        chars = keyword.strip()\r\n        if not chars:\r\n            return\r\n        level = self.keyword_chains\r\n        for i in range(len(chars)):\r\n            if chars[i] in level:\r\n                level = level[chars[i]]\r\n            else:\r\n                if not isinstance(level, dict):\r\n                    break\r\n                for j in range(i, len(chars)):\r\n                    level[chars[j]] = {}\r\n                    last_level, last_char = level, chars[j]\r\n                    level = level[chars[j]]\r\n                last_level[last_char] = {self.delimit: 0}\r\n                break\r\n        if i == len(chars) - 1:\r\n            level[self.delimit] = 0\r\n \r\n    def parse(self, path):\r\n        with open(path,encoding='UTF-8') as f:\r\n            for keyword in f:\r\n                self.add(keyword.strip())\r\n \r\n    def filter(self, message, repl=\"*\"):\r\n        if not isinstance(message, str):\r\n            message = message.decode('utf-8')\r\n        message = message.lower()\r\n        ret = []\r\n        start = 0\r\n        while start < len(message):\r\n            level = self.keyword_chains\r\n            step_ins = 0\r\n            for char in message[start:]:\r\n                if char in level:\r\n                    step_ins += 1\r\n                    if self.delimit not in level[char]:\r\n                        level = level[char]\r\n                    else:\r\n                 
       ret.append(repl * step_ins)\r\n                        start += step_ins - 1\r\n                        break\r\n                else:\r\n                    ret.append(message[start])\r\n                    break\r\n            else:\r\n                ret.append(message[start])\r\n            start += 1\r\n \r\n        return ''.join(ret)<\/pre>\n

\u5230\u6b64\u8fd9\u7bc7\u5173\u4e8ePython \u654f\u611f\u8bcd\u8fc7\u6ee4\u7684\u5b9e\u73b0\u793a\u4f8b\u7684\u6587\u7ae0\u5c31\u4ecb\u7ecd\u5230\u8fd9\u4e86\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"

\u4e3b\u8981\u662f\u901a\u8fc7\u5faa\u73af\u548creplace\u7684\u65b9\u5f0f\u8fdb\u884c\u654f\u611f\u8bcd\u7684\u66ff\u6362 class NaiveFilter(): ”’Filt […]<\/p>\n","protected":false},"author":1482,"featured_media":176468,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-225763","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/users\/1482"}],"replies":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/comments?post=225763"}],"version-history":[{"count":3,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763\/revisions"}],"predecessor-version":[{"id":225766,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/posts\/225763\/revisions\/225766"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media\/176468"}],"wp:attachment":[{"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/media?parent=225763"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/categories?post=225763"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gulass.cn\/wp-json\/wp\/v2\/tags?post=225763"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}