"""INN (innd) Python article filter targeting Google Groups subject spam.

An article is rejected only when ALL of the following hold:

* it has no ``References`` header (i.e. it is not a follow-up),
* its ``Path`` contains ``google-groups.googlegroups.com``,
* its ``User-Agent`` is exactly ``G2/1.0``,
* the first RFC 2047 token of its ``Subject`` is declared as UTF-8 and
  contains at least one code point in U+0E00..U+0EFF (the Unicode Thai
  and Lao blocks).

Rejected articles are also copied into ``saved_art_path`` for later
processing.  Everything else is accepted.
"""

from INN import *
from email.header import decode_header
import sys
import string
import random
import os


def intern(headerName):
    """Return the interned form of *headerName* (used for header-name keys)."""
    return sys.intern(headerName)


# Interned names of the article dictionary keys this filter looks at.
Newsgroups = intern("Newsgroups")
Path = intern("Path")
References = intern("References")
Subject = intern("Subject")
UserAgent = intern("User-Agent")
__BODY__ = intern("__BODY__")
__LINES__ = intern("__LINES__")

# Directory that receives a copy of every article classified as spam.
saved_art_path = "/usr/local/news/spool/pythai"


def log(s):
    """Send *s* to syslog through INN at "notice" level."""
    syslog("notice", s)


def tostr(mv):
    """Decode a header value (an object with ``tobytes()``) as strict UTF-8.

    Raises ``UnicodeDecodeError`` on invalid UTF-8 and ``AttributeError``
    when *mv* is ``None``; callers that cannot rule those out should use
    ``_header_text`` instead.
    """
    return mv.tobytes().decode("utf-8")


def _header_text(art, name):
    """Return header *name* of *art* as text, tolerating absence/bad bytes.

    A header whose value is ``None`` yields ``""``; undecodable bytes are
    replaced rather than raising, because the result is only used for
    substring/equality checks and for logging.
    """
    value = art[name]
    if value is None:
        return ""
    return value.tobytes().decode("utf-8", errors="replace")


class InndFilter:
    """Filter object registered with INN via ``set_filter_hook``."""

    def __init__(self):
        """Announce start-up and make sure the spam archive directory exists."""
        syslog("notice", "Filter starting")
        os.makedirs(saved_art_path, exist_ok=True)

    def copy_spam(self, art):
        """Write *art* (headers, blank line, body) into ``saved_art_path``.

        The article is first written to ``<random-name>.tmp`` and then
        renamed to ``<random-name>.to-rewrite`` so that a consumer of the
        directory never sees a partially written file.

        Raises:
            OSError: if the file cannot be created, written or renamed.
        """
        name = ''.join(
            random.choice(string.ascii_letters + string.digits)
            for _ in range(64)
        )
        path = os.path.join(saved_art_path, name)
        tmp_path = path + ".tmp"

        # ``with`` guarantees the handle is closed even if a write fails.
        with open(tmp_path, "wb") as f:
            for key in art:
                # __BODY__ and __LINES__ are pseudo-entries, not real headers.
                if key == __BODY__ or key == __LINES__:
                    continue
                value = art[key]
                if value is None:
                    continue
                f.write(key.encode("utf-8"))
                f.write(b": ")
                f.write(value.tobytes())
                f.write(b"\n")
            f.write(b"\n")
            body = art[__BODY__]
            if body is not None:
                f.write(body.tobytes())

        os.rename(tmp_path, path + ".to-rewrite")

    def filter_art(self, art):
        """Classify one incoming article.

        Args:
            art: mapping from header name (plus ``__BODY__``/``__LINES__``)
                to a buffer object exposing ``tobytes()``, or ``None`` when
                the header is absent.

        Returns:
            ``None`` when the article is whitelisted or classified as ham,
            or the string ``"Spam not welcome"`` when it is classified as
            spam (presumably used by INN as the rejection reason -- see
            the INN Python hook documentation).
        """
        log("Article arrived")

        if art[References] is not None:
            log("Whitelisting, because has references")
            return None

        path_text = _header_text(art, Path)
        if "google-groups.googlegroups.com" not in path_text:
            log("Whitelisting, because google groups not in Path")
            log("Path is: %s" % path_text)
            return None

        # A missing User-Agent decodes to "" and is therefore whitelisted
        # here instead of raising on None.
        if _header_text(art, UserAgent) != "G2/1.0":
            log("Whitelisting, because User-Agent is not G2/1.0")
            return None

        if art[Subject] is None:
            log("Whitelisting, because Subject is empty")
            return None

        # Optional restriction to a single group, currently disabled:
        # if "pl.soc.polityka" not in tostr(art[Newsgroups]):
        #     log("Whitelisting, because checked group is not in Newsgroups")
        #     return None

        # Any failure while decoding the Subject means "accept": the filter
        # must never reject an article it could not analyse.
        try:
            subjlist = decode_header(tostr(art[Subject]))
            if len(subjlist) == 0:
                log("List of tokens in subject is zero")
                return None
            first_text, first_charset = subjlist[0]
            if first_charset is None:
                log("First subject token has no encoding")
                return None
            if first_charset.lower() != "utf-8":
                log("First subject token is not in UTF-8")
                return None
            subj = first_text.decode("utf-8")
        except Exception as e:
            log("Exception when decoding Subject: %s" % e)
            return None

        # U+0E00..U+0EFF spans the Unicode Thai and Lao blocks.
        if any(0x0E00 <= ord(ch) <= 0x0EFF for ch in subj):
            log("Article classified as spam")
            try:
                self.copy_spam(art)
            except OSError as e:
                # Failing to archive the article must not let it through.
                log("Could not save spam article: %s" % e)
            return "Spam not welcome"

        log("Article classified as ham")
        return None


spamfilter = InndFilter()
try:
    set_filter_hook(spamfilter)
    syslog('n', "spamfilter successfully hooked into INN")
except Exception as errmsg:
    # Exceptions are not subscriptable in Python 3, so format the
    # exception object itself rather than errmsg[0].
    syslog('e', "Cannot obtain INN hook for spamfilter: %s" % errmsg)