#! /usr/bin/python
"""Warn users about old processes and kill the ones they left listed.

Usage: check_for_old_processes.py minAgeDays maxAgeDays [DEBUG]

Each run does the following, in order:

1. For every member of ``validGroups`` (minus ``ignoreUsers``), read the
   kill file ``~/.processes_to_be_killed`` if it exists, kill every PID still
   listed in it (as that user, via ``su``) and remove the file.
2. List all processes with ``ps`` and pick those owned by a valid user whose
   age in whole days is strictly between minAgeDays and maxAgeDays.
3. Write those ``ps`` lines to a fresh kill file in the owner's home
   directory and e-mail the owner.

With DEBUG on the command line nothing is killed and no mail is sent; the
commands and messages are printed instead and the log goes to ``temp.log``.
Kill files are still removed and rewritten in DEBUG mode.
"""
import os, sys, re, pwd, time, grp, datetime
import errno
import subprocess

psCommand = "/bin/ps -eo uname,pid,lstart,args"
killCommand = "/bin/kill -9"
suCommand = "/bin/su"
rmCommand = "/bin/rm"
chownCommand = "/bin/chown"
sendmailCommand = "/usr/sbin/sendmail -t"
logFile = "/var/log/check_for_old_processes.log"
killFileName = ".processes_to_be_killed"
validGroups = ["students", "staff", "faculty"]
ignoreUsers = ["root"]
DEBUG = False
# Set to False to send the warning mails without writing kill files.
GenerateKillFile = True

usageText = "Usage: check_for_old_processes.py minAgeDays maxAgeDays [DEBUG]"

# One line of "ps -eo uname,pid,lstart,args": owner, pid, start time, command.
# The owner is matched as any non-blank token so that names containing "_",
# "." or a trailing "+" (truncated by ps) are read whole instead of in part.
psLinePattern = re.compile(
    r"^\s*(\S+)\s+(\d+)\s+"
    r"((?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+"
    r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+"
    r"\d+\s+\d+:\d+:\d+\s+\d+) (.*)$")
# A kill-file line only needs its first two columns: owner and pid.
killLinePattern = re.compile(r"^\s*\S+\s+(\d+)")
lstartFormat = "%a %b %d %H:%M:%S %Y"


def openLog(debug):
    """Open the log for appending, falling back to /tmp/logfile.log."""
    try:
        return open("temp.log" if debug else logFile, "a")
    except IOError:
        # NOTE(review): fixed name in a world-writable directory; consider a
        # safer fallback location.
        print("Could not open log file. Using /tmp/logfile.log instead.")
        return open("/tmp/logfile.log", "a")


def parseAgeLimits(argv):
    """Return (minAge, maxAge) in days from argv, or None if unusable.

    The literal token "DEBUG" is ignored wherever it appears.  maxAge
    defaults to 9999999999 when only one number is given.  None is returned
    when no number is given or a value is not an integer.
    """
    numbers = [arg for arg in argv[1:] if arg != "DEBUG"]
    if not numbers:
        return None
    try:
        minAge = int(numbers[0])
        maxAge = int(numbers[1]) if len(numbers) > 1 else 9999999999
    except ValueError:
        return None
    return minAge, maxAge


def getValidUsers(log):
    """Return the members of validGroups that are not in ignoreUsers.

    Unknown groups and members without a passwd entry are logged and
    skipped; a user listed in several groups is returned once.
    """
    validUsers = []
    for validGroup in validGroups:
        try:
            members = grp.getgrnam(validGroup)[3]
        except KeyError:
            log.write("Unknown group, skipping: " + validGroup + "\n")
            continue
        for user in members:
            if user in ignoreUsers or user in validUsers:
                continue
            try:
                pwd.getpwnam(user)
            except KeyError:
                log.write("Unknown user, skipping: " + user + "\n")
                continue
            validUsers.append(user)
    return validUsers


def extractPid(line):
    """Return the pid (as a string) from a kill-file line, or None."""
    match = killLinePattern.match(line)
    if match:
        return match.group(1)
    return None


def killAsUser(userName, pid, log, debug):
    """Kill pid with the privileges of userName, logging anything on stderr."""
    shellCommand = killCommand + " " + pid
    command = suCommand + " " + userName + " -c \"" + shellCommand + "\""
    log.write("\tKilling: " + command + "\n")
    if debug:
        print("\tCommand: " + command)
        return
    try:
        # Run as the USER, not as ROOT, so only their own processes can die.
        child = subprocess.Popen(
            [suCommand, userName, "-c", shellCommand],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, universal_newlines=True)
        errors = child.communicate()[1]
    except OSError as error:
        log.write("\t\tERROR:" + str(error) + "\n")
        return
    for errorLine in errors.splitlines(True):
        log.write("\t\tERROR:" + errorLine)


def processKillFiles(validUsers, log, debug):
    """Kill what each user's kill file still lists, then remove the file."""
    for userName in validUsers:
        userInfo = pwd.getpwnam(userName)
        fileName = userInfo[5] + "/" + killFileName
        try:
            with open(fileName, "r") as killFile:
                lines = killFile.readlines()
        except IOError:
            if debug:
                print("No kill file for user: " + userInfo[4])
            log.write("No kill file for user: " + userInfo[4] + "\n")
            continue
        if debug:
            print("Processing kill file for user: " + userInfo[4])
        log.write("Processing kill file for user: " + userInfo[4] + "\n")
        for line in lines:
            pid = extractPid(line)
            if pid is not None:
                killAsUser(userName, pid, log, debug)
        # Remove the old file (unlinks a symlink itself, never its target).
        try:
            os.remove(fileName)
        except OSError as error:
            log.write("\tERROR: could not remove " + fileName + ": "
                      + str(error) + "\n")


def listProcesses():
    """Return the output lines of psCommand without the header line."""
    ps = subprocess.Popen(psCommand.split(), stdout=subprocess.PIPE,
                          universal_newlines=True)
    output = ps.communicate()[0]
    return output.splitlines(True)[1:]


def groupProcessesByUser(psLines, validUsers):
    """Map each valid user name to the ps lines of the processes they own.

    The owner is the first column of the line.  Lines owned by anybody else
    (system accounts, truncated names, numeric ids) are ignored rather than
    looked up, so an owner without a passwd entry cannot abort the run.
    """
    allowed = set(validUsers)
    grouped = {}
    for line in psLines:
        fields = line.split(None, 1)
        if fields and fields[0] in allowed:
            grouped.setdefault(fields[0], []).append(line)
    return grouped


def parsePsLine(line):
    """Split a ps line into (owner, pid, start datetime, command) or None."""
    match = psLinePattern.match(line)
    if not match:
        return None
    started = datetime.datetime.fromtimestamp(
        time.mktime(time.strptime(match.group(3), lstartFormat)))
    return match.group(1), match.group(2), started, match.group(4)


def selectOldProcesses(procLines, nowDate, minAge, maxAge):
    """Return (line, pid, ageDays, command) for lines in the age window.

    A process qualifies when minAge < ageDays < maxAge, where ageDays is the
    whole number of days between its start time and nowDate.  Lines that do
    not look like ps output are skipped.
    """
    selected = []
    for line in procLines:
        parsed = parsePsLine(line)
        if parsed is None:
            continue
        pid, started, command = parsed[1:]
        ageDays = (nowDate - started).days
        if minAge < ageDays < maxAge:
            selected.append((line, pid, ageDays, command))
    return selected


def findOldProcesses(validUsers, minAge, maxAge, log, debug):
    """Return {userName: [ps lines]} for every valid user with old processes."""
    userProcessMap = groupProcessesByUser(listProcesses(), validUsers)
    nowDate = datetime.datetime.now()
    oldProcesses = {}
    for userName in userProcessMap:
        fullName = pwd.getpwnam(userName)[4]
        if debug:
            print("Checking for old processess for user: " + fullName)
        log.write("Checking for old processes for user: " + fullName + "\n")
        found = selectOldProcesses(userProcessMap[userName], nowDate,
                                   minAge, maxAge)
        for line, pid, ageDays, command in found:
            if debug:
                print("\tFound processe: " + pid + " age: " + str(ageDays)
                      + " " + command)
            log.write("\tFound process: " + pid + " age: " + str(ageDays)
                      + " " + command + "\n")
            oldProcesses.setdefault(userName, []).append(line)
    return oldProcesses


def writeKillFile(fileName, uid, processes):
    """Create fileName owned by uid and holding the given ps lines.

    This runs as root inside a directory the user controls, so the file is
    never opened through whatever is already there: any existing entry is
    unlinked and the file is created with O_EXCL, which refuses to follow a
    symlink planted in between.  Ownership is changed through the open
    descriptor for the same reason.  Raises OSError/IOError on failure.
    """
    try:
        os.remove(fileName)
    except OSError as error:
        if error.errno != errno.ENOENT:
            raise
    fd = os.open(fileName, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
    with os.fdopen(fd, "w") as killFile:
        os.fchown(fd, uid, -1)
        killFile.writelines(processes)


def writeKillFiles(oldProcesses, log, debug):
    """Write every user's kill file; return the users who should be mailed.

    A user whose kill file could not be written is logged and left out, so
    nobody is told about a file that does not exist.
    """
    toNotify = []
    for userName in oldProcesses:
        userInfo = pwd.getpwnam(userName)
        log.write("Writing kill file for " + userInfo[4] + "\n")
        if debug:
            print("Writing kill file for " + userInfo[4])
        if GenerateKillFile:
            fileName = userInfo[5] + "/" + killFileName
            try:
                writeKillFile(fileName, userInfo[2], oldProcesses[userName])
            except (OSError, IOError) as error:
                log.write("\tERROR: could not write " + fileName + ": "
                          + str(error) + "\n")
                continue
        toNotify.append(userName)
    return toNotify


def buildMessage(displayName, userName, processes, minAge, maxAge):
    """Return the complete warning mail (headers and body) as one string."""
    parts = [
        "To: " + displayName + " <" + userName + "@localhost>\n",
        "CC: cluster-admin@localhost\n",
        "From: Bagels Cluster \n",
        "Subject: Old Processes on the Head Node\n",
        "\nDear " + displayName + ",\n\n",
        "You have multiple processes running on the bagels head node that are more than " + str(minAge) + " days old. These processes have been listed in the file " + killFileName + " in your home directory.",
    ]
    if (maxAge - minAge) > 365:
        parts.append(" They will be automatically killed if you do nothing.\n\n")
    else:
        parts.append(" They will be automatically killed in " + str(maxAge - minAge) + " days if you do nothing.\n\n")
    parts.append("If you do not want any of these processes killed simply remove them from the aforementioned file or delete the file.\n\n")
    parts.append("If you have any questions please check the cluster documentation at .\n\n")
    parts.append("Processes to be killed:\n")
    parts.extend(processes)
    return "".join(parts)


def sendEmails(userNames, oldProcesses, minAge, maxAge, log, debug):
    """Mail (or, in debug mode, print) the warning for each listed user."""
    for userName in userNames:
        userInfo = pwd.getpwnam(userName)
        # The GECOS field may carry extra comma-separated subfields; a comma
        # in the To: header would be read by sendmail -t as more recipients.
        displayName = userInfo[4].split(",")[0] or userName
        m = buildMessage(displayName, userName, oldProcesses[userName],
                         minAge, maxAge)
        log.write("Message sent to " + userInfo[4] + "\n")
        if debug:
            print("Mail: " + m)
            continue
        try:
            mailer = subprocess.Popen(sendmailCommand.split(),
                                      stdin=subprocess.PIPE,
                                      universal_newlines=True)
            mailer.communicate(m)
        except OSError as error:
            log.write("\tERROR: could not send mail: " + str(error) + "\n")


def main(argv=None):
    """Run one full check; return the process exit status (0 ok, 1 usage)."""
    global DEBUG
    if argv is None:
        argv = sys.argv
    # Find out if we should run in debug mode
    DEBUG = DEBUG or "DEBUG" in argv[1:]
    log = openLog(DEBUG)
    try:
        log.write("Started check_for_old_processes: "
                  + str(datetime.datetime.now()) + "\n")
        limits = parseAgeLimits(argv)
        if limits is None:
            print(usageText)
            return 1
        minAge, maxAge = limits
        validUsers = getValidUsers(log)
        if DEBUG:
            print("Valid users: " + str(validUsers))
        processKillFiles(validUsers, log, DEBUG)
        oldProcesses = findOldProcesses(validUsers, minAge, maxAge, log, DEBUG)
        toNotify = writeKillFiles(oldProcesses, log, DEBUG)
        sendEmails(toNotify, oldProcesses, minAge, maxAge, log, DEBUG)
    finally:
        log.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())