#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
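#
# processTests.py
#
# Post-processes broker performance test results: standardizes broker log
# timestamps, packages each test's csv/stat results into its own
# directory, slices the broker CPU/GC/log4j logs down to each test's run
# window and records min/max/avg CPU and memory statistics.
#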
import os
import re
import sys
import string
import shutil
from optparse import OptionParser
from datetime import datetime, timedelta
def showUsage():
    log("./processTests.py [-b|--broker-log-dir] <dir> [-t|--test-dir] <dir>")
ACCESS="Access"
MODIFY="Modify"
BROKER_LOG="broker.log"
BROKER_PID="broker.pid"
BROKER_CPU="broker_cpu.log"
BROKER_CPU_DATED="broker_cpu.log.dated"
BROKER_STATS="broker.stats"
BROKER_GC="gc.log"
GRAPH_DATA="graph.data"
_verbose = False
_debug = False
_brokerLogs = ""
def exitError(message):
log(message)
sys.exit(1)
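#
# Parse the command line options, validate the broker log and test
# directories, then pre-process the broker logs and process each test
# result found under the test directory.
#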
def main():
global _log, _verbose, _debug, _brokerLogs
    # Load the command line options
parser = OptionParser()
parser.add_option("-v", "--verbose", dest="verbose",
action="store_true", default=False, help="enable verbose output")
parser.add_option("-d", "--debug", dest="debug",
action="store_true", default=False, help="enable debug output")
parser.add_option("-b", "--broker-log-dir", dest="brokerLogs",
action="store", default=True, help="Broker Logs")
parser.add_option("-t", "--test-dir", dest="testDir",
action="store", default="", help="Test Results")
(options, args) = parser.parse_args()
_verbose = options.verbose
_debug = options.debug
testDir = options.testDir
_brokerLogs = options.brokerLogs
    if testDir == "" or _brokerLogs == "":
        log("Broker Log Dir and Test Dir are both required.")
        showUsage()
        sys.exit(1)
if not os.path.exists(testDir):
exitError("Test directory does not exist:" + testDir)
if not os.path.exists(_brokerLogs):
exitError("Broker log directory does not exist:" + _brokerLogs)
# Standardize the format of the broker logs
preProcessBrokerLogs(_brokerLogs)
# Get list of test results from test_dir
processTestResults(testDir)
#
# Process the log files we know of
#
def preProcessBrokerLogs(resultDir):
print "Pre Processing Broker Logs"
    # Pre-Process GC - no pre-processing required.
    # Process Log4j - no processing required, as the file is already time stamped.
# Pre-Process broker_cpu
processCPUUsage(resultDir)
#
# Process the broker CPU log file and create an output file of format:
# <datetime> <%CPU> <%MEM>
#
def processCPUUsage(resultDir):
logfile=resultDir+os.sep+BROKER_CPU
datedFile=resultDir+os.sep+BROKER_CPU_DATED
start = extractTime(ACCESS, logfile+".stat")
pid = getPID(BROKER_PID)
topRate = getFirstLine(_brokerLogs+os.sep+"top.rate")
#
    # Calculate the time offset to add per line of process output
#
if topRate.find(".") == -1:
seconds = topRate
millis = 0
else:
split = topRate.split('.')
seconds = split[0]
        # Convert the fractional part to milliseconds
millis = float("0."+split[1]) * 1000
offset = timedelta(seconds=int(seconds),milliseconds=int(millis))
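    # e.g. a top.rate of "2.5" (a 2.5 second sample interval) gives
    # offset = timedelta(seconds=2, milliseconds=500)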
#
    # Process the CPU log file and make a file of format:
    # <datetime> <%CPU> <%MEM>
#
# Open log CPU file for reading
logfile = open(logfile, "r")
# Open the output file, erasing any existing version
    # Keep track of the min/max, sum and entry count.
minCPU=float(sys.maxint)
maxCPU=0.0
minMem=float(sys.maxint)
maxMem=0.0
entries=0
sumCPU=0.0
sumMem=0.0
output= open(datedFile, "w")
for line in logfile:
#
# Data format
# 0 1 2 3 4 5 6 7 8 9 10 11
# PID USER PR NI %CPU TIME+ %MEM VIRT RES SHR S COMMAND
# PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
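        # An illustrative data line for the second layout (values invented):
        #  2342 qpid  20   0 4321m 321m  12m S 42.0  8.1  1:23.45 QPBRKR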
#
        # The %CPU and %MEM column positions vary, probably based on the
        # OS/version of top, so auto-detect where they are.
#
# Index is 0 based for array usage.
index = 0
if line.find("PID") != -1:
for key in line.split(" "):
strippedKey = key.lstrip()
if len(strippedKey) > 0:
# Record the key index
if (strippedKey == "%CPU"):
cpuIndex=index
if (strippedKey == "%MEM"):
memIndex=index
# Increase count for next key
index = index + 1
# Find lines that contain our broker process
if line.find("QPBRKR") != -1:
# Split line on whitespace
data = line.split()
            # Write out the date time (ISO-8601 format)
output.write(str(start))
# Output the %CPU value
output.write(" "+str(data[cpuIndex]))
# Output the %MEM value
output.write(" "+str(data[memIndex]))
output.write('\n')
# Add the offset based on the logging rate
start = start + offset
            # Record entries
entries = entries + 1
# Record Metrics
# Record CPU data
cpu = float(data[cpuIndex])
if (cpu < minCPU):
minCPU = cpu
if (cpu > maxCPU):
maxCPU = cpu
sumCPU = sumCPU + cpu
# Record Mem data
mem = float(data[memIndex])
if (mem < minMem):
minMem = mem
if (mem > maxMem):
maxMem = mem
sumMem = sumMem + mem
#end for
# Close the files
    logfile.close()
    output.close()
# Output stats file
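    # The stats file is three lines, e.g. (values illustrative):
    #   #type:min/max/avg
    #   CPU:0.5/98.2/45.1
    #   MEM:1.2/8.4/7.9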
statFile=resultDir+os.sep+BROKER_CPU+".stats"
output= open(statFile, "w")
output.write("#type:min/max/avg")
output.write('\n')
output.write("CPU:"+str(minCPU)+"/"+str(maxCPU)+"/"+str(float(sumCPU)/float(entries)))
output.write('\n')
output.write("MEM:"+str(minMem)+"/"+str(maxMem)+"/"+str(float(sumMem)/float(entries)))
output.write('\n')
    output.close()
log("Pre Process of CPU Log file '"+BROKER_CPU+"' complete")
#
# Given a known process type, get the recorded PID.
#
def getPID(process):
return getFirstLine(_brokerLogs+os.sep+process)
#
# Get the first line of the file without EOL chars.
# NOTE: this will load the entire file into memory to do it.
#
def getFirstLine(fileName):
f = open(fileName,"r")
line = f.read().splitlines()[0]
    f.close()
return line
#
# Walk the directory given and process all csv test results
#
def processTestResults(resultDir):
for root, dirs, files in os.walk(resultDir, topdown=False):
if len(files) == 0:
exitError("Test result directory is empty:" + resultDir)
for file in files:
if file.endswith(".csv"):
                processTestResult(root, file)
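#
# Process a single csv test result; the result can only be processed if
# its accompanying .stat file is present.
#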
def processTestResult(root, resultFile):
    # Open the stat file and extract the test times. We determine:
    #  - the start time from the 'Access' value
    #  - the end time from the 'Modify' value ('Change' would also work)
statFile=root+os.sep+resultFile+".stat"
if not os.path.exists(statFile):
log("Unable to process : Unable to open stat file:" + statFile)
return
createResultSetPackage(root, resultFile)
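#
# Extract the datetime recorded against the given field ('Access' or
# 'Modify') in the stat file.
#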
def extractTime(field, statFile):
    stats = open(statFile, "r")
    dt = None
    for line in stats:
        if line.startswith(field):
            if line.find("(") == -1:
                dt = lineToDate(" ".join(line.split()[1:]))
                #
                # TODO We need to handle time zone issues, as I'm sure we
                # will have issues with the log4j matching.
    stats.close()
    return dt
#
# Given a text line in ISO format convert it to a date object
#
def lineToDate(line):
#2009-06-22 17:04:44,320
#2009-06-22 17:04:44.320
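    # As written, the fractional digits are passed straight through as
    # microseconds, e.g. "2009-06-22 17:04:44,320" ->
    # datetime(2009, 6, 22, 17, 4, 44, 320)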
    pattern = re.compile(r'(?P<year>^[0-9][0-9][0-9][0-9])-(?P<month>[0-9][0-9])-(?P<day>[0-9][0-9]) (?P<hour>[0-9][0-9]):(?P<minute>[0-9][0-9]):(?P<seconds>[0-9][0-9])')
m = pattern.match(line)
if m:
year = int(m.group('year'))
month = int(m.group('month'))
day = int(m.group('day'))
hour = int(m.group('hour'))
minute = int(m.group('minute'))
seconds = int(m.group('seconds'))
        pattern = re.compile(r'(?P<year>^[0-9][0-9][0-9][0-9])-(?P<month>[0-9][0-9])-(?P<day>[0-9][0-9]) (?P<hour>[0-9][0-9]):(?P<minute>[0-9][0-9]):(?P<seconds>[0-9][0-9])[.,](?P<micro>[0-9]+)')
m = pattern.match(line)
micro = None
if m:
micro = m.group('micro')
if micro == None:
micro = 0
# Correct issue where micros are actually nanos
if int(micro) > 999999:
micro = int(micro) / 1000
return datetime(year,month,day,hour,minute,seconds,int(micro))
    else:
        # Error, we shouldn't get here
        return None
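#
# Create a directory named after the test, move the csv and stat files
# into it, slice the broker logs to the test's start/end window and
# generate the graph and stat data for the test.
#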
def createResultSetPackage(root, resultFile):
# Get the Name of the test to make a directory with said name
testName = resultFile.split(".csv")[0]
resultDir = root+ os.sep + testName
log("Processing Result set for:"+ testName)
mkdir(resultDir)
# Move result file to new directory
shutil.move(root + os.sep + resultFile, resultDir)
# Move stat file to new directory
shutil.move(root + os.sep + resultFile + ".stat", resultDir)
statFile=resultDir + os.sep + resultFile + ".stat"
#
# Get start and end time for test run
#
start = extractTime(ACCESS, statFile)
end = extractTime(MODIFY, statFile)
sliceBrokerLogs(resultDir, start, end)
createGraphData(resultDir, testName)
createTestStatData(resultDir, testName)
log("Created Result Package for:"+ testName)
def sliceBrokerLogs(resultDir, start, end):
sliceCPULog(resultDir, start, end)
sliceLog4j(resultDir, start, end)
sliceGCLog(resultDir, start, end)
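#
# Slice the dated CPU log to the given window, writing the matching lines
# to the result directory and accumulating min/max/avg CPU and memory
# stats for the slice.
#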
def sliceCPULog(resultDir, start, end):
global _brokerLogs
logfilePath=_brokerLogs+os.sep+BROKER_CPU_DATED
cpuSliceFile=resultDir+os.sep+BROKER_CPU
    # Process the CPU log file and make a file of format:
    # <datetime> <%CPU> <%MEM>
#
# Open log CPU file for reading
logFile = open(logfilePath, "r")
# Open the output file, erasing any existing version
    # Keep track of the min/max, sum and entry count.
minCPU=float(sys.maxint)
maxCPU=0.0
minMem=float(sys.maxint)
maxMem=0.0
entries=0
sumCPU=0.0
sumMem=0.0
#
    # Create the output file
#
cpuslice = open(cpuSliceFile,"w")
for line in logFile:
data = line.split()
#
        # FIXME: remove the hard-coded time zone addition.
#
lineTime = lineToDate(" ".join(data[0:2])+" +0000")
if lineTime > start:
if lineTime < end:
                # Write the data through to the new file
                cpuslice.write(line)
# Perform stat processing for the min/max/avg
data = line.split()
#
                # Data format is:
                # <date> <time> <%CPU> <%MEM>