IIS Logs Scripts
Posted by Dan | Posted in IIS, Python, SQL Server | Posted on 12-12-2009
0
While working with some IIS logs, I decided to start practicing my Python, so I put together some Python functions for working with IIS log files. These will come in handy. On a 2.5GHz WinXP machine with 3GB of RAM, these functions take about 3 seconds to process a 180MB text file. The Python code could be optimized further if you’re dealing with larger files.
#!/usr/bin/env python
"""Helpers for inspecting the column layout of IIS log files.

An IIS log file can have various log properties.  Every time new columns
are added to the log in IIS, a new header block (with a fresh "#Fields:"
row listing the columns) is written into the log file, so a single file
may contain several different column layouts.
"""

import re
import os

MainLogDelimiter = "#Software: Microsoft Internet Information Services 6.0"
TestFile = "C:\\Dan\\IIS-Log-Import\\Logs\\not-the-same.txt"
BigTestFile = "C:\\Dan\\IIS-Log-Import\\Logs\\ex090914\\ex090914.log"
LogsDir = "C:\\Dan\\IIS-Log-Import\\Logs"

# Captures the space-separated column names of a "#Fields:" header row.
# Compiled once and shared by every function that reads column names.
_FIELDS_RE = re.compile(
    r"^#Fields:\s([\w\-()\s]+)$", re.IGNORECASE | re.MULTILINE
)


def SearchForFile(rootpath, searchfor, includepath=0):
    """Search recursively below a root directory for matching file names.

    rootpath    -- root directory to start searching from.
    searchfor   -- regexp tested (with re.search) against each file name,
                   e.g. to find *.jpg files use: \\.jpg$
    includepath -- optional; when non-zero each result is the file name
                   joined to the directory it was found in, otherwise
                   only the bare file name is returned.

    Returns a list of file names that can be looped through.

    NOTE: the glob module might be a faster way to do this.
    """
    pattern = re.compile(searchfor)
    with_path = includepath != 0
    names = []
    for root, dirs, files in os.walk(rootpath):
        for name in files:
            if not pattern.search(name):
                continue
            if with_path:
                # os.path.join uses the platform's separator and does not
                # double it when root already ends with one.
                names.append(os.path.join(root, name))
            else:
                names.append(name)
    return names


def _iterFieldRows(FILE):
    """Yield the stripped "#Fields:" column row of each section of FILE.

    FILE is rewound and read in full, then split on MainLogDelimiter.  The
    text before the first delimiter is not a section and is ignored, as
    are sections that contain no "#Fields:" row.
    """
    FILE.seek(0, 0)
    sections = FILE.read().split(MainLogDelimiter)[1:]
    for section in sections:
        match = _FIELDS_RE.search(section)
        if match is not None:
            yield match.group(1).strip()


def isSameLogProperties(FILE):
    """Test whether a log file has the same columns throughout.

    This is in case column properties were added/removed in the course of
    the log file.

    input:  FILE -- an open, seekable, readable log file object
    output: True when every "#Fields:" row found lists the same columns
            (also when there are none or only one), False otherwise.
    """
    first = None
    for columns in _iterFieldRows(FILE):
        if first is None:
            first = columns
        elif columns != first:
            # Stop at the first layout that differs.
            return False
    return True


def getFirstColumn(FILE):
    """Get the columns from a log file.

    It returns only the first column row, and ignores any other column
    row that may exist in case columns were added/removed in IIS.

    input:  FILE -- an open, seekable, readable log file object
    output: a single-element list holding the column names separated by
            spaces, or an empty list when no column row is found.
    """
    for columns in _iterFieldRows(FILE):
        return [columns]
    return []


def getAllColumns(FILE):
    """Get all the column rows from a log file.

    input:  FILE -- an open, seekable, readable log file object
    output: a list with one string of space-separated column names per
            column row, in file order (empty when none is found).
    """
    return list(_iterFieldRows(FILE))


def main():
    """EXAMPLE: loop through all the IIS log files in the directory and
    report whether each one keeps the same columns throughout."""
    for path in SearchForFile(LogsDir, r"\.txt$", 1):
        # "with" closes the file even if the check raises.
        with open(path, "r") as LogFile:
            if isSameLogProperties(LogFile):
                print("%s the same" % path)
            else:
                print("%s not the same" % path)


if __name__ == "__main__":
    main()
























