#! /usr/libexec/platform-python
# Note: this script is python2 and python3 compatible.
#
#  Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
#  Author: Erwan Velu  <erwan@enovance.com>
#
#  The license below covers all files distributed with fio unless otherwise
#  noted in the file itself.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

from __future__ import absolute_import
from __future__ import print_function
import os
import fnmatch
import sys
import getopt
import re
import math
import shutil
from six.moves import map
from six.moves import range

def find_file(path, pattern):
    """Return the names of the entries of *path* matching the glob *pattern*.

    Only the entry names are returned (not joined with *path*), in the order
    os.listdir() reports them.
    """
    return [entry for entry in os.listdir(path) if fnmatch.fnmatch(entry, pattern)]

def generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the 'mygraph' gnuplot script and, for 2+ traces, the compare scripts.

    'mygraph' receives one graph2D.gpm call per entry of fio_data_file. When
    at least two traces are given it also starts with a graph3D.gpm call, and
    three compare-<gnuplot_output_filename>-2D{raw,smooth,trend}.gnuplot
    scripts are written which plot every trace on top of the global average.

    fio_data_file           -- names of the fio logs; entry N is plotted from
                               the file 'gnuplot_temp_file.N'
    title                   -- title given to every graph
    gnuplot_output_filename -- basename used for the 3D and compare graphs
    gnuplot_output_dir      -- prefix prepended as-is to every file written
    mode                    -- label of the y axis
    disk_perf               -- one list of values per trace, in the same
                               order as fio_data_file
    gpm_dir                 -- directory holding the .gpm macros

    Every file written is recorded in the global temporary_files list.
    """
    if verbose: print("Generating rendering scripts")
    filename = gnuplot_output_dir + 'mygraph'
    temporary_files.append(filename)

    # Plotting 3D or comparing graphs doesn't have a meaning unless there are at least 2 traces
    comparing = len(fio_data_file) > 1

    # (gnuplot plotting style, opened script) for each kind of comparing graph
    compare_scripts = []
    f = open(filename, 'w')
    try:
        if comparing:
            f.write("call \'%s/graph3D.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\'\n" % (gpm_dir,title,gnuplot_output_filename,gnuplot_output_filename,mode))

            # Settings shared by all kinds of comparing graph (raw/smooth/trend)
            common_header = (
                "\n"
                "set title '%s'\n"
                "set terminal png size 1280,1024\n"
                "set ytics axis out auto\n"
                "set key top left reverse\n"
                'set xlabel "Time (Seconds)"\n'
                "set ylabel '%s'\n"
                "set yrange [0:]\n"
                'set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"\n'
            ) % (title, mode)

            # Let's plot the average value for all the traces
            global_avg = average([value for perf in disk_perf for value in perf])

            for kind, plot_style in (('raw', 'with linespoints'), ('smooth', 'smooth csplines'), ('trend', 'smooth bezier')):
                compare_filename = "compare-%s-2D%s" % (gnuplot_output_filename, kind)
                compare_path = gnuplot_output_dir + compare_filename + ".gnuplot"
                temporary_files.append(compare_path)
                script = open(compare_path, 'w')
                compare_scripts.append((plot_style, script))
                script.write(common_header)
                # Setting up a different output filename for each kind of graph
                script.write("set output '%s.png'\n" % compare_filename)
                # No newline here: one plot item per trace is appended below
                script.write("plot %s w l ls 1 ti 'Global average value (%.2f)'" % (global_avg, global_avg))

        # One graph2D call (and one compare plot item) per selected fio file
        for pos, data_file in enumerate(fio_data_file):
            tmp_filename = "gnuplot_temp_file.%d" % pos

            # compare_scripts is empty unless there are at least 2 traces
            for plot_style, script in compare_scripts:
                script.write(",\\\n'%s' using 2:3 %s title '%s'" % (tmp_filename, plot_style, data_file))

            png_file = data_file.replace('.log','')
            raw_filename = "%s-2Draw" % (png_file)
            smooth_filename = "%s-2Dsmooth" % (png_file)
            trend_filename = "%s-2Dtrend" % (png_file)
            avg = average(disk_perf[pos])
            f.write("call \'%s/graph2D.gpm\' \'%s' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%s\' \'%f\'\n" % (gpm_dir,title,tmp_filename,data_file,raw_filename,mode,smooth_filename,trend_filename,avg))
    finally:
        for _, script in compare_scripts:
            script.close()
        f.close()

def generate_gnuplot_math_script(title,gnuplot_output_filename,mode,average,gnuplot_output_dir,gpm_dir):
    """Append one math.gpm call to the 'mymath' script of gnuplot_output_dir.

    The script is opened in append mode, so successive calls accumulate one
    line each. Its path is recorded in the global temporary_files list on
    every call. Note that the *average* parameter is a value to plot; inside
    this function it hides the module-level average() helper.
    """
    script_path = gnuplot_output_dir + 'mymath'
    temporary_files.append(script_path)
    with open(script_path, 'a') as script:
        script.write(
            "call \'%s/math.gpm\' \'%s' \'%s\' \'\' \'%s\' \'%s\' %s\n"
            % (gpm_dir, title, gnuplot_output_filename, gnuplot_output_filename, mode, average)
        )

def compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir):
    """Concatenate every 'gnuplot_temp_file.N' into one aggregated data file.

    For each entry N of fio_data_file, the content of
    <gnuplot_output_dir>gnuplot_temp_file.N is copied into
    <gnuplot_output_dir><gnuplot_output_filename>, preceded by a comment line
    naming the originating fio log and followed by an empty line.

    The aggregated file is recorded in the global temporary_files list.
    Temporary files are read one at a time so that no more than two files
    are open at once and every handle is closed even if a read fails.
    """
    if verbose: print("Processing data file 2/2")
    aggregated_path = gnuplot_output_dir + gnuplot_output_filename

    with open(aggregated_path, "w") as aggregated:
        temporary_files.append(aggregated_path)
        for index, source_name in enumerate(fio_data_file):
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, index)
            # Let's add some information
            aggregated.write("# Disk%d was coming from %s\n" % (index, source_name))
            with open(tmp_filename, 'r') as temp_file:
                aggregated.write(temp_file.read())
            aggregated.write("\n")

def average(s):
    """Return the arithmetic mean of the numbers in *s* as a float.

    Raises ZeroDivisionError when *s* is empty.
    """
    total = sum(s)
    count = len(s)
    # Multiplying by 1.0 forces a float division under python2 as well
    return total * 1.0 / count

def compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir, min_time, max_time):
    """Convert every fio log into a 'gnuplot_temp_file.N' file and collect its values.

    The logs are read in lockstep, one line of each per iteration, until all
    of them are exhausted. A log that ends before the others contributes
    placeholder rows that are never selected.

    fio_data_file      -- names of the fio logs to read
    disk_perf          -- list extended with one list of int values per log;
                          it is expected to be empty on entry since traces
                          are indexed from 0
    gnuplot_output_dir -- prefix prepended as-is to the temporary file names
    min_time           -- samples at or before this time (seconds) are
                          dropped; 0 is replaced by 0.5
    max_time           -- samples at or after this time (seconds) are
                          dropped; -1 means no upper limit

    min_time and max_time may be numbers or their string form.

    Each selected sample is written as "<trace index> <time in seconds>
    <value>". The temporary files are recorded in the global temporary_files
    list.

    Returns the first non-zero block size found in the logs (0 if none).
    Exits with status 1 if a block size cannot be parsed.
    """
    # -1 means "no upper limit"; the command line hands it over as a string
    no_upper_limit = int(max_time) == -1
    end_time = "infinite" if no_upper_limit else max_time
    if verbose: print("Processing data file 1/2 with %s<time<%s" % (min_time,end_time))

    # We ignore the first 500msec as it doesn't seem to be part of the real benchmark
    # Time < 500 usually reports BW=0 breaking the min computing
    min_seconds = float(min_time)
    if min_seconds == 0:
        min_seconds = 0.5
    min_msec = min_seconds * 1000
    max_msec = int(max_time) * 1000

    files = []
    temp_outfile = []
    blk_size = 0
    try:
        for pos, name in enumerate(fio_data_file):
            files.append(open(name))
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir, pos)
            temporary_files.append(tmp_filename)
            gnuplot_file = open(tmp_filename, 'w')
            temp_outfile.append(gnuplot_file)
            gnuplot_file.write("#Temporary file based on file %s\n" % name)
            disk_perf.append([])

        while True:
            current_line = []
            nb_empty_files = 0
            for myfile in files:
                fields = myfile.readline().replace(',', ' ').split()
                if not fields:
                    # Nothing (more) to read: use a row that never gets selected
                    nb_empty_files += 1
                    fields = ["-1", "0", "0", "0"]
                current_line.append(fields)

            # Every file is exhausted (or there was no file at all)
            if nb_empty_files == len(files):
                break

            samples = []
            for index, fields in enumerate(current_line):
                # Only the first four columns are used, so logs carrying
                # extra trailing columns are accepted too
                time, perf, _unused, block_size = fields[:4]
                if blk_size == 0:
                    try:
                        blk_size = int(block_size)
                    except ValueError:
                        print("Error while reading the following line :")
                        print((index, fields))
                        sys.exit(1)

                # Then we estimate if the data we got is part of the time range we want to plot
                time_msec = float(time)
                if time_msec > min_msec and (no_upper_limit or time_msec < max_msec):
                    disk_perf[index].append(int(perf))
                    samples.append((index, time_msec, perf))

            # If we reach this point, it means that all the traces are coherent
            for index, time_msec, perf in samples:
                temp_outfile[index].write("%d %.2f %s\n" % (index, time_msec / 1000, perf))
    finally:
        for handle in files:
            handle.close()
        for handle in temp_outfile:
            handle.close()
    return blk_size

def compute_math(fio_data_file, title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir):
    """Write the per-trace and global statistics files and the 'mymath' script.

    Five files named <gnuplot_output_dir><gnuplot_output_filename> plus a
    suffix are written:
      .average/.min/.max/.stddev -- a 'DiskName <mode>' header, then for each
                                    trace a comment naming its fio log and a
                                    "<trace index> <value>" line (the value
                                    is formatted as an integer)
      .global                    -- min, max, avg, stddev, values_count and
                                    disks_count over all traces, one
                                    "name=value" per line
    Any existing 'mymath' script is removed, then one math.gpm call per
    statistic is added through generate_gnuplot_math_script().

    disk_perf holds one list of values per entry of fio_data_file. The five
    statistics files are recorded in the global temporary_files list.

    Raises ZeroDivisionError if a trace (or all of them) holds no value.
    """
    if verbose: print("Computing Maths")
    base_path = gnuplot_output_dir + gnuplot_output_filename

    header = 'DiskName %s\n' % mode
    average_lines = [header]
    min_lines = [header]
    max_lines = [header]
    stddev_lines = [header]
    global_min = []
    global_max = []

    for disk, source_name in enumerate(fio_data_file):
        origin = "# Disk%d was coming from %s\n" % (disk, source_name)
        values = disk_perf[disk]
        avg = average(values)
        standard_deviation = math.sqrt(average([(x - avg)**2 for x in values]))
        local_min = min(values)
        local_max = max(values)
        average_lines.extend((origin, '%d %d\n' % (disk, avg)))
        stddev_lines.extend((origin, '%d %d\n' % (disk, standard_deviation)))
        min_lines.extend((origin, '%d %d\n' % (disk, local_min)))
        max_lines.extend((origin, '%d %d\n' % (disk, local_max)))
        global_min.append(int(local_min))
        global_max.append(int(local_max))

    # Flatten in linear time (sum(disk_perf, []) copies the list at each step)
    global_disk_perf = [value for perf in disk_perf for value in perf]
    avg = average(global_disk_perf)
    standard_deviation = math.sqrt(average([(x - avg)**2 for x in global_disk_perf]))
    global_lines = [
        'min=%.2f\n' % min(global_disk_perf),
        'max=%.2f\n' % max(global_disk_perf),
        'avg=%.2f\n' % avg,
        'stddev=%.2f\n' % standard_deviation,
        'values_count=%d\n' % len(global_disk_perf),
        'disks_count=%d\n' % len(fio_data_file),
    ]

    # Files are only written once every statistic has been computed, so a
    # failure above leaves neither open handles nor truncated files behind
    outputs = (
        ('.average', average_lines),
        ('.min', min_lines),
        ('.max', max_lines),
        ('.stddev', stddev_lines),
        ('.global', global_lines),
    )
    for suffix, lines in outputs:
        with open(base_path + suffix, 'w') as stats_file:
            stats_file.writelines(lines)
        temporary_files.append(base_path + suffix)

    # Remove any previous 'mymath' before the calls below add to it;
    # it is fine if there was none
    try:
        os.remove(gnuplot_output_dir+'mymath')
    except OSError:
        pass

    generate_gnuplot_math_script("Average values of "+title,gnuplot_output_filename+'.average',mode,int(avg),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Min values of "+title,gnuplot_output_filename+'.min',mode,average(global_min),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Max values of "+title,gnuplot_output_filename+'.max',mode,average(global_max),gnuplot_output_dir,gpm_dir)
    generate_gnuplot_math_script("Standard Deviation of "+title,gnuplot_output_filename+'.stddev',mode,int(standard_deviation),gnuplot_output_dir,gpm_dir)

def parse_global_files(fio_data_file, global_search):
    """Search the given files for *global_search* and print the outcome.

    Each file is read as a sequence of "name=value" lines; reading of a file
    stops at the first line that does not have exactly that shape. The value
    of every entry whose name contains *global_search* is kept (the last one
    wins), as well as the value of 'disks_count'.

    Only the "avg" search is implemented: for each file the aggregated value
    is disks_count * avg, and the biggest one is printed together with the
    file it came from. Files lacking either entry are ignored. Any other
    search only prints a "not yet implemented" message.
    """
    max_result = 0
    max_file = ''
    for file in fio_data_file:
        # Both are reset for every file so that a file missing one of the
        # entries cannot reuse the value read from a previous file
        disks_count = 0
        search_value = -1

        with open(file) as f:
            for line in f:
                # We do split the name from the value
                fields = line.split("=")
                if len(fields) != 2 or not fields[0]:
                    break
                name, value = fields

                # disks_count is not global_search item
                # As we need it for some computation, let's save it
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Let's process the avg value by estimating the global bandwidth per file
        # We keep the biggest in memory for reporting
        if global_search == "avg" and disks_count > 0 and search_value != -1:
            result = disks_count * search_value
            if result > max_result:
                max_result = result
                max_file = file

    # Let's print the avg output
    if global_search == "avg":
        print("Biggest aggregated value of %s was %2.f in file %s\n" % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)

def render_gnuplot(fio_data_file, gnuplot_output_dir):
    """Run gnuplot on the scripts found in gnuplot_output_dir.

    With at least two traces every '*.gnuplot' script of the directory is
    rendered first, then 'mymath' and 'mygraph' are rendered in all cases.
    gnuplot is started directly (no shell) with gnuplot_output_dir as its
    working directory, so directory names holding spaces or shell
    metacharacters are handled. gnuplot's own exit status is not checked.

    On success the global keep_temp_files is set to False. If gnuplot
    cannot be started, an error is printed and the script exits with
    status 1.
    """
    # Imported here as nothing else in this script needs it
    import subprocess
    global keep_temp_files

    print("Running gnuplot Rendering")
    try:
        # Let's render all the compared files if some
        if len(fio_data_file) > 1:
            if verbose: print(" |-> Rendering comparing traces")
            scripts = fnmatch.filter(os.listdir(gnuplot_output_dir), '*.gnuplot')
            # Same selection as a shell glob: sorted, hidden files skipped
            for script in sorted(s for s in scripts if not s.startswith('.')):
                subprocess.call(['gnuplot', script], cwd=gnuplot_output_dir)
        if verbose: print(" |-> Rendering math traces")
        subprocess.call(['gnuplot', 'mymath'], cwd=gnuplot_output_dir)
        if verbose: print(" |-> Rendering 2D & 3D traces")
        subprocess.call(['gnuplot', 'mygraph'], cwd=gnuplot_output_dir)
    except OSError:
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)

    name_of_directory = "the current"
    if gnuplot_output_dir != "./":
        name_of_directory = gnuplot_output_dir
    print("\nRendering traces are available in %s directory" % name_of_directory)
    keep_temp_files = False

def print_help():
    """Print the command line usage of fio2gnuplot on the standard output."""
    help_lines = (
        'fio2gnuplot -ghbiodvk -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>',
        '',
        '-h --help                           : Print this help',
        '-p <pattern> or --pattern <pattern> : A glob pattern to select fio input files',
        '-b           or --bandwidth         : A predefined pattern for selecting *_bw.log files',
        '-i           or --iops              : A predefined pattern for selecting *_iops.log files',
        '-g           or --gnuplot           : Render gnuplot traces before exiting',
        '-o           or --outputfile <file> : The basename for gnuplot traces',
        '                                       - Basename is set with the pattern if defined',
        '-d           or --outputdir <dir>   : The directory where gnuplot shall render files',
        '-t           or --title <title>     : The title of the gnuplot traces',
        '                                       - Title is set with the block size detected in fio traces',
        '-G           or --Global <type>     : Search for <type> in .global files match by a pattern',
        '                                       - Available types are : min, max, avg, stddev',
        '                                       - The .global extension is added automatically to the pattern',
        '-m           or --min_time <time>   : Only consider data starting from <time> seconds (default is 0)',
        '-M           or --max_time <time>   : Only consider data ending before <time> seconds (default is -1 aka nolimit)',
        '-v           or --verbose           : Increasing verbosity',
        '-k           or --keep              : Keep all temporary files from gnuplot\'s output dir',
    )
    for help_line in help_lines:
        print(help_line)

def main(argv):
    """Run fio2gnuplot with the command line given in *argv*.

    argv[0] is skipped and the remaining items are parsed with getopt (see
    print_help() for the supported options). The fio logs matching the
    selected pattern are looked up in the current directory, then either:
      - with -G/--Global: the matching .global files are searched, or
      - otherwise: the temporary, aggregated, statistics and gnuplot script
        files are generated, gnuplot is run if requested, and the temporary
        files are removed when rendering succeeded and -k was not given.

    The globals verbose, temporary_files and keep_temp_files are
    (re)initialised here for the other functions of this script.

    Exits with status 3 when the .gpm files cannot be found, 2 on an
    unsupported option and 1 after printing the help or when no log file
    matches the pattern. Returns None otherwise.
    """
    mode='unknown'
    pattern=''
    pattern_set_by_user=False
    title='No title'
    gnuplot_output_filename='result'
    gnuplot_output_dir='./'
    gpm_dir="/usr/share/fio/"
    disk_perf=[]
    run_gnuplot=False
    parse_global=False
    global_search=''
    min_time=0
    max_time=-1
    global verbose
    verbose=False
    global temporary_files
    temporary_files=[]
    global keep_temp_files
    keep_temp_files=True
    force_keep_temp_files=False

    if not os.path.isfile(gpm_dir+'math.gpm'):
        gpm_dir="/usr/local/share/fio/"
        if not os.path.isfile(gpm_dir+'math.gpm'):
            print("Looks like fio didn't get installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
            sys.exit(3)

    # Long options taking a value need a trailing '=' for getopt to read it
    long_options = ['bandwidth', 'iops', 'pattern=', 'outputfile=', 'outputdir=', 'title=', 'min_time=', 'max_time=', 'gnuplot', 'Global=', 'help', 'verbose', 'keep']
    try:
        opts, args = getopt.getopt(argv[1:], "ghkbivo:d:t:p:G:m:M:", long_options)
    except getopt.GetoptError:
        print("Error: One of the options passed to the cmdline was not supported")
        print("Please fix your command line or read the help (-h option)")
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern='*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern='*_iops.log'
        elif opt in ("-v", "--verbose"):
            verbose=True
        elif opt in ("-k", "--keep"):
            #User really wants to keep the temporary files
            force_keep_temp_files=True
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user=True
            pattern=arg
            pattern=pattern.replace('\\','')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename=arg
        elif opt in ("-d", "--outputdir"):
            gnuplot_output_dir=arg
            if not gnuplot_output_dir.endswith('/'):
                gnuplot_output_dir=gnuplot_output_dir+'/'
            if not os.path.exists(gnuplot_output_dir):
                os.makedirs(gnuplot_output_dir)
        elif opt in ("-t", "--title"):
            title=arg
        elif opt in ("-m", "--min_time"):
            min_time=arg
        elif opt in ("-M", "--max_time"):
            max_time=arg
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot=True
        elif opt in ("-G", "--Global"):
            parse_global=True
            global_search=arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding the .global extension to the pattern, unless it already has it
    if parse_global and not pattern.endswith('.global'):
        pattern = pattern+'.global'

    fio_data_file=find_file('.',pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        # Try numjob log file format if per_numjob_logs=1
        if (pattern == '*_bw.log'):
            fio_data_file=find_file('.','*_bw.*.log')
        if (pattern == '*_iops.log'):
            fio_data_file=find_file('.','*_iops.*.log')
        if len(fio_data_file) == 0:
            sys.exit(1)
        else:
            print("Using log file per job format instead")
    else:
        print("%d files Selected with pattern '%s'" % (len(fio_data_file), pattern))

    fio_data_file=sorted(fio_data_file, key=str.lower)
    for file in fio_data_file:
        print(' |-> %s' % file)
        # The per job names (e.g. *_bw.1.log) selected by the fallback above
        # must be recognised as well
        if "_bw.log" in file or fnmatch.fnmatch(file, '*_bw.*.log'):
            mode="Bandwidth (KB/sec)"
        if "_iops.log" in file or fnmatch.fnmatch(file, '*_iops.*.log'):
            mode="IO per Seconds (IO/sec)"
    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title='Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title='IO benchmark with %d fio results' % len(fio_data_file)

    print()
    #We need to adjust the output filename regarding the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename=pattern
        # As we do have some glob in the pattern, let's make this simpler
        # We do remove the simplest parts of the expression to get a clear file name
        gnuplot_output_filename=gnuplot_output_filename.replace('-*-','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('*','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('--','-')
        gnuplot_output_filename=gnuplot_output_filename.replace('.log','')
        # Ensure that we don't have any starting or trailing dash to the filename
        gnuplot_output_filename = gnuplot_output_filename[:-1] if gnuplot_output_filename.endswith('-') else gnuplot_output_filename
        gnuplot_output_filename = gnuplot_output_filename[1:] if gnuplot_output_filename.startswith('-') else gnuplot_output_filename
        if (gnuplot_output_filename == ''):
            gnuplot_output_filename='default'

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size=compute_temp_file(fio_data_file,disk_perf,gnuplot_output_dir,min_time,max_time)
        title="%s @ Blocksize = %dK" % (title,blk_size/1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename, gnuplot_output_dir)
        compute_math(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)
        generate_gnuplot_script(fio_data_file,title,gnuplot_output_filename,gnuplot_output_dir,mode,disk_perf,gpm_dir)

        if run_gnuplot:
            render_gnuplot(fio_data_file, gnuplot_output_dir)

        # Shall we clean the temporary files ?
        if not keep_temp_files and not force_keep_temp_files:
            # Cleaning temporary files
            if verbose: print("Cleaning temporary files")
            for temp_path in temporary_files:
                if verbose: print(" -> %s" % temp_path)
                try:
                    os.remove(temp_path)
                except OSError:
                    # Already removed (some files are recorded several
                    # times) or not removable: cleaning is best effort
                    pass

# Entry point: run main() only when this file is executed as a script and
# hand its return value over to the interpreter as the exit status
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)