Tree - source-git/gfs2-utils - CentOS Git server

source-git / gfs2-utils

Files

Commit: 360c39361a3769f2e6281399a650d64a67ccc0bd
Blob Blame History Raw
#!/usr/bin/python
"""
The script "gfs2_lockcapture" will capture locking information from GFS2 file
systems and DLM.

@author    : Shane Bradley
@contact   : sbradley@redhat.com
@version   : 0.95
@copyright : GPLv2
"""
import sys
import os
import os.path
import logging
import logging.handlers
from optparse import OptionParser, Option, SUPPRESS_HELP
import time
import platform
import shutil
import subprocess
import tarfile

# #####################################################################
# Global vars:
# #####################################################################
"""
@cvar VERSION_NUMBER: The version number of this script.
@type VERSION_NUMBER: String
@cvar MAIN_LOGGER_NAME: The name of the logger.
@type MAIN_LOGGER_NAME: String
@cvar PATH_TO_DEBUG_DIR: The path to the debug directory for the linux kernel.
@type PATH_TO_DEBUG_DIR: String
@cvar PATH_TO_PID_FILENAME: The path to the pid file that will be used to make
sure only 1 instance of this script is running at any time.
@type PATH_TO_PID_FILENAME: String
"""
# The version number of this script.
# NOTE(review): the module docstring advertises version 0.95 while this
# constant is "0.9-8" -- confirm which one is current.
VERSION_NUMBER = "0.9-8"
# The logger name is the basename of the path this script was invoked with.
MAIN_LOGGER_NAME = "%s" %(os.path.basename(sys.argv[0]))
# The path to the debug directory for the linux kernel.
PATH_TO_DEBUG_DIR="/sys/kernel/debug"
# The pid file is named after the invoked script and is used to make sure only
# 1 instance of this script is running at any time.
PATH_TO_PID_FILENAME = "/var/run/%s.pid" %(os.path.basename(sys.argv[0]))

# #####################################################################
# Class to define what a clusternode is.
# #####################################################################
class ClusterNode:
    """
    This class represents a cluster node that is a current member in a cluster.
    """
    def __init__(self, clusternodeName, clusternodeID, clusterName, mapOfMountedFilesystemLabels):
        """
        @param clusternodeName: The name of the cluster node.
        @type clusternodeName: String
        @param clusternodeID: The id of the cluster node.
        @type clusternodeID: Int
        @param clusterName: The name of the cluster that this cluster node is a
        member of.
        @type clusterName: String
        @param mapOfMountedFilesystemLabels: A map of filesystem labels(key) for
        a mounted filesystem. The value is the line for the matching mounted
        filesystem from the mount -l command.
        @type mapOfMountedFilesystemLabels: Dict
        """
        self.__clusternodeName = clusternodeName
        self.__clusternodeID  = clusternodeID
        self.__clusterName = clusterName
        self.__mapOfMountedFilesystemLabels = mapOfMountedFilesystemLabels

    def __str__(self):
        """
        This function will return a string representation of the object. The
        first line is "clustername:nodename(id:N)" and it is followed by one
        line per mounted filesystem label(sorted by label).

        @return: Returns a string representation of the object.
        @rtype: String
        """
        rString = "%s:%s(id:%d)" %(self.getClusterName(), self.getClusterNodeName(), self.getClusterNodeID())
        for fsLabel in sorted(self.__mapOfMountedFilesystemLabels):
            rString += "\n\t%s --> %s" %(fsLabel, self.__mapOfMountedFilesystemLabels.get(fsLabel))
        return rString.rstrip()

    def getClusterNodeName(self):
        """
        Returns the name of the cluster node.

        @return: Returns the name of the cluster node.
        @rtype: String
        """
        return self.__clusternodeName

    def getClusterNodeID(self):
        """
        Returns the id of the cluster node.

        @return: Returns the id of the cluster node.
        @rtype: Int
        """
        return self.__clusternodeID

    def getClusterName(self):
        """
        Returns the name of cluster that this cluster node is a member of.

        @return: Returns the name of cluster that this cluster node is a member
        of.
        @rtype: String
        """
        return self.__clusterName

    def getMountedGFS2FilesystemNames(self, includeClusterName=True):
        """
        Returns the names of all the mounted GFS2 filesystems. By default
        includeClusterName is True which will include the name of the cluster
        and the GFS2 filesystem name(ex. f18cluster:mygfs2vol1) in the list of
        mounted GFS2 filesystems. If includeClusterName is False it will only
        return a list of all the mounted GFS2 filesystem names(ex. mygfs2vol1).

        @return: Returns a list of all the mounted GFS2 filesystem names.
        @rtype: Array

        @param includeClusterName: By default this option is True and will
        include the name of the cluster and the GFS2 filesystem name. If False
        then only the GFS2 filesystem name will be included.
        @type includeClusterName: Boolean
        """
        if (includeClusterName):
            return list(self.__mapOfMountedFilesystemLabels.keys())
        # Labels without a "clustername:" prefix are skipped because there is
        # no filesystem name part to return.
        return [fsLabel.split(":", 1)[1]
                for fsLabel in self.__mapOfMountedFilesystemLabels
                if (":" in fsLabel)]

    def getMountedGFS2FilesystemPaths(self):
        """
        Returns a map of all the mounted GFS2 filesystem paths. The key is the
        GFS2 fs name(clustername:fs name) and value is the mountpoint.

        The mount point is taken from the mount line, which has the form:
        "<device> on <mount point> type <fstype> (<options>) [<label>]"

        @return: Returns a map of all the mounted GFS2 filesystem paths. The key
        is the GFS2 fs name(clustername:fs name) and value is the mountpoint
        without any surrounding whitespace. Entries whose mount line does not
        contain a mount point are left out.
        @rtype: Map
        """
        mapOfGFS2MountedFilesystemPaths = {}
        for (fsLabel, mountLine) in self.__mapOfMountedFilesystemLabels.items():
            # Split on the space delimited keywords only, so that a device or
            # mount point that merely contains the letters "on" or "type"
            # (ex. /mnt/production) is not cut in the middle. The first " on "
            # ends the device and the last " type " ends the mount point.
            mountPoint = mountLine.partition(" on ")[2].rpartition(" type ")[0].strip()
            if (len(mountPoint) > 0):
                mapOfGFS2MountedFilesystemPaths[fsLabel] = mountPoint
        return mapOfGFS2MountedFilesystemPaths

# #####################################################################
# Helper functions.
# #####################################################################
def runCommand(command, listOfCommandOptions, standardOut=subprocess.PIPE, standardError=subprocess.PIPE):
    """
    This function will execute a command. It will return True if the return code
    was zero, otherwise False is returned. False is also returned (and an error
    is logged) if the command could not be started.

    @return: Returns True if the return code was zero, otherwise False is
    returned.
    @rtype: Boolean

    @param command: The command that will be executed.
    @type command: String
    @param listOfCommandOptions: The list of options for the command that will
    be executed.
    @type listOfCommandOptions: Array
    @param standardOut: The pipe that will be used to write standard output. By
    default the pipe that is used is subprocess.PIPE.
    @type standardOut: Pipe
    @param standardError: The pipe that will be used to write standard error. By
    default the pipe that is used is subprocess.PIPE.
    @type standardError: Pipe
    """
    try:
        commandList = [command] + list(listOfCommandOptions)
        task = subprocess.Popen(commandList, stdout=standardOut, stderr=standardError)
        # communicate() drains the pipes while it waits for the command to
        # finish. Calling wait() first could block forever once the command
        # had written enough output to fill the pipe buffer.
        task.communicate()
        return (task.returncode == 0)
    except OSError:
        # The command could not be executed, so there is no captured output to
        # add to the message.
        commandOptionString = "".join(["%s " %(option) for option in listOfCommandOptions])
        message = "An error occurred running the command: $ %s %s" %(command, commandOptionString)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return False

def runCommandOutput(command, listOfCommandOptions, standardOut=subprocess.PIPE, standardError=subprocess.PIPE):
    """
    This function will execute a command. Returns the output that was written to
    standard output with the surrounding whitespace removed. None is returned if
    the command could not be started. The return code of the command is not
    checked.

    @return: Returns the output that was written to standard output. None is
    returned if there was an error. An empty string is returned if standard
    output was not captured through a pipe.
    @rtype: String

    @param command: The command that will be executed.
    @type command: String
    @param listOfCommandOptions: The list of options for the command that will
    be executed.
    @type listOfCommandOptions: Array
    @param standardOut: The pipe that will be used to write standard output. By
    default the pipe that is used is subprocess.PIPE.
    @type standardOut: Pipe
    @param standardError: The pipe that will be used to write standard error. By
    default the pipe that is used is subprocess.PIPE.
    @type standardError: Pipe
    """
    try:
        commandList = [command] + list(listOfCommandOptions)
        task = subprocess.Popen(commandList, stdout=standardOut, stderr=standardError)
        # communicate() drains the pipes while it waits for the command to
        # finish. Calling wait() first could block forever once the command
        # had written enough output to fill the pipe buffer.
        (stdout, stderr) = task.communicate()
    except OSError:
        # The command could not be executed, so there is no captured output to
        # add to the message.
        commandOptionString = "".join(["%s " %(option) for option in listOfCommandOptions])
        message = "An error occurred running the command: $ %s %s" %(command, commandOptionString)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
        return None
    if (stdout is None):
        # Standard output was sent somewhere other than a pipe, so there is
        # nothing that was captured.
        return ""
    if (isinstance(stdout, bytes)):
        # Undecodable bytes are replaced instead of raising, so unexpected
        # characters in the output of a tool cannot abort the capture.
        stdout = stdout.decode("utf-8", "replace")
    return stdout.strip()

def writeToFile(pathToFilename, data, appendToFile=True, createFile=False):
    """
    This function will write a string to a file. A newline is added to the end
    of the string that is written.

    @return: Returns True if the string was successfully written to the file,
    otherwise False is returned.
    @rtype: Boolean

    @param pathToFilename: The path to the file that will have a string written
    to it.
    @type pathToFilename: String
    @param data: The string that will be written to the file.
    @type data: String
    @param appendToFile: If True then the data will be appened to the file, if
    False then the data will overwrite the contents of the file.
    @type appendToFile: Boolean
    @param createFile: If True then the file will be created if it does not
    exists, if False then file will not be created if it does not exist
    resulting in no data being written to the file. The parent directory has to
    exist already.
    @type createFile: Boolean
    """
    # A path without a directory part refers to the current working directory.
    parentDir = os.path.dirname(pathToFilename) or os.curdir
    fileCanBeCreated = (createFile and (len(pathToFilename) > 0) and os.path.isdir(parentDir))
    if (not (os.path.isfile(pathToFilename) or fileCanBeCreated)):
        return False
    filemode = "w"
    if (appendToFile):
        filemode = "a"
    try:
        # The with statement closes the file even when the write fails.
        with open(pathToFilename, filemode) as fout:
            fout.write(data + "\n")
        return True
    except UnicodeEncodeError:
        message = "There was a unicode encode error writing to the file: %s." %(pathToFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    except IOError:
        message = "There was an error writing to the file: %s." %(pathToFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return False

def mkdirs(pathToDSTDir):
    """
    This function will attempt to create a directory with the path of the value
    of pathToDSTDir. Any missing parent directories are created as well.

    @return: Returns True if the directory was created or already exists,
    otherwise False is returned. False is returned for an empty path and for a
    path that exists but is not a directory.
    @rtype: Boolean

    @param pathToDSTDir: The path to the directory that will be created.
    @type pathToDSTDir: String
    """
    if (os.path.isdir(pathToDSTDir)):
        return True
    # Only try to create the directory when there is a path and nothing exists
    # at that path yet.
    if ((len(pathToDSTDir) > 0) and (not os.access(pathToDSTDir, os.F_OK))):
        try:
            os.makedirs(pathToDSTDir)
        except (OSError, IOError):
            # os.error is an alias of OSError, so a single handler covers all
            # the failures that were previously handled twice.
            message = "Could not create the directory: %s." %(pathToDSTDir)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            return False
    return os.path.isdir(pathToDSTDir)

def removePIDFile():
    """
    This function will remove the pid file.

    @return: Returns True if the file was successfully remove or does not exist,
    otherwise False is returned.
    @rtype: Boolean
    """
    message = "Removing the pid file: %s" %(PATH_TO_PID_FILENAME)
    logging.getLogger(MAIN_LOGGER_NAME).debug(message)
    if (os.path.exists(PATH_TO_PID_FILENAME)):
        try:
            os.remove(PATH_TO_PID_FILENAME)
        except (IOError, OSError):
            # os.remove() reports failures with OSError.
            message = "There was an error removing the file: %s." %(PATH_TO_PID_FILENAME)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
    # Success means that the pid file is gone.
    return (not os.path.exists(PATH_TO_PID_FILENAME))

def archiveData(pathToSrcDir):
    """
    This function will archive a directory into a .tar.bz2 file. The archive is
    created next to the directory and is named after the directory and the
    hostname: <pathToSrcDir>-<hostname>.tar.bz2. An existing archive with that
    name is removed first.

    @return: This function will return the path to the tar.bz2 file that was
    created. If the tar.bz2 file failed to be created then an empty string will
    be returned which would indicate an error occurred.
    @rtype: String

    @param pathToSrcDir: The path to the directory that will be archived into a
    .tar.bz2 file.
    @type pathToSrcDir: String
    """
    if (not os.path.exists(pathToSrcDir)):
        return ""
    pathToTarFilename = "%s-%s.tar.bz2" %(pathToSrcDir, platform.node())
    if (os.path.exists(pathToTarFilename)):
        message = "A compressed archvied file already exists and will be removed: %s" %(pathToTarFilename)
        logging.getLogger(MAIN_LOGGER_NAME).status(message)
        try:
            os.remove(pathToTarFilename)
        except (IOError, OSError):
            # os.remove() reports failures with OSError.
            message = "There was an error removing the file: %s." %(pathToTarFilename)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            return ""
    message = "Creating a compressed archvied file: %s" %(pathToTarFilename)
    logging.getLogger(MAIN_LOGGER_NAME).status(message)
    try:
        # The with statement closes the archive even when adding files fails.
        with tarfile.open(pathToTarFilename, "w:bz2") as tar:
            tar.add(pathToSrcDir, arcname=os.path.basename(pathToSrcDir))
    except (tarfile.TarError, IOError, OSError):
        # Filesystem errors(ex. disk full, permission denied) are reported the
        # same way as tar errors so that the documented "" is returned.
        message = "There was an error creating the tarfile: %s." %(pathToTarFilename)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
        return ""
    if (os.path.exists(pathToTarFilename)):
        return pathToTarFilename
    return ""

def getDataFromFile(pathToSrcFile) :
    """
    This function will return the data in an array. Where each newline in file
    is a seperate item in the array. The newline at the end of each line is
    kept. This should really just be used on relatively small files.

    None is returned if the path is empty or the file could not be read.

    @return: Returns an array of Strings, where each newline in file is an item
    in the array.
    @rtype: Array

    @param pathToSrcFile: The path to the file which will be read.
    @type pathToSrcFile: String
    """
    if (len(pathToSrcFile) > 0) :
        try:
            # The with statement closes the file even when reading fails.
            with open(pathToSrcFile, "r") as fin:
                return fin.readlines()
        except (IOError, os.error):
            message = "An error occured reading the file: %s." %(pathToSrcFile)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return None

def copyFile(pathToSrcFile, pathToDstFile):
    """
    This function will copy a regular file to a destination path. The parent
    directory of the destination is created when it does not exist.

    @return: Returns True if the destination exists after the copy. False is
    returned if the source does not exist, is not a regular file, is the same
    path as the destination, or if the copy failed.
    @rtype: Boolean

    @param pathToSrcFile: The path to the source file that will be copied.
    @type pathToSrcFile: String
    @param pathToDstFile: The path to the destination of the file.
    @type pathToDstFile: String
    """
    # Validate the arguments first, there is a single exit for all the
    # validation failures.
    problem = None
    if (not os.path.exists(pathToSrcFile)):
        problem = "The file does not exist with the path: %s." %(pathToSrcFile)
    elif (not os.path.isfile(pathToSrcFile)):
        problem = "The path to the source file is not a regular file: %s." %(pathToSrcFile)
    elif (pathToSrcFile == pathToDstFile):
        problem = "The path to the source file and path to destination file cannot be the same: %s." %(pathToDstFile)
    if (problem is not None):
        logging.getLogger(MAIN_LOGGER_NAME).error(problem)
        return False

    # The destination directory has to exist before the file can be copied.
    destinationDir = os.path.split(pathToDstFile)[0]
    if (not mkdirs(destinationDir)):
        return False

    try:
        shutil.copy(pathToSrcFile, pathToDstFile)
    except (shutil.Error, OSError, IOError):
        problem = "Cannot copy the file %s to %s." %(pathToSrcFile, pathToDstFile)
        logging.getLogger(MAIN_LOGGER_NAME).error(problem)
        return False
    return (os.path.exists(pathToDstFile))

def copyDirectory(pathToSrcDir, pathToDstDir):
    """
    This function will copy a directory into a destination directory. The copy
    keeps the name of the source directory, so the result is located at
    <pathToDstDir>/<name of source directory>. The destination directory is
    created when it does not exist.

    @return: Returns True if the copied directory exists after the copy. False
    is returned if the source does not exist, is not a directory, is the same
    path as the destination, or if the copy failed.
    @rtype: Boolean

    @param pathToSrcDir: The path to the source dir that will be copied.
    @type pathToSrcDir: String
    @param pathToDstDir: The path to the destination of the dir.
    @type pathToDstDir: String
    """
    # Validate the arguments first, there is a single exit for all the
    # validation failures.
    problem = None
    if (not os.path.exists(pathToSrcDir)):
        problem = "The directory does not exist with the path: %s." %(pathToSrcDir)
    elif (not os.path.isdir(pathToSrcDir)):
        problem = "The path to the source directory is not a directory: %s." %(pathToSrcDir)
    elif (pathToSrcDir == pathToDstDir):
        problem = "The path to the source directory and path to destination directory cannot be the same: %s." %(pathToDstDir)
    if (problem is not None):
        logging.getLogger(MAIN_LOGGER_NAME).error(problem)
        return False

    # The destination directory has to exist before the tree can be copied
    # into it.
    if (not mkdirs(pathToDstDir)):
        return False

    dst = os.path.join(pathToDstDir, os.path.basename(pathToSrcDir))
    try:
        shutil.copytree(pathToSrcDir, dst)
    except (shutil.Error, OSError, IOError):
        problem = "Cannot copy the directory %s to %s." %(pathToSrcDir, dst)
        logging.getLogger(MAIN_LOGGER_NAME).error(problem)
        return False
    return (os.path.exists(dst))

def backupOutputDirectory(pathToOutputDir):
    """
    This function will move an existing output directory out of the way by
    renaming it to the first unused name of the form <pathToOutputDir>.bk-<N>,
    where N starts at 1.

    @return: Returns True if the pathToOutputDir does not exist or the directory
    was successfully rename. If pathToOutputDir exists and was not successfully
    rename then False is returned.
    @rtype: Boolean

    @param pathToOutputDir: The path to the directory that will be backed up.
    @type pathToOutputDir: String
    """
    if (os.path.exists(pathToOutputDir)):
        message = "The path already exists and could contain previous lockdump data: %s" %(pathToOutputDir)
        logging.getLogger(MAIN_LOGGER_NAME).info(message)
        # Keep increasing the index until a backup name is found that is not
        # used yet.
        backupIndex = 1
        pathToDST = "%s.bk-%d" %(pathToOutputDir, backupIndex)
        while (os.path.exists(pathToDST)):
            backupIndex += 1
            pathToDST = "%s.bk-%d" %(pathToOutputDir, backupIndex)
        try:
            message = "The existing output directory will be renamed: %s to %s." %(pathToOutputDir, pathToDST)
            logging.getLogger(MAIN_LOGGER_NAME).status(message)
            shutil.move(pathToOutputDir, pathToDST)
        except (shutil.Error, OSError):
            message = "There was an error renaming the directory: %s to %s." %(pathToOutputDir, pathToDST)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
    # If the path still exists at this point then the backup failed.
    return (not os.path.exists(pathToOutputDir))

def mountFilesystem(filesystemType, pathToDevice, pathToMountPoint):
    """
    This function will attempt to mount a filesystem. If the filesystem is
    already mounted or the filesystem was successfully mounted then True is
    returned, otherwise False is returned. The check is done against
    pathToMountPoint.

    @return: If the filesystem is already mounted or the filesystem was
    successfully mounted then True is returned, otherwise False is returned.
    @rtype: Boolean

    @param filesystemType: The type of filesystem that will be mounted.
    @type filesystemType: String
    @param pathToDevice: The path to the device that will be mounted.
    @type pathToDevice: String
    @param pathToMountPoint: The path to the directory that will be used as the
    mount point for the device.
    @type pathToMountPoint: String
    """
    # Check the mount point that was requested and not a fixed path, so the
    # result is correct for any mount point that is passed in.
    if (os.path.ismount(pathToMountPoint)):
        return True
    listOfCommandOptions = ["-t", filesystemType, pathToDevice, pathToMountPoint]
    if (not runCommand("mount", listOfCommandOptions)):
        message = "There was an error mounting the filesystem type %s for the device %s to the mount point %s." %(filesystemType, pathToDevice, pathToMountPoint)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
    return os.path.ismount(pathToMountPoint)

def exitScript(removePidFile=True, errorCode=0):
    """
    This function ends the script by calling sys.exit() with the errorCode. The
    pid file is removed first unless removePidFile is False.

    @param removePidFile: If True(default) then the pid file will be remove
    before the script exits.
    @type removePidFile: Boolean
    @param errorCode: The exit code that will be returned. The default value is 0.
    @type errorCode: Int
    """
    if (removePidFile):
        removePIDFile()
    logging.getLogger(MAIN_LOGGER_NAME).info("The script will exit.")
    sys.exit(errorCode)

# #####################################################################
# Helper functions for gathering the lockdumps.
# #####################################################################
def getClusterNode(listOfGFS2Names):
    """
    This function return a ClusterNode object if the machine is a member of a
    cluster. The cluster name, node name and node id are read with "cman_tool"
    when that command is found, otherwise with the corosync tools
    ("corosync-cmapctl" and "corosync-quorumtool") when they are found.

    The listOfGFS2Names is a list of GFS2 filesystem that need to have their
    data capture. If the list is empty then that means that all the mounted GFS2
    filesystems will be captured, if list is not empty then only those GFS2
    filesystems in the list will have their data captured. A name in the list
    matches with or without the "clustername:" prefix.

    @return: Returns a ClusterNode object if a cluster name and a cluster node
    name were found, otherwise None is returned.
    @rtype: ClusterNode

    @param listOfGFS2Names: A list of GFS2 filesystem names that will have their
    data captured.  If the list is empty then that means that all the mounted
    GFS2 filesystems will be captured, if list is not empty then only those GFS2
    filesystems in the list will have their data captured.
    @type listOfGFS2Names: Array
    """
    nameOfCluster = ""
    nameOfNode = ""
    idOfNode = ""
    if (runCommand("which", ["cman_tool"])):
        # All the values are in the output of: $ cman_tool status
        cmanStatus = runCommandOutput("cman_tool", ["status"])
        if (cmanStatus is not None):
            for statusLine in cmanStatus.split("\n"):
                if (statusLine.startswith("Cluster Name:")):
                    nameOfCluster = statusLine.split("Cluster Name:")[1].strip()
                if (statusLine.startswith("Node name: ")):
                    nameOfNode = statusLine.split("Node name:")[1].strip()
                if (statusLine.startswith("Node ID: ")):
                    idOfNode = statusLine.split("Node ID: ")[1].strip()
    elif (runCommand("which", ["corosync-cmapctl"])):
        # Another way to get the local cluster node is: $ crm_node -i; crm_node -l
        # The output of corosync-cmapctl is expected to be "key = value".
        cmapOutput = runCommandOutput("corosync-cmapctl", ["-g", "totem.cluster_name"])
        if (cmapOutput is not None):
            keyAndValue = cmapOutput.split("=")
            if (len(keyAndValue) == 2):
                nameOfCluster = keyAndValue[1].strip()
        # The id of the local cluster node is needed before the name of the
        # cluster node is looked up.
        cmapOutput = runCommandOutput("corosync-cmapctl", ["-g", "runtime.votequorum.this_node_id"])
        if (cmapOutput is not None):
            keyAndValue = cmapOutput.split("=")
            if (len(keyAndValue) == 2):
                idOfNode = keyAndValue[1].strip()
        if (idOfNode):
            quorumOutput = runCommandOutput("corosync-quorumtool", ["-l"])
            if (quorumOutput is not None):
                for quorumLine in quorumOutput.split("\n"):
                    if ("local" in quorumLine):
                        # The name is the third column of the line that is
                        # marked with "(local)".
                        nameOfNode = quorumLine.split(" (local)")[0].split()[2]
                        break

    # Without a cluster name and a cluster node name this machine is not
    # considered to be part of a cluster.
    if (not (nameOfCluster and nameOfNode)):
        return None

    labelMap = getLabelMapForMountedFilesystems(nameOfCluster, getMountedGFS2Filesystems())
    if (len(listOfGFS2Names) > 0):
        # Only keep the GFS2 filesystems that were asked for.
        for label in list(labelMap.keys()):
            isRequested = False
            for requestedName in listOfGFS2Names:
                if ((requestedName == label) or ("%s:%s"%(nameOfCluster, requestedName) == label)):
                    isRequested = True
                    break
            if (not isRequested):
                del(labelMap[label])

    # The node id is passed on as an int, 0 is used when the id was not found
    # or cannot be converted.
    idOfNodeAsInt = 0
    if (idOfNode.isalnum()):
        try:
            idOfNodeAsInt = int(idOfNode)
        except(ValueError):
            pass
    return ClusterNode(nameOfNode, idOfNodeAsInt, nameOfCluster, labelMap)

def parse_dlm_ls(dlm_ls):
    """
    This function returns the names of all the dlm lockspace names found with the
    commands "dlm_tool ls" or "group_tool ls" output.

    The output is split into sections on blank lines. A section that starts
    with "fence domain" is skipped. Every other section that contains a line
    starting with "name" and a line starting with "id" adds its name exactly
    once to the result.

    @return: A list of all the dlm lockspace names, in the order they appear in
    the output. An empty list is returned if dlm_ls is None.
    @rtype: Array

    @param dlm_ls: The output of the command "dlm_tool ls" or "group_tool ls",
    or None if the command failed.
    @type dlm_ls: String
    """
    dlmLockspaces = []
    if (dlm_ls is None):
        return dlmLockspaces
    dlmToolLSKeys = ["name", "id", "flags", "change", "members"]
    # Drop the header line and then split into sections on the blank lines.
    for section in dlm_ls.replace("dlm lockspaces\n", "").split("\n\n"):
        if (section.startswith("fence domain")):
            # Not concerned with fence information.
            continue
        # Tmp map to hold the data of this section.
        dlmToolLSMap = dict.fromkeys(dlmToolLSKeys)
        for line in section.split("\n"):
            for dlmToolLSKey in dlmToolLSKeys:
                if (line.startswith(dlmToolLSKey)):
                    dlmToolLSMap[dlmToolLSKey] = line.replace(dlmToolLSKey, " ", 1).strip()
        # Add the name after all the lines of the section were read, so that a
        # lockspace is listed once instead of once for every line that follows
        # its name and id.
        if ((dlmToolLSMap.get("name") is not None) and (dlmToolLSMap.get("id") is not None)):
            dlmLockspaces.append(dlmToolLSMap.get("name"))
    return dlmLockspaces

def getGroupToolDLMLockspaces():
    """
    This function returns the names of all the dlm lockspace names found with the
    command: "group_tool ls".

    @return: A list of all the dlm lockspace names. An empty list is returned
    if the command could not be run.
    @rtype: Array
    """
    groupToolOutput = runCommandOutput("group_tool", ["ls"])
    if (groupToolOutput is None):
        return []
    outputLines = groupToolOutput.split("\n")
    if (outputLines[0].startswith("type")):
        # Then running cman-2.0, where the output is a table and the name is
        # the third column of the rows that start with "dlm".
        return [outputLine.split()[2] for outputLine in outputLines if (outputLine.startswith("dlm"))]
    # Then running cman-3.0 and uses same sorta output as `dlm_tool ls`.
    return parse_dlm_ls(groupToolOutput)

def getDLMLockspaces():
    """
    Returns a list of the dlm lockspace names. The names are read from the
    output of "dlm_tool ls" and if that does not find any lockspaces then the
    output of "group_tool ls" is used.

    @return: Returns a list of dlm lockspace names.
    @rtype: Array
    """
    logging.getLogger(MAIN_LOGGER_NAME).debug("Gathering the DLM Lockspace Names.")
    lockspaceNames = parse_dlm_ls(runCommandOutput("dlm_tool", ["ls"]))
    if (len(lockspaceNames) == 0):
        fallbackMessage = 'There was no dlm lockspaces found with the "dlm_tool ls" command.  Trying with the "group_tool ls" command.'
        logging.getLogger(MAIN_LOGGER_NAME).debug(fallbackMessage)
        lockspaceNames = getGroupToolDLMLockspaces()
    return lockspaceNames

def getVerifiedDLMLockspaceNames(lockspaceNames):
    """
    Returns the names in lockspaceNames that are existing DLM lockspaces
    according to getDLMLockspaces(). The order of lockspaceNames is kept and
    each name is returned only once.

    @return: Returns a list of DLM lockspaces that have been verified to exists.
    @rtype: Array

    @param lockspaceNames: This is the list of DLM lockspaces that will have
    their debug directory copied.
    @type lockspaceNames: Array
    """
    existingLockspaces = getDLMLockspaces()
    verifiedLockspaceNames = []
    for candidateName in lockspaceNames:
        # Skip the names that do not exist and the names that were already
        # verified.
        if ((candidateName not in existingLockspaces) or
            (candidateName in verifiedLockspaceNames)):
            continue
        verifiedLockspaceNames.append(candidateName)
    return verifiedLockspaceNames

def getMountedGFS2Filesystems():
    """
    Runs "mount -l" and keeps every output line whose fifth whitespace
    separated field is "gfs2".

    @return: Returns a list of the "mount -l" output lines that describe a
    mounted GFS2 filesystem. The list is empty if the command returned no
    output object.
    @rtype: Array
    """
    mountOutput = runCommandOutput("mount", ["-l"])
    if (mountOutput is None):
        return []
    gfs2MountLines = []
    for mountLine in mountOutput.split("\n"):
        fields = mountLine.split()
        # A line has the form "<device> on <mount point> type <fstype> ...",
        # which puts the filesystem type in the fifth field.
        if ((len(fields) < 5) or (fields[4] != "gfs2")):
            continue
        gfs2MountLines.append(mountLine)
    return gfs2MountLines

def getLabelMapForMountedFilesystems(clusterName, listOfMountedFilesystems):
    """
    This function will return a dictionary of the mounted GFS2 filesystem that
    contain a label that starts with the cluster name. For example:
    {'f18cluster:mygfs2vol1': '/dev/vdb1 on /mnt/gfs2vol1 type gfs2 (rw,relatime) [f18cluster:mygfs2vol1]'}

    The label is read from the last whitespace separated field of each entry
    after the surrounding square brackets are removed. Entries that are empty
    or contain only whitespace are skipped.

    @return: Returns a dictionary that maps the label to the complete mount
    line for each entry whose label starts with "<clusterName>:".
    @rtype: Dict

    @param clusterName: The name of the cluster.
    @type clusterName: String
    @param listOfMountedFilesystems: A list of all the mounted GFS2 filesystems.
    @type listOfMountedFilesystems: Array
    """
    labelPrefix = "%s:" %(clusterName)
    mapOfMountedFilesystemLabels = {}
    for mountedFilesystem in listOfMountedFilesystems:
        splitMountedFilesystem = mountedFilesystem.split()
        if (not splitMountedFilesystem):
            # A blank entry has no last field to read the label from.
            continue
        fsLabel = splitMountedFilesystem[-1].strip().strip("[").rstrip("]")
        # Only keep labels that belong to this cluster. An empty label can
        # never match because the prefix always contains at least ":".
        if (fsLabel.startswith(labelPrefix)):
            mapOfMountedFilesystemLabels[fsLabel] = mountedFilesystem
    return mapOfMountedFilesystemLabels

# #####################################################################
# Gather output from command functions
# #####################################################################
def gatherHostData(pathToDSTDir):
    """
    This function will gather general information about the host and write
    the results to the file "hostinformation.txt" in the directory
    pathToDSTDir. The following data will be captured: hostname, date,
    uname -a, uptime.

    The output of a command is left out of the file if the command did not
    return an output object (None).

    @param pathToDSTDir: This is the path to directory where the files will be
    written to.
    @type pathToDSTDir: String
    """
    systemString = "HOSTNAME=%s\nTIMESTAMP=%s\n" %(platform.node(), time.strftime("%Y-%m-%d %H:%M:%S"))
    # The None check has to happen before the output is stripped, otherwise a
    # failed command raises an AttributeError instead of being skipped.
    stdout = runCommandOutput("uname", ["-a"])
    if (stdout is not None):
        systemString += "UNAMEA=%s\n" %(stdout.strip())
    stdout = runCommandOutput("uptime", [])
    if (stdout is not None):
        systemString += "UPTIME=%s" %(stdout.strip())
    writeToFile(os.path.join(pathToDSTDir, "hostinformation.txt"), systemString, createFile=True)

def gatherDiagnosticData(pathToDSTDir):
    """
    This function will gather general information about the cluster and write (or
    copy) the results to a file. The following is captured:
      - the output of "ps h -AL -o tid,s,cmd" in the file "ps_hALo-tid.s.cmd"
      - a copy of /proc/mounts and /proc/slabinfo
      - a copy of the DLM hash table size files under
        /sys/kernel/config/dlm/cluster (only the ones that exist)

    The copied files keep their source path relative to pathToDSTDir.

    @param pathToDSTDir: This is the path to directory where the files will be
    written to.
    @type pathToDSTDir: String

    """
    # Capture the state and command of every thread.
    command = "ps"
    pathToCommandOutput = os.path.join(pathToDSTDir, "ps_hALo-tid.s.cmd")
    try:
        # The with statement closes the file even if the command fails.
        with open(pathToCommandOutput, "w") as fout:
            runCommand(command, ["h", "-AL", "-o", "tid,s,cmd"], standardOut=fout)
    except IOError:
        message = "There was an error writing the command output for %s to the file %s." %(command, pathToCommandOutput)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)

    # Copy misc files
    for pathToSrcFile in ["/proc/mounts", "/proc/slabinfo"]:
        copyFile(pathToSrcFile, os.path.join(pathToDSTDir, pathToSrcFile.strip("/")))

    # Copy the DLM hash table sizes:
    pathToHashTableFiles = ["/sys/kernel/config/dlm/cluster/lkbtbl_size", "/sys/kernel/config/dlm/cluster/dirtbl_size",
                            "/sys/kernel/config/dlm/cluster/rsbtbl_size"]
    for pathToSrcFile in pathToHashTableFiles:
        if (os.path.exists(pathToSrcFile)):
            copyFile(pathToSrcFile, os.path.join(pathToDSTDir, pathToSrcFile.strip("/")))

def _writeCommandOutputToFile(command, listOfCommandOptions, pathToCommandOutput):
    """
    Runs a command with its standard output redirected to a file. An IOError
    that is raised while the file is opened or written is logged and not
    raised to the caller.

    @param command: The command that will be run.
    @type command: String
    @param listOfCommandOptions: The options that are passed to the command.
    @type listOfCommandOptions: Array
    @param pathToCommandOutput: The path to the file that will contain the
    standard output of the command.
    @type pathToCommandOutput: String
    """
    try:
        # The with statement closes the file even if the command fails.
        with open(pathToCommandOutput, "w") as fout:
            runCommand(command, listOfCommandOptions, standardOut=fout)
    except IOError:
        message = "There was an error writing the command output for %s to the file %s." %(command, pathToCommandOutput)
        logging.getLogger(MAIN_LOGGER_NAME).error(message)

def gatherOptionalDiagnosticData(pathToDSTDir):
    """
    This function will gather optional information about the cluster and write
    the results to a file. The output of "df -h" is always captured. The
    status of the cluster is captured with "cman_tool status" if the command
    cman_tool is found, otherwise with "corosync-quorumtool -l" if the
    command corosync-cmapctl is found.

    @param pathToDSTDir: This is the path to directory where the files will be
    written to.
    @type pathToDSTDir: String
    """
    # Get df -h output
    _writeCommandOutputToFile("df", ["-h"], os.path.join(pathToDSTDir, "df-h.cmd"))

    # Write the status of all the nodes in the cluster out.
    if (runCommand("which", ["cman_tool"])):
        _writeCommandOutputToFile("cman_tool", ["status"],
                                  os.path.join(pathToDSTDir, "cman_tool_status"))
    elif (runCommand("which", ["corosync-cmapctl"])):
        # NOTE(review): the check looks for corosync-cmapctl but the command
        # that is run is corosync-quorumtool. Kept as is; confirm that this is
        # intended.
        _writeCommandOutputToFile("corosync-quorumtool", ["-l"],
                                  os.path.join(pathToDSTDir, "corosync-quorumtool_l"))

# #####################################################################
# Gather Process Information
# #####################################################################
def isProcPidStackEnabled(pathToPidData):
    """
    Checks whether the path "1/stack" exists below the directory
    pathToPidData, which is the "stack" file in the data directory of the
    pid 1.

    @return: Returns True if the path "<pathToPidData>/1/stack" exists.
    @rtype: Boolean

    @param pathToPidData: The path to the directory where all the pid data
    directories are located.
    @type pathToPidData: String
    """
    pathToInitStackFile = os.path.join(pathToPidData, "1", "stack")
    return os.path.exists(pathToInitStackFile)

def gatherPidData(pathToPidData, pathToDSTDir):
    """
    Copies the files "cmdline", "stack" and "status" of every pid directory
    (an entry whose name consists of digits only) found in pathToPidData to
    a directory with the same name below pathToDSTDir.

    @return: Returns a list of the destination pid directories that exist
    after the files were copied. The list is empty if pathToPidData does not
    exist.
    @rtype: Array

    @param pathToPidData: The path to the directory where all the pid data
    directories are located.
    @type pathToPidData: String
    @param pathToDSTDir: The path to the directory that the pid directories
    will be copied to.
    @type pathToDSTDir: String
    """
    copiedPidDirs = []
    if (not os.path.exists(pathToPidData)):
        return copiedPidDirs
    for entryName in os.listdir(pathToPidData):
        if (not entryName.isdigit()):
            # Only the entries that are named after a pid are of interest.
            continue
        srcPidDir = os.path.join(pathToPidData, entryName)
        dstPidDir = os.path.join(pathToDSTDir, entryName)
        # Status has: command name, pid, ppid, state, possibly registers
        for pidFilename in ("cmdline", "stack", "status"):
            copyFile(os.path.join(srcPidDir, pidFilename), os.path.join(dstPidDir, pidFilename))
        if (os.path.exists(dstPidDir)):
            copiedPidDirs.append(dstPidDir)
    return copiedPidDirs

def triggerSysRQEvents():
    """
    This command will trigger sysrq events by running the command "echo" with
    its standard output redirected to the file /proc/sysrq-trigger. Only the
    "t" event is currently triggered, which dumps the state information of
    all the threads. The "m" event (dump information about memory allocation)
    is disabled. The output of the events is expected to show up in the
    system log (/var/log/messages).

    An IOError that is raised while the trigger file is opened or written is
    logged and not raised to the caller.
    """
    command = "echo"
    pathToSysrqTriggerFile = "/proc/sysrq-trigger"
    # m - dump information about memory allocation (disabled)
    # t - dump thread state information
    triggers = ["t"]
    for trigger in triggers:
        try:
            # The with statement closes the file even if the command fails.
            with open(pathToSysrqTriggerFile, "w") as fout:
                runCommand(command, [trigger], standardOut=fout)
        except IOError:
            message = "There was an error writing the command output for %s to the file %s." %(command, pathToSysrqTriggerFile)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)

# #####################################################################
# Gather lockdumps and logs
# #####################################################################
def gatherLogs(pathToDSTDir):
    """
    This function will copy all the cluster logs(/var/log/cluster) and the
    system log(/var/log/messages) to the directory given by pathToDSTDir. The
    cluster logs are only copied if the directory /var/log/cluster exists.

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    """
    pathToLogFile = "/var/log/messages"
    pathToDSTLogFile = os.path.join(pathToDSTDir, os.path.basename(pathToLogFile))
    copyFile(pathToLogFile, pathToDSTLogFile)

    pathToLogDir = "/var/log/cluster"
    if (os.path.exists(pathToLogDir)):
        # The destination that is passed is the parent directory.
        # NOTE(review): presumably copyDirectory() creates the "cluster"
        # directory below it, as gatherGFS2LockDumps() relies on - confirm.
        copyDirectory(pathToLogDir, pathToDSTDir)

def gatherDLMLockDumps(pathToDSTDir, lockspaceNames):
    """
    This function copies all the debug files for dlm and sorts them into their
    own directory based on name of dlm lockspace. After that the output of
    "dlm_tool lockdebug -v -s -w <lockspace name>" is written to the file
    "<lockspace name>_lockdebug" in the directory of each lockspace.

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    @param lockspaceNames: This is the list of DLM lockspaces that will have
    their debug directory copied.
    @type lockspaceNames: Array
    """
    # This function assumes that getVerifiedDLMLockspaceNames() has already
    # been called to verify the lockspaces do exist.
    lockDumpType = "dlm"
    pathToSrcDir = os.path.join(PATH_TO_DEBUG_DIR, lockDumpType)
    pathToOutputDir = os.path.join(pathToDSTDir, lockDumpType)
    logger = logging.getLogger(MAIN_LOGGER_NAME)
    message = "Copying the files in the %s lockdump data directory %s." %(lockDumpType.upper(), pathToSrcDir)
    logger.debug(message)

    # Copy every debug file whose name starts with the name of a lockspace.
    # NOTE(review): the prefix match means a lockspace named "vol1" also
    # collects the files of a lockspace named "vol10". Kept as is; confirm
    # whether a stricter match is wanted.
    if (os.path.exists(pathToSrcDir)):
        for filename in os.listdir(pathToSrcDir):
            for lockspaceName in lockspaceNames:
                if (filename.startswith(lockspaceName)):
                    copyFile(os.path.join(pathToSrcDir, filename),
                             os.path.join(pathToOutputDir, lockspaceName, filename))

    # Run dlm_tool lockdebug against the lockspace names and write to file.
    for lockspaceName in lockspaceNames:
        dstDir = os.path.join(pathToOutputDir, lockspaceName)
        if (mkdirs(dstDir)):
            pathToCommandOutput = os.path.join(dstDir,"%s_lockdebug" %(lockspaceName))
            try:
                # The with statement closes the file even if the command fails.
                with open(pathToCommandOutput, "w") as fout:
                    runCommand("dlm_tool", ["lockdebug", "-v", "-s", "-w", lockspaceName], standardOut=fout)
            except IOError:
                message = "There was an error writing the command output to the file %s." %(pathToCommandOutput)
                logger.error(message)

def gatherGFS2LockDumps(pathToDSTDir, listOfGFS2Filesystems):
    """
    This function copies the debug directory for a GFS2 filesystems in the list
    to a directory. The list of GFS2 filesystems will include the cluster name
    and filesystem name for each item in the list. For example:
    "f18cluster:mygfs2vol1"

    The entries found in the copied directories are counted across all the
    filesystems in the list.

    @return: Returns True if at least one entry was found in the directories
    that were copied to the destination directory. Returns False if the gfs2
    debug directory does not exist or nothing was copied.
    @rtype: Boolean

    @param pathToDSTDir: This is the path to directory where the files will be
    copied to.
    @type pathToDSTDir: String
    @param listOfGFS2Filesystems: This is the list of the GFS2 filesystems that
    will have their debug directory copied.
    @type listOfGFS2Filesystems: Array
    """
    lockDumpType = "gfs2"
    pathToSrcDir = os.path.join(PATH_TO_DEBUG_DIR, lockDumpType)
    pathToOutputDir = os.path.join(pathToDSTDir, lockDumpType)
    if (not os.path.exists(pathToSrcDir)):
        return False
    # The number of entries that were copied for all the filesystems.
    fileCopiedCount = 0
    for dirName in os.listdir(pathToSrcDir):
        pathToCurrentDir = os.path.join(pathToSrcDir, dirName)
        if ((os.path.isdir(pathToCurrentDir)) and (dirName in listOfGFS2Filesystems)):
            message = "Copying the lockdump data for the %s filesystem: %s" %(lockDumpType.upper(), dirName)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            copySuccessful = copyDirectory(pathToCurrentDir, pathToOutputDir)
            pathToCopiedDir = os.path.join(pathToOutputDir, dirName)
            if (copySuccessful and os.path.exists(pathToCopiedDir)):
                # Accumulate the count. Assigning it would let the filesystem
                # that is listed last decide the result on its own.
                fileCopiedCount += len(os.listdir(pathToCopiedDir))
    # If the number of entries copied was greater than zero then the lockdump
    # data was copied successfully.
    return (fileCopiedCount > 0)

# ##############################################################################
# Get user selected options
# ##############################################################################
def __getOptions(version) :
    """
    Builds the command line parser, registers all the options that this
    script supports and parses the command line.

    The first item of the result holds the options that the user selected
    and the second item holds the values that were passed without being
    associated with an option.

    @return: A tuple of the selected commandline options and commandline args.
    @rtype: Tuple

    @param version: The version of the this script.
    @type version: String
    """
    # Each entry is the tuple of option strings followed by the attributes of
    # the option. The options are registered in the order listed here.
    optionTable = [
        (("-d", "--debug"),
         {"action": "store_true",
          "dest": "enableDebugLogging",
          "help": "enables debug logging",
          "default": False}),
        (("-q", "--quiet"),
         {"action": "store_true",
          "dest": "disableLoggingToConsole",
          "help": "disables logging to console",
          "default": False}),
        (("-y", "--no_ask"),
         {"action": "store_true",
          "dest": "disableQuestions",
          "help": "disables all questions and assumes yes",
          "default": False}),
        (("-i", "--info"),
         {"action": "store_true",
          "dest": "enablePrintInfo",
          "help": "prints information about the mounted GFS2 file-systems",
          "default": False}),
        (("-P", "--disable_process_gather"),
         {"action": "store_true",
          "dest": "disableProcessGather",
          "help": "the gathering of process information will be disabled",
          "default": False}),
        (("-m", "--diagnostic_data"),
         {"action": "store_true",
          "dest": "enableDiagnosticData",
          "help": SUPPRESS_HELP,
          "default": False}),
        (("-o", "--path_to_output_dir"),
         {"action": "store",
          "dest": "pathToOutputDir",
          "help": "the directory where all the collect data will be stored",
          "type": "string",
          "metavar": "<output directory>",
          "default": "/tmp"}),
        (("-r", "--num_of_runs"),
         {"action": "store",
          "dest": "numberOfRuns",
          "help": "number of runs capturing the lockdump data(default: 3 runs)",
          "type": "int",
          "metavar": "<number of runs>",
          "default": 3}),
        (("-s", "--seconds_sleep"),
         {"action": "store",
          "dest": "secondsToSleep",
          "help": "number of seconds to sleep between runs of capturing the lockdump data(default: 120 seconds)",
          "type": "int",
          "metavar": "<seconds to sleep>",
          "default": 120}),
        (("-n", "--fs_name"),
         {"action": "extend",
          "dest": "listOfGFS2Names",
          "help": "name of the GFS2 filesystem(s) that will have their lockdump data captured(default: all GFS2 file-systems will be captured)",
          "type": "string",
          "metavar": "<name of GFS2 filesystem>",
          "default": []}),
    ]
    cmdParser = OptionParserExtended(version)
    for (optionStrings, optionAttributes) in optionTable:
        cmdParser.add_option(*optionStrings, **optionAttributes)
    # Get the options and return the result.
    (selectedOptions, remainingArgs) = cmdParser.parse_args()
    return (selectedOptions, remainingArgs)

# ##############################################################################
# OptParse classes for commandline options
# ##############################################################################
class OptionParserExtended(OptionParser):
    """
    This is the class that gets the command line options the end user
    selects. It uses ExtendOption as the option class, so the "extend" action
    is available, and it prints usage examples after the help message.
    """
    def __init__(self, version) :
        """
        @param version: The version of the this script.
        @type version: String
        """
        # The name the script was started with is used in the version
        # message, the description and the examples.
        self.__commandName = os.path.basename(sys.argv[0])
        versionMessage = "%s %s\n" %(self.__commandName, version)

        # The command name is only inserted once, otherwise the description
        # reads "gfs2_lockcapture gfs2_lockcapture will capture ...".
        commandDescription = "%s will capture locking information from GFS2 file systems and DLM.\n" %(self.__commandName)

        OptionParser.__init__(self, option_class=ExtendOption,
                              version=versionMessage,
                              description=commandDescription)

    def print_help(self, file=None):
        """
        Prints the version, the help message and then examples at the bottom
        of the help message.

        @param file: The file object that the output is written to. The
        standard output is used if it is None. The parameter is accepted to
        stay compatible with OptionParser.print_help().
        @type file: File
        """
        self.print_version(file)
        examplesMessage = "\nPrints information about the available GFS2 filesystems that can have lockdump data captured."
        examplesMessage += "\n# %s -i\n" %(self.__commandName)

        examplesMessage += "\nIt will do 3 runs of gathering the lockdump information in 10 second intervals for only the"
        examplesMessage += "\nGFS2 filesystems with the names myGFS2vol2,myGFS2vol1. Then it will archive and compress"
        examplesMessage += "\nthe data collected in the output directory /tmp and all the questions will be answered with yes.\n"
        examplesMessage += "\n# %s -r 3 -s 10 -n myGFS2vol2,myGFS2vol1 -o /tmp -y\n" %(self.__commandName)

        examplesMessage += "\nIt will do 2 runs of gathering the lockdump information in 25 second intervals for all the"
        examplesMessage += "\nmounted GFS2 filesystems. The gathering process data will be disabled. Then it will archive and compress"
        examplesMessage += "\nthe data collected in the output directory: /tmp and all the questions will be answered with yes.\n"
        examplesMessage += "\n# %s -r 2 -s 25 -P -o /tmp\n" %(self.__commandName)
        OptionParser.print_help(self, file)
        if (file is None):
            file = sys.stdout
        file.write("%s\n" %(examplesMessage))

class ExtendOption (Option):
    """
    Adds the action "extend" to the options. The action takes a comma
    delimited list of entries and appends each entry to the array that is
    stored for the option. The option can be given more than once.
    """
    ACTIONS = Option.ACTIONS + ("extend",)
    STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
    TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)

    def take_action(self, action, dest, opt, value, values, parser):
        """
        This function is a wrapper to take certain options passed on command
        prompt and wrap them into an Array. For the action "extend" the value
        is split on commas, the whitespace around each entry is removed and
        the entries that are not empty are appended to the array stored in
        dest. All the other actions are handled by Option.take_action().

        @param action: The type of action that will be taken. For example:
        "store_true", "store_false", "extend".
        @type action: String
        @param dest: The name of the variable that will be used to store the
        option.
        @type dest: String
        @param opt: The option string that triggered the action.
        @type opt: String
        @param value: The value of opt(option) if it takes a
        value, if not then None.
        @type value: String
        @param values: The object that holds the values of all the options.
        @type values: optparse.Values
        @param parser: The option parser that was orginally called.
        @type parser: OptionParser
        """
        if (action == "extend") :
            if (value is None):
                # There is nothing to add when no value was passed.
                return
            valueList = []
            for v in value.split(","):
                # Need to add code for dealing with paths if there is option for paths.
                # Strip the single entry and not the complete value, otherwise
                # every entry ends up being the unsplit string.
                newValue = v.strip()
                if (len(newValue) > 0):
                    valueList.append(newValue)
            values.ensure_value(dest, []).extend(valueList)
        else:
            Option.take_action(self, action, dest, opt, value, values, parser)

# ###############################################################################
# Main Function
# ###############################################################################
if __name__ == "__main__":
    """
    When the script is executed then this code is ran. If there was files(not
    directories) created then 0 will be returned, else a 1 is returned.
    """
    try:
        # #######################################################################
        # Get the options from the commandline.
        # #######################################################################
        (cmdLineOpts, cmdLineArgs) = __getOptions(VERSION_NUMBER)
        # #######################################################################
        # Setup the logger and create config directory
        # #######################################################################
        # Create the logger
        logLevel = logging.INFO
        logger = logging.getLogger(MAIN_LOGGER_NAME)
        logger.setLevel(logLevel)
        # Create a new status function and level.
        logging.STATUS = logging.INFO + 2
        logging.addLevelName(logging.STATUS, "STATUS")

        # Log to main system logger that script has started then close the
        # handler before the other handlers are created.
        sysLogHandler = logging.handlers.SysLogHandler(address = '/dev/log')
        logger.addHandler(sysLogHandler)
        logger.info("Capturing of the data to analyze GFS2 lockdumps.")
        logger.removeHandler(sysLogHandler)

        # Create a function for the STATUS_LEVEL since not defined by python. This
        # means you can call it like the other predefined message
        # functions. Example: logging.getLogger("loggerName").status(message)
        setattr(logger, "status", lambda *args: logger.log(logging.STATUS, *args))
        streamHandler = logging.StreamHandler()
        streamHandler.setLevel(logLevel)
        streamHandler.setFormatter(logging.Formatter("%(levelname)s %(message)s"))
        logger.addHandler(streamHandler)

        # Please note there will not be a global log file created. If a log file
        # is needed then redirect the output. There will be a log file created
        # for each run in the corresponding directory.

        # #######################################################################
        # Set the logging levels.
        # #######################################################################
        if ((cmdLineOpts.enableDebugLogging) and (not cmdLineOpts.disableLoggingToConsole)):
            logging.getLogger(MAIN_LOGGER_NAME).setLevel(logging.DEBUG)
            streamHandler.setLevel(logging.DEBUG)
            message = "Debugging has been enabled."
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
        if (cmdLineOpts.disableLoggingToConsole):
            streamHandler.setLevel(logging.CRITICAL)
        # #######################################################################
        # Check to see if pid file exists and error if it does.
        # #######################################################################
        if (os.path.exists(PATH_TO_PID_FILENAME)):
            message = "The PID file %s already exists and this script cannot run till it does not exist." %(PATH_TO_PID_FILENAME)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            message = "Verify that there are no other existing processes running. If there are running processes those need to be stopped first and the file removed."
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
            exitScript(removePidFile=False, errorCode=1)
        else:
            message = "Creating the pid file: %s" %(PATH_TO_PID_FILENAME)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            # Creata the pid file so we dont have more than 1 process of this
            # script running.
            writeToFile(PATH_TO_PID_FILENAME, str(os.getpid()), createFile=True)
        # #######################################################################
        # Get the clusternode name and verify that mounted GFS2 filesystems were
        # found.
        # #######################################################################
        clusternode = getClusterNode(cmdLineOpts.listOfGFS2Names)
        if (clusternode == None):
            message = "The cluster or cluster node name could not be found."
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            exitScript(removePidFile=True, errorCode=1)
        elif (not len(clusternode.getMountedGFS2FilesystemNames()) > 0):
            message = "There were no mounted GFS2 filesystems found."
            if (len(cmdLineOpts.listOfGFS2Names) > 0):
                message = "There were no mounted GFS2 filesystems found with the name:"
                for name in cmdLineOpts.listOfGFS2Names:
                    message += " %s" %(name)
                message += "."
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
        if (cmdLineOpts.enablePrintInfo):
            logging.disable(logging.CRITICAL)
            print("List of all the mounted GFS2 filesystems that can have their lockdump data captured:")
            print(clusternode)
            exitScript()
        # #######################################################################
        # Verify they want to continue because this script will trigger sysrq events.
        # #######################################################################
        if (not cmdLineOpts.disableQuestions and not cmdLineOpts.disableProcessGather):
            valid = {"yes":True, "y":True, "no":False, "n":False}
            question = "This script will trigger a sysrq -t event or collect the data for each pid directory located in /proc for each run. Are you sure you want to continue?"
            prompt = " [y/n] "
            while True:
                sys.stdout.write(question + prompt)
                try: # python2 compatible input
                    input = raw_input
                except NameError:
                    pass
                choice = input().lower()
                if (choice in valid):
                    if (valid.get(choice)):
                        # If yes, or y then exit loop and continue.
                        break
                    else:
                        message = "The script will not continue since you chose not to continue."
                        logging.getLogger(MAIN_LOGGER_NAME).error(message)
                        exitScript(removePidFile=True, errorCode=1)
                else:
                    sys.stdout.write("Please respond with '(y)es' or '(n)o'.\n")
        # #######################################################################
        # Create the output directory to verify it can be created before
        # proceeding unless it is already created from a previous run data needs
        # to be analyzed. Probably could add more debugging on if file or dir.

        # Backup any existing directory with same name as current output
        # directory.
        # #######################################################################
        pathToOutputDir = "%s" %(os.path.join(cmdLineOpts.pathToOutputDir, "%s-%s" %(os.path.basename(sys.argv[0]), time.strftime("%Y-%m-%d"))))
        if (backupOutputDirectory(pathToOutputDir)):
            message = "This directory that will be used to capture all the data: %s" %(pathToOutputDir)
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
            if (not mkdirs(pathToOutputDir)):
                exitScript(errorCode=1)
        else:
            # There was an existing directory with same path as current output
            # directory and it failed to back it up.
            message = "Please change the output directory path (-o) or manual rename or remove the existing path: %s" %(pathToOutputDir)
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
            exitScript(errorCode=1)
        # #######################################################################
        # Check to see if the debug directory is mounted. If not then
        # log an error.
        # #######################################################################
        if(mountFilesystem("debugfs", "none", PATH_TO_DEBUG_DIR)):
            message = "The debug filesystem %s is mounted." %(PATH_TO_DEBUG_DIR)
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
        else:
            message = "There was a problem mounting the debug filesystem: %s" %(PATH_TO_DEBUG_DIR)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
            message = "The debug filesystem is required to be mounted for this script to run."
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
            exitScript(errorCode=1)
        # #######################################################################
        # Gather data and the lockdumps.
        # #######################################################################
        if (cmdLineOpts.numberOfRuns <= 0):
            message = "The number of runs must be greater than zero."
            logging.getLogger(MAIN_LOGGER_NAME).warning(message)
            exitScript(errorCode=1)
        # If GFS2 lockdump files were successfully copied to output directory
        # then the exit code will be set to 0, else the exit code will be 1.
        exitCode = 1
        for i in range(1,(cmdLineOpts.numberOfRuns + 1)):
            # The current log count that will start at 1 and not zero to make it
            # make sense in logs.
            # Add clusternode name under each run dir to make combining multple
            # clusternode gfs2_lockgather data together and all data in each run directory.
            pathToOutputRunDir = os.path.join(pathToOutputDir, "run%d/%s" %(i, clusternode.getClusterNodeName()))
            # Create the the directory that will be used to capture the data.
            if (not mkdirs(pathToOutputRunDir)):
                exitScript(errorCode=1)
            # Set the handler for writing to log file for this run.
            currentRunFileHandler = None
            pathToLogFile = os.path.join(pathToOutputRunDir, "%s.log" %(MAIN_LOGGER_NAME))
            if (((os.access(pathToLogFile, os.W_OK) and os.access("/tmp", os.R_OK))) or (not os.path.exists(pathToLogFile))):
                currentRunFileHandler = logging.FileHandler(pathToLogFile)
                currentRunFileHandler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s", "%Y-%m-%d %H:%M:%S"))
                logging.getLogger(MAIN_LOGGER_NAME).addHandler(currentRunFileHandler)
            message = "Pass (%d/%d): Gathering all the lockdump data." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).status(message)

            # Gather various bits of data from the clusternode.
            message = "Pass (%d/%d): Gathering simple data about the host." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            gatherHostData(pathToOutputRunDir)
            # Write the clusternode name and id to the general information file.
            writeToFile(os.path.join(pathToOutputRunDir, "hostinformation.txt"),
                        "NODE_NAME=%s\nNODE_ID=%d" %(clusternode.getClusterNodeName(), clusternode.getClusterNodeID()),
                        appendToFile=True, createFile=True)
            # #######################################################################
            # Gather the DLM data and lock-dumps
            # #######################################################################
            # Gather data for the  DLM lockspaces that are found.
            lockspaceNames = clusternode.getMountedGFS2FilesystemNames(includeClusterName=False)
            # In addition always gather these lockspaces(if they exist).
            lockspaceNames.append("clvmd")
            lockspaceNames.append("rgmanager")
            # Verify that these lockspace names exist.
            lockspaceNames = getVerifiedDLMLockspaceNames(lockspaceNames)
            # Gather the dlm locks.
            message = "Pass (%d/%d): Gathering the DLM lock-dumps for the host." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            # Add other notable lockspace names that should be captured if they exist.
            gatherDLMLockDumps(pathToOutputRunDir, lockspaceNames)

            # #######################################################################
            # Gather the GFS2 data and lock-dumps
            # #######################################################################
            # Gather the glock locks from gfs2.
            message = "Pass (%d/%d): Gathering the GFS2 lock-dumps for the host." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            if(gatherGFS2LockDumps(pathToOutputRunDir, clusternode.getMountedGFS2FilesystemNames())):
                exitCode = 0
            # If enabled then gather the process data. This will be included even if -R option is enabled.
            if (not cmdLineOpts.disableProcessGather):
                # Gather the backtraces for all the pids, either by copying the
                # /proc/<pid number> directories or by triggering sysrq events
                # to capture task backtraces from the log.
                # Gather the data in the /proc/<pid> directory if the file
                # /proc/<pid>/stack exists. If the file exists we will not
                # trigger sysrq events.

                # Should I gather anyhow and only capture sysrq if needed.
                pathToPidData = "/proc"
                if (isProcPidStackEnabled(pathToPidData)):
                    message = "Pass (%d/%d): Triggering the capture of all pid directories in %s." %(i, cmdLineOpts.numberOfRuns, pathToPidData)
                    logging.getLogger(MAIN_LOGGER_NAME).debug(message)
                    gatherPidData(pathToPidData, os.path.join(pathToOutputRunDir, pathToPidData.strip("/")))
                else:
                    message = "Pass (%d/%d): Triggering the sysrq events for the host since stack was not captured in pid directory." %(i, cmdLineOpts.numberOfRuns)
                    logging.getLogger(MAIN_LOGGER_NAME).debug(message)
                    triggerSysRQEvents()

            # Gather log files
            message = "Pass (%d/%d): Gathering the log files for the host." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            gatherLogs(os.path.join(pathToOutputRunDir, "logs"))

            # Gather diagnostic data
            message = "Pass (%d/%d): Gathering diagnostic data about the host." %(i, cmdLineOpts.numberOfRuns)
            logging.getLogger(MAIN_LOGGER_NAME).debug(message)
            gatherDiagnosticData(pathToOutputRunDir)
            if (cmdLineOpts.enableDiagnosticData):
                # Gather diagnostic data
                message = "Pass (%d/%d): Gathering optional diagnostic data about the host." %(i, cmdLineOpts.numberOfRuns)
                logging.getLogger(MAIN_LOGGER_NAME).debug(message)
                gatherOptionalDiagnosticData(pathToOutputRunDir)

            # #######################################################################
            # Sleep for X seconds between runs
            # #######################################################################
            # Sleep between runs, but not after the last run. Add 2 seconds to
            # the requested sleep time so that we know there is a timestamp
            # difference in the logs between runs. The minimal sleep is
            # 2 seconds, even if the requested sleep time is negative.
            secondsToSleep = cmdLineOpts.secondsToSleep + 2
            if (secondsToSleep < 2):
                secondsToSleep = 2
            if (i < cmdLineOpts.numberOfRuns):
                message = "The script will sleep for %d seconds between each run of capturing the lockdump data." %(secondsToSleep)
                logging.getLogger(MAIN_LOGGER_NAME).info(message)
                time.sleep(secondsToSleep)
            # Remove the handler:
            logging.getLogger(MAIN_LOGGER_NAME).removeHandler(currentRunFileHandler)

        # #######################################################################
        # Archive the directory that contains all the captured data, now that
        # all the information has been gathered.
        # #######################################################################
        message = "All the files have been gathered and this directory contains all the captured data: %s" %(pathToOutputDir)
        logging.getLogger(MAIN_LOGGER_NAME).info(message)
        message = "The lockdump data will now be archive. This could some time depending on the size of the data collected."
        logging.getLogger(MAIN_LOGGER_NAME).info(message)
        pathToTarFilename = archiveData(pathToOutputDir)
        if (os.path.exists(pathToTarFilename)):
            message = "The compressed archvied file was created: %s" %(pathToTarFilename)
            logging.getLogger(MAIN_LOGGER_NAME).info(message)
            # Do some cleanup by removing the data directory if the archive file was created.
            try:
                shutil.rmtree(pathToOutputDir)
            except OSError:
                message = "There was an error removing the directory: %s." %(pathToOutputDir)
                logging.getLogger(MAIN_LOGGER_NAME).error(message)
        else:
            message = "The compressed archvied failed to be created: %s" %(pathToTarFilename)
            logging.getLogger(MAIN_LOGGER_NAME).error(message)
        # #######################################################################
    except KeyboardInterrupt:
        print("")
        message =  "This script will exit since control-c was executed by end user."
        logging.getLogger(MAIN_LOGGER_NAME).error(message)
        exitScript(errorCode=1)
    # #######################################################################
    # Exit the application with exitCode: 0 if GFS2 lockdump files were
    # captured on at least one run, otherwise 1.
    # #######################################################################
    exitScript(errorCode=exitCode)
source-git / gfs2-utils

Source Code

Files