When using Hadoop under Linux, we need to type a lot of commands to operate on the cluster, and some of them are used very frequently. Here I abbreviate those frequently used commands so that they are more convenient and comfortable to use.

You can add them to your personal shell configuration file (~/.bashrc or ~/.zshrc).

 

hadoop fs series commands

# Short names for `hadoop fs` subcommands: each alias is "h" followed by the
# subcommand name (hmerge stands for -getmerge). Some aliases bake in extra
# flags, so check the definition before relying on default behaviour.

# Listing and inspection.
alias hls="hadoop fs -ls -h" \
	hdu="hadoop fs -du -h" \
	hstat="hadoop fs -stat" \
	htest="hadoop fs -test" \
	hchecksum="hadoop fs -checksum"

# Reading file contents.
alias hcat="hadoop fs -cat" \
	htext="hadoop fs -text" \
	htail="hadoop fs -tail"

# Transfers between the local file system and the cluster.
alias hget="hadoop fs -get" \
	hput="hadoop fs -put" \
	hmerge="hadoop fs -getmerge"

# Creating, copying, moving and deleting.
# NOTE: hrm always passes -r -f, so it removes directories without asking.
alias hmkdir="hadoop fs -mkdir -p" \
	htouchz="hadoop fs -touchz" \
	hcp="hadoop fs -cp" \
	hmv="hadoop fs -mv" \
	hrm="hadoop fs -rm -r -f"

# Permissions and ownership.
alias hchmod="hadoop fs -chmod" \
	hchgrp="hadoop fs -chgrp" \
	hchown="hadoop fs -chown"

 

MapReduce job information related commands

    • show Hadoop/MapReduce related processes
# Show the `ps -ef` lines of the Hadoop daemons, grouped by daemon type in
# this order: NameNode, SecondaryNameNode, ResourceManager, JobHistoryServer,
# DataNode, NodeManager, JournalNode.
# Arguments: none
# Outputs:   matching process lines on stdout
# Returns:   exit status of the last (JournalNode) filter
function mr_ps()
{
	local snapshot daemon

	# Take one process snapshot so every section is filtered from the same
	# consistent view; lines mentioning grep are dropped as before.
	snapshot=$(ps -ef | grep -v grep)

	for daemon in NameNode SecondaryNameNode ResourceManager JobHistoryServer DataNode NodeManager JournalNode; do
		if [ "$daemon" = "NameNode" ]; then
			# "NameNode" is a substring of "SecondaryNameNode"; exclude the
			# latter here so it is listed only once, in its own section.
			printf '%s\n' "$snapshot" | grep NameNode | grep -v SecondaryNameNode
		else
			printf '%s\n' "$snapshot" | grep -- "$daemon"
		fi
	done
}
    • grep job information from the job’s redirected log file (parameter $1 is the log file name)
# Print the job-summary lines (input path count, number of splits, tracking
# URL, running job id) found in a job's redirected log file.
# Arguments: $1 - path of the log file to search
# Outputs:   matching lines on stdout; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of grep
function mr_info()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	# "--" stops option parsing, so a file name starting with "-" is safe.
	grep -E -e '(Total input paths to process|number of splits|The url to track the job|Running job)' -- "$1"
}
    • show job log (parameter $1 is the job ID)
# Page through the output of `mapred job -logs` for one job.
# Arguments: $1 - job id
# Outputs:   the log output, piped through less; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of the pipeline
function mr_log()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	mapred job -logs "$1" | less
}
    • show job counters (parameter $1 is the job ID)
# Page through the output of `mapred job -status` for one job (used here to
# inspect the job's counters).
# Arguments: $1 - job id
# Outputs:   the status output, piped through less; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of the pipeline
function mr_counter()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	mapred job -status "$1" | less
}
    • kill job (parameter $1 is the job ID)
# Kill one job via `mapred job -kill`.
# Arguments: $1 - job id
# Outputs:   whatever mapred prints; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of mapred
function mr_kill()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	mapred job -kill "$1"
}
    • list running jobs
# Print the job list reported by `mapred job -list`.
# Takes no arguments; the return status is that of mapred.
mr_list() {
	mapred job -list
}
    • list a specific Linux user’s jobs, sorted by starting time (parameter $1 is the Linux user name)
# List the `mapred job -list` lines that mention a given user name, sorted.
# Arguments: $1 - user name, matched literally as a whole word anywhere on
#                 the line
# Outputs:   matching lines in `sort` order on stdout; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of the pipeline
function mr_user()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	# -F: treat the name as a literal string, so "." etc. are not regex
	# wildcards; "--" protects a name that starts with "-".
	mapred job -list | grep -F -w -- "$1" | sort
}
    • list a specific Hadoop queue’s jobs, sorted by starting time (parameter $1 is the Hadoop queue name)
# List the `mapred job -list` lines that mention the queue "root.<name>",
# sorted.
# Arguments: $1 - queue name without the "root." prefix; "root.$1" is
#                 matched literally as a whole word anywhere on the line
# Outputs:   matching lines in `sort` order on stdout; usage error on stderr
# Returns:   1 when not called with exactly one argument, otherwise the
#            exit status of the pipeline
function mr_queue()
{
	if [ $# -ne 1 ]; then
		# Diagnostics go to stderr so they never mix with piped output.
		echo "require 1 parameter ..." >&2
		return 1
	fi
	# -F: the "." in "root.<name>" must be a literal dot, not a regex
	# wildcard that would also accept e.g. "root_<name>".
	mapred job -list | grep -F -w -- "root.$1" | sort
}

 

Hope you enjoy it. 🙂

Leave a Reply

Your email address will not be published. Required fields are marked *