Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 714b9e9e4d | |||
| 64cc2c57cf | |||
| 9cdd860903 | |||
| 9d2faec7a5 | |||
| 742a717b49 | |||
| 9ec4d877eb | |||
| 1a5e1c39c7 | |||
| 1d2ddb82be | |||
| f389c9c3f0 | |||
| 1e80821bf8 |
46
README.md
46
README.md
@ -6,7 +6,7 @@ A naive tool for observing gpu status and auto set visible gpu in python code.
|
||||
|
||||
1. install the package.
|
||||
```shell
|
||||
pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.3.tar.gz
|
||||
pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.5.tar.gz
|
||||
```
|
||||
|
||||
2. for observing gpu status, just input
|
||||
@ -74,6 +74,46 @@ def auto_set(num, allow_nonfree=True, ask=True, blacklist=[], show=True):
|
||||
# some code here.
|
||||
```
|
||||
|
||||
## Using this inside a Docker container
|
||||
For some reason, code running inside a Docker container cannot get correct information about the processes that are using the GPU.
|
||||
To work around this, gpuutil can read the output of the nvidia-smi and ps commands from given files, which you must generate on the host machine.
|
||||
To use this in docker, try the following steps:
|
||||
1. Figure out a way to pass the output of the command ```nvidia-smi -q -x``` to the Docker container you are currently using, and save the output as a text file.
|
||||
2. Pass the output of a ps-like command to the container. It is a table-like output whose first line is a header, which must contain at least the user, pid and command columns. Below is a valid output generated by running ```ps -axo user,pid,command``` on the host machine:
|
||||
```
|
||||
USER PID COMMAND
|
||||
root 1 /bin/bash -c bash /etc/init.docker; /usr/sbin/sshd -D
|
||||
root 8 sshd: /usr/sbin/sshd -D [listener] 0 of 10-100 startups
|
||||
root 9 sshd: user1 [priv]
|
||||
user1 19 sshd: user1@pts/0
|
||||
user1 20 -zsh
|
||||
user1 97 tmux
|
||||
user1 98 -zsh
|
||||
```
|
||||
If your generated output uses different column names (for example, when you use ```docker top``` instead of ```ps```, the ```COMMAND``` column is named ```CMD```), you need to prepare a dict that maps each such name to one of ```user, pid, command```. Note that the names are case-insensitive.
|
||||
|
||||
3. run the configuration script.
|
||||
```shell
|
||||
python -m gpuutil.set_redirect -nv path/to/your/nvidia/output -ps /path/to/your/ps/output -pst cmd=command,username=user
|
||||
```
|
||||
For more information about the script, run ```python -m gpuutil.set_redirect -h```; you will get:
|
||||
```
|
||||
usage: set_redirect.py [-h] [--nvsmi NVSMI] [--ps PS] [--ps_name_trans PS_NAME_TRANS]
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--nvsmi NVSMI, -nv NVSMI
|
||||
a file indicates real nvidia-smi -q -x output.
|
||||
--ps PS, -ps PS a file indicates real ps-like output.
|
||||
--ps_name_trans PS_NAME_TRANS, -pst PS_NAME_TRANS
|
||||
a dict of name trans, format: name1=buildin,name2=buildin, buildin can be choosen from command,user,pid
|
||||
```
|
||||
> some advice:
|
||||
> 1. You can use a script that runs the nvidia-smi and ps commands and saves their output to a directory, then mount that directory into the container as read-only.
|
||||
> 2. You could consider mounting the directory as tmpfs.
|
||||
|
||||
## ps:
|
||||
1. you can get more detailed gpu info via accessing gpuutil.GPUStat class, for more information, just look the code.
|
||||
2. Since it use ps command to get detailed process info, it can only be used on linux.
|
||||
1. You can get more detailed GPU info by accessing the gpuutil.GPUStat class; for more information, just look at the code.
|
||||
2. Since it uses the ps command to get detailed process info, it is fully supported only on Linux; if you use it on Windows, some information might be missing.
|
||||
3. If you have any trouble, feel free to open an issue.
|
||||
4. The code is straightforward, so taking a look at it is also a good option if you run into any trouble.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
from gpuutil import GPUStat
|
||||
import curses
|
||||
from gpuutil import GPUStat, loaddict, savedict
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import os
|
||||
|
||||
@ -18,13 +18,11 @@ def load_config():
|
||||
configpath = os.path.join(home_dir, '.gpuutil.conf')
|
||||
if not os.path.isfile(configpath):
|
||||
return {}
|
||||
with open(configpath, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
return loaddict(configpath)
|
||||
def save_config(config):
|
||||
home_dir = os.path.expanduser('~')
|
||||
configdir = os.path.join(home_dir, '.gpuutil.conf')
|
||||
with open(configdir, 'w+', encoding='utf-8') as f:
|
||||
json.dump(config, f, ensure_ascii=False, indent=4)
|
||||
savedict(configdir, config)
|
||||
|
||||
# style format: |c|l:15|r|c:14rl:13|
|
||||
def parse_style(style):
|
||||
@ -61,12 +59,13 @@ if __name__ == '__main__':
|
||||
recommended_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem']
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--profile', '-p', default=None, type=str, help='profile keyword, corresponding configuration are saved in ~/.gpuutil.conf')
|
||||
parser.add_argument('--profile', '-p', default='default', type=str, help='profile keyword, corresponding configuration are saved in ~/.gpuutil.conf')
|
||||
parser.add_argument('--cols', '-c', type=csv2list, help='colums to show.(Availabel cols: {0}'.format(avaliable_cols))
|
||||
parser.add_argument('--style', '-sty', type=str, default=None, help='column style, format: |c|l:15|r|c:14rl:13|, c,l,r are align methods, | is line and :(int) are width limit.')
|
||||
parser.add_argument('--show-process', '-sp', default=True, type=str2bool, help='whether show process or not')
|
||||
parser.add_argument('--vertical', '-v', default=False, type=str2bool, help='whether show each user in different lines. (show user vertically)')
|
||||
parser.add_argument('--save', default=False, action="store_true", help='save config to profile')
|
||||
parser.add_argument('--watch', '-w', default=-1, type=float, help='save config to profile')
|
||||
args = parser.parse_args()
|
||||
cols = args.cols if args.cols is not None else recommended_cols
|
||||
show_process = args.show_process
|
||||
@ -93,6 +92,14 @@ if __name__ == '__main__':
|
||||
save_config(config)
|
||||
elif args.profile is not None:
|
||||
config = load_config()
|
||||
if 'default' not in config:
|
||||
config['default'] = {
|
||||
"cols": cols,
|
||||
"style": style,
|
||||
"limit": limit,
|
||||
"show-process": show_process,
|
||||
"vertical": vertical
|
||||
}
|
||||
if args.profile in config:
|
||||
params = config[args.profile]
|
||||
cols = params["cols"]
|
||||
@ -106,7 +113,35 @@ if __name__ == '__main__':
|
||||
limit = params["limit"]
|
||||
if "vertical" in params:
|
||||
vertical = params["vertical"]
|
||||
|
||||
else:
|
||||
raise ValueError('Profile do not exist.\nAvaliable Profiles:{0}'.format(','.join(list(config.keys()))))
|
||||
stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process)
|
||||
info = stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process, tostdout=False)
|
||||
if args.watch < 0:
|
||||
print(info)
|
||||
else:
|
||||
from curses import wrapper
|
||||
import time
|
||||
def continuous_watch(stdscr, info):
|
||||
curses.curs_set(0)
|
||||
stdscr.clear()
|
||||
stdscr.nodelay(True)
|
||||
lasttime = time.time()
|
||||
try:
|
||||
while True:
|
||||
c = stdscr.getch()
|
||||
if c in [ord('q'), ord('Q')]:
|
||||
break
|
||||
curses.flushinp()
|
||||
hint = "Interval: {0} S | CurrentTime: {1}".format(args.watch, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
||||
stdscr.erase()
|
||||
stdscr.addstr(0, 0, hint + '\n' + info)
|
||||
stdscr.refresh()
|
||||
passed_time = time.time() - lasttime
|
||||
if passed_time < args.watch:
|
||||
time.sleep(args.watch - passed_time)
|
||||
lasttime = time.time()
|
||||
info = stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process, tostdout=False)
|
||||
except KeyboardInterrupt:
|
||||
curses.flushinp()
|
||||
pass
|
||||
wrapper(continuous_watch, info)
|
||||
@ -10,6 +10,31 @@ import platform
|
||||
|
||||
osname = platform.system()
|
||||
|
||||
def loadfile(path):
    """Return the full text of the file at ``path``, decoded as UTF-8."""
    with open(path, mode='r', encoding='utf-8') as stream:
        text = stream.read()
    return text
|
||||
def savefile(path, content):
    """Overwrite the file at ``path`` with ``content`` encoded as UTF-8.

    Returns whatever the underlying ``write`` call returns (the number of
    characters written).
    """
    with open(path, mode='w+', encoding='utf-8') as stream:
        written = stream.write(content)
    return written
|
||||
def loaddict(path):
    """Load a JSON document from ``path``.

    A file that is empty (or contains only whitespace) yields an empty dict
    instead of a JSON decoding error.
    """
    text = loadfile(path).strip()
    if len(text) == 0:
        return {}
    return json.loads(text)
|
||||
def savedict(path, dictionary):
    """Serialize ``dictionary`` as indented, non-ASCII-escaped JSON into ``path``."""
    savefile(path, json.dumps(dictionary, ensure_ascii=False, indent=4))
|
||||
def clean_split(line, delemeter=' '):
    """Split ``line`` on ``delemeter`` and return the non-empty, stripped pieces."""
    stripped = (piece.strip() for piece in line.split(delemeter))
    return [piece for piece in stripped if piece != '']
|
||||
|
||||
def exe_cmd(command):
    """Run ``command`` via ``os.popen`` and return everything it wrote to stdout.

    The pipe is used as a context manager so that it is closed (and the child
    process reaped) as soon as the output has been read, instead of being
    left open until garbage collection.
    """
    with os.popen(command) as pipe:
        return pipe.read()
|
||||
|
||||
def xml2dict(node):
|
||||
node_dict = {}
|
||||
@ -25,10 +50,8 @@ def xml2dict(node):
|
||||
node_dict[child.tag].append(xml2dict(child))
|
||||
return node_dict
|
||||
|
||||
def parse_nvsmi_info(command='nvidia-smi -q -x'):
|
||||
pipe = os.popen(command)
|
||||
xml = pipe.read()
|
||||
tree = ET.fromstring(xml)
|
||||
def parse_nvsmi_info(nvsmixml):
|
||||
tree = ET.fromstring(nvsmixml)
|
||||
return xml2dict(tree)
|
||||
|
||||
def parse_gpu_info(stat):
|
||||
@ -140,7 +163,7 @@ def get_basic_process_info_linux():
|
||||
lines = output.split('\n')[1:]
|
||||
processes = {}
|
||||
for line in lines:
|
||||
words = [p for p in line.split(' ') if p != '']
|
||||
words = clean_split(line)
|
||||
if len(words) < 3:
|
||||
continue
|
||||
username = words[0]
|
||||
@ -168,6 +191,37 @@ def get_basic_process_info_windows():
|
||||
}
|
||||
return processes
|
||||
|
||||
def get_basic_process_info_by_file(filepath, col_name_trans=None):
    """Read a saved ps-like table and return ``{pid: {"user", "command"}}``.

    The first non-blank line of the file is the header. It must contain a
    user, a pid and a command column (matched case-insensitively). The
    command column is assumed to be the last one, and the columns before it
    must not contain whitespace, because every word from the command column
    onwards is joined back into the command string.

    Args:
        filepath: path of a UTF-8 text file holding the ps-like output.
        col_name_trans: optional mapping from header names used in the file
            to the built-in names ``user``, ``pid`` or ``command``. Keys are
            matched case-insensitively. The alias ``cmd -> command`` is
            always available unless the mapping overrides it.

    Returns:
        A dict keyed by pid (as a string); an empty dict for an empty file.

    Raises:
        ValueError: if the header lacks a user, pid or command column.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        # str.split() without arguments drops empty fields and also copes
        # with tabs / stray carriage returns.
        rows = [line.split() for line in f.read().split('\n')]
    # Blank lines (in particular the one produced by a trailing newline)
    # carry no process information.
    rows = [row for row in rows if row]
    if not rows:
        return {}
    header, body = rows[0], rows[1:]

    # Start from the built-in alias so that an empty or partial user mapping
    # (e.g. the ``{}`` stored when no -pst option was given) does not
    # disable it.
    aliases = {'cmd': 'command'}
    if col_name_trans:
        for name, builtin in col_name_trans.items():
            # Header words are lower-cased below, so the keys must be too.
            aliases[str(name).lower()] = builtin

    interested = {
        'user': None,
        'pid': None,
        'command': None
    }
    for i, word in enumerate(header):
        word = word.lower()
        word = aliases.get(word, word)
        if word in interested:
            interested[word] = i

    missing = [name for name, index in interested.items() if index is None]
    if missing:
        raise ValueError('ps-like output {0} has no column for: {1}'.format(
            filepath, ','.join(missing)))

    pid_idx = interested['pid']
    user_idx = interested['user']
    cmd_idx = interested['command']
    # A row must at least reach the pid and user columns to be usable.
    min_len = max(pid_idx, user_idx) + 1

    processes = {}
    for words in body:
        if len(words) < min_len:
            continue
        processes[words[pid_idx]] = {
            "user": words[user_idx],
            "command": ' '.join(words[cmd_idx:])
        }
    return processes
|
||||
|
||||
|
||||
def draw_table(table, rowsty=None, colsty=None, colsz = None):
|
||||
def justify(s, align, width):
|
||||
if align == 'c':
|
||||
@ -267,13 +321,35 @@ class GPUStat():
|
||||
self.cuda_version = ''
|
||||
self.attached_gpus = ''
|
||||
self.driver_version = ''
|
||||
self.nvsmi_source = None
|
||||
self.ps_source = None
|
||||
self.ps_name_trans = None
|
||||
self.load_configure()
|
||||
def load_configure(self):
    """Apply the optional ``redirect`` section of ``~/.gpuutil.conf``.

    When the configuration file exists and has a ``redirect`` entry, each of
    its ``nvsmi_src``, ``ps_src`` and ``ps_name_trans`` keys that is present
    overrides the matching attribute on this instance. Otherwise the
    attributes are left untouched.
    """
    conf_path = os.path.expanduser('~/.gpuutil.conf')
    if not os.path.isfile(conf_path):
        return
    settings = loaddict(conf_path)
    if 'redirect' not in settings:
        return
    redirect = settings['redirect']
    # (key in the config file, attribute on self)
    bindings = (
        ('nvsmi_src', 'nvsmi_source'),
        ('ps_src', 'ps_source'),
        ('ps_name_trans', 'ps_name_trans'),
    )
    for key, attribute in bindings:
        if key in redirect:
            setattr(self, attribute, redirect[key])
|
||||
|
||||
|
||||
def get_process_info(self):
    """Return basic process info from the redirect file or the local OS.

    A configured ``ps_source`` file takes precedence. Otherwise the collector
    is chosen by ``osname`` ('Windows' or 'Linux'); for any other value the
    method returns ``None``.
    """
    if self.ps_source is not None:
        return get_basic_process_info_by_file(self.ps_source, self.ps_name_trans)
    if osname == 'Windows':
        collector = get_basic_process_info_windows
    elif osname == 'Linux':
        collector = get_basic_process_info_linux
    else:
        return None
    return collector()
|
||||
def parse(self):
|
||||
self.raw_info = parse_nvsmi_info('nvidia-smi -q -x')
|
||||
if self.nvsmi_source is None:
|
||||
self.raw_info = parse_nvsmi_info(exe_cmd('nvidia-smi -q -x'))
|
||||
else:
|
||||
self.raw_info = parse_nvsmi_info(loadfile(self.nvsmi_source))
|
||||
self.detailed_info = {}
|
||||
for key, value in self.raw_info.items():
|
||||
if key != 'gpu':
|
||||
@ -302,7 +378,7 @@ class GPUStat():
|
||||
gpu['id'] = i
|
||||
self.gpus.append(gpu)
|
||||
|
||||
def show(self, enabled_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem', 'Users'], colsty=None, colsz=None, show_command=True, vertical=False):
|
||||
def show(self, enabled_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem', 'Users'], colsty=None, colsz=None, show_command=True, vertical=False, tostdout=True):
|
||||
self.parse()
|
||||
gpu_infos = []
|
||||
# stats = {
|
||||
@ -374,7 +450,10 @@ class GPUStat():
|
||||
pid = proc['pid']
|
||||
proc['gpu'] = [str(gpu['id'])]
|
||||
if type(proc['vmem']) is str:
|
||||
proc['vmem'] = int(proc['vmem'].split(' ')[0])
|
||||
try:
|
||||
proc['vmem'] = int(proc['vmem'].split(' ')[0])
|
||||
except:
|
||||
proc['vmem'] = 0
|
||||
if pid not in procs:
|
||||
procs[pid] = proc
|
||||
else:
|
||||
@ -395,7 +474,9 @@ class GPUStat():
|
||||
table_width = info.find('\n')
|
||||
proc_info = draw_table([['Process Info'.center(table_width-4)], [proc_info]], rowsty="c|c|", colsty="|l|", colsz=[table_width-4])
|
||||
info += proc_info
|
||||
print(info)
|
||||
if tostdout:
|
||||
print(info)
|
||||
return info
|
||||
|
||||
class MoreGPUNeededError(Exception):
|
||||
def __init__(self):
|
||||
|
||||
42
gpuutil/set_redirect.py
Normal file
42
gpuutil/set_redirect.py
Normal file
@ -0,0 +1,42 @@
|
||||
import argparse
|
||||
import os
|
||||
from gpuutil import loaddict, savedict
|
||||
|
||||
# Built-in column names that a column of the ps-like output can be mapped onto.
availabel_name_trans = ['command', 'user', 'pid']

parser = argparse.ArgumentParser()
parser.add_argument('--nvsmi', '-nv', default=None, type=str, help='a file indicates real nvidia-smi -q -x output.')
parser.add_argument('--ps', '-ps', default=None, type=str, help='a file indicates real ps-like output.')
parser.add_argument(
    '--ps_name_trans', '-pst', default=None, type=str,
    help='a dict of name trans, '
         'format: name1=buildin,name2=buildin, '
         'buildin can be choosen from {0}'.format(','.join(availabel_name_trans)))

args = parser.parse_args()

# Parse "name1=buildin,name2=buildin" into {name1: buildin, name2: buildin}.
parsed_name_trans = {}
name_trans = args.ps_name_trans
if name_trans is not None:
    for item in name_trans.split(','):
        item = item.strip()
        if item == '':
            continue
        pair = item.split('=', maxsplit=1)
        if len(pair) != 2:
            raise ValueError('there must be a = in nametrans')
        # Header names are compared in lower case when the ps-like file is
        # parsed, so normalise here; otherwise "CMD=command" would be stored
        # but never match. Stripping tolerates "cmd = command".
        key = pair[0].strip().lower()
        value = pair[1].strip().lower()
        if value not in availabel_name_trans:
            raise ValueError('given buildin name {0} do not exist, avaliable: {1}'.format(value, ','.join(availabel_name_trans)))
        parsed_name_trans[key] = value


def _absolute_or_none(path):
    # The configuration is global (~/.gpuutil.conf) and is read from whatever
    # directory gpuutil later runs in, so store absolute paths.
    if path is None:
        return None
    return os.path.abspath(os.path.expanduser(path))


# Merge into the existing configuration so that other sections are preserved.
config_file = os.path.expanduser('~/.gpuutil.conf')
configuration = {}
if os.path.isfile(config_file):
    configuration = loaddict(config_file)
configuration['redirect'] = {
    "nvsmi_src": _absolute_or_none(args.nvsmi),
    "ps_src": _absolute_or_none(args.ps),
    "ps_name_trans": parsed_name_trans
}

savedict(config_file, configuration)
|
||||
Loading…
Reference in New Issue
Block a user