Compare commits

..

10 Commits

Author SHA1 Message Date
zmy
714b9e9e4d add default config, fix flash issue 2021-07-16 22:31:12 +08:00
zmy
64cc2c57cf add watch support 2021-07-16 22:19:42 +08:00
zmy
9cdd860903 🐛 fix an bug when parsing from file 2020-12-24 11:08:57 +08:00
zmy
9d2faec7a5 🐛 fix bug on empty configuration file 2020-12-22 20:24:26 +08:00
zmy
742a717b49 🐛 fix bug on empty configuration file 2020-12-22 20:17:24 +08:00
zmy
9ec4d877eb 🐛 fix a bug 2020-12-22 20:04:55 +08:00
zmy
1a5e1c39c7 update readme 2020-12-22 19:51:23 +08:00
zmy
1d2ddb82be update readme 2020-12-22 19:49:23 +08:00
zmy
f389c9c3f0 a little improvments and update readme 2020-12-22 19:47:34 +08:00
zmy
1e80821bf8 add support for docker by a new configuration item redirect 2020-12-22 19:21:11 +08:00
5 changed files with 220 additions and 22 deletions

View File

@ -6,7 +6,7 @@ A naive tool for observing gpu status and auto set visible gpu in python code.
1. install the package.
```shell
pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.3.tar.gz
pip install https://git.zmy.pub/zmyme/gpuutil/archive/v0.0.5.tar.gz
```
2. for observing gpu status, just input
@ -74,6 +74,46 @@ def auto_set(num, allow_nonfree=True, ask=True, blacklist=[], show=True):
# some code here.
```
## Use this inside a docker container.
For some reason, code running inside docker cannot get correct information about the processes that are using the gpu.
To work around that, gpuutil can read the output of nvidia-smi and ps from given files, which you should generate on the host machine.
To use this in docker, try the following steps:
1. figure out a way to pass the output of the command ```nvidia-smi -q -x``` to the docker container that you are currently using, and save the output as a text file.
2. pass the output of a ps-like command to the docker container. It is a table-like output whose first line is a header, which should contain at least user, pid and command. Below is a valid output generated by running ```ps -axo user,pid,command``` on the host machine:
```
USER PID COMMAND
root 1 /bin/bash -c bash /etc/init.docker; /usr/sbin/sshd -D
root 8 sshd: /usr/sbin/sshd -D [listener] 0 of 10-100 startups
root 9 sshd: user1 [priv]
user1 19 sshd: user1@pts/0
user1 20 -zsh
user1 97 tmux
user1 98 -zsh
```
if your generated output uses different column names, for example when you are using ```docker top``` instead of ```ps```, the ```COMMAND``` column would be named ```CMD```; in that case you need to prepare a dict that maps each such name to one of ```user, pid, command```. Note that the names are case-insensitive.
3. run the configuration script.
```shell
python -m gpuutil.set_redirect -nv path/to/your/nvidia/output -ps /path/to/your/ps/output -pst cmd=command,username=user
```
for more information about the script, run ```python -m gpuutil.set_redirect -h```, you will get:
```
usage: set_redirect.py [-h] [--nvsmi NVSMI] [--ps PS] [--ps_name_trans PS_NAME_TRANS]
optional arguments:
-h, --help show this help message and exit
--nvsmi NVSMI, -nv NVSMI
a file indicates real nvidia-smi -q -x output.
--ps PS, -ps PS a file indicates real ps-like output.
--ps_name_trans PS_NAME_TRANS, -pst PS_NAME_TRANS
a dict of name trans, format: name1=buildin,name2=buildin, buildin can be choosen from command,user,pid
```
> some advice:
> 1. you can use a script that runs the nvidia-smi and ps commands and saves their output to a directory, then mount the directory into the docker container as read-only.
> 2. you could consider mounting the directory as tmpfs.
## ps:
1. you can get more detailed gpu info via accessing gpuutil.GPUStat class, for more information, just look the code.
2. Since it use ps command to get detailed process info, it can only be used on linux.
1. You can get more detailed gpu info by accessing the gpuutil.GPUStat class; for more information, just look at the code.
2. Since it uses the ps command to get detailed process info, it is fully supported only on linux; if you use it on windows, some information might be missing.
3. If you have any trouble, feel free to open an issue.
4. The code is straightforward, so taking a look at the code is also a good choice if you run into any trouble.

View File

@ -1,6 +1,6 @@
from gpuutil import GPUStat
import curses
from gpuutil import GPUStat, loaddict, savedict
import sys
import json
import argparse
import os
@ -18,13 +18,11 @@ def load_config():
configpath = os.path.join(home_dir, '.gpuutil.conf')
if not os.path.isfile(configpath):
return {}
with open(configpath, 'r', encoding='utf-8') as f:
return json.load(f)
return loaddict(configpath)
def save_config(config):
home_dir = os.path.expanduser('~')
configdir = os.path.join(home_dir, '.gpuutil.conf')
with open(configdir, 'w+', encoding='utf-8') as f:
json.dump(config, f, ensure_ascii=False, indent=4)
savedict(configdir, config)
# style format: |c|l:15|r|c:14rl:13|
def parse_style(style):
@ -61,12 +59,13 @@ if __name__ == '__main__':
recommended_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem']
parser = argparse.ArgumentParser()
parser.add_argument('--profile', '-p', default=None, type=str, help='profile keyword, corresponding configuration are saved in ~/.gpuutil.conf')
parser.add_argument('--profile', '-p', default='default', type=str, help='profile keyword, corresponding configuration are saved in ~/.gpuutil.conf')
parser.add_argument('--cols', '-c', type=csv2list, help='colums to show.(Availabel cols: {0}'.format(avaliable_cols))
parser.add_argument('--style', '-sty', type=str, default=None, help='column style, format: |c|l:15|r|c:14rl:13|, c,l,r are align methods, | is line and :(int) are width limit.')
parser.add_argument('--show-process', '-sp', default=True, type=str2bool, help='whether show process or not')
parser.add_argument('--vertical', '-v', default=False, type=str2bool, help='whether show each user in different lines. (show user vertically)')
parser.add_argument('--save', default=False, action="store_true", help='save config to profile')
# A negative interval (the default) means "print once and exit"; any
# non-negative value turns on the curses watch loop with that refresh period.
parser.add_argument('--watch', '-w', default=-1, type=float, help='refresh interval in seconds for continuous watch mode, a negative value (default) prints the status once and exits')
args = parser.parse_args()
cols = args.cols if args.cols is not None else recommended_cols
show_process = args.show_process
@ -93,6 +92,14 @@ if __name__ == '__main__':
save_config(config)
elif args.profile is not None:
config = load_config()
if 'default' not in config:
config['default'] = {
"cols": cols,
"style": style,
"limit": limit,
"show-process": show_process,
"vertical": vertical
}
if args.profile in config:
params = config[args.profile]
cols = params["cols"]
@ -106,7 +113,35 @@ if __name__ == '__main__':
limit = params["limit"]
if "vertical" in params:
vertical = params["vertical"]
else:
raise ValueError('Profile do not exist.\nAvaliable Profiles:{0}'.format(','.join(list(config.keys()))))
stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process)
info = stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process, tostdout=False)
if args.watch < 0:
print(info)
else:
from curses import wrapper
import time
def continuous_watch(stdscr, info):
    """Redraw the GPU status on a curses screen until the user quits.

    Args:
        stdscr: the curses window handed over by ``curses.wrapper``.
        info: the already rendered status text shown on the first frame.

    The loop ends when ``q``/``Q`` is pressed or on Ctrl-C. Every frame is
    prefixed with a hint line showing the interval and the current time.
    """
    try:
        curses.curs_set(0)
    except curses.error:
        # Some terminals cannot hide the cursor; that is purely cosmetic.
        pass
    stdscr.clear()
    # Make getch() non-blocking so polling for 'q' never stalls the refresh.
    stdscr.nodelay(True)
    lasttime = time.time()
    try:
        while True:
            c = stdscr.getch()
            if c in (ord('q'), ord('Q')):
                break
            # Drop any other queued key presses so they do not pile up.
            curses.flushinp()
            hint = "Interval: {0} S | CurrentTime: {1}".format(args.watch, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            stdscr.erase()
            try:
                stdscr.addstr(0, 0, hint + '\n' + info)
            except curses.error:
                # addstr raises when the text does not fit in the window;
                # keep watching with whatever part could be drawn instead of
                # crashing on a small terminal.
                pass
            stdscr.refresh()
            # Sleep only for the remainder of the interval, so the time spent
            # collecting and drawing is not added on top of it.
            passed_time = time.time() - lasttime
            if passed_time < args.watch:
                time.sleep(args.watch - passed_time)
            lasttime = time.time()
            info = stat.show(enabled_cols = cols, colsty=style, colsz=limit, vertical=vertical, show_command=show_process, tostdout=False)
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave watch mode; discard pending input.
        curses.flushinp()
wrapper(continuous_watch, info)

View File

@ -10,6 +10,31 @@ import platform
osname = platform.system()
def loadfile(path):
    """Return the whole content of the file at ``path``, decoded as UTF-8."""
    with open(path, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return text
def savefile(path, content):
    """Replace the file at ``path`` with ``content`` encoded as UTF-8.

    Returns the value reported by the underlying ``write`` call, i.e. the
    number of characters written.
    """
    with open(path, mode='w+', encoding='utf-8') as sink:
        written = sink.write(content)
    return written
def loaddict(path):
    """Parse the JSON document stored at ``path``.

    A file that is empty or contains only whitespace yields an empty dict
    instead of a JSON decoding error.
    """
    text = loadfile(path).strip()
    if not text:
        return {}
    return json.loads(text)
def savedict(path, dictionary):
    """Write ``dictionary`` to ``path`` as 4-space indented JSON.

    Non-ASCII characters are written as-is rather than escaped.
    """
    savefile(path, json.dumps(dictionary, ensure_ascii=False, indent=4))
def clean_split(line, delemeter=' '):
    """Split ``line`` on ``delemeter`` and return the non-empty, stripped parts."""
    pieces = []
    for raw in line.split(delemeter):
        piece = raw.strip()
        if piece != '':
            pieces.append(piece)
    return pieces
def exe_cmd(command):
    """Run ``command`` through the shell and return everything it printed.

    Args:
        command: the shell command line to execute.

    Returns:
        The captured standard output of the command as a string.
    """
    # Close the pipe deterministically: this is called on every refresh, so
    # leaving it to the garbage collector would leak one handle per call.
    with os.popen(command) as pipe:
        return pipe.read()
def xml2dict(node):
node_dict = {}
@ -25,10 +50,8 @@ def xml2dict(node):
node_dict[child.tag].append(xml2dict(child))
return node_dict
def parse_nvsmi_info(command='nvidia-smi -q -x'):
pipe = os.popen(command)
xml = pipe.read()
tree = ET.fromstring(xml)
def parse_nvsmi_info(nvsmixml):
    """Parse an XML string and convert its root element with ``xml2dict``."""
    return xml2dict(ET.fromstring(nvsmixml))
def parse_gpu_info(stat):
@ -140,7 +163,7 @@ def get_basic_process_info_linux():
lines = output.split('\n')[1:]
processes = {}
for line in lines:
words = [p for p in line.split(' ') if p != '']
words = clean_split(line)
if len(words) < 3:
continue
username = words[0]
@ -168,6 +191,37 @@ def get_basic_process_info_windows():
}
return processes
def get_basic_process_info_by_file(filepath, col_name_trans=None):
    """Build a process table from a saved ps-like listing.

    The first line of the file is a header naming the columns; every
    following line describes one process.

    Args:
        filepath: path of the text file holding the ps-like output.
        col_name_trans: optional dict mapping header names used in the file
            to one of ``user``, ``pid`` or ``command``. Names are matched
            case-insensitively. When None, ``{'cmd': 'command'}`` is used.

    Returns:
        A dict keyed by pid (as a string) whose values are dicts with the
        keys ``"user"`` and ``"command"``.

    Raises:
        ValueError: if the file has process rows but the header lacks a
            user, pid or command column.
    """
    # The command column is assumed to be the last one, and every column
    # before it is assumed to contain no spaces.
    content = loadfile(filepath)
    lines = content.split('\n')
    header = clean_split(lines[0])
    interested = {
        'user': None,
        'pid': None,
        'command': None
    }
    if col_name_trans is None:
        col_name_trans = {'cmd': 'command'}
    # Header words are lower-cased below, so the translation keys must be
    # lower-cased too or an entry such as 'CMD' could never match.
    name_trans = {str(name).lower(): target for name, target in col_name_trans.items()}
    for i, word in enumerate(header):
        word = word.lower()
        word = name_trans.get(word, word)
        if word in interested:
            interested[word] = i
    # Blank lines (typically the one after the final newline) hold no process.
    rows = [clean_split(line) for line in lines[1:]]
    rows = [words for words in rows if words]
    if not rows:
        return {}
    missing = [name for name, index in interested.items() if index is None]
    if missing:
        raise ValueError('column(s) {0} not found in header of {1}'.format(','.join(missing), filepath))
    # A usable row must reach every column we are going to index.
    min_words = max(interested.values()) + 1
    processes = {}
    for words in rows:
        if len(words) < min_words:
            continue
        pid = words[interested['pid']]
        user = words[interested['user']]
        # Re-join everything from the command column on: the command line
        # itself may contain spaces.
        cmd = ' '.join(words[interested['command']:])
        processes[pid] = {
            "user": user,
            "command": cmd
        }
    return processes
def draw_table(table, rowsty=None, colsty=None, colsz = None):
def justify(s, align, width):
if align == 'c':
@ -267,13 +321,35 @@ class GPUStat():
self.cuda_version = ''
self.attached_gpus = ''
self.driver_version = ''
self.nvsmi_source = None
self.ps_source = None
self.ps_name_trans = None
self.load_configure()
def load_configure(self):
    """Pick up the optional ``redirect`` settings from ``~/.gpuutil.conf``.

    Each of ``nvsmi_src``, ``ps_src`` and ``ps_name_trans`` found in the
    ``redirect`` section overrides the matching attribute on this object;
    attributes whose key is absent are left untouched.
    """
    config_file = os.path.expanduser('~/.gpuutil.conf')
    if not os.path.isfile(config_file):
        return
    settings = loaddict(config_file)
    if 'redirect' not in settings:
        return
    redirect = settings['redirect']
    # (key in the config file, attribute on this object)
    bindings = (
        ('nvsmi_src', 'nvsmi_source'),
        ('ps_src', 'ps_source'),
        ('ps_name_trans', 'ps_name_trans'),
    )
    for key, attribute in bindings:
        if key in redirect:
            setattr(self, attribute, redirect[key])
def get_process_info(self):
    """Return the process table from the configured or platform source.

    A configured ``ps_source`` file takes precedence. Otherwise the
    collector matching ``osname`` is used; for any platform other than
    Windows or Linux the result is None.
    """
    if self.ps_source is not None:
        return get_basic_process_info_by_file(self.ps_source, self.ps_name_trans)
    collectors = {
        'Windows': get_basic_process_info_windows,
        'Linux': get_basic_process_info_linux,
    }
    collector = collectors.get(osname)
    if collector is None:
        return None
    return collector()
def parse(self):
self.raw_info = parse_nvsmi_info('nvidia-smi -q -x')
if self.nvsmi_source is None:
self.raw_info = parse_nvsmi_info(exe_cmd('nvidia-smi -q -x'))
else:
self.raw_info = parse_nvsmi_info(loadfile(self.nvsmi_source))
self.detailed_info = {}
for key, value in self.raw_info.items():
if key != 'gpu':
@ -302,7 +378,7 @@ class GPUStat():
gpu['id'] = i
self.gpus.append(gpu)
def show(self, enabled_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem', 'Users'], colsty=None, colsz=None, show_command=True, vertical=False):
def show(self, enabled_cols = ['ID', 'Fan', 'Temp', 'Pwr', 'Freq', 'Util', 'Vmem', 'Users'], colsty=None, colsz=None, show_command=True, vertical=False, tostdout=True):
self.parse()
gpu_infos = []
# stats = {
@ -374,7 +450,10 @@ class GPUStat():
pid = proc['pid']
proc['gpu'] = [str(gpu['id'])]
if type(proc['vmem']) is str:
proc['vmem'] = int(proc['vmem'].split(' ')[0])
try:
proc['vmem'] = int(proc['vmem'].split(' ')[0])
except:
proc['vmem'] = 0
if pid not in procs:
procs[pid] = proc
else:
@ -395,7 +474,9 @@ class GPUStat():
table_width = info.find('\n')
proc_info = draw_table([['Process Info'.center(table_width-4)], [proc_info]], rowsty="c|c|", colsty="|l|", colsz=[table_width-4])
info += proc_info
print(info)
if tostdout:
print(info)
return info
class MoreGPUNeededError(Exception):
def __init__(self):

42
gpuutil/set_redirect.py Normal file
View File

@ -0,0 +1,42 @@
import argparse
import os
from gpuutil import loaddict, savedict
# Built-in column names a ps header name may be translated to.
availabel_name_trans = ['command', 'user', 'pid']


def parse_name_trans(spec):
    """Parse a ``name1=buildin,name2=buildin`` string into a dict.

    Args:
        spec: the raw command line value, or None when it was not given.

    Returns:
        A dict mapping each custom column name (lower-cased) to one of the
        names in ``availabel_name_trans``. Empty when ``spec`` is None.

    Raises:
        ValueError: if an item has no ``=`` or names an unknown built-in.
    """
    parsed = {}
    if spec is None:
        return parsed
    items = [t.strip() for t in spec.split(',')]
    for item in items:
        if item == '':
            continue
        pair = item.split('=', maxsplit=1)
        if len(pair) != 2:
            raise ValueError('there must be a = in nametrans')
        # Column names are documented as case-insensitive, so store the key
        # in lower case and tolerate spaces around '='.
        key = pair[0].strip().lower()
        value = pair[1].strip().lower()
        if value not in availabel_name_trans:
            raise ValueError('given buildin name {0} do not exist, avaliable: {1}'.format(value, ','.join(availabel_name_trans)))
        parsed[key] = value
    return parsed


def resolve_source_path(path):
    """Return ``path`` as an absolute path, or None when it is None.

    The stored path is read back later, possibly from another working
    directory, so a relative path would silently point somewhere else.
    """
    if path is None:
        return None
    return os.path.abspath(os.path.expanduser(path))


def main():
    """Store the redirect settings from the command line in ~/.gpuutil.conf.

    Other sections already present in the configuration file are kept; the
    ``redirect`` section is replaced as a whole.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--nvsmi', '-nv', default=None, type=str, help='a file indicates real nvidia-smi -q -x output.')
    parser.add_argument('--ps', '-ps', default=None, type=str, help='a file indicates real ps-like output.')
    parser.add_argument('--ps_name_trans', '-pst', default=None, type=str, help='a dict of name trans, '
                        'format: name1=buildin,name2=buildin, '
                        'buildin can be choosen from {0}'.format(','.join(availabel_name_trans)))
    args = parser.parse_args()

    # Validate the translation table before touching the configuration file.
    parsed_name_trans = parse_name_trans(args.ps_name_trans)

    config_file = os.path.expanduser('~/.gpuutil.conf')
    configuration = {}
    if os.path.isfile(config_file):
        configuration = loaddict(config_file)
    configuration['redirect'] = {
        "nvsmi_src": resolve_source_path(args.nvsmi),
        "ps_src": resolve_source_path(args.ps),
        "ps_name_trans": parsed_name_trans
    }
    savedict(config_file, configuration)


# Only act when executed (python -m gpuutil.set_redirect); importing the
# module must not parse sys.argv or rewrite the user's configuration.
if __name__ == '__main__':
    main()

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name = 'gpuutil',
version = '0.0.3',
version = '0.0.5',
keywords='gpu utils',
description = 'A tool for observing gpu stat and auto set visible gpu in python code.',
license = 'MIT License',