Commit 83b3c968 authored by Prasanna's avatar Prasanna
Browse files

search code commit

parent b2f03e13
Directory structure
===================
```
benchmarks
directory for problems
experiments
directory for running the experiments and storing the results
search
directory for source files
```
Install instructions
====================
With anaconda do the following:
```
conda create -n dl-hps python=3
source activate dl-hps
conda install h5py
conda install scikit-learn
conda install pandas
conda install mpi4py
conda install -c conda-forge keras
conda install -c conda-forge scikit-optimize
git clone https://github.com/scikit-optimize/scikit-optimize.git
cd scikit-optimize
pip install -e.
```
Usage
=====
```
cd search
usage: async-search.py [-h] [-v] [--prob_dir [PROB_DIR]] [--exp_dir [EXP_DIR]]
[--exp_id [EXP_ID]] [--max_evals [MAX_EVALS]]
[--max_time [MAX_TIME]]
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
--prob_dir [PROB_DIR]
problem directory
--exp_dir [EXP_DIR] experiments directory
--exp_id [EXP_ID] experiments id
--max_evals [MAX_EVALS]
maximum number of evaluations
--max_time [MAX_TIME]
maximum time in secs
```
Example
=======
```
mpiexec -np 2 python async-search.py --prob_dir=../benchmarks/qmcp --exp_dir=../experiments/ --exp_id=exp-01 --max_evals=10 --max_time=60
```
\ No newline at end of file
from collections import OrderedDict
class Problem():
    """Search-space definition for one benchmark tuning problem.

    Instance attributes (all set in ``__init__``):
        space: OrderedDict mapping each parameter name to the list of
            values it may take.
        params: list of the parameter names, in the same order as ``space``.
        starting_point: one value per parameter, in ``params`` order.
    """

    def __init__(self):
        space = OrderedDict()
        # benchmark specific parameters
        space['OMP_NUM_THREADS'] = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
        space['a'] = [8, 16, 32, 64, 128, 256, 512, 1024, 1536]
        # Materialise the range: on Python 3 ``range`` is a lazy object, not
        # a list, and consumers that require a list/tuple of candidate
        # values would reject it.
        space['w'] = list(range(8, 41))
        self.space = space
        # Store a real list instead of a live keys view so the names can be
        # indexed, compared and printed the same way on Python 2 and 3.
        self.params = list(self.space.keys())
        self.starting_point = [2, 8, 8]
if __name__ == '__main__':
    # Manual smoke check: show the search space, then the parameter names.
    instance = Problem()
    for shown in (instance.space, instance.params):
        print(shown)
#!/bin/bash -x
#COBALT -n 1
#COBALT -q debug-flat-quad
#COBALT -A Performance
#COBALT -t 30
# Job-script template for a single benchmark evaluation.
# The single-dollar names outputfile, inpstr, ompn and cmd are placeholders
# that the Python side fills in with string.Template before running the
# script; a doubled dollar is the Template escape for one literal dollar.
# Keep dollar signs out of comments in this file: Template would try to
# expand them.
outputfile=$outputfile
# Record the start time (seconds since the epoch) as the first line of the
# result file; this redirect truncates any previous content.
ut=$$(date '+%s')
echo START TIME:$$ut > $outputfile
# Echo the evaluated point so the result parser can recover it from the file.
echo INPUT:"$inpstr" >> $outputfile
# Run the benchmark with the requested thread count and append its stdout.
# NOTE(review): the result parser looks for an OUTPUT: line, presumably
# printed by check_spo itself -- confirm.
OMP_NUM_THREADS=$ompn /nfs2/pbalapra/Projects/qmcpack/miniqmc/build-openmp/bin/check_spo -g "2 2 2" -i 5 $cmd >> $outputfile
# Record the end time.
ut=$$(date '+%s')
echo END TIME:$$ut>> $outputfile
echo
#!/usr/bin/env python
from __future__ import print_function
from mpi4py import MPI
import re
import os
import sys
import time
import json
import math
from skopt import Optimizer
from utils import *
import os
import argparse
# Fixed seed, handed to the skopt Optimizer below as its random_state.
seed = 12345
def create_parser():
    """Build the command line parser for the asynchronous search driver.

    Returns:
        argparse.ArgumentParser accepting ``--prob_dir``, ``--exp_dir``,
        ``--exp_id``, ``--max_evals`` and ``--max_time`` (plus ``-h`` and
        ``-v``). Every option takes an optional value (``nargs='?'``).
    """
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s 0.1')
    parser.add_argument("--prob_dir", nargs='?', type=str,
                        default='../problems/prob1',
                        help="problem directory")
    parser.add_argument("--exp_dir", nargs='?', type=str,
                        default='../experiments',
                        help="experiments directory")
    parser.add_argument("--exp_id", nargs='?', type=str,
                        default='exp-01',
                        help="experiments id")
    # Defaults are given in their final numeric type rather than as strings,
    # so parser.get_default() agrees with what parse_args() returns.
    # ``const`` is the value used when the flag is given without a value.
    parser.add_argument('--max_evals', action='store', dest='max_evals',
                        nargs='?', const=2, type=int, default=30,
                        help='maximum number of evaluations')
    parser.add_argument('--max_time', action='store', dest='max_time',
                        nargs='?', const=1, type=float, default=60.0,
                        help='maximum time in secs')
    return parser
# Parse the command line once; every rank works from the same settings.
parser = create_parser()
cmdline_args = parser.parse_args()
param_dict = vars(cmdline_args)
prob_dir, eid = param_dict['prob_dir'], param_dict['exp_id']
max_evals, max_time = param_dict['max_evals'], param_dict['max_time']

# Everything belonging to this experiment lives under <exp_dir>/<exp_id>.
exp_dir = '%s/%s' % (param_dict['exp_dir'], eid)
jobs_dir = '%s/jobs' % exp_dir
results_dir = '%s/results' % exp_dir
results_json_fname = '%s/%s_results.json' % (exp_dir, eid)
results_csv_fname = '%s/%s_results.csv' % (exp_dir, eid)

# The problem directory provides a problem.py that defines the search space.
sys.path.insert(0, prob_dir)
import problem as problem
instance = problem.Problem()
spaceDict, params = instance.space, instance.params
starting_point = instance.starting_point
def enum(*sequential, **named):
    """Create a lightweight enumeration class.

    Positional names are numbered 0, 1, 2, ... in the order given; keyword
    arguments add members with explicit values (and override a positional
    name of the same spelling). The members are class attributes of the
    returned type, which is named ``Enum``.

    Recipe from
    http://stackoverflow.com/questions/36932/how-can-i-represent-an-enum-in-python
    """
    members = {}
    for index, label in enumerate(sequential):
        members[label] = index
    members.update(named)
    return type('Enum', (), members)
# Define MPI message tags
# Protocol: a worker announces itself with READY; the master answers with
# START (carrying a task) or EXIT; the worker reports a finished task with
# DONE and confirms shutdown with EXIT.
tags = enum('READY', 'DONE', 'EXIT', 'START')
# Initializations and preliminaries
comm = MPI.COMM_WORLD  # get MPI communicator object
size = comm.size  # total number of processes
rank = comm.rank  # rank of this process
status = MPI.Status()  # get MPI status object

# Master process executes code below
if rank == 0:
    start_time = time.time()
    for dir_name in [exp_dir, jobs_dir, results_dir]:
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
    num_workers = size - 1
    closed_workers = 0
    space = [spaceDict[key] for key in params]
    eval_counter = 0
    parDict = {}
    evalDict = {}  # str(x) -> None, used only to report repeated samples
    resultsList = []
    parDict['kappa'] = 0
    init_x = []  # points asked for up front, handed out before asking again
    opt = Optimizer(space, base_estimator='RF', acq_optimizer='sampling',
                    acq_func='LCB', acq_func_kwargs=parDict, random_state=seed)
    print("Master starting with %d workers" % num_workers)
    if num_workers < 1:
        # With a single MPI process there is nobody to evaluate points, so
        # the loop below is skipped entirely.
        print("No worker processes available; run with at least 2 MPI processes")
    while closed_workers < num_workers:
        data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
        source = status.Get_source()
        tag = status.Get_tag()
        elapsed_time = float(time.time() - start_time)
        print('elapsed_time:%1.3f' % elapsed_time)
        if tag == tags.READY:
            if eval_counter < max_evals and elapsed_time < max_time:
                # Worker is ready, so send it a task
                if starting_point is not None:
                    # The very first task is the problem's starting point;
                    # at the same time ask for one point per remaining worker.
                    x = starting_point
                    if num_workers - 1 > 0:
                        init_x = opt.ask(n_points=num_workers - 1)
                    starting_point = None
                elif len(init_x) > 0:
                    x = init_x.pop(0)
                else:
                    x = opt.ask(n_points=1)[0]
                key = str(x)
                print('sample %s' % key)
                # A repeated sample is only reported; it is still evaluated.
                if key in evalDict:
                    print('%s already evalauted' % key)
                evalDict[key] = None
                task = {
                    'x': x,
                    'eval_counter': eval_counter,
                    'start_time': elapsed_time,
                }
                print("Sending task %d to worker %d" % (eval_counter, source))
                comm.send(task, dest=source, tag=tags.START)
                eval_counter = eval_counter + 1
            else:
                # Budget (evaluations or time) exhausted: tell the worker to stop.
                comm.send(None, dest=source, tag=tags.EXIT)
        elif tag == tags.DONE:
            result = data
            result['end_time'] = elapsed_time
            print("Got data from worker %d" % source)
            print(result)
            resultsList.append(result)
            x = result['x']
            y = result['cost']
            if x is not None:
                opt.tell(x, y)
            else:
                # The result file could not be parsed, so there is no point
                # to report; keep the record but leave the model untouched
                # instead of crashing the master inside opt.tell.
                print("Result from worker %d has no point; optimizer not updated" % source)
        elif tag == tags.EXIT:
            print("Worker %d exited." % source)
            closed_workers = closed_workers + 1
    print('Search finishing')
    if resultsList:
        saveResults(resultsList, results_json_fname, results_csv_fname)
    else:
        # Nothing was evaluated (no workers, or a zero budget): skip saving
        # rather than failing on an empty result list.
        print('No results were collected; nothing to save')
else:
    # Worker processes execute code below
    name = MPI.Get_processor_name()
    print("worker with rank %d on %s." % (rank, name))
    while True:
        comm.send(None, dest=0, tag=tags.READY)
        task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()
        if tag == tags.START:
            print(task)
            result = evaluatePoint(task['x'], task['eval_counter'], params,
                                   prob_dir, jobs_dir, results_dir)
            result['start_time'] = task['start_time']
            comm.send(result, dest=0, tag=tags.DONE)
        elif tag == tags.EXIT:
            break
    # Acknowledge shutdown so the master can count this worker as closed.
    comm.send(None, dest=0, tag=tags.EXIT)
from string import Template
import re
import os
import sys
import time
import json
import math
import os
import subprocess
import csv
def readResults(fname, evalnum):
    """Parse the result file written by one evaluation job.

    The file is scanned once, line by line, for the markers ``START TIME:``,
    ``OUTPUT:``, ``END TIME:`` and ``INPUT:`` (matched case-insensitively,
    anywhere in the line). The text after the marker is converted and stored
    in the returned dict. Each matching line is echoed to stdout, and the
    final dict is printed before returning.

    Args:
        fname: path of the result file.
        evalnum: evaluation counter, stored under ``'evalnum'``.

    Returns:
        dict with the keys ``evalnum``, ``startTime``, ``endTime``, ``cost``
        and ``x``. A field whose marker never appears keeps its default:
        -1 for the two times, ``sys.float_info.max`` for the cost and
        ``None`` for ``x``. A NaN cost is replaced by ``sys.float_info.max``.
        When the file was read without error, ``key`` (the base name of
        ``fname``) and ``status`` (0) are added.

    Errors while opening or parsing the file are reported on stdout and not
    raised; whatever had been parsed up to that point is returned.
    """
    # Local import keeps this function self-contained.
    from ast import literal_eval

    def parse_cost(text):
        value = float(text)
        return sys.float_info.max if math.isnan(value) else value

    def parse_point(text):
        # SECURITY: this used to be eval() on file content. literal_eval only
        # accepts Python literals (lists, numbers, strings, ...), which is all
        # str(x) of a sample point needs. strip() because older Python
        # versions reject leading whitespace in literal_eval.
        return literal_eval(text.strip())

    # (pattern, result field, converter), tried in this order for each line;
    # the first marker that matches decides how the line is interpreted.
    # The same case-insensitive pattern both detects the marker and captures
    # its value, so detection and extraction can no longer disagree.
    markers = (
        (re.compile(r"START TIME:(.*)", re.IGNORECASE), 'startTime', int),
        (re.compile(r"OUTPUT:(.*)", re.IGNORECASE), 'cost', parse_cost),
        (re.compile(r"END TIME:(.*)", re.IGNORECASE), 'endTime', int),
        (re.compile(r"INPUT:(.*)", re.IGNORECASE), 'x', parse_point),
    )

    resDict = {
        'evalnum': evalnum,
        'startTime': -1,
        'endTime': -1,
        'cost': sys.float_info.max,
        'x': None,
    }
    try:
        # A single pass is enough: the previous polling loop always stopped
        # after its first pass, so its sleep was never reached.
        with open(fname, 'rt') as in_file:
            for line in in_file:
                text = line.rstrip('\n')
                for pattern, field, convert in markers:
                    found = pattern.search(text)
                    if found is not None:
                        print(line)
                        resDict[field] = convert(found.group(1))
                        break
        resDict['key'] = os.path.basename(fname)
        resDict['status'] = 0
    except Exception as err:
        # Best effort by design: a broken or missing result file must not
        # take the caller down; the defaults mark the evaluation as failed.
        print('Unexpected error:', sys.exc_info()[0], err)
    print(resDict)
    return resDict
def saveResults(resultsList, json_fname, csv_fname):
    """Write the collected results to a JSON file and to a CSV file.

    The list is also printed to stdout, raw and as formatted JSON.

    Args:
        resultsList: list of result dicts; may be empty.
        json_fname: path of the JSON file to write.
        csv_fname: path of the CSV file to write.

    The CSV has one column per key found in any row, in first-seen order;
    a row lacking a key gets an empty cell. With an empty list the JSON
    file holds ``[]`` and the CSV file is created empty.
    """
    print(resultsList)
    print(json.dumps(resultsList, indent=4, sort_keys=True))
    with open(json_fname, 'w') as outfile:
        json.dump(resultsList, outfile, indent=4, sort_keys=True)

    # Collect the header from every row, not just the first: a failed
    # evaluation can carry fewer keys than a successful one, and DictWriter
    # raises on keys missing from its field names.
    fieldnames = []
    for row in resultsList:
        for column in row:
            if column not in fieldnames:
                fieldnames.append(column)

    with open(csv_fname, 'w') as output_file:
        if fieldnames:
            dict_writer = csv.DictWriter(output_file, fieldnames)
            dict_writer.writeheader()
            dict_writer.writerows(resultsList)
def commandLine(x, params):
    """Build the command-line flag string for one sample point.

    Args:
        x: parameter values, in the same order as ``params``.
        params: parameter names.

    Returns:
        A string made of ``-<name> <value> `` for each parameter (note the
        trailing space after every pair). Parameters whose name contains
        ``OMP_NUM_THREADS`` are left out of the string. The string is also
        printed to stdout.
    """
    # The old ``hlist`` accumulator was filled but never read, so it is gone.
    cmd = ''.join(
        '-%s %s ' % (name, value)
        for name, value in zip(params, x)
        if 'OMP_NUM_THREADS' not in name
    )
    print(cmd)
    return cmd
def evaluatePoint(x, evalCounter, params, prob_dir, job_dir, result_dir):
    """Run one evaluation of the point ``x`` and return its parsed result.

    Fills in ``<prob_dir>/theta.tmpl`` (a ``string.Template``) with the
    output path, the point, the command-line flags and the thread count,
    writes it to ``<job_dir>/<NNNNN>.job``, runs it with ``sh`` and parses
    ``<result_dir>/<NNNNN>.dat`` with ``readResults``.

    Args:
        x: parameter values, in the same order as ``params``.
        evalCounter: evaluation number, used for the five-digit file names.
        params: parameter names.
        prob_dir: directory containing ``theta.tmpl``.
        job_dir: directory receiving the generated job script.
        result_dir: directory receiving the job's result file.

    Returns:
        The dict produced by ``readResults`` for this evaluation.
    """
    cmd = commandLine(x, params)
    jobfile = job_dir + '/%05d.job' % evalCounter
    outputfile = result_dir + '/%05d.dat' % evalCounter

    # Read the template inside ``with`` so the file handle is closed.
    with open(prob_dir + '/theta.tmpl') as filein:
        src = Template(filein.read())

    # Thread count: the value of the OMP_NUM_THREADS parameter when it is
    # present by that exact name, otherwise the first value as before.
    ompn = dict(zip(params, x)).get('OMP_NUM_THREADS', x[0])
    d = {'outputfile': outputfile, 'inpstr': str(x), 'cmd': cmd, 'ompn': ompn}
    with open(jobfile, "w") as jobFile:
        jobFile.write(src.substitute(d))

    # Set the execute bits and run the job without building shell command
    # strings, so unusual characters in the path cannot be misread.
    os.chmod(jobfile, os.stat(jobfile).st_mode | 0o111)
    status = subprocess.call(['sh', jobfile])
    if status != 0:
        print('job %s exited with status %s' % (jobfile, status))

    resDict = readResults(outputfile, evalCounter)
    print(resDict)
    return resDict
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment