# Copyright (C) 2011-2012 Johan Hake
#
# This file is part of Gotran.
#
# Gotran is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Gotran is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Gotran. If not, see <http://www.gnu.org/licenses/>.
# Names exported by "from <this module> import *"
__all__ = ["load", "save", "present_time_str"]
# System imports
import time
import os
import numpy
from pickle import Unpickler, Pickler
from os.path import isdir, join
# gotran imports
from gotran.common import *
# time.strftime format used by present_time_str:
# year.month.day-hour.minute.second
TIME_FORMAT = "%Y.%m.%d-%H.%M.%S"
def present_time_str():
    "Return the present local time as a string formatted with TIME_FORMAT"
    timestamp = time.strftime(TIME_FORMAT)
    return timestamp
def save(basename, **data):
    """
    Save data to file using pickle

    Nothing is written if no keyword data is given.

    @type basename : str
    @param basename : The name of the file to save the data in, '.cpickle'
        will be appended to the file name if it does not already
        contain it
    @param data : The actual data to be saved, given as keyword arguments.
        The keyword arguments are pickled together as one dict.
    """
    check_arg(basename, str, 0)

    # If zero data size just return
    if len(data) == 0:
        return

    filename = basename if ".cpickle" in basename else basename+".cpickle"

    # Pickle streams are bytes, so the file must be opened in binary mode.
    # The with statement closes the file even if the dump fails.
    with open(filename, 'wb') as f:
        # Dump the dictionary kwarg
        Pickler(f).dump(data)
def compare_dicts(p1, p2):
    """
    Recursively compare two dicts whose values may be numpy arrays

    A plain p1 == p2 is tried first. It raises ValueError when a value
    comparison yields a multi element numpy array, in which case the
    dicts are compared entry by entry instead.

    @type p1 : dict
    @param p1 : The first dict
    @type p2 : dict
    @param p2 : The second dict
    @rtype : bool
    @return : True if both dicts have the same keys and equal values
    """
    assert isinstance(p1, dict)
    assert isinstance(p2, dict)

    try:
        return bool(p1 == p2)
    except ValueError:
        # Ambiguous truth value of an array comparison; fall through to
        # the entry by entry comparison below
        pass

    # Different sets of keys can never compare equal. Checking this first
    # also avoids a KeyError when looking up p2[key] below.
    if p1.keys() != p2.keys():
        return False

    for key, value in p1.items():
        other = p2[key]
        if isinstance(value, dict) and isinstance(other, dict):
            same = compare_dicts(value, other)
        elif isinstance(value, numpy.ndarray) or \
                 isinstance(other, numpy.ndarray):
            # array_equal returns False for arrays of different shape,
            # where an elementwise == would fail
            same = bool(numpy.array_equal(value, other))
        else:
            same = bool(value == other)

        if not same:
            return False

    return True
def load(basename, latest_timestamp=False, collect=False):
    """
    Load data that has been saved using pickle

    @type basename : str
    @param basename : The name of the file where the data will be loaded
        from, '.cpickle' will be appended to the file name if not provided
    @type latest_timestamp : bool
    @param latest_timestamp : If True return the data from the latest
        version of saved data with the same basename
    @type collect : bool
    @param collect : If True collect all data with the same basename and
        the same parameters. Cannot be combined with latest_timestamp.
    @return : The loaded data
    """
    check_arg(basename, str, 0)

    if collect and latest_timestamp:
        raise TypeError("'collect' and 'latest_timestamp' cannot both be True")

    # Without collect only a single data file is read
    if not collect:
        return load_single_data(basename, latest_timestamp)

    stamped_files = get_data_filenames(basename)

    # No time stamped files. Fall back on the data file without time stamp
    if len(stamped_files) == 0:
        return load_single_data(basename, False)

    # The last file name in sorted order is loaded first and provides the
    # reference parameters. The remaining files are then visited from the
    # end towards the start.
    collected = load_single_data(stamped_files[-1], False)
    reference_params = collected["params"]

    for name in stamped_files[-2::-1]:
        candidate = load_single_data(name, False)
        if compare_dicts(reference_params, candidate["params"]):
            merge_data_dicts(collected, candidate)
        else:
            info("Not the same parameters, skipping data from '%s'", name)

    return collected
def merge_data_dicts(data0, data1):
    """
    Merge data from data1 into data0, in place

    Entries are matched by key. For every top level key in data1, except
    'params' which is left untouched, the lists found in data1 are
    extended with the corresponding lists in data0, and the result is
    stored in data0. The merged lists therefore hold the data1 items
    first, followed by the data0 items. Entries that are neither dicts
    nor lists keep the value from data1. The nested containers of data1
    are modified as part of the merge.

    @type data0 : dict
    @param data0 : The dict receiving the merged data. All values, except
        the one for 'params', are expected to be dicts
    @type data1 : dict
    @param data1 : The dict with the data to merge into data0
    """

    def recursively_merge_data(target, source):
        "Extend the lists in target with the matching lists in source"
        for key, values in source.items():

            # Nothing to merge with, adopt the entry as it is
            if key not in target:
                target[key] = values
                continue

            org_values = target[key]
            if isinstance(values, dict):
                target[key] = recursively_merge_data(org_values, values)
            elif isinstance(values, list):
                # A list of lists is merged sub list by sub list. An empty
                # list has no first item to inspect and adds nothing.
                if values and isinstance(values[0], list):
                    for i, sub_values in enumerate(values):
                        org_values[i].extend(sub_values)
                else:
                    org_values.extend(values)

        return target

    for key, values in data1.items():
        if key == "params":
            continue

        # Key only present in data1, take its data as it is
        if key not in data0:
            data0[key] = values
            continue

        # data1 is used as merge target so its items end up first
        data0[key] = recursively_merge_data(values, data0[key])
def load_single_data(basename, latest_timestamp):
    """
    Helper function for load, unpickles the data of a single file

    @type basename : str
    @param basename : The name of the file to load, '.cpickle' will be
        appended to the file name if it does not already contain it
    @type latest_timestamp : bool
    @param latest_timestamp : If True the last of the time stamped files
        found by get_data_filenames for basename is loaded instead
    @return : The unpickled data
    @raise IOError : If latest_timestamp is True and no time stamped files
        are found, or if the file to load does not exist
    """
    if latest_timestamp:
        filenames = get_data_filenames(basename)
        if not filenames:
            raise IOError("No files with timestamp for basename: "\
                          "'%s' excist"%basename)
        basename = filenames[-1]

    filename = basename if ".cpickle" in basename else basename+".cpickle"
    if not os.path.isfile(filename):
        raise IOError("No file with basename: '%s' excists"%basename)

    info("Loading data from: %s", filename)

    # Pickle streams are bytes, so the file must be opened in binary mode.
    # The with statement makes sure the file is closed after loading.
    # NOTE: Unpickling can execute arbitrary code, only load trusted files.
    with open(filename, 'rb') as f:
        return Unpickler(f).load()
def get_data_filenames(basename):
    """
    Helper function for getting the names of time stamped data files

    Looks for files named '<basename>-<timestamp>.cpickle', where the
    timestamp consists of six dot and dash separated groups of digits:
    N.N.N-N.N.N

    @type basename : str
    @param basename : The basename of the data files, with or without
        '.cpickle'
    @rtype : list
    @return : The matching file names in sorted order, or an empty list
        if there are none
    """
    import glob
    import re

    # The time stamp sits between the basename and the extension, so the
    # extension has to be removed before the file names are matched
    basename = basename.replace(".cpickle", "")

    # Escape the basename so characters such as '.', '[' or '*' in it are
    # matched literally by both the regular expression and the glob
    pattern = re.compile(re.escape(basename) +
                         r"-[0-9]+\.[0-9]+\.[0-9]+-[0-9]+"
                         r"\.[0-9]+\.[0-9]+\.cpickle")

    candidates = glob.glob("%s*.cpickle" % glob.escape(basename))

    return sorted(filename for filename in candidates
                  if pattern.fullmatch(filename))