def _ensure_isolet_file(url, archive_name, archive_path):
    """Make sure the decompressed file for one ISOLET archive is on disk.

    Nothing happens when the decompressed file (``archive_path`` without its
    trailing ``'.Z'``) already exists. Otherwise the archive is downloaded
    from ``url + archive_name`` unless it is already present, and then handed
    to ``decompress_z``.
    """
    if os.path.exists(archive_path[:-2]):
        return
    if not os.path.exists(archive_path):
        from urllib import request
        print('Downloading Isolated Letter Speech Recognition data set from {}...'.format(
            url))
        request.urlretrieve(url=url + archive_name, filename=archive_path)
    # NOTE(review): assumes decompress_z writes its output next to the archive
    # under the same name minus '.Z' -- the caller opens exactly that path.
    decompress_z(archive_path)


def _read_isolet_csv(path):
    """Parse one decompressed ISOLET file into (feature rows, integer labels).

    Every column but the last is kept (as strings) as the feature row; the
    last column is converted to ``int`` via ``float`` and used as the label.
    """
    features, targets = [], []
    # newline='' is what the csv module recommends for files given to reader().
    with open(path, newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; row[-1] would raise.
                continue
            features.append(row[:-1])
            targets.append(int(float(row[-1])))
    return features, targets


def fetch_load_isolet(data_dir=None):
    """Download (if needed) and load the UCI ISOLET train/test splits.

    The archives ``isolet1+2+3+4.data.Z`` (train) and ``isolet5.data.Z``
    (test) are looked up in the directory returned by
    ``get_data_home(data_dir)``. Missing archives are fetched from the UCI
    repository and decompressed with ``decompress_z``.

    Parameters
    ----------
    data_dir : optional
        Forwarded unchanged to ``get_data_home``; defaults to ``None``.

    Returns
    -------
    xtr, xte : numpy.ndarray of float
        Feature matrices of the train and test files (all columns except the
        last one).
    ytr, yte : numpy.ndarray of int
        Labels re-encoded as indices into the sorted set of distinct label
        values found in train and test together, so that equal raw labels
        always map to the same index in both splits.
    """
    train = 'isolet1+2+3+4.data.Z'
    test = 'isolet5.data.Z'
    data_home = get_data_home(data_dir)
    path_train = os.path.join(data_home, train)
    path_test = os.path.join(data_home, test)
    # Dropping the two-character '.Z' suffix gives the decompressed file name.
    raw_train, raw_test = path_train[:-2], path_test[:-2]

    if os.path.exists(raw_train) and os.path.exists(raw_test):
        print('Found Isolated Letter Speech Recognition data set!')
    else:
        url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/isolet/'
        _ensure_isolet_file(url, train, path_train)
        _ensure_isolet_file(url, test, path_test)

    xtr, ytr = _read_isolet_csv(raw_train)
    xte, yte = _read_isolet_csv(raw_test)

    # Encode both splits against one shared label set. Encoding them
    # separately would shift indices whenever a class is absent from one split.
    n_train = len(ytr)
    _, encoded = np.unique(ytr + yte, return_inverse=True)
    ytr, yte = encoded[:n_train], encoded[n_train:]
    return np.asarray(xtr, dtype=float), np.asarray(xte, dtype=float), ytr, yte
评论列表
文章目录