Download Data
For the tutorials in this book, we will use partially-preprocessed data from two open multi-echo datasets: Euskalibur and Cambridge. For more information about these datasets, see Open Multi-Echo Datasets.
import os
from pprint import pprint
from tedana import datasets
# Every fetched dataset is stored under this directory (made absolute
# relative to the current working directory).
DATA_DIR = os.path.abspath("../data")

# Both fetchers are called with the same options: five subjects,
# low_resolution turned off, and the shared download directory.
fetch_options = {
    "n_subjects": 5,
    "low_resolution": False,
    "data_dir": DATA_DIR,
}

# Fetch the Euskalibur dataset and display what the fetcher returned.
euskalibur_dataset = datasets.fetch_euskalibur(**fetch_options)
pprint(euskalibur_dataset)

# Fetch the Cambridge dataset with the same options and display it as well.
cambridge_dataset = datasets.fetch_cambridge(**fetch_options)
pprint(cambridge_dataset)
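For now, we will use repo2data to download the data from OpenNeuro. Because the dataset is hosted as a git repository, repo2data fetches it with DataLad, which in turn requires a working git-annex installation.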
import os
import shutil
import subprocess
import sys

from repo2data.repo2data import Repo2Data

# Install the data if running locally, or point to cached data if running on neurolibre
# (a single path needs no os.path.join; the string is used as-is).
DATA_REQ_FILE = "../binder/data_requirement.json"

# Download data
repo2data = Repo2Data(DATA_REQ_FILE)
try:
    data_path = repo2data.install()
except subprocess.CalledProcessError:
    # A git-hosted source is downloaded through `datalad install`, which
    # exits with a non-zero status when git-annex is unavailable. Point the
    # reader at the likely cause, then re-raise the original error unchanged.
    if shutil.which("git-annex") is None:
        print(
            "Hint: git-annex was not found on PATH. DataLad needs a working "
            "git-annex installation to download this dataset; see "
            "http://handbook.datalad.org/r.html?install for instructions.",
            file=sys.stderr,
        )
    raise

# install() returns a list of paths; keep the first one as an absolute path.
data_path = os.path.abspath(data_path[0])
---- repo2data starting ----
/opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/repo2data
Config from file :
../binder/data_requirement.json
Destination:
./../data/ds006193/multi-echo-data-analysis
Info : Starting to download from datalad https://github.com/OpenNeuroDatasets/ds006193.git ...
It is highly recommended to configure Git before using DataLad. Set both 'user.name' and 'user.email' configuration variables.
[INFO] Attempting a clone into /home/runner/work/multi-echo-data-analysis/multi-echo-data-analysis/data/ds006193/multi-echo-data-analysis
[INFO] Attempting to clone from https://github.com/OpenNeuroDatasets/ds006193.git to /home/runner/work/multi-echo-data-analysis/multi-echo-data-analysis/data/ds006193/multi-echo-data-analysis
[INFO] Start enumerating objects
[INFO] Start counting objects
[INFO] Start compressing objects
[INFO] Start receiving objects
[INFO] Start resolving deltas
[INFO] Completed clone attempts for Dataset(/home/runner/work/multi-echo-data-analysis/multi-echo-data-analysis/data/ds006193/multi-echo-data-analysis)
install(error): /home/runner/work/multi-echo-data-analysis/multi-echo-data-analysis/data/ds006193/multi-echo-data-analysis (dataset) [No working git-annex installation of version >= 8.20200309. Visit http://handbook.datalad.org/r.html?install for instructions on how to install DataLad and git-annex.] [No working git-annex installation of version >= 8.20200309. Visit http://handbook.datalad.org/r.html?install for instructions on how to install DataLad and git-annex.]
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
Cell In[1], line 10
8 # Download data
9 repo2data = Repo2Data(DATA_REQ_FILE)
---> 10 data_path = repo2data.install()
11 data_path = os.path.abspath(data_path[0])
File /opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/repo2data/repo2data.py:106, in Repo2Data.install(self)
103 for key, value in self._data_requirement_file.items():
104 if isinstance(value, dict):
105 ret += [Repo2DataChild(value, self._use_server,
--> 106 self._data_requirement_path,key,self._server_dst_folder).install()]
107 # if not, it is a single assignment
108 else:
109 ret += [Repo2DataChild(self._data_requirement_file,
110 self._use_server, self._data_requirement_path, None, self._server_dst_folder).install()]
File /opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/repo2data/repo2data.py:364, in Repo2DataChild.install(self)
362 os.makedirs(self._dst_path)
363 # Downloading with the right method, depending on the src type
--> 364 self._scan_dl_type()
365 # If needed, decompression of the data
366 self._archive_decompress()
File /opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/repo2data/repo2data.py:332, in Repo2DataChild._scan_dl_type(self)
330 # if the source link has a .git, we use datalad
331 elif re.match(".*?\\.git$", self._data_requirement_file["src"]):
--> 332 self._datalad_download()
333 # or coming from google drive
334 elif re.match(".*?(drive\\.google\\.com).*?", self._data_requirement_file["src"]):
File /opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/site-packages/repo2data/repo2data.py:263, in Repo2DataChild._datalad_download(self)
260 print("Info : Starting to download from datalad %s ..." %
261 (self._data_requirement_file["src"]))
262 try:
--> 263 subprocess.check_call(
264 ['datalad', 'install', self._dst_path, "-s", self._data_requirement_file["src"]])
265 except FileNotFoundError:
266 print("Error: datalad does not appear to be installed")
File /opt/hostedtoolcache/Python/3.10.18/x64/lib/python3.10/subprocess.py:369, in check_call(*popenargs, **kwargs)
367 if cmd is None:
368 cmd = popenargs[0]
--> 369 raise CalledProcessError(retcode, cmd)
370 return 0
CalledProcessError: Command '['datalad', 'install', './../data/ds006193/multi-echo-data-analysis', '-s', 'https://github.com/OpenNeuroDatasets/ds006193.git']' returned non-zero exit status 1.