parallel_map briefly stopped working

This commit is contained in:
Ari Brown 2023-09-26 17:53:16 -04:00
parent 02d26bbaaf
commit ffdd27e506
2 changed files with 26 additions and 3 deletions

View file

@@ -1,3 +1,5 @@
+from .parallel import parallel_map
from .athena import Athena
from .redshift import Redshift
from .s3 import S3
@@ -8,8 +10,6 @@ from .pier import Pier
from .minerva import Minerva
-from .parallel_map import parallel_map
__all__ = [
"Athena",
"Redshift",
@@ -17,6 +17,7 @@ __all__ = [
"Docker",
"Machine",
"Pier",
-"Minerva"
+"Minerva",
+"parallel_map"
]

22
minerva/cluster.py Normal file
View file

@@ -0,0 +1,22 @@
import dask
# https://cloudprovider.dask.org/en/latest/aws.html#elastic-compute-cloud-ec2
# https://github.com/dask/dask-ec2/blob/master/notebooks/03-timeseries.ipynb
# https://github.com/dask/distributed/issues/2267
# import dask
# import dask.distributed
# import dask.dataframe as dd
#
# c = dask.distributed.Client('<ip>:8786')
# d = dask.delayed(dd.read_parquet)('gcs://<bucket_name>/0.parquet',
# storage_options={'token':'cloud'})
# df = dask.compute(d)[0]
# https://saturncloud.io/blog/how-to-read-parquet-files-from-s3-using-dask-with-a-specific-aws-profile/
# Manually build cluster
# https://saturncloud.io/blog/how-to-set-up-a-dask-cluster/