From ffdd27e5069b5c7da835b414105e1e4fb1e0bfd1 Mon Sep 17 00:00:00 2001 From: Ari Brown Date: Tue, 26 Sep 2023 17:53:16 -0400 Subject: [PATCH] parallel_map briefly stopped working --- minerva/__init__.py | 7 ++++--- minerva/cluster.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 minerva/cluster.py diff --git a/minerva/__init__.py b/minerva/__init__.py index 6b84ba0..beb4cfa 100644 --- a/minerva/__init__.py +++ b/minerva/__init__.py @@ -1,3 +1,5 @@ +from .parallel import parallel_map + from .athena import Athena from .redshift import Redshift from .s3 import S3 @@ -8,8 +10,6 @@ from .pier import Pier from .minerva import Minerva -from .parallel_map import parallel_map - __all__ = [ "Athena", "Redshift", @@ -17,6 +17,7 @@ __all__ = [ "Docker", "Machine", "Pier", - "Minerva" + "Minerva", + "parallel_map" ] diff --git a/minerva/cluster.py b/minerva/cluster.py new file mode 100644 index 0000000..62bd8e5 --- /dev/null +++ b/minerva/cluster.py @@ -0,0 +1,22 @@ +import dask + +# https://cloudprovider.dask.org/en/latest/aws.html#elastic-compute-cloud-ec2 + +# https://github.com/dask/dask-ec2/blob/master/notebooks/03-timeseries.ipynb + +# https://github.com/dask/distributed/issues/2267 + +# import dask +# import dask.distributed +# import dask.dataframe as dd +# +# c = dask.distributed.Client(':8786') +# d = dask.delayed(dd.read_parquet)('gcs:///0.parquet', +# storage_options={'token':'cloud'}) +# df = dask.compute(d)[0] + +# https://saturncloud.io/blog/how-to-read-parquet-files-from-s3-using-dask-with-a-specific-aws-profile/ + +# Manually build cluster +# https://saturncloud.io/blog/how-to-set-up-a-dask-cluster/ +