diff --git a/minerva/__init__.py b/minerva/__init__.py
index 6b84ba0..beb4cfa 100644
--- a/minerva/__init__.py
+++ b/minerva/__init__.py
@@ -1,3 +1,5 @@
+from .parallel import parallel_map
+
 from .athena import Athena
 from .redshift import Redshift
 from .s3 import S3
@@ -8,8 +10,6 @@ from .pier import Pier
 from .minerva import Minerva
 
 
-from .parallel_map import parallel_map
-
 __all__ = [
     "Athena",
     "Redshift",
@@ -17,6 +17,7 @@ __all__ = [
     "Docker",
     "Machine",
     "Pier",
-    "Minerva"
+    "Minerva",
+    "parallel_map"
 ]
 
diff --git a/minerva/cluster.py b/minerva/cluster.py
new file mode 100644
index 0000000..62bd8e5
--- /dev/null
+++ b/minerva/cluster.py
@@ -0,0 +1,22 @@
+import dask
+
+# https://cloudprovider.dask.org/en/latest/aws.html#elastic-compute-cloud-ec2
+
+# https://github.com/dask/dask-ec2/blob/master/notebooks/03-timeseries.ipynb
+
+# https://github.com/dask/distributed/issues/2267
+
+# import dask
+# import dask.distributed
+# import dask.dataframe as dd
+#
+# c = dask.distributed.Client(':8786')
+# d = dask.delayed(dd.read_parquet)('gcs:///0.parquet',
+#                                   storage_options={'token':'cloud'})
+# df = dask.compute(d)[0]
+
+# https://saturncloud.io/blog/how-to-read-parquet-files-from-s3-using-dask-with-a-specific-aws-profile/
+
+# Manually build cluster
+# https://saturncloud.io/blog/how-to-set-up-a-dask-cluster/
+
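
The new cluster.py is so far a scratchpad of links and commented-out snippets for running Dask against S3/EC2. As a rough sketch of the direction those notes point in (not part of the diff; read_parquet_from_s3, the bucket path, the scheduler address, and the AWS profile are placeholders, and s3fs is assumed to be installed for the s3:// protocol):

import dask.dataframe as dd
from dask.distributed import Client, LocalCluster


def read_parquet_from_s3(path, profile="default", scheduler_address=None):
    """Load a parquet dataset from S3 into a Dask dataframe.

    If scheduler_address is given, attach to an existing cluster
    (for example one provisioned on EC2); otherwise spin up a small
    local cluster so the function still runs during development.
    """
    if scheduler_address is not None:
        client = Client(scheduler_address)           # e.g. "tcp://10.0.0.1:8786"
    else:
        client = Client(LocalCluster(n_workers=2))   # local fallback

    # storage_options is forwarded to s3fs; "profile" selects the AWS
    # profile, as described in the Saturn Cloud article linked above.
    df = dd.read_parquet(path, storage_options={"profile": profile})
    return client, df


if __name__ == "__main__":
    client, df = read_parquet_from_s3("s3://example-bucket/data/")
    print(df.head())   # head() pulls a small sample, triggering computation

An EC2-backed cluster from dask-cloudprovider (the first link in the file) could presumably slot in here either by passing its scheduler address or by constructing the cluster object and handing it directly to Client(cluster).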