"""Launch a small Dask cluster on AWS EC2 and optionally practice on a big array.

Setup notes:

- altered /Users/ari/opt/miniconda3/envs/mamba_oa_env/lib/python3.10/site-packages/aiobotocore/endpoint.py:96
- needs [default] AWS credential
- `security = False` (can't use TLS because otherwise the UserData param is
  too long)
"""

import configparser
import contextlib
import os

import dask.array as da
import dask.distributed as distributed
import dask_cloudprovider.aws as aws
import numpy as np

# The script historically stopped right after printing the cluster (a bare
# `exit()` made the array practice unreachable).  That remains the default;
# set this to True to also connect a client and run the array practice.
RUN_ARRAY_DEMO = False


def aws_profile(profile, credentials_path="~/.aws/credentials"):
    """Return one AWS credentials profile as an environment-variable mapping.

    Every option of the ``[profile]`` section is copied with its name
    upper-cased (``aws_access_key_id`` -> ``AWS_ACCESS_KEY_ID``).  A ``region``
    option, when present, is exposed as ``AWS_REGION`` instead of ``REGION``.

    Args:
        profile: Section name inside the credentials file.
        credentials_path: Path of the credentials file; ``~`` is expanded.

    Returns:
        A ``dict`` mapping environment-variable names to their string values.

    Raises:
        configparser.NoSectionError: If the profile does not exist.  This is
            also what happens when the file itself cannot be read, because
            ``RawConfigParser.read`` silently skips unreadable files.
    """
    parser = configparser.RawConfigParser()
    parser.read(os.path.expanduser(credentials_path))
    config = {key.upper(): value for key, value in parser.items(profile)}
    # Credentials files frequently carry no region at all; only rename the
    # key when it exists instead of failing with a bare KeyError.
    if "REGION" in config:
        config["AWS_REGION"] = config.pop("REGION")
    return config


def practice_mean(shape=(1000000, 1000000), chunks=(1000, 1000)):
    """Return the mean of a uniformly random Dask array.

    The array is created lazily with ``da.random.random`` so that each chunk
    is generated where it is computed.  Building it with
    ``np.random.rand(*shape)`` first would allocate the whole array in the
    local process: 10**12 float64 values is about 8 TB at the default shape.

    The array is reduced exactly once, so it is not ``persist()``-ed; doing so
    would ask the cluster to keep every chunk in memory.

    Args:
        shape: Shape of the random array.
        chunks: Chunk shape handed to Dask.

    Returns:
        The computed mean of the array.
    """
    dask_array = da.random.random(shape, chunks=chunks)
    return dask_array.mean().compute()


def main():
    """Create the EC2 cluster, print it, and optionally run the array practice.

    The cluster (and the client, when used) are context-managed so they are
    closed explicitly on every exit path, including errors, rather than
    relying on interpreter shutdown.
    """
    # Create a cluster
    with aws.EC2Cluster(
        env_vars=aws_profile("hay"),
        key_name="Ari-Brown-HAY",
        vpc="vpc-0823964489ecc1e85",
        subnet_id="subnet-05eb26d8649a093e1",  # project-subnet-public1-us-east-1a
        n_workers=2,
        region="us-east-1",
        bootstrap=True,
        security_groups=[
            "sg-0f9e555954e863954",  # ssh
            "sg-0b34a3f7398076545",  # default
            "sg-04cd2626d91ac093c",  # dask (8786, 8787)
        ],
        # worker_module="dask_cuda.cli.dask_cuda_worker",  # for running GPU clusters
        # iam_instance_profile="S3+SSM+CloudWatch+ECR",
        # NOTE(review): iam_instance_profile expects a dict, not a string;
        # presumably the boto3 IamInstanceProfile shape, e.g.
        # {"Name": "..."} or {"Arn": "..."} -- TODO confirm.
        worker_instance_type="t3.small",
        ami="ami-0b0cd81283738558a",  # ubuntu 22.04 x86
        security=False,  # see the module docstring: TLS makes UserData too long
    ) as cluster:
        print(cluster)

        if not RUN_ARRAY_DEMO:
            return

        # Connect to the cluster
        with distributed.Client(cluster) as client:
            print(client)

            # Practice with a big array
            print(practice_mean())


if __name__ == "__main__":
    main()