# minerva/test.py
# 2023-10-02 17:26:02 -04:00
#
# 56 lines
# 1.8 KiB
# Python

import dask.distributed as distributed
import dask_cloudprovider.aws as aws
import configparser
import os
import contextlib
# altered /Users/ari/opt/miniconda3/envs/mamba_oa_env/lib/python3.10/site-packages/aiobotocore/endpoint.py:96
# needs [default] AWS credential
# `security = False` (can't use TLS because otherwise the UserData param is too long)
def aws_profile(profile):
    """Load one profile from ``~/.aws/credentials`` as environment variables.

    Every option name in the profile section is upper-cased, so
    ``aws_access_key_id`` becomes ``AWS_ACCESS_KEY_ID``. When the section
    carries a ``region`` option it is exposed as ``AWS_REGION`` rather than
    ``REGION``.

    Args:
        profile: Name of the section to read, e.g. ``"default"``.

    Returns:
        A dict mapping the upper-cased option names to their string values.

    Raises:
        configparser.NoSectionError: If the credentials file is missing or
            contains no section named ``profile``.
    """
    parser = configparser.RawConfigParser()
    # read() silently skips files it cannot open, so a missing credentials
    # file shows up as NoSectionError from items() below.
    parser.read(os.path.expanduser("~/.aws/credentials"))
    config = {key.upper(): value for key, value in parser.items(profile)}
    # Profiles that keep their region elsewhere (e.g. ~/.aws/config) have no
    # region entry here; rename only when one is present instead of failing
    # with a bare KeyError.
    if "REGION" in config:
        config["AWS_REGION"] = config.pop("REGION")
    return config
# Spin up the Dask cluster on EC2.
cluster = aws.EC2Cluster(
    # Credentials handed to the instances as environment variables.
    env_vars=aws_profile("hay"),
    # Network placement and access.
    region="us-east-1",
    vpc="vpc-0823964489ecc1e85",
    subnet_id="subnet-05eb26d8649a093e1",  # project-subnet-public1-us-east-1a
    security_groups=[
        "sg-0f9e555954e863954",  # ssh
        "sg-0b34a3f7398076545",  # default
        "sg-04cd2626d91ac093c",  # dask (8786, 8787)
    ],
    key_name="Ari-Brown-HAY",
    # Machine image and sizing.
    ami="ami-0b0cd81283738558a",  # ubuntu 22.04 x86
    worker_instance_type="t3.small",
    n_workers=2,
    bootstrap=True,
    # Options left switched off:
    #   worker_module = "dask_cuda.cli.dask_cuda_worker"  -- for running GPU clusters
    #   iam_instance_profile = "S3+SSM+CloudWatch+ECR"    -- this is actually a dict? what contents???
    security=False,
)
print(cluster)

# The script stops here; nothing after this call is executed.
exit()
# Connect to the cluster
client = distributed.Client(cluster)
print(client)

# Practice with a big array
import numpy as np
import dask.array as da

# Generate the random data lazily, one chunk at a time, instead of calling
# np.random.rand(1000000, 1000000): that call has to allocate all 10**12
# float64 values (about 8 TB) in this process before dask ever sees them.
dask_array = da.random.random((1000000, 1000000), chunks=(1000, 1000))
# NOTE(review): persist() asks the cluster to hold the computed chunks; confirm
# the workers have the capacity for an array of this size.
dask_array = dask_array.persist()  # non-blocking
mean = dask_array.mean().compute()
print(mean)