forked from bellwether/minerva
moving cluster scripts to new dir
This commit is contained in:
parent
97c27f25a0
commit
e854a93e60
5 changed files with 154 additions and 184 deletions
102
cluster/ec2_cluster.py
Normal file
102
cluster/ec2_cluster.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import sys
|
||||
import dask
|
||||
from dask_cloudprovider.aws import EC2Cluster
|
||||
from minerva.timing import Timing
|
||||
import dask.dataframe as dd
|
||||
import dask.distributed
|
||||
from dask.distributed import Client
|
||||
import configparser
|
||||
import os
|
||||
import contextlib
|
||||
|
||||
def get_aws_credentials(profile="default", path="~/.aws/credentials"):
    """Read one profile of an AWS shared-credentials file into a dict.

    Every option name found in the profile is upper-cased (for example
    ``aws_access_key_id`` becomes ``AWS_ACCESS_KEY_ID``). If the profile
    has a ``region`` option it is returned under ``AWS_REGION`` instead
    of ``REGION``.

    Args:
        profile: Name of the section to read from the credentials file.
        path: Location of the credentials file; a leading ``~`` is
            expanded to the current user's home directory.

    Returns:
        dict mapping upper-cased option names to their string values.

    Raises:
        configparser.NoSectionError: If the section does not exist. This
            is also what happens when the file itself is missing,
            because ``RawConfigParser.read`` silently skips files it
            cannot open.
    """
    parser = configparser.RawConfigParser()
    parser.read(os.path.expanduser(path))

    all_credentials = {
        key.upper(): value for key, value in parser.items(profile)
    }

    # A region is optional in the credentials file; rename it only if present.
    with contextlib.suppress(KeyError):
        all_credentials["AWS_REGION"] = all_credentials.pop("REGION")

    return all_credentials
|
||||
|
||||
# Environment handed to the cluster machines: the local AWS credentials plus
# a request for s3fs (presumably installed by the image at start-up so the
# s3:// paths below can be read -- TODO confirm against the AMI).
env = get_aws_credentials()
env["EXTRA_PIP_PACKAGES"] = "s3fs"

cluster = EC2Cluster(
    env_vars=env,
    n_workers=2,
    instance_type="m5.large",
    subnet_id="subnet-05eb26d8649a093e1",  # project-subnet-public1-us-east-1a
    security_groups=[
        "sg-0f9e555954e863954",  # ssh
        "sg-0b34a3f7398076545",  # default
        "sg-04cd2626d91ac093c",  # dask (8786, 8787)
    ],
    ami="ami-0399a4f70ca684620",
    key_name="Ari-Brown-HAY",
    security=False,
    iam_instance_profile={"Name": "Minerva"},
)

# The cluster already exists at this point. If connecting to it fails, close
# it before propagating the error so it is not left running.
try:
    client = Client(cluster)
except BaseException:
    cluster.close()
    raise
|
||||
|
||||
# Result files in S3 that are handed to dd.read_parquet() further down.
manifest_files = [
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_90fc7ac6-1be8-42a3-8485-f5fda039a23b",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_3fd296d8-4d16-4491-a950-d5e4b0fff172",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_19afa54e-ed8f-4da5-a575-a2f7acd02399",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0240d219-11d8-4965-9425-ed2cbe9f0984",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_657a9eee-2db8-4b55-8c2b-c98c67eb2992",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_4fb5f559-ac3e-4d0b-bff6-e90aefcecff7",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_a7b3bdfa-aa06-4c7b-b010-f0c59d925051",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_444110c5-885d-4369-9cbd-bb842383baa7",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_921dc55e-8d94-4f91-b71b-ec1b45ff999f",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_dc3fafb8-8d8a-432c-a9f4-386332b7720c",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_9b8c79b9-d8a6-487e-a10b-6d65dae9daff",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_26fe7c8b-15f4-419f-a7c7-c87461f1b69c",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_45c5c82b-befc-4b0c-97fa-673de0feb9dd",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_aca904de-b154-4f56-b255-b71de0be3060",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_9a131e04-2353-44f1-9c13-7c1b381ca553",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_66e9e85d-406b-4164-87f3-3ffbe4ff9162",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_e32f16e7-591e-4da4-a2b1-4b00ac4cb617",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_88eb15ea-9278-4e54-978b-9c211a4b834f",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_71b697fe-3da2-4c5b-a046-e5773b494e7b",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0454e926-10fb-4af4-82ea-082e6bdb7c5c",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_6ee2d3b4-a837-419f-b181-53a127a791e3",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_abfdb890-64d6-4e05-adf6-c020633fb1ab",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_ee27888e-c3fa-4731-a75b-b2f20efcafc3",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_c0d5978a-a66a-4faf-9d5b-5f5ebd7e7311",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_bdf24af9-bdca-467c-abca-04e215eb190c",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0dddbf5f-ce5d-4685-8361-cda906bef37c",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_a6800c32-4790-4e77-bfdf-7900ed44097a",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0d0913a9-9f82-4418-9450-4cbf364ca9fb",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_ab1a2407-b7a4-4d69-9539-a05391945149",
    "s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_bcfde315-905f-475c-a3c3-3a3556e53fe4",
]
|
||||
|
||||
# Timed benchmark steps. Whatever happens inside, the client and the cluster
# are closed afterwards.
try:
    with Timing("read parquet from athena"):
        df = dd.read_parquet(manifest_files)
        print("distributed")

    with Timing("partitioning"):
        # Explicit divisions 0..10000 for the 'agent' index.
        divisions = list(range(0, 10001))
        df = df.set_index('agent', divisions=divisions)

    with Timing("persisting"):
        dp = df.persist()

    # NOTE(review): memory_usage() is printed without .compute(); if this is
    # a lazy collection the output is its repr, not the numbers -- confirm
    # that this is intended.
    with Timing("memory usage"):
        print(dp.get_partition(400).memory_usage())

    with Timing("count()"):
        print(dp.count().compute())

    with Timing("memory usage"):
        print(dp.get_partition(400).memory_usage())

    with Timing("mean latitude"):
        print(dp.groupby(dp.index).latitude.mean().compute())

    # Second count, timed again after the other computations have run.
    with Timing("count()"):
        print(dp.count().compute())

finally:
    ########## FIN #######################
    print("closing client")
    try:
        client.close()
    finally:
        # Close the cluster even when closing the client fails; otherwise
        # the cluster would be left running.
        cluster.close()
|
||||
|
||||
53
cluster/run_cluster.py
Normal file
53
cluster/run_cluster.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
import sys
|
||||
import minerva
|
||||
from minerva.pier import Pier
|
||||
|
||||
########### PREP ############################
|
||||
|
||||
# AMI passed to pier.machine() by both worker() and scheduler() below.
DASK_BASE = "ami-0399a4f70ca684620" # dask on ubuntu 22.04 x86
|
||||
|
||||
def worker(pier, n):
    """Request the machine for Dask worker number ``n`` from ``pier``.

    Args:
        pier: Object whose ``machine()`` method is called to describe or
            create the machine.
        n: Worker number; used in the machine name (``dask-worker-<n>``)
            and stored in its ``variables`` under ``"number"``.

    Returns:
        Whatever ``pier.machine()`` returns for this configuration.
    """
    worker_vars = {"type": "worker", "number": n}
    return pier.machine(
        ami=DASK_BASE,
        instance_type="m5.large",
        username="ubuntu",
        name=f"dask-worker-{n}",
        variables=worker_vars,
        disk_size=512,
    )
|
||||
|
||||
def scheduler(pier):
    """Request the Dask scheduler machine from ``pier``.

    Args:
        pier: Object whose ``machine()`` method is called to describe or
            create the machine.

    Returns:
        Whatever ``pier.machine()`` returns for this configuration.
    """
    return pier.machine(
        ami=DASK_BASE,
        instance_type="m5.large",  # "r5.xlarge"
        username="ubuntu",
        # Plain string: the name has no placeholders, so no f-string needed.
        name="dask-scheduler",
        variables={"type": "scheduler"},
        disk_size=32,
    )
|
||||
|
||||
########## CLUSTER ##########################
|
||||
|
||||
# Minerva handle for "hay", and the pier that carries the network, SSH key
# and IAM settings for the machines built from it.
m = minerva.Minerva("hay")

pier = m.pier(
    subnet_id="subnet-05eb26d8649a093e1",  # project-subnet-public1-us-east-1a
    sg_groups=[
        "sg-0f9e555954e863954",  # ssh
        "sg-0b34a3f7398076545",  # default
        "sg-04cd2626d91ac093c",  # dask (8786, 8787)
    ],
    key_pair=("Ari-Brown-HAY", "~/.ssh/Ari-Brown-HAY.pem"),
    iam="Minerva",
)
|
||||
|
||||
# The worker count is the first command-line argument; fail with a usage
# message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} NUM_WORKERS")

cluster = pier.cluster(scheduler, worker, num_workers=int(sys.argv[1]))
cluster.start()

# Once start() has been called, make sure terminate() always runs, even if
# the interactive session (or one of the prints) raises.
# NOTE(review): a failure inside start() itself is not covered here because
# it is unclear whether terminate() is safe on a partially started cluster.
try:
    print()
    print(f"dashboard: http://{cluster.scheduler.public_ip}:8787/")
    print(f"cluster: {cluster.public_location}")
    print()

    print("type `exit()` to terminate the cluster")
    print()

    # Drop into an interactive shell; execution resumes when the user exits.
    import IPython
    IPython.embed()
finally:
    cluster.terminate()
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue