# Forked from bellwether/minerva (102 lines, 6.2 KiB, Python).
import sys
|
|
import dask
|
|
from dask_cloudprovider.aws import EC2Cluster
|
|
from minerva.timing import Timing
|
|
import dask.dataframe as dd
|
|
import dask.distributed
|
|
from dask.distributed import Client
|
|
import configparser
|
|
import os
|
|
import contextlib
|
|
|
|
def get_aws_credentials(profile='default', credentials_path='~/.aws/credentials'):
    """Return one profile of an AWS credentials file as an env-var style dict.

    Every option of the selected profile is returned with its key upper-cased
    (e.g. ``aws_access_key_id`` -> ``AWS_ACCESS_KEY_ID``). If the profile
    contains a ``region`` option it is exposed as ``AWS_REGION`` instead of
    ``REGION``.

    Args:
        profile: Section name to read from the credentials file.
        credentials_path: Location of the credentials file; a leading ``~``
            is expanded to the current user's home directory.

    Returns:
        A dict mapping upper-cased option names to their string values.

    Raises:
        configparser.NoSectionError: If the profile is absent. This is also
            what happens when the file itself does not exist, because
            ``parser.read`` silently skips unreadable files.
    """
    parser = configparser.RawConfigParser()
    parser.read(os.path.expanduser(credentials_path))

    all_credentials = {
        key.upper(): value for key, value in parser.items(profile)
    }

    # Rename REGION -> AWS_REGION only when the profile actually defines it.
    if "REGION" in all_credentials:
        all_credentials["AWS_REGION"] = all_credentials.pop("REGION")

    return all_credentials
|
|
|
|
# Environment handed to the cluster: the local AWS credentials plus the
# extra pip package (s3fs) requested through EXTRA_PIP_PACKAGES.
env = {**get_aws_credentials(), 'EXTRA_PIP_PACKAGES': 's3fs'}

# Keyword arguments for the EC2-backed Dask cluster, kept in one table so
# each setting sits next to its explanatory note.
_cluster_kwargs = {
    'env_vars': env,
    'n_workers': 2,
    'instance_type': 'm5.large',
    # project-subnet-public1-us-east-1a
    'subnet_id': "subnet-05eb26d8649a093e1",
    'security_groups': [
        "sg-0f9e555954e863954",  # ssh
        "sg-0b34a3f7398076545",  # default
        "sg-04cd2626d91ac093c",  # dask (8786, 8787)
    ],
    'ami': "ami-0399a4f70ca684620",
    'key_name': "Ari-Brown-HAY",
    # NOTE(review): presumably turns off TLS between client and cluster;
    # confirm against the dask-cloudprovider documentation.
    'security': False,
    'iam_instance_profile': {'Name': 'Minerva'},
}
cluster = EC2Cluster(**_cluster_kwargs)

# Client bound to the cluster created above.
client = Client(cluster)
|
|
|
|
# S3 object paths to load. Every path shares the same results prefix and
# differs only in its trailing identifier, so the list is assembled from
# those two pieces.
_RESULTS_PREFIX = (
    's3://haystac-pmo-athena/results/0.1664769366230633/'
    '20231129_180352_00031_3yekz_'
)
_RESULT_IDS = (
    '90fc7ac6-1be8-42a3-8485-f5fda039a23b',
    '3fd296d8-4d16-4491-a950-d5e4b0fff172',
    '19afa54e-ed8f-4da5-a575-a2f7acd02399',
    '0240d219-11d8-4965-9425-ed2cbe9f0984',
    '657a9eee-2db8-4b55-8c2b-c98c67eb2992',
    '4fb5f559-ac3e-4d0b-bff6-e90aefcecff7',
    'a7b3bdfa-aa06-4c7b-b010-f0c59d925051',
    '444110c5-885d-4369-9cbd-bb842383baa7',
    '921dc55e-8d94-4f91-b71b-ec1b45ff999f',
    'dc3fafb8-8d8a-432c-a9f4-386332b7720c',
    '9b8c79b9-d8a6-487e-a10b-6d65dae9daff',
    '26fe7c8b-15f4-419f-a7c7-c87461f1b69c',
    '45c5c82b-befc-4b0c-97fa-673de0feb9dd',
    'aca904de-b154-4f56-b255-b71de0be3060',
    '9a131e04-2353-44f1-9c13-7c1b381ca553',
    '66e9e85d-406b-4164-87f3-3ffbe4ff9162',
    'e32f16e7-591e-4da4-a2b1-4b00ac4cb617',
    '88eb15ea-9278-4e54-978b-9c211a4b834f',
    '71b697fe-3da2-4c5b-a046-e5773b494e7b',
    '0454e926-10fb-4af4-82ea-082e6bdb7c5c',
    '6ee2d3b4-a837-419f-b181-53a127a791e3',
    'abfdb890-64d6-4e05-adf6-c020633fb1ab',
    'ee27888e-c3fa-4731-a75b-b2f20efcafc3',
    'c0d5978a-a66a-4faf-9d5b-5f5ebd7e7311',
    'bdf24af9-bdca-467c-abca-04e215eb190c',
    '0dddbf5f-ce5d-4685-8361-cda906bef37c',
    'a6800c32-4790-4e77-bfdf-7900ed44097a',
    '0d0913a9-9f82-4418-9450-4cbf364ca9fb',
    'ab1a2407-b7a4-4d69-9539-a05391945149',
    'bcfde315-905f-475c-a3c3-3a3556e53fe4',
)
manifest_files = [_RESULTS_PREFIX + result_id for result_id in _RESULT_IDS]
|
|
|
|
# Timed benchmark steps. Everything runs inside try/finally so the client and
# the cluster are shut down even when a step fails.
try:
    with Timing("read parquet from athena"):
        df = dd.read_parquet(manifest_files)
    print("distributed")

    with Timing("partitioning"):
        # Explicit division boundaries 0..10000 for the 'agent' index.
        divisions = list(range(0, 10001))
        df = df.set_index('agent', divisions=divisions)

    with Timing("persisting"):
        dp = df.persist()

    with Timing("memory usage"):
        # .compute() is required: memory_usage() on a Dask collection is
        # lazy, so printing it directly shows no actual numbers.
        print(dp.get_partition(400).memory_usage().compute())

    with Timing("count()"):
        print(dp.count().compute())

    # Same measurement as above, repeated after the first full computation.
    with Timing("memory usage"):
        print(dp.get_partition(400).memory_usage().compute())

    with Timing("mean latitude"):
        print(dp.groupby(dp.index).latitude.mean().compute())

    # Second count(), for comparison with the first timing.
    with Timing("count()"):
        print(dp.count().compute())

finally:
    ########## FIN #######################
    print("closing client")
    try:
        client.close()
    finally:
        # Always tear the cluster down, even if closing the client raised;
        # otherwise the EC2 instances would be left running.
        cluster.close()
|
|
|