better redshift support, working on dask stuff

This commit is contained in:
Ari Brown 2023-12-21 14:23:58 -05:00
parent e854a93e60
commit 5eb8471081
5 changed files with 86 additions and 175 deletions

View file

@ -18,7 +18,7 @@ def get_aws_credentials():
all_credentials["AWS_REGION"] = all_credentials.pop("REGION")
return all_credentials
env = get_aws_credentials()
env = {} #get_aws_credentials()
env['EXTRA_PIP_PACKAGES'] = 's3fs'
cluster = EC2Cluster(env_vars = env,
@ -35,36 +35,7 @@ cluster = EC2Cluster(env_vars = env,
client = Client(cluster)
manifest_files = ['s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_90fc7ac6-1be8-42a3-8485-f5fda039a23b',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_3fd296d8-4d16-4491-a950-d5e4b0fff172',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_19afa54e-ed8f-4da5-a575-a2f7acd02399',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0240d219-11d8-4965-9425-ed2cbe9f0984',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_657a9eee-2db8-4b55-8c2b-c98c67eb2992',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_4fb5f559-ac3e-4d0b-bff6-e90aefcecff7',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_a7b3bdfa-aa06-4c7b-b010-f0c59d925051',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_444110c5-885d-4369-9cbd-bb842383baa7',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_921dc55e-8d94-4f91-b71b-ec1b45ff999f',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_dc3fafb8-8d8a-432c-a9f4-386332b7720c',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_9b8c79b9-d8a6-487e-a10b-6d65dae9daff',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_26fe7c8b-15f4-419f-a7c7-c87461f1b69c',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_45c5c82b-befc-4b0c-97fa-673de0feb9dd',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_aca904de-b154-4f56-b255-b71de0be3060',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_9a131e04-2353-44f1-9c13-7c1b381ca553',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_66e9e85d-406b-4164-87f3-3ffbe4ff9162',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_e32f16e7-591e-4da4-a2b1-4b00ac4cb617',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_88eb15ea-9278-4e54-978b-9c211a4b834f',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_71b697fe-3da2-4c5b-a046-e5773b494e7b',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0454e926-10fb-4af4-82ea-082e6bdb7c5c',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_6ee2d3b4-a837-419f-b181-53a127a791e3',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_abfdb890-64d6-4e05-adf6-c020633fb1ab',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_ee27888e-c3fa-4731-a75b-b2f20efcafc3',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_c0d5978a-a66a-4faf-9d5b-5f5ebd7e7311',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_bdf24af9-bdca-467c-abca-04e215eb190c',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0dddbf5f-ce5d-4685-8361-cda906bef37c',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_a6800c32-4790-4e77-bfdf-7900ed44097a',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_0d0913a9-9f82-4418-9450-4cbf364ca9fb',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_ab1a2407-b7a4-4d69-9539-a05391945149',
's3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_bcfde315-905f-475c-a3c3-3a3556e53fe4']
manifest_files = ['s3://haystac-pmo-athena/results/0.1664769366230633/20231129_180352_00031_3yekz_90fc7ac6-1be8-42a3-8485-f5fda039a23b']
try:
with Timing("read parquet from athena"):

View file

@ -5,10 +5,16 @@ from minerva.pier import Pier
########### PREP ############################
DASK_BASE = "ami-0399a4f70ca684620" # dask on ubuntu 22.04 x86
NUM_WORK = int(sys.argv[1])
if sys.argv[2] == "large":
WORKER_TYPE = "r5.xlarge"
else:
WORKER_TYPE = "m5.large"
def worker(pier, n):
mach = pier.machine(ami = DASK_BASE,
instance_type = "m5.large",
instance_type = WORKER_TYPE,
username = "ubuntu",
name = f"dask-worker-{n}",
variables = {"type": "worker",
@ -18,7 +24,7 @@ def worker(pier, n):
def scheduler(pier):
mach = pier.machine(ami = DASK_BASE,
instance_type = "m5.large", # "r5.xlarge",
instance_type = "m5.large",
username = "ubuntu",
name = f"dask-scheduler",
variables = {"type": "scheduler"},
@ -35,7 +41,7 @@ pier = m.pier(subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public
key_pair = ("Ari-Brown-HAY", "~/.ssh/Ari-Brown-HAY.pem"),
iam = "Minerva")
cluster = pier.cluster(scheduler, worker, num_workers=int(sys.argv[1]))
cluster = pier.cluster(scheduler, worker, num_workers=NUM_WORK)
cluster.start()
print()