forked from bellwether/minerva
Significantly improve the README and verify that all the examples work
This commit is contained in:
parent
e3c11fb1aa
commit
5dccce53e9
9 changed files with 275 additions and 109 deletions
|
|
@ -1,11 +1,9 @@
|
|||
import minerva
|
||||
import pprint
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
m = minerva.Minerva("hay")
|
||||
m = minerva.Minerva("hay")
|
||||
athena = m.athena("s3://haystac-pmo-athena/")
|
||||
|
||||
# `execute()` does NOT provide any results and does NOT use `UNLOAD`
|
||||
query = athena.execute(
|
||||
"""
|
||||
create database if not exists test
|
||||
|
|
|
|||
|
|
@ -13,15 +13,17 @@ athena = m.athena("s3://haystac-pmo-athena/")
|
|||
# Everything *needs* to have a column in order for unloading to parquet to work,
|
||||
# so scalar values must be given a column alias; here we use `as count` to
|
||||
# create a temporary column called `count`
|
||||
query = athena.query(
|
||||
"""
|
||||
select round(longitude, 3) as lon, count(*) as count
|
||||
from trajectories.baseline
|
||||
where agent = 4
|
||||
group by round(longitude, 3)
|
||||
order by count(*) desc
|
||||
"""
|
||||
)
|
||||
#query = athena.query(
|
||||
# """
|
||||
# select round(longitude, 3) as lon, count(*) as count
|
||||
# from trajectories.basline
|
||||
# where agent = 4
|
||||
# group by round(longitude, 3)
|
||||
# order by count(*) desc
|
||||
# """
|
||||
#)
|
||||
|
||||
query = athena.query("select * from trajectories.basline where agent < 100 limit 100")
|
||||
data = query.results()
|
||||
|
||||
pp.pprint(data.head(10))
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
from minerva.cluster import Cluster
|
||||
from minerva.pier import Pier
|
||||
import minerva
|
||||
from minerva.timing import Timing
|
||||
from dask.distributed import Client
|
||||
import dask
|
||||
|
|
@ -7,7 +6,7 @@ import dask
|
|||
########### PREP ############################
|
||||
|
||||
def worker(pier, n):
|
||||
mach = pier.machine(ami = "ami-01f85b935dc9f674c", # dask on ubuntu 22.04 x86
|
||||
mach = pier.machine(ami = "ami-0399a4f70ca684620", # dask on ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
name = f"dask-worker-{n}",
|
||||
|
|
@ -17,24 +16,25 @@ def worker(pier, n):
|
|||
return mach
|
||||
|
||||
def scheduler(pier):
|
||||
mach = pier.machine(ami = "ami-01f85b935dc9f674c", # dask on ubuntu 22.04 x86
|
||||
mach = pier.machine(ami = "ami-0399a4f70ca684620", # dask on ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
disk_size = 32,
|
||||
name = f"dask-scheduler",
|
||||
variables = {"type": "scheduler"})
|
||||
return mach
|
||||
|
||||
########## CLUSTER ##########################
|
||||
|
||||
pier = Pier("hay",
|
||||
subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a
|
||||
sg_groups = ["sg-0f9e555954e863954", # ssh
|
||||
"sg-0b34a3f7398076545", # default
|
||||
"sg-04cd2626d91ac093c"], # dask (8786, 8787)
|
||||
key_pair = ("Ari-Brown-HAY", "~/.ssh/Ari-Brown-HAY.pem"),
|
||||
iam = "S3+SSM+CloudWatch+ECR")
|
||||
m = minerva.Minerva("hay")
|
||||
pier = m.pier(subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a
|
||||
sg_groups = ["sg-0f9e555954e863954", # ssh
|
||||
"sg-0b34a3f7398076545", # default
|
||||
"sg-04cd2626d91ac093c"], # dask (8786, 8787)
|
||||
key_pair = ("Ari-Brown-HAY", "~/.ssh/Ari-Brown-HAY.pem"),
|
||||
iam = "S3+SSM+CloudWatch+ECR")
|
||||
|
||||
cluster = Cluster(pier, scheduler, worker, num_workers=5)
|
||||
cluster = pier.cluster(scheduler, worker, num_workers=2)
|
||||
cluster.start()
|
||||
|
||||
########## USAGE ########################
|
||||
|
|
@ -43,31 +43,9 @@ try:
|
|||
client = Client(cluster.public_location)
|
||||
print(client)
|
||||
|
||||
# Practice with a big array
|
||||
# https://matthewrocklin.com/blog/work/2017/01/12/dask-dataframes
|
||||
|
||||
#import numpy as np
|
||||
#import dask.array as da
|
||||
import dask.dataframe as dd
|
||||
import time
|
||||
|
||||
# https://stackoverflow.com/questions/43796774/loading-local-file-from-client-onto-dask-distributed-cluster
|
||||
|
||||
# Iteratively load files and scatter them to the cluster
|
||||
#
|
||||
# futures = []
|
||||
# for fn in filenames:
|
||||
# df = pd.read_csv(fn)
|
||||
# future = client.scatter(df)
|
||||
# futures.append(future)
|
||||
#
|
||||
# ddf = dd.from_delayed(futures, meta=df)
|
||||
|
||||
# query = athena.query("select * from trajectories")
|
||||
# ddf = query.distribute_results(client)
|
||||
|
||||
with Timing("reading parquet"):
|
||||
df = dd.read_parquet("s3://haystac-archive-phase1.trial1/ta1.kitware/ta1/simulation/train/")
|
||||
athena = m.athena("s3://haystac-pmo-athena/")
|
||||
query = athena.query("select * from trajectories.basline where agent < 100")
|
||||
df = query.distribute_results(client)
|
||||
|
||||
with Timing("persisting"):
|
||||
dp = df.persist()
|
||||
|
|
|
|||
26
examples/docker_instance.py
Normal file
26
examples/docker_instance.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import minerva
|
||||
from minerva.docker import Docker
|
||||
import sys
|
||||
|
||||
m = minerva.Minerva("hay")
|
||||
pier = m.pier(subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a
|
||||
sg_groups = ["sg-0f9e555954e863954", # ssh
|
||||
"sg-0b34a3f7398076545"], # default
|
||||
iam = "S3+SSM+CloudWatch+ECR")
|
||||
|
||||
mach = pier.machine(ami = "ami-0b0cd81283738558a", # ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
name = f"minerva-aws-test")
|
||||
|
||||
# Running the machine in the HAYSTAC PMO account
|
||||
# Pulling a container from the HAYSTAC T&E account
|
||||
d = Docker(machine = mach,
|
||||
#container = "436820952613.dkr.ecr.us-east-1.amazonaws.com/test:latest",
|
||||
container = "amazon/aws-cli:latest",
|
||||
variables = {"hello": "world"},
|
||||
stdout = sys.stdout)
|
||||
|
||||
d.create()
|
||||
d.run("s3 ls")
|
||||
d.terminate()
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
from minerva.pier import Pier
|
||||
from minerva.docker import Docker
|
||||
import sys
|
||||
|
||||
profile = "hay"
|
||||
pier = Pier(profile,
|
||||
subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a
|
||||
sg_groups = ["sg-0f9e555954e863954", # ssh
|
||||
"sg-0b34a3f7398076545"], # default
|
||||
iam = "S3+SSM+CloudWatch+ECR")
|
||||
|
||||
num = 0
|
||||
mach = pier.machine(ami = "ami-0b0cd81283738558a", # ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
name = f"minerva-{num}",
|
||||
variables = {"num": num})
|
||||
|
||||
# Running the machine in the HAYSTAC PMO account
|
||||
# Pulling a container from the HAYSTAC T&E account
|
||||
d = Docker(machine = mach,
|
||||
#container = "436820952613.dkr.ecr.us-east-1.amazonaws.com/test:latest",
|
||||
container = "amazon/aws-cli:latest",
|
||||
variables = {"num": num},
|
||||
stdout = sys.stdout)
|
||||
|
||||
d.create()
|
||||
d.run()
|
||||
#d.terminate()
|
||||
|
|
@ -8,8 +8,11 @@ red = m.redshift("s3://haystac-te-athena/",
|
|||
db = "train",
|
||||
workgroup = "phase1-trial2")
|
||||
|
||||
query = red.query("select agent, st_astext(geom), datetime from public.baseline where agent = 4 limit 200")
|
||||
query = red.query("""select agent, st_astext(geom), timestamp from
|
||||
public.baseline_v1 where agent = 44 limit 200""")
|
||||
|
||||
data = query.results()
|
||||
|
||||
pp.pprint(data.head(10))
|
||||
print(query.runtime)
|
||||
print(query.cost)
|
||||
|
|
|
|||
|
|
@ -1,23 +1,31 @@
|
|||
import minerva
|
||||
|
||||
def worker(pier, n=0):
|
||||
mach = pier.machine(ami = "ami-05a242924e713f80a", # dask on ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
name = f"test-{n}",
|
||||
variables = {"type": "worker",
|
||||
"number": n})
|
||||
return mach
|
||||
|
||||
m = minerva.Minerva("hay")
|
||||
pier = m.pier(subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a
|
||||
sg_groups = ["sg-0f9e555954e863954", # ssh
|
||||
"sg-0b34a3f7398076545"] # default
|
||||
"sg-0b34a3f7398076545"], # default
|
||||
iam = "S3+SSM+CloudWatch+ECR",
|
||||
key_pair = ("Ari-Brown-HAY", "~/.ssh/Ari-Brown-HAY.pem"))
|
||||
|
||||
|
||||
def worker(pier, n=0):
|
||||
mach = pier.machine(ami = "ami-0399a4f70ca684620", # dask on ubuntu 22.04 x86
|
||||
instance_type = "t3.medium",
|
||||
username = "ubuntu",
|
||||
name = f"test-{n}",
|
||||
disk_size = 32,
|
||||
variables = {"type": "worker",
|
||||
"number": n})
|
||||
return mach
|
||||
|
||||
mach = worker(pier)
|
||||
|
||||
mach.create()
|
||||
mach.login()
|
||||
print("*******")
|
||||
print(repr(mach.cmd("echo 'hello world'").stdout))
|
||||
print("*******")
|
||||
print(mach.cmd("echo 'I am machine $number of type $type'"))
|
||||
print("*******")
|
||||
mach.terminate()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue