# Forked from bellwether/minerva (Python, 35 lines, 1.1 KiB).
# Example script: fan an Athena query out over chunks of agent ids using the
# Minerva helper library, then print a sample of the combined results together
# with the run statistics.
import pprint

import minerva

pp = pprint.PrettyPrinter(indent=4)

# Create the Minerva object which gives you access to the account under the
# profile `hay`
m = minerva.Minerva("hay")

# Get the Athena object
athena = m.athena("s3://haystac-pmo-athena/")

# Parallelize across the `data` and split it into `n` chunks, one chunk per process.
# Since `num_agents` is a number, it's turned into a range and then split.
num_agents = 10000
parallel = athena.parallelize("trajectories", n=200, data=num_agents)

for agents in parallel:
    # Everything *needs* to have a column in order for unloading to parquet to
    # work, so scalar values have to be given a name; here we use `as cnt` to
    # create a temporary column called `cnt`.
    #
    # The upper bound is inclusive: `max(agents)` is the last agent id in this
    # chunk, so a strict `<` would silently drop it from every chunk.
    # (Assumes each chunk is a contiguous, non-overlapping run of ids, as the
    # comment on `parallelize` above describes.)
    #
    # NOTE(review): `trajectories.basline` looks like a misspelling of
    # `baseline`; left untouched because the real table name cannot be
    # confirmed from this script.
    sql = f"""
        select count(*) as cnt
        from trajectories.basline
        where agent >= {min(agents)} and
              agent <= {max(agents)}
        group by agent
    """

    # Hand the query for this chunk to the parallel runner through its `<<`
    # operator. The query text lives in `sql` (the previous code passed an
    # undefined name `query`, which raised NameError on the first iteration).
    parallel << athena.query(sql, partition={"agent": agents})

# Show the first rows of the combined results.
pp.pprint(parallel.results().head(10))

# We also get important statistics
print(parallel.runtime)
print(parallel.cost)