forked from bellwether/minerva
added more examples
This commit is contained in:
parent
a3374fd85c
commit
745919e587
3 changed files with 138 additions and 0 deletions
35
examples/parallelization.py
Normal file
35
examples/parallelization.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import minerva
|
||||
import pprint
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
# Create the Minerva object which gives you access to the account under the
|
||||
# profile `hay`
|
||||
m = minerva.Minerva("hay")
|
||||
|
||||
# Get the Athena object
|
||||
athena = m.athena("s3://haystac-pmo-athena/")
|
||||
|
||||
# Parallelize across the `data` and split it into `n` chunks, one chunk per process.
|
||||
# Since `num_agents` is a number, it's turned into a range and then split.
|
||||
num_agents = 10000
|
||||
parallel = athena.parallelize("trajectories", n = 200, data = num_agents)
|
||||
for agents in parallel:
|
||||
# Everything *needs* to have a column in order for unloading to parquet to work,
|
||||
# so scalar values have to be assigned something, so here we use `as count` to
|
||||
# create a temporary column called `count`
|
||||
sql = f"""
|
||||
select count(*) as cnt
|
||||
from trajectories.basline
|
||||
where agent >= {min(agents)} and
|
||||
agent < {max(agents)}
|
||||
group by agent
|
||||
"""
|
||||
parallel << athena.query(query, partition = {"agent": agents})
|
||||
|
||||
pp.pprint(parallel.results().head(10))
|
||||
|
||||
# We also get important statistics
|
||||
print(parallel.runtime)
|
||||
print(parallel.cost)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue