forked from bellwether/minerva
Added an executable script (bin/minerva-console) that provides a simple Athena console
This commit is contained in:
parent
383185e6cb
commit
ab344374d9
3 changed files with 50 additions and 3 deletions
32
bin/minerva-console
Executable file
32
bin/minerva-console
Executable file
|
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/env python3
"""Interactive console (REPL) for running SQL against Athena via minerva.

Each line typed at the ``> `` prompt is submitted as a query. The first ten
rows of the result are pretty-printed, followed by the query's cost and
runtime. Type ``\\q`` (or press Ctrl-D / Ctrl-C at the prompt) to leave.
"""
import minerva
import pprint
import readline  # imported for its side effect on input(): line editing and history
import argparse

pp = pprint.PrettyPrinter(indent=4)

parser = argparse.ArgumentParser(description="""
REPL for the Athena SQL engine
""")
parser.add_argument("-p", "--profile", default="hay", help="The AWS profile to use")
parser.add_argument("-o", "--output", default="s3://haystac-pmo-athena/output")
args = parser.parse_args()

m = minerva.Minerva(args.profile)
athena = m.athena(args.output)

while True:
    # Ctrl-D raises EOFError and Ctrl-C raises KeyboardInterrupt inside
    # input(); treat both as a request to quit instead of dumping a traceback.
    try:
        text = input("> ")
    except (EOFError, KeyboardInterrupt):
        print()
        break

    command = text.strip()
    if command == "\\q":
        break
    if not command:
        # Nothing typed: re-prompt rather than sending an empty query.
        continue

    # Submission lives inside the try as well, so a query that is rejected
    # up front is reported like any other failure and the session survives.
    try:
        query = athena.query(text)
        data = query.results()
        pp.pprint(data.head(10))
        print()
        print(f"\t({'$%.2f' % query.cost}, {query.runtime})")
    except Exception as e:
        # Top-level REPL boundary: show the error and keep prompting.
        print(e)
|
||||
|
||||
|
|
@ -42,6 +42,7 @@ class Execute:
|
|||
self.params = [str(p) for p in params]
|
||||
self.info_cache = None
|
||||
self.temps = []
|
||||
self.ds = None
|
||||
|
||||
# The string of the query
|
||||
def query(self):
|
||||
|
|
@ -80,6 +81,10 @@ class Execute:
|
|||
|
||||
ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis']
|
||||
self.runtime = datetime.timedelta(seconds=ms / 1000)
|
||||
|
||||
scanned = self.info_cache['Statistics']['DataScannedInBytes']
|
||||
self.cost = 5.0 * scanned / (1024 ** 4) # $5/TB scanned
|
||||
|
||||
return stat # finalized state
|
||||
|
||||
|
||||
|
|
@ -122,11 +127,19 @@ class Query(Execute):
|
|||
# dataset of the results.
|
||||
# Calls `self.manifest_files()` which blocks via `self.finish()`
|
||||
def results(self):
    """Return the dataset of the query results, building it on first use.

    The dataset is cached in ``self.ds``; when a cached value is present it
    is returned immediately. Otherwise every entry yielded by
    ``self.manifest_files()`` is passed to ``self.handler.s3.download``,
    the download results are kept in ``self.temps``, and a dataset is
    created from them with ``pa.dataset.dataset``.
    """
    # Compare with None explicitly: the cache is "unset" only while it still
    # holds its initial None, regardless of the dataset's truth value.
    if self.ds is not None:
        return self.ds

    self.temps = [self.handler.s3.download(f) for f in self.manifest_files()]
    #local = parallel_map(self.handler.s3.download, self.manifest_files())
    self.ds = pa.dataset.dataset(self.temps)
    return self.ds
|
||||
|
||||
# Return scalar results
|
||||
# Abstracts away a bunch of keystrokes
|
||||
def scalar(self):
    """Return a single Python value taken from the start of the results.

    Convenience wrapper: asks ``self.results()`` for ``head(1)``, indexes
    that with ``[0][0]`` and converts the element with ``as_py()``.
    """
    leading = self.results().head(1)
    cell = leading[0][0]
    return cell.as_py()
|
||||
|
||||
def __enter__(self):
    """Enter a ``with`` block; the object itself is bound to the ``as`` target."""
    return self
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,15 @@
|
|||
[tool.poetry]
|
||||
name = "minerva"
|
||||
version = "0.3.5"
|
||||
version = "0.4.0"
|
||||
description = "Easier access to AWS Athena and Redshift"
|
||||
authors = [
|
||||
"Ari Brown <ari@airintech.com>",
|
||||
"Roshan Punnoose <roshan.punnoose@jhuapl.edu>"
|
||||
"Roshan Punnoose <roshan.punnoose@jhuapl.edu>",
|
||||
"Alex Zabriskie <alex.zabriskie@jhuapl.edu>"
|
||||
]
|
||||
packages = [
|
||||
{ include = "minerva/**/*.py"}
|
||||
{ include = "minerva/**/*.py" },
|
||||
{ include = "bin/*" }
|
||||
]
|
||||
readme = "README.md"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue