forked from bellwether/minerva
added binary file for an easy athena console
This commit is contained in:
parent
383185e6cb
commit
ab344374d9
3 changed files with 50 additions and 3 deletions
32
bin/minerva-console
Executable file
32
bin/minerva-console
Executable file
|
|
@ -0,0 +1,32 @@
|
||||||
|
#!/usr/bin/env python3
"""Interactive console (REPL) for running SQL against Athena through minerva.

Each line typed at the ``> `` prompt is submitted as a query.  A preview of
the result is pretty-printed, followed by the cost and runtime reported by
the query object.  Type ``\\q`` or press Ctrl-D to leave.
"""
import minerva
import pprint
import readline  # noqa: F401 -- imported only so input() gets line editing/history
import argparse

QUIT_COMMAND = "\\q"   # the two characters backslash + q
PROMPT = "> "
PREVIEW_ROWS = 10      # how many rows of each result are shown

pp = pprint.PrettyPrinter(indent=4)


def parse_args(argv=None):
    """Parse the command line.

    argv: list of argument strings, or None to read ``sys.argv[1:]``.
    Returns the argparse namespace with ``profile`` and ``output`` set.
    """
    parser = argparse.ArgumentParser(description="""
REPL for the Athena SQL engine
""")
    parser.add_argument("-p", "--profile", default="hay",
                        help="The AWS profile to use")
    parser.add_argument("-o", "--output",
                        default="s3://haystac-pmo-athena/output",
                        help="The S3 location handed to Athena for query output")
    return parser.parse_args(argv)


def read_line():
    """Return the next line typed at the prompt, or None once input has ended."""
    try:
        return input(PROMPT)
    except EOFError:
        # Ctrl-D / closed stdin: finish the prompt line and signal "quit"
        # instead of dying with a traceback.
        print()
        return None


def run_query(athena, text):
    """Submit `text` to Athena and print a preview plus cost and runtime.

    Any error is printed rather than raised so that one bad query does not
    end the session.
    """
    try:
        # Submitting the query is inside the `try` as well: a failure here
        # must not terminate the console either.
        query = athena.query(text)
        data = query.results()
        pp.pprint(data.head(PREVIEW_ROWS))
        print()
        print(f"\t({'$%.2f' % query.cost}, {query.runtime})")
    except Exception as e:  # top-level REPL boundary: report and keep going
        print(e)


def main():
    """Connect with the chosen AWS profile and run the prompt loop."""
    args = parse_args()

    m = minerva.Minerva(args.profile)
    athena = m.athena(args.output)

    while True:
        text = read_line()
        if text is None or text.strip() == QUIT_COMMAND:
            break
        if not text.strip():
            # Nothing typed; do not send an empty query.
            continue
        run_query(athena, text)


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
@ -42,6 +42,7 @@ class Execute:
|
||||||
self.params = [str(p) for p in params]
|
self.params = [str(p) for p in params]
|
||||||
self.info_cache = None
|
self.info_cache = None
|
||||||
self.temps = []
|
self.temps = []
|
||||||
|
self.ds = None
|
||||||
|
|
||||||
# The string of the query
|
# The string of the query
|
||||||
def query(self):
|
def query(self):
|
||||||
|
|
@ -80,6 +81,10 @@ class Execute:
|
||||||
|
|
||||||
ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis']
|
ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis']
|
||||||
self.runtime = datetime.timedelta(seconds=ms / 1000)
|
self.runtime = datetime.timedelta(seconds=ms / 1000)
|
||||||
|
|
||||||
|
scanned = self.info_cache['Statistics']['DataScannedInBytes']
|
||||||
|
self.cost = 5.0 * scanned / (1024 ** 4) # $5/TB scanned
|
||||||
|
|
||||||
return stat # finalized state
|
return stat # finalized state
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -122,11 +127,19 @@ class Query(Execute):
|
||||||
# dataset of the results.
|
# dataset of the results.
|
||||||
# Calls `self.manifest_files()` which blocks via `self.finish()`
|
# Calls `self.manifest_files()` which blocks via `self.finish()`
|
||||||
def results(self):
    """Return the dataset of this query's results, building it on first use.

    The dataset is cached on ``self.ds``; later calls return the cached
    object without downloading anything again.
    """
    # Test against None explicitly so the cached object is reused no matter
    # how it behaves in a boolean context.
    if self.ds is not None:
        return self.ds

    # Download each file listed by manifest_files(); whatever download()
    # returns is kept on self.temps and handed to pyarrow below.
    self.temps = [self.handler.s3.download(f) for f in self.manifest_files()]
    #local = parallel_map(self.handler.s3.download, self.manifest_files())
    self.ds = pa.dataset.dataset(self.temps)
    return self.ds
|
||||||
|
|
||||||
|
# Shortcut for queries that produce a single value: take a one-row preview
# of the results, index into it twice ([0][0]) and convert that entry to a
# plain Python object.
def scalar(self):
    preview = self.results().head(1)
    column = preview[0]
    cell = column[0]
    return cell.as_py()
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,15 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "minerva"
|
name = "minerva"
|
||||||
version = "0.3.5"
|
version = "0.4.0"
|
||||||
description = "Easier access to AWS Athena and Redshift"
|
description = "Easier access to AWS Athena and Redshift"
|
||||||
authors = [
|
authors = [
|
||||||
"Ari Brown <ari@airintech.com>",
|
"Ari Brown <ari@airintech.com>",
|
||||||
"Roshan Punnoose <roshan.punnoose@jhuapl.edu>"
|
"Roshan Punnoose <roshan.punnoose@jhuapl.edu>",
|
||||||
|
"Alex Zabriskie <alex.zabriskie@jhuapl.edu>"
|
||||||
]
|
]
|
||||||
packages = [
|
packages = [
|
||||||
{ include = "minerva/**/*.py"}
|
{ include = "minerva/**/*.py" },
|
||||||
|
{ include = "bin/*" }
|
||||||
]
|
]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue