Added an executable script for an easy Athena console

This commit is contained in:
Ari Brown 2023-09-15 10:20:53 -04:00
parent 383185e6cb
commit ab344374d9
3 changed files with 50 additions and 3 deletions

32
bin/minerva-console Executable file
View file

@ -0,0 +1,32 @@
#!/usr/bin/env python3
import minerva
import pprint
import readline
import argparse
def build_parser():
    """Build the command-line parser for the console.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``-p/--profile`` and
        ``-o/--output``, both with the defaults the script has always used.
    """
    parser = argparse.ArgumentParser(description="""
REPL for the Athena SQL engine
""")
    parser.add_argument("-p", "--profile", default="hay",
                        help="The AWS profile to use")
    parser.add_argument("-o", "--output",
                        default="s3://haystac-pmo-athena/output",
                        help="The S3 location handed to Athena for query output")
    return parser


def main(argv=None):
    """Run an interactive prompt that submits each line to Athena.

    For every statement entered, the first 10 rows of the result are
    pretty-printed, followed by the query's cost and runtime. Entering
    ``\\q`` (or sending end-of-file, e.g. Ctrl-D) leaves the prompt.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    pp = pprint.PrettyPrinter(indent=4)
    athena = minerva.Minerva(args.profile).athena(args.output)

    while True:
        try:
            text = input("> ")
        except EOFError:
            # Ctrl-D or exhausted piped input: exit quietly rather than
            # dumping a traceback. The newline keeps the shell prompt tidy.
            print()
            break

        statement = text.strip()
        if statement == "\\q":
            break
        if not statement:
            # Nothing to run; do not submit an empty query.
            continue

        # Submission is inside the ``try`` as well, so a failure to start the
        # query is reported like any other error instead of ending the session.
        try:
            query = athena.query(text)
            data = query.results()
            pp.pprint(data.head(10))
            print()
            print(f"\t(${query.cost:.2f}, {query.runtime})")
        except Exception as e:
            # Top-level REPL boundary: show the problem and keep prompting.
            print(e)


if __name__ == "__main__":
    main()

View file

@ -42,6 +42,7 @@ class Execute:
self.params = [str(p) for p in params] self.params = [str(p) for p in params]
self.info_cache = None self.info_cache = None
self.temps = [] self.temps = []
self.ds = None
# The string of the query # The string of the query
def query(self): def query(self):
@ -80,6 +81,10 @@ class Execute:
ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis'] ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis']
self.runtime = datetime.timedelta(seconds=ms / 1000) self.runtime = datetime.timedelta(seconds=ms / 1000)
scanned = self.info_cache['Statistics']['DataScannedInBytes']
self.cost = 5.0 * scanned / (1024 ** 4) # $5/TB scanned
return stat # finalized state return stat # finalized state
@ -122,11 +127,19 @@ class Query(Execute):
# dataset of the results. # dataset of the results.
# Calls `self.manifest_files()` which blocks via `self.finish()` # Calls `self.manifest_files()` which blocks via `self.finish()`
def results(self):
    """Return the query results as a dataset, downloading them at most once.

    The first call downloads every file listed by ``self.manifest_files()``
    through ``self.handler.s3.download``, records the downloaded items in
    ``self.temps``, builds a dataset from them and stores it in ``self.ds``.
    Later calls return the stored dataset.

    Returns:
        The object produced by ``pa.dataset.dataset(self.temps)``.
    """
    # ``None`` is the "not loaded yet" sentinel, so test for it explicitly
    # instead of relying on the truthiness of the cached object.
    if self.ds is not None:
        return self.ds

    self.temps = [self.handler.s3.download(f) for f in self.manifest_files()]
    #local = parallel_map(self.handler.s3.download, self.manifest_files())
    self.ds = pa.dataset.dataset(self.temps)
    return self.ds
# Return scalar results
# Abstracts away a bunch of keystrokes
def scalar(self):
    """Return a single value from the results as a plain Python object.

    Takes the first row of ``self.results()``, indexes ``[0][0]`` into it
    (presumably first column, first element - confirm against pyarrow) and
    converts that element with ``as_py()``.
    """
    first_row = self.results().head(1)
    first_column = first_row[0]
    cell = first_column[0]
    return cell.as_py()
def __enter__(self): def __enter__(self):
return self return self

View file

@ -1,13 +1,15 @@
[tool.poetry] [tool.poetry]
name = "minerva" name = "minerva"
version = "0.3.5" version = "0.4.0"
description = "Easier access to AWS Athena and Redshift" description = "Easier access to AWS Athena and Redshift"
authors = [ authors = [
"Ari Brown <ari@airintech.com>", "Ari Brown <ari@airintech.com>",
"Roshan Punnoose <roshan.punnoose@jhuapl.edu>" "Roshan Punnoose <roshan.punnoose@jhuapl.edu>",
"Alex Zabriskie <alex.zabriskie@jhuapl.edu>"
] ]
packages = [ packages = [
{ include = "minerva/**/*.py"} { include = "minerva/**/*.py" },
{ include = "bin/*" }
] ]
readme = "README.md" readme = "README.md"