Added an executable script for an easy Athena console

2023-09-15 10:20:53 -04:00 · 2023-09-15 10:20:53 -04:00 · ab344374d9
commit ab344374d9
parent 383185e6cb
3 changed files with 50 additions and 3 deletions
--- a/bin/minerva-console
+++ b/bin/minerva-console
@ -0,0 +1,32 @@
 #!/usr/bin/env python3
 import minerva
 import pprint
 import readline
 import argparse
 # Interactive console: reads one statement per line, runs it through
 # `athena.query`, and prints the first rows plus the query's cost and runtime.
 pp = pprint.PrettyPrinter(indent=4)
 parser = argparse.ArgumentParser(description="""
 REPL for the Athena SQL engine
 """)
 parser.add_argument("-p", "--profile", default="hay", help="The AWS profile to use")
 parser.add_argument("-o", "--output", default="s3://haystac-pmo-athena/output")
 args = parser.parse_args()
 m = minerva.Minerva(args.profile)
 athena = m.athena(args.output)
 # Keep prompting until the user types \q or closes the input stream.
 while True:
     try:
         text = input("> ").strip()
     except (EOFError, KeyboardInterrupt):
         # Ctrl-D / Ctrl-C at the prompt: leave quietly instead of dumping a
         # traceback. Finish the prompt line first so the shell starts clean.
         print()
         break
     if text == "\\q":
         break
     if not text:
         # Nothing was typed; do not submit an empty query.
         continue
     try:
         # Submission lives inside the `try` so that a failure while starting
         # the query is reported like any other error and the console survives.
         query = athena.query(text)
         data = query.results()
         pp.pprint(data.head(10))
         print()
         print(f"\t(${query.cost:.2f}, {query.runtime})")
     except Exception as e:
         # Top-level REPL boundary: show the problem and keep prompting.
         print(e)
--- a/minerva/athena.py
+++ b/minerva/athena.py
@ -42,6 +42,7 @@ class Execute:
        self.params  = [str(p) for p in params]
        self.info_cache = None
        self.temps   = []
        self.ds      = None
    # The string of the query
    def query(self):
@ -80,6 +81,10 @@ class Execute:
        ms = self.info_cache['Statistics']['TotalExecutionTimeInMillis']
        self.runtime = datetime.timedelta(seconds=ms / 1000)
        scanned   = self.info_cache['Statistics']['DataScannedInBytes']
        self.cost = 5.0 * scanned / (1024 ** 4) # $5/TB scanned
        return stat # finalized state
@ -122,11 +127,19 @@ class Query(Execute):
    # dataset of the results.
    # Calls `self.manifest_files()` which blocks via `self.finish()`
    def results(self):
        """Return the dataset of results, building it on the first call.

        The dataset is cached in `self.ds`; once set, later calls return the
        cached object without downloading anything again. On the first call
        each entry of `self.manifest_files()` is passed to
        `self.handler.s3.download`, the returned values are kept in
        `self.temps`, and `pa.dataset.dataset` is built from them.
        """
        # `self.ds` starts out as None, so test for that sentinel explicitly
        # rather than relying on the truthiness of the cached object.
        if self.ds is not None:
            return self.ds
        self.temps = [self.handler.s3.download(f) for f in self.manifest_files()]
        #local      = parallel_map(self.handler.s3.download, self.manifest_files())
        self.ds = pa.dataset.dataset(self.temps)
        return self.ds
    # Return scalar results
    # Abstracts away a bunch of keystrokes
    def scalar(self):
        """Return a single Python value taken from the results.

        Takes `self.results().head(1)`, picks its `[0][0]` element and
        converts that element with `.as_py()`.
        """
        head = self.results().head(1)
        cell = head[0][0]
        return cell.as_py()
    def __enter__(self):
        """Enter a `with` block; the object bound by `as` is this instance."""
        return self
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,13 +1,15 @@
 [tool.poetry]
 name = "minerva"
-version = "0.3.5"
+version = "0.4.0"
 description = "Easier access to AWS Athena and Redshift"
 authors = [
  "Ari Brown <ari@airintech.com>",
-  "Roshan Punnoose <roshan.punnoose@jhuapl.edu>"
+  "Roshan Punnoose <roshan.punnoose@jhuapl.edu>",
  "Alex Zabriskie <alex.zabriskie@jhuapl.edu>"
 ]
 packages = [
-  { include = "minerva/**/*.py"}
+  { include = "minerva/**/*.py" },
  { include = "bin/*" }
 ]
 readme = "README.md"