diff --git a/minerva/__init__.py b/minerva/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/minerva/__pycache__/__init__.cpython-310.pyc b/minerva/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..2edd91a Binary files /dev/null and b/minerva/__pycache__/__init__.cpython-310.pyc differ diff --git a/minerva/__pycache__/blueshift.cpython-310.pyc b/minerva/__pycache__/blueshift.cpython-310.pyc new file mode 100644 index 0000000..3bddbfc Binary files /dev/null and b/minerva/__pycache__/blueshift.cpython-310.pyc differ diff --git a/minerva/__pycache__/minerva.cpython-310.pyc b/minerva/__pycache__/minerva.cpython-310.pyc new file mode 100644 index 0000000..8beeb50 Binary files /dev/null and b/minerva/__pycache__/minerva.cpython-310.pyc differ diff --git a/blueshift.py b/minerva/blueshift.py similarity index 97% rename from blueshift.py rename to minerva/blueshift.py index ce1f26a..3090005 100644 --- a/blueshift.py +++ b/minerva/blueshift.py @@ -6,6 +6,7 @@ import pyarrow as pa import pyarrow.dataset import pprint import json +import datetime pp = pprint.PrettyPrinter(indent=4) @@ -79,6 +80,8 @@ class Query: local = [self.handler.download(f) for f in files] self.ds = pa.dataset.dataset(local) + self.runtime = tiedot['UpdatedAt'] - tiedot['CreatedAt'] + return self.ds else: print("Error:") diff --git a/minerva.py b/minerva/minerva.py similarity index 91% rename from minerva.py rename to minerva/minerva.py index ce8debb..fbe3280 100644 --- a/minerva.py +++ b/minerva/minerva.py @@ -5,6 +5,7 @@ import time import pyarrow as pa import pyarrow.dataset import pprint +import datetime pp = pprint.PrettyPrinter(indent=4) @@ -60,7 +61,8 @@ class Query: while status in ['QUEUED', 'RUNNING']: time.sleep(5) - status = self.status()['State'] + tiedot = self.info() + status = tiedot['Status']['State'] if status == "SUCCEEDED": # Because we're using `UNLOAD`, we get a manifest of the files @@ -72,6 +74,9 @@ class Query: local = [self.handler.download(f) for f in files] self.ds = pa.dataset.dataset(local) + ms = tiedot['Statistics']['TotalExecutionTimeInMillis'] + self.runtime = datetime.timedelta(seconds=ms / 1000) + return self.ds else: return status # canceled or error diff --git a/test.py b/test.py index 2a63c5a..8cf7a3f 100644 --- a/test.py +++ b/test.py @@ -1,9 +1,9 @@ -import minerva as m +import minerva.minerva as a import pprint pp = pprint.PrettyPrinter(indent=4) -athena = m.Athena("hay", "s3://haystac-pmo-athena/") +athena = a.Athena("hay", "s3://haystac-pmo-athena/") #query = athena.query( #"""SELECT * #FROM trajectories.kitware @@ -14,7 +14,8 @@ athena = m.Athena("hay", "s3://haystac-pmo-athena/") #""") query = athena.query("select count(*) as count from trajectories.kitware") data = query.results() -pp.pprint(query.info()['Statistics']) +pp.pprint(data.head(10)) +print(query.runtime) # Everything *needs* to have a column in order for parquet to work, so scalar # values have to be assigned something, so here we use `as count` to create diff --git a/test2.py b/test2.py index 2f0d8af..c108da7 100644 --- a/test2.py +++ b/test2.py @@ -1,4 +1,4 @@ -import blueshift as b +import minerva.blueshift as b import pprint pp = pprint.PrettyPrinter(indent=4) @@ -7,7 +7,8 @@ red = b.Redshift("hay", "s3://haystac-pmo-athena/", db="dev", cluster="redshift-cluster-1") query = red.query("select count(*) from myspectrum_schema.kitware") -res = query.results() -pp.pprint(res.head(10)) -pp.pprint(query.info()) +print(query) +data = query.results() +pp.pprint(data.head(10)) +print(query.runtime)