forked from bellwether/minerva
restructuring as a library with athena and redshift
This commit is contained in:
parent
adf909608d
commit
c32f8359e7
8 changed files with 18 additions and 8 deletions
0
minerva/__init__.py
Normal file
0
minerva/__init__.py
Normal file
BIN
minerva/__pycache__/__init__.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/__init__.cpython-310.pyc
Normal file
Binary file not shown.
BIN
minerva/__pycache__/blueshift.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/blueshift.cpython-310.pyc
Normal file
Binary file not shown.
BIN
minerva/__pycache__/minerva.cpython-310.pyc
Normal file
BIN
minerva/__pycache__/minerva.cpython-310.pyc
Normal file
Binary file not shown.
|
|
@ -6,6 +6,7 @@ import pyarrow as pa
|
||||||
import pyarrow.dataset
|
import pyarrow.dataset
|
||||||
import pprint
|
import pprint
|
||||||
import json
|
import json
|
||||||
|
import datetime
|
||||||
|
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
||||||
|
|
@ -79,6 +80,8 @@ class Query:
|
||||||
local = [self.handler.download(f) for f in files]
|
local = [self.handler.download(f) for f in files]
|
||||||
self.ds = pa.dataset.dataset(local)
|
self.ds = pa.dataset.dataset(local)
|
||||||
|
|
||||||
|
self.runtime = tiedot['UpdatedAt'] - tiedot['CreatedAt']
|
||||||
|
|
||||||
return self.ds
|
return self.ds
|
||||||
else:
|
else:
|
||||||
print("Error:")
|
print("Error:")
|
||||||
|
|
@ -5,6 +5,7 @@ import time
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.dataset
|
import pyarrow.dataset
|
||||||
import pprint
|
import pprint
|
||||||
|
import datetime
|
||||||
|
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
||||||
|
|
@ -60,7 +61,8 @@ class Query:
|
||||||
|
|
||||||
while status in ['QUEUED', 'RUNNING']:
|
while status in ['QUEUED', 'RUNNING']:
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
status = self.status()['State']
|
tiedot = self.info()
|
||||||
|
status = tiedot['Status']['State']
|
||||||
|
|
||||||
if status == "SUCCEEDED":
|
if status == "SUCCEEDED":
|
||||||
# Because we're using `UNLOAD`, we get a manifest of the files
|
# Because we're using `UNLOAD`, we get a manifest of the files
|
||||||
|
|
@ -72,6 +74,9 @@ class Query:
|
||||||
local = [self.handler.download(f) for f in files]
|
local = [self.handler.download(f) for f in files]
|
||||||
self.ds = pa.dataset.dataset(local)
|
self.ds = pa.dataset.dataset(local)
|
||||||
|
|
||||||
|
ms = tiedot['Statistics']['TotalExecutionTimeInMillis']
|
||||||
|
self.runtime = datetime.timedelta(seconds=ms / 1000)
|
||||||
|
|
||||||
return self.ds
|
return self.ds
|
||||||
else:
|
else:
|
||||||
return status # canceled or error
|
return status # canceled or error
|
||||||
7
test.py
7
test.py
|
|
@ -1,9 +1,9 @@
|
||||||
import minerva as m
|
import minerva.minerva as a
|
||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
||||||
athena = m.Athena("hay", "s3://haystac-pmo-athena/")
|
athena = a.Athena("hay", "s3://haystac-pmo-athena/")
|
||||||
#query = athena.query(
|
#query = athena.query(
|
||||||
#"""SELECT *
|
#"""SELECT *
|
||||||
#FROM trajectories.kitware
|
#FROM trajectories.kitware
|
||||||
|
|
@ -14,7 +14,8 @@ athena = m.Athena("hay", "s3://haystac-pmo-athena/")
|
||||||
#""")
|
#""")
|
||||||
query = athena.query("select count(*) as count from trajectories.kitware")
|
query = athena.query("select count(*) as count from trajectories.kitware")
|
||||||
data = query.results()
|
data = query.results()
|
||||||
pp.pprint(query.info()['Statistics'])
|
pp.pprint(data.head(10))
|
||||||
|
print(query.runtime)
|
||||||
|
|
||||||
# Everything *needs* to have a column in order for parquet to work, so scalar
|
# Everything *needs* to have a column in order for parquet to work, so scalar
|
||||||
# values have to be assigned something, so here we use `as count` to create
|
# values have to be assigned something, so here we use `as count` to create
|
||||||
|
|
|
||||||
9
test2.py
9
test2.py
|
|
@ -1,4 +1,4 @@
|
||||||
import blueshift as b
|
import minerva.blueshift as b
|
||||||
import pprint
|
import pprint
|
||||||
|
|
||||||
pp = pprint.PrettyPrinter(indent=4)
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
@ -7,7 +7,8 @@ red = b.Redshift("hay", "s3://haystac-pmo-athena/",
|
||||||
db="dev",
|
db="dev",
|
||||||
cluster="redshift-cluster-1")
|
cluster="redshift-cluster-1")
|
||||||
query = red.query("select count(*) from myspectrum_schema.kitware")
|
query = red.query("select count(*) from myspectrum_schema.kitware")
|
||||||
res = query.results()
|
print(query)
|
||||||
pp.pprint(res.head(10))
|
data = query.results()
|
||||||
pp.pprint(query.info())
|
pp.pprint(data.head(10))
|
||||||
|
print(query.runtime)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue