diff --git a/examples/athena_basic_execute.py b/examples/athena_basic_execute.py new file mode 100644 index 0000000..c9995f0 --- /dev/null +++ b/examples/athena_basic_execute.py @@ -0,0 +1,16 @@ +import minerva +import pprint + +pp = pprint.PrettyPrinter(indent=4) + +m = minerva.Minerva("hay") +athena = m.athena("s3://haystac-pmo-athena/") + +query = athena.execute( + """ + create database if not exists test + """ +) +print(query.finish()) +print(query.runtime) + diff --git a/examples/athena_basic_query.py b/examples/athena_basic_query.py new file mode 100644 index 0000000..8c64435 --- /dev/null +++ b/examples/athena_basic_query.py @@ -0,0 +1,39 @@ +import minerva +import pprint + +pp = pprint.PrettyPrinter(indent=4) + +m = minerva.Minerva("hay") +athena = m.athena("s3://haystac-pmo-athena/") + +#query = athena.query( +#"""SELECT * +#FROM trajectories.kitware +#WHERE ST_Disjoint( +# ST_GeometryFromText('POLYGON((103.6 1.2151693, 103.6 1.5151693, 104.14797 1.5151693, 104.14797 1.2151693, 103.6 1.2151693))'), +# ST_Point(longitude, latitude) +#) +#""") + +# Everything *needs* to have a column in order for parquet to work, so scalar +# values have to be assigned something, so here we use `as count` to create +# a temporary column called `count` +#print(athena.query("select count(*) as count from trajectories.kitware").results().head(1)) + +query = athena.query( + """ + select round(longitude, 3) as lon, count(*) as count + from trajectories.kitware + where agent = 4 + group by round(longitude, 3) + order by count(*) desc + """ +) +data = query.results() +pp.pprint(data.head(10)) +print(query.runtime) + +#import IPython +#IPython.embed() + + diff --git a/examples/launch_instances.py b/examples/launch_instances.py new file mode 100644 index 0000000..0b95c2e --- /dev/null +++ b/examples/launch_instances.py @@ -0,0 +1,29 @@ +from minerva.pier import Pier +from minerva.docker import Docker +import sys + +profile = "hay" +pier = Pier(profile, + subnet_id = "subnet-05eb26d8649a093e1", # project-subnet-public1-us-east-1a + sg_groups = ["sg-0f9e555954e863954", # ssh + "sg-0b34a3f7398076545"], # default + iam = "S3+SSM+CloudWatch+ECR") + +num = 0 +mach = pier.machine(ami = "ami-0b0cd81283738558a", # ubuntu 22.04 x86 + instance_type = "t3.medium", + username = "ubuntu", + name = f"minerva-{num}", + variables = {"num": num}) + +# Running the machine in the HAYSTAC PMO account +# Pulling a container from the HAYSTAC T&E account +d = Docker(machine = mach, + #container = "436820952613.dkr.ecr.us-east-1.amazonaws.com/test:latest", + container = "amazon/aws-cli:latest", + variables = {"num": num}, + stdout = sys.stdout) + +d.create() +d.run() +#d.terminate() diff --git a/examples/redshift_basic_query.py b/examples/redshift_basic_query.py new file mode 100644 index 0000000..ae6fcba --- /dev/null +++ b/examples/redshift_basic_query.py @@ -0,0 +1,14 @@ +import minerva +import pprint + +pp = pprint.PrettyPrinter(indent=4) + +m = minerva.Minerva("hay") +red = m.redshift("s3://haystac-pmo-athena/", + db="dev", + cluster="redshift-cluster-1") +query = red.query("select count(*) from myspectrum_schema.kitware where agent = 4") +data = query.results() +pp.pprint(data.head(10)) +print(query.runtime) + diff --git a/examples/s3_ls.py b/examples/s3_ls.py new file mode 100644 index 0000000..6843e6e --- /dev/null +++ b/examples/s3_ls.py @@ -0,0 +1,4 @@ +import minerva + +m = minerva.Minerva("hay") +print(list(m.s3.ls("s3://haystac-pmo-athena/")))