# Patch summary. This preamble sits before the first `diff --git` header and
# is not part of any hunk.
#
#   minerva.py (hunk at line 66, inside `class Query`):
#     After the manifest text is stripped and split on "\n", a list
#     comprehension is added that strips each entry and drops entries that
#     are empty after stripping. The filtered list is what is then passed,
#     entry by entry, to `self.handler.download`.
#
#   test.py (hunk at line 4):
#     The one-line `select * ... limit 10` query is replaced by a multi-line
#     query on trajectories.kitware whose WHERE clause is
#     ST_Disjoint(ST_GeometryFromText('POLYGON(...)'),
#     ST_Point(longitude, latitude)). The replacement query has no LIMIT
#     clause; the script still prints only `data.head(10)`.
#     The `select count(*) as count` query line is commented out.
#
# NOTE(review): the hunk headers declare multi-line hunks (-66,6 +66,7 and
# -4,12 +4,19), but the patch body below is on a single line. The original
# line breaks, indentation and blank context lines are presumably lost in
# this copy; restore them from the upstream commit before applying.
# TODO confirm.
diff --git a/minerva.py b/minerva.py index d6c7fe4..ce8debb 100644 --- a/minerva.py +++ b/minerva.py @@ -66,6 +66,7 @@ class Query: # Because we're using `UNLOAD`, we get a manifest of the files # that make up our data. files = self.manifest(tiedot).strip().split("\n") + files = [f.strip() for f in files if f.strip()] # filter empty # TODO parallelize this local = [self.handler.download(f) for f in files] diff --git a/test.py b/test.py index 9bd4563..dffa287 100644 --- a/test.py +++ b/test.py @@ -4,12 +4,19 @@ import pprint pp = pprint.PrettyPrinter(indent=4) athena = m.Athena("hay", "s3://haystac-pmo-athena/") -query = athena.query('select * from "trajectories"."kitware" limit 10') +query = athena.query( +"""SELECT * +FROM trajectories.kitware +WHERE ST_Disjoint( + ST_GeometryFromText('POLYGON((103.6 1.2151693, 103.6 1.5151693, 104.14797 1.5151693, 104.14797 1.2151693, 103.6 1.2151693))'), + ST_Point(longitude, latitude) +) +""") data = query.results() print(data.head(10)) # Everything *needs* to have a column in order for parquet to work, so scalar # values have to be assigned something, so here we use `as count` to create # a temporary column called `count` -print(athena.query("select count(*) as count from trajectories.kitware").results().head(1)) +#print(athena.query("select count(*) as count from trajectories.kitware").results().head(1))