fixed repartition

This commit is contained in:
Ari Brown 2024-09-24 10:48:47 -04:00
parent e3055333b3
commit 5e604f07f7
4 changed files with 10 additions and 8 deletions

View file

@@ -8,4 +8,4 @@ run:
- poetry config http-basic.gitlab gitlab-ci-token "$CI_JOB_TOKEN" - poetry config http-basic.gitlab gitlab-ci-token "$CI_JOB_TOKEN"
- poetry publish --repository gitlab - poetry publish --repository gitlab
tags: tags:
- dind - autoscale

View file

@@ -38,7 +38,7 @@ def repartition(mach, agents):
# Prep the info for the docker container # Prep the info for the docker container
variables = {"min_agent": min(agents), variables = {"min_agent": min(agents),
"max_agent": max(agents), "max_agent": max(agents),
"source": src_top_level, "source": sorted_top,
"destination": dst_top_level, "destination": dst_top_level,
"secondary_destination": None} "secondary_destination": None}
@@ -54,7 +54,7 @@ def repartition(mach, agents):
##################################### #####################################
# Prep the work # Prep the work
# Find out how many hours there are in the dataset # Find out how many hours there are in the dataset
pool_size = 5 pool_size = 1
objs = s.m.s3.ls(src_top_level + "year=") objs = s.m.s3.ls(src_top_level + "year=")
hours = set(["s3://" + '/'.join([o.bucket_name, *o.key.split("/")[0:-1]]) hours = set(["s3://" + '/'.join([o.bucket_name, *o.key.split("/")[0:-1]])
@@ -87,9 +87,6 @@ try:
# Second part: repartition # Second part: repartition
pool.run(repartition, data=groups) pool.run(repartition, data=groups)
import IPython
IPython.embed()
finally: finally:
pool.terminate() pool.terminate()

View file

@@ -2,13 +2,16 @@ import boto3
import minerva as m import minerva as m
class Minerva: class Minerva:
def __init__(self, profile=None): def __init__(self, profile=None, region=None):
kwargs = {} kwargs = {}
if profile: if profile:
kwargs["profile_name"] = profile kwargs["profile_name"] = profile
else: else:
kwargs["region_name"] = "us-east-1" kwargs["region_name"] = "us-east-1"
if region:
kwargs["region_name"] = region
self.session = boto3.session.Session(**kwargs) self.session = boto3.session.Session(**kwargs)
self.s3 = m.S3(self) self.s3 = m.S3(self)

View file

@@ -16,7 +16,7 @@ readme = "README.md"
minerva-console = "minerva.console:main" minerva-console = "minerva.console:main"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = ">3.9, <3.12" python = ">3.9"
boto3 = "^1.34.0" boto3 = "^1.34.0"
pyarrow = "^14.0.1" pyarrow = "^14.0.1"
joblib = "^1.1.0" joblib = "^1.1.0"
@@ -25,3 +25,5 @@ s3fs = ">2023.6.0"
mako = ">1.2.0" mako = ">1.2.0"
dask = ">2023.11.0" dask = ">2023.11.0"
distributed = ">2023.11.0" distributed = ">2023.11.0"
pandas = ">2.0.0"
numpy = ">1.26.0"