diff --git a/.github/workflows/regression_tests.yml b/.github/workflows/regression_tests.yml index ce60313d6..2594e6dad 100644 --- a/.github/workflows/regression_tests.yml +++ b/.github/workflows/regression_tests.yml @@ -116,7 +116,7 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} - docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d fineweb_edu_10B -f jax -s algorithms/archived_paper_baselines/adamw/jax/submission.py -w finewebedu_lm -t algorithms/archived_paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false --data_bucket mlcommons-data --logs_bucket mlcommons-runs --data_bucket mlcommons-data --logs_bucket mlcommons-runs + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_jax_${{ github.head_ref || github.ref_name }} -d finewebedu -f jax -s algorithms/archived_paper_baselines/adamw/jax/submission.py -w finewebedu_lm -t algorithms/archived_paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false --data_bucket mlcommons-data --logs_bucket mlcommons-runs fastmri_pytorch: runs-on: self-hosted needs: build_and_push_pytorch_docker_image @@ -198,4 +198,4 @@ jobs: - name: Run containerized workload run: | docker pull us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} - docker run -v 
$HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d fineweb_edu_10B -f pytorch -s algorithms/archived_paper_baselines/adamw/pytorch/submission.py -w finewebedu_lm -t algorithms/archived_paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false --data_bucket mlcommons-data --logs_bucket mlcommons-runs --data_bucket mlcommons-data --logs_bucket mlcommons-runs + docker run -v $HOME/data/:/data/ -v $HOME/experiment_runs/:/experiment_runs -v $HOME/experiment_runs/logs:/logs --gpus all --ipc=host us-central1-docker.pkg.dev/training-algorithms-external/mlcommons-docker-repo/algoperf_pytorch_${{ github.head_ref || github.ref_name }} -d finewebedu -f pytorch -s algorithms/archived_paper_baselines/adamw/pytorch/submission.py -w finewebedu_lm -t algorithms/archived_paper_baselines/adamw/tuning_search_space.json -e tests/regression_tests/adamw -m 10 -c False -o True -r false --data_bucket mlcommons-data --logs_bucket mlcommons-runs diff --git a/algoperf/_version.py b/algoperf/_version.py new file mode 100644 index 000000000..397ff039f --- /dev/null +++ b/algoperf/_version.py @@ -0,0 +1,34 @@ +# file generated by setuptools-scm +# don't change, don't track in version control + +__all__ = [ + "__version__", + "__version_tuple__", + "version", + "version_tuple", + "__commit_id__", + "commit_id", +] + +TYPE_CHECKING = False +if TYPE_CHECKING: + from typing import Tuple + from typing import Union + + VERSION_TUPLE = Tuple[Union[int, str], ...] 
+ COMMIT_ID = Union[str, None] +else: + VERSION_TUPLE = object + COMMIT_ID = object + +version: str +__version__: str +__version_tuple__: VERSION_TUPLE +version_tuple: VERSION_TUPLE +commit_id: COMMIT_ID +__commit_id__: COMMIT_ID + +__version__ = version = '1.0.1' +__version_tuple__ = version_tuple = (1, 0, 1) + +__commit_id__ = commit_id = 'ga673f5835' diff --git a/algoperf/workloads/finewebedu_lm/input_pipeline.py b/algoperf/workloads/finewebedu_lm/input_pipeline.py index 3007371fc..5798f6638 100644 --- a/algoperf/workloads/finewebedu_lm/input_pipeline.py +++ b/algoperf/workloads/finewebedu_lm/input_pipeline.py @@ -6,6 +6,7 @@ import jax import tensorflow as tf +import datasets as hf_datasets from algoperf import data_utils @@ -83,7 +84,8 @@ def get_lm_dataset( shuffle_seed = jax.random.randint(data_rng, (), -(2**31), 2**31 - 1) data_dir = os.path.join(data_dir, TFDS_SPLIT_NAME[split]) - tokens_ds = tf.data.Dataset.load(data_dir) + ds = hf_datasets.load_from_disk(data_dir) + tokens_ds = ds.to_tf_dataset() # tokens tokens_ds = tokens_ds.flat_map(tf.data.Dataset.from_tensor_slices) diff --git a/dataset/dataset_setup.py b/dataset/dataset_setup.py index 30bd9dec6..9374b11ea 100644 --- a/dataset/dataset_setup.py +++ b/dataset/dataset_setup.py @@ -782,7 +782,7 @@ def download_finewebedu( ): """Download FineWebEdu-10B.""" - data_dir = os.path.join(data_dir, 'fineweb_edu_10B') + data_dir = os.path.join(data_dir, 'finewebedu') _maybe_mkdir(data_dir) _maybe_mkdir(tmp_dir) diff --git a/docker/scripts/startup.sh b/docker/scripts/startup.sh index 1cd676d2a..b4a7111f2 100644 --- a/docker/scripts/startup.sh +++ b/docker/scripts/startup.sh @@ -174,7 +174,7 @@ fi # Check if arguments are valid VALID_DATASETS=("criteo1tb" "imagenet" "fastmri" "ogbg" "librispeech" \ - "wmt" "mnist" "fineweb_edu_10B") + "wmt" "mnist" "finewebedu") VALID_WORKLOADS=("criteo1tb" "imagenet_resnet" "imagenet_resnet_silu" "imagenet_resnet_gelu" \ "imagenet_resnet_large_bn_init" "imagenet_vit" 
"imagenet_vit_glu" \ "imagenet_vit_post_ln" "imagenet_vit_map" "fastmri" "ogbg" \ diff --git a/scoring/utils/slurm/make_job_config.py b/scoring/utils/slurm/make_job_config.py index f796c6caf..787d9a152 100644 --- a/scoring/utils/slurm/make_job_config.py +++ b/scoring/utils/slurm/make_job_config.py @@ -67,7 +67,7 @@ 'librispeech_deepspeech': {'dataset': 'librispeech'}, 'criteo1tb': {'dataset': 'criteo1tb'}, 'librispeech_conformer': {'dataset': 'librispeech'}, - 'finewebedu_lm': {'dataset': 'fineweb_edu_10B'}, + 'finewebedu_lm': {'dataset': 'finewebedu'}, } RULESET_CONFIGS = { diff --git a/scoring/utils/slurm/run_jobs.sh b/scoring/utils/slurm/run_jobs.sh index 1047b31c0..85272cc23 100644 --- a/scoring/utils/slurm/run_jobs.sh +++ b/scoring/utils/slurm/run_jobs.sh @@ -91,6 +91,7 @@ DOCKER_CMD=( docker run -v /opt/data/:/data/ -v "$HOME/experiment_runs:/experiment_runs" + -v "$HOME/algorithmic-efficiency/:/algorithmic-efficiency/" -v "$HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms" -v "$HOME/algorithmic-efficiency/docker/scripts/startup.sh:/algorithmic-efficiency/docker/scripts/startup.sh" --gpus all diff --git a/scoring/utils/workload_metadata_external_tuning.json b/scoring/utils/workload_metadata_external_tuning.json index 0ba0d99ee..f4d268683 100644 --- a/scoring/utils/workload_metadata_external_tuning.json +++ b/scoring/utils/workload_metadata_external_tuning.json @@ -33,6 +33,6 @@ }, "finewebedu_lm" : { "max_steps": 55000, - "dataset":"fineweb_edu_10B" + "dataset":"finewebedu" } }