Commit 4b25c27

chore: bump transformers version (#665)

dacorvo committed Sep 2, 2024
1 parent 763b507 · commit 4b25c27

Showing 12 changed files with 58 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/inference_cache_llm.yml
@@ -39,7 +39,7 @@ jobs:
           EOF
           wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
           sudo apt-get update -y
-          sudo apt-get install aws-neuronx-tools=2.17.1.0 aws-neuronx-runtime-lib=2.20.22.0-1b3ca6425 aws-neuronx-collectives=2.20.22.0-c101c322e -y
+          sudo apt-get install aws-neuronx-tools=2.18.3.0 aws-neuronx-runtime-lib=2.21.41.0-fb1705f5f aws-neuronx-collectives=2.21.46.0-69b77134b -y
           export PATH=/opt/aws/neuron/bin:$PATH
       - name: Checkout
         uses: actions/checkout@v4
1 change: 1 addition & 0 deletions optimum/exporters/neuron/utils.py
@@ -387,6 +387,7 @@ def get_submodels_for_export_stable_diffusion(
     text_encoder_2 = getattr(pipeline, "text_encoder_2", None)
     if text_encoder_2 is not None:
         text_encoder_2.config.output_hidden_states = True
+        text_encoder_2.text_model.config.output_hidden_states = True
         models_for_export.append((DIFFUSION_MODEL_TEXT_ENCODER_2_NAME, copy.deepcopy(text_encoder_2)))

     # U-NET
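Why both flags: after the transformers bump, the inner CLIPTextTransformer module appears to consult its own config when deciding whether to return hidden states, so setting output_hidden_states only on the wrapper no longer survives export tracing. A minimal sketch of the behavior being guarded, assuming an SDXL checkpoint and dummy token ids (both illustrative, not taken from the diff):

import torch
from transformers import CLIPTextModelWithProjection

# Illustrative checkpoint: any pipeline with a second CLIP text encoder works.
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="text_encoder_2"
)
# Mirror the exporter change: set the flag on the wrapper config AND on the
# inner text_model config that is consulted at trace time.
text_encoder_2.config.output_hidden_states = True
text_encoder_2.text_model.config.output_hidden_states = True

dummy_input_ids = torch.ones((1, 77), dtype=torch.long)  # placeholder prompt tokens
outputs = text_encoder_2(dummy_input_ids)
assert outputs.hidden_states is not None  # hidden states are now returned by default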
1 change: 1 addition & 0 deletions optimum/neuron/generation/token_selector.py
@@ -92,6 +92,7 @@ def create(
         """
         generation_config.validate()
         generation_config = copy.deepcopy(generation_config)
+        model._prepare_special_tokens(generation_config)

         unsupported_generation_flags = [
             "output_attentions",
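_prepare_special_tokens is the private GenerationMixin helper that generate() calls in transformers 4.43 to materialize eos/pad/bos token ids before decoding; since TokenSelector drives decoding itself rather than going through generate(), it now has to invoke the helper explicitly. A rough, simplified sketch of what the helper does (attribute names are from memory and should be treated as an assumption, not a reference):

import torch
from transformers import GenerationConfig

def prepare_special_tokens_sketch(generation_config: GenerationConfig, device: str = "cpu") -> None:
    # Simplified stand-in for GenerationMixin._prepare_special_tokens: the real
    # helper also handles lists of eos ids, decoder start tokens, attention-mask
    # checks, and device placement.
    def as_tensor(token_id):
        return None if token_id is None else torch.tensor(token_id, device=device)

    # generate() converts special token ids to tensors once and caches them on
    # the config, where logits processors and stopping criteria pick them up.
    generation_config._bos_token_tensor = as_tensor(generation_config.bos_token_id)
    generation_config._eos_token_tensor = as_tensor(generation_config.eos_token_id)
    generation_config._pad_token_tensor = as_tensor(generation_config.pad_token_id)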
1 change: 1 addition & 0 deletions pyproject.toml
@@ -27,6 +27,7 @@ line-length = 119
 # Never enforce `E501` (line length violations).
 ignore = ["C901", "E501", "E741", "W605"]
 select = ["C", "E", "F", "I", "W"]
+exclude = ["*.ipynb"]

 # Ignore import violations in all `__init__.py` files.
 [tool.ruff.lint.per-file-ignores]
4 changes: 2 additions & 2 deletions setup.py
@@ -13,9 +13,9 @@


 INSTALL_REQUIRES = [
-    "transformers == 4.41.1",
+    "transformers == 4.43.2",
     "accelerate == 0.29.2",
-    "optimum ~= 1.20.0",
+    "optimum ~= 1.21.0",
     "huggingface_hub >= 0.20.1",
     "numpy>=1.22.2, <=1.25.2",
     "protobuf<4",
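A note on the two pinning styles above: == pins an exact release, while the compatible-release operator ~= permits patch updates only, so "optimum ~= 1.21.0" means ">= 1.21.0, < 1.22". A quick check with the packaging library:

from packaging.specifiers import SpecifierSet

compatible = SpecifierSet("~=1.21.0")
assert "1.21.4" in compatible      # patch releases satisfy the spec
assert "1.22.0" not in compatible  # minor bumps do not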
13 changes: 7 additions & 6 deletions tests/cache/test_neuronx_cache.py
@@ -34,14 +34,14 @@
 )
 from optimum.neuron.utils import get_hub_cached_entries, synchronize_hub_cache
 from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
-from optimum.utils.testing_utils import TOKEN


 @pytest.fixture
-def cache_repos():
+def cache_repos(staging):
     # Setup: create temporary Hub repository and local cache directory
-    api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN)
-    user = api.whoami()["name"]
+    token = staging["token"]
+    user = staging["user"]
+    api = HfApi(endpoint=ENDPOINT_STAGING, token=token)
     hostname = socket.gethostname()
     cache_repo_id = f"{user}/{hostname}-optimum-neuron-cache"
     if api.repo_exists(cache_repo_id):
@@ -57,7 +57,7 @@ def cache_repos():
     os.environ["NEURON_COMPILE_CACHE_URL"] = cache_path
     os.environ["CUSTOM_CACHE_REPO"] = cache_repo_id
     os.environ["HF_ENDPOINT"] = ENDPOINT_STAGING
-    os.environ["HF_TOKEN"] = TOKEN
+    os.environ["HF_TOKEN"] = token
     yield (cache_path, cache_repo_id)
     # Teardown
     api.delete_repo(cache_repo_id)
@@ -173,7 +173,8 @@ def check_traced_cache_entry(cache_path):


 def assert_local_and_hub_cache_sync(cache_path, cache_repo_id):
-    api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN)
+    # Since created models are public on the staging endpoint we don't need a token
+    api = HfApi(endpoint=ENDPOINT_STAGING)
     remote_files = api.list_repo_files(cache_repo_id)
     local_files = get_local_cached_files(cache_path)
     for file in local_files:
18 changes: 17 additions & 1 deletion tests/conftest.py
@@ -26,7 +26,7 @@
     set_neuron_cache_path,
 )

-from .utils import OPTIMUM_INTERNAL_TESTING_CACHE_REPO, get_random_string
+from .utils import OPTIMUM_INTERNAL_TESTING_CACHE_REPO, TOKEN_STAGING, USER_STAGING, get_random_string


 # Inferentia fixtures
@@ -171,3 +171,19 @@ def pytest_fixture_setup(fixturedef, request):
     if getattr(fixturedef.func, "is_dist_fixture", False):
         dist_fixture_class = fixturedef.func()
         dist_fixture_class(request)
+
+
+@pytest.fixture
+def staging():
+    """A pytest fixture only available in huggingface_hub staging mode
+
+    If the huggingface_hub is not operating in staging mode, tests using
+    that fixture are automatically skipped.
+
+    Returns:
+        a Dict containing a valid staging user and token.
+    """
+    return {
+        "user": USER_STAGING,
+        "token": TOKEN_STAGING,
+    }
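A hedged usage sketch (not part of the commit): a test opts in by naming the fixture, and can be gated on huggingface_hub's staging switch; HUGGINGFACE_CO_STAGING is the environment variable huggingface_hub reads for staging mode, assumed here to be the right gate:

import os

import pytest

@pytest.mark.skipif(
    os.environ.get("HUGGINGFACE_CO_STAGING", "0") != "1",
    reason="only runs against the Hub staging endpoint",
)
def test_something_on_staging(staging):
    # The fixture hands back the dummy user/token pair defined in tests/utils.py.
    assert staging["user"] == "__DUMMY_OPTIMUM_USER__"
    assert staging["token"].startswith("hf_")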
3 changes: 1 addition & 2 deletions tests/decoder/conftest.py
@@ -58,8 +58,7 @@ def _export_model(model_id, export_kwargs, neuron_model_path):
     try:
         subprocess.run(export_command, check=True)
     except subprocess.CalledProcessError as e:
-        logger.error(f"Failed to export model: {e}")
-        return
+        raise SystemError(f"Failed to export model: {e}")


 @pytest.fixture(scope="session", params=DECODER_MODEL_CONFIGURATIONS.keys())
43 changes: 14 additions & 29 deletions tests/generation/test_hub.py
@@ -13,39 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
-import re

 from huggingface_hub import HfApi
 from transformers.testing_utils import ENDPOINT_STAGING

 from optimum.neuron import NeuronModelForSeq2SeqLM
 from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx
-from optimum.utils.testing_utils import TOKEN, USER
-
-
-def _test_push_to_hub(model, model_path, repo_id, ignore_patterns=[]):
-    model.push_to_hub(model_path, repo_id, use_auth_token=TOKEN, endpoint=ENDPOINT_STAGING)
-    api = HfApi(endpoint=ENDPOINT_STAGING, token=TOKEN)
-    try:
-        hub_files_path = api.list_repo_files(repo_id)
-        for path, _, files in os.walk(model_path):
-            for name in files:
-                local_file_path = os.path.join(path, name)
-                hub_file_path = os.path.relpath(local_file_path, model_path)
-                excluded = False
-                for pattern in ignore_patterns:
-                    if re.compile(pattern).match(hub_file_path) is not None:
-                        excluded = True
-                        break
-                assert excluded or hub_file_path in hub_files_path
-    finally:
-        api.delete_repo(repo_id)
-
-
-def neuron_push_model_id(model_id):
-    model_name = model_id.split("/")[-1]
-    repo_id = f"{USER}/{model_name}-neuronx"
-    return repo_id


 @is_inferentia_test
@@ -59,6 +32,18 @@ def test_seq2seq_model_from_hub():

 @is_inferentia_test
 @requires_neuronx
-def test_push_seq2seq_to_hub(neuron_seq2seq_greedy_path, neuron_push_seq2seq_id):
+def test_push_seq2seq_to_hub(neuron_seq2seq_greedy_path, neuron_push_seq2seq_id, staging):
     model = NeuronModelForSeq2SeqLM.from_pretrained(neuron_seq2seq_greedy_path)
-    _test_push_to_hub(model, neuron_seq2seq_greedy_path, neuron_push_seq2seq_id)
+    model.push_to_hub(
+        neuron_seq2seq_greedy_path, neuron_push_seq2seq_id, use_auth_token=staging["token"], endpoint=ENDPOINT_STAGING
+    )
+    api = HfApi(endpoint=ENDPOINT_STAGING, token=staging["token"])
+    try:
+        hub_files_path = api.list_repo_files(neuron_push_seq2seq_id)
+        for path, _, files in os.walk(neuron_seq2seq_greedy_path):
+            for name in files:
+                local_file_path = os.path.join(path, name)
+                hub_file_path = os.path.relpath(local_file_path, neuron_seq2seq_greedy_path)
+                assert hub_file_path in hub_files_path
+    finally:
+        api.delete_repo(neuron_push_seq2seq_id)
7 changes: 3 additions & 4 deletions tests/test_cache_utils.py
@@ -37,9 +37,8 @@
     set_neuron_cache_path,
 )
 from optimum.neuron.utils.testing_utils import is_trainium_test
-from optimum.utils.testing_utils import TOKEN, USER

-from .utils import StagingTestMixin, TrainiumTestMixin, get_random_string
+from .utils import TOKEN_STAGING, USER_STAGING, StagingTestMixin, TrainiumTestMixin, get_random_string


 DUMMY_COMPILER_VERSION = "1.2.3"
@@ -147,10 +146,10 @@ def test_list_files_in_neuron_cache(self):
 class StagingNeuronUtilsTestCase(StagingTestMixin, TestCase):
     def test_set_custom_cache_repo_name_in_hf_home(self):
         orig_token = get_token()
-        login(TOKEN)
+        login(TOKEN_STAGING)

         repo_name = f"blablabla-{self.seed}"
-        repo_id = f"{USER}/{repo_name}"
+        repo_id = f"{USER_STAGING}/{repo_name}"
         create_repo(repo_name, repo_type="model")

         def remove_repo():
14 changes: 9 additions & 5 deletions tests/utils.py
@@ -58,11 +58,15 @@
 from optimum.neuron.utils.patching import DynamicPatch, Patcher
 from optimum.neuron.utils.require_utils import requires_neuronx_distributed
 from optimum.utils import logging
-from optimum.utils.testing_utils import TOKEN, USER


 logger = logging.get_logger(__name__)


+# Not critical, only usable on the sandboxed CI instance.
+USER_STAGING = "__DUMMY_OPTIMUM_USER__"
+TOKEN_STAGING = "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"
+
+
 SEED = 42
 OPTIMUM_INTERNAL_TESTING_CACHE_REPO = "optimum-internal-testing/optimum-neuron-cache-for-testing"

@@ -450,7 +454,7 @@ def tearDownClass(cls):

 class StagingTestMixin:
     CUSTOM_CACHE_REPO_NAME = "optimum-neuron-cache-testing"
-    CUSTOM_CACHE_REPO = f"{USER}/{CUSTOM_CACHE_REPO_NAME}"
+    CUSTOM_CACHE_REPO = f"{USER_STAGING}/{CUSTOM_CACHE_REPO_NAME}"
     CUSTOM_PRIVATE_CACHE_REPO = f"{CUSTOM_CACHE_REPO}-private"
     _token = ""
     MAX_NUM_LINEARS = 20
@@ -468,8 +472,8 @@ def set_hf_hub_token(cls, token: Optional[str]) -> Optional[str]:

     @classmethod
     def setUpClass(cls):
-        cls._staging_token = TOKEN
-        cls._token = cls.set_hf_hub_token(TOKEN)
+        cls._staging_token = TOKEN_STAGING
+        cls._token = cls.set_hf_hub_token(TOKEN_STAGING)
         cls._custom_cache_repo_name = load_custom_cache_repo_name_from_hf_home()
         delete_custom_cache_repo_name_from_hf_home()

@@ -511,6 +515,6 @@ def remove_all_files_in_repo(self, repo_id: str):
         pass

     def tearDown(self):
-        login(TOKEN)
+        login(TOKEN_STAGING)
         self.remove_all_files_in_repo(self.CUSTOM_CACHE_REPO)
         self.remove_all_files_in_repo(self.CUSTOM_PRIVATE_CACHE_REPO)
3 changes: 1 addition & 2 deletions text-generation-inference/tests/fixtures/model.py
@@ -54,8 +54,7 @@ def export_model(model_id, export_kwargs, neuron_model_path):
     try:
         subprocess.run(export_command, check=True)
     except subprocess.CalledProcessError as e:
-        logger.error(f"Failed to export model: {e}")
-        return
+        raise ValueError(f"Failed to export model: {e}")


 @pytest.fixture(scope="session", params=MODEL_CONFIGURATIONS.keys())
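Here and in tests/decoder/conftest.py the export fixtures switch from log-and-return to raising. The practical effect, sketched with a toy fixture (names are hypothetical, not from the diff): when a session-scoped fixture raises, pytest reports every dependent test as a setup error instead of letting each one fail later against a model directory that was never populated.

import pytest

@pytest.fixture(scope="session")
def exported_model_path(tmp_path_factory):
    model_path = tmp_path_factory.mktemp("neuron_model")
    export_succeeded = False  # stand-in for the real subprocess.run(..., check=True) outcome
    if not export_succeeded:
        # Raising errors out every test that depends on this fixture at setup time.
        raise ValueError("Failed to export model")
    return model_path

def test_model_loads(exported_model_path):
    # Never runs when the export fails; pytest marks it as an error during setup.
    assert exported_model_path.exists()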
