AMD AI 370 APU의 NPU 사용해 보기

7월 28, 2025

AMD AI 370 APU를 사용한 미니 PC에서 NPU 활성화 방법을 찾아보다가 테스트한 결과를 정리한다.

lemonade-sdk를 사용해야 하며, Python 버전이 3.10이어야 라이브러리가 정상적으로 설치된다.

# 1. Remove the existing virtual environment (if needed)
# NOTE(review): `rmdir /s` looks like cmd.exe syntax while `#` comments are
# PowerShell/POSIX style — confirm which shell these steps are meant for.
rmdir /s .venv
 
# 2. Create a virtual environment with Python 3.10
uv python install 3.10
uv venv --python 3.10
 
# 3. Activate the virtual environment
.venv\Scripts\activate
 
# 4. Install pip (needed inside the uv virtual environment)
uv pip install pip
 
# 5. Install lemonade-sdk (with the llm-oga-hybrid extra)
uv pip install "lemonade-sdk[llm-oga-hybrid]"
 
# 6. Run the RyzenAI hybrid installation
lemonade-install --ryzenai hybrid

# test.py
# Minimal smoke test: load the model, run one generation, print the result.
from lemonade.api import from_pretrained

# Checkpoint id handed to from_pretrained together with the "oga-hybrid" recipe.
checkpoint = "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid"
model, tokenizer = from_pretrained(checkpoint, recipe="oga-hybrid")

# Tokenize a single prompt, generate at most 30 new tokens, and print the
# decoded first sequence of the response.
encoded = tokenizer("서울의 가을날씨는 보통 몇 도의 온도를 가져?", return_tensors="pt")
input_ids = encoded.input_ids
response = model.generate(input_ids, max_new_tokens=30)
print(tokenizer.decode(response[0]))

스트레스 테스트를 해보자

import time
import psutil
import threading
from datetime import datetime
from lemonade.api import from_pretrained
 
# Settings for the NPU stress test.
running = True                   # loop flag checked by the monitor and the main loop
ITERATIONS = 0                   # number of completed inference iterations
MAX_NEW_TOKENS = 50              # generation budget per prompt
STRESS_TEST_DURATION = 60 * 60   # one hour, expressed in seconds

# Prompts cycled through during the test (a mix of Korean and English).
test_prompts = [
    "서울의 가을날씨는 보통 몇 도의 온도를 가져?",
    "What is the capital of France?",
    "인공지능의 발전이 인류에게 미치는 영향은?",
    "Explain quantum computing in simple terms.",
    "한국의 전통 음식에 대해 설명해주세요.",
    "How does machine learning work?",
    "미래의 기술 발전에 대한 예측을 해보세요.",
    "Describe the process of photosynthesis.",
    "K-pop이 세계에 미치는 문화적 영향은?",
    "What are the benefits of renewable energy?",
]
 
def monitor_system():
    """Report CPU and memory utilisation until the ``running`` flag goes falsy.

    Every cycle samples CPU usage over a one-second window, reads the
    virtual-memory statistics, prints both percentages on one line, and then
    sleeps for ten seconds before checking the flag again.
    """
    while True:
        if not running:
            break
        cpu_usage = psutil.cpu_percent(interval=1)
        mem_stats = psutil.virtual_memory()
        print(f"[Monitor] CPU: {cpu_usage:.1f}%, Memory: {mem_stats.percent:.1f}%")
        time.sleep(10)
 
# Main driver: announce the run, load the model, start the resource monitor,
# then run inference over the prompts in round-robin order until the time
# budget is used up or the user presses Ctrl+C. A summary is always printed.
print("NPU 스트레스 테스트 시작...")
print(f"테스트 지속 시간: {STRESS_TEST_DURATION}초")
print("Ctrl+C로 중단 가능합니다.\n")

# Load the model before the timer starts so loading is not counted as test time.
print("모델 로딩 중...")
model, tokenizer = from_pretrained(
    "amd/Llama-3.2-1B-Instruct-awq-g128-int4-asym-fp16-onnx-hybrid",
    recipe="oga-hybrid"
)
print("모델 로딩 완료!\n")

# Start the system monitor as a daemon thread so it never blocks interpreter exit.
monitor_thread = threading.Thread(target=monitor_system, daemon=True)
monitor_thread.start()

# Use a monotonic clock for durations: wall-clock time (time.time()) can jump
# during a long run (NTP sync, DST, manual changes) and skew the time limit.
start_time = time.monotonic()
try:
    while running:
        elapsed_time = time.monotonic() - start_time

        # Stop once the configured time budget has been used up.
        if elapsed_time >= STRESS_TEST_DURATION:
            print(f"\n스트레스 테스트 완료! {STRESS_TEST_DURATION}초 실행됨")
            break

        # Cycle through the prompts in order.
        prompt = test_prompts[ITERATIONS % len(test_prompts)]

        print(f"[{datetime.now().strftime('%H:%M:%S')}] Iteration {ITERATIONS + 1}")
        print(f"Prompt: {prompt}")

        # Run inference; the timed span covers tokenization and generation.
        inference_start = time.perf_counter()
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        response = model.generate(input_ids, max_new_tokens=MAX_NEW_TOKENS)
        inference_time = time.perf_counter() - inference_start

        output = tokenizer.decode(response[0], skip_special_tokens=True)
        print(f"Response: {output}")
        print(f"Inference time: {inference_time:.3f}s")

        # Re-sample the elapsed time so the report includes the inference that
        # just finished instead of the value captured before it started.
        elapsed_time = time.monotonic() - start_time
        print(f"Elapsed total time: {elapsed_time:.1f}s")
        print("-" * 80)

        ITERATIONS += 1

        # Short pause between iterations (intended to keep the NPU from overheating).
        time.sleep(0.5)

except KeyboardInterrupt:
    print("\n사용자에 의해 테스트가 중단되었습니다.")

finally:
    # Tell the monitor thread to stop, then print the summary on every exit path.
    running = False
    total_time = time.monotonic() - start_time
    print("\n=== NPU 스트레스 테스트 결과 ===")
    print(f"총 실행 시간: {total_time:.1f}초")
    print(f"총 반복 횟수: {ITERATIONS}")
    # max(..., 1) avoids a division by zero when no iteration completed.
    print(f"평균 처리 시간: {total_time/max(ITERATIONS, 1):.3f}초/iteration")
    print("테스트 완료!")

댓글 쓰기 · 수정

0 댓글