Update README.md
README.md CHANGED
@@ -131,15 +131,15 @@ vllm serve \
 ```bash
 # Download model from Red Hat Registry via docker
 # Note: This downloads the model to ~/.cache/instructlab/models unless --model-dir is specified.
-ilab model download --repository docker://registry.redhat.io/rhelai1/mistral-small-3-1-24b-instruct-2503:1.5
+ilab model download --repository docker://registry.redhat.io/rhelai1/mistral-small-3-1-24b-instruct-2503-fp8-dynamic:1.5
 ```

 ```bash
 # Serve model via ilab
-ilab model serve --model-path ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503
+ilab model serve --model-path ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503-fp8-dynamic

 # Chat with model
-ilab model chat --model ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503
+ilab model chat --model ~/.cache/instructlab/models/mistral-small-3-1-24b-instruct-2503-fp8-dynamic
 ```
 See [Red Hat Enterprise Linux AI documentation](https://docs.redhat.com/en/documentation/red_hat_enterprise_linux_ai/1.4) for more details.
 </details>

@@ -193,9 +193,9 @@ apiVersion: serving.kserve.io/v1beta1
 kind: InferenceService
 metadata:
   annotations:
-    openshift.io/display-name: mistral-small-3-1-24b-instruct-2503 # OPTIONAL CHANGE
+    openshift.io/display-name: mistral-small-3-1-24b-instruct-2503-fp8-dynamic # OPTIONAL CHANGE
     serving.kserve.io/deploymentMode: RawDeployment
-  name: mistral-small-3-1-24b-instruct-2503 # specify model name. This value will be used to invoke the model in the payload
+  name: mistral-small-3-1-24b-instruct-2503-fp8-dynamic # specify model name. This value will be used to invoke the model in the payload
   labels:
     opendatahub.io/dashboard: 'true'
 spec:

@@ -244,7 +244,7 @@ oc apply -f qwen-inferenceservice.yaml
 curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
     -H "Content-Type: application/json" \
     -d '{
-    "model": "mistral-small-3-1-24b-instruct-2503",
+    "model": "mistral-small-3-1-24b-instruct-2503-fp8-dynamic",
     "stream": true,
     "stream_options": {
         "include_usage": true
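After the rename, a quick way to confirm the fp8-dynamic download landed where the README expects it is `ilab model list`; this is a sketch assuming the InstructLab CLI from the README and its default cache location.

```bash
# List locally downloaded models (defaults to ~/.cache/instructlab/models)
# and check that the fp8-dynamic variant appears before serving it.
ilab model list
```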
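Once the updated manifest is applied, the renamed InferenceService can be checked for readiness with standard `oc` commands; a minimal sketch, assuming the `name` from the diff and the `qwen-inferenceservice.yaml` file referenced above.

```bash
# Re-apply the manifest, then confirm the predictor reports Ready.
oc apply -f qwen-inferenceservice.yaml
oc get inferenceservice mistral-small-3-1-24b-instruct-2503-fp8-dynamic
```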
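The final hunk shows only the start of the request body; a complete streaming call against the renamed model might look like the following. The route and the `messages` content are illustrative placeholders, not values from the README.

```bash
# Hypothetical end-to-end request; "model" must match the
# InferenceService metadata.name set in the manifest above.
curl https://<inference-service-name>-predictor-default.<domain>/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
    "model": "mistral-small-3-1-24b-instruct-2503-fp8-dynamic",
    "stream": true,
    "stream_options": {
        "include_usage": true
    },
    "messages": [
        {"role": "user", "content": "Say hello."}
    ]
}'
```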