# scale-catalog/stable/fabulinus/3.0.9/ix_values.yaml

# Default container image: the CPU build (tag `latest-cpu`), pinned by digest.
# The main container picks this entry through `imageSelector: image` under
# workload.main.podSpec.containers.main.
image:
  repository: tytn/fabulinus
  pullPolicy: IfNotPresent
  tag: latest-cpu@sha256:f8b30eaa5b61d6085fc715f40619733846927c2266a3e20523e93fb58afeef38
# GPU build of the same repository (tag `latest-gpu`), pinned by digest.
# Nothing in this file's defaults selects it.
# NOTE(review): presumably used by pointing the main container's
# imageSelector at `gpuImage` - confirm.
gpuImage:
  repository: tytn/fabulinus
  pullPolicy: IfNotPresent
  tag: latest-gpu@sha256:9a826a578ca4157fbe3c091eb78aa35dfca6675f9fae24dcb90495ba4d9715d1

# Container-level security settings: run as root (uid 0 / gid 0) with a
# writable root filesystem.
# NOTE(review): presumably required by the upstream image - confirm before
# tightening any of these.
securityContext:
  container:
    readOnlyRootFilesystem: false
    runAsUser: 0
    runAsGroup: 0

# Main service definition with a single HTTP port entry.
service:
  main:
    ports:
      main:
        protocol: http
        # Port the application listens on inside the container.
        targetPort: 80
        # Port the service itself is exposed on.
        port: 10687

# Application settings. Each value below is rendered into a TAKEOFF_*
# environment variable on the main container (see the `env` map under
# workload.main.podSpec.containers.main).
fabulinus:
  # Compute device, passed as TAKEOFF_DEVICE. Allowed values: cpu | gpu
  # NOTE(review): presumably has to match the selected image variant
  # (latest-cpu vs latest-gpu) - confirm.
  device: cpu
  # Model name, passed as TAKEOFF_MODEL_NAME.
  model: "google/flan-t5-small"
  # Quantization type, passed as TAKEOFF_QUANT_TYPE. Allowed values:
  # int8 | float16 | bfloat16 | int8_float16 | int8_bfloat16
  quant_type: int8
  # Passed as TAKEOFF_MAX_BATCH_SIZE.
  max_batch_size: 32
  # Passed as TAKEOFF_DISABLE_BATCHING.
  disable_batching: true

# Pod definition for the main workload: image selection, health probes and
# the environment passed to the application.
workload:
  main:
    podSpec:
      containers:
        main:
          # Name of the top-level image entry to run (`image` = CPU build).
          imageSelector: image
          probes:
            # Liveness and readiness both issue an HTTP request to /docs.
            liveness:
              enabled: true
              type: http
              path: /docs
            readiness:
              enabled: true
              type: http
              path: /docs
            # Startup uses a TCP check rather than an HTTP request.
            startup:
              enabled: true
              type: tcp
          # Values are template expressions resolved from the top-level
          # `fabulinus` section; they are quoted so each renders as a string.
          env:
            TAKEOFF_DEVICE: "{{ .Values.fabulinus.device }}"
            TAKEOFF_MODEL_NAME: "{{ .Values.fabulinus.model }}"
            TAKEOFF_QUANT_TYPE: "{{ .Values.fabulinus.quant_type }}"
            TAKEOFF_MAX_BATCH_SIZE: "{{ .Values.fabulinus.max_batch_size }}"
            TAKEOFF_DISABLE_BATCHING: "{{ .Values.fabulinus.disable_batching }}"

# Persistent storage entry `models`, mounted into the container at
# /code/models.
# NOTE(review): presumably where the application stores downloaded model
# files - confirm against the image.
persistence:
  models:
    enabled: true
    mountPath: "/code/models"

# Enables the `open` portal entry.
# NOTE(review): presumably drives the "Open" link shown for the app in the
# SCALE UI - confirm against the common library docs.
portal:
  open:
    enabled: true