# cmake -B build
# cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

# Run 'ollama serve' in a separate terminal

# Avoid tokenizer thread contention during conversion.
export TOKENIZERS_PARALLELISM=false
LLAMA_CPP_PATH=/Users/appthreat/work/llama.cpp
# The convert script below is run from the llama.cpp checkout; bail out if it is missing.
cd "$LLAMA_CPP_PATH" || exit 1
GGUF_MODEL_Q8_0_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
GGUF_MODEL_Q8_0_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
FUSED_MODEL=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}

# Direct conversion to 8-bit from the fused BF16 version
rm -rf "${GGUF_MODEL_Q8_0_PATH:?}"
mkdir -p "${GGUF_MODEL_Q8_0_PATH}"
python convert_hf_to_gguf.py --outtype q8_0 --outfile "${GGUF_MODEL_Q8_0_PATH}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf" --model-name "${GGUF_MODEL_Q8_0_NAME}" "${FUSED_MODEL}"
cp "${MODEL_FILE_PATH}" "${GGUF_MODEL_Q8_0_PATH}/Modelfile"
# Ship the tokenizer/config files alongside the gguf so HF/ollama can load it.
cp "${FUSED_MODEL}"/*.json "${FUSED_MODEL}/merges.txt" "${GGUF_MODEL_Q8_0_PATH}/"
# BF16
GGUF_MODEL_BF16_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
GGUF_MODEL_BF16_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
# :? aborts instead of running 'rm -rf /…' if the path vars are ever unset.
rm -rf "${GGUF_MODEL_BF16_PATH:?}"
# Point the copied Modelfile at the bf16 gguf instead of the q8_0 one.
# NOTE: the sed expression must be double-quoted so ${TOOL_BASE_MODEL}/${PARAM_SIZE}
# expand; with single quotes the pattern is literal and never matches.
# 'sed -i ""' is the BSD/macOS in-place form (matches the sysctl usage above).
sed -i '' "s|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf|g" "${GGUF_MODEL_BF16_PATH}/Modelfile"
cp "${FUSED_MODEL}"/*.json "${FUSED_MODEL}/merges.txt" "${GGUF_MODEL_BF16_PATH}/"
6973
# MXFP4 - MOE only
GGUF_MODEL_MXFP4_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
GGUF_MODEL_MXFP4_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
rm -rf "${GGUF_MODEL_MXFP4_PATH:?}"
mkdir -p "${GGUF_MODEL_MXFP4_PATH}"
# Quantize from the BF16 gguf produced above (MXFP4_MOE requires a bf16 source).
llama-quantize "${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf" "${GGUF_MODEL_MXFP4_PATH}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf" MXFP4_MOE
cp "${MODEL_FILE_PATH}" "${GGUF_MODEL_MXFP4_PATH}/Modelfile"
# Double quotes so the model/size variables expand inside the sed expression.
sed -i '' "s|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf|g" "${GGUF_MODEL_MXFP4_PATH}/Modelfile"
cp "${FUSED_MODEL}"/*.json "${FUSED_MODEL}/merges.txt" "${GGUF_MODEL_MXFP4_PATH}/"
7084if [ " $TOOL_BASE_MODEL " == " cdx1-mini" ] || [ " $TOOL_BASE_MODEL " == " cdx1-nano" ]; then
7185 GGUF_MODEL_Q6_K_NAME=${HF_ORG} /${TOOL_BASE_MODEL} -${PARAM_SIZE} -Q6_K-${FORMAT}
7286 GGUF_MODEL_Q6_K_PATH=${CDXGEN_FT_PATH} /${HF_ORG} /${TOOL_BASE_MODEL} -${PARAM_SIZE} -Q6_K-${FORMAT}
# hf_transfer is disabled for more reliable (if slower) uploads.
export HF_HUB_ENABLE_HF_TRANSFER=0
hf auth whoami
# Exclude glob must not contain a leading space, or nothing is excluded.
hf upload --quiet --exclude "**/README.md" --repo-type model "${GGUF_MODEL_Q8_0_NAME}" "${GGUF_MODEL_Q8_0_PATH}" .
hf upload --quiet --exclude "**/README.md" --repo-type model "${GGUF_MODEL_MXFP4_NAME}" "${GGUF_MODEL_MXFP4_PATH}" .
117132if [ " $TOOL_BASE_MODEL " == " cdx1-mini" ] || [ " $TOOL_BASE_MODEL " == " cdx1-nano" ]; then
118133 hf upload --quiet --exclude " **/README.md" --repo-type model ${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_PATH} .
119134else
123138fi
124139hf upload --quiet --exclude " **/README.md" --repo-type model ${GGUF_MODEL_BF16_NAME} ${GGUF_MODEL_BF16_PATH} .
125140
### Upload to the ollama registry. Move this to a separate script in the future.

# Pull the freshly uploaded HF gguf, re-tag it under our namespace, push, then
# drop the temporary hf.co tag.
ollama pull "hf.co/${GGUF_MODEL_Q8_0_NAME}"
ollama cp "hf.co/${GGUF_MODEL_Q8_0_NAME}" "${GGUF_MODEL_Q8_0_NAME}"
ollama push "${GGUF_MODEL_Q8_0_NAME}"
ollama rm "hf.co/${GGUF_MODEL_Q8_0_NAME}"
# Same pull/re-tag/push/cleanup dance for the MXFP4 quant.
ollama pull "hf.co/${GGUF_MODEL_MXFP4_NAME}"
ollama cp "hf.co/${GGUF_MODEL_MXFP4_NAME}" "${GGUF_MODEL_MXFP4_NAME}"
ollama push "${GGUF_MODEL_MXFP4_NAME}"
ollama rm "hf.co/${GGUF_MODEL_MXFP4_NAME}"
131153if [ " $TOOL_BASE_MODEL " == " cdx1-mini" ] || [ " $TOOL_BASE_MODEL " == " cdx1-nano" ]; then
132154 ollama pull hf.co/${GGUF_MODEL_Q6_K_NAME}
133155 ollama cp hf.co/${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_NAME}