@@ -120,11 +120,43 @@ def o200k_base():
120
120
}
121
121
122
122
123
def o200k_harmony():
    """Build the constructor parameters for the ``o200k_harmony`` encoding.

    The split pattern and mergeable ranks are taken unchanged from
    ``o200k_base()``. The special tokens are that encoding's special tokens,
    extended (and, for equal names, overridden) by the entries listed below
    plus a generated run of ``<|reserved_N|>`` placeholders.

    Returns:
        A dict with the keys ``"name"``, ``"pat_str"``, ``"mergeable_ranks"``
        and ``"special_tokens"``.
    """
    base_enc = o200k_base()

    special_tokens = {
        **base_enc["special_tokens"],
        "<|startoftext|>": 199998,
        "<|endoftext|>": 199999,
        "<|reserved_200000|>": 200000,
        "<|reserved_200001|>": 200001,
        "<|return|>": 200002,
        "<|constrain|>": 200003,
        "<|reserved_200004|>": 200004,
        "<|channel|>": 200005,
        "<|start|>": 200006,
        "<|end|>": 200007,
        "<|message|>": 200008,
        "<|reserved_200009|>": 200009,
        "<|reserved_200010|>": 200010,
        "<|reserved_200011|>": 200011,
        "<|call|>": 200012,
    }
    # Generated placeholders must be spelled exactly like the hand-written
    # "<|reserved_N|>" entries above: no whitespace inside the delimiters.
    # NOTE(review): if the base encoding already assigns an id inside this
    # range, that id ends up with two names -- confirm this is intended.
    special_tokens |= {f"<|reserved_{i}|>": i for i in range(200013, 201088)}

    return {
        "name": "o200k_harmony",
        "pat_str": base_enc["pat_str"],
        "mergeable_ranks": base_enc["mergeable_ranks"],
        "special_tokens": special_tokens,
    }
152
+
153
+
123
154
# Maps each encoding name to the zero-argument function that builds its
# constructor parameters.
ENCODING_CONSTRUCTORS = {
    "gpt2": gpt2,
    "r50k_base": r50k_base,
    "p50k_base": p50k_base,
    "p50k_edit": p50k_edit,
    "cl100k_base": cl100k_base,
    "o200k_base": o200k_base,
    "o200k_harmony": o200k_harmony,
}
0 commit comments