Skip to content

Commit 9376149

Browse files
authored
Merge pull request #38 from modelscope/v1.0.3_dev
v1.0.3
2 parents 7a9f90e + 01fd833 commit 9376149

File tree

94 files changed

+8775
-416
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+8775
-416
lines changed

requirements/framework.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
albumentations
2+
beautifulsoup4
23
bezier
34
einops
4-
modelscope
5+
modelscope==1.14.0
56
ms-swift>=2.0.1
67
numpy
78
open_clip_torch

requirements/scepter_studio.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
bitsandbytes
2-
gradio>=3.47.1,<4.0.0
2+
gradio
33
imagehash
44
psutil
55
tiktoken

scepter/methods/edit/edit_512_lora.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ SOLVER:
2020
#
2121
FILE_SYSTEM:
2222
NAME: "ModelscopeFs"
23-
TEMP_DIR: "./cache/data"
23+
TEMP_DIR: "./cache/cache_data"
2424
#
2525
TUNER:
2626
-

scepter/methods/examples/classification/example.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ SOLVER:
1111
# NUM_FOLDS DESCRIPTION: Num folds for training. TYPE: int default: 0
1212
NUM_FOLDS: 1
1313
# WORK_DIR DESCRIPTION: Save dir of the training log or model. TYPE: str default: ''
14-
WORK_DIR: ./exp12/
14+
WORK_DIR: ./cache/save_data/example/
1515
LOG_FILE: std_log.txt
1616
# EVAL_INTERVAL DESCRIPTION: Eval the model interval. TYPE: int default: 1
1717
EVAL_INTERVAL: 1
@@ -102,7 +102,7 @@ SOLVER:
102102
# DATASET DESCRIPTION: the public dataset name TYPE: str default: 'cifar10'
103103
DATASET: cifar10
104104
# DATA_ROOT DESCRIPTION: the download data save path TYPE: str default: ''
105-
DATA_ROOT: ./local_data/cifar10
105+
DATA_ROOT: ./cache/cache_data/cifar10
106106
# MODE DESCRIPTION: test TYPE: str default: test
107107
MODE: test
108108
# PIN_MEMORY DESCRIPTION: pin_memory for data loader TYPE: bool default: False
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
ENV:
2+
BACKEND: nccl
3+
#
4+
SOLVER:
5+
NAME: LatentDiffusionSolver
6+
RESUME_FROM:
7+
LOAD_MODEL_ONLY: True
8+
USE_FSDP: False
9+
SHARDING_STRATEGY:
10+
USE_AMP: True
11+
DTYPE: float16
12+
CHANNELS_LAST: True
13+
MAX_STEPS: 1000
14+
MAX_EPOCHS: -1
15+
NUM_FOLDS: 1
16+
ACCU_STEP: 1
17+
EVAL_INTERVAL: 100
18+
RESCALE_LR: False
19+
#
20+
WORK_DIR: ./cache/save_data/dit_pixart_alpha_1024_lora
21+
LOG_FILE: std_log.txt
22+
#
23+
FILE_SYSTEM:
24+
NAME: "ModelscopeFs"
25+
TEMP_DIR: "./cache/cache_data"
26+
#
27+
FREEZE:
28+
#
29+
TUNER:
30+
- NAME: SwiftLoRA
31+
R: 128
32+
LORA_ALPHA: 128
33+
LORA_DROPOUT: 0.0
34+
BIAS: "none"
35+
TARGET_MODULES: "model.*(.q|.k|.v|.o|mlp.fc1|mlp.fc2)$"
36+
#
37+
MODEL:
38+
NAME: LatentDiffusionPixart
39+
PARAMETERIZATION: eps
40+
TIMESTEPS: 1000
41+
MIN_SNR_GAMMA:
42+
ZERO_TERMINAL_SNR: False
43+
PRETRAINED_MODEL:
44+
IGNORE_KEYS: [ ]
45+
SCALE_FACTOR: 0.18215
46+
SIZE_FACTOR: 8
47+
DECODER_BIAS: 0.5
48+
DEFAULT_N_PROMPT:
49+
SCHEDULE_ARGS:
50+
"NAME": "linear"
51+
"BETA_MIN": 0.0001
52+
"BETA_MAX": 0.02
53+
USE_EMA: False
54+
LOAD_REFINER: False
55+
#
56+
DIFFUSION_MODEL:
57+
NAME: PixArt
58+
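PRETRAINED_MODEL: ms://AI-ModelScope/PixArt-alpha@PixArt-XL-2-1024-MS.pth  # reconstructed: the capture showed an e-mail-obfuscation placeholder here; verify against the repository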
59+
INPUT_SIZE: 128
60+
PATCH_SIZE: 2
61+
IN_CHANNELS: 4
62+
HIDDEN_SIZE: 1152
63+
DEPTH: 28
64+
NUM_HEADS: 16
65+
MLP_RATIO: 4.0
66+
CLASS_DROPOUT_PROB: 0.1
67+
PRED_SIGMA: True
68+
DROP_PATH: 0.0
69+
WINDOW_DIZE: 0
70+
USE_REL_POS: False
71+
CAPTION_CHANNELS: 4096
72+
LEWEI_SCALE: 2
73+
MODEL_MAX_LENGTH: 120
74+
#
75+
FIRST_STAGE_MODEL:
76+
NAME: AutoencoderKL
77+
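PRETRAINED_MODEL: ms://AI-ModelScope/PixArt-alpha@<vae-checkpoint-file>  # original file name lost to e-mail obfuscation in this capture; restore from the repository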
78+
EMBED_DIM: 4
79+
IGNORE_KEYS: [ ]
80+
BATCH_SIZE: 1
81+
#
82+
ENCODER:
83+
NAME: Encoder
84+
CH: 128
85+
OUT_CH: 3
86+
NUM_RES_BLOCKS: 2
87+
IN_CHANNELS: 3
88+
ATTN_RESOLUTIONS: [ ]
89+
CH_MULT: [ 1, 2, 4, 4 ]
90+
Z_CHANNELS: 4
91+
DOUBLE_Z: True
92+
DROPOUT: 0.0
93+
RESAMP_WITH_CONV: True
94+
#
95+
DECODER:
96+
NAME: Decoder
97+
CH: 128
98+
OUT_CH: 3
99+
NUM_RES_BLOCKS: 2
100+
IN_CHANNELS: 3
101+
ATTN_RESOLUTIONS: [ ]
102+
CH_MULT: [ 1, 2, 4, 4 ]
103+
Z_CHANNELS: 4
104+
DROPOUT: 0.0
105+
RESAMP_WITH_CONV: True
106+
GIVE_PRE_END: False
107+
TANH_OUT: False
108+
#
109+
COND_STAGE_MODEL:
110+
NAME: T5EmbedderHF
111+
PRETRAINED_MODEL: ms://AI-ModelScope/PixArt-alpha@t5-v1_1-xxl/
112+
TOKENIZER_PATH: ms://AI-ModelScope/PixArt-alpha@t5-v1_1-xxl/
113+
LENGTH: 120
114+
CLEAN: heavy
115+
USE_GRAD: False
116+
#
117+
LOSS:
118+
NAME: ReconstructLoss
119+
LOSS_TYPE: l2
120+
#
121+
SAMPLE_ARGS:
122+
SAMPLER: ddim
123+
SAMPLE_STEPS: 20
124+
SEED: 2024
125+
GUIDE_SCALE: 4.5
126+
GUIDE_RESCALE: 0.5
127+
DISCRETIZATION: trailing
128+
RUN_TRAIN_N: False
129+
#
130+
OPTIMIZER:
131+
NAME: AdamW
132+
LEARNING_RATE: 0.0001
133+
BETAS: [ 0.9, 0.999 ]
134+
EPS: 1e-8
135+
WEIGHT_DECAY: 1e-2
136+
AMSGRAD: False
137+
#
138+
TRAIN_DATA:
139+
NAME: ImageTextPairMSDataset
140+
MODE: train
141+
MS_DATASET_NAME: style_custom_dataset
142+
MS_DATASET_NAMESPACE: damo
143+
MS_DATASET_SUBNAME: 3D
144+
PROMPT_PREFIX: ""
145+
MS_DATASET_SPLIT: train
146+
MS_REMAP_KEYS: { 'Image:FILE': 'Target:FILE' }
147+
REPLACE_STYLE: False
148+
PIN_MEMORY: True
149+
BATCH_SIZE: 1
150+
NUM_WORKERS: 4
151+
SAMPLER:
152+
NAME: LoopSampler
153+
TRANSFORMS:
154+
- NAME: LoadImageFromFile
155+
RGB_ORDER: RGB
156+
BACKEND: pillow
157+
- NAME: FlexibleResize
158+
INTERPOLATION: bilinear
159+
SIZE: [ 1024, 1024 ]
160+
INPUT_KEY: [ 'img' ]
161+
OUTPUT_KEY: [ 'img' ]
162+
BACKEND: pillow
163+
- NAME: FlexibleCenterCrop
164+
SIZE: [ 1024, 1024 ]
165+
INPUT_KEY: [ 'img' ]
166+
OUTPUT_KEY: [ 'img' ]
167+
BACKEND: pillow
168+
- NAME: ImageToTensor
169+
INPUT_KEY: [ 'img' ]
170+
OUTPUT_KEY: [ 'img' ]
171+
BACKEND: pillow
172+
- NAME: Normalize
173+
MEAN: [ 0.5, 0.5, 0.5 ]
174+
STD: [ 0.5, 0.5, 0.5 ]
175+
INPUT_KEY: [ 'img' ]
176+
OUTPUT_KEY: [ 'image' ]
177+
BACKEND: torchvision
178+
- NAME: Select
179+
KEYS: [ 'image', 'prompt' ]
180+
META_KEYS: [ 'data_key' ]
181+
#
182+
EVAL_DATA:
183+
NAME: Text2ImageDataset
184+
MODE: eval
185+
PROMPT_FILE:
186+
PROMPT_DATA: [ "a boy wearing a jacket", "a dog running on the lawn" ]
187+
IMAGE_SIZE: [ 1024, 1024 ]
188+
FIELDS: [ "prompt" ]
189+
DELIMITER: '#;#'
190+
PROMPT_PREFIX: ''
191+
PIN_MEMORY: True
192+
BATCH_SIZE: 1
193+
NUM_WORKERS: 4
194+
TRANSFORMS:
195+
- NAME: Select
196+
KEYS: [ 'index', 'prompt' ]
197+
META_KEYS: [ 'image_size' ]
198+
#
199+
TRAIN_HOOKS:
200+
-
201+
NAME: BackwardHook
202+
PRIORITY: 0
203+
-
204+
NAME: LogHook
205+
LOG_INTERVAL: 10
206+
SHOW_GPU_MEM: True
207+
-
208+
NAME: TensorboardLogHook
209+
-
210+
NAME: CheckpointHook
211+
INTERVAL: 10000
212+
PRIORITY: 200
213+
SAVE_LAST: True
214+
SAVE_NAME_PREFIX: 'step'
215+
DISABLE_SNAPSHOT: True
216+
#
217+
EVAL_HOOKS:
218+
-
219+
NAME: ProbeDataHook
220+
PROB_INTERVAL: 100
221+
SAVE_LAST: True
222+
SAVE_NAME_PREFIX: 'step'
223+
SAVE_PROBE_PREFIX: 'image'

0 commit comments

Comments
 (0)