Merge branch 'develop', version v4.1.0

deeperlearner · deeperlearner · commit 0c6cc065019e · 2021-09-12T12:49:21.000+08:00
Add new run_args: `--mp` and `--n_jobs`
diff --git a/logger/logger.py b/logger/logger.py
@@ -19,12 +19,12 @@ def setup_logging(
     if log_config.is_file():
         config = read_json(log_config)
         # modify logging paths based on run config
-        for _, handler in config["handlers"].items():
-            if "filename" in handler:
-                if filename is None:
-                    handler["filename"] = str(save_dir / handler["filename"])
+        for handler_k, handler_v in config["handlers"].items():
+            if "filename" in handler_v:
+                if filename is None or handler_k != "info_file_handler":
+                    handler_v["filename"] = str(save_dir / handler_v["filename"])
                 else:
-                    handler["filename"] = str(save_dir / filename)
+                    handler_v["filename"] = str(save_dir / filename)
 
         logging.config.dictConfig(config)
     else:
diff --git a/logger/logger_config_mp.json b/logger/logger_config_mp.json
@@ -30,6 +30,6 @@
     },
     "root": {
         "level": "INFO",
-        "handlers": ["info_file_handler"]
+        "handlers": ["warning_file_handler"]
     }
 }
diff --git a/mains/__init__.py b/mains/__init__.py
@@ -1,3 +1,5 @@
+from .bootstrap import bootstrapping
 from .cross_validation import Cross_Valid
-from .train import train
+from .multiprocess import Multiprocessor
+from .train import train, train_mp
 from .test import test
diff --git a/mains/bootstrap.py b/mains/bootstrap.py
diff --git a/mains/main.py b/mains/main.py
@@ -9,7 +9,7 @@
 sys.path.insert(1, os.path.join(sys.path[0], ".."))
 from logger import get_logger
 from parse_config import ConfigParser
-from mains import train, test
+from mains import train, train_mp, test
 from utils import msg_box, consuming_time
 
 
@@ -18,13 +18,13 @@
 
     # crucial args executed in scripts
     run_args = args.add_argument_group("run_args")
+    run_args.add_argument("--optuna", action="store_true")
+    run_args.add_argument("--mp", action="store_true", help="multiprocessing")
+    run_args.add_argument("--n_jobs", default=2, type=int, help="number of jobs running at the same time")
     run_args.add_argument("-c", "--config", default="configs/config.json", type=str)
     run_args.add_argument("--mode", default="train", type=str)
-    run_args.add_argument("--optuna", action="store_true")
     run_args.add_argument("--resume", default=None, type=str)
     run_args.add_argument("--run_id", default=None, type=str)
-    run_args.add_argument("--log_name", default=None, type=str)
-    run_args.add_argument("--mp", action="store_true", help="multiprocessing")
 
     # custom cli options to modify configuration from default values given in json file.
     mod_args = args.add_argument_group("mod_args")
@@ -55,6 +55,7 @@
     test_args.add_argument("--output_path", default=None, type=str)
 
     config = ConfigParser.from_args(args, options)
+    config.set_log()
     logger = get_logger("main")
     mode = config.run_args.mode
     msg = msg_box(mode.upper())
@@ -66,6 +67,8 @@
             objective = config.init_obj(["optuna"])
             n_trials = config["optuna"]["n_trials"]
 
+            config.set_log(log_name="optuna.log")
+            logger = get_logger("optuna")
             optuna.logging.enable_propagation()
             optuna.logging.disable_default_handler()
             direction = "maximize" if max_min == "max" else "minimize"
@@ -81,6 +84,9 @@
             msg += f"\nBest hyperparameters: {study.best_params}"
             logger.info(msg)
         else:
-            train(config)
+            if config.run_args.mp:
+                train_mp(config)
+            else:
+                train(config)
     elif mode == "test":
         test(config)
diff --git a/mains/multiprocess.py b/mains/multiprocess.py
@@ -0,0 +1,29 @@
+from multiprocessing import Process, Queue
+
+
+# ref: https://stackoverflow.com/a/45829852/8380054
+class Multiprocessor():
+    """Run callables in separate processes and collect their return values.
+
+    Each call to ``run`` starts one ``multiprocessing.Process``. The child
+    reports its outcome through a shared ``multiprocessing.Queue`` and
+    ``wait`` gathers one outcome per started process.
+    """
+
+    def __init__(self):
+        self.processes = []  # every Process started by run()
+        self.queue = Queue()  # (succeeded, payload) tuples posted by children
+
+    @staticmethod
+    def _wrapper(func, queue, args, kwargs):
+        """Child-process entry point: call ``func`` and post its outcome.
+
+        Posts ``(True, return_value)`` on success. If ``func`` raises, posts
+        ``(False, repr(error))`` before re-raising, so the parent blocked in
+        ``wait()`` still receives an item for this process instead of
+        waiting forever on a result that will never arrive.
+        """
+        try:
+            ret = func(*args, **kwargs)
+        except Exception as err:
+            # repr() keeps the payload picklable whatever the exception type
+            queue.put((False, repr(err)))
+            raise  # still fail the child: traceback and exit code are kept
+        queue.put((True, ret))
+
+    def run(self, func, *args, **kwargs):
+        """Start ``func(*args, **kwargs)`` in a new process without blocking."""
+        p = Process(
+            target=self._wrapper,
+            args=(func, self.queue, args, kwargs),
+        )
+        self.processes.append(p)
+        p.start()
+
+    def wait(self):
+        """Collect one outcome per started process, then join them all.
+
+        Returns:
+            list of return values in the order the processes reported them
+            (completion order, not the order in which they were started).
+
+        Raises:
+            RuntimeError: if at least one child raised. It is raised only
+                after every outcome has been read and every process joined.
+        """
+        rets = []
+        errors = []
+        # Read the queue before joining: a process that has put items on a
+        # queue waits for them to be flushed before it terminates, so joining
+        # first can deadlock.
+        for _ in self.processes:
+            ok, payload = self.queue.get()
+            if ok:
+                rets.append(payload)
+            else:
+                errors.append(payload)
+        for p in self.processes:
+            p.join()
+        if errors:
+            raise RuntimeError(
+                f"{len(errors)} of {len(self.processes)} subprocess(es) failed: "
+                + "; ".join(errors)
+            )
+        return rets
diff --git a/mains/test.py b/mains/test.py
@@ -8,11 +8,10 @@
 from tqdm import tqdm
 
 from logger import get_logger
-from mains import Cross_Valid
+from mains import Cross_Valid, bootstrapping
 import models.metric as module_metric
 from models.metric import MetricTracker
 from utils import prepare_device, get_by_path, msg_box, consuming_time
-from utils.bootstrap import bootstrapping
 
 # fix random seeds for reproducibility
 SEED = 123
diff --git a/mains/train.py b/mains/train.py
@@ -6,7 +6,7 @@
 from sklearn.utils.class_weight import compute_class_weight
 
 from logger import get_logger
-from mains import Cross_Valid
+from mains import Cross_Valid, Multiprocessor
 import models.metric as module_metric
 from utils import (
     prepare_device,
@@ -19,10 +19,36 @@
 if is_apex_available():
     from apex import amp
 
-logger = get_logger("train")
 
+def train_mp(config):
+    """Train the cross-validation folds in separate processes, in batches.
+
+    Folds ``0 .. k_fold - 1`` are launched at most ``n_jobs`` at a time via
+    ``Multiprocessor``; each batch is awaited before the next one starts.
+
+    :param config: parsed configuration; reads
+        ``config["cross_validation"]["k_fold"]``, ``config.run_args.mp`` and
+        ``config.run_args.n_jobs``.
+    :return: list with one ``train`` return value per fold, accumulated
+        batch by batch.
+    :raises ValueError: if ``n_jobs`` is smaller than 1 or larger than
+        ``k_fold``.
+    """
+    k_fold = config["cross_validation"]["k_fold"]
+    do_mp = config.run_args.mp
+    n_jobs = config.run_args.n_jobs
+    # Explicit checks instead of ``assert`` so they survive ``python -O``.
+    if n_jobs < 1:
+        # a batch size below 1 would never advance past the first fold
+        raise ValueError("n_jobs must be at least 1.")
+    if n_jobs > k_fold:
+        raise ValueError("n_jobs can not be more than k_fold.")
+
+    results = []
+    for batch_start in range(0, k_fold, n_jobs):
+        mp = Multiprocessor()
+        batch_stop = min(batch_start + n_jobs, k_fold)
+        for fold_idx in range(batch_start, batch_stop):
+            mp.run(train, config, do_mp, fold_idx)
+        results.extend(mp.wait())  # block until the whole batch has finished
+
+    return results
+
+
+def train(config, do_mp=False, fold_idx=0):
+    # different logging when multiprocessing
+    if do_mp:
+        config.set_log(log_name=f"fold_{fold_idx}.log")
+    else:
+        config.set_log()
+    logger = get_logger("train")
 
-def train(config):
     # setup GPU device if available, move model into configured device
     device, device_ids = prepare_device(config["n_gpu"])
 
@@ -83,12 +109,14 @@ def train(config):
     k_fold = config["cross_validation"]["k_fold"]
 
     results = pd.DataFrame()
-    Cross_Valid.create_CV(repeat_time, k_fold)
+    Cross_Valid.create_CV(repeat_time, k_fold, fold_idx=fold_idx)
     start = time.time()
     for t in range(repeat_time):
         if k_fold > 1:  # cross validation enabled
             train_datasets["data"].split_cv_indexes(k_fold)
-        for k in range(k_fold):
+        # 1 loop for multi-process; k_fold loops for single-process
+        k_time = 1 if do_mp else k_fold
+        for k in range(k_time):
             # data_loaders
             train_data_loaders = dict()
             valid_data_loaders = dict()
@@ -165,7 +193,7 @@ def train(config):
             train_log = trainer.train()
             results = pd.concat((results, train_log), axis=1)
 
-            if k_fold > 1:
+            if k_time > 1:
                 Cross_Valid.next_fold()
 
         if repeat_time > 1:
@@ -184,4 +212,7 @@ def train(config):
 
     logger.info(msg)
 
+    max_min, mnt_metric = config["trainer"]["kwargs"]["monitor"].split()
+    result = result.at[mnt_metric, "mean"]
+
     return result
diff --git a/mains/train_mp.py b/mains/train_mp.py
diff --git a/parse_config.py b/parse_config.py
@@ -31,6 +31,8 @@ class to parse configuration json file. Handles hyperparameters for training,
         if modification is None:
             modification = {}
         modification.update(self.mod_args)
+        if self.run_args.mp:  # lower trainer verbosity when multiprocessing
+            modification.update({"trainer;kwargs;verbosity": 0})
         self._config = _update_config(config, modification)
 
         # test_args: self.test_args
@@ -49,20 +51,9 @@ class to parse configuration json file. Handles hyperparameters for training,
             ensure_dir(dir_path)
             self.save_dir[dir_name] = dir_path
 
-        log_config = {}
         if self.run_args.mode == "train":
-            if self.run_args.mp:  # multiprocessing
-                log_config.update({"log_config": "logger/logger_config_mp.json"})
             self.backup()
 
-        # configure logging module
-        setup_logging(
-            self.save_dir["log"],
-            root_dir=self.root_dir,
-            filename=self.run_args.log_name,
-            **log_config
-        )
-
     @classmethod
     def from_args(cls, parser, options=""):
         """
@@ -133,6 +124,14 @@ def __getitem__(self, name):
         """Access items like ordinary dict."""
         return self.config[name]
 
+    # configure logging module
+    def set_log(self, log_name=None):
+        """Configure the logging module for this run.
+
+        Calls ``setup_logging`` with the run's log directory
+        (``self.save_dir["log"]``) and ``self.root_dir``.
+
+        :param log_name: forwarded to ``setup_logging`` as ``filename``.
+            NOTE(review): ``setup_logging`` appears to apply it only to the
+            ``info_file_handler`` and to keep the configured file names when
+            it is ``None`` -- confirm against logger/logger.py.
+        """
+        setup_logging(
+            self.save_dir["log"],
+            root_dir=self.root_dir,
+            filename=log_name,
+        )
+
     # read-only attributes
     @property
     def config(self):
diff --git a/scripts/examples.sh b/scripts/examples.sh
@@ -3,15 +3,15 @@
 #  PyTorch Template
 # ------------------
 # Repository    : https://github.com/deeperlearner/pytorch-template
-VERSION="v4.0.0"
+VERSION="v4.1.0"
 
 
 # This script run train and test for examples
 usage() { echo "Usage: $0 [-dpr]" 1>&2; exit 1; }
 
 # record execution time to log
 time_log() {
-    RUNNING_TIME=$(date +%T -d "1/1 + $SECONDS sec")
+    RUNNING_TIME=$(($SECONDS/86400))" days, "$(date +%T -d "1/1 + $SECONDS sec")
     echo -e "---------------------------------" | tee -a $LOG_FILE
     echo -e "$TYPE running time: $RUNNING_TIME" | tee -a $LOG_FILE
     let "TOTAL_SECONDS += $SECONDS"
@@ -34,7 +34,7 @@ while getopts "dpr" flag; do
       EXP="Adult_logistic"
       RUN_ID=${VERSION}
       # use optuna to find the best h.p.
-      python3 mains/main.py -c "configs/$CONFIG.json" --mode train --optuna --run_id $RUN_ID --log_name "optuna.log" --name $EXP
+      python3 mains/main.py --optuna -c "configs/$CONFIG.json" --mode train --run_id $RUN_ID --name $EXP
       python3 mains/main.py -c "saved/$EXP/$RUN_ID/best_hp/${CONFIG##*/}.json" --mode test \
           --resume "saved/$EXP/$RUN_ID/best_hp/model_best.pth" --run_id $RUN_ID
 
@@ -43,6 +43,13 @@ while getopts "dpr" flag; do
       # python3 mains/main.py -c "saved/$EXP/$RUN_ID/${CONFIG##*/}.json" --mode test \
       #     --resume "saved/$EXP/$RUN_ID/model/model_best.pth" --run_id $RUN_ID --bootstrapping
 
+      CONFIG="examples/Adult_logistic"
+      EXP="Adult_logistic_mp"
+      RUN_ID=${VERSION}
+      python3 mains/main.py --optuna --mp -c "configs/$CONFIG.json" --mode train --run_id $RUN_ID --name $EXP
+      python3 mains/main.py -c "saved/$EXP/$RUN_ID/best_hp/${CONFIG##*/}.json" --mode test \
+          --resume "saved/$EXP/$RUN_ID/best_hp/model_best.pth" --run_id $RUN_ID
+
       time_log
       ;;
     p)
@@ -64,7 +71,7 @@ while getopts "dpr" flag; do
       python3 mains/main.py -c "configs/$CONFIG.json" --mode train --run_id $RUN_ID
       python3 mains/main.py -c "saved/$EXP/$RUN_ID/${CONFIG##*/}.json" --mode test \
           --resume "saved/$EXP/$RUN_ID/model/model_best.pth" --run_id $RUN_ID
-        
+
       # ImageNet_VGG16 (need to download ImageNet dataset)
       CONFIG="examples/ImageNet_VGG16"
       EXP="ImageNet_VGG16"
@@ -73,15 +80,15 @@ while getopts "dpr" flag; do
       # no test data
       # python3 mains/main.py -c "saved/$EXP/$RUN_ID/${CONFIG##*/}.json" --mode test \
       #     --resume "saved/$EXP/$RUN_ID/model/model_best.pth" --run_id $RUN_ID
-        
+
       # Adult_logistic cv by single-process
       CONFIG="examples/Adult_logistic"
       EXP="Adult_logistic"
       RUN_ID=${VERSION}
       python3 mains/main.py -c "configs/$CONFIG.json" --mode train --run_id $RUN_ID
       python3 mains/main.py -c "saved/$EXP/$RUN_ID/${CONFIG##*/}.json" --mode test \
           --resume "saved/$EXP/$RUN_ID/model/model_best.pth" --run_id $RUN_ID
-        
+
       # Not implemented yet
       # I'm going to try `import torch.multiprocessing as mp`
       ## Adult_logistic cv by multi-process
@@ -97,6 +104,6 @@ while getopts "dpr" flag; do
   esac
 done
 
-TOTAL_TIME=$(date +%T -d "1/1 + $TOTAL_SECONDS sec")
+TOTAL_TIME=$(($TOTAL_SECONDS/86400))" days, "$(date +%T -d "1/1 + $TOTAL_SECONDS sec")
 echo -e "---------------------------------" | tee -a $LOG_FILE
 echo -e "total running time: $TOTAL_TIME" | tee -a $LOG_FILE
diff --git a/scripts/run.sh b/scripts/run.sh
@@ -11,7 +11,7 @@ usage() { echo "Usage: $0 [-dpr]" 1>&2; exit 1; }
 
 # record execution time to log
 time_log() {
-    RUNNING_TIME=$(date +%T -d "1/1 + $SECONDS sec")
+    RUNNING_TIME=$(($SECONDS/86400))" days, "$(date +%T -d "1/1 + $SECONDS sec")
     echo -e "---------------------------------" | tee -a $LOG_FILE
     echo -e "$TYPE running time: $RUNNING_TIME" | tee -a $LOG_FILE
     let "TOTAL_SECONDS += $SECONDS"
@@ -34,7 +34,7 @@ while getopts "dpr" flag; do
       EXP="dataset_model"
       RUN_ID=${VERSION}
       # search for best hp
-      python3 mains/main.py -c "configs/$CONFIG.json" --mode train --optuna --run_id $RUN_ID --log_name "optuna.log" --name $EXP
+      python3 mains/main.py --optuna -c "configs/$CONFIG.json" --mode train --run_id $RUN_ID --name $EXP
       python3 mains/main.py -c "saved/$EXP/$RUN_ID/best_hp/${CONFIG##*/}.json" --mode test \
           --resume "saved/$EXP/$RUN_ID/best_hp/model_best.pth" --run_id $RUN_ID
 
@@ -69,6 +69,6 @@ while getopts "dpr" flag; do
   esac
 done
 
-TOTAL_TIME=$(date +%T -d "1/1 + $TOTAL_SECONDS sec")
+TOTAL_TIME=$(($TOTAL_SECONDS/86400))" days, "$(date +%T -d "1/1 + $TOTAL_SECONDS sec")
 echo -e "---------------------------------" | tee -a $LOG_FILE
 echo -e "total running time: $TOTAL_TIME" | tee -a $LOG_FILE
diff --git a/tune/objective.py b/tune/objective.py

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,6 @@`
`30`	`30`	`},`
`31`	`31`	`"root": {`
`32`	`32`	`"level": "INFO",`
`33`		`- "handlers": ["info_file_handler"]`
	`33`	`+ "handlers": ["warning_file_handler"]`
`34`	`34`	`}`
`35`	`35`	`}`