Support custom train loop model prediction and evaluation (#2759)

typhoonzero · web-flow · commit b5e09a8eada8 · 2020-07-29T16:19:38.000+08:00
* support custom train loop model prediction and evaluation

* update models repo

* update
diff --git a/docker/dev/build.sh b/docker/dev/build.sh
@@ -68,11 +68,11 @@ cp target/*.jar $SQLFLOW_BIN
 echo "Build model zoo ..."
 cd $SQLFLOW_BIN
 if [[ ! -d models ]]; then
-    git clone https://github.com/sql-machine-learning/models
+    git clone https://github.com/sql-machine-learning/models.git
 fi
 cd models
 git fetch origin # The residual local repo might not be on a branch.
-git checkout v0.0.5 -b v0.0.5
+git checkout v0.0.6 -b v0.0.6
 python setup.py bdist_wheel -q --dist-dir $SQLFLOW_BIN > /dev/null
 
 echo "Convert tutorials from Markdown to IPython notebooks ..."
diff --git a/go/attribute/attribute_test.go b/go/attribute/attribute_test.go
@@ -143,7 +143,7 @@ func TestParamsDocs(t *testing.T) {
 
 	a.Equal(11, len(PremadeModelParamsDocs))
 	ExtractSQLFlowModelsSymbolOnce()
-	a.Equal(20, len(PremadeModelParamsDocs))
+	a.Equal(21, len(PremadeModelParamsDocs))
 	a.Equal(len(PremadeModelParamsDocs["DNNClassifier"]), 12)
 	a.NotContains(PremadeModelParamsDocs["DNNClassifier"], "feature_columns")
 	a.Contains(PremadeModelParamsDocs["DNNClassifier"], "optimizer")
diff --git a/go/cmd/sqlflow/main_test.go b/go/cmd/sqlflow/main_test.go
@@ -636,7 +636,7 @@ func TestComplete(t *testing.T) {
 
 	p.InsertText(`RAIN `, false, true)
 	c = s.completer(*p.Document())
-	a.Equal(20, len(c))
+	a.Equal(21, len(c))
 	a.Equal("BoostedTreesClassifier", c[0].Text)
 
 	p.InsertText(`DNN`, false, true)
diff --git a/go/cmd/sqlflowserver/e2e_mysql_test.go b/go/cmd/sqlflowserver/e2e_mysql_test.go
@@ -26,6 +26,34 @@ import (
 	server "sqlflow.org/sqlflow/go/sqlflowserver"
 )
 
+// caseCustomLoopModel runs TRAIN, PREDICT and EVALUATE statements in turn
+// against sqlflow_models.CustomClassifier. PREDICT and EVALUATE both use the
+// model written by TRAIN, so the case stops at the first failing statement.
+func caseCustomLoopModel(t *testing.T) {
+	trainSQL := fmt.Sprintf(`SELECT * FROM %s
+TO TRAIN sqlflow_models.CustomClassifier
+LABEL class
+INTO sqlflow_models.custom_loop_model;`, caseTrainTable)
+	if _, _, _, err := connectAndRunSQL(trainSQL); err != nil {
+		t.Fatalf("Run trainSQL error: %v", err)
+	}
+	// The statements below read sqlflow_models.custom_loop_model saved above.
+	predSQL := fmt.Sprintf(`SELECT * FROM %s
+TO PREDICT sqlflow_models.custom_loop_model_pred_result.class
+USING sqlflow_models.custom_loop_model;`, caseTrainTable)
+	if _, _, _, err := connectAndRunSQL(predSQL); err != nil {
+		t.Fatalf("Run predSQL error: %v", err)
+	}
+	evalSQL := fmt.Sprintf(`SELECT * FROM %s
+TO EVALUATE sqlflow_models.custom_loop_model
+WITH validation.metrics="Accuracy"
+LABEL class
+INTO sqlflow_models.custom_loop_model_eval_result;`, caseTrainTable)
+	if _, _, _, err := connectAndRunSQL(evalSQL); err != nil {
+		t.Fatalf("Run evalSQL error: %v", err)
+	}
+}
+
 func TestEnd2EndMySQL(t *testing.T) {
 	if os.Getenv("SQLFLOW_TEST_DB") != "mysql" {
 		t.Skip("Skipping mysql tests")
@@ -54,6 +82,7 @@ func TestEnd2EndMySQL(t *testing.T) {
 	t.Run("CaseCoverage", CaseCoverageMysql)
 	t.Run("CaseTrainWithCommaSeparatedLabel", CaseTrainWithCommaSeparatedLabel)
 	t.Run("CaseTrainCustomModelFunctional", CaseTrainCustomModelFunctional)
+	t.Run("CaseCustomLoopModel", caseCustomLoopModel)
 	t.Run("CaseSQLByPassLeftJoin", CaseSQLByPassLeftJoin)
 	t.Run("CaseTrainRegression", caseTrainRegression)
 
diff --git a/python/runtime/tensorflow/evaluate.py b/python/runtime/tensorflow/evaluate.py
@@ -60,7 +60,8 @@ def evaluate(datasource,
         result_metrics = estimator_evaluate(estimator, eval_dataset,
                                             validation_metrics)
     else:
-        keras_model = init_model_with_feature_column(estimator, model_params)
+        keras_model = init_model_with_feature_column(estimator_cls,
+                                                     model_params)
         keras_model_pkg = sys.modules[estimator_cls.__module__]
         result_metrics = keras_evaluate(keras_model, eval_dataset, save,
                                         keras_model_pkg, validation_metrics)
@@ -119,10 +120,12 @@ def keras_evaluate(keras_model, eval_dataset_fn, save, keras_model_pkg,
         else:
             # default
             keras_metrics = metrics.get_keras_metrics(["Accuracy"])
+    has_custom_evaluate_func = hasattr(keras_model, 'sqlflow_evaluate_loop')
 
-    # compile the model with default arguments only for evaluation (run forward
-    # only).
-    keras_model.compile(loss=keras_model_pkg.loss, metrics=keras_metrics)
+    if not has_custom_evaluate_func:
+        # compile the model with default arguments only for evaluation
+        # (run forward only).
+        keras_model.compile(loss=keras_model_pkg.loss, metrics=keras_metrics)
 
     eval_dataset = eval_dataset_fn()
 
@@ -131,12 +134,17 @@ def get_features(sample, label):
 
     eval_dataset_x = eval_dataset.map(get_features)
 
-    one_batch = next(iter(eval_dataset_x))
-    # NOTE: must run predict one batch to initialize parameters
-    # see: https://www.tensorflow.org/alpha/guide/keras/saving_and_serializing#saving_subclassed_models # noqa: E501
-    keras_model.predict_on_batch(one_batch)
-    keras_model.load_weights(save)
-    result = keras_model.evaluate(eval_dataset)
+    if has_custom_evaluate_func:
+        result = keras_model.sqlflow_evaluate_loop(eval_dataset,
+                                                   validation_metrics)
+    else:
+        one_batch = next(iter(eval_dataset_x))
+        # NOTE: must run predict one batch to initialize parameters
+        # see: https://www.tensorflow.org/alpha/guide/keras/saving_and_serializing#saving_subclassed_models # noqa: E501
+        keras_model.predict_on_batch(one_batch)
+        keras_model.load_weights(save)
+        result = keras_model.evaluate(eval_dataset)
+
     assert (len(result) == len(validation_metrics) + 1)
     result_metrics = dict()
     for idx, m in enumerate(["loss"] + validation_metrics):
diff --git a/python/runtime/tensorflow/predict.py b/python/runtime/tensorflow/predict.py
@@ -66,14 +66,16 @@ def eval_input_fn(batch_size, cache=False):
             dataset = dataset.cache()
         return dataset
 
-    # NOTE: always use batch_size=1 when predicting to get the pairs of
-    #       features and predict results to insert into result table.
-    pred_dataset = eval_input_fn(1)
-    one_batch = next(iter(pred_dataset))
-    # NOTE: must run predict one batch to initialize parameters. See:
-    # https://www.tensorflow.org/alpha/guide/keras/saving_and_serializing#saving_subclassed_models  # noqa: E501
-    classifier.predict_on_batch(one_batch)
-    classifier.load_weights(save)
+    if not hasattr(classifier, 'sqlflow_predict_one'):
+        # NOTE: load_weights should be called by keras models only.
+        # NOTE: always use batch_size=1 when predicting to get the pairs of
+        #       features and predict results to insert into result table.
+        pred_dataset = eval_input_fn(1)
+        one_batch = next(iter(pred_dataset))
+        # NOTE: must run predict one batch to initialize parameters. See:
+        # https://www.tensorflow.org/alpha/guide/keras/saving_and_serializing#saving_subclassed_models  # noqa: E501
+        classifier.predict_on_batch(one_batch)
+        classifier.load_weights(save)
     pred_dataset = eval_input_fn(1, cache=True).make_one_shot_iterator()
 
     column_names = selected_cols[:]
@@ -89,7 +91,10 @@ def eval_input_fn(batch_size, cache=False):
                                hdfs_namenode_addr, hive_location, hdfs_user,
                                hdfs_pass) as w:
         for features in pred_dataset:
-            result = classifier.predict_on_batch(features)
+            if hasattr(classifier, 'sqlflow_predict_one'):
+                result = classifier.sqlflow_predict_one(features)
+            else:
+                result = classifier.predict_on_batch(features)
             # FIXME(typhoonzero): determine the predict result is
             # classification by adding the prediction result together
             # to see if it is close to 1.0.
diff --git a/python/runtime/tensorflow/train_keras.py b/python/runtime/tensorflow/train_keras.py
@@ -99,8 +99,19 @@ def keras_train_and_save(estimator, model_params, save, is_pai,
                          verbose, metric_names, validation_steps,
                          load_pretrained_model, model_meta):
     print("Start training using keras model...")
-    classifier, has_none_optimizer = keras_compile(estimator, model_params,
-                                                   save, metric_names)
+    try:
+        classifier, has_none_optimizer = keras_compile(estimator, model_params,
+                                                       save, metric_names)
+    except Exception as e:
+        if hasattr(estimator, "sqlflow_train_loop"):
+            sys.stderr.write(
+                "compile keras model failed, ignoring this error since the model seems to defined sqlflow_train_loop."
+            )
+            classifier = init_model_with_feature_column(
+                estimator, model_params, has_none_optimizer=True)
+            has_none_optimizer = True
+        else:
+            raise e
 
     train_dataset = train_dataset_fn()
     if val_dataset_fn is not None:
@@ -165,9 +176,11 @@ def keras_train_compiled(classifier, save, train_dataset, validate_dataset,
         model_meta["evaluation"] = val_metrics
 
     try:
-        classifier.save_weights(save, save_format="h5")
         # write model metadata to model_meta.json
         save_model_metadata("model_meta.json", model_meta)
+        # NOTE: classifier.save_weights may fail if the model has sqlflow_train_loop
+        # and does not have Keras layers defined. So save metadata before calling save_weights.
+        classifier.save_weights(save, save_format="h5")
     except:  # noqa: E722
         if has_none_optimizer:
             warnings.warn("Saving model with None optimizer fails")
diff --git a/scripts/test/prepare.sh b/scripts/test/prepare.sh
@@ -44,7 +44,7 @@ python -m pip install --quiet \
 
 git clone https://github.com/sql-machine-learning/models.git
 (cd models && git fetch origin && \
-git checkout v0.0.5 -b v0.0.5 && \
+git checkout v0.0.6 -b v0.0.6 && \
 python setup.py install)
 
 # 3. install java parser