add sam predict
Signed-off-by: hsj576 <[email protected]>

add segformer example

Signed-off-by: hsj576 <[email protected]>

add segformer example

Signed-off-by: hsj576 <[email protected]>

add description for workflow of lifelong training

Signed-off-by: hsj576 <[email protected]>

update Readme for robot-sam

Signed-off-by: hsj576 <[email protected]>

fix pylint issues

Signed-off-by: hsj576 <[email protected]>
hsj576 committed Oct 31, 2023
1 parent e00151d commit a497002
Showing 24 changed files with 1,555 additions and 54 deletions.
2 changes: 1 addition & 1 deletion core/storymanager/rank/rank.py
@@ -196,7 +196,7 @@ def _get_selected(self, test_cases, test_results) -> pd.DataFrame:
all_df = copy.deepcopy(self.all_df)
selected_df = pd.DataFrame(all_df, columns=header)
selected_df = selected_df.drop_duplicates(header[:-2])

# pylint: disable=E1136
paradigms = self.selected_dataitem.get("paradigms")
if paradigms != ["all"]:
selected_df = selected_df.loc[selected_df["paradigm"].isin(paradigms)]
core/testcasecontroller/algorithm/paradigm/lifelong_learning/lifelong_learning.py
@@ -24,7 +24,7 @@
from core.testcasecontroller.metrics import get_metric_func
from core.common.utils import get_file_format, is_local_dir

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

class LifelongLearning(ParadigmBase):
# pylint: disable=too-many-locals
@@ -162,6 +162,102 @@ def run(self):
#BWT, FWT = self.compute(key, matrix)
self.system_metric_info[SystemMetricType.Matrix.value][key] = matrix

elif mode == 'hard-example-mining':
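# Round 0 trains on the initial split and evaluates it; every later
# round mines unseen (hard) samples via inference, retrains the cloud
# task index on them, then re-evaluates on each round's eval split to
# fill a per-round score matrix (my_dict) for the final report.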
dataset_files = self._split_dataset(splitting_dataset_times=rounds)
# pylint: disable=C0103
# pylint: disable=C0206
# pylint: disable=C0201
# pylint: disable=W1203
my_dict = {}
for r in range(rounds + 1):
train_dataset_file, eval_dataset_file = dataset_files[r]
if r == 0:
self.cloud_task_index = self._train(self.cloud_task_index,
train_dataset_file,
r)

self.edge_task_index, tasks_detail, res = self.my_eval(
self.cloud_task_index,
eval_dataset_file,
r)

else:
infer_dataset_file, eval_dataset_file = dataset_files[r]
inference_results, unseen_task_train_samples = self._inference(
self.cloud_task_index,
infer_dataset_file,
r)
samples_transfer_ratio_info.append((inference_results,
unseen_task_train_samples.x))

# If there are no unseen task samples in this round, skip to the next round
if len(unseen_task_train_samples.x) <= 0:
continue

self.cloud_task_index = self._train(self.cloud_task_index,
unseen_task_train_samples,
r)

tmp_dict = {}
for j in range(1, rounds+1):
_, eval_dataset_file = dataset_files[j]
self.edge_task_index, tasks_detail, res = self.my_eval(
self.cloud_task_index,
eval_dataset_file,
r)
LOGGER.info(f"train from round {r}")
LOGGER.info(f"test round {j}")
LOGGER.info(f"all scores: {res}")
score_list = tmp_dict.get("all", ['' for i in range(rounds)])
score_list[j-1] = res
tmp_dict["all"] = score_list
task_avg_score = {'accuracy':0.0}
i = 0
for detail in tasks_detail:
i += 1
scores = detail.scores
entry = detail.entry
LOGGER.info(f"{entry} scores: {scores}")
task_avg_score['accuracy'] += scores['accuracy']
score_list = tmp_dict.get(entry, ['' for i in range(rounds)])
score_list[j-1] = scores
tmp_dict[entry] = score_list
task_avg_score['accuracy'] = task_avg_score['accuracy']/i
score_list = tmp_dict.get("task_avg", [{'accuracy':0.0} for i in range(rounds)])
score_list[j-1] = task_avg_score
tmp_dict["task_avg"] = score_list

for key in tmp_dict.keys():
scores_list = my_dict.get(key, [])
scores_list.append(tmp_dict[key])
my_dict[key] = scores_list
LOGGER.info(f"{key} scores: {scores_list}")


self.edge_task_index, tasks_detail, res = self.my_eval(self.cloud_task_index,
self.dataset.test_url,
rounds + 1)
task_avg_score = {'accuracy':0.0}
i = 0
for detail in tasks_detail:
i += 1
scores = detail.scores
entry = detail.entry
LOGGER.info(f"{entry} scores: {scores}")
task_avg_score['accuracy'] += scores['accuracy']
task_avg_score['accuracy'] = task_avg_score['accuracy']/i
self.system_metric_info[SystemMetricType.Task_Avg_Acc.value] = task_avg_score
LOGGER.info(task_avg_score)
test_res, unseen_task_train_samples = self._inference(self.edge_task_index,
self.dataset.test_url,
"test")
for key in my_dict.keys():
LOGGER.info(f"{key} scores: {my_dict[key]}")
for key in my_dict.keys():
matrix = my_dict[key]
#BWT, FWT = self.compute(key, matrix)
self.system_metric_info[SystemMetricType.Matrix.value][key] = matrix

elif mode != 'multi-inference':
dataset_files = self._split_dataset(splitting_dataset_times=rounds)
# pylint: disable=C0103
@@ -214,6 +310,7 @@ def _inference(self, edge_task_index, data_index_file, rounds):
os.makedirs(unseen_task_saved_dir)

os.environ["INFERENCE_RESULT_DIR"] = output_dir
os.environ["OUTPUT_URL"] = output_dir
os.environ["MODEL_URLS"] = f"{edge_task_index}"

inference_dataset = self.dataset.load_data(data_index_file, "eval",
@@ -234,7 +331,7 @@ def _inference(self, edge_task_index, data_index_file, rounds):
for i, _ in enumerate(inference_dataset.x):
data = BaseDataSource(data_type="test")
data.x = inference_dataset.x[i:(i + 1)]
res, is_unseen_task, _ = job.inference(data, **kwargs)
res, is_unseen_task, _ = job.inference_2(data, **kwargs)
inference_results.append(res)
if is_unseen_task:
unseen_tasks.append(inference_dataset.x[i])
@@ -257,7 +354,7 @@ def _train(self, cloud_task_index, train_dataset, rounds):

os.environ["CLOUD_KB_INDEX"] = cloud_task_index
os.environ["OUTPUT_URL"] = train_output_dir
if rounds <= 1:
if rounds < 1:
os.environ["HAS_COMPLETED_INITIAL_TRAINING"] = 'False'
else:
os.environ["HAS_COMPLETED_INITIAL_TRAINING"] = 'True'
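The commented-out `self.compute(key, matrix)` calls above suggest that backward/forward transfer scores are meant to be derived from the saved score matrix. A minimal sketch of those metrics in the style of Lopez-Paz & Ranzato's GEM formulation, assuming a plain square matrix where `acc[i][j]` is the accuracy on round `j`'s eval split after training through round `i` (the function name and float-matrix shape are assumptions, not this repository's API; the FWT random-initialization baseline term is omitted):

~~~python
from typing import List, Tuple

def compute_bwt_fwt(acc: List[List[float]]) -> Tuple[float, float]:
    """Backward/forward transfer from a square accuracy matrix.

    acc[i][j]: accuracy on round j's eval data after training round i.
    """
    n = len(acc)
    assert n > 1, "need at least two rounds"
    # BWT: how training on later rounds changed accuracy on earlier rounds.
    bwt = sum(acc[n - 1][j] - acc[j][j] for j in range(n - 1)) / (n - 1)
    # FWT: accuracy on a round's data before that round has been trained on.
    fwt = sum(acc[i - 1][i] for i in range(1, n)) / (n - 1)
    return bwt, fwt
~~~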
44 changes: 44 additions & 0 deletions core/testenvmanager/dataset/dataset.py
@@ -168,6 +168,12 @@ def split_dataset(self, dataset_url, dataset_format, ratio, method="default",
output_dir=output_dir,
times=times)

if method == "hard-example_splitting":
return self._hard_example_splitting(dataset_url, dataset_format, ratio,
data_types=dataset_types,
output_dir=output_dir,
times=times)

raise ValueError(f"dataset splitting method({method}) is not supported,"
f"currently, method supports 'default'.")

@@ -320,6 +326,44 @@ def _city_splitting(self, data_file, data_format, ratio,

return data_files

def _hard_example_splitting(self, data_file, data_format, ratio,
data_types=None, output_dir=None, times=1):
if not data_types:
data_types = ("train", "eval")

if not output_dir:
output_dir = tempfile.mkdtemp()

all_data = self._read_data_file(data_file, data_format)

data_files = []

all_num = len(all_data)
step = int(all_num / (times*2))
data_files.append((
self._get_dataset_file(all_data[:int((all_num * ratio)/2)], output_dir,
data_types[0], 0, data_format),
self._get_dataset_file(all_data[int((all_num * ratio)/2):int(all_num/2)], output_dir,
data_types[1], 0, data_format)))
index = 1
while index <= times:
if index == times:
new_dataset = all_data[int(all_num/2)+step*(index-1):]
else:
new_dataset = all_data[int(all_num/2)+step*(index-1): int(all_num/2)+step*index]

new_num = len(new_dataset)

data_files.append((
self._get_dataset_file(new_dataset[:int(new_num * ratio)], output_dir,
data_types[0], index, data_format),
self._get_dataset_file(new_dataset[int(new_num * ratio):], output_dir,
data_types[1], index, data_format)))

index += 1

return data_files

@classmethod
def load_data(cls, file: str, data_type: str, label=None, use_raw=False, feature_process=None):
"""
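The index arithmetic in `_hard_example_splitting` is easiest to check with concrete numbers: the first half of the data forms round 0, split into train/eval by `ratio`, and the second half is carved into `times` chunks of `step = all_num // (times * 2)` samples, each again split by `ratio`. A standalone sketch (illustrative, not repository code) that prints the resulting boundaries for 1000 samples, `ratio=0.8`, `times=5`:

~~~python
def hard_example_boundaries(all_num, ratio, times):
    """Reproduce _hard_example_splitting's slice boundaries (illustrative)."""
    step = int(all_num / (times * 2))
    half = int(all_num / 2)
    # round 0: first half of the data, train/eval split by ratio
    rounds = [(0, int((all_num * ratio) / 2), half)]
    for index in range(1, times + 1):
        start = half + step * (index - 1)
        end = all_num if index == times else half + step * index
        rounds.append((start, start + int((end - start) * ratio), end))
    return rounds

for r, (start, mid, end) in enumerate(hard_example_boundaries(1000, 0.8, 5)):
    print(f"round {r}: train [{start}:{mid}], eval [{mid}:{end}]")
# round 0: train [0:400], eval [400:500]; rounds 1-5: 80 train / 20 eval each
~~~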
@@ -63,7 +63,7 @@ The overall workflow of the system is as follows:

![plugin-workflow](images/plugin-workflow.jpg)

The Hard Example Mining Module will be implemented in the unknown sample recognition module of Ianvs. The Edge Inference Module will be implemented in the known sample inference module of Ianvs' edge-side knowledge management. The Cloud Inference Module will be implemented in the unknown task processing module of Ianvs' cloud-side knowledge management.
The Hard Example Mining Module will be implemented in the unknown sample recognition module of Ianvs. The Edge Inference Module will be implemented in the known sample inference module of Ianvs' edge-side knowledge management. The Cloud Inference Module will be implemented in the unknown sample inference module. The Lifelong Training Module will be implemented in the unknown task processing module of Ianvs' cloud-side knowledge management.
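For readers skimming the modules above, the intended division of labor can be summarized in a few lines of illustrative Python; all names and the confidence threshold here are assumptions for exposition, not the plugin's actual interfaces:

~~~python
unseen_samples = []  # collected as input for cloud-side lifelong training

def process_sample(sample, edge_model, cloud_model, threshold=0.95):
    """Sketch of the edge-cloud routing described above."""
    pred, confidence = edge_model.infer(sample)    # Edge Inference Module
    if confidence >= threshold:                    # Hard Example Mining Module
        return pred                                # known sample: handled at the edge
    cloud_pred = cloud_model.infer(sample)         # Cloud Inference Module (e.g., SAM)
    unseen_samples.append((sample, cloud_pred))    # fed to Lifelong Training
    return cloud_pred
~~~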

### 3.2 Implementation of SAM-based Semantic Segmentation

Binary file modified examples/resources/third_party/sedna-0.4.1-py3-none-any.whl
53 changes: 47 additions & 6 deletions examples/robot-sam/lifelong_learning_bench/README.md
@@ -14,7 +14,7 @@ Before using Ianvs, you might want to have the device ready:
- Python 3.6+ installed


In this example, we are using the Linux platform with Python 3.8.5. If you are using Windows, most steps should still apply but a few like commands and package requirements might be different.
In this example, we are using the Linux platform with Python 3.9. If you are using Windows, most steps should still apply but a few like commands and package requirements might be different.

## Step 1. Ianvs Preparation

@@ -70,19 +70,60 @@

The related algorithm is also ready in this quick start.
``` shell
export PYTHONPATH=$PYTHONPATH:/ianvs/project/ianvs/examples/robot/lifelong_learning_bench/testalgorithms/rfnet/RFNet
export PYTHONPATH=$PYTHONPATH:/ianvs/project/ianvs/examples/robot-sam/lifelong_learning_bench/testalgorithms/rfnet/RFNet
```

The URL address of this algorithm should then be filled into the configuration file ``algorithm.yaml``. In this quick
start, we have done that for you; interested readers can refer to [algorithm.yaml](https://ianvs.readthedocs.io/en/latest/guides/how-to-test-algorithms.html#step-1-test-environment-preparation) for more details.

In this example, we use the [SAM model](https://segment-anything.com/) as the cloud large model, so we need to install SAM with the following commands:

~~~bash
cd /ianvs/project
git clone https://github.com/facebookresearch/segment-anything.git
cd segment-anything
python -m pip install -e .
~~~

Then, we need to download the pretrained SAM model:

~~~bash
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
~~~

To save the inference results, we need to install mmcv and mmdetection with the following commands:

~~~bash
python -m pip install https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/mmcv-2.0.0-cp39-cp39-manylinux1_x86_64.whl
cd /ianvs/project
git clone https://github.com/hsj576/mmdetection.git
cd mmdetection
python -m pip install -v -e .
~~~

In case your computer cannot run the SAM model, we have prepared a cache of all the SAM inference results on the Cloud-Robotics dataset. You can download the cache from [this link](https://pan.baidu.com/s/1oGGBa8TjZn0ccbznQsl48g?pwd=wpp1) and put the cache file in "/ianvs/project/":

~~~bash
cp cache.pickle /ianvs/project
~~~

By using the cache, you can simulate edge-cloud joint inference without installing the SAM model.
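
A minimal sketch of how such a cache can be consulted, assuming the pickle file holds a dict keyed by image path (the key scheme and function names are assumptions, not the benchmark's actual code):

~~~python
import os
import pickle

CACHE_PATH = "/ianvs/project/cache.pickle"

def cached_sam_predict(image_path, sam_predict_fn):
    """Return a cached SAM result if present; otherwise compute and cache it."""
    cache = {}
    if os.path.exists(CACHE_PATH):
        with open(CACHE_PATH, "rb") as f:
            cache = pickle.load(f)
    if image_path not in cache:
        cache[image_path] = sam_predict_fn(image_path)  # fall back to the real model
        with open(CACHE_PATH, "wb") as f:
            pickle.dump(cache, f)
    return cache[image_path]
~~~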

Besides that, we also provide a pretrained RFNet model at [this link](https://pan.baidu.com/s/1h8JnUgr1hfx5QnaFLLkMAg?pwd=jts4); you can use it if you don't want to train the RFNet model from scratch. This step is optional:

~~~bash
cd /ianvs/project
mkdir pretrain
cp pretrain_model.pth /ianvs/project/pretrain
~~~

## Step 3. Ianvs Execution and Presentation

We are now ready to run Ianvs for benchmarking.

``` shell
cd /ianvs/project/ianvs
ianvs -f examples/robot/lifelong_learning_bench/benchmarkingjob.yaml
ianvs -f examples/robot-sam/lifelong_learning_bench/benchmarkingjob.yaml
```

Finally, the user can check the result of benchmarking on the console and also in the output path(
@@ -93,9 +134,9 @@ can refer to [benchmarkingJob.yaml](https://ianvs.readthedocs.io/en/latest/guide
The final output might look like this:


| rank | algorithm | accuracy | BWT | FWT | paradigm | basemodel | task_definition | task_allocation | basemodel-learning_rate | basemodel-epochs | task_definition-origins | task_allocation-origins | time | url |
|:------:|:-------------------------:|:--------------------:|:---------------------:|:---------------------:|:------------------:|:-----------:|:------------------------:|:------------------------:|:-------------------------:|:------------------:|:-------------------------:|:-------------------------:|:---------------------:|:---------------------------------------------------------------------------------------------------------------------------------:|
| 1 | rfnet_lifelong_learning | 0.2970033189775575 | 0.04239649121511442 | 0.02299711942108413 | lifelonglearning | BaseModel | TaskDefinitionByOrigin | TaskAllocationByOrigin | 0.0001 | 1 | ['front', 'garden'] | ['front', 'garden'] | 2023-05-24 15:07:57 | /ianvs/lifelong_learning_bench/robot-workspace-bwt/benchmarkingjob/rfnet_lifelong_learning/efdc47a2-f9fb-11ed-8f8b-0242ac110007 |
| rank | algorithm | accuracy | Task_Avg_Acc | paradigm | basemodel | task_definition | task_allocation | unseen_sample_recognition | basemodel-learning_rate | basemodel-epochs | task_definition-origins | task_allocation-origins | unseen_sample_recognition-threhold | time | url |
|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|
| 1 | sam_rfnet_lifelong_learning | 0.7052917006987501 | 0.6258875117354328 | lifelonglearning | BaseModel | TaskDefinitionByOrigin | TaskAllocationByOrigin | HardSampleMining | 0.0001 | 1 | ['front', 'garden'] | ['front', 'garden'] | 0.95 | 2023-08-24 12:43:19 | /ianvs/sam_bench/robot-workspace/benchmarkingjob/sam_rfnet_lifelong_learning/9465c47a-4235-11ee-8519-ec2a724ccd3e |



22 changes: 16 additions & 6 deletions examples/robot-sam/lifelong_learning_bench/benchmarkingjob.yaml
@@ -2,11 +2,13 @@ benchmarkingjob:
# job name of benchmarking; string type;
name: "benchmarkingjob"
# the url address of job workspace that will reserve the output of tests; string type;
workspace: "/home/vipuser/ianvs/lifelong_learning_bench/robot-workspace-test"
workspace: "/ianvs/sam_bench/robot-workspace"
#workspace: "/home/hsj/ianvs/sam_bench/cloud-robot-workspace"

# the url address of test environment configuration file; string type;
# the file format supports yaml/yml;
testenv: "./examples/robot/lifelong_learning_bench/testenv/testenv-robot.yaml"
testenv: "./examples/robot-sam/lifelong_learning_bench/testenv/testenv-robot-small.yaml"
#testenv: "./examples/robot-sam/lifelong_learning_bench/testenv/testenv-robot.yaml"

# the configuration of test object
test_object:
@@ -16,16 +18,24 @@
# test algorithm configuration files; list type;
algorithms:
# algorithm name; string type;
- name: "rfnet_lifelong_learning"
#- name: "rfnet_lifelong_learning"
- name: "sam_rfnet_lifelong_learning"
#- name: "vit_lifelong_learning"
#- name: "sam_vit_lifelong_learning"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml
#url: "./examples/robot-sam/lifelong_learning_bench/testalgorithms/rfnet/rfnet_algorithm.yaml"
url: "./examples/robot-sam/lifelong_learning_bench/testalgorithms/rfnet/sam_algorithm.yaml"
#url: "./examples/robot-sam/lifelong_learning_bench/testalgorithms/rfnet/vit_algorithm.yaml"
#url: "./examples/robot-sam/lifelong_learning_bench/testalgorithms/rfnet/sam_vit_algorithm.yaml"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml
url: "./examples/robot/lifelong_learning_bench/testalgorithms/rfnet/rfnet_algorithm-simple.yaml"

# the configuration of ranking leaderboard
rank:
# rank leaderboard with metric of test case's evaluation and order ; list type;
# the sorting priority is based on the sequence of metrics in the list from front to back;
sort_by: [ { "accuracy": "descend" }, { "BWT": "descend" } ]
sort_by: [ { "accuracy": "descend" } ]

# visualization configuration
visualization:
@@ -57,7 +67,7 @@
# currently the options of value are as follows:
# 1> "all": select all metrics in the leaderboard;
# 2> metrics in the leaderboard, e.g., "F1_SCORE"
metrics: [ "accuracy", "Task_Avg_Acc", "BWT", "FWT"]
metrics: [ "accuracy", "Task_Avg_Acc"]

# model of save selected and all dataitems in workspace `./rank` ; string type;
# currently the options of value are as follows: