{
"name": "AI process",
"children": [
{
"name": "Vision process",
"children": [
{
"name": "Image tagging",
"description": "Annotate an image of some sort, typically with terms from a controlled vocabulary.",
"has_input": [
"Image"
],
"has_output": [
"Text"
],
"children": [
{
"name": "Image captioning",
"description": "Image captioning is the automatic generation of natural language descriptions of the content of an input image.",
"has_input": [
"Image"
],
"has_output": [
"Text"
],
"hasExactSynonym": "['Automatic image annotation']",
"children": [
{
"name": "Phrase grounding",
"description": "Assigning each entity mentioned in a given caption of an image to a corresponding location in the respective image.",
"has_input": [
"Image"
]
},
{
"name": "Relational captioning"
}
]
},
{
"name": "Meme classification",
"description": "Meme classification refers to the task of classifying internet memes.",
"has_input": [
"Image"
]
}
]
},
{
"name": "Optical character recognition",
"children": [
{
"name": "Handwriting recognition",
"children": [
{
"name": "Handwritten digit recognition"
}
]
},
{
"name": "Active learning",
"description": "**Active Learning** is a paradigm in supervised machine learning which uses fewer training examples to achieve better optimization by iteratively training a predictor, and using the predictor in each iteration to choose the training examples which will increase its chances of finding better configurations and at the same time improving the accuracy of the prediction model (Source: paperswithcode.com)",
"children": [
{
"name": "Active object detection",
"description": "Active Learning for Object Detection (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Trajectory prediction",
"description": "**Trajectory Prediction** is the problem of predicting the short-term (1-3 seconds) and long-term (3-5 seconds) spatial coordinates of various road-agents such as cars, buses, pedestrians, rickshaws, and animals, etc. These road-agents have different dynamic behaviors that may correspond to aggressive or conservative driving styles. (Source: paperswithcode.com)",
"children": [
{
"name": "Trajectory forecasting",
"description": "Trajectory forecasting is a sequential prediction task, where a forecasting model predicts future trajectories of all moving agents (humans, vehicles, etc.) in a scene, based on their past trajectories and/or the scene context.\n\n(Illustrative figure from [Social NCE: Contrastive Learning of Socially-aware Motion Representations](https://github.com/vita-epfl/social-nce)) (Source: paperswithcode.com)"
}
]
},
{
"name": "Depth estimation",
"description": "**Depth Estimation** is a crucial step towards inferring scene geometry from 2D images. The goal in monocular Depth Estimation is to predict the depth value of each pixel, given only a single RGB image as input. (Source: paperswithcode.com)",
"children": [
{
"name": "Monocular depth estimation",
"description": "The **Monocular Depth Estimation** is the task of estimating scene depth using a single image. (Source: paperswithcode.com)"
},
{
"name": "Stereo depth estimation"
},
{
"name": "Indoor monocular depth estimation"
},
{
"name": "Stereo-lidar fusion",
"description": "Depth estimation using stereo cameras and a LiDAR sensor. (Source: paperswithcode.com)"
}
]
},
{
"name": "Super-resolution",
"description": "Super resolution is the task of taking an input of a low resolution (LR) and upscaling it to that of a high resolution. (Source: paperswithcode.com)",
"children": [
{
"name": "Image super-resolution",
"description": "Image super-resolution (SR) techniques reconstruct a higher-resolution image or sequence from the observed lower-resolution images. Usually the benchmarks are single-image super-resolution (SISR) tasks. (Source: paperswithcode.com)",
"children": [
{
"name": "Multi-frame super-resolution",
"description": "When multiple images of the same view are taken from slightly different positions, perhaps also at different times, then they collectively contain more information than any single image on its own. Multi-Frame Super-Resolution fuses these low-res inputs into a composite high-res image that can reveal some of the original detail that cannot be recovered from any low-res image alone. (Source: paperswithcode.com)"
},
{
"name": "Audio super-resolution",
"description": "AUDIO SUPER-RESOLUTION or speech bandwidth extension (Upsampling Ratio = 2) (Source: paperswithcode.com)"
}
]
},
{
"name": "Video super-resolution",
"description": "Video super-resolution is the task of upscaling a video from a low-resolution to a high-resolution. (Source: paperswithcode.com)"
}
]
},
{
"name": "Video process",
"children": [
{
"name": "Video super-resolution",
"description": "Video super-resolution is the task of upscaling a video from a low-resolution to a high-resolution. (Source: paperswithcode.com)"
},
{
"name": "Video understanding",
"description": "A crucial task of **Video Understanding** is to recognise and localise (in space and time) different actions or events appearing in the video. (Source: paperswithcode.com)",
"children": [
{
"name": "Video alignment"
}
]
},
{
"name": "Object tracking",
"description": "Object tracking is the task of taking an initial set of object detections, creating a unique ID for each of the initial detections, and then tracking each of the objects as they move around frames in a video, maintaining the ID assignment. (Source: paperswithcode.com)",
"children": [
{
"name": "Visual object tracking",
"description": "**Visual Object Tracking** is an important research topic in computer vision, image understanding and pattern recognition. Given the initial state (centre location and scale) of a target in the first frame of a video sequence, the aim of Visual Object Tracking is to automatically obtain the states of the object in the subsequent video frames. (Source: paperswithcode.com)"
},
{
"name": "Multi-object tracking",
"children": [
{
"name": "3D multi-object tracking",
"description": "Image: [Weng et al](https://arxiv.org/pdf/1907.03961v4.pdf) (Source: paperswithcode.com)"
}
]
},
{
"name": "Multiple object tracking",
"description": "**Multiple Object Tracking** is the problem of automatically identifying multiple objects in a video and representing them as a set of trajectories with high accuracy. (Source: paperswithcode.com)"
},
{
"name": "Online multi-object tracking",
"description": "The goal of **Online Multi-Object Tracking** is to estimate the spatio-temporal trajectories of multiple objects in an online video stream (i.e., the video is provided frame-by-frame), which is a fundamental problem for numerous real-time applications, such as video surveillance, autonomous driving, and robot navigation. (Source: paperswithcode.com)"
},
{
"name": "3D object tracking"
}
]
},
{
"name": "Video prediction",
"description": "**Video Prediction** is the task of predicting future frames given past video frames. (Source: paperswithcode.com)",
"has_input": [
"Video"
],
"has_output": [
"Video"
]
},
{
"name": "Action classification",
"children": [
{
"name": "Skeleton based action recognition"
}
]
},
{
"name": "Video classification",
"description": "**Video Classification** is the task of producing a label that is relevant to the video given its frames. A good video level classifier is one that not only provides accurate frame labels, but also best describes the entire video given the features and the annotations of the various frames in the video. For example, a video might contain a tree in some frame, but the label that is central to the video might be something else (e.g., \u201chiking\u201d). The granularity of the labels that are needed to describe the frames and the video depends on the task. Typical tasks include assigning one or more global labels to the video, and assigning one or more labels for each frame inside the video. (Source: paperswithcode.com)",
"children": [
{
"name": "Action recognition in videos",
"children": [
{
"name": "Self-supervised action recognition"
}
]
},
{
"name": "Action recognition",
"description": "Please note some benchmarks may be located in the [Action Classification](https://paperswithcode.com/task/action-classification) or [Video Classification](https://paperswithcode.com/task/video-classification) tasks, e.g. Kinetics-400. (Source: paperswithcode.com)",
"children": [
{
"name": "3D action recognition",
"description": "Image: [Rahmani et al](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Rahmani_3D_Action_Recognition_CVPR_2016_paper.pdf) (Source: paperswithcode.com)",
"children": [
{
"name": "Skeleton based action recognition"
},
{
"name": "Zero shot skeletal action recognition",
"description": "Zero-Shot Learning for 3D skeletal action recognition (Source: paperswithcode.com)",
"children": [
{
"name": "Generalized zero shot skeletal action recognition",
"description": "Generalized Zero Shot Learning for 3d Skeletal Action Recognition (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Self-supervised action recognition"
},
{
"name": "Action triplet recognition",
"description": "Recognising action as a triplet of subject verb and object. Example HOI = Human Object Interaction, Surgical IVT = Instrument Verb Target, etc. (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Video generation"
},
{
"name": "Video frame interpolation",
"description": "The goal of **Video Frame Interpolation** is to synthesize several frames in the middle of two adjacent frames of the original video. Video Frame Interpolation can be applied to generate slow motion video, increase video frame rate, and frame recovery in video streaming. (Source: paperswithcode.com)"
},
{
"name": "Video retrieval",
"description": "The objective of video retrieval is as follows: given a text query and a pool of candidate videos, select the video which corresponds to the text query. Typically, the videos are returned as a ranked list of candidates and scored via document retrieval metrics. (Source: paperswithcode.com)",
"children": [
{
"name": "Replay grounding",
"description": "Replay grounding is introduced in SoccerNet-v2 in the case of videos of soccer games. Given a replay shot of a soccer action, the objective is to retrieve when said action occurs within the whole live game. (Source: paperswithcode.com)"
}
]
},
{
"name": "Video denoising"
},
{
"name": "Video summarization",
"description": "**Video Summarization** is the process of compacting a video down to only important components in the video. (Source: paperswithcode.com)",
"children": [
{
"name": "Unsupervised video summarization"
},
{
"name": "Supervised video summarization"
}
]
},
{
"name": "Video-to-video synthesis",
"description": "Learning a mapping function from an input source video to an output video. (Source: paperswithcode.com)"
},
{
"name": "Activity recognition in videos",
"children": [
{
"name": "Activity prediction",
"description": "Predict human activities in videos (Source: paperswithcode.com)"
}
]
},
{
"name": "Anomaly detection in surveillance videos"
},
{
"name": "Abnormal event detection in video",
"description": "**Abnormal Event Detection In Video** is a challenging task in computer vision, as the definition of what an abnormal event looks like depends very much on the context. For instance, a car driving by on the street is regarded as a normal event, but if the car enters a pedestrian area, this is regarded as an abnormal event. A person running on a sports court (normal event) versus running outside from a bank (abnormal event) is another example. Although what is considered abnormal depends on the context, we can generally agree that abnormal events should be unexpected events that occur less often than familiar (normal) events (Source: paperswithcode.com)",
"children": [
{
"name": "Anomaly detection in surveillance videos"
},
{
"name": "Semi-supervised anomaly detection"
}
]
},
{
"name": "Action spotting"
},
{
"name": "Video deinterlacing"
},
{
"name": "Video story qa",
"description": "MCQ about clips from movies/tvshows/etc (Source: paperswithcode.com)"
},
{
"name": "Video object segmentation",
"description": "Video object segmentation is a binary labeling problem aiming to separate foreground object(s) from the background region of a video.\n\nFor leaderboards please refer to the different subtasks. (Source: paperswithcode.com)",
"children": [
{
"name": "Video salient object detection",
"description": "Video salient object detection (VSOD) is significantly essential for understanding the underlying mechanism behind HVS during free-viewing in general and instrumental to a wide range of real-world applications, e.g., video segmentation, video captioning, video compression, autonomous driving, robotic interaction, weakly supervised attention. Besides its academic value and practical significance, VSOD presents great difficulties due to the challenges carried by video data (diverse motion patterns, occlusions, blur, large object deformations, etc.) and the inherent complexity of human visual attention behavior (i.e., selective attention allocation, attention shift) during dynamic scenes. Online benchmark: http://dpfan.net/davsod. (Source: paperswithcode.com)"
},
{
"name": "Unsupervised video object segmentation",
"description": "The unsupervised scenario assumes that the user does not interact with the algorithm to obtain the segmentation masks. Methods should provide a set of object candidates with no overlapping pixels that span through the whole video sequence. This set of objects should contain at least the objects that capture human attention when watching the whole video sequence i.e objects that are more likely to be followed by human gaze. (Source: paperswithcode.com)"
},
{
"name": "Semi-supervised video object segmentation",
"description": "The semi-supervised scenario assumes the user inputs a full mask of the object(s) of interest in the first frame of a video sequence. Methods have to produce the segmentation mask for that object(s) in the subsequent frames. (Source: paperswithcode.com)",
"children": [
{
"name": "One-shot visual object segmentation"
}
]
},
{
"name": "Interactive video object segmentation",
"description": "The interactive scenario assumes the user gives iterative refinement inputs to the algorithm, in our case in the form of a scribble, to segment the objects of interest. Methods have to produce a segmentation mask for that object in all the frames of a video sequence taking into account all the user interactions. (Source: paperswithcode.com)"
}
]
},
{
"name": "SpO2 estimation",
"description": "SpO2 estimation (Source: paperswithcode.com)"
}
]
},
{
"name": "Reconstruction",
"children": [
{
"name": "Single-view 3D reconstruction"
},
{
"name": "Single-image-based Hdr reconstruction"
},
{
"name": "3D human reconstruction"
}
]
},
{
"name": "Facial recognition and modelling",
"description": "Facial tasks in machine learning operate based on images or video frames (or other datasets) focussed on human faces. (Source: paperswithcode.com)",
"children": [
{
"name": "Face recognition",
"description": "Facial recognition is the task of making a positive identification of a face in a photo or video image against a pre-existing database of faces. It begins with detection - distinguishing human faces from other objects in the image - and then works on identification of those detected faces.\n\nThe state of the art tables for this task are contained mainly in the consistent parts of the task : the face verification and face identification tasks. (Source: paperswithcode.com)",
"children": [
{
"name": "Age-invariant face recognition",
"description": "Age-invariant face recognition is the task of performing face recognition that is invariant to differences in age. (Source: paperswithcode.com)"
},
{
"name": "Heterogeneous face recognition",
"description": "Heterogeneous face recognition is the task of matching face images acquired from different sources (i.e., different sensors or different wavelengths) for identification or verification. (Source: paperswithcode.com)"
},
{
"name": "Face quality assessement",
"description": "Estimate the usability of a given face image for recognition (Source: paperswithcode.com)"
}
]
},
{
"name": "Face detection",
"description": "Face detection is the task of detecting faces in a photo or video (and distinguishing them from other objects). (Source: paperswithcode.com)",
"children": [
{
"name": "Occluded face detection"
}
]
},
{
"name": "Face alignment",
"description": "Face alignment is the task of identifying the geometric structure of faces in digital images, and attempting to obtain a canonical alignment of the face based on translation, scale, and rotation. (Source: paperswithcode.com)"
},
{
"name": "Face verification",
"description": "Face verification is the task of comparing a candidate face to another, and verifying whether it is a match. It is a one-to-one mapping: you have to check if this person is the correct one. (Source: paperswithcode.com)",
"children": [
{
"name": "Disguised face verification"
}
]
},
{
"name": "Facial expression recognition",
"description": "Facial expression recognition is the task of classifying the expressions on face images into various categories such as anger, fear, surprise, sadness, happiness and so on. (Source: paperswithcode.com)",
"children": [
{
"name": "3D facial expression recognition",
"description": "3D facial expression recognition is the task of modelling facial expressions in 3D from an image or video. (Source: paperswithcode.com)"
},
{
"name": "Smile recognition",
"description": "Smile recognition is the task of recognising a smiling face in a photo or video. (Source: paperswithcode.com)"
}
]
},
{
"name": "Facial landmark detection",
"description": "Facial landmark detection is the task of detecting key landmarks on the face and tracking them (being robust to rigid and non-rigid facial deformations due to head movements and facial expressions). (Source: paperswithcode.com)",
"children": [
{
"name": "Unsupervised facial landmark detection",
"description": "Facial landmark detection in the unsupervised setting popularized by [1]. The evaluation occurs in two stages:\n(1) Embeddings are first learned in an unsupervised manner (i.e. without labels);\n(2) A simple regressor is trained to regress landmarks from the unsupervised embedding.\n\n[1] Thewlis, James, Hakan Bilen, and Andrea Vedaldi. \"Unsupervised learning of object landmarks by factorized spatial embeddings.\" Proceedings of the IEEE International Conference on Computer Vision. 2017. (Source: paperswithcode.com)"
},
{
"name": "3D facial landmark localization",
"description": "Image: [Zhang et al](https://arxiv.org/pdf/1801.09242v1.pdf) (Source: paperswithcode.com)"
}
]
},
{
"name": "Face reconstruction",
"description": "Face reconstruction is the task of recovering the facial geometry of a face from an image. (Source: paperswithcode.com)",
"children": [
{
"name": "3D face reconstruction",
"description": "3D face reconstruction is the task of reconstructing a face from an image into a 3D form (or mesh). (Source: paperswithcode.com)"
}
]
},
{
"name": "Face identification",
"description": "Face identification is the task of matching a given face image to one in an existing database of faces. It is the second part of face recognition (the first part being detection). It is a one-to-many mapping: you have to find an unknown person in a database to find who that person is. (Source: paperswithcode.com)"
},
{
"name": "Face swapping",
"description": "Face swapping refers to the task of swapping faces between images or in an video, while maintaining the rest of the body and environment context. (Source: paperswithcode.com)"
},
{
"name": "Facial action unit detection",
"description": "Facial action unit detection is the task of detecting action units from a video of a face - for example, lip tightening and cheek raising. (Source: paperswithcode.com)"
},
{
"name": "Action unit detection",
"description": "Action unit detection is the task of detecting action units from a video - for example, types of facial action units (lip tightening, cheek raising) from a video of a face. (Source: paperswithcode.com)"
},
{
"name": "Age estimation",
"description": "Age Estimation is the task of estimating the age of a person from an image. (Source: paperswithcode.com)"
},
{
"name": "Gender prediction"
},
{
"name": "Face hallucination",
"description": "Face hallucination is the task of generating high-resolution (HR) facial images from low-resolution (LR) inputs. (Source: paperswithcode.com)"
},
{
"name": "Facial beauty prediction",
"description": "Facial beauty prediction is the task of predicting the attractiveness of a face. (Source: paperswithcode.com)"
},
{
"name": "Facial attribute classification",
"description": "Facial attribute classification is the task of classifying various attributes of a facial image - e.g. whether someone has a beard, is wearing a hat, and so on. (Source: paperswithcode.com)"
},
{
"name": "Age and gender classification",
"description": "Age and gender classification is a dual-task of identifying the age and gender of a person from an image or video. (Source: paperswithcode.com)"
}
]
},
{
"name": "Face anti-spoofing"
},
{
"name": "Deception detection",
"children": [
{
"name": "Face anti-spoofing"
}
]
},
{
"name": "Sketch",
"children": [
{
"name": "Face sketch synthesis",
"description": "Face sketch synthesis is the task of generating a sketch from an input face photo. (Source: paperswithcode.com)"
}
]
},
{
"name": "Autonomous vehicle task",
"description": "Autonomous vehicles is the task of making a vehicle that can guide itself without human conduction.\n\nMany of the state-of-the-art results can be found at more general task pages such as [3D Object Detection](https://paperswithcode.com/task/3d-object-detection) and [Semantic Segmentation](https://paperswithcode.com/task/semantic-segmentation). (Source: paperswithcode.com)",
"children": [
{
"name": "Autonomous driving",
"description": "Autonomous driving is the task of driving a vehicle without human conduction. \n\nMany of the state-of-the-art results can be found at more general task pages such as [3D Object Detection](https://paperswithcode.com/task/3d-object-detection) and [Semantic Segmentation](https://paperswithcode.com/task/semantic-segmentation). (Source: paperswithcode.com)",
"children": [
{
"name": "Motion forecasting",
"description": "Motion forecasting is the task of predicting the location of a tracked object in the future (Source: paperswithcode.com)",
"children": [
{
"name": "Multiple object forecasting"
}
]
}
]
},
{
"name": "Autonomous navigation",
"description": "Autonomous navigation is the task of autonomously navigating a vehicle or robot to or around a location without human guidance. (Source: paperswithcode.com)",
"children": [
{
"name": "Autonomous flight (dense forest)",
"description": "Number of interventions during autonomous flight under the forest canopy. (Source: paperswithcode.com)"
}
]
},
{
"name": "Pedestrian detection",
"description": "Pedestrian detection is the task of detecting pedestrians from a camera.\n\nFurther state-of-the-art results (e.g. on the KITTI dataset) can be found at [3D Object Detection](https://paperswithcode.com/task/object-detection). (Source: paperswithcode.com)"
},
{
"name": "Lane detection",
"description": "Lane detection is the task of detecting lanes on a road from a camera. (Source: paperswithcode.com)"
},
{
"name": "Traffic sign recognition",
"description": "Traffic sign recognition is the task of recognising traffic signs in an image or video. (Source: paperswithcode.com)"
},
{
"name": "Pedestrian attribute recognition",
"description": "Pedestrian attribution recognition is the task of recognising pedestrian features - such as whether they are talking on a phone, whether they have a backpack, and so on. (Source: paperswithcode.com)"
},
{
"name": "3D car instance understanding",
"description": "3D Car Instance Understanding is the task of estimating properties (e.g.translation, rotation and shape) of a moving or parked vehicle on the road. (Source: paperswithcode.com)",
"children": [
{
"name": "Hand gesture recognition",
"children": [
{
"name": "Skeleton based action recognition"
}
]
}
]
}
]
},
{
"name": "3D vision process",
"children": [
{
"name": "Motion forecasting",
"description": "Motion forecasting is the task of predicting the location of a tracked object in the future (Source: paperswithcode.com)",
"children": [
{
"name": "Multiple object forecasting"
}
]
},
{
"name": "3D car instance understanding",
"description": "3D Car Instance Understanding is the task of estimating properties (e.g.translation, rotation and shape) of a moving or parked vehicle on the road. (Source: paperswithcode.com)",
"children": [
{
"name": "Hand gesture recognition",
"children": [
{
"name": "Skeleton based action recognition"
}
]
}
]
},
{
"name": "3D human pose estimation",
"children": [
{
"name": "Pose prediction",
"description": "Pose prediction is to predict future poses given a window of previous poses. (Source: paperswithcode.com)"
},
{
"name": "3D absolute human pose estimation",
"description": "This task aims to solve absolute (camera-centric not root-relative) 3D human pose estimation. (Source: paperswithcode.com)",
"children": [
{
"name": "3D face animation",
"description": "Image: [Cudeiro et al](https://arxiv.org/pdf/1905.03079v1.pdf) (Source: paperswithcode.com)",
"children": [
{
"name": "Image generation",
"description": "Image generation (synthesis) is the task of generating new images from an existing dataset.\n\n- **Unconditional generation** refers to generating samples unconditionally from the dataset, i.e. $p(y)$\n- **Conditional image generation** (subtask) refers to generating samples conditionally from the dataset, based on a label, i.e. $p(y|x)$.\n\nIn this section, you can find state-of-the-art leaderboards for **unconditional generation**. For conditional generation, and other types of image generations, refer to the subtasks. (Source: paperswithcode.com)",
"children": [
{
"name": "Face generation",
"description": "Face generation is the task of generating (or interpolating) new faces from an existing dataset.\n\nThe state-of-the-art results for this task are located in the Image Generation parent. (Source: paperswithcode.com)",
"children": [
{
"name": "Talking face generation",
"description": "Talking face generation aims to synthesize a sequence of face images that correspond to given speech semantics. (Source: paperswithcode.com)"
},
{
"name": "Talking head generation",
"description": "Talking head generation is the task of generating a talking face from a set of images of a person. (Source: paperswithcode.com)",
"children": [
{
"name": "Unconstrained lip-synchronization",
"description": "Given a video of an arbitrary person, and an arbitrary driving speech, the task is to generate a lip-synced video that matches the given speech. \n\nThis task requires the approach to not be constrained by identity, voice, or language. (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Image inpainting",
"description": "**Image Inpainting** is a task of reconstructing missing regions in an image. It is an important problem in computer vision and an essential functionality in many imaging and graphics applications, e.g. object removal, image restoration, manipulation, re-targeting, compositing, and image-based rendering. (Source: paperswithcode.com)",
"children": [
{
"name": "Facial inpainting",
"description": "Facial inpainting (or face completion) is the task of generating plausible facial structures for missing pixels in a face image. (Source: paperswithcode.com)"
},
{
"name": "Image outpainting",
"description": "Predicting the visual context of an image beyond its boundary. (Source: paperswithcode.com)"
},
{
"name": "Cloud removal"
}
]
},
{
"name": "Image-to-image translation",
"description": "Image-to-image translation is the task of taking images from one domain and transforming them so they have the style (or characteristics) of images from another domain. (Source: paperswithcode.com)",
"children": [
{
"name": "Unsupervised image-to-image translation",
"description": "Unsupervised image-to-image translation is the task of doing image-to-image translation without ground truth image-to-image pairings. (Source: paperswithcode.com)"
},
{
"name": "Synthetic-to-real translation",
"description": "Synthetic-to-real translation is the task of domain adaptation from synthetic (or virtual) data to real data. (Source: paperswithcode.com)"
},
{
"name": "Multimodal unsupervised image-to-image translation",
"description": "Multimodal unsupervised image-to-image translation is the task of producing multiple translations to one domain from a single image in another domain. (Source: paperswithcode.com)"
},
{
"name": "Cross-view image-to-image translation"
},
{
"name": "Facial makeup transfer",
"description": "Facial makeup transfer aims to translate the **makeup style** from a given *reference* makeup face image to another non-makeup one while *preserving face identity*. (Source: paperswithcode.com)"
},
{
"name": "Fundus to angiography generation",
"description": "Generating Retinal Fluorescein Angiography from Retinal Fundus Image using Generative Adversarial Networks. (Source: paperswithcode.com)"
}
]
},
{
"name": "Conditional image generation",
"description": "Conditional image generation is the task of generating new images from a dataset conditional on their class. (Source: paperswithcode.com)"
},
{
"name": "Text-to-image generation",
"has_input": [
"Text"
],
"has_output": [
"Image"
],
"children": [
{
"name": "Zero-shot text-to-image generation"
}
]
},
{
"name": "Pose transfer"
},
{
"name": "Layout-to-image generation",
"description": "Layout-to-image generation its the task to generate a scene based on the given layout. The layout describes the location of the objects to be included in the output image.\nIn this section, you can find state-of-the-art leaderboards for Layout-to-image generation. (Source: paperswithcode.com)"
}
]
}
]
}
]
},
{
"name": "3D multi-person pose estimation",
"description": "This task aims to solve root-relative 3D multi-person pose estimation. No human bounding box and root joint coordinate groundtruth are used in testing time. (Source: paperswithcode.com)",
"children": [
{
"name": "3D multi-person pose estimation (root-relative)",
"description": "This task aims to solve root-relative 3D multi-person pose estimation (person-centric coordinate system). No ground truth human bounding box and human root joint coordinates are used during testing stage. (Source: paperswithcode.com)"
},
{
"name": "3D multi-person pose estimation (absolute)",
"description": "This task aims to solve absolute 3D multi-person pose Estimation (camera-centric coordinates). No ground truth human bounding box and human root joint coordinates are used during testing stage. (Source: paperswithcode.com)"
}
]
},
{
"name": "Monocular 3D human pose estimation",
"description": "This task targets at 3D human pose estimation with a single RGB camera. (Source: paperswithcode.com)"
},
{
"name": "Weakly-supervised 3D human pose estimation",
"description": "This task targets at 3D Human Pose Estimation with fewer 3D annotation. (Source: paperswithcode.com)"
}
]
},
{
"name": "3D reconstruction",
"description": "Image: [Gwak et al](https://arxiv.org/pdf/1705.10904v2.pdf) (Source: paperswithcode.com)",
"children": [
{
"name": "3D scene reconstruction",
"children": [
{
"name": "Face generation",
"description": "Face generation is the task of generating (or interpolating) new faces from an existing dataset.\n\nThe state-of-the-art results for this task are located in the Image Generation parent. (Source: paperswithcode.com)",
"children": [
{
"name": "Talking face generation",
"description": "Talking face generation aims to synthesize a sequence of face images that correspond to given speech semantics. (Source: paperswithcode.com)"
},
{
"name": "Talking head generation",
"description": "Talking head generation is the task of generating a talking face from a set of images of a person. (Source: paperswithcode.com)",
"children": [
{
"name": "Unconstrained lip-synchronization",
"description": "Given a video of an arbitrary person, and an arbitrary driving speech, the task is to generate a lip-synced video that matches the given speech. \n\nThis task requires the approach to not be constrained by identity, voice, or language. (Source: paperswithcode.com)"
}
]
}
]
}
]
},
{
"name": "3D room layouts from a single rgb panorama",
"description": "Image: [Zou et al](https://arxiv.org/pdf/1803.08999v1.pdf) (Source: paperswithcode.com)"
}
]
},
{
"name": "3D human action recognition",
"children": [
{
"name": "Skeleton based action recognition"
}
]
},
{
"name": "3D pose estimation"
},
{
"name": "Playing FPS games",
"description": "First-person shooter (FPS) games Involve like call of duty so enjoy (Source: paperswithcode.com)",
"children": [
{
"name": "Playing game of Doom",
"description": "Doom is an FPS game : the task is typically to train an agent to navigate the game environment, and additionally, acquire points by eliminating enemies. (Source: paperswithcode.com)"
}
]
},
{
"name": "3D object classification",
"description": "Image: [Sedaghat et al](https://arxiv.org/pdf/1604.03351v2.pdf) (Source: paperswithcode.com)"
},
{
"name": "3D point cloud matching",
"description": "Image: [Gojic et al](https://openaccess.thecvf.com/content_CVPR_2019/papers/Gojcic_The_Perfect_Match_3D_Point_Cloud_Matching_With_Smoothed_Densities_CVPR_2019_paper.pdf) (Source: paperswithcode.com)"
},
{
"name": "3D feature matching",
"description": "Image: [Choy et al](https://paperswithcode.com/paper/fully-convolutional-geometric-features) (Source: paperswithcode.com)"
},
{
"name": "3D shape modeling",
"description": "Image: [Gkioxari et al](https://arxiv.org/pdf/1906.02739v2.pdf) (Source: paperswithcode.com)"
},
{
"name": "Video reconstruction"
},
{
"name": "3D shape reconstruction",
"description": "Image credit: [GSNet: Joint Vehicle Pose and Shape Reconstruction with Geometrical and Scene-aware Supervision\n, ECCV'20](https://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123600511.pdf) (Source: paperswithcode.com)",
"children": [
{
"name": "3D shape reconstruction from a single 2D image",
"description": "Image: [Liao et al](https://arxiv.org/pdf/1811.12016v1.pdf) (Source: paperswithcode.com)"
}
]
},
{
"name": "3D shape classification",
"description": "Image: [Sun et al](https://arxiv.org/pdf/1804.04610v1.pdf) (Source: paperswithcode.com)"
},
{
"name": "3D face modeling"
},
{
"name": "Occluded 3D object symmetry detection"
},
{
"name": "3D object tracking"
}
]
},
{
"name": "Domain adaptation",
"description": "Domain adaptation is the task of adapting models across domains. (Source: paperswithcode.com)",
"children": [
{
"name": "Unsupervised domain adaptation",
"description": "**Unsupervised Domain Adaptation** is a learning framework to transfer knowledge learned from source domains with a large number of annotated training examples to target domains with unlabeled data only. (Source: paperswithcode.com)"
},
{
"name": "Domain generalization",
"description": "The idea of **Domain Generalization** is to learn from one or multiple training domains, to extract a domain-agnostic model which can be applied to an unseen domain (Source: paperswithcode.com)"
},
{
"name": "Partial domain adaptation",
"description": "**Partial Domain Adaptation** is a transfer learning paradigm, which manages to transfer relevant knowledge from a large-scale source domain to a small-scale target domain. (Source: paperswithcode.com)"
},
{
"name": "Continuously indexed domain adaptation",
"description": "Continuously indexed domain adaptation adapts across continuously indexed domains, e.g., across patients of different ages, where 'age' is a continuous notion. (Source: paperswithcode.com)"
},
{
"name": "Wildly unsupervised domain adaptation",
"description": "Transferring knowledge from a noisy source domain to unlabeled target domain. (Source: paperswithcode.com)"
}
]
},
{
"name": "Visual speech recognition",
"has_input": [
"Video"
],
"children": [
{
"name": "Lip to speech synthesis",
"description": "Given a silent video of a speaker, generate the corresponding speech that matches the lip movements. (Source: paperswithcode.com)",
"children": [
{
"name": "Speaker-specific lip to speech synthesis",
"description": "How accurately can we infer an individual\u2019s speech style and content from his/her lip movements? [1]\n\nIn this task, the model is trained on a specific speaker, or a very limited set of speakers. \n\n[1] Learning Individual Speaking Styles for Accurate Lip to Speech Synthesis, CVPR 2020. (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Visual dialog",
"description": "Visual Dialog requires an AI agent to hold a meaningful dialog with humans in natural, conversational language about visual content. Specifically, given an image, a dialog history, and a follow-up question about the image, the task is to answer the question. (Source: paperswithcode.com)"
},
{
"name": "Person search",
"description": "**Person Search** is a task which aims at matching a specific person among a great number of whole scene images. (Source: paperswithcode.com)"
},
{
"name": "Pose estimation",
"description": "Pose Estimation is a general problem in Computer Vision where we detect the position and orientation of an object. (Source: paperswithcode.com)",
"children": [
{
"name": "2D human pose estimation",
"children": [
{
"name": "Deblurring",
"children": [
{
"name": "Blind image deblurring",
"description": "**Blind Image Deblurring** is a classical problem in image processing and computer vision, which aims to recover a latent image from a blurred input. (Source: paperswithcode.com)"
},
{
"name": "Single-image blind deblurring"
}
]
}
]
},
{
"name": "3D human pose estimation",
"children": [
{
"name": "Pose prediction",
"description": "Pose prediction is to predict future poses given a window of previous poses. (Source: paperswithcode.com)"
},
{
"name": "3D absolute human pose estimation",
"description": "This task aims to solve absolute (camera-centric not root-relative) 3D human pose estimation. (Source: paperswithcode.com)",
"children": [
{
"name": "3D face animation",
"description": "Image: [Cudeiro et al](https://arxiv.org/pdf/1905.03079v1.pdf) (Source: paperswithcode.com)",
"children": [
{
"name": "Image generation",
"description": "Image generation (synthesis) is the task of generating new images from an existing dataset.\n\n- **Unconditional generation** refers to generating samples unconditionally from the dataset, i.e. $p(y)$\n- **Conditional image generation** (subtask) refers to generating samples conditionally from the dataset, based on a label, i.e. $p(y|x)$.\n\nIn this section, you can find state-of-the-art leaderboards for **unconditional generation**. For conditional generation, and other types of image generations, refer to the subtasks. (Source: paperswithcode.com)",
"children": [
{
"name": "Face generation",
"description": "Face generation is the task of generating (or interpolating) new faces from an existing dataset.\n\nThe state-of-the-art results for this task are located in the Image Generation parent. (Source: paperswithcode.com)",
"children": [
{
"name": "Talking face generation",
"description": "Talking face generation aims to synthesize a sequence of face images that correspond to given speech semantics. (Source: paperswithcode.com)"
},
{
"name": "Talking head generation",
"description": "Talking head generation is the task of generating a talking face from a set of images of a person. (Source: paperswithcode.com)",
"children": [
{
"name": "Unconstrained lip-synchronization",
"description": "Given a video of an arbitrary person, and an arbitrary driving speech, the task is to generate a lip-synced video that matches the given speech. \n\nThis task requires the approach to not be constrained by identity, voice, or language. (Source: paperswithcode.com)"
}
]
}
]
},
{
"name": "Image inpainting",
"description": "**Image Inpainting** is a task of reconstructing missing regions in an image. It is an important problem in computer vision and an essential functionality in many imaging and graphics applications, e.g. object removal, image restoration, manipulation, re-targeting, compositing, and image-based rendering. (Source: paperswithcode.com)",
"children": [
{
"name": "Facial inpainting",
"description": "Facial inpainting (or face completion) is the task of generating plausible facial structures for missing pixels in a face image. (Source: paperswithcode.com)"
},
{
"name": "Image outpainting",
"description": "Predicting the visual context of an image beyond its boundary. (Source: paperswithcode.com)"
},
{
"name": "Cloud removal"
}
]
},
{
"name": "Image-to-image translation",
"description": "Image-to-image translation is the task of taking images from one domain and transforming them so they have the style (or characteristics) of images from another domain. (Source: paperswithcode.com)",
"children": [
{
"name": "Unsupervised image-to-image translation",
"description": "Unsupervised image-to-image translation is the task of doing image-to-image translation without ground truth image-to-image pairings. (Source: paperswithcode.com)"
},
{
"name": "Synthetic-to-real translation",
"description": "Synthetic-to-real translation is the task of domain adaptation from synthetic (or virtual) data to real data. (Source: paperswithcode.com)"
},
{
"name": "Multimodal unsupervised image-to-image translation",
"description": "Multimodal unsupervised image-to-image translation is the task of producing multiple translations to one domain from a single image in another domain. (Source: paperswithcode.com)"
},
{
"name": "Cross-view image-to-image translation"
},
{
"name": "Facial makeup transfer",
"description": "Facial makeup transfer aims to translate the **makeup style** from a given *reference* makeup face image to another non-makeup one while *preserving face identity*. (Source: paperswithcode.com)"
},
{
"name": "Fundus to angiography generation",
"description": "Generating Retinal Fluorescein Angiography from Retinal Fundus Image using Generative Adversarial Networks. (Source: paperswithcode.com)"
}
]
},
{
"name": "Conditional image generation",
"description": "Conditional image generation is the task of generating new images from a dataset conditional on their class. (Source: paperswithcode.com)"
},
{
"name": "Text-to-image generation",
"has_input": [
"Text"
],
"has_output": [
"Image"
],
"children": [
{
"name": "Zero-shot text-to-image generation"
}
]
},
{
"name": "Pose transfer"