diff --git a/.gitignore b/.gitignore index 205f4f1c86..377d71685e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ .DS_Store .AppleDouble .LSOverride +env/ +*.wav +.env # Icon must end with two \r Icon @@ -200,3 +203,4 @@ bundle/ webpack-stats.json .vscode +local.env diff --git a/app/api/filters.py b/app/api/filters.py index 1efbc44888..c4b56e0fab 100644 --- a/app/api/filters.py +++ b/app/api/filters.py @@ -1,13 +1,19 @@ from django.db.models import Count, Q -from django_filters.rest_framework import FilterSet, BooleanFilter +from django_filters.rest_framework import FilterSet, BooleanFilter, NumberFilter -from .models import Document +from .models import Document, ConversationItem class DocumentFilter(FilterSet): seq_annotations__isnull = BooleanFilter(field_name='seq_annotations', method='filter_annotations') doc_annotations__isnull = BooleanFilter(field_name='doc_annotations', method='filter_annotations') seq2seq_annotations__isnull = BooleanFilter(field_name='seq2seq_annotations', method='filter_annotations') + conversation = NumberFilter(method='filter_conversation') + + def filter_conversation(self, queryset, name, value): + if value: + return queryset.filter(conversationitem__conversation=value) + return queryset def filter_annotations(self, queryset, field_name, value): queryset = queryset.annotate(num_annotations= @@ -24,6 +30,6 @@ def filter_annotations(self, queryset, field_name, value): class Meta: model = Document - fields = ('project', 'text', 'meta', 'created_at', 'updated_at', + fields = ('project', 'conversation', 'text', 'meta', 'created_at', 'updated_at', 'doc_annotations__label__id', 'seq_annotations__label__id', 'doc_annotations__isnull', 'seq_annotations__isnull', 'seq2seq_annotations__isnull') diff --git a/app/api/migrations/0002_auto_20191218_0134.py b/app/api/migrations/0002_auto_20191218_0134.py new file mode 100644 index 0000000000..55629fc206 --- /dev/null +++ b/app/api/migrations/0002_auto_20191218_0134.py @@ -0,0 +1,47 @@ +# 
Generated by Django 2.1.11 on 2019-12-18 01:34 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Conversation', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('meta', models.TextField(default='{}')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('audio_url', models.TextField(default='')), + ('audio_file', models.FileField(upload_to='')), + ('documents', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation', to='api.Document')), + ], + ), + migrations.CreateModel( + name='ConversationsProject', + fields=[ + ('project_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='api.Project')), + ], + options={ + 'abstract': False, + 'base_manager_name': 'objects', + }, + bases=('api.project',), + ), + migrations.AlterField( + model_name='project', + name='project_type', + field=models.CharField(choices=[('DocumentClassification', 'document classification'), ('SequenceLabeling', 'sequence labeling'), ('Seq2seq', 'sequence to sequence'), ('Conversations', 'conversations validation and labelling')], max_length=30), + ), + migrations.AddField( + model_name='conversation', + name='project', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversations', to='api.ConversationsProject'), + ), + ] diff --git a/app/api/migrations/0003_auto_20191218_0203.py b/app/api/migrations/0003_auto_20191218_0203.py new file mode 100644 index 0000000000..f8fb1e8661 --- /dev/null +++ b/app/api/migrations/0003_auto_20191218_0203.py @@ -0,0 +1,22 @@ +# Generated by Django 2.1.11 on 2019-12-18 02:03 + +from django.db import 
migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0002_auto_20191218_0134'), + ] + + operations = [ + migrations.RenameField( + model_name='conversation', + old_name='documents', + new_name='document', + ), + migrations.AlterUniqueTogether( + name='conversation', + unique_together={('project', 'document')}, + ), + ] diff --git a/app/api/migrations/0004_auto_20191218_0244.py b/app/api/migrations/0004_auto_20191218_0244.py new file mode 100644 index 0000000000..07dbf2b034 --- /dev/null +++ b/app/api/migrations/0004_auto_20191218_0244.py @@ -0,0 +1,36 @@ +# Generated by Django 2.1.11 on 2019-12-18 02:44 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0003_auto_20191218_0203'), + ] + + operations = [ + migrations.CreateModel( + name='ConversationItem', + fields=[ + ('document_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='api.Document')), + ('start_timestamp', models.TimeField()), + ('end_timestamp', models.TimeField()), + ], + bases=('api.document',), + ), + migrations.AlterUniqueTogether( + name='conversation', + unique_together=set(), + ), + migrations.AddField( + model_name='conversationitem', + name='conversation', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation_item', to='api.Conversation'), + ), + migrations.RemoveField( + model_name='conversation', + name='document', + ), + ] diff --git a/app/api/migrations/0005_auto_20191218_1704.py b/app/api/migrations/0005_auto_20191218_1704.py new file mode 100644 index 0000000000..ac3abda986 --- /dev/null +++ b/app/api/migrations/0005_auto_20191218_1704.py @@ -0,0 +1,60 @@ +# Generated by Django 2.1.11 on 2019-12-18 17:04 + +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone + + 
+class Migration(migrations.Migration): + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ('api', '0004_auto_20191218_0244'), + ] + + operations = [ + migrations.AlterModelOptions( + name='conversationitem', + options={'base_manager_name': 'objects'}, + ), + migrations.AlterModelOptions( + name='document', + options={'base_manager_name': 'objects'}, + ), + migrations.RemoveField( + model_name='conversationitem', + name='end_timestamp', + ), + migrations.RemoveField( + model_name='conversationitem', + name='start_timestamp', + ), + migrations.AddField( + model_name='conversationitem', + name='end_time', + field=models.FloatField(default=0), + preserve_default=False, + ), + migrations.AddField( + model_name='conversationitem', + name='machine_text', + field=models.TextField(default=''), + preserve_default=False, + ), + migrations.AddField( + model_name='conversationitem', + name='start_time', + field=models.FloatField(default=0), + preserve_default=False, + ), + migrations.AddField( + model_name='document', + name='polymorphic_ctype', + field=models.ForeignKey(editable=False, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='polymorphic_api.document_set+', to='contenttypes.ContentType'), + ), + migrations.AlterField( + model_name='conversationitem', + name='conversation', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation_items', to='api.Conversation'), + ), + ] diff --git a/app/api/migrations/0006_auto_20191218_1740.py b/app/api/migrations/0006_auto_20191218_1740.py new file mode 100644 index 0000000000..d45bf1ecc1 --- /dev/null +++ b/app/api/migrations/0006_auto_20191218_1740.py @@ -0,0 +1,47 @@ +# Generated by Django 2.1.11 on 2019-12-18 17:40 + +from django.conf import settings +import django.contrib.postgres.fields.jsonb +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + 
migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('api', '0005_auto_20191218_1704'), + ] + + operations = [ + migrations.CreateModel( + name='ConversationItemAnnotation', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('prob', models.FloatField(default=0.0)), + ('manual', models.BooleanField(default=False)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('text', models.TextField()), + ('start_offset', models.IntegerField()), + ('end_offset', models.IntegerField()), + ('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='conversation_annotations', to='api.Document')), + ('label', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='api.Label')), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + migrations.AlterField( + model_name='conversation', + name='audio_file', + field=models.FileField(upload_to='audio'), + ), + migrations.AlterField( + model_name='conversation', + name='meta', + field=django.contrib.postgres.fields.jsonb.JSONField(blank=True), + ), + migrations.AlterUniqueTogether( + name='conversationitemannotation', + unique_together={('start_offset', 'end_offset', 'label', 'document')}, + ), + ] diff --git a/app/api/migrations/0007_conversationitem_text_validated.py b/app/api/migrations/0007_conversationitem_text_validated.py new file mode 100644 index 0000000000..5a498d61b2 --- /dev/null +++ b/app/api/migrations/0007_conversationitem_text_validated.py @@ -0,0 +1,18 @@ +# Generated by Django 2.1.11 on 2019-12-19 00:05 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0006_auto_20191218_1740'), + ] + + operations = [ + migrations.AddField( + model_name='conversationitem', + name='text_validated', + 
field=models.BooleanField(default=False), + ), + ] diff --git a/app/api/migrations/0008_auto_20191220_1329.py b/app/api/migrations/0008_auto_20191220_1329.py new file mode 100644 index 0000000000..1107ca47ba --- /dev/null +++ b/app/api/migrations/0008_auto_20191220_1329.py @@ -0,0 +1,18 @@ +# Generated by Django 2.1.11 on 2019-12-20 13:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0007_conversationitem_text_validated'), + ] + + operations = [ + migrations.AlterField( + model_name='conversation', + name='meta', + field=models.TextField(default='{}'), + ), + ] diff --git a/app/api/migrations/0009_auto_20200113_0950.py b/app/api/migrations/0009_auto_20200113_0950.py new file mode 100644 index 0000000000..aa9f5d3bdf --- /dev/null +++ b/app/api/migrations/0009_auto_20200113_0950.py @@ -0,0 +1,23 @@ +# Generated by Django 2.1.11 on 2020-01-13 09:50 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0008_auto_20191220_1329'), + ] + + operations = [ + migrations.RenameField( + model_name='conversationitem', + old_name='text_validated', + new_name='is_validated', + ), + migrations.AddField( + model_name='conversationitem', + name='is_ignored', + field=models.BooleanField(default=False), + ), + ] diff --git a/app/api/models.py b/app/api/models.py index 697ceb0f02..d86c32b731 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -5,6 +5,7 @@ from django.db.models.signals import post_save, pre_delete from django.urls import reverse from django.conf import settings +from django.contrib.postgres.fields import JSONField from django.contrib.auth.models import User from django.contrib.staticfiles.storage import staticfiles_storage from django.core.exceptions import ValidationError @@ -15,10 +16,12 @@ DOCUMENT_CLASSIFICATION = 'DocumentClassification' SEQUENCE_LABELING = 'SequenceLabeling' SEQ2SEQ = 'Seq2seq' +CONVERSATIONS = 
'Conversations' PROJECT_CHOICES = ( (DOCUMENT_CLASSIFICATION, 'document classification'), (SEQUENCE_LABELING, 'sequence labeling'), (SEQ2SEQ, 'sequence to sequence'), + (CONVERSATIONS, 'conversations validation and labelling'), ) @@ -143,6 +146,33 @@ def get_storage(self, data): return Seq2seqStorage(data, self) +class ConversationsProject(Project): + + @property + def image(self): + return staticfiles_storage.url('assets/images/cats/seq2seq.jpg') + + def get_bundle_name(self): + return 'conversations' + + def get_bundle_name_upload(self): + return 'upload_conversations' + + def get_bundle_name_download(self): + return 'download_conversations' + + def get_annotation_serializer(self): + from .serializers import ConversationItemAnnotationSerializer + return ConversationItemAnnotationSerializer + + def get_annotation_class(self): + return ConversationItemAnnotation + + def get_storage(self, data): + from .utils import ConversationStorage + return ConversationStorage(data, self) + + class Label(models.Model): PREFIX_KEYS = ( ('ctrl', 'ctrl'), @@ -184,7 +214,7 @@ class Meta: ) -class Document(models.Model): +class Document(PolymorphicModel): text = models.TextField() project = models.ForeignKey(Project, related_name='documents', on_delete=models.CASCADE) meta = models.TextField(default='{}') @@ -196,6 +226,24 @@ def __str__(self): return self.text[:50] +class Conversation(models.Model): + project = models.ForeignKey(ConversationsProject, related_name='conversations', on_delete=models.CASCADE) + meta = models.TextField(default='{}', null=False) + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + audio_url = models.TextField(default='', null=False) + audio_file = models.FileField(upload_to='audio', null=False, blank=False) + + +class ConversationItem(Document): + conversation = models.ForeignKey(Conversation, related_name='conversation_items', on_delete=models.CASCADE) + start_time = models.FloatField() + end_time = 
models.FloatField() + machine_text = models.TextField() + is_validated = models.BooleanField(default=False) + is_ignored = models.BooleanField(default=False) + + class Annotation(models.Model): objects = AnnotationManager() @@ -241,6 +289,15 @@ class Seq2seqAnnotation(Annotation): class Meta: unique_together = ('document', 'user', 'text') +class ConversationItemAnnotation(Annotation): + document = models.ForeignKey(Document, related_name='conversation_annotations',on_delete=models.CASCADE) + label = models.ForeignKey(Label, on_delete=models.CASCADE) + text = models.TextField() + start_offset = models.IntegerField() + end_offset = models.IntegerField() + + class Meta: + unique_together = ('start_offset', 'end_offset', 'label', 'document') class Role(models.Model): name = models.CharField(max_length=100, unique=True) diff --git a/app/api/serializers.py b/app/api/serializers.py index eaa2af2cf7..419ba608c1 100644 --- a/app/api/serializers.py +++ b/app/api/serializers.py @@ -1,14 +1,17 @@ +import requests, uuid from django.conf import settings from django.contrib.auth import get_user_model from django.shortcuts import get_object_or_404 +from django.core.files.base import ContentFile from rest_framework import serializers from rest_polymorphic.serializers import PolymorphicSerializer from rest_framework.exceptions import ValidationError +import json -from .models import Label, Project, Document, RoleMapping, Role -from .models import TextClassificationProject, SequenceLabelingProject, Seq2seqProject -from .models import DocumentAnnotation, SequenceAnnotation, Seq2seqAnnotation +from .models import Label, Project, Document, RoleMapping, Role, Conversation, ConversationItem +from .models import TextClassificationProject, SequenceLabelingProject, Seq2seqProject, ConversationsProject +from .models import DocumentAnnotation, SequenceAnnotation, Seq2seqAnnotation, ConversationItemAnnotation class UserSerializer(serializers.ModelSerializer): @@ -137,12 +140,93 @@ class Meta: 
read_only_fields = ('image', 'updated_at', 'users', 'current_users_role') +class ConversationsProjectSerializer(ProjectSerializer): + + class Meta: + model = ConversationsProject + fields = ('id', 'name', 'description', 'guideline', 'users', 'current_users_role', 'project_type', 'image', + 'updated_at', 'randomize_document_order') + read_only_fields = ('image', 'updated_at', 'users', 'current_users_role') + + +class ConversationItemSerializer(DocumentSerializer): + startTimeInSeconds = serializers.FloatField(source='start_time') + endTimeInSeconds = serializers.FloatField(source='end_time') + machineTranscription = serializers.CharField(source='machine_text', allow_blank=True) + humanTranscription = serializers.CharField(source='text', allow_blank=True) + isValidated = serializers.BooleanField(source='is_validated', default=False) + isIgnored = serializers.BooleanField(source='is_ignored', default=False) + conversation = serializers.PrimaryKeyRelatedField(read_only=True) + metadata = serializers.JSONField(source='meta', required=False) + + class Meta: + model = ConversationItem + fields = ('id', 'startTimeInSeconds', 'endTimeInSeconds', 'machineTranscription', 'humanTranscription', 'metadata', 'isValidated', 'isIgnored', 'conversation', 'annotations', 'annotation_approver') + + +class DocumentPolymorphicSerializer(PolymorphicSerializer): + model_serializer_mapping = { + Document: DocumentSerializer, + ConversationItem: ConversationItemSerializer, + } + + +class ConversationSerializer(serializers.ModelSerializer): + audioFileUrl = serializers.URLField(source='audio_url', write_only=True) + metadata = serializers.JSONField(source='meta', required=False) + sentences = ConversationItemSerializer(source='conversation_items', many=True, write_only=True) + # @FIXME(Jeremie): Hack to be able to save file from audioUrl, a new field shoud be created + # following this example : https://github.com/Hipo/drf-extra-fields#base64filefield + audioFile = 
serializers.FileField(source='audio_file', required=False) + + def create(self, validated_data): + # As conversation item inherit from documents we need to set the project id + if 'conversation_items' in validated_data.keys(): + validated_data.update({'conversation_items': [ + {'project': validated_data.get('project'), **item } for item in validated_data.get('conversation_items') ]}) + + if 'meta' in validated_data.keys(): + validated_data.update({ + 'meta': json.dumps(validated_data.get('meta')) + }) + + many_to_many = {} + for field_name in ['conversation_items']: + if field_name in validated_data: + many_to_many[field_name] = validated_data.pop(field_name) + + instance = self.Meta.model.objects.create(**validated_data) + + # Save many-to-many relationships after the instance is created. + if many_to_many: + for field_name, value in many_to_many.items(): + if isinstance(value, list): + for related in value: + field = getattr(instance, field_name) + field.create(**related) + + return instance + + def validate(self, data): + if data.get('audio_url'): + # @FIXME(Jeremie): This will probably not scale as this is sync and blocking call + res = requests.get(data.get('audio_url'), timeout=30) + if res.status_code == 200: + data['audio_file'] = ContentFile(res.content, name=f'{uuid.uuid4()}.wav') + return data + + class Meta: + model = Conversation + fields = ('id', 'audioFileUrl', 'metadata', 'audioFile', 'sentences') + + class ProjectPolymorphicSerializer(PolymorphicSerializer): model_serializer_mapping = { Project: ProjectSerializer, TextClassificationProject: TextClassificationProjectSerializer, SequenceLabelingProject: SequenceLabelingProjectSerializer, - Seq2seqProject: Seq2seqProjectSerializer + Seq2seqProject: Seq2seqProjectSerializer, + ConversationsProject: ConversationsProjectSerializer, } @@ -188,6 +272,14 @@ class Meta: read_only_fields = ('user',) +class ConversationItemAnnotationSerializer(DocumentAnnotationSerializer): + + class Meta: + model = ConversationItemAnnotation 
+ fields = ('id', 'prob', 'label', 'start_offset', 'end_offset', 'user', 'document') + read_only_fields = ('user', ) + + class RoleSerializer(serializers.ModelSerializer): class Meta: model = Role diff --git a/app/api/urls.py b/app/api/urls.py index b65d87a00b..6255a29fa3 100644 --- a/app/api/urls.py +++ b/app/api/urls.py @@ -4,7 +4,8 @@ from .views import Me, Features, Users from .views import ProjectList, ProjectDetail -from .views import LabelList, LabelDetail, ApproveLabelsAPI +from .views import LabelList, LabelDetail, ApproveLabelsAPI, ApproveCorrectionsAPI +from .views import ConversationList from .views import DocumentList, DocumentDetail from .views import AnnotationList, AnnotationDetail from .views import TextUploadAPI, TextDownloadAPI, CloudUploadAPI @@ -24,6 +25,13 @@ StatisticsAPI.as_view(), name='statistics'), path('projects//labels', LabelList.as_view(), name='label_list'), + + path('projects//conversations', + ConversationList.as_view(), name='conversation_list'), + + path('projects//docs//approve-correction', + ApproveCorrectionsAPI.as_view(), name='approve_correction'), + path('projects//labels/', LabelDetail.as_view(), name='label_detail'), path('projects//docs', diff --git a/app/api/utils.py b/app/api/utils.py index 396a66b4b4..aac77d0f0c 100644 --- a/app/api/utils.py +++ b/app/api/utils.py @@ -16,7 +16,7 @@ from .exceptions import FileParseException from .models import Label -from .serializers import DocumentSerializer, LabelSerializer +from .serializers import DocumentSerializer, LabelSerializer, ConversationSerializer def extract_label(tag): @@ -217,6 +217,57 @@ def make_annotations(cls, docs, labels): return annotations +class ConversationStorage(BaseStorage): + """Store json for speech2text with conversation. + + The format is as follows: + {"audioUrl": "https://.....wav", "metadata": { "service": "AzureSpeechAPI", "duration": 30 },"sentences": [{ }, { ... }, ...]} + ... 
+ """ + @transaction.atomic + def save(self, user): + saved_labels = {label.text: label for label in self.project.labels.all()} + for data in self.data: + conversations = self.save_conversations(data) + labels = self.extract_label(data) + unique_labels = self.extract_unique_labels(labels) + unique_labels = self.exclude_created_labels(unique_labels, saved_labels) + unique_labels = self.to_serializer_format(unique_labels, saved_labels) + new_labels = self.save_label(unique_labels) + saved_labels = self.update_saved_labels(saved_labels, new_labels) + annotations = self.make_annotations(conversations, labels, saved_labels) + self.save_annotation(annotations, user) + + + def save_conversations(self, data): + serializer = ConversationSerializer(data=data, many=True) + serializer.is_valid(raise_exception=True) + conversation = serializer.save(project=self.project) + return conversation + + @classmethod + def extract_unique_labels(cls, labels): + return set([label for _, _, label in itertools.chain(*labels)]) + + @classmethod + def extract_label(cls, data): + return [sentence.get('labels', []) for conversation in data for sentence in conversation.get('sentences', [])] + + @classmethod + def make_annotations(cls, conversations, labels, saved_labels): + sentences = [sentence for conversation in conversations for sentence in conversation.conversation_items.all()] + annotations = [] + for sentence, spans in zip(sentences, labels): + for span in spans: + start_offset, end_offset, name = span + label = saved_labels[name] + annotations.append({'document': sentence.id, + 'label': label.id, + 'start_offset': start_offset, + 'end_offset': end_offset}) + return annotations + + class FileParser(object): def parse(self, file): diff --git a/app/api/views.py b/app/api/views.py index 4b31bcd8b6..be300d7d3a 100644 --- a/app/api/views.py +++ b/app/api/views.py @@ -14,9 +14,9 @@ from rest_framework_csv.renderers import CSVRenderer from .filters import DocumentFilter -from .models import 
Project, Label, Document, RoleMapping, Role +from .models import Project, Label, Document, RoleMapping, Role, Conversation from .permissions import IsProjectAdmin, IsAnnotatorAndReadOnly, IsAnnotator, IsAnnotationApproverAndReadOnly, IsOwnAnnotation, IsAnnotationApprover -from .serializers import ProjectSerializer, LabelSerializer, DocumentSerializer, UserSerializer +from .serializers import ProjectSerializer, LabelSerializer, DocumentSerializer, UserSerializer, ConversationSerializer, DocumentPolymorphicSerializer from .serializers import ProjectPolymorphicSerializer, RoleMappingSerializer, RoleSerializer from .utils import CSVParser, ExcelParser, JSONParser, PlainTextParser, CoNLLParser, iterable_to_io from .utils import JSONLRenderer @@ -70,15 +70,16 @@ def get(self, request, *args, **kwargs): p = get_object_or_404(Project, pk=self.kwargs['project_id']) include = set(request.GET.getlist('include')) + conversation = request.GET.get('conversation') response = {} - + if not include or 'label' in include or 'user' in include: label_count, user_count = self.label_per_data(p) response['label'] = label_count response['user'] = user_count if not include or 'total' in include or 'remaining' in include: - progress = self.progress(project=p) + progress = self.progress(project=p, conversation=conversation) response.update(progress) if include: @@ -86,8 +87,11 @@ def get(self, request, *args, **kwargs): return Response(response) - def progress(self, project): - docs = project.documents + def progress(self, project, conversation=None): + if conversation: + docs = get_object_or_404(project.conversations, pk=conversation).conversation_items + else: + docs = project.documents annotation_class = project.get_annotation_class() total = docs.count() done = annotation_class.objects.filter(document_id__in=docs.all(), @@ -111,6 +115,20 @@ def post(self, request, *args, **kwargs): document.save() return Response(DocumentSerializer(document).data) +class ApproveCorrectionsAPI(APIView): + 
permission_classes = [IsAuthenticated & (IsAnnotationApprover | IsProjectAdmin)] + + def post(self, request, *args, **kwargs): + corrected = self.request.data.get('corrected') + document = get_object_or_404(Document, pk=self.kwargs['doc_id']) + document.is_validated = bool(corrected) + if corrected: + # We don't want to change the text if someone clicks "correct again" + humanText = self.request.data.get('correctedText') + document.text = humanText + document.save() + return Response(DocumentSerializer(document).data) + class LabelList(generics.ListCreateAPIView): serializer_class = LabelSerializer @@ -125,6 +143,15 @@ def perform_create(self, serializer): project = get_object_or_404(Project, pk=self.kwargs['project_id']) serializer.save(project=project) +class ConversationList(generics.ListCreateAPIView): + serializer_class = ConversationSerializer + pagination_class = None + permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin] + + def get_queryset(self): + project = get_object_or_404(Project, pk=self.kwargs['project_id']) + return project.conversations + class LabelDetail(generics.RetrieveUpdateDestroyAPIView): queryset = Label.objects.all() @@ -134,7 +161,7 @@ class LabelDetail(generics.RetrieveUpdateDestroyAPIView): class DocumentList(generics.ListCreateAPIView): - serializer_class = DocumentSerializer + serializer_class = DocumentPolymorphicSerializer filter_backends = (DjangoFilterBackend, filters.SearchFilter, filters.OrderingFilter) search_fields = ('text', ) ordering_fields = ('created_at', 'updated_at', 'doc_annotations__updated_at', @@ -142,7 +169,7 @@ class DocumentList(generics.ListCreateAPIView): filter_class = DocumentFilter permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin] - def get_queryset(self): + def get_queryset(self): project = get_object_or_404(Project, pk=self.kwargs['project_id']) queryset = project.documents @@ -158,7 +185,7 @@ def perform_create(self, serializer): class 
DocumentDetail(generics.RetrieveUpdateDestroyAPIView): queryset = Document.objects.all() - serializer_class = DocumentSerializer + serializer_class = DocumentPolymorphicSerializer lookup_url_kwarg = 'doc_id' permission_classes = [IsAuthenticated & IsInProjectReadOnlyOrAdmin] diff --git a/app/app/settings.py b/app/app/settings.py index c871ff5b25..0d8c8610fc 100644 --- a/app/app/settings.py +++ b/app/app/settings.py @@ -123,6 +123,13 @@ if path.isdir(static_path) ] +DEFAULT_FILE_STORAGE = 'storages.backends.azure_storage.AzureStorage' +AZURE_ACCOUNT_NAME = env('AZURE_ACCOUNT_NAME', None) +AZURE_ACCOUNT_KEY = env('AZURE_ACCOUNT_KEY', None) +AZURE_CONTAINER = env('AZURE_CONTAINER', None) +AZURE_URL_EXPIRATION_SECS=3600 +AZURE_CACHE_CONTROL = "public,max-age=31536000,immutable" + STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage' WEBPACK_LOADER = { @@ -155,6 +162,7 @@ SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_KEY = env('OAUTH_AAD_KEY', None) SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_SECRET = env('OAUTH_AAD_SECRET', None) SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_TENANT_ID = env('OAUTH_AAD_TENANT', None) +SOCIAL_AUTH_AZUREAD_TENANT_OAUTH2_WHITELISTED_DOMAINS = [d.strip() for d in env('OAUTH_AAD_WHITELISTED_DOMAINS', '').split(',') if d.strip()] AZUREAD_ADMIN_GROUP_ID = env('AZUREAD_ADMIN_GROUP_ID', None) if AZUREAD_ADMIN_GROUP_ID: diff --git a/app/server/static/assets/css/annotation.css b/app/server/static/assets/css/annotation.css index f7f6abf622..8324e39de1 100644 --- a/app/server/static/assets/css/annotation.css +++ b/app/server/static/assets/css/annotation.css @@ -23,6 +23,14 @@ body { overflow-x: hidden; } +.sidebar-scrollable.half-sized { + max-height: 45vh; +} + +.hero.is-fullheight { + min-height: calc(100vh - 52px); +} + .messages { display: block; background-color: #fff; diff --git a/app/server/static/components/annotationMixin.js b/app/server/static/components/annotationMixin.js index b1c1d2ddd2..9094cd9e12 100644 --- a/app/server/static/components/annotationMixin.js +++ 
b/app/server/static/components/annotationMixin.js @@ -224,7 +224,7 @@ export default { HTTP.post(`docs/${document.id}/approve-labels`, { approved }).then((response) => { const documents = this.docs.slice(); - documents[this.pageNumber] = response.data; + documents[this.pageNumber].annotation_approver = response.data.annotation_approver; this.docs = documents; }); }, diff --git a/app/server/static/components/conversations.pug b/app/server/static/components/conversations.pug new file mode 100644 index 0000000000..8d24953fd9 --- /dev/null +++ b/app/server/static/components/conversations.pug @@ -0,0 +1,211 @@ +div.columns(v-cloak="") + aside.column.is-3.aside.hero.is-fullheight + div + + div.main.pr20.pl20 + + div + | Please select a conversation: + + div.main.sidebar-scrollable.half-sized + a.item( + v-for="(conversation, index) in conversations" + v-bind:class="{ active: conversation.id === selectedConversationId }" + v-on:click="selectedConversationId = conversation.id" + href="#" + ) + //- span.icon + //- i.fa.fa-check(v-show="doc.isValidated && !doc.annotation_approver") + //- i.fa.fa-check-double(v-show="doc.isValidated && doc.annotation_approver") + span.name {{ parseConversationsTitle(conversation).slice(0, 60) }}... 
+ + div.main.pt0.pb0.pr20.pl20 + span About {{ count }} results (page {{ paginationPage }} of {{ paginationPages }}) + + div.main.pt0.pb0.pr20.pl20 + div.columns + div.column.is-5 + div.field.has-addons + div.control.is-expanded + input.input( + v-model="searchQuery" + v-on:keyup.enter="submit" + type="text" + placeholder="Search document" + style="border-right: none; box-shadow: none; -webkit-box-shadow: none;" + ) + + div.control + div.dropdown.is-hoverable + div.dropdown-trigger + button.button( + aria-haspopup="true" + aria-controls="dropdown-menu" + style="border-left: none" + ) + span.icon.has-text-grey.pr0 + i.fas.fa-angle-down(aria-hidden="true") + + div.dropdown-menu.pt0#dropdown-menu(role="menu") + div.dropdown-content + a.dropdown-item + label.radio + input( + v-model="picked" + type="radio" + value="all" + checked="" + ) + | All + a.dropdown-item + label.radio + input( + v-model="picked" + type="radio" + value="active" + ) + | Active + a.dropdown-item + label.radio + input( + v-model="picked" + type="radio" + value="completed" + ) + | Completed + div.column.is-5 + div.select + select(v-model="ordering") + option(value="", disabled, selected) Sort by + option(value="created_at") Created : Ascending + option(value="-created_at") Created : Descending + option(value="updated_at") Updated : Ascending + option(value="-updated_at") Updated : Descending + + div.main.sidebar-scrollable.half-sized + a.item( + v-for="(doc, index) in docs" + v-bind:class="{ active: index == pageNumber }" + v-bind:data-preview-id="index" + v-on:click="pageNumber = index" + href="#" + ) + span.icon + i.fa.fa-check(v-show="doc.isValidated && !doc.annotation_approver") + i.fa.fa-check-double(v-show="doc.isValidated && doc.annotation_approver") + i.fas.fa-minus-circle(v-show="doc.isIgnored && !doc.annotation_approver") + span.name {{ doc.text.slice(0, 60) }}... 
+ + div.column.is-7.is-offset-1.message.hero.is-fullheight#message-pane + div.modal(v-bind:class="{ 'is-active': isAnnotationGuidelineActive }") + div.modal-background + div.modal-card + header.modal-card-head + p.modal-card-title Annotation Guideline + button.delete( + v-on:click="isAnnotationGuidelineActive = !isAnnotationGuidelineActive" + aria-label="close" + ) + section.modal-card-body.modal-card-body-footer.content( + v-html="compiledMarkdown" + style="line-height: 150%" + ) + + div.modal(v-bind:class="{ 'is-active': isMetadataActive }") + div.modal-background + div.modal-card + header.modal-card-head + p.modal-card-title Document Metadata + button.delete( + v-on:click="isMetadataActive = !isMetadataActive" + aria-label="close" + ) + section.modal-card-body.modal-card-body-footer + vue-json-pretty( + v-bind:data="documentMetadata" + v-bind:show-double-quotes="false" + v-bind:show-line="false" + ) + + div.columns.is-multiline.is-gapless.is-mobile.is-vertical-center + div.column.is-12 Conversation : + strong {{ parseConversationsTitle(conversation) }} + div.column.is-3 + progress.progress.is-inline-block( + v-bind:class="progressColor" + v-bind:value="achievement" + max="100" + ) 30% + div.column.is-5 + span.ml10 + strong {{ total - remaining }} + | / + span {{ total }} + + div.column.is-1.has-text-right + a.button.tooltip.is-tooltip-bottom( + v-if="isAnnotationApprover && documentIsCorrected" + v-on:click="approveDocumentAnnotations" + v-bind:data-tooltip="documentAnnotationsApprovalTooltip" + ) + span.icon + i.fas(v-bind:class="[documentAnnotationsAreApproved ? 'fa-thumbs-down' : 'fa-thumbs-up']") + div.column.is-1.has-text-right + a.button(v-on:click="isAnnotationGuidelineActive = !isAnnotationGuidelineActive") + span.icon + i.fas.fa-book + div.column.is-1.has-text-right + a.button( + v-on:click="isMetadataActive = !isMetadataActive && documentMetadata != null" + v-bind:disabled="documentMetadata == null" + v-bind:title="documentMetadata == null ? 
'No document metadata available.' : null" + ) + span.icon + i.fas.fa-box + + div.columns + div.column + block annotation-area + + div.column(v-if="documentMetadata != null && documentMetadata.documentSourceUrl != null") + preview(v-bind:url="documentMetadata.documentSourceUrl") + + div.level.mt30 + div.level-left + div.buttons + a.button( + v-shortkey="{ prev1: ['shift', 'ctrl', 'p'], prev2: ['shift', 'arrowup'], prev3: ['shift', 'arrowleft'] }" + v-on:click="prevPagination" + v-on:shortkey="prevPagination" + ) + span.icon.tooltip(data-tooltip="Previous page") + i.fas.fa-arrow-left + + a.button( + v-shortkey="{ prev1: ['ctrl', 'p'], prev2: ['arrowup'], prev3: ['arrowleft'] }" + v-on:click="prevPage" + v-on:shortkey="prevPage" + ) + span.icon.tooltip(data-tooltip="Previous document") + i.fas.fa-chevron-left + + div.level-center + span.button.is-static {{ offset + pageNumber + 1 }} / {{ count }} + + div.level-right + div.buttons + a.button( + v-shortkey="{ next1: ['ctrl', 'n'], next2: ['arrowdown'], next3: ['arrowright'] }" + v-on:click="nextPage" + v-on:shortkey="nextPage" + ) + span.icon.tooltip(data-tooltip="Next document") + i.fas.fa-chevron-right + + a.button( + v-shortkey="{ next1: ['shift', 'ctrl', 'n'], next2: ['shift', 'arrowdown'], next3: ['shift', 'arrowright'] }" + v-on:click="nextPagination" + v-on:shortkey="nextPagination" + ) + span.icon.tooltip(data-tooltip="Next page") + i.fas.fa-arrow-right diff --git a/app/server/static/components/conversations.vue b/app/server/static/components/conversations.vue new file mode 100644 index 0000000000..b09190e212 --- /dev/null +++ b/app/server/static/components/conversations.vue @@ -0,0 +1,146 @@ + + + + + diff --git a/app/server/static/components/corrector.vue b/app/server/static/components/corrector.vue new file mode 100644 index 0000000000..c872f34f2a --- /dev/null +++ b/app/server/static/components/corrector.vue @@ -0,0 +1,333 @@ + + + + + diff --git a/app/server/static/components/download_conversations.vue 
b/app/server/static/components/download_conversations.vue new file mode 100644 index 0000000000..9a4c66a6b7 --- /dev/null +++ b/app/server/static/components/download_conversations.vue @@ -0,0 +1,41 @@ + + + diff --git a/app/server/static/components/projects.vue b/app/server/static/components/projects.vue index a692ce4a1d..920e1cb97b 100644 --- a/app/server/static/components/projects.vue +++ b/app/server/static/components/projects.vue @@ -40,6 +40,7 @@ option(value="DocumentClassification") document classification option(value="SequenceLabeling") sequence labeling option(value="Seq2seq") sequence to sequence + option(value="Conversations") conversations validation and labelling p.help.is-danger {{ projectTypeError }} div.field @@ -200,6 +201,9 @@ export default { if (projectType === 'Seq2seq') { return this.selected === 'Seq2seq'; } + if (projectType === 'Conversations') { + return this.selected === 'Conversations'; + } return false; }, @@ -243,6 +247,9 @@ export default { if (this.projectType === 'Seq2seq') { return 'Seq2seqProject'; } + if (this.projectType === 'Conversations') { + return 'ConversationsProject'; + } return ''; }, }, diff --git a/app/server/static/components/upload_conversations.vue b/app/server/static/components/upload_conversations.vue new file mode 100644 index 0000000000..7dc3b4dea9 --- /dev/null +++ b/app/server/static/components/upload_conversations.vue @@ -0,0 +1,46 @@ + + + diff --git a/app/server/static/package-lock.json b/app/server/static/package-lock.json index 59e579441d..f268e20337 100644 --- a/app/server/static/package-lock.json +++ b/app/server/static/package-lock.json @@ -2004,6 +2004,11 @@ "integrity": "sha512-ZIzRpLJrOj7jjP2miAtgqIfmzbxa4ZOr5jJc601zklsfEx9oTzmmj2nVpIPRpNlRTIh8lc1kyViIY7BWSGNmKw==", "dev": true }, + "diff": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.1.tgz", + "integrity": "sha512-s2+XdvhPCOF01LRQBC8hf4vhbVmI2CGS5aZnxLJlT5FtdhPCDFq80q++zK2KlrVorVDdL5BOGZ/VfLrVtYNF+Q==" + }, 
"diffie-hellman": { "version": "5.0.3", "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", @@ -8582,6 +8587,11 @@ "neo-async": "^2.5.0" } }, + "wavesurfer.js": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/wavesurfer.js/-/wavesurfer.js-3.2.0.tgz", + "integrity": "sha512-P75SSpYTpzpCKFKjxmHkdp2jGpcm4neaGncBGV/jzfI5FPSIfl5mXTTZlZFpJhs7To+NI34Dj5y661JI43HEzA==" + }, "wbuf": { "version": "1.7.3", "resolved": "https://registry.npmjs.org/wbuf/-/wbuf-1.7.3.tgz", diff --git a/app/server/static/package.json b/app/server/static/package.json index d54be622af..7c0e0fb3c1 100644 --- a/app/server/static/package.json +++ b/app/server/static/package.json @@ -18,6 +18,7 @@ "axios-mock-adapter": "^1.17.0", "buefy": "^0.8.2", "chart.js": "^2.7.2", + "diff": "^4.0.1", "highlight.js": "^9.12.0", "lodash.isempty": "^4.4.0", "marked": "^0.7.0", @@ -28,7 +29,8 @@ "vue-json-pretty": "^1.6.0", "vue-loader": "^15.2.4", "vue-shortkey": "^3.1.6", - "vue-swatches": "^1.0.3" + "vue-swatches": "^1.0.3", + "wavesurfer.js": "^3.2.0" }, "devDependencies": { "babel-eslint": "^10.0.1", diff --git a/app/server/static/pages/conversations.js b/app/server/static/pages/conversations.js new file mode 100644 index 0000000000..a017aba9d6 --- /dev/null +++ b/app/server/static/pages/conversations.js @@ -0,0 +1,14 @@ +import Vue from 'vue'; +import Conversations from '../components/conversations.vue'; + +Vue.use(require('vue-shortkey'), { + prevent: ['input', 'textarea'], +}); + +new Vue({ + el: '#mail-app', + + components: { Conversations }, + + template: '', +}); diff --git a/app/server/static/pages/download_conversations.js b/app/server/static/pages/download_conversations.js new file mode 100644 index 0000000000..20acc48431 --- /dev/null +++ b/app/server/static/pages/download_conversations.js @@ -0,0 +1,10 @@ +import Vue from 'vue'; +import DownloadConversations from '../components/download_conversations.vue'; + +new Vue({ + el: '#mail-app', + + components: 
{ DownloadConversations }, + + template: '', +}); diff --git a/app/server/static/pages/upload_conversations.js b/app/server/static/pages/upload_conversations.js new file mode 100644 index 0000000000..7a10543c49 --- /dev/null +++ b/app/server/static/pages/upload_conversations.js @@ -0,0 +1,10 @@ +import Vue from 'vue'; +import UploadConversations from '../components/upload_conversations.vue'; + +new Vue({ + el: '#mail-app', + + components: { UploadConversations }, + + template: '', +}); diff --git a/app/server/templates/base.html b/app/server/templates/base.html index 12925c693b..9aabfa4267 100644 --- a/app/server/templates/base.html +++ b/app/server/templates/base.html @@ -9,7 +9,7 @@ doccano - Document Annotation Tool - diff --git a/docker-compose.yml b/docker-compose.yml index 9b41948997..0d2d93449f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,10 @@ services: ADMIN_PASSWORD: "password" ADMIN_EMAIL: "admin@example.com" DATABASE_URL: "postgres://doccano:doccano@postgres:5432/doccano?sslmode=disable" - ALLOW_SIGNUP: "False" + ALLOW_SIGNUP: "False" + DOCCANO_PAGE_SIZE: 50 + env_file: + - local.env ports: - 8000:8000 @@ -31,6 +34,8 @@ services: postgres: image: postgres:9.6 + volumes: + - postgres_data:/var/lib/postgresql/data/ environment: POSTGRES_USER: "doccano" POSTGRES_PASSWORD: "doccano" @@ -41,3 +46,4 @@ services: volumes: node_modules: venv: + postgres_data: \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 12b9fe1342..c661f4bd14 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ django-polymorphic==2.0.3 django-pyodbc-azure==2.1.0.0 django-rest-polymorphic==0.1.8 djangorestframework==3.10 +django-storages[azure]==1.8 djangorestframework-csv==2.1.0 djangorestframework-filters==0.10.2 environs==4.1.0