# hyperobject_annotator.py
import base64
import json
import os
from pathlib import Path

import cv2
from dotenv import load_dotenv
from openai import OpenAI
from tqdm import tqdm

load_dotenv()
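# Expects OPENAI_API_KEY in the environment or in a local .env file, e.g.:
#   OPENAI_API_KEY=sk-...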

class HyperobjectAnnotator:
    def __init__(self, ontology_file='ontology_map.json', output_file='annotated_ontology.json'):
        """Initialize the annotator with an OpenAI client and load the ontology."""
        self.client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        self.output_file = output_file

        # Resume from existing annotations when possible so interrupted runs
        # pick up where they left off.
        if os.path.exists(output_file):
            with open(output_file, 'r', encoding='utf-8') as f:
                self.ontology = json.load(f)
            print(f"Loaded {len(self.ontology)} existing annotations")
        else:
            # No annotations yet: start from the fresh ontology.
            with open(ontology_file, 'r', encoding='utf-8') as f:
                self.ontology = json.load(f)

        self.root_dir = Path("Generados")
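    # Each ontology entry is assumed to be a dict carrying at least a "path"
    # (relative to Generados/) and a "category"; annotated entries gain a
    # "texto" key (see annotate_ontology below).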

    def extract_middle_frame(self, video_path):
        """Extract the middle frame of the video as a base64-encoded JPEG."""
        cap = None
        try:
            cap = cv2.VideoCapture(str(video_path))
            if not cap.isOpened():
                return None
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames // 2)
            ret, frame = cap.read()
            if ret:
                ret, buffer = cv2.imencode('.jpg', frame)
                if ret:
                    return base64.b64encode(buffer).decode('utf-8')
            return None
        except Exception as e:
            print(f"Error extracting frame: {e}")
            return None
        finally:
            # Guard the release: cap is still None if VideoCapture itself raised.
            if cap is not None:
                cap.release()
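    # Note: cv2's CAP_PROP_FRAME_COUNT can be 0 or approximate for some
    # containers; extract_middle_frame then seeks to frame 0 and the first
    # decodable frame is used instead of the true middle.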

    def get_hyperobject_description(self, base64_image, video_data):
        """Generate a description of the frame using GPT-4o."""
        try:
            # Spanish-language prompt: describe the concrete elements in the
            # frame, but diffusely, leaving room for multiple interpretations
            # tied to the video's category.
            prompt = f"""
Observa los elementos presentes en esta imagen.
Genera una descripción que:
1. Identifique y describa objetos, formas o elementos específicos visibles en la imagen
2. Presente estos elementos de manera difusa o creativa, sugiriendo múltiples interpretaciones posibles
3. Mantenga un balance entre lo concreto de los elementos observados y lo difuso de su interpretación, relacionándolo con la categoría '{video_data.get('category', 'desconocida')}'
No hagas mención de la imagen, solo escribe la idea.
"""
            response = self.client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                # Frames are passed inline as data URLs rather
                                # than uploaded files.
                                "type": "image_url",
                                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                            }
                        ]
                    }
                ],
                max_tokens=8000,
                temperature=1
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"Error generating description: {e}")
            return None

    def save_current_progress(self):
        """Persist the current state of the ontology to the output file."""
        with open(self.output_file, 'w', encoding='utf-8') as f:
            json.dump(self.ontology, f, indent=4, ensure_ascii=False)
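    # Note: dumping JSON straight over the output file can leave it truncated
    # if the process dies mid-write; an atomic variant would dump to a
    # temporary file first and os.replace() it over self.output_file.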

    def annotate_ontology(self):
        """Process videos and add GPT-4o generated descriptions."""
        print("Starting annotation process with GPT-4o...")

        # Filter out already annotated videos.
        to_process = [video for video in self.ontology if 'texto' not in video]
        if not to_process:
            print("All videos have been annotated!")
            return
        print(f"Found {len(to_process)} videos to process")

        for video_data in tqdm(to_process, desc="Annotating videos"):
            video_path = self.root_dir / video_data['path']
            base64_image = self.extract_middle_frame(video_path)
            if not base64_image:
                print(f"Could not extract frame from {video_path}")
                continue
            description = self.get_hyperobject_description(base64_image, video_data)
            if description:
                # Find the video in the main ontology and update it.
                for video in self.ontology:
                    if video['path'] == video_data['path']:
                        video['texto'] = description
                        break
                # Save progress after each successful annotation.
                self.save_current_progress()
                print(f"\nProcessed: {video_data['path']}")
                print(f"Description: {description[:100]}...")

def main():
    try:
        annotator = HyperobjectAnnotator()
        annotator.annotate_ontology()
    except Exception as e:
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    main()
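
# Example run, assuming ontology_map.json sits next to this script and the
# source videos live under Generados/:
#   $ echo "OPENAI_API_KEY=sk-..." > .env
#   $ python hyperobject_annotator.py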