Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added evaluation scripts and files #13

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions scripts/evaluation/convertToAnswerList.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json

# Convert merged conversation records ('abcout.json') into the JSON-lines
# answer format consumed by the GPT review script ('formatted_data.json').


def extract_qa_pairs(items):
    """Split conversation records into parallel id/question/response lists.

    Each item must carry an 'id' and a 'conversations' list whose entries
    have 'from' and 'value' keys.  The last 'human' turn is taken as the
    question and the last 'gpt' turn as the response.  A missing turn
    yields ``None`` for that slot instead of silently reusing the value
    left over from the previous item.

    Returns:
        A tuple ``(ids, questions, responses)`` of equally long lists.
    """
    ids = []
    questions = []
    responses = []
    for item in items:
        question = None
        response = None
        for turn in item['conversations']:
            if turn['from'] == 'human':
                question = turn['value']
            elif turn['from'] == 'gpt':
                response = turn['value']
        ids.append(item['id'])
        questions.append(question)
        responses.append(response)
    return ids, questions, responses


def build_question_index(lines):
    """Map question text to question_id from JSON-lines records.

    Every non-blank line must be a JSON object with 'text' and
    'question_id' keys.  When the same text occurs more than once the
    first occurrence wins.
    """
    index = {}
    for line in lines:
        if not line.strip():
            continue
        record = json.loads(line)
        index.setdefault(record['text'], record['question_id'])
    return index


def format_answers(ids, question_ids, responses,
                   model_id="gpt-3.5-turbo:20230327"):
    """Build one answer record per (id, question_id, response) triple."""
    return [
        {
            "answer_id": answer_id,
            "model_id": model_id,
            "question_id": question_id,
            "text": response,
            "metadata": {},
        }
        for answer_id, question_id, response
        in zip(ids, question_ids, responses)
    ]


def main():
    """Read the input files, report a short preview and write the answers."""
    with open('abcout.json', encoding='utf-8') as file:
        items = json.load(file)
    ids, questions, responses = extract_qa_pairs(items)

    with open('questionsl.json', encoding='utf-8') as file:
        question_index = build_question_index(file)

    # Keep one slot per answer, even when the question text is unknown, so
    # ids, question_ids and responses stay aligned by position.
    question_ids = [question_index.get(question) for question in questions]
    unmatched = [
        answer_id
        for answer_id, question_id in zip(ids, question_ids)
        if question_id is None
    ]
    if unmatched:
        print(f"Warning: no question_id found for {len(unmatched)} "
              f"answer(s): {unmatched}")

    print("First 10 Questions:")
    for question in questions[:10]:
        print(question)

    print("\nFirst 10 Question IDs:")
    print(question_ids[:10])

    formatted_data = format_answers(ids, question_ids, responses)

    # One JSON object per line (JSON-lines), despite the .json extension.
    with open('formatted_data.json', 'w') as file:
        file.write("\n".join(json.dumps(entry) for entry in formatted_data))


if __name__ == '__main__':
    main()
115 changes: 115 additions & 0 deletions scripts/evaluation/eval_gpt_review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import argparse
import json
import os

import openai

import ray
import time


@ray.remote(num_cpus=4)
def get_eval(content: str, max_tokens: int, max_retries: int = 5) -> str:
    """Ask gpt-3.5-turbo to review ``content`` and return the reply text.

    Args:
        content: The user message sent to the model.
        max_tokens: Upper bound on the tokens generated for the reply.
        max_retries: How many times the request is attempted before
            giving up.

    Returns:
        The text of the first choice, or an empty string when every
        attempt failed.  An empty review fails score parsing downstream,
        so the failure stays visible in the output without aborting the
        whole batch.
    """
    for _ in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model='gpt-3.5-turbo',
                messages=[{
                    'role': 'system',
                    'content': 'You are a helpful and precise assistant for checking the quality of the answer.'
                }, {
                    'role': 'user',
                    'content': content,
                }],
                temperature=0.2,  # TODO: figure out which temperature is best for evaluation
                max_tokens=max_tokens,
            )
        except openai.error.RateLimitError:
            # Rate limited: fall through to the sleep below and retry.
            pass
        except Exception as e:
            print(e)
        else:
            print('success!')
            return response['choices'][0]['message']['content']
        time.sleep(1)

    # Previously this path fell through to an unbound `response`.
    print(f'failed to get a review after {max_retries} attempts')
    return ''


def parse_score(review):
    """Extract the two scores from the first line of a review.

    The first line is expected to hold exactly two numbers separated by
    whitespace and/or commas, e.g. ``"8 9"``, ``"8,9"`` or ``"8, 9"``.

    Args:
        review: The review text returned by the model.

    Returns:
        ``[score1, score2]`` as floats, or ``[-1, -1]`` when the first
        line cannot be parsed as two numbers.
    """
    try:
        score_pair = review.split('\n')[0]
        # split() with no argument collapses runs of whitespace, so a
        # comma followed by a space does not produce an empty token.
        sp = score_pair.replace(',', ' ').split()
        if len(sp) == 2:
            return [float(sp[0]), float(sp[1])]
        print('error', review)
        return [-1, -1]
    except (AttributeError, TypeError, ValueError) as e:
        print(e)
        print('error', review)
        return [-1, -1]


def build_review_content(ques, ans1, ans2, rule_dict):
    """Assemble the review prompt for one question and its two answers.

    Args:
        ques: Question record with 'text' and 'category' keys.
        ans1: First answer record with a 'text' key.
        ans2: Second answer record with a 'text' key.
        rule_dict: Maps a category to a dict with 'prompt' and 'role';
            the 'default' entry is used for categories without a rule.

    Returns:
        A tuple ``(category, content)``.
    """
    category = ques['category']
    rule = rule_dict[category] if category in rule_dict else rule_dict['default']
    prompt = rule['prompt']
    role = rule['role']
    content = (f'[Question]\n{ques["text"]}\n\n'
               f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n'
               f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n'
               f'[System]\n{prompt}\n\n')
    return category, content


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
    parser.add_argument('-q', '--question', required=True)
    parser.add_argument('-a', '--answer-list', nargs='+', default=[])
    parser.add_argument('-r', '--rule', required=True)
    parser.add_argument('-o', '--output', required=True)

    parser.add_argument('--max-tokens', type=int, default=1024, help='maximum number of tokens produced in the output')
    args = parser.parse_args()

    # Two answer files are compared against each other; fail with a usage
    # message instead of an IndexError when fewer are given.
    if len(args.answer_list) < 2:
        parser.error('-a/--answer-list requires two answer files')

    ray.init()

    with open(os.path.expanduser(args.rule), 'r') as f_rule:
        rule_dict = json.load(f_rule)

    js_list = []
    handles = []
    # The three inputs are JSON-lines files read in lockstep: line i of each
    # file is taken to describe the same question.  zip() stops at the
    # shortest file.
    with open(os.path.expanduser(args.question)) as f_q, \
            open(os.path.expanduser(args.answer_list[0])) as f_ans1, \
            open(os.path.expanduser(args.answer_list[1])) as f_ans2:
        for idx, (ques_js, ans1_js, ans2_js) in enumerate(zip(f_q, f_ans1, f_ans2), start=1):
            ques = json.loads(ques_js)
            ans1 = json.loads(ans1_js)
            ans2 = json.loads(ans2_js)

            category, content = build_review_content(ques, ans1, ans2, rule_dict)
            js_list.append({
                'id': idx,
                'question_id': ques['question_id'],
                'answer1_id': ans1['answer_id'],
                'answer2_id': ans2['answer_id'],
                'category': category})
            handles.append(get_eval.remote(content, args.max_tokens))
            # To avoid the rate limit set by OpenAI
            time.sleep(1)

    reviews = ray.get(handles)
    with open(f'{args.output}', 'w') as review_file:
        for entry, review in zip(js_list, reviews):
            entry['content'] = review
            entry['tuple'] = parse_score(review)
            review_file.write(json.dumps(entry) + '\n')
28 changes: 28 additions & 0 deletions scripts/evaluation/formatted_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{"answer_id": "36237313", "model_id": "gpt-3.5-turbo:20230327", "question_id": 15, "text": "The image appears to show three red objects that could potentially be supernovas. However, it is more likely that these red objects are foreground stars from our own galaxy. Additionally, there is a blue banner at the top of the image that mentions blog posts which may provide further information about the peculiar clumpy galaxies depicted.", "metadata": {}}
{"answer_id": "37897318", "model_id": "gpt-3.5-turbo:20230327", "question_id": 22, "text": "Based on the visual characteristics of the image, it appears to show a galaxy that could be classified as either elliptical or possibly a late disk galaxy.", "metadata": {}}
{"answer_id": "55345398", "model_id": "gpt-3.5-turbo:20230327", "question_id": 3, "text": "The image shows a galaxy with a lenticular shape, indicating a disk-like structure with a central bulge. The galaxy appears to be disturbed, as evidenced by the presence of tidal debris forming an arm-like feature. This suggests that the galaxy has undergone some sort of gravitational interaction or merger with another object.", "metadata": {}}
{"answer_id": "39374539", "model_id": "gpt-3.5-turbo:20230327", "question_id": 19, "text": "The image shows a galaxy that appears to be viewed from its edge, with a noticeable dust lane running through it. The galaxy also appears slightly disturbed at the tips, which show signs of star formation.", "metadata": {}}
{"answer_id": "55350794", "model_id": "gpt-3.5-turbo:20230327", "question_id": 12, "text": "The image appears to show a galaxy that initially resembles an elliptical shape. However, upon closer examination of the residual image, additional details become apparent.", "metadata": {}}
{"answer_id": "36255152", "model_id": "gpt-3.5-turbo:20230327", "question_id": 22, "text": "Based on the visual characteristics of the image, it appears to show a galaxy with an edge-on orientation. The presence of broad lines suggests the presence of a quasar, which is a type of active galactic nucleus. The galaxy appears to belong to the Sc type, indicating a spiral structure.", "metadata": {}}
{"answer_id": "40361164", "model_id": "gpt-3.5-turbo:20230327", "question_id": 10, "text": "The image shows a galaxy with spiral arms that have a noticeable blue color. The galaxy appears to have a clumpy and barred spiral structure, with a short strong bar and tightly wrapped spiral arms that almost form a ring. There is a possible tidal trail above the galaxy, which may be caused by the presence of a companion galaxy below it. Additionally, there are two greenish ansae located at either end of the bar.", "metadata": {}}
{"answer_id": "36250543", "model_id": "gpt-3.5-turbo:20230327", "question_id": 13, "text": "The image appears to show two separate objects. One object seems to be a starforming region, indicated by its bright and active appearance. The other object appears to be a galaxy, with a slightly larger redshift value. The galaxy seems to have a more diffuse and extended structure compared to the starforming region. Overall, the image showcases different astronomical phenomena, highlighting the diversity of objects in the universe.", "metadata": {}}
{"answer_id": "25811434", "model_id": "gpt-3.5-turbo:20230327", "question_id": 5, "text": "The image shows a central core with an x-shape pattern. The orbits of the core appear to be structured in a way that resembles the letter \"X\". This pattern suggests a complex and dynamic system, possibly involving multiple interacting objects or forces. Further analysis would be needed to determine the nature and significance of this x-shape core and its orbits.", "metadata": {}}
{"answer_id": "34240953", "model_id": "gpt-3.5-turbo:20230327", "question_id": 6, "text": "In the image, there are various aspects that can be observed. However, without specific details or context, it is difficult to provide a comprehensive explanation.", "metadata": {}}
{"answer_id": "36222753", "model_id": "gpt-3.5-turbo:20230327", "question_id": 12, "text": "The image shows a galaxy with a spiral structure. There are several objects in contact with the galaxy's core, including three objects with rings. Additionally, there appears to be a gas ring surrounding the core of the galaxy.", "metadata": {}}
{"answer_id": "32498813", "model_id": "gpt-3.5-turbo:20230327", "question_id": 4, "text": "The image shows a subject with a large core, possibly indicating a central concentration of stars or a dense region of gas and dust. There are suggestions of a barred structure, which could be a feature within the galaxy. Scattered galaxies are also visible in the image, indicating the presence of other galaxies in the vicinity. The subject appears undisturbed and is viewed face-on. Additionally, there is a separate object labeled as a quasar in the image.", "metadata": {}}
{"answer_id": "21661664", "model_id": "gpt-3.5-turbo:20230327", "question_id": 17, "text": "The image appears to show a galaxy with irregular shape and active star formation, indicated by the presence of starburst regions.", "metadata": {}}
{"answer_id": "20923069", "model_id": "gpt-3.5-turbo:20230327", "question_id": 7, "text": "The image shows a double galactic nucleus, with a green blob on the very right that appears to be an asteroid. There is also a noticeable double-core with a debris cloud.", "metadata": {}}
{"answer_id": "51727000", "model_id": "gpt-3.5-turbo:20230327", "question_id": 8, "text": "The image appears to show a celestial object that is not clearly defined. It has a shape that could be described as a blob or a fuzzy disturbed spiral. The details of the object are not discernible from the image alone.", "metadata": {}}
{"answer_id": "16215408", "model_id": "gpt-3.5-turbo:20230327", "question_id": 4, "text": "The image shows a galaxy with a slightly twisted disc. There is a red oval-shaped object above the galaxy, which appears to be smeared. Other stars in the image also show similar smearing, but not all of them. The circular appearance of the red oval star in a different image suggests that it may be an artifact of the CCD collection. There is a mention of incorrect coordinates for the image, but it is not clear which image is being referred to. Additionally, there seems to be a halo of stars above and below the central bulge of the galaxy, possibly resembling a polar ring.", "metadata": {}}
{"answer_id": "36238970", "model_id": "gpt-3.5-turbo:20230327", "question_id": 12, "text": "The image appears to show multiple objects with distinct features. There are indications of star formation and starburst activity, as well as a clumpy structure.", "metadata": {}}
{"answer_id": "34060815", "model_id": "gpt-3.5-turbo:20230327", "question_id": 4, "text": "The image appears to show a barred spiral galaxy. The presence of a bar structure in the center of the galaxy is evident. The spiral arms extend outward from the bar, creating a distinct pattern. The overall shape of the galaxy is symmetrical and exhibits a spiral structure.", "metadata": {}}
{"answer_id": "21660643", "model_id": "gpt-3.5-turbo:20230327", "question_id": 5, "text": "Based on the image, it appears that there may be an issue with the size of the object being observed. The repeated use of the hashtag #wrong-size suggests that the users have noticed something unusual or unexpected in terms of the size of the object. However, without access to the actual image, it is difficult to provide a more detailed analysis.", "metadata": {}}
{"answer_id": "21661614", "model_id": "gpt-3.5-turbo:20230327", "question_id": 3, "text": "Based on the image, there appears to be a suggestion of another galaxy located at the 11 o'clock position. However, without further details or visual information, it is difficult to provide a more elaborate explanation of the image.", "metadata": {}}
{"answer_id": "28483442", "model_id": "gpt-3.5-turbo:20230327", "question_id": 10, "text": "In the image, there appears to be an irregular galaxy. The central region of the galaxy shows a star in contact with the core or possibly two cores. The image was taken over a period of 9 hours.", "metadata": {}}
{"answer_id": "55351013", "model_id": "gpt-3.5-turbo:20230327", "question_id": 27, "text": "The image shows a celestial object with a distinct ring-like structure surrounding a central region. The outer ring appears to have a lens-like shape. The overall appearance of the object is reminiscent of a sunny-side-up egg. The image is visually intriguing and invites further exploration and analysis.", "metadata": {}}
{"answer_id": "21668459", "model_id": "gpt-3.5-turbo:20230327", "question_id": 15, "text": "The image appears to show a galaxy with a ring-like structure, a lenticular shape, and a bar in the center. There is also a suggestion of a spiral structure starting to form. Additionally, the image indicates the presence of a Seyfert-2 galaxy and an active galactic nucleus (AGN).", "metadata": {}}
{"answer_id": "40355779", "model_id": "gpt-3.5-turbo:20230327", "question_id": 17, "text": "The image appears to show a spiral galaxy that exhibits characteristics typically associated with a young age.", "metadata": {}}
{"answer_id": "28490503", "model_id": "gpt-3.5-turbo:20230327", "question_id": 22, "text": "The image appears to show a portion of the night sky with multiple celestial objects. There are various bright spots and patterns, suggesting the presence of stars and possibly galaxies. The image also includes some fainter structures, which could be remnants of previous observations or artifacts. Overall, it seems to capture a small section of the vastness of the universe.", "metadata": {}}
{"answer_id": "36213141", "model_id": "gpt-3.5-turbo:20230327", "question_id": 25, "text": "The photo shows blue clumps of star formation.", "metadata": {}}
{"answer_id": "21665278", "model_id": "gpt-3.5-turbo:20230327", "question_id": 10, "text": "The image shows a spiral galaxy with a prominent bar feature in its center. There is also a bulge and a ring structure visible in the image.", "metadata": {}}
{"answer_id": "20926746", "model_id": "gpt-3.5-turbo:20230327", "question_id": 12, "text": "The image shows a spiral galaxy with a faint arm that appears tightly wound. There is a prominent bar structure in the center of the galaxy. The galaxy exhibits signs of active star formation, as indicated by the presence of starburst regions. Additionally, there are nodes of resonance and areas of overlap within the galaxy.", "metadata": {}}
39 changes: 39 additions & 0 deletions scripts/evaluation/getGPTtoLAVASTRO.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json

# Join model answers ('llavastro.jsonl', one JSON object per line) with the
# question/answer records in 'qa.json', matching answer 'image_id' to qa 'id'.


def merge_answers(lines, items):
    """Attach each answer line to the qa item with the matching id.

    An answer matches an item when ``str(answer['image_id'])`` equals the
    item's 'id'.  The same rule decides both results, so they always
    describe the same set of answers.  Blank lines are skipped; when
    several items share an id the first one is used.

    Args:
        lines: Raw JSON-lines strings, each an object with an 'image_id'.
        items: qa records, each a dict with an 'id' key.

    Returns:
        A tuple ``(merged, kept_lines)``: ``merged`` holds a shallow copy
        of the matching item per answer with the parsed answer stored
        under 'answer'; ``kept_lines`` holds the original, unmodified
        lines of the answers that matched.
    """
    items_by_id = {}
    for item in items:
        items_by_id.setdefault(item['id'], item)

    merged = []
    kept_lines = []
    for line in lines:
        if not line.strip():
            continue
        answer = json.loads(line)
        item = items_by_id.get(str(answer['image_id']))
        if item is None:
            continue
        combined = item.copy()
        combined['answer'] = answer
        merged.append(combined)
        kept_lines.append(line)
    return merged, kept_lines


def main():
    """Read both inputs and write the merged and the filtered outputs."""
    # Read in file (1)
    with open('llavastro.jsonl', 'r') as file1:
        data1 = file1.readlines()

    # Read in file (2)
    with open('qa.json', 'r') as file2:
        data2 = json.load(file2)

    output, filtered_data1 = merge_answers(data1, data2)

    # NOTE(review): convertToAnswerList.py reads 'abcout.json' (lower case);
    # if it is meant to consume this file, the names differ on
    # case-sensitive filesystems - confirm and align.
    with open('ABCOUT.json', 'w') as outfile:
        json.dump(output, outfile)

    # Answers without a matching qa record are dropped from this copy.
    with open('llavastro_cleared.jsonl', 'w') as outfile:
        outfile.writelines(filtered_data1)


if __name__ == '__main__':
    main()
Loading