113 lines
4.2 KiB
Python
113 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
import re
|
|
|
|
# Data file and question cap used by the smoke test below.
_QUESTIONS_PATH = 'data/balotario_clase_a_cat_I.md'
_MAX_TEST_QUESTIONS = 3

# A "### <number>" heading on its own line separates one question from the next.
_HEADING_RE = re.compile(r'\n### (\d+)\n')
# Image references: a local static path is tried before the remote host.
_LOCAL_IMAGE_RE = re.compile(r'!\[\]\((/static/images/[^)]+)\)')
_REMOTE_IMAGE_RE = re.compile(
    r'!\[\]\((https://sierdgtt\.mtc\.gob\.pe/Content/img-data/[^)]+)\)'
)
_ANY_IMAGE_RE = re.compile(r'!\[\]\([^)]+\)\n*')
# Option lines look like "a) text", optionally prefixed with the ✅ marker.
_OPTION_RE = re.compile(r'^✅?\s*[a-d]\)')
_CORRECT_RE = re.compile(r'✅\s*([a-d])\)')
_CHECKMARK_RE = re.compile(r'✅\s*')


def _parse_question_block(question_num: int, question_content: str):
    """Parse the body of one question into a dict.

    Args:
        question_num: Number taken from the "### N" heading.
        question_content: Stripped text between this heading and the next.

    Returns:
        A dict with the keys 'id', 'question', 'options', 'correct', 'image'
        and 'has_image', or None when the block is incomplete (fewer than two
        options, no option marked with ✅, or no question text).
    """
    # NOTE(review): the original expression on this line was truncated in the
    # source. It is reconstructed as a check for the markdown image marker
    # that both URL patterns below start with -- confirm against the
    # project history.
    has_image = '![](' in question_content
    image_url = ""
    if has_image:
        img_match = (_LOCAL_IMAGE_RE.search(question_content)
                     or _REMOTE_IMAGE_RE.search(question_content))
        if img_match:
            image_url = img_match.group(1)
            # Drop every image reference so it does not leak into the text.
            question_content = _ANY_IMAGE_RE.sub('', question_content).strip()

    # Lines before the first option form the question text; once options have
    # started, any line that is not an option is ignored.
    question_lines = []
    option_lines = []
    for line in question_content.split('\n'):
        line = line.strip()
        if not line:
            continue
        if _OPTION_RE.match(line):
            option_lines.append(line)
        elif not option_lines:
            question_lines.append(line)

    question_text = ' '.join(question_lines).strip()

    options = []
    correct_option = ""
    for line in option_lines:
        marked = _CORRECT_RE.search(line)
        if marked:
            # If several options carry the marker, the last one wins.
            correct_option = marked.group(1)
        # Store the option without the ✅ marker.
        options.append(_CHECKMARK_RE.sub('', line).strip())

    # Only keep questions for which all the data is present.
    if len(options) < 2 or not correct_option or not question_text:
        return None

    return {
        'id': question_num,
        'question': question_text,
        'options': options,
        'correct': correct_option,
        'image': image_url,
        'has_image': has_image,
    }


def _parse_markdown_questions(path: str = _QUESTIONS_PATH,
                              max_questions: int = _MAX_TEST_QUESTIONS) -> list:
    """Read the markdown question bank and return the parsed questions.

    Args:
        path: Markdown file to read (UTF-8).
        max_questions: Parsing stops at the first heading whose number is
            greater than this value; later blocks are not examined.

    Returns:
        A list of question dicts as produced by ``_parse_question_block``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: Propagated from ``int()`` if a heading number cannot be
            converted.
    """
    with open(path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Splitting on a pattern with one capture group yields
    # [preamble, number, body, number, body, ...]; the preamble is dropped.
    blocks = _HEADING_RE.split(content)[1:]

    questions = []
    for number, body in zip(blocks[::2], blocks[1::2]):
        question_num = int(number)
        if question_num > max_questions:
            break
        parsed = _parse_question_block(question_num, body.strip())
        if parsed is not None:
            questions.append(parsed)
    return questions


def _check_questions(questions: list) -> None:
    """Assert that the parsed questions are present and well formed."""
    assert len(questions) > 0, "No se parsearon preguntas"
    assert len(questions) <= _MAX_TEST_QUESTIONS, "Se parsearon más preguntas de las esperadas"

    for q in questions:
        assert 'id' in q, "Falta el ID de la pregunta"
        assert 'question' in q, "Falta el texto de la pregunta"
        assert 'options' in q, "Faltan las opciones"
        assert 'correct' in q, "Falta la respuesta correcta"
        assert len(q['options']) >= 2, f"Pregunta {q['id']} tiene menos de 2 opciones"
        assert q['correct'] in ['a', 'b', 'c', 'd'], f"Respuesta correcta inválida en pregunta {q['id']}"
        assert len(q['question']) > 0, f"Pregunta {q['id']} está vacía"


def test_parse_markdown_questions() -> None:
    """Smoke-test the markdown parser on the first questions of the bank.

    Returns None on purpose: pytest flags test functions that return a value.
    The script entry point below calls the parsing helper directly to obtain
    the questions it prints.
    """
    _check_questions(_parse_markdown_questions())


# Run the check manually and print what was parsed.
if __name__ == "__main__":
    questions = _parse_markdown_questions()
    _check_questions(questions)

    print(f"✅ Se parsearon {len(questions)} preguntas correctamente")
    print("=" * 60)

    for q in questions:
        print(f"Pregunta {q['id']}: {q['question'][:50]}...")
        print(f"Respuesta correcta: {q['correct']}")
        print("Opciones:")
        for i, option in enumerate(q['options']):
            # Options are assumed to be listed in a, b, c, d order, so the
            # position maps to the letter.
            letter = chr(97 + i)
            marker = "✅" if letter == q['correct'] else " "
            print(f" {marker} {option}")
        print("-" * 40)
|