from flask import Flask, request, jsonify
import fitz  # PyMuPDF
import requests
from io import BytesIO

# Flask application instance on which the routes in this module are registered.
app = Flask(__name__)

@app.route('/extract_pdf_info_test', methods=['POST'])
def extract_pdf_info():
    """Download a PDF from a caller-supplied URL and return its text line by line.

    Expects a JSON request body of the form ``{"pdf_url": "<url>"}``.

    Returns:
        On success, a JSON object ``{"lineas": [...]}``. Each page contributes
        a ``===== Página N =====`` header (N is 1-based) followed by that
        page's text lines, each prefixed with a zero-padded 0-based index.

        On failure, a JSON object ``{"error": "<message>"}`` with HTTP 400
        when ``pdf_url`` is missing, the download fails (network error,
        timeout or non-200 status), or the downloaded bytes cannot be read
        as a PDF.
    """
    # (connect, read) timeouts in seconds; without them a stalled remote
    # host would block this worker indefinitely.
    download_timeout = (5, 30)

    # silent=True returns None instead of aborting the request when the body
    # is absent, has the wrong content type, or is not valid JSON.
    payload = request.get_json(silent=True)
    pdf_url = payload.get('pdf_url') if isinstance(payload, dict) else None
    if not pdf_url:
        return jsonify({"error": "Missing pdf_url"}), 400

    # NOTE(security): the server fetches an arbitrary, caller-controlled URL
    # (SSRF risk) and buffers the whole response in memory. Restrict the
    # allowed hosts and response size before exposing this endpoint to
    # untrusted clients.
    try:
        response = requests.get(pdf_url, timeout=download_timeout)
    except requests.RequestException:
        # Covers invalid URLs/schemes, DNS and connection errors, timeouts.
        return jsonify({"error": "Failed to download PDF"}), 400
    if response.status_code != 200:
        return jsonify({"error": "Failed to download PDF"}), 400

    resultado = []
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=BytesIO(response.content), filetype="pdf") as doc:
            for page_number, page in enumerate(doc, start=1):
                resultado.append(f"===== Página {page_number} =====")
                resultado.extend(
                    f"{idx:02d}: {line}"
                    for idx, line in enumerate(page.get_text().splitlines())
                )
    except (RuntimeError, ValueError):
        # PyMuPDF reports empty, corrupt or password-protected data with
        # RuntimeError subclasses or ValueError, depending on the version.
        return jsonify({"error": "Downloaded file is not a readable PDF"}), 400

    return jsonify({"lineas": resultado})


if __name__ == '__main__':
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must never be on by default while the server listens on all
    # interfaces (0.0.0.0). Opt in explicitly with FLASK_DEBUG=1 for local
    # development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)
