LangChain Extraction

参考: https://python.langchain.com/docs/use_cases/extraction/

示例代码:

import dotenv
import os
import warnings
import uuid
from langchain_core._api.beta_decorator import LangChainBetaWarning
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_openai import ChatOpenAI
from typing import List, Optional, Dict, TypedDict


# Pull settings from a local .env file; override=True asks python-dotenv to
# let values from that file replace variables already set in the process.
dotenv.load_dotenv(override=True)

# Credentials and endpoint handed to ChatOpenAI further down. Each is None
# when the corresponding environment variable is not set.
OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY")
OPEN_BASE_URL = os.environ.get("OPEN_BASE_URL")


class Person(BaseModel):
    """Information about a person."""

    # The class docstring above and every Field description below are copied
    # verbatim into the JSON schema sent with the request (they show up as the
    # "description" entries in the request capture), so they act as prompt
    # text and must not be reworded casually.
    #
    # Each field is Optional[str], so None is an accepted value, but
    # `Field(...)` declares no default: all three names are listed under
    # "required" in the generated schema and must be passed explicitly when
    # constructing a Person.
    # NOTE(review): if the model should be allowed to omit a key entirely,
    # a default of None would be needed — confirm the intended contract.
    name: Optional[str] = Field(..., description="The name of the person")
    # NOTE(review): "peron's" looks like a typo for "person's"; it is part of
    # the schema text sent to the model, so changing it changes the request.
    hair_color: Optional[str] = Field(..., description="The color of the peron's hair if known")
    # Declared as a string, not a number; the captured response returns a
    # value such as "1.83" for this field.
    height_in_meters: Optional[str] = Field(..., description="Height measured in meters")


class Data(BaseModel):
    """Extracted data about people."""

    # This model is the schema handed to `llm.with_structured_output`; its
    # docstring is sent as the description of the "Data" function in the
    # request, so treat the docstring as prompt text.
    #
    # A list, so a single input text can yield any number of Person entries.
    people: List[Person]


class _ExampleRequiredKeys(TypedDict):
    """Keys that every few-shot example must provide."""

    input: str  # This is the example text
    tool_calls: List[BaseModel]  # Instances of pydantic model that should be extracted


class Example(_ExampleRequiredKeys, total=False):
    """A representation of an example consisting of text input and expected tool calls.

    For extraction, the tool calls are represented as instances of pydantic model.

    Required keys (inherited): ``input`` and ``tool_calls``.

    Optional key: ``tool_outputs``. ``tool_example_to_messages`` looks this
    key up with ``example.get("tool_outputs")``, so it is declared here as a
    non-required key (``total=False``) to keep that lookup valid for type
    checkers. Dicts that only carry ``input`` and ``tool_calls`` remain valid.
    """

    # One reply string per tool call, paired positionally with ``tool_calls``.
    tool_outputs: List[str]


def tool_example_to_messages(example: Example) -> List[BaseMessage]:
    """Turn one few-shot example into a tool-calling message exchange.

    The result contains, in order: a HumanMessage holding the example text,
    an AIMessage with empty content whose ``additional_kwargs["tool_calls"]``
    carries one function call per entry of ``example["tool_calls"]``, and
    then ToolMessage replies paired with those calls.

    Args:
        example: Mapping with ``"input"`` (the text) and ``"tool_calls"``
            (pydantic instances). An optional ``"tool_outputs"`` list of
            strings supplies the tool replies; when it is missing or empty,
            every call is answered with a fixed confirmation string.

    Returns:
        The list of messages described above.
    """
    human_turn = HumanMessage(content=example["input"])

    # Each pydantic instance becomes one function call: named after its
    # class, with its JSON serialization as the arguments, and tagged with a
    # freshly generated UUID so the tool reply can reference it.
    openai_tool_calls = [
        {
            "id": str(uuid.uuid4()),
            "type": "function",
            "function": {
                "name": type(extracted).__name__,
                "arguments": extracted.json(),
            },
        }
        for extracted in example["tool_calls"]
    ]
    assistant_turn = AIMessage(
        content="",
        additional_kwargs={"tool_calls": openai_tool_calls},
    )

    tool_outputs = example.get("tool_outputs")
    if not tool_outputs:
        tool_outputs = ["You have correctly called this tool."] * len(openai_tool_calls)

    # zip stops at the shorter sequence, so any surplus outputs or calls are
    # left without a counterpart.
    tool_turns = [
        ToolMessage(content=reply, tool_call_id=call["id"])
        for reply, call in zip(tool_outputs, openai_tool_calls)
    ]
    return [human_turn, assistant_turn, *tool_turns]


def make_examples(examples: List[Example]) -> List[BaseMessage]:
    """Flatten several examples into one message sequence.

    Every example is expanded with ``tool_example_to_messages`` and the
    resulting messages are concatenated in the order the examples were given.
    An empty input yields an empty list.
    """
    return [
        message
        for example in examples
        for message in tool_example_to_messages(example)
    ]


# Prompt layout, in order:
#   1. fixed system instructions for the extractor,
#   2. the few-shot messages supplied at invoke time under "examples",
#   3. the text to extract from, supplied at invoke time under "text".
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        MessagesPlaceholder(variable_name="examples"),
        ("human", "{text}"),
    ]
)

# Chat model client; the key and endpoint are the values read from the
# environment near the top of the script.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-1106",
    temperature=0,
    api_key=OPEN_AI_KEY,
    base_url=OPEN_BASE_URL,
)

# The warning filter below is scoped to this block: LangChainBetaWarning is
# ignored for everything executed inside it and the previous filters are
# restored on exit.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=LangChainBetaWarning)

    text = "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me."

    # Few-shot demonstrations: one text with no person in it, one with a name only.
    # NOTE(review): these examples produce calls to a function named "Person",
    # while the only tool bound below is "Data" (see the request capture).
    # Wrapping them as Data(people=[...]) would make the demonstrations match
    # the bound schema, but it changes the request payload — confirm before
    # changing.
    examples = make_examples(
        [
            {
                "input": "The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.",
                "tool_calls": [Person(name=None, height_in_meters=None, hair_color=None)],
            },
            {
                "input": "Fiona traveled far from France to Spain.",
                "tool_calls": [Person(name="Fiona", height_in_meters=None, hair_color=None)],
            },
        ]
    )

    # include_raw=True is requested so the result is a dict; this script reads
    # its "parsing_error" and "parsed" entries.
    runnable = prompt | llm.with_structured_output(
        schema=Data,
        method="function_calling",
        include_raw=True,
    )
    result = runnable.invoke({"text": text, "examples": examples})

    if not result["parsing_error"]:
        print(result["parsed"])
    else:
        print(f"Error parsing result: {result['parsing_error']}")

请求抓包:

{
    "messages": [
        {
            "content": "You are an expert extraction algorithm. Only extract relevant information from the text. If you do not know the value of an attribute asked to extract, return null for the attribute's value.",
            "role": "system"
        },
        {
            "content": "The ocean is vast and blue. It's more than 20,000 feet deep. There are many fish in it.",
            "role": "user"
        },
        {
            "content": null,
            "role": "assistant",
            "tool_calls": [
                {
                    "id": "49450167-a747-428a-bbc7-3090cd3a9040",
                    "type": "function",
                    "function": {
                        "name": "Person",
                        "arguments": "{\"name\": null, \"hair_color\": null, \"height_in_meters\": null}"
                    }
                }
            ]
        },
        {
            "content": "You have correctly called this tool.",
            "role": "tool",
            "tool_call_id": "49450167-a747-428a-bbc7-3090cd3a9040"
        },
        {
            "content": "Fiona traveled far from France to Spain.",
            "role": "user"
        },
        {
            "content": null,
            "role": "assistant",
            "tool_calls": [
                {
                    "id": "4613a4c1-adff-46a6-a928-e98e20592050",
                    "type": "function",
                    "function": {
                        "name": "Person",
                        "arguments": "{\"name\": \"Fiona\", \"hair_color\": null, \"height_in_meters\": null}"
                    }
                }
            ]
        },
        {
            "content": "You have correctly called this tool.",
            "role": "tool",
            "tool_call_id": "4613a4c1-adff-46a6-a928-e98e20592050"
        },
        {
            "content": "My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me.",
            "role": "user"
        }
    ],
    "model": "gpt-3.5-turbo-1106",
    "n": 1,
    "stream": false,
    "temperature": 0,
    "tool_choice": {
        "type": "function",
        "function": {
            "name": "Data",
            "description": "Extracted data about people.",
            "parameters": {
                "type": "object",
                "properties": {
                    "people": {
                        "type": "array",
                        "items": {
                            "description": "Information about a person.",
                            "type": "object",
                            "properties": {
                                "name": {
                                    "description": "The name of the person",
                                    "type": "string"
                                },
                                "hair_color": {
                                    "description": "The color of the peron's hair if known",
                                    "type": "string"
                                },
                                "height_in_meters": {
                                    "description": "Height measured in meters",
                                    "type": "string"
                                }
                            },
                            "required": [
                                "name",
                                "hair_color",
                                "height_in_meters"
                            ]
                        }
                    }
                },
                "required": [
                    "people"
                ]
            }
        }
    },
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "Data",
                "description": "Extracted data about people.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "people": {
                            "type": "array",
                            "items": {
                                "description": "Information about a person.",
                                "type": "object",
                                "properties": {
                                    "name": {
                                        "description": "The name of the person",
                                        "type": "string"
                                    },
                                    "hair_color": {
                                        "description": "The color of the peron's hair if known",
                                        "type": "string"
                                    },
                                    "height_in_meters": {
                                        "description": "Height measured in meters",
                                        "type": "string"
                                    }
                                },
                                "required": [
                                    "name",
                                    "hair_color",
                                    "height_in_meters"
                                ]
                            }
                        }
                    },
                    "required": [
                        "people"
                    ]
                }
            }
        }
    ]
}

响应抓包:

{
    "id": "chatcmpl-99E8SN5DwQ4hFoIpwiv3b0RaIuq55",
    "object": "chat.completion",
    "created": 1711986316,
    "model": "gpt-3.5-turbo-1106",
    "choices": [
        {
            "index": 0,
            "message": {
                "role": "assistant",
                "content": null,
                "tool_calls": [
                    {
                        "id": "call_h9BMvKJ0fm4oqGeUMchAYahG",
                        "type": "function",
                        "function": {
                            "name": "Data",
                            "arguments": "{\"people\":[{\"name\":\"Jeff\",\"hair_color\":\"black\",\"height_in_meters\":\"1.83\"},{\"name\":\"Anna\",\"hair_color\":\"black\",\"height_in_meters\":null}]}"
                        }
                    }
                ]
            },
            "logprobs": null,
            "finish_reason": "stop"
        }
    ],
    "usage": {
        "prompt_tokens": 286,
        "completion_tokens": 38,
        "total_tokens": 324
    },
    "system_fingerprint": "fp_89448ee5dc",
    "code": 0,
    "msg": "ok"
}