-
Notifications
You must be signed in to change notification settings - Fork 3
/
11-hf-hosted-falcon.py
38 lines (31 loc) · 1.16 KB
/
11-hf-hosted-falcon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""Query a Falcon model hosted on a Hugging Face Inference Endpoint via LangChain.

Based on: https://huggingface.co/tiiuae/falcon-40b-instruct/discussions/51
"""
import os

from dotenv import load_dotenv

from langchain.llms import HuggingFaceEndpoint
from langchain import HuggingFaceHub  # NOTE(review): unused here; kept for compatibility
from langchain import PromptTemplate, LLMChain

# Load variables from a local .env file into the process environment, then
# read the token from there. The original hard-coded "" and never consulted
# the environment it had just loaded, so authentication could not succeed
# without editing the source. Falls back to "" when the variable is unset.
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
# Placeholder URL for a dedicated Inference Endpoint — replace with the real
# endpoint address before running.
endpoint_url = (
    'https://YOUR_ENDPOINT.us-east-1.aws.endpoints.huggingface.cloud'
)

# Generation parameters forwarded verbatim to the hosted model.
generation_params = {
    "min_length": 200,
    "max_length": 2000,
    "temperature": 0.01,
    "max_new_tokens": 200,
    "num_return_sequences": 1,
}

# LLM wrapper around the hosted endpoint; all calls go over HTTPS with the
# token loaded above.
hf = HuggingFaceEndpoint(
    endpoint_url=endpoint_url,
    huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
    task='text-generation',
    model_kwargs=generation_params,
)
# Prompt layout: a context block, the user question, then the answer slot the
# model is expected to fill in.
template = """Context: {context}
Question: {question}
Answer: """
prompt = PromptTemplate(template=template, input_variables=["question", "context"])

# verbose=True prints the rendered prompt before each call — handy for debugging.
llm_chain = LLMChain(prompt=prompt, llm=hf, verbose=True)

# Anti-hallucination instruction injected through the {context} slot.
context = (
    "Don't make up your response. If you don't know it, "
    "just tell me you don't know.\n"
)
question = "What's the difference between fusion and fission?"

chain_inputs = {'question': question, 'context': context}
print(llm_chain.run(chain_inputs))