{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# [CodeShield](https://github.com/meta-llama/PurpleLlama/tree/main/CodeShield) Usage Walkthrough\n",
    "\n",
    "This notebook shows examples of how to use CodeShield. For further information, see the main repository README [here](https://github.com/meta-llama/PurpleLlama/tree/main/CodeShield).\n",
    "\n",
    "# Getting Started \n",
    "\n",
    "Either install via PyPi using pip, or install it locally from source. \n",
    "\n",
    "#### Install Option 1. Install CodeShield package. Run the following in your terminal\n",
    "\n",
    "```\n",
    "pip3 install codeshield\n",
    "```\n",
    "\n",
    "#### Install Option 2. Install CodeShield package locally from source code\n",
    "\n",
    "```\n",
    "git clone https://github.com/meta-llama/PurpleLlama\n",
    "cd PurpleLlama/CodeShield\n",
    "pip install .\n",
    "```"
   ]
  },
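  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To confirm the installation, check that the package imports cleanly (a minimal sanity check):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# If this import fails, revisit the install steps above\n",
    "from codeshield.cs import CodeShield"
   ]
  },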
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples\n",
    "\n",
    "Define a helper function to run scan and process output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from codeshield.cs import CodeShield\n",
    "\n",
    "async def scan_llm_output(llm_output_code):\n",
    "    result = await CodeShield.scan_code(llm_output_code)\n",
    "    if result.is_insecure:\n",
    "        # perform actions based on treatment recommendation\n",
    "        if result.recommended_treatment == \"block\":\n",
    "            llm_output_code = \"*** Code Security issues found, blocking the code ***\"\n",
    "        if result.recommended_treatment == \"warn\":\n",
    "            llm_output_code = llm_output_code + \"*** Warning: The generated snippit contains insecure code ***\"\n",
    "    \n",
    "    \n",
    "    summary = \"Security issue detected\" if result.is_insecure else \"No issues found\"\n",
    "    print(\"__LLM output after treatment___\")\n",
    "    print(llm_output_code)\n",
    "    print (\"__Results__\")\n",
    "    print(summary)\n",
    "    print(result.recommended_treatment)\n",
    "    print (\"__Details__\")\n",
    "    print(result.issues_found)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 1 - Simple hard-coded example\n",
    "In this example, we are taking a hard coded example where the code snippet has weak hash md5 function usage. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "__LLM output after treatment___\n",
      "\n",
      "def hashString(input):\n",
      "    return hashlib.md5(input)\n",
      "*** Warning: The generated snippit contains insecure code ***\n",
      "__Results__\n",
      "Security issue detected\n",
      "Treatment.WARN\n",
      "__Details__\n",
      "[Issue(description='Use of weak hashing algorithm', cwe_id='CWE-327', severity=<Severity.WARNING: 'warning'>, rule='\\\\.getMd5Digest\\\\(\\\\)|\\\\.md5\\\\(|\\\\.md5Hex\\\\(|\\\\.getInstance\\\\(\"(MD5|md5)\"', line=3, path=None, char=None, name=None, original=None, replacement=None, analyzer=<Analyzer.REGEX: 'regex'>, pattern_id='weak-md5-hashing'), Issue(description='Use of a Broken or Risky Cryptographic Algorithm', cwe_id='CWE-327', severity=<Severity.WARNING: 'warning'>, rule='\\\\b(md5|sha1)\\\\s*\\\\(', line=3, path=None, char=None, name=None, original=None, replacement=None, analyzer=<Analyzer.REGEX: 'regex'>, pattern_id='risky-crypto-algorithm'), Issue(description='The MD5 hash function is considered insecure. Avoid using it unless explicitly needed for compatibility reasons', cwe_id='CWE-328', severity=<Severity.WARNING: 'warning'>, rule='\\\\bhashlib\\\\.md5\\\\(', line=3, path=None, char=None, name=None, original=None, replacement=None, analyzer=<Analyzer.REGEX: 'regex'>, pattern_id='insecure-md5-hash-usage')]\n"
     ]
    }
   ],
   "source": [
    "llm_output_code = \"\"\"\n",
    "def hashString(input):\n",
    "    return hashlib.md5(input)\n",
    "\"\"\"\n",
    "\n",
    "await scan_llm_output(llm_output_code)"
   ]
  },
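  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For contrast, scanning an equivalent snippet that uses SHA-256 instead of MD5 should report no issues (assuming no other rules match):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SHA-256 is not covered by the weak-hash rules flagged above, so this should scan clean\n",
    "safe_output_code = \"\"\"\n",
    "import hashlib\n",
    "\n",
    "def hashString(input):\n",
    "    return hashlib.sha256(input.encode()).hexdigest()\n",
    "\"\"\"\n",
    "\n",
    "await scan_llm_output(safe_output_code)"
   ]
  },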
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 2 - use openAI API\n",
    "\n",
    "Requires openai package (pip install openai)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt = \"please generate some example code to demonstrate strcpy usage\"\n",
    "\n",
    "import openai\n",
    "client = openai.OpenAI(api_key=\"YOUR_OPEN_AI_KEY\")\n",
    "response = client.chat.completions.create(\n",
    "    model= \"gpt-3.5-turbo\",\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": prompt},\n",
    "    ],\n",
    "    max_tokens=1000,\n",
    ")\n",
    "\n",
    "await scan_llm_output(response.choices[0].message.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example 3 - use externally hosted LLM \n",
    "\n",
    "Requires [llama-recipes package](https://github.com/meta-llama/llama-recipes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import getpass\n",
    "\n",
    "from llama_recipes.inference.llm import TOGETHER, OPENAI, ANYSCALE\n",
    "\n",
    "if \"EXTERNALLY_HOSTED_LLM_TOKEN\" not in os.environ:\n",
    "    os.environ[\"EXTERNALLY_HOSTED_LLM_TOKEN\"] = getpass.getpass(prompt=\"Provide token for LLM provider\")\n",
    "\n",
    "# Delete as appropriate\n",
    "model = TOGETHER(\"togethercomputer/CodeLlama-13b-Instruct\", os.environ[\"EXTERNALLY_HOSTED_LLM_TOKEN\"])\n",
    "model = OPENAI(\"gpt-4\",os.environ[\"EXTERNALLY_HOSTED_LLM_TOKEN\"])\n",
    "model = ANYSCALE(\"codellama/CodeLlama-34b-Instruct-hf\",os.environ[\"EXTERNALLY_HOSTED_LLM_TOKEN\"])\n",
    "\n",
    "llm_output_code = model.query_with_system_prompt_with_retries(\n",
    "    system_prompt= \"You are an expert code developer. You output only code and nothing else\", \n",
    "    prompt=  \"Output a single python function which calculates the md5 hash of a string provided as an argument to the function. Output only the code and nothing else.\"\n",
    "    )\n",
    "await scan_llm_output(llm_output_code)"
   ]
  }
 ],
 "metadata": {
  "fileHeader": "",
  "fileUid": "a811f690-1583-439b-98c3-98bd7eb9880c",
  "isAdHoc": false,
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}