yigitcolakoglu
/
KulYutmaz


								import re

								import requests

								import math

								import jsbeautifier


								# Example URL of a remote JavaScript file.
								testUrl = "https://apple.com/metrics/target/scripts/1.0/at.js"

								# JavaScript reserved words; findKeyWords counts tokens that appear here.
								js_keywords = [
								    "break",
								    "case",
								    "catch",
								    "continue",
								    "debugger",
								    "default",
								    "delete",
								    "do",
								    "else",
								    "finally",
								    "for",
								    "function",
								    "if",
								    "in",
								    "instanceof",
								    "new",
								    "return",
								    "switch",
								    "this",
								    "throw",
								    "try",
								    "typeof",
								    "var",
								    "void",
								    "while",
								    "with",
								]


								def parseJavascript(context, isUrl, timeout=10):
								    """Extract 32 lexical/statistical features from a piece of JavaScript.

								    Args:
								        context: The URL of a script when ``isUrl`` is true, otherwise the
								            JavaScript source text itself.
								        isUrl: Whether ``context`` must be downloaded before analysis.
								        timeout: Seconds ``requests.get`` may wait before giving up.
								            Optional; added so a stalled server cannot block forever.

								    Returns:
								        A 32-tuple, in this order:
								        counts of ``eval(``, ``setTimeout(``, ``iframe``, ``unescape(``,
								        ``escape(``, ``classid``, ``parseInt(``, ``fromCharCode(``,
								        ``ActiveXObject(``, string-literal matches, ``concat(``,
								        ``indexOf(``, ``substring(``, ``replace(``,
								        ``document.addEventListener(``, ``attachEvent(``,
								        ``createElement(``, ``getElementById(``, ``document.write(``;
								        then number of ``\\w+`` tokens, number of keywords, number of
								        characters, keywords/tokens ratio, entropy of the code, length of
								        the longest space-separated piece, number of long (200+ char)
								        double-quoted strings, length of the shortest space-separated
								        piece, entropy of the longest piece, number of spaces, characters
								        per token, hex-value count, and spaces/characters ratio.

								        If the download fails, ``"Unexpected Error"`` is printed and a
								        tuple of 32 zeros is returned instead.
								    """
								    if isUrl:
								        try:
								            # Redirects are not followed; whatever body comes back is analysed.
								            r = requests.get(context, allow_redirects=False, timeout=timeout)
								        except Exception:
								            # Best-effort: any request failure yields the all-zero vector,
								            # but KeyboardInterrupt/SystemExit are no longer swallowed.
								            print("Unexpected Error")
								            return (0,) * 32
								        temp_code = r.content.decode("ISO-8859-1")
								    else:
								        temp_code = context

								    # All features are measured on the beautified source.
								    js_code = jsbeautifier.beautify(temp_code)

								    def count(pattern):
								        # Number of non-overlapping regex matches in the beautified code.
								        return len(re.findall(pattern, js_code))

								    noOfEvalFunc = count(r"eval\(")
								    noOfSetTimeOutFunc = count(r"setTimeout\(")
								    noOfiframe = count(r"iframe")
								    noOfUnescapeFunc = count(r"unescape\(")
								    # NOTE(review): this pattern also matches every "unescape(" call, so
								    # those are counted in both features. Left as-is because changing it
								    # would alter the feature values callers receive.
								    noOfEscapeFunc = count(r"escape\(")
								    noOfClassid = count(r"classid")
								    noOfParseIntFunc = count(r"parseInt\(")
								    noOfFromCharCodeFunc = count(r"fromCharCode\(")
								    noOfActiveXObjectFunc = count(r"ActiveXObject\(")
								    # NOTE(review): matches a double quote, exactly one non-quote
								    # character, and a closing double quote - i.e. only one-character
								    # literals. Probably meant to allow any length; confirm before changing.
								    noOfStringAssigments = count(r'"((?<=\\)"|([^"]))"')
								    noOfConcatFunc = count(r"concat\(")
								    noOfIndexOfFunc = count(r"indexOf\(")
								    noOfSubstringFunc = count(r"substring\(")
								    noOfReplaceFunc = count(r"replace\(")
								    # NOTE(review): the "." in the two document.* patterns is unescaped
								    # and therefore matches any character, not just a literal dot.
								    noOfEventListenerFunc = count(r"document.addEventListener\(")
								    noOfAttachEventFunc = count(r"attachEvent\(")
								    noOfCreateElementFunc = count(r"createElement\(")
								    noOfGetElementByIdFunc = count(r"getElementById\(")
								    noOfDocumentWriteFunc = count(r"document.write\(")

								    noOfWords = count(r'\w+')
								    noOfKeyWords = findKeyWords(js_code)
								    noOfCharacters = len(js_code)
								    # Ratios fall back to 0 when the denominator is 0 (empty input).
								    ratioOfKeywordsAndWords = noOfKeyWords / noOfWords if noOfWords else 0

								    entropyOfJS = Entropy(js_code)

								    # Computed once and reused for both its length and its entropy.
								    longest_piece = findTheLongestWord(js_code)
								    longestWord = len(longest_piece)
								    noOfLongStirngs = count(r'"((?<=\\)"|([^"])){200,}"')
								    shortestWord = len(findTheShortestWord(js_code))
								    entropyOfLongestWord = Entropy(longest_piece)

								    noOfBlankSpaces = count(" ")
								    avgLenOfWords = noOfCharacters / noOfWords if noOfWords else 0
								    noOfHexValues = findHexNumbers(js_code)
								    shareOfSpaceChar = noOfBlankSpaces / noOfCharacters if noOfCharacters else 0

								    return (
								        noOfEvalFunc, noOfSetTimeOutFunc, noOfiframe, noOfUnescapeFunc,
								        noOfEscapeFunc, noOfClassid, noOfParseIntFunc, noOfFromCharCodeFunc,
								        noOfActiveXObjectFunc, noOfStringAssigments, noOfConcatFunc,
								        noOfIndexOfFunc, noOfSubstringFunc, noOfReplaceFunc,
								        noOfEventListenerFunc, noOfAttachEventFunc, noOfCreateElementFunc,
								        noOfGetElementByIdFunc, noOfDocumentWriteFunc, noOfWords,
								        noOfKeyWords, noOfCharacters, ratioOfKeywordsAndWords, entropyOfJS,
								        longestWord, noOfLongStirngs, shortestWord, entropyOfLongestWord,
								        noOfBlankSpaces, avgLenOfWords, noOfHexValues, shareOfSpaceChar,
								    )


								def Entropy(string, base=2.0):
								    """Return the Shannon entropy of the symbols in ``string``.

								    Each distinct symbol's relative frequency ``p`` contributes
								    ``-p * log(p) / log(base)``; ``base`` defaults to 2. An empty input
								    yields 0. (Adapted from a snippet found online.)
								    """
								    length = len(string)
								    probabilities = []
								    # dict.fromkeys keeps one entry per distinct symbol, in first-seen order.
								    for symbol in dict.fromkeys(string):
								        probabilities.append(float(string.count(symbol)) / length)

								    return -sum(p * math.log(p) / math.log(base) for p in probabilities)


								def findTheLongestWord(text):
								    """Return the longest piece of ``text`` when split on single spaces.

								    Only the space character separates pieces, so newlines and tabs stay
								    inside them. On ties the earliest piece wins.
								    """
								    pieces = text.split(' ')
								    longest = pieces[0]
								    for piece in pieces[1:]:
								        # Strictly greater keeps the first of equally long pieces.
								        if len(piece) > len(longest):
								            longest = piece
								    return longest


								def findTheShortestWord(text):
								    """Return the shortest piece of ``text`` when split on single spaces.

								    Consecutive spaces produce empty pieces, which then count as the
								    shortest. On ties the earliest piece wins.
								    """
								    pieces = text.split(' ')
								    shortest = pieces[0]
								    for piece in pieces[1:]:
								        # Strictly smaller keeps the first of equally short pieces.
								        if len(piece) < len(shortest):
								            shortest = piece
								    return shortest


								"""def findLongStings(text):

								    longs = []

								    temp = re.findall(r'var = .*\".*\"', text)

								    for string in temp:

								        if len(string) > 200:

								            longs.append(string)"""


								def findKeyWords(text):
								    """Count the ``\\w+`` tokens of ``text`` that are JavaScript keywords.

								    Keywords are taken from the module-level ``js_keywords`` list; every
								    occurrence counts, including ones inside string literals or comments.

								    NOTE(review): an earlier second pass tried to subtract keywords found
								    inside double-quoted strings, but it could never fire: its regex has
								    two capture groups, so ``re.findall`` returned tuples, and a tuple is
								    never equal to a keyword string. That dead pass (and its extra scan
								    of the whole text) was removed; the returned count is unchanged.
								    """
								    keywords = frozenset(js_keywords)
								    return sum(1 for word in re.findall(r'\w+', text) if word in keywords)


								def findHexNumbers(text):
								    """Count hints of hexadecimal values in ``text``.

								    The result is the number of ``0x`` occurrences plus the number of
								    matches of a ``parseInt(`` ... ``16)`` pattern.
								    """
								    hex_patterns = (
								        r'0x',
								        r'parseInt\([^,]*,? *16\)',
								    )
								    return sum(len(re.findall(pattern, text)) for pattern in hex_patterns)


								#print(parseJavascript("https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js"))