diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 43b1d0fdee..4da8ae3536 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -9,7 +9,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import math
 
-from yt_dlp.jsinterp import JS_Undefined, JSInterpreter, js_number_to_string
+from yt_dlp.jsinterp import (
+    JS_Undefined,
+    JSInterpreter,
+    int_to_int32,
+    js_number_to_string,
+)
 
 
 class NaN:
@@ -101,8 +106,8 @@ class TestJSInterpreter(unittest.TestCase):
         self._test('function f(){return 5 ^ 9;}', 12)
         self._test('function f(){return 0.0 << NaN}', 0)
         self._test('function f(){return null << undefined}', 0)
-        # TODO: Does not work due to number too large
-        # self._test('function f(){return 21 << 4294967297}', 42)
+        self._test('function f(){return -12616 ^ 5041}', -8951)  # negative operand: result must stay signed
+        self._test('function f(){return 21 << 4294967297}', 42)  # 4294967297 wraps to 1 in 32 bits
 
     def test_array_access(self):
         self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])
@@ -447,6 +452,22 @@ class TestJSInterpreter(unittest.TestCase):
     def test_splice(self):
         self._test('function f(){var T = ["0", "1", "2"]; T["splice"](2, 1, "0")[0]; return T }', ['0', '1', '0'])
 
+    def test_int_to_int32(self):
+        for inp, exp in [
+            (0, 0),
+            (1, 1),
+            (-1, -1),
+            (-8951, -8951),
+            (2147483647, 2147483647),  # largest int32 stays unchanged
+            (2147483648, -2147483648),  # 2**31 wraps to the smallest int32
+            (2147483649, -2147483647),
+            (-2147483649, 2147483647),  # one below the smallest int32 wraps to the largest
+            (-2147483648, -2147483648),  # smallest int32 stays unchanged
+            (-16799986688, 379882496),  # magnitudes beyond 2**32 wrap as well
+            (39570129568, 915423904),
+        ]:
+            assert int_to_int32(inp) == exp
+
     def test_js_number_to_string(self):
         for test, radix, expected in [
             (0, None, '0'),
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index c174b5bac1..f6927565f7 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -3,6 +3,7 @@ import re
 import urllib.parse
 
 from .common import InfoExtractor
+from ..jsinterp import int_to_int32
 from ..utils import (
     ExtractorError,
     clean_html,
@@ -20,73 +21,69 @@ from ..utils import (
 )
 
 
-def to_signed_32(n):
-    return n % ((-1 if n < 0 else 1) * 2**32)
-
-
 class _ByteGenerator:
     def __init__(self, algo_id, seed):
         try:
             self._algorithm = getattr(self, f'_algo{algo_id}')
         except AttributeError:
             raise ExtractorError(f'Unknown algorithm ID "{algo_id}"')
-        self._s = to_signed_32(seed)
+        self._s = int_to_int32(seed)
 
     def _algo1(self, s):
         # LCG (a=1664525, c=1013904223, m=2^32)
         # Ref: https://en.wikipedia.org/wiki/Linear_congruential_generator
-        s = self._s = to_signed_32(s * 1664525 + 1013904223)
+        s = self._s = int_to_int32(s * 1664525 + 1013904223)
         return s
 
     def _algo2(self, s):
         # xorshift32
         # Ref: https://en.wikipedia.org/wiki/Xorshift
-        s = to_signed_32(s ^ (s << 13))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 17))
-        s = self._s = to_signed_32(s ^ (s << 5))
+        s = int_to_int32(s ^ (s << 13))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 17))
+        s = self._s = int_to_int32(s ^ (s << 5))
         return s
 
     def _algo3(self, s):
         # Weyl Sequence (k≈2^32*φ, m=2^32) + MurmurHash3 (fmix32)
         # Ref: https://en.wikipedia.org/wiki/Weyl_sequence
         #      https://commons.apache.org/proper/commons-codec/jacoco/org.apache.commons.codec.digest/MurmurHash3.java.html
-        s = self._s = to_signed_32(s + 0x9e3779b9)
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
-        s = to_signed_32(s * to_signed_32(0x85ebca77))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 13))
-        s = to_signed_32(s * to_signed_32(0xc2b2ae3d))
-        return to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 16))
+        s = self._s = int_to_int32(s + 0x9e3779b9)
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16))
+        s = int_to_int32(s * int_to_int32(0x85ebca77))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 13))
+        s = int_to_int32(s * int_to_int32(0xc2b2ae3d))
+        return int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 16))
 
     def _algo4(self, s):
         # Custom scrambling function involving a left rotation (ROL)
-        s = self._s = to_signed_32(s + 0x6d2b79f5)
-        s = to_signed_32((s << 7) | ((s & 0xFFFFFFFF) >> 25))  # ROL 7
-        s = to_signed_32(s + 0x9e3779b9)
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 11))
-        return to_signed_32(s * 0x27d4eb2d)
+        s = self._s = int_to_int32(s + 0x6d2b79f5)
+        s = int_to_int32((s << 7) | ((s & 0xFFFFFFFF) >> 25))  # ROL 7
+        s = int_to_int32(s + 0x9e3779b9)
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 11))
+        return int_to_int32(s * 0x27d4eb2d)
 
     def _algo5(self, s):
         # xorshift variant with a final addition
-        s = to_signed_32(s ^ (s << 7))
-        s = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 9))
-        s = to_signed_32(s ^ (s << 8))
-        s = self._s = to_signed_32(s + 0xa5a5a5a5)
+        s = int_to_int32(s ^ (s << 7))
+        s = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 9))
+        s = int_to_int32(s ^ (s << 8))
+        s = self._s = int_to_int32(s + 0xa5a5a5a5)
         return s
 
     def _algo6(self, s):
         # LCG (a=0x2c9277b5, c=0xac564b05) with a variable right shift scrambler
-        s = self._s = to_signed_32(s * to_signed_32(0x2c9277b5) + to_signed_32(0xac564b05))
-        s2 = to_signed_32(s ^ ((s & 0xFFFFFFFF) >> 18))
+        s = self._s = int_to_int32(s * int_to_int32(0x2c9277b5) + int_to_int32(0xac564b05))
+        s2 = int_to_int32(s ^ ((s & 0xFFFFFFFF) >> 18))
         shift = (s & 0xFFFFFFFF) >> 27 & 31
-        return to_signed_32((s2 & 0xFFFFFFFF) >> shift)
+        return int_to_int32((s2 & 0xFFFFFFFF) >> shift)
 
     def _algo7(self, s):
         # Weyl Sequence (k=0x9e3779b9) + custom multiply-xor-shift mixing function
-        s = self._s = to_signed_32(s + to_signed_32(0x9e3779b9))
-        e = to_signed_32(s ^ (s << 5))
-        e = to_signed_32(e * to_signed_32(0x7feb352d))
-        e = to_signed_32(e ^ ((e & 0xFFFFFFFF) >> 15))
-        return to_signed_32(e * to_signed_32(0x846ca68b))
+        s = self._s = int_to_int32(s + int_to_int32(0x9e3779b9))
+        e = int_to_int32(s ^ (s << 5))
+        e = int_to_int32(e * int_to_int32(0x7feb352d))
+        e = int_to_int32(e ^ ((e & 0xFFFFFFFF) >> 15))
+        return int_to_int32(e * int_to_int32(0x846ca68b))
 
     def __next__(self):
         return self._algorithm(self._s) & 0xFF
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index d22d176d2f..6ca2b16375 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -18,6 +18,14 @@ from .utils import (
 )
 
 
+def int_to_int32(n):
+    """Converts an integer to a signed 32-bit integer (two's complement wraparound)"""
+    n &= 0xFFFFFFFF  # keep only the low 32 bits; a negative input becomes its unsigned form
+    if n & 0x80000000:  # sign bit set: the value is negative as an int32
+        return n - 0x100000000
+    return n
+
+
 def _js_bit_op(op):
     def zeroise(x):
         if x in (None, JS_Undefined):
@@ -28,7 +36,7 @@ def _js_bit_op(op):
         return int(float(x))
 
     def wrapped(a, b):
-        return op(zeroise(a), zeroise(b)) & 0xffffffff
+        return int_to_int32(op(int_to_int32(zeroise(a)), int_to_int32(zeroise(b))))  # operands and result each wrapped to int32
 
     return wrapped
 