From 3851474aa349a82ce144964eb68ee7d793c495de Mon Sep 17 00:00:00 2001 From: fengruixiang <474182370@qq.com> Date: Fri, 15 May 2026 09:15:23 +0800 Subject: [PATCH] =?UTF-8?q?qwen3=20=E5=89=8D=E5=90=8E5000=20=E5=88=86?= =?UTF-8?q?=E6=9E=90=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AGENTS.md | 1 + en_cases_hkcfa/2025_HKCFA_20/summary_fast.yml | 49 ++++++++--------- hk_case_extractor.py | 52 ++++++++++--------- 3 files changed, 50 insertions(+), 52 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a69f1f0..c62a384 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1 +1,2 @@ +人与助手之间的语言交互默认为简体中文。 python 要使用 uv run 运行。 \ No newline at end of file diff --git a/en_cases_hkcfa/2025_HKCFA_20/summary_fast.yml b/en_cases_hkcfa/2025_HKCFA_20/summary_fast.yml index 7c2378f..73bfaf5 100644 --- a/en_cases_hkcfa/2025_HKCFA_20/summary_fast.yml +++ b/en_cases_hkcfa/2025_HKCFA_20/summary_fast.yml @@ -1,33 +1,33 @@ plaintiff: - HKSAR defendant: -- MAK KWONG YIU (麥光耀) (D1) -- CHAN LAI YEE (陳麗兒) (D2) -- WONG SHUK ON (黃淑安) (D3) -- LEE YICK MING (李易明) (D4) +- MAK KWONG YIU (麥光耀) +- CHAN LAI YEE (陳麗兒) +- WONG SHUK ON (黃淑安) +- LEE YICK MING (李易明) jurisdiction_code: HKCFA jurisdiction_name: Court of Final Appeal of the Hong Kong Special Administrative Region case_location: - 香港特別行政區 - Hong Kong Special Administrative Region case_reason: >- - HKSAR appeals against the conviction of respondents for conspiracy to defraud, challenging the Court - of Appeal's decision (CACC No. 239 of 2021) regarding the validity of the prosecution's case on the + HKSAR appeals against the Court of Appeal's decision (CACC No. 239 of 2021) to overturn convictions + of defendants for conspiracy to conceal CISL's role as placing agent in a connected transaction. case_object: -- criminal charges of conspiracy to defraud -- concealment of financial arrangements -- validity of placing agreements +- conviction +- right to have conviction upheld +- connected transaction determination judgment_result: -- charge: Whether the placement/sub-placement agreements were continuing connected transactions (liability - issue) - result: No view expressed. Court did not hear arguments on this matter, therefore expressed no view - as to whether they were continuing connected transactions. (See [12] of Reasons for Verdict) +- charge: Conspiracy to conceal CISL's role as placing agent (liability issue) + result: Upheld. Appeal allowed, convictions restored. Court found concealment of CISL's agency constituted + unlawful connected transaction under Listing Rules, and directors' conflict of interest was central + to conspiracy charges. judgment_summary: >- - HKSAR appealed the conviction of defendants for conspiracy to defraud, challenging the validity of prosecution's - case on concealment of financial arrangements. Core issues centered on whether agreements constituted - continuing connected transactions and if concealment justified criminal liability. The Court rejected - limiting concealment to sham transactions (Snook v London), citing Adams v The Queen, and emphasized - dishonesty as a required element. It did not address the agreements' status but up + HKSAR appeals against the Court of Appeal's overturning of convictions for conspiracy to conceal CISL's + role in a connected transaction. Core issues: upholding convictions and determining connected transaction + status. Court rejected narrow 'sham' test from Snook v London, citing Adams v The Queen, finding concealment + of CISL's agency constituted unlawful connected transaction under Listing Rules. Convictions upheld, + appeal allowed, with directors' conflicts central to charges. Hong Kong Stock Ex involved_entities: - entity_name: Mr Justice Ribeiro PJ reason: Presiding judge in this case, responsible for fact-finding and adjudication. @@ -39,13 +39,8 @@ involved_entities: reason: Presiding judge in this case, responsible for fact-finding and adjudication. - entity_name: Sir William Young NPJ reason: Presiding judge in this case, responsible for fact-finding and adjudication. -- entity_name: HKSAR - reason: Appellant in this criminal case, representing the Hong Kong Special Administrative Region. -- entity_name: Hong Kong Stock Exchange - reason: Institution involved in the regulatory framework governing connected transactions. - entity_name: Diplock LJ - reason: Served as judge in Snook v London and West Riding Investments Ltd [4], articulated test for - determining sham transactions. -- entity_name: Lord Jauncey of Tullichettle - reason: Served as judge in Adams v The Queen [5], articulated legal principle regarding concealment - of director's breach of duties. + reason: Served as judge in Snook v London and West Riding Investments Ltd, articulated test for determining + sham transactions. +- entity_name: Hong Kong Stock Exchange + reason: Institution involved in regulatory framework for connected transactions. diff --git a/hk_case_extractor.py b/hk_case_extractor.py index edfbd3b..f5277c5 100644 --- a/hk_case_extractor.py +++ b/hk_case_extractor.py @@ -365,35 +365,37 @@ def gather_all(text: str) -> dict[str, str]: """為每個 group 召回對應的上下文片段 優化策略: - 1. 基礎信息(當事人):直接取開頭2000字符,不使用關鍵詞召回 - 2. 判決結果:重點關注尾部4000字符 + 關鍵詞召回 - 3. 其他字段:保持關鍵詞召回策略 + 1. 基礎信息(當事人):直接取開頭5000字符,不使用關鍵詞召回 + 2. 事由與標的(reason_object):直接取開頭5000字符,不使用關鍵詞召回 + 3. 判決結果:取開頭5000字符 + 尾部5000字符 + 4. 其他字段:保持關鍵詞召回策略 """ out: dict[str, str] = {} - # 1. 當事人信息:直接從開頭2000字符提取 - out["parties"] = text[:2000] + # 1. 當事人信息:直接從開頭5000字符提取 + out["parties"] = text[:5000] out["_parties_hits"] = "0" # 不使用關鍵詞,標記為0 - # 2. 判決結果:優先使用尾部4000字符,再補充關鍵詞召回 - tail_text = text[-4000:] if len(text) > 4000 else text - # 如果尾部不足4000字,用關鍵詞召回補充 - if len(tail_text) < 4000: - kws = KEYWORD_GROUPS["judgment_result"] - ctx, hits = gather_chunks(text, kws, half_window=500, max_total=6500) - out["judgment_result"] = ctx - out["_judgment_result_hits"] = str(hits) - else: - out["judgment_result"] = tail_text - out["_judgment_result_hits"] = "0" # 直接使用尾部,不計算關鍵詞命中 + # 2. 事由與標的:直接從開頭5000字符提取 + out["reason_object"] = text[:5000] + out["_reason_object_hits"] = "0" # 不使用關鍵詞,標記為0 - # 3. 其他字段:使用關鍵詞召回 + # 3. 判決結果:取開頭5000字符 + 尾部5000字符 + head_text = text[:5000] if len(text) > 5000 else text + tail_text = text[-5000:] if len(text) > 5000 else "" + # 如果文本足夠長,拼接頭尾;否則只用全文 + if tail_text and head_text != tail_text: + out["judgment_result"] = head_text + "\n\n[…]\n\n" + tail_text + else: + out["judgment_result"] = head_text + out["_judgment_result_hits"] = "0" # 直接截取,不計算關鍵詞命中 + + # 4. 其他字段:使用關鍵詞召回 params: dict[str, tuple[int, int]] = { - "reason_object": (500, 6000), "entities": (400, 6500), "analysis": (500, 6500), } - for group in ["reason_object", "entities", "analysis"]: + for group in ["entities", "analysis"]: kws = KEYWORD_GROUPS[group] hw, mt = params[group] ctx, hits = gather_chunks(text, kws, half_window=hw, max_total=mt) @@ -554,9 +556,9 @@ def extract_parties(client: OllamaClient, context: str, lang: str = 'zh') -> dic fewshot = PARTIES_FEWSHOT_ZH if lang == 'zh' else PARTIES_FEWSHOT_EN if lang == 'zh': - user = f"{fewshot}\n\n請從以下判決書開頭部分抽取:\n```\n{context[:2000]}\n```" + user = f"{fewshot}\n\n請從以下判決書開頭部分抽取:\n```\n{context[:5000]}\n```" else: - user = f"{fewshot}\n\nPlease extract from the following judgment header:\n```\n{context[:2000]}\n```" + user = f"{fewshot}\n\nPlease extract from the following judgment header:\n```\n{context[:5000]}\n```" return client.chat_json_with_retry(system, user, PARTIES_SCHEMA) @@ -658,10 +660,10 @@ def extract_reason_object(client: OllamaClient, context: str, lang: str = 'zh') if lang == 'zh': user = (f"{fewshot}\n\n" - f"請從以下片段(多處關鍵詞召回拼接)抽取:\n```\n{context[:6000]}\n```") + f"請從以下判決書開頭部分抽取:\n```\n{context[:5000]}\n```") else: user = (f"{fewshot}\n\n" - f"Please extract from the following segments (keyword-based retrieval):\n```\n{context[:6000]}\n```") + f"Please extract from the following judgment header:\n```\n{context[:5000]}\n```") out = client.chat_json_with_retry(system, user, schema, @@ -774,10 +776,10 @@ def extract_judgment_result(client: OllamaClient, context: str, lang: str = 'zh' if lang == 'zh': user = (f"{fewshot}\n\n" - f"請從以下判決書尾部(命令/裁定部分)抽取:\n```\n{context[-4000:]}\n```") + f"請從以下判決書片段(開頭5000字符 + 尾部5000字符)抽取:\n```\n{context}\n```") else: user = (f"{fewshot}\n\n" - f"Please extract from the following judgment ending (order/disposition section):\n```\n{context[-4000:]}\n```") + f"Please extract from the following judgment segments (first 5000 + last 5000 characters):\n```\n{context}\n```") return client.chat_json_with_retry(system, user, JUDGMENT_RESULT_SCHEMA,