MibooGram/modules/wiki/material.py

import re
from typing import List

from bs4 import BeautifulSoup
from httpx import URL
from typing_extensions import Self

from modules.wiki.base import SCRAPE_HOST, WikiModel

__all__ = ['Material']


class Material(WikiModel):
    """A material entry scraped from the wiki.

    Attributes:
        type: Material type, taken from the second row of the info table.
        source: Ways to obtain the material; one entry per ``<br/>``-separated
            fragment of the second-to-last table row.
        description: Description text, taken from the last row of the info table.
    """

    type: str
    source: List[str]
    description: str

    @staticmethod
    def scrape_urls() -> List[URL]:
        """Return the ``fam_wep_*`` listing URLs (primary, secondary, common) under ``SCRAPE_HOST``."""
        return [SCRAPE_HOST.join(f'fam_wep_{i}/?lang=CHS') for i in ['primary', 'secondary', 'common']]

    @classmethod
    async def _parse_soup(cls, soup: BeautifulSoup) -> Self:
        """Parse a material page into an instance of this model.

        Args:
            soup: Parsed HTML of a single material page.

        Returns:
            An instance of ``cls`` populated from the first table found inside
            the ``.wp-block-post-content`` element.

        Raises:
            IndexError: If the content element, the table, an expected row or
                cell, or the icon path cannot be found in the page.
        """
        soup = soup.select('.wp-block-post-content')[0]
        tables = soup.find_all('table')
        table_rows = tables[0].find_all('tr')

        def get_table_text(row_num: int) -> str:
            """Return the text of the last cell in the given row, with non-breaking spaces removed."""
            return table_rows[row_num].find_all('td')[-1].text.replace('\xa0', '')

        # The id is the file stem of the first ``/img/<id>.webp`` path in the first row.
        id_ = re.findall(r'/img/(.*?)\.webp', str(table_rows[0]))[0]
        name = get_table_text(0)
        # Rarity is the number of <img> tags in the fourth row
        # (presumably one star icon per rarity level).
        rarity = len(table_rows[3].find_all('img'))
        type_ = get_table_text(1)
        # Split the raw inner HTML of the source cell on line breaks and drop empty fragments.
        source_cell = table_rows[-2].find_all('td')[-1]
        source = [item for item in source_cell.encode_contents().decode().split('<br/>') if item]
        description = get_table_text(-1)
        # Build through ``cls`` so subclasses get an instance of their own type,
        # matching the ``Self`` return annotation.
        return cls(id=id_, name=name, rarity=rarity, type=type_, source=source, description=description)

    @property
    def icon(self) -> str:
        """URL of the material's icon image, as a string."""
        return str(SCRAPE_HOST.join(f'/img/{self.id}.webp'))
♻️ 重写 wiki 模块和相关插件 1. 使用 `pydantic` 重写了 wiki 模块所使用的 model 2. 添加了 weapon_level.json 用于后续计算武器升级所需的经验 3. 修改了 wiki 插件，以适应新的 model 2022-08-28 14:37:31 +00:00			`import re`
			`from typing import List`

			`from bs4 import BeautifulSoup`
			`from httpx import URL`
			`from typing_extensions import Self`

♻ 更新V3版本 ♻️ 重构插件系统 ⚙️ 重写插件 🎨 改进代码结构 📝 完善文档 Co-authored-by: zhxy-CN <admin@owo.cab> Co-authored-by: 洛水居室 <luoshuijs@outlook.com> Co-authored-by: xtaodada <xtao@xtaolink.cn> Co-authored-by: Li Chuangbo <im@chuangbo.li> 2022-09-08 01:08:37 +00:00			`from modules.wiki.base import SCRAPE_HOST, WikiModel`
♻️ 重写 wiki 模块和相关插件 1. 使用 `pydantic` 重写了 wiki 模块所使用的 model 2. 添加了 weapon_level.json 用于后续计算武器升级所需的经验 3. 修改了 wiki 插件，以适应新的 model 2022-08-28 14:37:31 +00:00
			`__all__ = ['Material']`


			`class Material(WikiModel):`
			`"""材料`

			`Attributes:`
			`type: 类型`
			`source: 获取方式`
			`description: 描述`
			`serise: 材料系列`
			`"""`

			`type: str`
			`source: List[str]`
			`description: str`

			`@staticmethod`
			`def scrape_urls() -> List[URL]:`
			`return [SCRAPE_HOST.join(f'fam_wep_{i}/?lang=CHS') for i in ['primary', 'secondary', 'common']]`

			`@classmethod`
			`async def _parse_soup(cls, soup: BeautifulSoup) -> Self:`
			`"""解析材料页"""`
			`soup = soup.select('.wp-block-post-content')[0]`
			`tables = soup.find_all('table')`
			`table_rows = tables[0].find_all('tr')`

			`def get_table_text(row_num: int) -> str:`
			`"""一个快捷函数，用于返回表格对应行的最后一个单元格中的文本"""`
			`return table_rows[row_num].find_all('td')[-1].text.replace('\xa0', '')`

			`id_ = re.findall(r'/img/(.*?)\.webp', str(table_rows[0]))[0]`
			`name = get_table_text(0)`
			`rarity = len(table_rows[3].find_all('img'))`
			`type_ = get_table_text(1)`
			`source = list(`
			`filter(`
			`lambda x: x, # filter 在这里的作用是过滤掉为空的数据`
			`table_rows[-2].find_all('td')[-1].encode_contents().decode().split('<br/>')`
			`)`
			`)`
			`description = get_table_text(-1)`
			`return Material(id=id_, name=name, rarity=rarity, type=type_, source=source, description=description)`

			`@property`
			`def icon(self) -> str:`
🐛 修复爬虫图片类型错误的问题 2022-09-01 04:20:00 +00:00			`return str(SCRAPE_HOST.join(f'/img/{self.id}.webp'))`