17 参考文献
17.1 批量生成参考文献列表
- 适用于revealjs
import os

# Print one Markdown bullet link per PDF found in the data-doctor folder.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# generated reference list stable between runs.
for f in sorted(os.listdir('data-doctor')):
    if f.endswith('.pdf'):
        # listdir() yields bare file names, so f is already the link label.
        print(f"- [{f}](data-doctor/{f})")
- 适用于books项目
#| echo: false
from IPython.display import Markdown
import os
from urllib.parse import quote

file_folder = 'data/恒格列净复审'
abs_path = "https://www.mmphcrc.com/pdf/jupyter/HuLinhuiPy/ethics"

# Build a Markdown bullet list with one absolute link per entry of file_folder.
n = 0
md_lines = []
# os.listdir() returns entries in arbitrary order; sort them so the rendered
# list is stable between runs.
for f in sorted(os.listdir(file_folder)):
    n += 1
    if f != ".ipynb_checkpoints":
        # listdir() yields bare names, so f is the label; it is
        # percent-encoded for the URL part of the link.
        md_lines.append(f"- [{f}]({abs_path}/{file_folder}/{quote(f)})\n")
    if n == 10:
        # A blank line is inserted once, after the tenth directory entry.
        md_lines.append("\n")
mdStr = "".join(md_lines)
Markdown(mdStr)
17.2 jupyterlab生成交互搜索的表格
通过关键词搜索定位至pdf文件名称、页码和行数
import os
import PyPDF2
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import re
def extract_text_from_pdf(pdf_path):
    """Read a PDF and return its text as one DataFrame row per text line.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        A DataFrame with the columns '文件名' (the path that was passed in),
        '页码' (1-based page number), '行' (1-based line number within the
        page) and '文本' (the text of that line).
    """
    file_names, page_numbers, line_numbers, texts = [], [], [], []
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_no, pdf_page in enumerate(pdf_reader.pages, start=1):
            # Each page's text is split on newlines; every piece is one row.
            page_lines = pdf_page.extract_text().split('\n')
            for line_no, text in enumerate(page_lines, start=1):
                file_names.append(pdf_path)
                page_numbers.append(page_no)
                line_numbers.append(line_no)
                texts.append(text)
    return pd.DataFrame({
        '文件名': file_names,
        '页码': page_numbers,
        '行': line_numbers,
        '文本': texts,
    })
def extract_text_from_pdfs(folder_path):
    """Extract the text of every PDF located directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan; sub-directories are not searched.

    Returns:
        One DataFrame that concatenates the per-file results of
        ``extract_text_from_pdf`` under a fresh 0..n-1 index. When the folder
        contains no PDF files, an empty DataFrame with the columns '文件名',
        '页码', '行' and '文本' is returned.
    """
    # Sort because os.listdir() order is arbitrary; compare the extension
    # case-insensitively so files named "*.PDF" are picked up as well.
    pdf_names = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith('.pdf')
    )
    if not pdf_names:
        # pd.concat() raises ValueError when given an empty list, so return
        # an empty frame that has the same columns as a normal result.
        return pd.DataFrame(columns=['文件名', '页码', '行', '文本'])

    frames = [
        extract_text_from_pdf(os.path.join(folder_path, name))
        for name in pdf_names
    ]
    return pd.concat(frames, ignore_index=True)
# Folder whose PDF files are indexed for searching.
folder_path = 'data-doctor'
# One row per extracted text line; this is the table that filter_table() searches.
df = extract_text_from_pdfs(folder_path)
# Lift the column-width limit so long text lines are not truncated when displayed.
pd.set_option('display.max_colwidth', None)
# Text box used to enter the search keyword.
search_box = widgets.Text(description='搜索:')
page_text = widgets.BoundedIntText(description='页数', min=1, max=1, step=1, value=1) # the total page count starts at 1
# Buttons wired to prev_page() / next_page() further down.
prev_button = widgets.Button(description='◀')
next_button = widgets.Button(description='▶')
# Output area into which filter_table() renders the result table.
output = widgets.Output()
def filter_table(search_value, page):
    """Render one page of the rows whose text contains ``search_value``.

    Filters the module-level ``df`` on its '文本' column, updates the upper
    bound and value of the ``page_text`` widget to match the number of result
    pages, and shows up to five matching rows in ``output`` with every
    occurrence of the search text highlighted in yellow.

    Args:
        search_value: Text to look for. It is matched literally, not as a
            regular expression.
        page: 1-based page number that was requested.
    """
    rows_per_page = 5

    # regex=False: the search box holds plain text, so characters such as
    # '(' or '+' must not be parsed as a pattern (which could raise).
    matches = df[df['文本'].str.contains(search_value, regex=False, na=False)]

    # Ceiling division, so an exact multiple of rows_per_page does not yield
    # a trailing empty page; keep at least one page because the widget's
    # minimum is 1.
    total_pages = max(1, -(-len(matches) // rows_per_page))
    page_text.max = total_pages
    # Slice with the clamped page, not the requested one; otherwise a search
    # that shrinks the result set would render an empty table.
    current_page = max(1, min(page, total_pages))
    page_text.value = current_page

    page_start = (current_page - 1) * rows_per_page
    page_rows = matches.iloc[page_start:page_start + rows_per_page].copy()

    if search_value:
        # Skip highlighting for an empty search string: replacing '' would
        # insert the highlight markup between every single character.
        # NOTE(review): search_value and the PDF text are inserted into the
        # HTML unescaped (to_html(escape=False) is needed for the highlight);
        # text containing '<' or '&' may render incorrectly.
        page_rows['文本'] = page_rows['文本'].str.replace(
            search_value,
            f'<span style="background-color:yellow">{search_value}</span>',
            regex=False,
        )

    with output:
        output.clear_output()
        # to_html() yields an HTML string; escape=False keeps the highlight
        # <span> tags intact.
        html_str = page_rows.to_html(escape=False)
        display(HTML(html_str))
    update_pagination_label()  # refresh the displayed total page count
def prev_page(button_event):
    """Click handler for the '◀' button: go back one page unless on page 1."""
    if page_text.value <= 1:
        return
    page_text.value -= 1
def next_page(button_event):
    """Click handler for the '▶' button: advance one page unless on the last."""
    if page_text.value >= page_text.max:
        return
    page_text.value += 1
def update_pagination_label():
    """Refresh the '/<total>' label from the page selector's current maximum."""
    total_pages = page_text.max
    pagination_label.value = f'/{total_pages}'
# Re-render the table whenever the search text or the page number changes.
search_box.observe(lambda event: filter_table(search_box.value, page_text.value), names='value')
page_text.observe(lambda event: filter_table(search_box.value, page_text.value), names='value')
prev_button.on_click(prev_page)
next_button.on_click(next_page)

# Keep a named reference to the "/<total>" label: update_pagination_label()
# and update_pagination_box() both refer to pagination_label, which was
# previously never defined and raised NameError on first use.
pagination_label = widgets.Label(f'/{page_text.max}')
pagination_box = widgets.HBox([prev_button, widgets.Label('页码:'), page_text, pagination_label])


def update_pagination_box():
    """Rebuild the pagination row, including the '▶' button and total label."""
    pagination_box.children = [prev_button, widgets.Label('页码:'), page_text, pagination_label, next_button]


update_pagination_box()
display(widgets.VBox([search_box, pagination_box, output]))
17.3 引文footnote位置下移
custom.css里加上以下样式内容:
aside {
margin-bottom: -200px; /*--默认情况下是0。置为负数,代表 使引文位置向下移一点距离,这样可以避免与下文重合。 --*/
}
注意:如果引文条数增加到一定数量,可能会与上文内容出现重合。